├── docs
    ├── CONTRIBUTING.md
    ├── about
    │   └── license.md
    ├── usage.md
    ├── modules
    │   ├── data.md
    │   ├── method.md
    │   ├── metric.md
    │   ├── consensus.md
    │   └── index.md
    ├── requirements.txt
    ├── img
    │   ├── logo.png
    │   └── favicon.ico
    └── index.md
├── metric
    ├── V_measure
    │   ├── config
    │   │   ├── config_2.json
    │   │   ├── config_3.json
    │   │   └── config_1.json
    │   ├── V_measure_optargs.json
    │   ├── V_measure.yml
    │   └── V_measure.py
    ├── LISI
    │   ├── config
    │   │   └── config_1.json
    │   ├── LISI_optargs.json
    │   ├── LISI.yml
    │   └── LISI.r
    ├── ARI
    │   ├── ARI_optargs.json
    │   ├── ARI.yml
    │   └── ARI.py
    ├── FMI
    │   ├── FMI_optargs.json
    │   ├── FMI.yml
    │   └── FMI.py
    ├── MCC
    │   ├── MCC_optargs.json
    │   ├── MCC.yaml
    │   └── MCC.py
    ├── NMI
    │   ├── NMI_optargs.json
    │   ├── NMI.yml
    │   └── NMI.r
    ├── PAS
    │   ├── PAS.yml
    │   └── PAS_optargs.json
    ├── CHAOS
    │   ├── CHAOS.yml
    │   └── CHAOS_optargs.json
    ├── Entropy
    │   ├── Entropy_optargs.json
    │   ├── Entropy.yml
    │   └── Entropy.py
    ├── jaccard
    │   ├── jaccard_optargs.json
    │   ├── jaccard.yaml
    │   └── jaccard.py
    ├── SpatialARI
    │   ├── SpatialARI_optargs.json
    │   ├── SpatialARI.yml
    │   └── SpatialARI_env.sh
    ├── Completeness
    │   ├── Completeness_optargs.json
    │   ├── Completeness.yml
    │   └── Completeness.py
    ├── Homogeneity
    │   ├── Homogeneity_optargs.json
    │   ├── Homogeneity.yml
    │   └── Homogeneity.py
    ├── Calinski-Harabasz
    │   ├── Calinski-Harabasz_optargs.json
    │   ├── Calinski-Harabasz.yml
    │   └── Calinski-Harabasz.py
    ├── Davies-Bouldin
    │   ├── Davies-Bouldin_optargs.json
    │   ├── Davies-Bouldin.yml
    │   └── Davies-Bouldin.py
    ├── domain-specific-f1
    │   ├── domain-specific-f1_optargs.json
    │   └── domain-specific-f1.yml
    ├── cluster-specific-silhouette
    │   ├── cluster-specific-silhouette_optargs.json
    │   ├── cluster-specific-silhouette.yml
    │   └── cluster-specific-silhouette.r
    └── README.md
├── preprocessing
    ├── neighbors
    │   ├── radius
    │   │   ├── config
    │   │   │   └── config_1.json
    │   │   ├── radius.yml
    │   │   └── radius.py
    │   ├── n_rings
    │   │   ├── config
    │   │   │   └── config_1.json
    │   │   ├── n_rings.yml
    │   │   └── n_rings.py
    │   ├── n_neighbourhood
    │   │   ├── config
    │   │   │   └── config_1.json
    │   │   ├── n_neighbourhood.yml
    │   │   └── n_neighbourhood.py
    │   └── delaunay_triangulation
    │   │   ├── delaunay_triangulation.yml
    │   │   └── delaunay_triangulation.py
    ├── transformation
    │   ├── log1p.yml
    │   └── log1p.py
    ├── quality_control
    │   └── qc_scanpy.yml
    ├── feature_selection
    │   ├── highly_variable_genes_scanpy.yml
    │   └── highly_variable_genes_scanpy.py
    ├── README.md
    ├── dimensionality_reduction
    │   ├── PCA.yml
    │   └── PCA.py
    ├── feature_selection_MoranI
    │   └── spatially_variable_genes_moransI.yml
    └── visualization
    │   ├── visualization.yml
    │   └── pdf_merge.py
├── method
    ├── DRSC
    │   ├── config
    │   │   ├── config_all_genes.json
    │   │   ├── config_default.json
    │   │   ├── config_FindSVGs.json
    │   │   └── config_FindVariableFeatures.json
    │   ├── DRSC_optargs.json
    │   ├── drsc_env.sh
    │   └── DRSC.yml
    ├── DeepST
    │   ├── config
    │   │   ├── config_2.json
    │   │   ├── config_3.json
    │   │   └── config_1.json
    │   ├── DeepST_optargs.json
    │   ├── DeepST.yml
    │   └── DeepST_env.sh
    ├── GraphST
    │   ├── config
    │   │   ├── config_1.json
    │   │   ├── config_3.json
    │   │   ├── config_4.json
    │   │   ├── config_2.json
    │   │   ├── config_dlpfc.json
    │   │   └── config_default.json
    │   ├── GraphST_optargs.json
    │   └── GraphST.yml
    ├── SpaceFlow
    │   ├── config
    │   │   ├── config_1.json
    │   │   ├── config_default.json
    │   │   └── config_seqfish.json
    │   ├── spaceflow_optargs.json
    │   └── spaceflow.yml
    ├── STAGATE
    │   ├── config
    │   │   ├── config_2.json
    │   │   ├── config_3.json
    │   │   ├── config_4.json
    │   │   ├── config_5.json
    │   │   ├── config_1.json
    │   │   ├── config_starmap.json
    │   │   ├── config_default.json
    │   │   └── config_slide_stereo.json
    │   ├── STAGATE_optargs.json
    │   └── STAGATE.yml
    ├── meringue
    │   ├── domains.tar.gz
    │   ├── config
    │   │   ├── config_1.json
    │   │   ├── config_2.json
    │   │   ├── config_mob.json
    │   │   ├── config_clusteringvignette.json
    │   │   └── config_default.json
    │   ├── meringue_optargs.json
    │   ├── meringue.yml
    │   └── meringue_env.sh
    ├── SC_MEB
    │   ├── config
    │   │   └── config_default.json
    │   ├── SC_MEB_optargs.json
    │   ├── scmeb_env.sh
    │   └── SC_MEB.yml
    ├── maple
    │   ├── config
    │   │   └── config_default.json
    │   ├── maple_optargs.json
    │   ├── maple.yml
    │   └── maple_env.sh
    ├── SOTIP
    │   ├── config
    │   │   ├── config_default.json
    │   │   ├── config_osmFISH.json
    │   │   ├── config_MIBI_TNBC.json
    │   │   ├── config_scMEP_CLCC.json
    │   │   └── config_Visium_dlpfc.json
    │   ├── sotip_optargs.json
    │   └── sotip.yml
    ├── precast
    │   ├── config
    │   │   ├── config_1.json
    │   │   ├── config_dlpfc.json
    │   │   ├── config_default.json
    │   │   └── config_BC.json
    │   ├── precast_optargs.json
    │   ├── precast_env.sh
    │   └── precast.yml
    ├── BANKSY
    │   ├── config
    │   │   ├── config_github.json
    │   │   ├── config_default.json
    │   │   ├── config_starmap.json
    │   │   └── config_dlpfc.json
    │   ├── banksy_optargs.json
    │   └── banksy_env.sh
    ├── BayesSpace
    │   ├── config
    │   │   ├── config_default.json
    │   │   ├── config_1.json
    │   │   ├── config_scc.json
    │   │   └── config_melanoma.json
    │   ├── BayesSpace_optargs.json
    │   └── BayesSpace.yml
    ├── Giotto
    │   ├── Giotto_optargs.json
    │   ├── config
    │   │   ├── config_visium.json
    │   │   ├── config_seqfish.json
    │   │   └── config_default.json
    │   └── Giotto_env.sh
    ├── SEDR
    │   ├── SEDR_optargs.json
    │   ├── config
    │   │   ├── config_1.json
    │   │   ├── config_2.json
    │   │   ├── config_dlpfc.json
    │   │   ├── config_default.json
    │   │   └── config_stereoseq.json
    │   └── SEDR.yml
    ├── bass
    │   ├── bass_optargs.json
    │   ├── config
    │   │   ├── config_2.json
    │   │   ├── config_1.json
    │   │   ├── config_3.json
    │   │   ├── config_dlpfc.json
    │   │   ├── config_default.json
    │   │   └── config_starmap.json
    │   └── bass_env.sh
    ├── conST
    │   ├── conST_optargs.json
    │   ├── config
    │   │   ├── config_Visium_dlpfc.json
    │   │   └── config_default.json
    │   └── conST.yml
    ├── seurat
    │   ├── config
    │   │   ├── config_leiden_spatial.json
    │   │   ├── config_leiden_VisiumHD.json
    │   │   ├── config_default.json
    │   │   └── config_leiden_CODEX.json
    │   └── seurat_optargs.json
    ├── SpiceMix
    │   ├── SpiceMix_optargs.json
    │   ├── SpiceMix.yml
    │   ├── config
    │   │   ├── config_default.json
    │   │   └── config_Visium_dlpfc.json
    │   └── README.md
    ├── spaGCN
    │   ├── spaGCN_optargs.json
    │   ├── spaGCN.yml
    │   └── config
    │   │   ├── config_1.json
    │   │   ├── config_2.json
    │   │   ├── config_3.json
    │   │   └── config_default.json
    ├── spatialGE
    │   ├── spatialGE_optargs.json
    │   ├── config
    │   │   ├── config_sct.json
    │   │   ├── config_default.json
    │   │   ├── config_highweight.json
    │   │   └── config_midweight.json
    │   └── spatialGE_env.sh
    ├── stardust
    │   ├── config
    │   │   ├── config_default.json
    │   │   └── config_tutorial.json
    │   ├── stardust_optargs.json
    │   └── stardust_env.sh
    ├── SCAN-IT
    │   ├── scanit_optargs.json
    │   ├── config
    │   │   ├── config_slideseq_mouse_cerebellum.json
    │   │   ├── config_default.json
    │   │   ├── config_slideseq_hippocampus.json
    │   │   ├── config_slideseq_mouse_olfactory_bulb.json
    │   │   └── config_seqFISH_mouse_SScortex.json
    │   └── scanit.yml
    ├── scanpy
    │   ├── scanpy_optargs.json
    │   ├── scanpy_env.yaml
    │   └── config
    │   │   ├── config_leiden_tutorial.json
    │   │   ├── config_leiden_MERFISH.json
    │   │   └── config_default.json
    ├── CellCharter
    │   ├── CellCharter_optargs.json
    │   ├── config
    │   │   ├── config_1.json
    │   │   ├── config_2.json
    │   │   ├── config_default.json
    │   │   └── config_dlpfc.json
    │   └── CellCharter.yml
    └── search_res.r
├── workflows
    ├── .gitignore
    ├── path_config_test.yaml
    ├── 01_download.smk
    ├── shared
    │   └── functions.py
    ├── excute_config_test.yaml
    ├── 05_aggregation.smk
    └── 02_preprocessing.smk
├── .all-contributorsrc
├── templates
    ├── data_optargs.json
    ├── metric_optargs.json
    ├── method_optargs.json
    ├── data_optargs.schema.yaml
    ├── metric_optargs.schema.yaml
    ├── method_optargs.schema.yaml
    ├── consensus_BC.py
    ├── metric.py
    ├── consensus_BC.r
    ├── metric.r
    ├── README.md
    ├── consensus.py
    └── data.py
├── data
    ├── SEA_AD_data
    │   ├── SEA_AD_data_optargs.json
    │   ├── SEA_AD_data.yml
    │   └── SEA_AD_LICENSE.txt
    ├── libd_dlpfc
    │   ├── libd_dlpfc_optargs.json
    │   └── libd_dlpfc.yml
    ├── osmfish_Ssp
    │   ├── osmfish_Ssp_optargs.json
    │   └── osmfish_Ssp.yml
    ├── spatialDLPFC
    │   ├── spatialDLPFC_optargs.json
    │   ├── README.md
    │   ├── spatialDLPFC.yml
    │   └── spatialDLPFC.r
    ├── sotip_simulation
    │   ├── sotip_simulation_optargs.json
    │   └── sotip.yml
    ├── visium_chicken_heart
    │   ├── chicken_heart_optargs.json
    │   └── chicken_heart.yml
    ├── mouse_kidney_coronal
    │   ├── mouse_kidney_coronal.yml
    │   └── mouse_kidney_coronal_optargs.json
    ├── cosmx_liver
    │   └── cosmx_liver.yml
    ├── xenium-breast-cancer
    │   ├── xenium-breast-cancer_optargs.json
    │   └── xenium-breast-cancer.yml
    ├── abc_atlas_wmb_thalamus
    │   ├── abc_atlas_wmb_thalamus_optargs.json
    │   ├── abc_atlas_wmb_thalamus.yml
    │   └── LICENSE.txt
    ├── STARmap-2018-mouse-cortex
    │   ├── STARmap-2018-mouse-cortex_optargs.json
    │   └── environment.yml
    ├── cosmx_lung
    │   └── cosmx_lung.yml
    ├── merfish_devheart
    │   └── merfish_devheart.yml
    ├── pachter_simulation
    │   ├── pachter_simulation.yml
    │   └── pachter_simulation.py
    ├── stereoseq_liver
    │   └── stereoseq_liver.yml
    ├── visium_breast_cancer_SEDR
    │   ├── visium_breast_cancer_SEDR_optargs.json
    │   └── visium_breast_cancer_SEDR.yml
    ├── mouse_brain_sagittal_anterior
    │   ├── mouse_brain_sagittal_anterior.yml
    │   └── mouse_brain_sagittal_anterior_optargs.json
    ├── mouse_brain_sagittal_posterior
    │   ├── mouse_brain_sagittal_posterior.yml
    │   └── mouse_brain_sagittal_posterior_optargs.json
    ├── xenium-mouse-brain-SergioSalas
    │   ├── xenium-mouse-brain-SergioSalas_optargs.json
    │   └── environment.yml
    ├── her2st-breast-cancer
    │   └── environment.yml
    ├── STARmap_plus
    │   └── STARmap_plus.yml
    ├── slideseq2_olfactory_bulb
    │   └── slideseq2_olfactory_bulb.yml
    ├── stereoseq_mouse_embryo
    │   └── stereoseq_mouse_embryo.yml
    ├── stereoseq_olfactory_bulb
    │   └── stereoseq_olfactory_bulb.yml
    ├── locus_coeruleus
    │   └── locus_coeruleus.yml
    ├── stereoseq_developing_Drosophila_embryos_larvae
    │   └── stereoseq_developing_Drosophila_embryos_larvae.yml
    ├── visium_hd_cancer_colon
    │   └── visium_hd_cancer_colon.yml
    └── xenium-ffpe-bc-idc
    │   └── xenium-ffpe-bc-idc.yml
├── consensus
    ├── 02_BC_ranking
    │   ├── BC_ranking.yaml
    │   └── BC_ranking.r
    ├── 01_Results_Aggregation
    │   └── Results_Aggregation.yaml
    ├── 02_Cross_method_ARI
    │   ├── Cross_method_ARI.yaml
    │   └── Cross_method_ARI.r
    ├── 03_Consensus_kmode
    │   ├── Consensus_kmode.yaml
    │   └── Consensus_kmode.r
    ├── 02_Smoothness_entropy
    │   ├── Smoothness_entropy.yaml
    │   └── Smoothness_entropy.r
    ├── 03_Consensus_weighted
    │   ├── networkanalysis-1.3.0.jar
    │   └── Consensus_weighted.yaml
    ├── 03_Cross_method_entropy
    │   └── Cross_method_entropy.yaml
    └── 03_Consensus_lca
    │   ├── Consensus_lca.yaml
    │   └── Consensus_lca.r
├── .github
    └── workflows
    │   ├── main.yml
    │   └── build_mkdocs.yml
├── mkdocs.yml
├── LICENSE.txt
├── CITATION.cff
└── CODE_OF_CONDUCT.md
/docs/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | ../CONTRIBUTING.md -------------------------------------------------------------------------------- /docs/about/license.md: -------------------------------------------------------------------------------- 1 | ../../LICENSE.txt
-------------------------------------------------------------------------------- /docs/usage.md: -------------------------------------------------------------------------------- 1 | ../workflows/README.md -------------------------------------------------------------------------------- /docs/modules/data.md: -------------------------------------------------------------------------------- 1 | ../../data/README.md -------------------------------------------------------------------------------- /docs/modules/method.md: -------------------------------------------------------------------------------- 1 | ../../method/README.md -------------------------------------------------------------------------------- /docs/modules/metric.md: -------------------------------------------------------------------------------- 1 | ../../metric/README.md -------------------------------------------------------------------------------- /metric/V_measure/config/config_2.json: -------------------------------------------------------------------------------- 1 | {"beta": 1} -------------------------------------------------------------------------------- /metric/V_measure/config/config_3.json: -------------------------------------------------------------------------------- 1 | {"beta": 2} -------------------------------------------------------------------------------- /docs/modules/consensus.md: -------------------------------------------------------------------------------- 1 | ../../consensus/README.md -------------------------------------------------------------------------------- /metric/LISI/config/config_1.json: -------------------------------------------------------------------------------- 1 | {"perplexity": 15} -------------------------------------------------------------------------------- /metric/V_measure/config/config_1.json: -------------------------------------------------------------------------------- 1 | {"beta": 0.5} -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | mkdocs ~= 1.6 2 | mkdocs-macros-plugin ~= 1.3 3 | -------------------------------------------------------------------------------- /preprocessing/neighbors/radius/config/config_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "radius":1 3 | } -------------------------------------------------------------------------------- /preprocessing/neighbors/n_rings/config/config_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "n_rings":1 3 | } -------------------------------------------------------------------------------- /preprocessing/neighbors/n_neighbourhood/config/config_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "n_neighs":6 3 | } -------------------------------------------------------------------------------- /method/DRSC/config/config_all_genes.json: -------------------------------------------------------------------------------- 1 | {"feature_method": "All genes", "source": "NA"} 2 | -------------------------------------------------------------------------------- /workflows/.gitignore: -------------------------------------------------------------------------------- 1 | notes.md 2 | .snakemake 3 | shared/__pycache__/ 4 | *_requirements.info -------------------------------------------------------------------------------- 
/method/DeepST/config/config_2.json: -------------------------------------------------------------------------------- 1 | { 2 | "spatial_type": "BallTree", 3 | "npcs": 200 4 | } 5 | -------------------------------------------------------------------------------- /method/DeepST/config/config_3.json: -------------------------------------------------------------------------------- 1 | { 2 | "spatial_type": "KDTree", 3 | "npcs": 200 4 | } 5 | 6 | -------------------------------------------------------------------------------- /docs/img/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SpatialHackathon/SACCELERATOR/HEAD/docs/img/logo.png -------------------------------------------------------------------------------- /method/DeepST/config/config_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "spatial_type": "LinearRegress", 3 | "npcs": 200 4 | } 5 | -------------------------------------------------------------------------------- /docs/img/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SpatialHackathon/SACCELERATOR/HEAD/docs/img/favicon.ico -------------------------------------------------------------------------------- /.all-contributorsrc: -------------------------------------------------------------------------------- 1 | { 2 | "projectName": "SpaceHack2023", 3 | "projectOwner": "SpatialHackathon" 4 | } 5 | -------------------------------------------------------------------------------- /method/GraphST/config/config_1.json: -------------------------------------------------------------------------------- 1 | {"method": "leiden", "refine": true, "radius": 100, "n_pcs": 20, "n_genes": 3000} -------------------------------------------------------------------------------- /method/GraphST/config/config_3.json: -------------------------------------------------------------------------------- 1 | {"method": "leiden", "refine": true, "radius": 50, "n_pcs": 20, "n_genes": 3000} -------------------------------------------------------------------------------- /method/GraphST/config/config_4.json: -------------------------------------------------------------------------------- 1 | {"method": "mclust", "refine": true, "radius": 100, "n_pcs": 20, "n_genes": 3000} -------------------------------------------------------------------------------- /method/SpaceFlow/config/config_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "n_genes": 3000, 3 | "n_pcs": 50, 4 | "n_neighbours": 15 5 | } -------------------------------------------------------------------------------- /templates/data_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "min_cells" : 10, 3 | "min_genes" : 20, 4 | "min_counts": 30 5 | } 6 | -------------------------------------------------------------------------------- /method/GraphST/config/config_2.json: -------------------------------------------------------------------------------- 1 | {"method": "leiden", "refine": false, "radius": 50, "n_pcs": 20, "n_genes": 3000} 2 | -------------------------------------------------------------------------------- /templates/metric_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "groundtruth": true, 3 | "embedding": false, 4 | "config_file": true 5 | } 
-------------------------------------------------------------------------------- /method/STAGATE/config/config_2.json: -------------------------------------------------------------------------------- 1 | {"method": "louvain", "model": "KNN", "k_cutoff": 10, "n_genes": 3000, "min_cells": 0} 2 | -------------------------------------------------------------------------------- /method/STAGATE/config/config_3.json: -------------------------------------------------------------------------------- 1 | {"method": "louvain", "model": "KNN", "k_cutoff": 15, "n_genes": 3000, "min_cells": 0} 2 | -------------------------------------------------------------------------------- /method/STAGATE/config/config_4.json: -------------------------------------------------------------------------------- 1 | {"method": "mclust", "model": "KNN", "k_cutoff": 10, "n_genes": 3000, "min_cells": 0} 2 | -------------------------------------------------------------------------------- /method/STAGATE/config/config_5.json: -------------------------------------------------------------------------------- 1 | {"method": "mclust", "model": "KNN", "k_cutoff": 15, "n_genes": 3000, "min_cells": 0} 2 | -------------------------------------------------------------------------------- /method/meringue/domains.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SpatialHackathon/SACCELERATOR/HEAD/method/meringue/domains.tar.gz -------------------------------------------------------------------------------- /metric/ARI/ARI_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "groundtruth": true, 3 | "embedding": false, 4 | "config_file": false 5 | } 6 | -------------------------------------------------------------------------------- /data/SEA_AD_data/SEA_AD_data_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "min_cells" : 1, 3 | "min_genes" : 1, 4 | "min_counts": 1 5 | } 6 | 7 | -------------------------------------------------------------------------------- /data/libd_dlpfc/libd_dlpfc_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "min_cells" : 1, 3 | "min_genes" : 1, 4 | "min_counts": 1 5 | } 6 | 7 | -------------------------------------------------------------------------------- /data/osmfish_Ssp/osmfish_Ssp_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "min_cells" : 1, 3 | "min_genes" : 1, 4 | "min_counts": 1 5 | } 6 | 7 | -------------------------------------------------------------------------------- /method/STAGATE/config/config_1.json: -------------------------------------------------------------------------------- 1 | {"method": "louvain", "model": "Radius", "rad_cutoff": 150, "n_genes": 3000, "min_cells": 0} 2 | -------------------------------------------------------------------------------- /metric/FMI/FMI_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "groundtruth": true, 3 | "embedding": false, 4 | "config_file": false 5 | } 6 | 7 | -------------------------------------------------------------------------------- /metric/LISI/LISI_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "groundtruth": true, 3 | "embedding": true, 4 | "config_file": true 5 | } 6 | 7 | 
-------------------------------------------------------------------------------- /metric/MCC/MCC_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "groundtruth": true, 3 | "embedding": false, 4 | "config_file": false 5 | } 6 | 7 | -------------------------------------------------------------------------------- /metric/NMI/NMI_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "groundtruth": true, 3 | "embedding": false, 4 | "config_file": false 5 | } 6 | 7 | -------------------------------------------------------------------------------- /metric/PAS/PAS.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - r-base=4.3.1 5 | - r-optparse=1.7.3 6 | - r-pdist=1.2.1 -------------------------------------------------------------------------------- /data/spatialDLPFC/spatialDLPFC_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "min_cells" : 1, 3 | "min_genes" : 1, 4 | "min_counts": 1 5 | } 6 | 7 | -------------------------------------------------------------------------------- /metric/ARI/ARI.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - python=3.12.0 5 | - pandas=2.1.1 6 | - scikit-learn=1.3.2 -------------------------------------------------------------------------------- /metric/CHAOS/CHAOS.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - r-base=4.3.1 5 | - r-optparse=1.7.3 6 | - r-pdist=1.2.1 -------------------------------------------------------------------------------- /metric/Entropy/Entropy_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "groundtruth": true, 3 | "embedding": false, 4 | "config_file": false 5 | } 6 | 7 | -------------------------------------------------------------------------------- /metric/FMI/FMI.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - python=3.12.0 5 | - pandas=2.1.1 6 | - scikit-learn=1.3.2 -------------------------------------------------------------------------------- /metric/MCC/MCC.yaml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - python=3.12.0 5 | - pandas=2.1.1 6 | - scikit-learn=1.3.2 -------------------------------------------------------------------------------- /metric/NMI/NMI.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - r-base=4.3.1 5 | - r-aricode=1.0.2 6 | - r-optparse=1.7.3 -------------------------------------------------------------------------------- /metric/jaccard/jaccard_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "groundtruth": true, 3 | "embedding": false, 4 | "config_file": false 5 | } 6 | 7 | -------------------------------------------------------------------------------- /preprocessing/transformation/log1p.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - python=3.9.18 5 | - scanpy=1.10.1 6 | 
-------------------------------------------------------------------------------- /consensus/02_BC_ranking/BC_ranking.yaml: -------------------------------------------------------------------------------- 1 | channels: 2 | - r 3 | - conda-forge 4 | dependencies: 5 | - r-base=4.4.2 6 | - r-optparse=1.7.5 -------------------------------------------------------------------------------- /data/sotip_simulation/sotip_simulation_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "min_cells" : 1, 3 | "min_genes" : 1, 4 | "min_counts": 1 5 | } 6 | 7 | -------------------------------------------------------------------------------- /data/visium_chicken_heart/chicken_heart_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "min_cells" : 1, 3 | "min_genes" : 1, 4 | "min_counts": 1 5 | } 6 | 7 | -------------------------------------------------------------------------------- /method/meringue/config/config_1.json: -------------------------------------------------------------------------------- 1 | {"min.reads": 100, "min.lib.size": 100, "k": 18, "alpha": 1, "beta": 1, "n_pcs": 5, "filterDist": 2} 2 | -------------------------------------------------------------------------------- /method/meringue/config/config_2.json: -------------------------------------------------------------------------------- 1 | {"min.reads": 100, "min.lib.size": 100, "k": 18, "alpha": 0, "beta": 0, "n_pcs": 5, "filterDist": 2} 2 | -------------------------------------------------------------------------------- /metric/Entropy/Entropy.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - python=3.12.0 5 | - pandas=2.1.1 6 | - scikit-learn=1.4.2 -------------------------------------------------------------------------------- /metric/SpatialARI/SpatialARI_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "groundtruth": true, 3 | "embedding": false, 4 | "config_file": false 5 | } 6 | -------------------------------------------------------------------------------- /metric/V_measure/V_measure_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "groundtruth": true, 3 | "embedding": false, 4 | "config_file": true 5 | } 6 | 7 | -------------------------------------------------------------------------------- /preprocessing/quality_control/qc_scanpy.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - python=3.9.18 5 | - scanpy=1.10.1 6 | -------------------------------------------------------------------------------- /consensus/01_Results_Aggregation/Results_Aggregation.yaml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - python=3.9.21 5 | - pandas=2.2.3 -------------------------------------------------------------------------------- /data/mouse_kidney_coronal/mouse_kidney_coronal.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - python=3.9.18 5 | - scanpy=1.10.1 6 | -------------------------------------------------------------------------------- /metric/Completeness/Completeness_optargs.json: -------------------------------------------------------------------------------- 1 | 
{ 2 | "groundtruth": true, 3 | "embedding": false, 4 | "config_file": false 5 | } 6 | 7 | -------------------------------------------------------------------------------- /metric/Homogeneity/Homogeneity_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "groundtruth": true, 3 | "embedding": false, 4 | "config_file": false 5 | } 6 | 7 | -------------------------------------------------------------------------------- /metric/V_measure/V_measure.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - python=3.12.0 5 | - pandas=2.1.1 6 | - scikit-learn=1.3.2 -------------------------------------------------------------------------------- /metric/jaccard/jaccard.yaml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - python=3.12.0 5 | - pandas=2.1.1 6 | - scikit-learn=1.3.2 -------------------------------------------------------------------------------- /data/cosmx_liver/cosmx_liver.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - python=3.9.18 5 | - pip 6 | - pip: 7 | - tiledbsc==0.1.5 -------------------------------------------------------------------------------- /data/mouse_kidney_coronal/mouse_kidney_coronal_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "min_cells" : 1, 3 | "min_genes" : 1, 4 | "min_counts": 1 5 | } 6 | 7 | -------------------------------------------------------------------------------- /data/xenium-breast-cancer/xenium-breast-cancer_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "min_cells" : 1, 3 | "min_genes" : 1, 4 | "min_counts": 1 5 | } 6 | 7 | -------------------------------------------------------------------------------- /metric/Calinski-Harabasz/Calinski-Harabasz_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "groundtruth": false, 3 | "embedding": true, 4 | "config_file": false 5 | } 6 | -------------------------------------------------------------------------------- /metric/Completeness/Completeness.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - python=3.12.0 5 | - pandas=2.1.1 6 | - scikit-learn=1.3.2 -------------------------------------------------------------------------------- /metric/Davies-Bouldin/Davies-Bouldin_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "groundtruth": false, 3 | "embedding": true, 4 | "config_file": false 5 | } 6 | 7 | -------------------------------------------------------------------------------- /metric/Homogeneity/Homogeneity.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - python=3.12.0 5 | - pandas=2.1.1 6 | - scikit-learn=1.3.2 -------------------------------------------------------------------------------- /data/abc_atlas_wmb_thalamus/abc_atlas_wmb_thalamus_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "min_cells" : 1, 3 | "min_genes" : 1, 4 | "min_counts": 1 5 | } 6 | 7 | 
-------------------------------------------------------------------------------- /method/SC_MEB/config/config_default.json: -------------------------------------------------------------------------------- 1 | {"n_pcs": 15, "n_genes": 2000, "source":"https://shufeyangyi2015310117.github.io/SC.MEB/articles/SC.MEB_CRC.html"} -------------------------------------------------------------------------------- /metric/Davies-Bouldin/Davies-Bouldin.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - python=3.12.0 5 | - pandas=2.1.1 6 | - scikit-learn=1.3.2 -------------------------------------------------------------------------------- /data/STARmap-2018-mouse-cortex/STARmap-2018-mouse-cortex_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "min_cells" : 1, 3 | "min_genes" : 1, 4 | "min_counts": 1 5 | } 6 | 7 | -------------------------------------------------------------------------------- /data/cosmx_lung/cosmx_lung.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - python=3.9.18 5 | - scanpy=1.10.1 6 | - r-base=4.3.3 7 | - rpy2=3.5.11 -------------------------------------------------------------------------------- /data/merfish_devheart/merfish_devheart.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - python=3.9.18 5 | - scanpy=1.10.1 6 | - openpyxl=3.1.2 7 | -------------------------------------------------------------------------------- /data/pachter_simulation/pachter_simulation.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - python=3.11.6 5 | - pip 6 | - pip: 7 | - pypdl==1.3.2 -------------------------------------------------------------------------------- /data/stereoseq_liver/stereoseq_liver.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - python=3.9.18 5 | - scanpy=1.10.1 6 | - openpyxl=3.1.2 7 | -------------------------------------------------------------------------------- /data/visium_breast_cancer_SEDR/visium_breast_cancer_SEDR_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "min_cells" : 1, 3 | "min_genes" : 1, 4 | "min_counts": 1 5 | } 6 | 7 | -------------------------------------------------------------------------------- /method/maple/config/config_default.json: -------------------------------------------------------------------------------- 1 | { 2 | "n_genes": 2000, 3 | "n_pcs": 8, 4 | "source": "https://carter-allen.github.io/stxBrain_multi_maple.html" 5 | } -------------------------------------------------------------------------------- /metric/Calinski-Harabasz/Calinski-Harabasz.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - python=3.12.0 5 | - pandas=2.1.1 6 | - scikit-learn=1.3.2 -------------------------------------------------------------------------------- /metric/domain-specific-f1/domain-specific-f1_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "groundtruth": true, 3 | "embedding": false, 4 | "config_file": false 5 | } 6 | 7 | 
-------------------------------------------------------------------------------- /preprocessing/feature_selection/highly_variable_genes_scanpy.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - python=3.9.18 5 | - scanpy=1.10.1 6 | -------------------------------------------------------------------------------- /data/mouse_brain_sagittal_anterior/mouse_brain_sagittal_anterior.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - python=3.9.18 5 | - scanpy=1.10.1 6 | -------------------------------------------------------------------------------- /data/mouse_brain_sagittal_anterior/mouse_brain_sagittal_anterior_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "min_cells" : 1, 3 | "min_genes" : 1, 4 | "min_counts": 1 5 | } 6 | 7 | -------------------------------------------------------------------------------- /data/mouse_brain_sagittal_posterior/mouse_brain_sagittal_posterior.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - python=3.9.18 5 | - scanpy=1.10.1 6 | -------------------------------------------------------------------------------- /metric/PAS/PAS_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "groundtruth": false, 3 | "embedding": false, 4 | "config_file": false, 5 | "physical_coordinate": true 6 | } 7 | 8 | -------------------------------------------------------------------------------- /preprocessing/README.md: -------------------------------------------------------------------------------- 1 | # Preprocessing 2 | 3 | Please contact one of the organisers for instructions should your method require any special preprocessing.
4 | -------------------------------------------------------------------------------- /preprocessing/neighbors/n_rings/n_rings.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - python=3.9.18 5 | - pip 6 | - pip: 7 | - squidpy==1.5.0 8 | -------------------------------------------------------------------------------- /preprocessing/neighbors/radius/radius.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - python=3.9.18 5 | - pip 6 | - pip: 7 | - squidpy==1.5.0 8 | -------------------------------------------------------------------------------- /data/mouse_brain_sagittal_posterior/mouse_brain_sagittal_posterior_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "min_cells" : 1, 3 | "min_genes" : 1, 4 | "min_counts": 1 5 | } 6 | 7 | -------------------------------------------------------------------------------- /data/xenium-mouse-brain-SergioSalas/xenium-mouse-brain-SergioSalas_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "min_cells" : 1, 3 | "min_genes" : 1, 4 | "min_counts": 1 5 | } 6 | 7 | -------------------------------------------------------------------------------- /method/SOTIP/config/config_default.json: -------------------------------------------------------------------------------- 1 | { 2 | "res": 1, 3 | "n_neighbours": 15, 4 | "source": "https://scanpy.readthedocs.io/en/stable/api/index.html" 5 | } 6 | -------------------------------------------------------------------------------- /method/precast/config/config_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "n_genes": 700, 3 | "method": "HVGs", 4 | "maxIter": 30, 5 | "postminspots":15, 6 | "postminfeatures":15 7 | } -------------------------------------------------------------------------------- /metric/CHAOS/CHAOS_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "groundtruth": false, 3 | "embedding": false, 4 | "config_file": false, 5 | "physical_coordinate": true 6 | } 7 | 8 | -------------------------------------------------------------------------------- /metric/LISI/LISI.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | - bioconda 4 | dependencies: 5 | - r-base=4.3.1 6 | - r-optparse=1.7.3 7 | - r-lisi=1.0 8 | - r-rjson=0.2.21 -------------------------------------------------------------------------------- /consensus/02_Cross_method_ARI/Cross_method_ARI.yaml: -------------------------------------------------------------------------------- 1 | channels: 2 | - r 3 | - conda-forge 4 | dependencies: 5 | - r-base=4.4.2 6 | - r-optparse=1.7.5 7 | - r-mclust=6.1.1 -------------------------------------------------------------------------------- /consensus/03_Consensus_kmode/Consensus_kmode.yaml: -------------------------------------------------------------------------------- 1 | channels: 2 | - r 3 | - conda-forge 4 | dependencies: 5 | - r-base=4.4.2 6 | - r-optparse=1.7.5 7 | - r-dicer=2.2.0 -------------------------------------------------------------------------------- /method/precast/config/config_dlpfc.json: -------------------------------------------------------------------------------- 1 | { 2 | "n_genes": 2000, 3 | "method": "HVGs", 4 | "maxIter": 30, 5 | 
"postminspots":15, 6 | "postminfeatures":15 7 | } -------------------------------------------------------------------------------- /metric/cluster-specific-silhouette/cluster-specific-silhouette_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "groundtruth": false, 3 | "embedding": true, 4 | "config_file": false 5 | } 6 | 7 | -------------------------------------------------------------------------------- /consensus/02_Smoothness_entropy/Smoothness_entropy.yaml: -------------------------------------------------------------------------------- 1 | channels: 2 | - r 3 | - conda-forge 4 | dependencies: 5 | - r-base=4.4.2 6 | - r-optparse=1.7.5 7 | - r-dbscan=1.2_0 -------------------------------------------------------------------------------- /consensus/03_Consensus_weighted/networkanalysis-1.3.0.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SpatialHackathon/SACCELERATOR/HEAD/consensus/03_Consensus_weighted/networkanalysis-1.3.0.jar -------------------------------------------------------------------------------- /data/STARmap-2018-mouse-cortex/environment.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - anndata=0.10.3 5 | - gdown=4.7.3 6 | - scipy=1.11.4 7 | - pandas=2.1.4 8 | -------------------------------------------------------------------------------- /data/xenium-mouse-brain-SergioSalas/environment.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - anndata=0.10.3 5 | - gdown=4.6.0 6 | - scipy=1.11.4 7 | - pandas=2.1.4 -------------------------------------------------------------------------------- /method/precast/config/config_default.json: -------------------------------------------------------------------------------- 1 | { 2 | "n_genes": 2000, 3 | "method": "SPARK-X", 4 | "maxIter": 20, 5 | "postminspots":15, 6 | "postminfeatures":15 7 | } -------------------------------------------------------------------------------- /preprocessing/neighbors/n_neighbourhood/n_neighbourhood.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - python=3.9.18 5 | - pip 6 | - pip: 7 | - squidpy==1.5.0 8 | -------------------------------------------------------------------------------- /data/her2st-breast-cancer/environment.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - anndata=0.10.3 5 | - scipy=1.11.4 6 | - pandas=2.1.4 7 | - p7zip 8 | - gzip 9 | -------------------------------------------------------------------------------- /data/sotip_simulation/sotip.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - python=3.11.6 5 | - scipy=1.11.4 6 | - anndata=0.10.3 7 | - numpy=1.26.2 8 | - pandas=2.1.3 -------------------------------------------------------------------------------- /method/BANKSY/config/config_github.json: -------------------------------------------------------------------------------- 1 | {"method": "leiden", "lambda": 0.8, "k_geom": 15, "n_pcs": 20, "n_genes":1e500, "use_agf": true, "source": "https://github.com/prabhakarlab/Banksy"} -------------------------------------------------------------------------------- 
/method/BayesSpace/config/config_default.json: -------------------------------------------------------------------------------- 1 | { 2 | "n_genes":2000, 3 | "n_pcs": 15, 4 | "gamma": 3, 5 | "nrep": 50000, 6 | "reference": "Default is the same as dlpfc" 7 | } 8 | -------------------------------------------------------------------------------- /preprocessing/dimensionality_reduction/PCA.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - python=3.12.0 5 | - pandas=2.1.1 6 | - scikit-learn=1.3.2 7 | - scipy=1.11.3 -------------------------------------------------------------------------------- /consensus/03_Cross_method_entropy/Cross_method_entropy.yaml: -------------------------------------------------------------------------------- 1 | channels: 2 | - r 3 | - conda-forge 4 | dependencies: 5 | - r-base=4.4.2 6 | - r-optparse=1.7.5 7 | - r-clue=0.3_66 8 | -------------------------------------------------------------------------------- /data/SEA_AD_data/SEA_AD_data.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - numpy=1.26.2 5 | - pandas=2.1.4 6 | - anndata=0.10.3 7 | - scipy=1.11.4 8 | - boto3=1.33.13 9 | -------------------------------------------------------------------------------- /data/spatialDLPFC/README.md: -------------------------------------------------------------------------------- 1 | Currently there are no images included. 2 | 3 | Full-resolution images could not be found. It would be possible to download lower-resolution images at a later point. -------------------------------------------------------------------------------- /templates/method_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "matrix": "counts", 3 | "integrated_feature_selection": false, 4 | "image": true, 5 | "neighbors": false, 6 | "config_file": true 7 | } -------------------------------------------------------------------------------- /consensus/03_Consensus_lca/Consensus_lca.yaml: -------------------------------------------------------------------------------- 1 | channels: 2 | - r 3 | - conda-forge 4 | dependencies: 5 | - r-base=4.4.2 6 | - r-optparse=1.7.5 7 | - r-dicer=2.2.0 8 | - r-polca=1.6.0 9 | -------------------------------------------------------------------------------- /method/DRSC/DRSC_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "matrix": "counts", 3 | "integrated_feature_selection": false, 4 | "image": false, 5 | "neighbors": false, 6 | "config_file": false 7 | } 8 | -------------------------------------------------------------------------------- /method/DRSC/config/config_default.json: -------------------------------------------------------------------------------- 1 | {"feature_method": "FindSVGs", "n_genes": 2000, "source": "https://github.com/feiyoung/DR.SC/blob/64d95135ab79e3e42e86e4a2e31724d17bec29d5/R/main.R#L355"} 2 | -------------------------------------------------------------------------------- /method/Giotto/Giotto_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "matrix": "counts", 3 | "integrated_feature_selection": false, 4 | "image": false, 5 | "neighbors": false, 6 | "config_file": true 7 | } -------------------------------------------------------------------------------- /method/SEDR/SEDR_optargs.json:
-------------------------------------------------------------------------------- 1 | { 2 | "matrix": "counts", 3 | "integrated_feature_selection": false, 4 | "image": false, 5 | "neighbors": false, 6 | "config_file": true 7 | } 8 | -------------------------------------------------------------------------------- /method/SOTIP/sotip_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "matrix": "counts", 3 | "integrated_feature_selection": false, 4 | "image": false, 5 | "neighbors": false, 6 | "config_file": true 7 | } 8 | -------------------------------------------------------------------------------- /method/bass/bass_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "matrix": "counts", 3 | "integrated_feature_selection": false, 4 | "image": false, 5 | "neighbors": false, 6 | "config_file": true 7 | } 8 | -------------------------------------------------------------------------------- /method/conST/conST_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "matrix": "counts", 3 | "integrated_feature_selection": false, 4 | "image": true, 5 | "neighbors": false, 6 | "config_file": true 7 | } 8 | -------------------------------------------------------------------------------- /method/maple/maple_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "matrix": "counts", 3 | "integrated_feature_selection": false, 4 | "image": false, 5 | "neighbors": false, 6 | "config_file": true 7 | } 8 | -------------------------------------------------------------------------------- /method/meringue/config/config_mob.json: -------------------------------------------------------------------------------- 1 | {"min.reads": 100, "min.lib.size": 100, "k": 50, "alpha": 1, "beta": 1, "n_pcs": 5, "filterDist": 2.5, "source": "https://jef.works/MERINGUE/mOB_analysis"} 2 | -------------------------------------------------------------------------------- /method/precast/precast_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "matrix": "counts", 3 | "integrated_feature_selection": false, 4 | "image": false, 5 | "neighbors": false, 6 | "config_file": true 7 | } -------------------------------------------------------------------------------- /method/seurat/config/config_leiden_spatial.json: -------------------------------------------------------------------------------- 1 | { 2 | "algorithm": 4, 3 | "n_genes":2000, 4 | "n_pcs":30, 5 | "source": "https://satijalab.org/seurat/articles/spatial_vignette" 6 | } 7 | -------------------------------------------------------------------------------- /preprocessing/neighbors/delaunay_triangulation/delaunay_triangulation.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - python=3.9.18 5 | - pip 6 | - pip: 7 | - squidpy==1.5.0 8 | -------------------------------------------------------------------------------- /method/BANKSY/banksy_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "matrix": "counts", 3 | "integrated_feature_selection": false, 4 | "image": false, 5 | "neighbors": false, 6 | "config_file": true 7 | } 8 | -------------------------------------------------------------------------------- /method/DeepST/DeepST_optargs.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "matrix": "counts", 3 | "integrated_feature_selection": false, 4 | "image": false, 5 | "neighbors": false, 6 | "config_file": true 7 | } 8 | -------------------------------------------------------------------------------- /method/STAGATE/STAGATE_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "matrix": "counts", 3 | "integrated_feature_selection": false, 4 | "image": false, 5 | "neighbors": false, 6 | "config_file": false 7 | } 8 | -------------------------------------------------------------------------------- /method/STAGATE/config/config_starmap.json: -------------------------------------------------------------------------------- 1 | {"method": "mclust", "model": "Radius", "rad_cutoff": 400, "n_genes": 3000, "min_cells": 0, "source": "https://stagate.readthedocs.io/en/latest/T9_STARmap.html"} -------------------------------------------------------------------------------- /method/SpaceFlow/config/config_default.json: -------------------------------------------------------------------------------- 1 | { 2 | "n_pcs": 50, 3 | "n_neighbours": 50, 4 | "source": "https://github.com/hongleir/SpaceFlow/blob/master/SpaceFlow/SpaceFlow.py#L289#L110" 5 | } -------------------------------------------------------------------------------- /method/SpiceMix/SpiceMix_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "matrix": "counts", 3 | "integrated_feature_selection": false, 4 | "image": false, 5 | "neighbors": false, 6 | "config_file": true 7 | } -------------------------------------------------------------------------------- /method/seurat/seurat_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "matrix": "counts", 3 | "integrated_feature_selection": false, 4 | "image": false, 5 | "neighbors": false, 6 | "config_file": true 7 | } 8 | -------------------------------------------------------------------------------- /method/spaGCN/spaGCN_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "matrix": "counts", 3 | "integrated_feature_selection": false, 4 | "image": false, 5 | "neighbors": false, 6 | "config_file": true 7 | } 8 | -------------------------------------------------------------------------------- /method/spatialGE/spatialGE_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "matrix": "counts", 3 | "integrated_feature_selection": false, 4 | "image": false, 5 | "neighbors": false, 6 | "config_file": true 7 | } -------------------------------------------------------------------------------- /method/stardust/config/config_default.json: -------------------------------------------------------------------------------- 1 | { 2 | "method": "auto", 3 | "npcs": 10, 4 | "n_genes": 3000, 5 | "source": "https://github.com/InfOmics/stardust/blob/master/R/autoStardust.R" 6 | } 7 | -------------------------------------------------------------------------------- /method/stardust/stardust_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "matrix": "counts", 3 | "integrated_feature_selection": false, 4 | "image": false, 5 | "neighbors": false, 6 | "config_file": true 7 | } -------------------------------------------------------------------------------- /data/osmfish_Ssp/osmfish_Ssp.yml: 
-------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - python=3.11.6 5 | - scipy=1.11.4 6 | - anndata=0.10.3 7 | - numpy=1.26 8 | - pandas=2.1.3 9 | - loompy=3.0.6 -------------------------------------------------------------------------------- /method/SCAN-IT/scanit_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "matrix": "counts", 3 | "integrated_feature_selection": false, 4 | "image": false, 5 | "neighbors": false, 6 | "config_file": true 7 | } 8 | 9 | -------------------------------------------------------------------------------- /method/SOTIP/config/config_osmFISH.json: -------------------------------------------------------------------------------- 1 | { 2 | "n_neighbours": 500, 3 | "source": "https://github.com/TencentAILabHealthcare/SOTIP/blob/master/SOTIP_analysis/tutorial/osmFISH_cortex.ipynb" 4 | } 5 | -------------------------------------------------------------------------------- /method/STAGATE/config/config_default.json: -------------------------------------------------------------------------------- 1 | {"method": "mclust", "model": "Radius", "rad_cutoff": 150, "n_genes": 3000, "min_cells": 0, "source": "https://stagate.readthedocs.io/en/latest/T1_DLPFC.html"} 2 | -------------------------------------------------------------------------------- /method/SpaceFlow/spaceflow_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "matrix": "counts", 3 | "integrated_feature_selection": false, 4 | "image": false, 5 | "neighbors": false, 6 | "config_file": true 7 | } 8 | -------------------------------------------------------------------------------- /method/bass/config/config_2.json: -------------------------------------------------------------------------------- 1 | { 2 | "C": 20, 3 | "init_method": "mclust", 4 | "beta_method": "SW", 5 | "geneSelect": "hvg", 6 | "scaleFeature": false, 7 | "n_genes": 2000, 8 | "n_pc": 20 9 | } -------------------------------------------------------------------------------- /method/meringue/meringue_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "matrix": "counts", 3 | "integrated_feature_selection": false, 4 | "image": false, 5 | "neighbors": false, 6 | "config_file": true 7 | } 8 | -------------------------------------------------------------------------------- /method/scanpy/scanpy_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "matrix": "counts", 3 | "integrated_feature_selection": false, 4 | "image": false, 5 | "neighbors": false, 6 | "config_file": true 7 | } 8 | 9 | -------------------------------------------------------------------------------- /method/spatialGE/config/config_sct.json: -------------------------------------------------------------------------------- 1 | { 2 | "weight": 0.025, 3 | "n_genes": 2000, 4 | "method": "sct", 5 | "source": "https://fridleylab.github.io/spatialGE/reference/transform_data.html" 6 | } 7 | -------------------------------------------------------------------------------- /data/STARmap_plus/STARmap_plus.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - gdown=5.1.0 5 | - pandas=2.2.0 6 | - requests=2.31.0 7 | - numpy=1.26.4 8 | - python=3.12.2 9 | - scipy=1.12.0 
-------------------------------------------------------------------------------- /data/slideseq2_olfactory_bulb/slideseq2_olfactory_bulb.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - python=3.11.6 5 | - scipy=1.11.4 6 | - anndata=0.10.3 7 | - numpy=1.26.2 8 | - pandas=2.1.3 -------------------------------------------------------------------------------- /data/stereoseq_mouse_embryo/stereoseq_mouse_embryo.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - python=3.11.6 5 | - scipy=1.11.4 6 | - anndata=0.10.3 7 | - numpy=1.26.2 8 | - pandas=2.1.3 -------------------------------------------------------------------------------- /data/stereoseq_olfactory_bulb/stereoseq_olfactory_bulb.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - python=3.11.6 5 | - scipy=1.11.4 6 | - anndata=0.10.3 7 | - numpy=1.26.2 8 | - pandas=2.1.3 -------------------------------------------------------------------------------- /method/DRSC/config/config_FindSVGs.json: -------------------------------------------------------------------------------- 1 | {"feature_method": "FindSVGs", "n_genes": 480, "source": "https://github.com/feiyoung/DR.SC/blob/64d95135ab79e3e42e86e4a2e31724d17bec29d5/vignettes/DR.SC.DLPFC.Rmd#L88"} 2 | -------------------------------------------------------------------------------- /method/SEDR/config/config_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "n":6, 3 | "cluster_method": "mclust", 4 | "using_dec": false, 5 | "HVG": true, 6 | "Config_rationale": "Same as default, but turning off the dec" 7 | } -------------------------------------------------------------------------------- /method/SEDR/config/config_2.json: -------------------------------------------------------------------------------- 1 | { 2 | "n":12, 3 | "cluster_method": "mclust", 4 | "using_dec": false, 5 | "HVG": true, 6 | "Config_rationale": "Same as dlpfc, but turning off the dec" 7 | } -------------------------------------------------------------------------------- /method/bass/config/config_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "C": 20, 3 | "init_method": "mclust", 4 | "beta_method": "fix", 5 | "geneSelect": "sparkx", 6 | "scaleFeature": false, 7 | "n_genes": 3000, 8 | "n_pc": 20 9 | } -------------------------------------------------------------------------------- /method/bass/config/config_3.json: -------------------------------------------------------------------------------- 1 | { 2 | "C": 20, 3 | "init_method": "kmeans", 4 | "beta_method": "fix", 5 | "geneSelect": "sparkx", 6 | "scaleFeature": false, 7 | "n_genes": 3000, 8 | "n_pc": 20 9 | } -------------------------------------------------------------------------------- /method/meringue/meringue.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | - bioconda 4 | dependencies: 5 | - r-base=4.3.1 6 | - r-optparse=1.7.3 7 | - r-remotes=2.4.2 8 | - r-jsonlite=1.8.8 9 | - r-igraph=2.0.2 -------------------------------------------------------------------------------- /metric/cluster-specific-silhouette/cluster-specific-silhouette.yml: -------------------------------------------------------------------------------- 1 | 
channels: 2 | - conda-forge 3 | dependencies: 4 | - r-base=4.3.1 5 | - r-optparse=1.7.3 6 | - r-cluster=2.1.6 7 | - r-jsonlite=1.8.7 8 | -------------------------------------------------------------------------------- /method/BANKSY/config/config_default.json: -------------------------------------------------------------------------------- 1 | {"method": "leiden", "lambda": 0.8, "k_geom": 15, "n_pcs": 20, "n_genes":1e500, "use_agf": false, "source":"https://github.com/prabhakarlab/Banksy/blob/bioc/R/cluster.R#L82"} -------------------------------------------------------------------------------- /method/BayesSpace/config/config_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "n_genes":2000, 3 | "n_pcs": 7, 4 | "gamma": 3, 5 | "nrep": 10000, 6 | "Config_rationale": "Combination of nrep and nPC based on other 3 configs" 7 | } 8 | 9 | -------------------------------------------------------------------------------- /method/CellCharter/CellCharter_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "matrix": "counts", 3 | "integrated_feature_selection": false, 4 | "image": false, 5 | "neighbors": false, 6 | "config_file": true 7 | } 8 | 9 | -------------------------------------------------------------------------------- /method/SEDR/config/config_dlpfc.json: -------------------------------------------------------------------------------- 1 | { 2 | "n":12, 3 | "cluster_method": "mclust", 4 | "using_dec": true, 5 | "HVG": true, 6 | "reference":"https://sedr.readthedocs.io/en/latest/Tutorial1_Clustering.html" 7 | } 8 | -------------------------------------------------------------------------------- /method/meringue/config/config_clusteringvignette.json: -------------------------------------------------------------------------------- 1 | {"min.reads": 100, "min.lib.size": 100, "k": 50, "alpha": 0, "beta": 0, "n_pcs": 60, "filterDist": 2, "source": "https://jef.works/MERINGUE/spatial_clustering"} 2 | -------------------------------------------------------------------------------- /metric/domain-specific-f1/domain-specific-f1.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - r-base=4.3.1 5 | - r-mclust=6.0.0 6 | - r-clue=0.3_65 7 | - r-optparse=1.7.3 8 | - r-jsonlite=1.8.7 9 | -------------------------------------------------------------------------------- /data/locus_coeruleus/locus_coeruleus.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | - bioconda 4 | dependencies: 5 | - r-base=4.3.1 6 | - bioconductor-weberdivechalcdata=1.4.0 7 | - r-optparse=1.7.3 8 | - r-matrix=1.6.1 -------------------------------------------------------------------------------- /method/BayesSpace/config/config_scc.json: -------------------------------------------------------------------------------- 1 | { 2 | "n_genes":2000, 3 | "n_pcs": 15, 4 | "gamma": 3, 5 | "nrep": 10000, 6 | "reference": "https://www.ezstatconsulting.com/BayesSpace/articles/ji_SCC.html" 7 | } 8 | 9 | -------------------------------------------------------------------------------- /method/SOTIP/config/config_MIBI_TNBC.json: -------------------------------------------------------------------------------- 1 | { 2 | "res": 1, 3 | "n_neighbours": 800, 4 | "source": "https://github.com/TencentAILabHealthcare/SOTIP/blob/master/SOTIP_analysis/tutorial/scMEP_CLCC.ipynb" 5 | } 6 | 
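One quirk worth flagging in the BANKSY configs above: `"n_genes": 1e500` exceeds the largest representable IEEE-754 double, so JSON parsers such as Python's `json` module decode it as infinity — presumably a deliberate way of disabling the gene cutoff. A quick sketch of what a consumer actually sees:

```{python}
import json

cfg = json.loads('{"n_genes": 1e500}')
print(cfg["n_genes"])           # inf: 1e500 overflows to float("inf")
print(10**6 < cfg["n_genes"])   # True, so every realistic gene count passes
```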
-------------------------------------------------------------------------------- /method/SOTIP/config/config_scMEP_CLCC.json: -------------------------------------------------------------------------------- 1 | { 2 | "res": 1, 3 | "n_neighbours": 100, 4 | "source": "https://github.com/TencentAILabHealthcare/SOTIP/blob/master/SOTIP_analysis/tutorial/scMEP_CLCC.ipynb" 5 | } 6 | -------------------------------------------------------------------------------- /method/stardust/config/config_tutorial.json: -------------------------------------------------------------------------------- 1 | { 2 | "method": "weight", 3 | "npcs": 10, 4 | "n_genes": 3000, 5 | "weight": 0.75, 6 | "source":"https://github.com/InfOmics/stardust/blob/master/README.md" 7 | } 8 | -------------------------------------------------------------------------------- /preprocessing/feature_selection_MoranI/spatially_variable_genes_moransI.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - python=3.9.18 5 | - scanpy=1.10.1 6 | - pip 7 | - pip: 8 | - squidpy==1.5.0 9 | -------------------------------------------------------------------------------- /data/xenium-breast-cancer/xenium-breast-cancer.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | - anaconda 4 | dependencies: 5 | - anndata=0.10.3 6 | - gdown=4.7.1 7 | - pandas=2.1.4 8 | - openpyxl=3.0.10 9 | - requests=2.31.0 -------------------------------------------------------------------------------- /method/seurat/config/config_leiden_VisiumHD.json: -------------------------------------------------------------------------------- 1 | { 2 | "algorithm": 4, 3 | "n_genes":2000, 4 | "n_pcs":50, 5 | "source":"https://satijalab.org/seurat/articles/visiumhd_analysis_vignette#unsupervised-clustering" 6 | } 7 | -------------------------------------------------------------------------------- /data/abc_atlas_wmb_thalamus/abc_atlas_wmb_thalamus.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - numpy=1.24.4 5 | - pandas=2.0.3 6 | - anndata=0.9.1 7 | - scipy=1.10.1 8 | - boto3=1.33.7 9 | - nibabel=5.1.0 -------------------------------------------------------------------------------- /method/BANKSY/config/config_starmap.json: -------------------------------------------------------------------------------- 1 | {"method": "leiden", "lambda": 0.8, "k_geom": 30, "n_pcs": 50, "n_genes":1e500, "use_agf": false, "source": "https://prabhakarlab.github.io/Banksy/articles/domain-segment.html#running-banksy"} -------------------------------------------------------------------------------- /method/BayesSpace/config/config_melanoma.json: -------------------------------------------------------------------------------- 1 | { 2 | "n_genes":2000, 3 | "n_pcs": 7, 4 | "gamma": 3, 5 | "nrep": 50000, 6 | "reference":"https://www.ezstatconsulting.com/BayesSpace/articles/thrane_melanoma.html" 7 | } 8 | -------------------------------------------------------------------------------- /method/SC_MEB/SC_MEB_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "matrix": "counts", 3 | "integrated_feature_selection": false, 4 | "image": false, 5 | "neighbors": false, 6 | "config_file": true, 7 | "technology": ["Visium"] 8 | } -------------------------------------------------------------------------------- 
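The SC_MEB optargs file above adds an optional `"technology"` list on top of the usual keys (GraphST, further down in this dump, does the same). The sketch below shows how a scheduler might use it to skip incompatible datasets; `method_supports` is a hypothetical helper for illustration, not a function from this repository:

```{python}
import json
from pathlib import Path

def method_supports(optargs_path: str, technology: str) -> bool:
    """Hypothetical helper: a method without a "technology" list is
    assumed to run on every platform; otherwise the list is authoritative."""
    optargs = json.loads(Path(optargs_path).read_text())
    return technology in optargs.get("technology", [technology])

print(method_supports("method/SC_MEB/SC_MEB_optargs.json", "Visium"))      # True
print(method_supports("method/SC_MEB/SC_MEB_optargs.json", "Stereo-seq"))  # False
```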
/method/seurat/config/config_default.json: -------------------------------------------------------------------------------- 1 | { 2 | "algorithm": 4, 3 | "n_genes":3000, 4 | "n_pcs":50, 5 | "source": "https://satijalab.org/seurat/reference/sctransform, https://satijalab.org/seurat/reference/runpca" 6 | } 7 | -------------------------------------------------------------------------------- /method/spatialGE/config/config_default.json: -------------------------------------------------------------------------------- 1 | { 2 | "weight": 0.025, 3 | "n_genes": 2000, 4 | "source": "https://github.com/FridleyLab/spatialGE/blob/8440639d32b4bac83750f93c2c5ac020fc4c4791/R/STclust.R#L48C92-L48C105" 5 | } 6 | -------------------------------------------------------------------------------- /data/visium_breast_cancer_SEDR/visium_breast_cancer_SEDR.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - python=3.11.6 5 | - scipy=1.11.4 6 | - anndata=0.10.3 7 | - numpy=1.26.2 8 | - pandas=2.1.3 9 | - scanpy=1.9.6 -------------------------------------------------------------------------------- /method/BANKSY/config/config_dlpfc.json: -------------------------------------------------------------------------------- 1 | {"method": "leiden", "lambda": 0.2, "k_geom": 18, "n_pcs": 20, "n_genes": 2000, "use_agf": true, "source": "https://prabhakarlab.github.io/Banksy/articles/batch-correction.html#running-banksy"} 2 | -------------------------------------------------------------------------------- /method/DRSC/config/config_FindVariableFeatures.json: -------------------------------------------------------------------------------- 1 | {"feature_method": "FindVariableFeatures", "n_genes": 500, "source": "https://github.com/feiyoung/DR.SC/blob/64d95135ab79e3e42e86e4a2e31724d17bec29d5/vignettes/DR.SC.DLPFC.Rmd#L57"} 2 | -------------------------------------------------------------------------------- /method/SpaceFlow/config/config_seqfish.json: -------------------------------------------------------------------------------- 1 | { 2 | "n_genes": 3000, 3 | "n_pcs": 50, 4 | "n_neighbours": 50, 5 | "source":"https://github.com/hongleir/SpaceFlow/blob/master/tutorials/seqfish_mouse_embryogenesis.ipynb" 6 | } -------------------------------------------------------------------------------- /method/seurat/config/config_leiden_CODEX.json: -------------------------------------------------------------------------------- 1 | { 2 | "algorithm": 4, 3 | "n_genes":2000, 4 | "n_pcs":20, 5 | "source": "https://satijalab.org/seurat/articles/seurat5_spatial_vignette_2#human-lymph-node-akoya-codex-system" 6 | } 7 | -------------------------------------------------------------------------------- /method/BayesSpace/BayesSpace_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "matrix": "counts", 3 | "integrated_feature_selection": false, 4 | "image": false, 5 | "neighbors": false, 6 | "config_file": false, 7 | "technology": ["Visium"] 8 | } 9 | -------------------------------------------------------------------------------- /method/SEDR/config/config_default.json: -------------------------------------------------------------------------------- 1 | { 2 | "n":6, 3 | "cluster_method": "mclust", 4 | "using_dec": true, 5 | "HVG": true, 6 | "reference": "https://github.com/JinmiaoChenLab/SEDR/blob/master/SEDR/SEDR_model.py" 7 | } 8 | 
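Most method folders in this dump pair a `config/config_default.json` with named variants, and the JSON carries provenance keys (`source`, `source2`, `reference`, or `Config_rationale`) alongside the actual hyperparameters. Here is a hedged sketch of how a method script might load such a file — the `--config` argument name matches the SpiceMix README later in this dump, but the rest is illustrative only:

```{python}
import argparse
import json
from pathlib import Path

parser = argparse.ArgumentParser()
parser.add_argument("--config", default="config/config_default.json",
                    help="Path to a config_*.json file")
args = parser.parse_args()

config = json.loads(Path(args.config).read_text())
# Provenance keys document where values come from; they are not hyperparameters.
for key in ("source", "source2", "reference", "Config_rationale"):
    config.pop(key, None)
print(config)  # what remains is handed to the method
```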
-------------------------------------------------------------------------------- /method/SEDR/config/config_stereoseq.json: -------------------------------------------------------------------------------- 1 | { 2 | "n":6, 3 | "cluster_method": "mclust", 4 | "using_dec": true, 5 | "HVG": false, 6 | "reference": "https://sedr.readthedocs.io/en/latest/Tutorial4_Stereo-seq.html" 7 | } 8 | -------------------------------------------------------------------------------- /method/spaGCN/spaGCN.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - python=3.8.18 5 | - anndata=0.9.2 6 | - numpy=1.24.4 7 | - pandas=2.0.3 8 | - pytorch=2.1.0 9 | - pip 10 | - pip: 11 | - SpaGCN==1.2.7 -------------------------------------------------------------------------------- /method/BayesSpace/BayesSpace.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | - bioconda 4 | dependencies: 5 | - r-base=4.3.1 6 | - r-optparse=1.7.3 7 | - bioconductor-bayesspace=1.10.1 8 | - r-matrix=1.6_1.1 9 | - r-irlba=2.3.5.1 10 | -------------------------------------------------------------------------------- /method/GraphST/config/config_dlpfc.json: -------------------------------------------------------------------------------- 1 | {"method": "mclust", "refine": true, "radius": 50, "n_pcs": 20, "n_genes": 3000, "source": "https://deepst-tutorials.readthedocs.io/en/latest/Tutorial%201_10X%20Visium.html#Spatial-clustering-and-refinement"} -------------------------------------------------------------------------------- /method/spaGCN/config/config_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "method": "louvain", 3 | "refine": false, 4 | "alpha": 1, 5 | "p": 0.5, 6 | "n_pcs": 50, 7 | "n_neighbors": 10, 8 | "Config_rationale": "Un-refined default configuration" 9 | } -------------------------------------------------------------------------------- /method/spaGCN/config/config_2.json: -------------------------------------------------------------------------------- 1 | { 2 | "method": "kmeans", 3 | "refine": true, 4 | "alpha": 1, 5 | "p": 0.5, 6 | "n_pcs": 50, 7 | "n_neighbors": 10, 8 | "Config_rationale": "Different clustering methods (K-means)" 9 | } -------------------------------------------------------------------------------- /method/spatialGE/config/config_highweight.json: -------------------------------------------------------------------------------- 1 | { 2 | "weight": 0.2, 3 | "n_genes": 2000, 4 | "source": "https://fridleylab.github.io/spatialGE/articles/basic_functions_vignette.html#unsupervised-spatially-informed-clustering-stclust" 5 | } 6 | -------------------------------------------------------------------------------- /method/spatialGE/config/config_midweight.json: -------------------------------------------------------------------------------- 1 | { 2 | "weight": 0.05, 3 | "n_genes": 2000, 4 | "source": "https://fridleylab.github.io/spatialGE/articles/basic_functions_vignette.html#unsupervised-spatially-informed-clustering-stclust" 5 | } 6 | -------------------------------------------------------------------------------- /data/stereoseq_developing_Drosophila_embryos_larvae/stereoseq_developing_Drosophila_embryos_larvae.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - python=3.11.6 5 | - scipy=1.11.4 6 | - anndata=0.10.3 7 
| - numpy=1.26.2 8 | - pandas=2.1.3 -------------------------------------------------------------------------------- /method/SOTIP/config/config_Visium_dlpfc.json: -------------------------------------------------------------------------------- 1 | { 2 | "n_pcs": 100, 3 | "res": 2, 4 | "n_neighbours": 200, 5 | "source": "https://github.com/TencentAILabHealthcare/SOTIP/blob/master/SOTIP_analysis/Visium_Cortex/SDM_Visium_cortex.ipynb" 6 | } 7 | -------------------------------------------------------------------------------- /method/spaGCN/config/config_3.json: -------------------------------------------------------------------------------- 1 | { 2 | "method": "kmeans", 3 | "refine": false, 4 | "alpha": 1, 5 | "p": 0.5, 6 | "n_pcs": 50, 7 | "n_neighbors": 10, 8 | "Config_rationale": "Un-refined + Different clustering methods" 9 | } -------------------------------------------------------------------------------- /preprocessing/visualization/visualization.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - python=3.9.18 5 | - scanpy=1.10.1 6 | - seaborn=0.13.2 7 | - matplotlib=3.8.3 8 | - PyPDF2=2.11.1 9 | - typing_extensions=4.10.0 10 | -------------------------------------------------------------------------------- /method/GraphST/GraphST_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "matrix": "counts", 3 | "integrated_feature_selection": false, 4 | "image": false, 5 | "neighbors": false, 6 | "config_file": true, 7 | "technology": ["Visium", "Stereo-seq", "Slideseq2"] 8 | } 9 | -------------------------------------------------------------------------------- /method/SpiceMix/SpiceMix.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - python=3.8.18 5 | - pip 6 | - pip: 7 | - --extra-index-url https://download.pytorch.org/whl/cu117 8 | - torch==1.13.1+cu117 9 | - popari==0.0.71 10 | -------------------------------------------------------------------------------- /method/conST/config/config_Visium_dlpfc.json: -------------------------------------------------------------------------------- 1 | { 2 | "k": 20, 3 | "min_cells": 5, 4 | "use_img": false, 5 | "using_mask": false, 6 | "refinement": true, 7 | "source": "https://github.com/ys-zong/conST/blob/main/conST_cluster.ipynb" 8 | } 9 | -------------------------------------------------------------------------------- /method/meringue/config/config_default.json: -------------------------------------------------------------------------------- 1 | {"min.reads": 1, "min.lib.size": 1, "k": 50, "alpha": 1, "beta": 1, "n_pcs": 5, "filterDist": 2, "source": "https://github.com/JEFworks-Lab/MERINGUE/blob/ca9e2ccabd95680d9ca0b323a8a507c038f2ea13/R/cluster.R#L130"} 2 | -------------------------------------------------------------------------------- /method/scanpy/scanpy_env.yaml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - python>=3.10 5 | - numpy=1.26.2 6 | - scipy=1.11.4 7 | - pillow=10.1.0 8 | - anndata=0.10.3 9 | - leidenalg=0.10.1 10 | - louvain=0.8.2 11 | - scanpy=1.10.0 -------------------------------------------------------------------------------- /consensus/03_Consensus_weighted/Consensus_weighted.yaml: -------------------------------------------------------------------------------- 1 | channels: 2 | - r 3 | - conda-forge 4 
| dependencies: 5 | - r-base=4.4.2 6 | - r-optparse=1.7.5 7 | - r-matrix=1.6_5 8 | - r-dplyr=1.1.4 9 | - r-future.apply=1.11.3 10 | - r-igraph=2.0.3 11 | -------------------------------------------------------------------------------- /data/visium_hd_cancer_colon/visium_hd_cancer_colon.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - python=3.11.6 5 | - scipy=1.11.4 6 | - pip 7 | - pip: 8 | - spatialdata==0.1.2 9 | - spatialdata-io==0.1.2 10 | - pypdl==1.3.2 11 | -------------------------------------------------------------------------------- /method/precast/config/config_BC.json: -------------------------------------------------------------------------------- 1 | { 2 | "n_genes": 2000, 3 | "method": "SPARK-X", 4 | "maxIter": 30, 5 | "postminspots":1, 6 | "postminfeatures":10, 7 | "reference": "https://feiyoung.github.io/PRECAST/articles/PRECAST.BreastCancer.html" 8 | } -------------------------------------------------------------------------------- /method/STAGATE/config/config_slide_stereo.json: -------------------------------------------------------------------------------- 1 | {"method": "louvain", "model": "Radius", "rad_cutoff": 50, "n_genes": 3000, "min_cells": 50, "source": "https://stagate.readthedocs.io/en/latest/T3_Slide-seqV2.html", "source2": "https://stagate.readthedocs.io/en/latest/T4_Stereo.html"} -------------------------------------------------------------------------------- /method/maple/maple.yml: -------------------------------------------------------------------------------- 1 | name: maple_env 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | dependencies: 6 | - r-base=4.3.1 7 | - r-optparse=1.7.3 8 | - r-jsonlite=1.8.8 9 | - r-Seurat=4.4.0 10 | - bioconductor-SpatialExperiment=1.12.0 11 | - r-remotes=2.4.2 -------------------------------------------------------------------------------- /data/xenium-ffpe-bc-idc/xenium-ffpe-bc-idc.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - nodefaults 3 | - conda-forge 4 | - bioconda 5 | dependencies: 6 | - python==3.11 7 | - pip 8 | - pip: 9 | - pypdl==1.3.2 10 | - spatialdata==0.3.0 11 | - spatialdata-io==0.1.7 12 | -------------------------------------------------------------------------------- /method/spaGCN/config/config_default.json: -------------------------------------------------------------------------------- 1 | { 2 | "method": "louvain", 3 | "refine": true, 4 | "alpha": 1, 5 | "p": 0.5, 6 | "n_pcs": 50, 7 | "n_neighbors": 10, 8 | "reference": "https://github.com/jianhuupenn/SpaGCN/blob/master/tutorial/tutorial.ipynb" 9 | } -------------------------------------------------------------------------------- /metric/SpatialARI/SpatialARI.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | - bioconda 4 | dependencies: 5 | - r-base=4.3.1 6 | - r-optparse=1.7.3 7 | - r-rjson=0.2.21 8 | - r-remotes=2.4.2.1 9 | - r-aricode=1.0.3 10 | - bioconductor-bluster=1.12.0 11 | - r-spdep=0.6_13 -------------------------------------------------------------------------------- /method/Giotto/config/config_visium.json: -------------------------------------------------------------------------------- 1 | { 2 | "betas": [0, 1, 6], 3 | "beta": 2, 4 | "k": 4, 5 | "n_pcs": 10, 6 | "n_genes": 100, 7 | "bin_method": "rank", 8 | "source": "https://drieslab.github.io/Giotto_website/articles/visium_mouse_kidney.html#spatial-genes" 9 | } 
10 | -------------------------------------------------------------------------------- /method/Giotto/config/config_seqfish.json: -------------------------------------------------------------------------------- 1 | { 2 | "betas": [28, 2, 3], 3 | "beta": 28, 4 | "k": 9, 5 | "n_pcs": 20, 6 | "n_genes": 100, 7 | "bin_method": "kmeans", 8 | "source": "https://drieslab.github.io/Giotto_website/articles/seqfish_cortex.html#hmrf-spatial-domains" 9 | } 10 | -------------------------------------------------------------------------------- /method/GraphST/config/config_default.json: -------------------------------------------------------------------------------- 1 | {"method": "mclust", "refine": false, "radius": 50, "n_pcs": 20, "n_genes": 3000, "source": "https://github.com/JinmiaoChenLab/GraphST/blob/main/GraphST/preprocess.py#L99", "source2": "https://github.com/JinmiaoChenLab/GraphST/blob/main/GraphST/utils.py#L33#L64"} -------------------------------------------------------------------------------- /method/Giotto/config/config_default.json: -------------------------------------------------------------------------------- 1 | { 2 | "betas": [0, 2, 10], 3 | "beta": 10, 4 | "k": 10, 5 | "n_pcs": 10, 6 | "n_genes": 0, 7 | "bin_method": "kmeans", 8 | "source": "https://github.com/drieslab/Giotto/blob/a60fbfcaff30a2942f354e63267ae5894aa84cd4/R/python_hmrf.R#L38" 9 | } 10 | -------------------------------------------------------------------------------- /method/scanpy/config/config_leiden_tutorial.json: -------------------------------------------------------------------------------- 1 | { 2 | "n_genes": 2000, 3 | "n_neighbors" : 15, 4 | "clustering" : "leiden", 5 | "directed": false, 6 | "n_iterations":2, 7 | "source": "https://scanpy.readthedocs.io/en/stable/tutorials/spatial/basic-analysis.html#qc-and-preprocessing" 8 | } -------------------------------------------------------------------------------- /method/CellCharter/config/config_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "min_counts": 3, 3 | "n_latent": 10, 4 | "n_layers": 4, 5 | "aggregations": "mean", 6 | "convergence_tolerance": 0.001, 7 | "covariance_regularization": 1e-06, 8 | "source": "https://cellcharter.readthedocs.io/en/latest/notebooks/codex_mouse_spleen.html" 9 | } 10 | -------------------------------------------------------------------------------- /method/CellCharter/config/config_2.json: -------------------------------------------------------------------------------- 1 | { 2 | "min_counts": 3, 3 | "n_latent": 10, 4 | "n_layers": 5, 5 | "aggregations": "mean", 6 | "convergence_tolerance": 0.001, 7 | "covariance_regularization": 1e-06, 8 | "source": "https://cellcharter.readthedocs.io/en/latest/notebooks/codex_mouse_spleen.html" 9 | } 10 | -------------------------------------------------------------------------------- /method/CellCharter/config/config_default.json: -------------------------------------------------------------------------------- 1 | { 2 | "min_counts": 3, 3 | "n_latent": 10, 4 | "n_layers": 3, 5 | "aggregations": "mean", 6 | "convergence_tolerance": 0.001, 7 | "covariance_regularization": 1e-06, 8 | "source": "https://cellcharter.readthedocs.io/en/latest/notebooks/codex_mouse_spleen.html" 9 | } 10 | -------------------------------------------------------------------------------- /data/libd_dlpfc/libd_dlpfc.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | - bioconda 4 | dependencies: 5 | - 
r-base=4.3.1 6 | - bioconductor-spatiallibd=1.12.0 7 | - r-optparse=1.7.3 8 | - r-dbplyr=2.3.4 9 | - r-dplyr=1.1.3 10 | - r-magrittr=2.0.3 11 | - r-purrr=1.0.2 12 | - r-stringr=1.5.0 13 | - r-tibble=3.2.1 -------------------------------------------------------------------------------- /data/spatialDLPFC/spatialDLPFC.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | - bioconda 4 | dependencies: 5 | - r-base=4.3.1 6 | - bioconductor-spatiallibd=1.12.0 7 | - r-optparse=1.7.3 8 | - r-dbplyr=2.3.4 9 | - r-dplyr=1.1.3 10 | - r-magrittr=2.0.3 11 | - r-purrr=1.0.2 12 | - r-stringr=1.5.0 13 | - r-tibble=3.2.1 -------------------------------------------------------------------------------- /data/visium_chicken_heart/chicken_heart.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - anndata=0.10.3 5 | - gitpython=3.1.40 6 | - pandas=2.1.4 7 | - pillow=10.1.0 8 | - pytest-shutil=1.7.0 9 | - python=3.9.18 10 | - re2=2023.06.02 11 | - scanpy=1.9.6 12 | - scipy=1.11.4 13 | - urllib3=2.1.0 14 | -------------------------------------------------------------------------------- /method/scanpy/config/config_leiden_MERFISH.json: -------------------------------------------------------------------------------- 1 | { 2 | "n_genes": 2000, 3 | "n_pcs":15, 4 | "n_neighbors" : 15, 5 | "clustering" : "leiden", 6 | "directed": false, 7 | "n_iterations":2, 8 | "source": "https://scanpy.readthedocs.io/en/stable/tutorials/spatial/basic-analysis.html#qc-and-preprocessing" 9 | } -------------------------------------------------------------------------------- /method/SCAN-IT/config/config_slideseq_mouse_cerebellum.json: -------------------------------------------------------------------------------- 1 | { 2 | "n_genes": 3000, 3 | "SomNode_k": 5, 4 | "knn_n_neighbours": 5, 5 | "n_h": 30, 6 | "n_epoch": 2000, 7 | "alpha_n_layers": 2, 8 | "n_neighbours": 15, 9 | "source": "https://github.com/zcang/SCAN-IT/blob/main/examples/Slide-seq" 10 | } -------------------------------------------------------------------------------- /method/SCAN-IT/config/config_default.json: -------------------------------------------------------------------------------- 1 | { 2 | "n_genes": 3000, 3 | "SomNode_k": 20, 4 | "knn_n_neighbours": 10, 5 | "n_h": 32, 6 | "n_epoch": 1000, 7 | "alpha_n_layers": 1, 8 | "n_neighbours": 15, 9 | "source": "https://github.com/zcang/SCAN-IT/blob/main/scanit/tools/_scanit_representation.py" 10 | } -------------------------------------------------------------------------------- /method/SCAN-IT/config/config_slideseq_hippocampus.json: -------------------------------------------------------------------------------- 1 | { 2 | "n_genes": 3000, 3 | "SomNode_k": 5, 4 | "knn_n_neighbours": 15, 5 | "n_h": 30, 6 | "n_epoch": 5000, 7 | "alpha_n_layers": 2, 8 | "n_neighbours": 15, 9 | "source": "https://github.com/zcang/SCAN-IT/blob/main/examples/Slide-seq/scanit.ipynb" 10 | } -------------------------------------------------------------------------------- /method/bass/config/config_dlpfc.json: -------------------------------------------------------------------------------- 1 | { 2 | "C": 20, 3 | "init_method": "mclust", 4 | "beta_method": "SW", 5 | "geneSelect": "sparkx", 6 | "scaleFeature": false, 7 | "n_genes": 3000, 8 | "n_pcs": 20, 9 | "burnin": 2000, 10 | "nsample": 10000, 11 | "source": "https://zhengli09.github.io/BASS-Analysis/DLPFC.html" 12 | } 13 | 
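BASS above ships dlpfc/default/starmap variants next to numbered ones, which is the general pattern: every method directory exposes its variants as `config/config_<name>.json`, and the workflow configs (see `path_config_test.yaml` near the end of this dump) reference them by that relative path. A minimal discovery sketch, assuming only the layout visible in this dump:

```{python}
from pathlib import Path

def list_configs(method_dir: str) -> dict:
    """Map each variant name to its path relative to the method directory,
    mirroring the name -> "config/<name>.json" convention of the workflow configs."""
    return {p.stem: str(p.relative_to(method_dir))
            for p in sorted(Path(method_dir, "config").glob("config_*.json"))}

print(list_configs("method/bass"))
# e.g. {"config_1": "config/config_1.json", ..., "config_starmap": "config/config_starmap.json"}
```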
-------------------------------------------------------------------------------- /method/SCAN-IT/config/config_slideseq_mouse_olfactory_bulb.json: -------------------------------------------------------------------------------- 1 | { 2 | "n_genes": 3000, 3 | "SomNode_k": 1, 4 | "knn_n_neighbours": 15, 5 | "n_h": 30, 6 | "n_epoch": 5000, 7 | "alpha_n_layers": 2, 8 | "n_neighbours": 15, 9 | "source": "https://github.com/zcang/SCAN-IT/blob/main/examples/Slide-seq/scanit.ipynb" 10 | } -------------------------------------------------------------------------------- /method/bass/config/config_default.json: -------------------------------------------------------------------------------- 1 | { 2 | "C": 20, 3 | "init_method": "kmeans", 4 | "beta_method": "SW", 5 | "geneSelect": "sparkx", 6 | "scaleFeature": true, 7 | "n_genes": 3000, 8 | "n_pcs": 20, 9 | "burnin": 2000, 10 | "nsample": 5000, 11 | "source": "https://zhengli09.github.io/BASS-Analysis/MERFISH.html" 12 | } 13 | -------------------------------------------------------------------------------- /method/bass/config/config_starmap.json: -------------------------------------------------------------------------------- 1 | { 2 | "C": 15, 3 | "init_method": "kmeans", 4 | "beta_method": "SW", 5 | "geneSelect": "sparkx", 6 | "scaleFeature": true, 7 | "n_genes": 3000, 8 | "n_pcs": 20, 9 | "burnin": 2000, 10 | "nsample": 5000, 11 | "source": "https://zhengli09.github.io/BASS-Analysis/STARmap.html" 12 | } 13 | -------------------------------------------------------------------------------- /method/conST/config/config_default.json: -------------------------------------------------------------------------------- 1 | { 2 | "k": 10, 3 | "min_cells": 3, 4 | "use_img": false, 5 | "using_mask": false, 6 | "refinement": false, 7 | "source1": "https://github.com/ys-zong/conST/blob/main/conST_cluster.ipynb", 8 | "source2": "https://github.com/ys-zong/conST/blob/main/src/utils_func.py#L51" 9 | } 10 | -------------------------------------------------------------------------------- /method/SCAN-IT/config/config_seqFISH_mouse_SScortex.json: -------------------------------------------------------------------------------- 1 | { 2 | "n_genes": 3000, 3 | "SomNode_k": 5, 4 | "knn_n_neighbours": 5, 5 | "n_h": 10, 6 | "n_epoch": 2000, 7 | "alpha_n_layers": 1, 8 | "n_neighbours": 10, 9 | "source": "https://github.com/zcang/SCAN-IT/blob/main/examples/seqFISH-mouse-SScortex/scanit.ipynb" 10 | } -------------------------------------------------------------------------------- /method/scanpy/config/config_default.json: -------------------------------------------------------------------------------- 1 | { 2 | "n_neighbors" : 15, 3 | "clustering" : "leiden", 4 | "source": "https://scanpy.readthedocs.io/en/stable/generated/scanpy.pp.pca.html,https://scanpy.readthedocs.io/en/stable/api/generated/scanpy.pp.neighbors.html, https://scanpy.readthedocs.io/en/stable/generated/scanpy.pp.highly_variable_genes.html" 5 | } -------------------------------------------------------------------------------- /method/CellCharter/config/config_dlpfc.json: -------------------------------------------------------------------------------- 1 | { 2 | "min_counts": 3, 3 | "n_genes": 5000, 4 | "n_latent": 5, 5 | "n_layers": 4, 6 | "aggregations": "mean", 7 | "convergence_tolerance": 0.001, 8 | "covariance_regularization": 1e-06, 9 | "source": "https://github.com/CSOgroup/cellcharter_analyses/blob/main/src/benchmarking/CellCharter/individual.py#R30" 10 | } 11 | 
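The CellCharter configs in this dump (config_1, config_2, config_default earlier, and config_dlpfc just above) differ only in a handful of keys — `n_layers` sweeps 3/4/5, and the dlpfc variant additionally changes `n_latent` and `n_genes`. Below is a small sketch that surfaces exactly which keys vary across a method's variants; purely illustrative tooling, not part of the repository:

```{python}
import json
from pathlib import Path

paths = sorted(Path("method/CellCharter/config").glob("config_*.json"))
configs = {p.stem: json.loads(p.read_text()) for p in paths}

# Collect every key used by any variant, then report those whose values differ.
keys = set().union(*configs.values()) - {"source", "reference", "Config_rationale"}
for key in sorted(keys):
    values = {name: cfg.get(key) for name, cfg in configs.items()}
    if len({repr(v) for v in values.values()}) > 1:
        print(key, values)  # for CellCharter: n_genes, n_latent, n_layers
```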
-------------------------------------------------------------------------------- /method/DRSC/drsc_env.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Create the DR.SC conda environment named drsc_env 4 | # conda env create -f DRSC.yml 5 | 6 | # Activate the environment 7 | # conda activate drsc_env 8 | 9 | # Install the required R packages 10 | Rscript -e "remotes::install_version(package = 'DR.SC', version = '3.3', repos = 'https://cran.uni-muenster.de/')" -------------------------------------------------------------------------------- /data/SEA_AD_data/SEA_AD_LICENSE.txt: -------------------------------------------------------------------------------- 1 | Seattle Alzheimer's Disease (https://portal.brain-map.org/explore/seattle-alzheimers-disease) MERSCOPE v1 MTG Dataset is licensed 2 | under the Allen Institute terms of use (see https://alleninstitute.org/citation-policy/ for the Allen Institute Citation Policy 3 | and https://alleninstitute.org/terms-of-use/ for the Allen Institute Terms of Use). 4 | -------------------------------------------------------------------------------- /method/spatialGE/spatialGE_env.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Create the spatialGE conda environment named spatialGE_env 4 | # conda env create -f spatialGE.yml -n spatialGE_env 5 | 6 | # Activate the environment 7 | # conda activate spatialGE_env 8 | 9 | # Install the required R packages 10 | Rscript -e "remotes::install_github('FridleyLab/spatialGE@1.2.0.0000')" 11 | 12 | -------------------------------------------------------------------------------- /method/stardust/stardust_env.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Create the stardust conda environment named stardust_env 4 | # conda env create -f stardust.yml 5 | 6 | # Activate the environment 7 | # source activate stardust_env 8 | 9 | # Install the required R packages 10 | Rscript -e "remotes::install_github('InfOmics/stardust', ref = 'f1b541704d4b4189b4daf4132289a084253349d9')" -------------------------------------------------------------------------------- /method/precast/precast_env.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Create the precast conda environment named precast_env 4 | # conda env create -f precast.yml 5 | 6 | # Activate the environment 7 | # conda activate precast_env 8 | 9 | # Install the required R packages 10 | Rscript -e "remotes::install_version(package = 'PRECAST', version = '1.6.3', repos = 'https://cran.uni-muenster.de/')" -------------------------------------------------------------------------------- /method/SpiceMix/config/config_default.json: -------------------------------------------------------------------------------- 1 | { 2 | "K": 15, 3 | "lambda_Sigma_x_inv": 1e-4, 4 | "device": "cuda:0", 5 | "dtype": "float64", 6 | "num_preiterations": 5, 7 | "num_iterations": 200, 8 | "preprocess": { 9 | "n_genes": null 10 | }, 11 | "source": "https://popari.readthedocs.io/en/latest/tutorial_gallery/preprocessing_demo.html" 12 | } 13 | -------------------------------------------------------------------------------- /method/SpiceMix/config/config_Visium_dlpfc.json: -------------------------------------------------------------------------------- 1 | { 2 | "K": 10, 3 | "lambda_Sigma_x_inv": 1e-6, 4 | "device": "cuda:0", 5 |
"dtype": "float64", 6 | "num_preiterations": 5, 7 | "num_iterations": 200, 8 | "preprocess": { 9 | "n_genes": 500 10 | }, 11 | "source": "https://github.com/ma-compbio/SpiceMix/blob/master/SpiceMix/main_Maynard2021.ipynb" 12 | } 13 | -------------------------------------------------------------------------------- /method/BANKSY/banksy_env.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Create the BANKSY conda environment named banksy_env 4 | # conda env create -f banksy.yml 5 | 6 | # Activate the environment 7 | # conda activate banksy_env 8 | 9 | # Install the required R packages 10 | Rscript -e "remotes::install_github('prabhakarlab/Banksy', dependencies = FALSE, ref = 'dbda6fde952e65f45409d9bca8e1f821746755cc')" 11 | 12 | -------------------------------------------------------------------------------- /metric/SpatialARI/SpatialARI_env.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Create the SpatialARI conda environment named SpatialARI_env 4 | # conda env create -f SpatialARI.yml -n SpatialARI_env 5 | 6 | # Activate the environment 7 | # conda activate SpatialARI_env 8 | 9 | # Install the required R packages 10 | Rscript -e "remotes::install_github('RoseYuan/ClusteringMetrics@5691a9e', dependencies=TRUE)" 11 | 12 | -------------------------------------------------------------------------------- /method/meringue/meringue_env.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Create the MERINGUE conda environment named scmeb_env 4 | # conda env create -f meringue.yml 5 | 6 | # Activate the environment 7 | # source activate meringue_env 8 | 9 | # Install the required R packages 10 | Rscript -e "remotes::install_github('JEFworks-Lab/MERINGUE', ref = 'ca9e2ccabd95680d9ca0b323a8a507c038f2ea13', build_vignettes = FALSE)" 11 | 12 | 13 | -------------------------------------------------------------------------------- /method/DRSC/DRSC.yml: -------------------------------------------------------------------------------- 1 | name: drsc_env 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | dependencies: 6 | - r-base=4.3.1 7 | - r-optparse=1.7.3 8 | - r-biocmanager=1.30.22 9 | - bioconductor-SingleCellExperiment=1.24.0 10 | - bioconductor-S4Vectors=0.40.2 11 | - r-crayon=1.5.2 12 | - r-Matrix=1.6.3 13 | - r-seurat=4.4.0 14 | - r-remotes=2.4.2 15 | - r-RcppArmadillo=0.12.6 16 | - r-igraph=1.5.1 17 | - r-curl=5.1.0 -------------------------------------------------------------------------------- /method/SC_MEB/scmeb_env.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Create the SC.MEB conda environment named scmeb_env 4 | # conda env create -f SC.MEB.yml 5 | 6 | # Activate the environment 7 | # source activate scmeb_env 8 | 9 | # Install the required R packages 10 | # conda run -n scmeb_env R -e "install.packages('SC.MEB')" 11 | Rscript -e "remotes::install_version(package = 'SC.MEB', version = '1.1', repos = 'https://cran.uni-muenster.de/')" 12 | 13 | -------------------------------------------------------------------------------- /method/SC_MEB/SC_MEB.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | - bioconda 4 | dependencies: 5 | - r-base=4.3.1 6 | - r-optparse=1.7.3 7 | - r-jsonlite=1.8.8 8 | - r-remotes=2.4.2 9 | - r-mclust=6.1 10 | - 
r-purrr=1.0.2 11 | - r-rcpparmadillo=0.12.8.3.0 12 | - r-BiocManager=1.30.22 13 | - bioconductor-SingleCellExperiment=1.24.0 14 | - bioconductor-BiocSingular=1.18.0 15 | - bioconductor-scater=1.30.1 16 | - bioconductor-scran=1.30.0 17 | -------------------------------------------------------------------------------- /method/maple/maple_env.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Create the maple conda environment named maple_env 4 | # conda env create -f maple.yml 5 | 6 | # Activate the environment 7 | # source activate maple_env 8 | 9 | # Install the required R packages 10 | Rscript -e "remotes::install_github('carter-allen/spruce', ref = '47b02300cc9a1d83213682bd78464115867d1763')" 11 | Rscript -e "remotes::install_github('carter-allen/maple', ref = 'b173e89a7bc82c6ae09c7e0709d09ed22082172d')" -------------------------------------------------------------------------------- /method/conST/conST.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | - pytorch 4 | - default 5 | dependencies: 6 | - python=3.9.16 7 | - torch=2.2.0 8 | - torchvision=0.17.0 9 | - torch_geometric=2.5.0 10 | - torch_scatter=2.1.2 11 | - torch_sparse=0.6.18 12 | - scikit-learn=1.4.1 13 | - umap-learn=0.5.5 14 | - scanpy=1.9.8 15 | - seaborn=0.13.2 16 | - scipy=1.12.0 17 | - networkx=3.2.1 18 | - pandas=2.2.0 19 | - anndata=0.10.5 20 | - timm=0.9.12 21 | - leidenalg=0.10.2 -------------------------------------------------------------------------------- /method/SOTIP/sotip.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - python=3.8.0 5 | - numpy=1.21.2 6 | - pandas=1.3.4 7 | - scipy=1.7.1 8 | - matplotlib=3.4.3 9 | - seaborn=0.11.2 10 | - scanpy=1.8.2 11 | - palettable=3.3.0 12 | - scikit-learn=1.0.1 13 | - networkx=2.6.3 14 | - shapely=1.8.0 15 | - pyemd=0.5.1 16 | - pip 17 | - git 18 | - pip: 19 | - squidpy==1.1.2 20 | - "git+https://github.com/TencentAILabHealthcare/SOTIP.git@d3b762cca2a527dd5bc51408924c807832c5e1bb#egg=sotip" -------------------------------------------------------------------------------- /method/bass/bass_env.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Create the BASS conda environment named bass_env 4 | # conda env create -f bass.yml -n bass_env 5 | 6 | # Activate the environment 7 | # source activate bass_env 8 | 9 | # Install the required R packages 10 | Rscript -e "remotes::install_github('xzhoulab/SPARK', ref = 'a8b4bf27b804604dfda53da42992f100b8e4e727', dependencies = FALSE)" 11 | Rscript -e "remotes::install_github('zhengli09/BASS', ref = '37980c94a99f4b01ad5fa63555b3c5ab8af82b7e', dependencies = FALSE)" 12 | 13 | 14 | -------------------------------------------------------------------------------- /method/precast/precast.yml: -------------------------------------------------------------------------------- 1 | name: precast_env 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | dependencies: 6 | - r-base=4.3.1 7 | - r-optparse=1.7.3 8 | - r-jsonlite=1.8.8 9 | - r-biocmanager=1.30.22 10 | - bioconductor-SingleCellExperiment=1.24.0 11 | - bioconductor-S4Vectors=0.40.2 12 | - bioconductor-scater 13 | - r-ggpubr=0.6.0 14 | - r-gtools 15 | - r-crayon=1.5.2 16 | - r-Matrix=1.6.3 17 | - r-seurat=4.4.0 18 | - r-remotes=2.4.2 19 | - r-RcppArmadillo=0.12.6 20 | - r-igraph=1.5.1 21 | -
r-curl=5.1.0 22 | -------------------------------------------------------------------------------- /method/GraphST/GraphST.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - python=3.11.6 5 | - anndata=0.9.2 6 | - numpy=1.26.2 7 | - pandas=2.1.4 8 | - scanpy=1.9.6 9 | - pytorch=2.1.0 10 | - scipy=1.11.4 11 | - scikit-learn=1.3.2 12 | - scikit-misc=0.1.4 13 | - python-igraph=0.11.3 14 | - rpy2=3.5.11 15 | - tqdm=4.66.1 16 | - matplotlib-base=3.8.2 17 | - louvain=0.8.1 18 | - leidenalg=0.10.1 19 | - r-base=4.3.2 20 | - r-mclust=6.0.1 21 | - pip 22 | - pip: 23 | - GraphST==1.1.1 24 | - pot==0.9.1 25 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /method/DeepST/DeepST.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | # - pytorch 4 | # - pyg 5 | - default 6 | dependencies: 7 | - python=3.9 8 | - scanpy=1.9.2 9 | - anndata==0.8.0 10 | - bokeh=2.3.1 11 | - h5py=3.8.0 12 | - imageio=2.9.0 13 | - leidenalg=0.9.1 14 | - matplotlib=3.7.0 15 | - numpy=1.23.5 16 | - python-igraph=0.10.4 17 | - python-louvain=0.15 18 | - scikit-learn=1.2.1 19 | - scikit-network=0.28.3 20 | - scipy=1.10.1 21 | - seaborn=0.12.2 22 | - tqdm=4.64.1 23 | - umap-learn=0.5.3 24 | - psutil=5.9.4 25 | - pandas=1.5.3 -------------------------------------------------------------------------------- /method/STAGATE/STAGATE.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - python=3.11.5 5 | - scanpy=1.9.6 6 | - tensorflow=2.14.0 7 | - anndata=0.10.3 8 | - numpy=1.26.2 9 | - pandas=2.1.4 10 | - scikit-learn=1.3.2 11 | - tqdm=4.66.1 12 | - scipy=1.11.4 13 | - python-igraph=0.11.3 14 | - louvain=0.8.1 15 | - matplotlib-base=3.8.2 16 | - rpy2=3.5.11 17 | - r-base=4.3.2 18 | - r-mclust=6.0.1 19 | - scikit-misc=0.1.4 20 | - pip 21 | - pip: 22 | - git+https://github.com/QIFEIDKN/STAGATE@48ce7f874c83a9f1f68187be00370181261ab7c5 23 | -------------------------------------------------------------------------------- /method/DeepST/DeepST_env.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Create the DeepST conda environment named deepst_env 4 | # conda env create -f DeepST.yml 5 | 6 | # Activate the environment 7 | # source activate deepst_env 8 | 9 | # Install the required Python packages (CPU builds of PyTorch and PyG) 10 | 11 | pip3 install torch==1.13.0 torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cpu --no-cache-dir #### CPU 12 | pip install pyg_lib torch_scatter torch_sparse torch_cluster torch_spline_conv torch_geometric -f https://data.pyg.org/whl/torch-1.13.0+cpu.html --no-cache-dir ### CPU 13 | -------------------------------------------------------------------------------- /method/SEDR/SEDR.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | - pytorch 4 | - default 5 | dependencies: 6 | - python=3.11.3 7 | - pytorch=2.0.0 8 | - numpy=1.24.4 9 | - scanpy=1.9.6 10 | - python-igraph=0.11.3 11 | - leidenalg=0.10.1 12 | - louvain=0.8.1 13 | - anndata=0.9.1 14 | - rpy2=3.5.11 15 | - pandas=2.0.3 16 | - scipy=1.10.1 17 | - scikit-learn=1.2.2 18 | - tqdm=4.65 19 | - r-base=4.3.2 20 | - r-mclust=6.0.1 21 | - scikit-misc=0.1.4 22 | - pip 23 | - pip: 24 | -
git+https://github.com/JinmiaoChenLab/SEDR@8c273af3c520b663b2e83a4fbfdf462e0c7b8b7a -------------------------------------------------------------------------------- /.github/workflows/main.yml: -------------------------------------------------------------------------------- 1 | on: 2 | push: 3 | branches: 4 | - main 5 | - native 6 | 7 | jobs: 8 | contrib-readme-job: 9 | runs-on: ubuntu-latest 10 | name: A job to automate contrib in readme 11 | permissions: 12 | contents: write 13 | pull-requests: write 14 | steps: 15 | - name: Contribute List 16 | uses: akhilmhdh/contributors-readme-action@v2.3.10 17 | with: 18 | image_size: 75 19 | env: 20 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 21 | -------------------------------------------------------------------------------- /docs/modules/index.md: -------------------------------------------------------------------------------- 1 | # Modules 2 | 3 | The Spacehack 2.0 workflow consists of multiple module types: 4 | 5 | - [Datasets](data.md) to analyze 6 | - [Methods](method.md) to identify spatial domains 7 | - [Metrics](metric.md) to evaluate performance against a ground truth 8 | - [Consensus](consensus.md) to build a consensus clustering across multiple methods 9 | 10 | If you want to add a new module, make sure to follow the format and 11 | specification defined in its respective section. 12 | It is also worth having a look at the [Contribution guide](../CONTRIBUTING.md). 13 | 14 | ![Workflow](../img/workflow.svg) 15 | -------------------------------------------------------------------------------- /templates/data_optargs.schema.yaml: -------------------------------------------------------------------------------- 1 | $schema: "https://json-schema.org/draft/2020-12/schema" 2 | 3 | description: Which (optional) quality control parameters can the dataset use 4 | type: object 5 | properties: 6 | 7 | min_cells: 8 | description: What is the minimal number of cells in which a gene must be expressed to pass filtering? 9 | type: integer 10 | 11 | min_genes: 12 | description: What is the minimal number of genes a cell must express to pass filtering? 13 | type: integer 14 | 15 | min_counts: 16 | description: What is the minimum number of counts required for a cell to pass filtering? 17 | type: integer -------------------------------------------------------------------------------- /method/SpaceFlow/spaceflow.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - pytorch 3 | - conda-forge 4 | dependencies: 5 | - pytorch=1.13.1 6 | - torchaudio=0.13.1 7 | - torchvision=0.14.1 8 | - umap-learn=0.5.3 9 | - numpy=1.21.6 10 | - python=3.7.12 11 | - scikit-learn=1.0.2 12 | - scipy=1.7.3 13 | - mkl==2024.0 14 | - pip: 15 | - cmcrameri==1.7 16 | - gudhi==3.8.0 17 | - leidenalg==0.10.1 18 | - matplotlib==3.5.3 19 | - networkx==2.6.3 20 | - notebook==6.5.6 21 | - pandas==1.3.5 22 | - scanpy==1.9.3 23 | - spaceflow==1.0.4 24 | - squidpy==1.2.2 25 | - torch-geometric==2.3.1 26 | - torch-sparse==0.6.17 27 | - torch-scatter==2.1.1 -------------------------------------------------------------------------------- /templates/metric_optargs.schema.yaml: -------------------------------------------------------------------------------- 1 | $schema: "https://json-schema.org/draft/2020-12/schema" 2 | 3 | description: Which (optional) parameters can the metric use 4 | type: object 5 | properties: 6 | 7 | groundtruth: 8 | description: Does the metric need groundtruth labels?
9 | type: boolean 10 | 11 | embedding: 12 | description: Does the metric need embeddings? 13 | type: boolean 14 | 15 | config_file: 16 | description: Does the metric take an additional config file? 17 | type: boolean 18 | 19 | # Optional, only add when your metric requires this 20 | physical_coordinate: 21 | description: Does the metric take the physical coordinates of the sample? 22 | type: boolean -------------------------------------------------------------------------------- /data/abc_atlas_wmb_thalamus/LICENSE.txt: -------------------------------------------------------------------------------- 1 | ABC Atlas - Mouse Whole Brain by Allen Institute for Brain Science 2 | 3 | ABC Atlas - Mouse Whole Brain 4 | (https://knowledge.brain-map.org/data/LVDBJAW8BI5YSS1QUBG/collections) 5 | MERSCOPE v1 whole brain Data Collection is licensed under a 6 | Creative Commons Attribution 4.0 International License, and 7 | 10x scRNAseq whole brain Data Collection is licensed under a 8 | Creative Commons Attribution-NonCommercial 4.0 International License. 9 | 10 | See https://alleninstitute.org/citation-policy/ for the Allen Institute Citation 11 | Policy and https://alleninstitute.org/terms-of-use/ for the Allen Institute 12 | Terms of Use. 13 | 14 | See https://creativecommons.org/licenses/by/4.0/ and 15 | https://creativecommons.org/licenses/by-nc/4.0/ for a copy of each license. -------------------------------------------------------------------------------- /method/SCAN-IT/scanit.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - pyg 3 | - pytorch 4 | - anaconda 5 | - conda-forge 6 | dependencies: 7 | - python=3.10.14 8 | - umap-learn=0.5.5 9 | - gcc=12.1.0 10 | - anndata=0.10.3 11 | - gudhi=3.8.0 12 | - matplotlib=3.8.0 13 | # - torchvision=0.15.2 14 | #- torchaudio=0.12.1 #ytorch/noarch::pytorch-mutex-1.0-cuda; pytorch/linux-64::torchaudio-0.12.1-py310_cu116 <- 2.1.0 15 | - torchvision=0.16.0 16 | - torchaudio=2.1.0 17 | - cpuonly=2.0 18 | - pytorch=2.1.0 19 | - pyg=2.4.0 20 | - scanpy=1.9.6 21 | - scikit-learn=1.3.2 22 | - scipy=1.11.4 23 | - seaborn=0.12.2 24 | - statsmodels=0.14.0 25 | - numpy=1.26.2 26 | - pandas=2.1.4 27 | - networkx=2.6.3 28 | - leidenalg=0.10.1 29 | - somoclu=1.7.6 30 | - patsy=0.5.6 31 | - pip 32 | - pip: 33 | - somde==0.1.8 34 | - "git+https://github.com/zcang/SCAN-IT.git@ebf38949eea9348cd1791f392789a8a8c0ae1e47#egg=scanit" 35 | -------------------------------------------------------------------------------- /templates/method_optargs.schema.yaml: -------------------------------------------------------------------------------- 1 | $schema: "https://json-schema.org/draft/2020-12/schema" 2 | 3 | description: Which (optional) parameters can the method use 4 | type: object 5 | properties: 6 | 7 | matrix: 8 | description: What input does the method take? 9 | type: string 10 | enum: 11 | - counts 12 | - transform 13 | - dimensionality_reduction 14 | # - counts_or_transform 15 | 16 | integrated_feature_selection: 17 | description: Can the method use existing feature selections? 18 | type: boolean 19 | 20 | image: 21 | description: Can the method use H&E images? 22 | type: boolean 23 | 24 | neighbors: 25 | description: Can the method use existing neighbor definitions? 26 | type: boolean 27 | 28 | config_file: 29 | description: Does the method take an additional config file?
30 | type: boolean 31 | -------------------------------------------------------------------------------- /mkdocs.yml: -------------------------------------------------------------------------------- 1 | site_name: SACCELERATOR 2 | repo_url: https://github.com/SpatialHackathon/SACCELERATOR/ 3 | edit_uri: blob/native/docs/ 4 | copyright: Copyright 2025 SpaceHack 2.0 Organizing Committee 5 | 6 | nav: 7 | - Home: index.md 8 | - Usage: usage.md 9 | - Modules: 10 | - Overview: modules/index.md 11 | - Data: modules/data.md 12 | - Method: modules/method.md 13 | - Metric: modules/metric.md 14 | - Consensus: modules/consensus.md 15 | - Extending & Contributing: CONTRIBUTING.md 16 | - About: 17 | - SpaceHack: https://spatialhackathon.github.io/ 18 | - Repository: https://github.com/SpatialHackathon/SACCELERATOR 19 | - License: about/license.md 20 | 21 | theme: 22 | name: mkdocs 23 | highlightjs: true 24 | color_mode: auto 25 | user_color_mode_toggle: true 26 | 27 | plugins: 28 | - search 29 | - macros 30 | 31 | extra: 32 | repo_branch_url: https://github.com/SpatialHackathon/SACCELERATOR/tree/native 33 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | MIT No Attribution 2 | 3 | Copyright 2023 SpaceHack 2.0 Organizing Committee 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this 6 | software and associated documentation files (the "Software"), to deal in the Software 7 | without restriction, including without limitation the rights to use, copy, modify, 8 | merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 9 | permit persons to whom the Software is furnished to do so. 10 | 11 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 12 | INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 13 | PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 14 | HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 15 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 16 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
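The three `templates/*_optargs.schema.yaml` files above spell out the optargs contract as JSON Schema (draft 2020-12). Below is a hedged validation sketch — it assumes `pyyaml` and `jsonschema` are installed, which this repository's environments do not pin:

```{python}
import json
from pathlib import Path

import yaml                      # assumption: pyyaml available
from jsonschema import validate  # assumption: jsonschema available

schema = yaml.safe_load(Path("templates/method_optargs.schema.yaml").read_text())
optargs = json.loads(Path("method/scanpy/scanpy_optargs.json").read_text())

validate(instance=optargs, schema=schema)  # raises ValidationError on mismatch
print("scanpy_optargs.json conforms to the method optargs schema")
```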
-------------------------------------------------------------------------------- /preprocessing/visualization/pdf_merge.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Author_and_contribution: Jieran Sun; Implemented visualization 4 | 5 | import argparse 6 | import shutil 7 | from pathlib import Path 8 | 9 | from PyPDF2 import PdfMerger 10 | 11 | parser = argparse.ArgumentParser(description="Merge all visualization PDFs into one PDF report") 12 | 13 | parser.add_argument("-d", "--out_dir", help="Output directory.", required=True) 14 | parser.add_argument("-p", "--pdfs", nargs="+", help="All input PDF files.", required=True) 15 | 16 | args = parser.parse_args() 17 | out_dir = Path(args.out_dir) 18 | 19 | # Create a merger object 20 | merger = PdfMerger() 21 | 22 | for pdf_file in args.pdfs: 23 | merger.append(pdf_file) 24 | # Remove the per-sample directory once its PDF has been appended 25 | pdf_file = Path(pdf_file) 26 | shutil.rmtree(pdf_file.parent, ignore_errors=True) 27 | 28 | # Write the merged output 29 | out_dir.mkdir(parents=True, exist_ok=True) 30 | merger.write(out_dir / "pp_report.pdf") 31 | merger.close() 32 | -------------------------------------------------------------------------------- /method/CellCharter/CellCharter.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - pytorch 3 | - conda-forge 4 | dependencies: 5 | - python=3.10.14 6 | - pytorch=1.12.1 7 | - torchvision=0.13.1 8 | - torchaudio=0.12.1 9 | - cudatoolkit=11.6 10 | - mkl=2024.0.0 11 | - scikit-misc=0.1.4 12 | # - torchaudio=0.12 13 | # - cudatoolkit 14 | # - scikit-learn=1.3.0 15 | # - rasterio=1.3.8 16 | # - urllib3=1.26.16 17 | # - typing-extensions=4.5.0 18 | # - numpy=1.23.4 19 | # - markdown-it-py=2.2.0 20 | # - torchmetrics=0.11.4 21 | # - scipy=1.10.1 22 | - scanpy=1.9.8 23 | # - pandas=2.2.1 24 | # - python-igraph=0.11.4 25 | # - igraph=0.10.10 26 | - pip 27 | - pip: 28 | - pyro-ppl==1.8.6 29 | - scvi-tools==0.20.3 30 | # - flax==0.7.2 31 | # - pycave==3.2.1 32 | # - jax==0.4.14 33 | # - jaxlib==0.4.14 34 | # - chex==0.1.7 35 | # - squidpy==1.3.0 36 | # - sknw==0.14 37 | - cellcharter==0.2.0 38 | # - spatialdata==0.1.0 39 | # - spatialdata-plot==0.2.0 40 | # - python-dateutil==2.9.0.post0 41 | -------------------------------------------------------------------------------- /workflows/path_config_test.yaml: -------------------------------------------------------------------------------- 1 | # This YAML file follows the structure below 2 | 3 | datasets: 4 | libd_dlpfc: 5 | env: data/libd_dlpfc/libd_dlpfc.yml 6 | script: data/libd_dlpfc/libd_dlpfc.r 7 | optargs: data/libd_dlpfc/libd_dlpfc_optargs.json 8 | 9 | neighbors_infos: 10 | delaunay_triangulation: 11 | script: preprocessing/neighbors/delaunay_triangulation/delaunay_triangulation.py 12 | env: preprocessing/neighbors/delaunay_triangulation/delaunay_triangulation.yml 13 | 14 | # Make sure each method's key is the same as its folder name 15 | methods: 16 | CellCharter: 17 | env: method/CellCharter/CellCharter.yml 18 | script: method/CellCharter/CellCharter.py 19 | optargs: method/CellCharter/CellCharter_optargs.json 20 | 21 | # All metrics as of 14.02.2024 22 | metrics: 23 | ARI: 24 | env: metric/ARI/ARI.yml 25 | script: metric/ARI/ARI.py 26 | optargs: metric/ARI/ARI_optargs.json 27 | 28 | # Make sure each config_files key matches the corresponding method name 29 | config_files: 30 | CellCharter: 31 | config_default: "config/config_default.json" 32 |
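Adding another module to the workflow only requires extending the mapping above; a sketch for a second `methods` entry, assuming the script inside the method folder follows the usual `{method}.py` naming convention (the GraphST script name is an assumption, not taken from this file):

```yaml
methods:
  CellCharter:
    env: method/CellCharter/CellCharter.yml
    script: method/CellCharter/CellCharter.py
    optargs: method/CellCharter/CellCharter_optargs.json
  GraphST:                                 # key must match the folder name under method/
    env: method/GraphST/GraphST.yml
    script: method/GraphST/GraphST.py      # assumed name; point this at the actual script file
    optargs: method/GraphST/GraphST_optargs.json
```

The same pattern applies to `datasets`, `metrics`, and `config_files`; the keys are used as wildcards by the Snakemake workflows, so they have to match the folder names exactly.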
-------------------------------------------------------------------------------- /.github/workflows/build_mkdocs.yml: -------------------------------------------------------------------------------- 1 | name: Build and deploy MkDocs to GitHub Pages 2 | 3 | on: 4 | pull_request: 5 | # Test build for PRs targeting default branch 6 | branches: 7 | - native 8 | push: 9 | # Deploy on push to default branch 10 | branches: 11 | - native 12 | 13 | permissions: 14 | contents: write 15 | 16 | jobs: 17 | build: 18 | runs-on: ubuntu-latest 19 | steps: 20 | - uses: actions/checkout@v4 21 | - uses: actions/setup-python@v5 22 | with: 23 | python-version: 3.13 24 | - name: Install dependencies 25 | run: | 26 | python -m pip install --upgrade pip 27 | pip install -r docs/requirements.txt 28 | 29 | - name: Build MkDocs site (strict) 30 | # Test build on PR to native branch 31 | if: github.event_name == 'pull_request' 32 | run: mkdocs build --strict 33 | 34 | - name: Deploy to GitHub Pages 35 | # Deploy on push to native branch 36 | if: github.event_name == 'push' && github.ref == 'refs/heads/native' 37 | run: mkdocs gh-deploy --force 38 | -------------------------------------------------------------------------------- /method/SpiceMix/README.md: -------------------------------------------------------------------------------- 1 | Here's an example of calling the `SpiceMix.py` script: 2 | ```{bash} 3 | python SpiceMix.py -m ~/scratch/SpaceHack2/data/LIBD_DLPFC/Br8100_151673/counts.mtx -c ~/scratch/SpaceHack2/data/LIBD_DLPFC/Br8100_151673/coordinates.tsv -o ~/scratch/SpaceHack2/data/LIBD_DLPFC/Br8100_151673/observations.tsv -d ./output_test --n_clusters 7 --seed 0 --config config/config_1.json 4 | ``` 5 | 6 | The config file should look something like this: 7 | ```{json} 8 | 9 | { 10 | "K": 15, 11 | "lambda_Sigma_x_inv": 1e-4, 12 | "device": "cuda:0", 13 | "dtype": "float64", 14 | "num_preiterations": 5, 15 | "num_iterations": 200 16 | } 17 | ``` 18 | If you want preprocessing to be done within the script (such as log normalization, HVG selection, neighborhood graph construction), specify the `preprocess` parameter: 19 | ```{json} 20 | 21 | { 22 | "K": 15, 23 | "lambda_Sigma_x_inv": 1e-4, 24 | "device": "cuda:0", 25 | "dtype": "float64", 26 | "num_preiterations": 5, 27 | "num_iterations": 200, 28 | "preprocess": { 29 | "hvgs": 3500 30 | } 31 | } 32 | ``` 33 | -------------------------------------------------------------------------------- /workflows/01_download.smk: -------------------------------------------------------------------------------- 1 | import os 2 | from shared.functions import get_git_directory 3 | 4 | # workflow specific setting 5 | configfile: "path_config.yaml" 6 | configfile: "excute_config.yaml" 7 | 8 | # Attach the specific github directory here 9 | GIT_DIR = get_git_directory(config) 10 | 11 | # Leave only datasets 12 | DATASETS = config.pop("datasets") 13 | datasets_selected = config["datasets_selected"] 14 | 15 | # Get all the dataset folder 16 | def get_all_input(wildcards): 17 | all_folder = [] 18 | for dataset in datasets_selected: 19 | all_folder.append(config["DATASET_DIR"] + "/" + dataset) 20 | return all_folder 21 | 22 | 23 | ############## starting snakemake pipelines ################## 24 | 25 | # Defining all output wanted from this snakemake 26 | rule all: 27 | input: 28 | get_all_input, 29 | 30 | rule download: 31 | output: 32 | dir=directory(config["DATASET_DIR"] + "/{dataset}"), 33 | conda: 34 | lambda wildcards: GIT_DIR + DATASETS[wildcards.dataset]["env"] 35 | params: 36 
| script=lambda wildcards: GIT_DIR + DATASETS[wildcards.dataset]["script"], 37 | shell: 38 | "{params.script} -o {output.dir}" 39 | -------------------------------------------------------------------------------- /method/Giotto/Giotto_env.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Create the Giotto conda environment named giotto_env 4 | #conda env create -f giotto.yml 5 | 6 | # Activate the environment 7 | #conda activate giotto_env 8 | 9 | # Install the required R packages 10 | Rscript -e "remotes::install_version('colorRamp2', version = '0.1.0', repos = 'https://cran.r-project.org/')" 11 | Rscript -e "remotes::install_bitbucket(repo = 'qzhudfci/smfishhmrf-r', ref='2ab48253591b2dd3c545e117c4256f92ecb287ee')" 12 | Rscript -e "remotes::install_version('irlba', version = '2.3.5.1', repos = 'https://cran.r-project.org/')" 13 | # Install Giotto packages 14 | Rscript -e "remotes::install_github('drieslab/GiottoUtils@v0.1.0', dependencies = FALSE)" # , ref = '7c8f0010de6c916228834823455f48ed5b3fa706')" 15 | Rscript -e "remotes::install_github('drieslab/GiottoClass@v0.1.0', dependencies = FALSE)" # , ref = 'fca6eb3f5ee6e8e7e9cfe8a0bb82721107f4872d')" 16 | Rscript -e "remotes::install_github('drieslab/GiottoData@v0.2.6.2', dependencies = FALSE)" # , ref = '50606245a01f151c6c308f3282f7b3fd87c67027')" 17 | Rscript -e "remotes::install_github('drieslab/GiottoVisuals@v0.1.0', dependencies = FALSE)" # , ref = '8a68d8840ba4724b9a6cbc223dc7d6ef6f88f050')" 18 | Rscript -e "remotes::install_github('drieslab/Giotto@v4.0.0', dependencies = FALSE)" # , ref = 'fc7a6a51efc6853ff43f6028d1cce9a6070537e2')" 19 | -------------------------------------------------------------------------------- /workflows/shared/functions.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pandas as pd 3 | 4 | 5 | def get_git_directory(config): 6 | if config.get("GIT_DIR") is not None: 7 | git_dir = config["GIT_DIR"] 8 | else: 9 | # Should change the SpaceHack directory. 
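# (resolution order: config["GIT_DIR"] from the workflow YAML first, then the GIT_DIR environment variable, then the default checkout path below)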
10 | git_dir = os.getenv("GIT_DIR", "/home/ubuntu/workspace/SpaceHack2023") 11 | 12 | if not git_dir.endswith("/"): 13 | git_dir += "/" 14 | return git_dir 15 | 16 | # Exclude hidden folders (e.g. the .ipynb checkpoints generated by the preprocessing scripts) and other unwanted directories 17 | def get_sample_dirs(data_dir): 18 | return [f.path for f in os.scandir(data_dir) if f.is_dir() and not f.name.startswith('.')] 19 | 20 | 21 | def check_files_in_folder(folder_path, file_list): 22 | # Get a list of all files in the folder 23 | files_in_folder = os.listdir(folder_path) 24 | # Check each file in the file_list 25 | for file in file_list: 26 | if file not in files_in_folder: 27 | return False 28 | return True 29 | 30 | 31 | def get_ncluster(file_path, sample, default_value=7): 32 | if not os.path.exists(file_path): 33 | return default_value 34 | try: 35 | df = pd.read_csv(file_path, sep="\t", index_col=0) 36 | df_filtered = df[df["directory"] == sample] 37 | return int(df_filtered["n_clusters"].mean()) 38 | except Exception: # fall back to the default if the file cannot be parsed 39 | return default_value 40 | -------------------------------------------------------------------------------- /templates/consensus_BC.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Author_and_contribution: Jieran Sun & Mark Robinson; implemented method 4 | # Author_and_contribution: Peiying Cai; created template 5 | # Author_and_contribution: ENTER YOUR NAME AND CONTRIBUTION HERE 6 | 7 | import argparse 8 | import pandas as pd 9 | from pathlib import Path 10 | 11 | # TODO adjust description 12 | description = "... to select base clusterings" 13 | 14 | parser = argparse.ArgumentParser(description=description) 15 | 16 | parser.add_argument( 17 | "-i", "--input_file", required=True, 18 | help="Input containing the aggregated labels." 19 | ) 20 | parser.add_argument( 21 | "-o", "--output_file", required=True, 22 | help="Desired output file." 23 | ) 24 | # TODO add additional arguments 25 | 26 | args = parser.parse_args() 27 | 28 | # Load input file 29 | label_df = pd.read_csv(args.input_file, sep="\t", index_col=0) 30 | 31 | ## Your code goes here 32 | # TODO 33 | # output_df: DataFrame with number of clusters as columns, clustering label names as values 34 | # Example: 35 | # 7 8 36 | # 0 method1_default_7_label method1_default_8_label 37 | # 1 method2_default_7_label method2_default_8_label 38 | # 2 method3_default_7_label method3_default_8_label 39 | 40 | ## Write output 41 | output_path = Path(args.output_file) 42 | output_path.parent.mkdir(parents=True, exist_ok=True) 43 | 44 | # Save the results 45 | output_df.to_csv(output_path, sep="\t", index=False) 46 | -------------------------------------------------------------------------------- /templates/metric.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Author_and_contribution: Niklas Mueller-Boetticher; created template 4 | # Author_and_contribution: ENTER YOUR NAME AND CONTRIBUTION HERE 5 | 6 | import argparse 7 | 8 | # TODO adjust description 9 | parser = argparse.ArgumentParser(description="Calculate metric ...") 10 | 11 | parser.add_argument( 12 | "-l", "--labels", help="Labels from domain clustering.", required=True 13 | ) 14 | parser.add_argument("-g", "--ground_truth", help="Groundtruth labels.", required=False) 15 | parser.add_argument( 16 | "-e", 17 | "--embedding", 18 | help="Embedding of points in latent space. Potential usage for metrics without groundtruth.",
19 | required=False, 20 | ) 21 | parser.add_argument( 22 | "-c", 23 | "--config", 24 | help="Optional config file (json) used to pass additional parameters.", 25 | required=False, 26 | ) # format should be json 27 | parser.add_argument("-o", "--out_file", help="Output file.", required=True) 28 | 29 | args = parser.parse_args() 30 | 31 | # Use these filepaths as input 32 | label_file = args.labels 33 | 34 | if args.ground_truth is not None: 35 | groundtruth_file = args.ground_truth 36 | if args.embedding is not None: 37 | embedding_file = args.embedding 38 | if args.config is not None: 39 | config_file = args.config 40 | 41 | 42 | ## Your code goes here 43 | # TODO 44 | # metric: float = ... output of the metric as float 45 | 46 | 47 | ## Write output 48 | from pathlib import Path 49 | 50 | Path(args.out_file).parent.mkdir(parents=True, exist_ok=True) 51 | 52 | with open(args.out_file, "w") as file: 53 | file.write(f"{metric:.5e}\n") 54 | -------------------------------------------------------------------------------- /templates/consensus_BC.r: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env Rscript 2 | 3 | # Author_and_contribution: Jieran Sun & Mark Robinson; implemented method 4 | # Author_and_contribution: Peiying Cai; created template 5 | # Author_and_contribution: ENTER YOUR NAME AND CONTRIBUTION HERE 6 | 7 | suppressPackageStartupMessages(library(optparse)) 8 | 9 | option_list <- list( 10 | make_option( 11 | c("-i", "--input_file"), 12 | type = "character", default = NULL, 13 | help = "Input containing the aggregated labels." 14 | ), 15 | make_option( 16 | c("-o", "--output_file"), 17 | type = "character", default = NULL, 18 | help = "Desired output file." 19 | ) 20 | # TODO add additional arguments 21 | ) 22 | 23 | # TODO adjust description 24 | description <- "... to select base clusterings" 25 | 26 | opt_parser <- OptionParser( 27 | usage = description, 28 | option_list = option_list 29 | ) 30 | opt <- parse_args(opt_parser) 31 | 32 | # Use these filepaths as input 33 | input_file <- opt$input_file 34 | output_file <- opt$output_file 35 | 36 | ##### Load files 37 | label_df <- read.delim(input_file, stringsAsFactors = FALSE, row.names = 1, numerals="no.loss") 38 | 39 | ## Your code goes here 40 | # TODO 41 | # output_df: data frame with number of clusters as column headers, and clustering label names as values 42 | # Example: 43 | # 7 8 44 | # method1_default_7_label method1_default_8_label 45 | # method2_default_7_label method2_default_8_label 46 | # method3_default_7_label method3_default_8_label 47 | 48 | 49 | ## Write output 50 | dir.create(dirname(output_file), showWarnings = FALSE, recursive = TRUE) 51 | 52 | # Save the results 53 | write.table(output_df, file = output_file, sep = "\t", col.names = NA, quote = FALSE) 54 | -------------------------------------------------------------------------------- /consensus/02_Cross_method_ARI/Cross_method_ARI.r: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env Rscript 2 | 3 | # Author_and_contribution: Jieran Sun & Mark Robinson; Created the script 4 | 5 | suppressPackageStartupMessages(library(optparse)) 6 | 7 | option_list <- list( 8 | make_option( 9 | c("-i", "--input_file"), 10 | type = "character", default = NULL, 11 | help = "Input containing the aggregated labels."
12 | ), 13 | make_option( 14 | c("-o", "--output_file"), 15 | type = "character", default = NULL, 16 | help = "Desired output file." 17 | ) 18 | ) 19 | 20 | description <- "Return cross-method ARI" 21 | 22 | opt_parser <- OptionParser( 23 | usage = description, 24 | option_list = option_list 25 | ) 26 | opt <- parse_args(opt_parser) 27 | 28 | # Use these filepaths as input 29 | input_file <- opt$input_file 30 | output_file <- opt$output_file 31 | 32 | #set.seed(seed) 33 | 34 | suppressPackageStartupMessages({ 35 | library(mclust) 36 | }) 37 | 38 | ##### Define ARI function 39 | calc_aris <- function(m, flavour="ARI") { 40 | a <- diag(ncol(m)) 41 | for(i in 1:(ncol(m)-1)) 42 | for(j in (i+1):ncol(m)) { 43 | if(flavour=="ARI") { 44 | # each unordered pair is computed once and mirrored 45 | a[i,j] <- a[j,i] <- mclust::adjustedRandIndex(m[,i], m[,j]) 46 | } else if(flavour=="sARI") { 47 | # TODO implement spatial ARI 48 | } 49 | } 50 | rownames(a) <- colnames(a) <- colnames(m) 51 | a 52 | } 53 | 54 | ##### Load files 55 | label_df <- read.delim(input_file, stringsAsFactors = FALSE, row.names = 1, numerals="no.loss") 56 | 57 | # Calculate cross-method ARI 58 | ari_mat <- calc_aris(label_df) 59 | 60 | dir.create(dirname(output_file), showWarnings = FALSE, recursive = TRUE) 61 | 62 | # Save the results 63 | write.table(ari_mat, file = output_file, sep = "\t", col.names = NA, quote = FALSE) 64 | -------------------------------------------------------------------------------- /metric/ARI/ARI.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Author_and_contribution: Niklas Mueller-Boetticher; created script 4 | 5 | import argparse 6 | 7 | parser = argparse.ArgumentParser( 8 | description="Calculate Adjusted Rand Index (scikit-learn)" 9 | ) 10 | 11 | parser.add_argument( 12 | "-l", "--labels", help="Labels from domain clustering.", required=True 13 | ) 14 | parser.add_argument("-g", "--ground_truth", help="Groundtruth labels.", required=False) 15 | parser.add_argument( 16 | "-e", 17 | "--embedding", 18 | help="Embedding of points in latent space. 
Potential usage for metrics without groundtruth.", 19 | required=False, 20 | ) 21 | parser.add_argument( 22 | "-c", 23 | "--config", 24 | help="Optional config file used to pass additional parameters.", 25 | required=False, 26 | ) 27 | parser.add_argument("-o", "--out_file", help="Output file.", required=True) 28 | 29 | args = parser.parse_args() 30 | 31 | # Use these filepaths as input 32 | label_file = args.labels 33 | 34 | if args.ground_truth is not None: 35 | groundtruth_file = args.ground_truth 36 | 37 | 38 | ## Your code goes here 39 | if args.ground_truth is None: 40 | raise Exception("Groundtruth labels needed to calculate the Adjusted Rand Index") 41 | 42 | import pandas as pd 43 | from sklearn.metrics import adjusted_rand_score 44 | 45 | domains = pd.read_table(label_file, index_col=0)["label"].astype("category").cat.codes 46 | groundtruth = ( 47 | pd.read_table(groundtruth_file, index_col=0)["label"].astype("category").cat.codes 48 | ) 49 | 50 | common_index = domains.index.intersection(groundtruth.index) 51 | 52 | metric = adjusted_rand_score(groundtruth.loc[common_index], domains.loc[common_index]) 53 | 54 | 55 | ## Write output 56 | from pathlib import Path 57 | 58 | Path(args.out_file).parent.mkdir(parents=True, exist_ok=True) 59 | 60 | with open(args.out_file, "w") as file: 61 | file.write(f"{metric:.5e}\n") 62 | -------------------------------------------------------------------------------- /workflows/excute_config_test.yaml: -------------------------------------------------------------------------------- 1 | # This yaml file contains all the parameters and settings for the workflow. 2 | # 1. Universal parameters: Used in all smk files, set once 3 | # 2. Process-specific parameters: some other parameters, if needed 4 | # 3. Datasets, methods and metrics: 5 | # select which ones to include in the workflow, comment out unwanted ones. 6 | # For methods, you can also specify cluster numbers for specific datasets. 7 | 8 | ###### Universal parameters ####### 9 | # Directories; modify based on your own setup 10 | GIT_DIR: /work/PRTNR/CHUV/DIR/rgottar1/spatial/Cluster_Benchmark/SpaceHack2023 11 | DATASET_DIR: /work/PRTNR/CHUV/DIR/rgottar1/spatial/Cluster_Benchmark/data 12 | SEED: 2023 13 | 14 | ###### Datasets selected for execution ####### 15 | datasets_selected: 16 | - "libd_dlpfc" 17 | 18 | ###### Methods selected for execution ####### 19 | methods_selected: 20 | - "CellCharter" 21 | 22 | # If some datasets specify the number of clusters, add it here 23 | n_clusters: 24 | libd_dlpfc: [9] 25 | 26 | ###### Metrics selected for execution ####### 27 | metrics_selected: 28 | - "ARI" 29 | 30 | ###### Base clustering selection parameters ####### 31 | # n_neighbors below is used as in scanpy (sc.pp.neighbors()). 32 | selection_criteria: 33 | - "Cross_method_ARI" 34 | - "Smoothness_entropy" 35 | - "Manual_selection" 36 | n_neighbors: 6 37 | 38 | ###### Consensus clustering parameters ####### 39 | bc_numbers: [8] # number of base clustering results 40 | consensus_algorithms: 41 | - "lca" 42 | - "kmode" 43 | - "weighted" 44 | # In case you need to re-define the desired cluster number, do it here. Otherwise the n_clusters value would be used
45 | n_clust_consensus: 46 | abc_atlas_wmb_thalamus: [16, 19, 20, 21, 24, 28, 32] 47 | 48 | # For weighted clustering 49 | lambda: null 50 | 51 | # For cross-method entropy 52 | cross_method_entropy: true -------------------------------------------------------------------------------- /data/pachter_simulation/pachter_simulation.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Made by Paul Kiessling pakiessling@ukaachen.de 4 | 5 | 6 | import os 7 | import argparse 8 | import tempfile 9 | import shutil 10 | from pypdl import Downloader 11 | 12 | LINKS = { 13 | "https://zenodo.org/records/13944111/files/concordex_sim.zip": "fb7c79fd9cec2c79e3b74fb50be40ff4" 14 | } 15 | 16 | 17 | def download_links(links, temp_dir): 18 | headers = { 19 | "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:47.0) Gecko/20100101 Firefox/47.0" 20 | } 21 | dl = Downloader(headers=headers) 22 | for link, checksum in links.items(): 23 | print(f"Downloading {link}") 24 | file = dl.start( 25 | url=link, 26 | file_path=temp_dir, 27 | segments=10, 28 | display=True, 29 | multithread=True, 30 | block=True, 31 | retries=3, 32 | ) 33 | if not file.validate_hash(checksum, "md5"): 34 | raise ValueError(f"File {file} is corrupted") 35 | 36 | 37 | 38 | 39 | def main(): 40 | # Set up command-line argument parser 41 | parser = argparse.ArgumentParser( 42 | description="Download the Pachter simulation data and convert it to SpaceHack format." 43 | ) 44 | 45 | # Add arguments for output folder 46 | parser.add_argument( 47 | "-o", "--out_dir", help="Output directory to write files to.", required=True 48 | ) 49 | 50 | # Parse the command-line arguments 51 | args = parser.parse_args() 52 | 53 | # Download and process 54 | with tempfile.TemporaryDirectory() as temp_dir: 55 | download_links(LINKS, temp_dir) 56 | for file in os.listdir(temp_dir): 57 | if file.endswith(".tar.gz") or file.endswith(".zip"): 58 | shutil.unpack_archive(os.path.join(temp_dir, file), args.out_dir) 59 | 60 | 61 | 62 | if __name__ == "__main__": 63 | main() 64 | -------------------------------------------------------------------------------- /templates/metric.r: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env Rscript 2 | 3 | # Author_and_contribution: Niklas Mueller-Boetticher; created template 4 | # Author_and_contribution: ENTER YOUR NAME AND CONTRIBUTION HERE 5 | 6 | suppressPackageStartupMessages(library(optparse)) 7 | 8 | # TODO adjust description 9 | option_list <- list( 10 | make_option( 11 | c("-l", "--labels"), 12 | type = "character", default = NULL, 13 | help = "Labels from domain clustering." 14 | ), 15 | make_option( 16 | c("-g", "--ground_truth"), 17 | type = "character", default = NA, 18 | help = "Groundtruth labels." 19 | ), 20 | make_option( 21 | c("-e", "--embedding"), 22 | type = "character", default = NA, 23 | help = "Embedding of points in latent space. Potential usage for metrics without groundtruth." 24 | ), 25 | # format should be json 26 | make_option( 27 | c("-c", "--config"), 28 | type = "character", default = NA, 29 | help = "Optional config file (json) used to pass additional parameters." 30 | ), 31 | make_option( 32 | c("-o", "--out_file"), 33 | type = "character", default = NULL, 34 | help = "Output file." 35 | ) 36 | ) 37 | 38 | # TODO adjust description 39 | description <- "Calculate metric ..."
40 | 41 | opt_parser <- OptionParser( 42 | usage = description, 43 | option_list = option_list 44 | ) 45 | opt <- parse_args(opt_parser) 46 | 47 | # Use these filepaths as input 48 | label_file <- opt$labels 49 | 50 | if (!is.na(opt$ground_truth)) { 51 | groundtruth_file <- opt$ground_truth 52 | } 53 | if (!is.na(opt$embedding)) { 54 | embedding_file <- opt$embedding 55 | } 56 | if (!is.na(opt$config)) { 57 | config_file <- opt$config 58 | } 59 | 60 | 61 | ## Your code goes here 62 | # TODO 63 | # metric = ... # output of the metric as float 64 | 65 | 66 | ## Write output 67 | outfile <- file(opt$out_file) 68 | dir.create(dirname(opt$out_file), showWarnings = FALSE, recursive = TRUE) 69 | 70 | writeLines(format(metric, digits = 6, scientific = TRUE), outfile) 71 | close(outfile) 72 | -------------------------------------------------------------------------------- /method/search_res.r: -------------------------------------------------------------------------------- 1 | binary_search <- function( 2 | spe, 3 | do_clustering, 4 | extract_nclust, 5 | n_clust_target, 6 | resolution_update = 2, 7 | resolution_init = 1, 8 | resolution_boundaries=NULL, 9 | num_rs = 100, 10 | tolerance = 1e-3, 11 | ...) { 12 | 13 | # Initialize boundaries 14 | lb <- rb <- NULL 15 | n_clust <- -1 16 | 17 | if (!is.null(resolution_boundaries)){ 18 | lb <- resolution_boundaries[1] 19 | rb <- resolution_boundaries[2] 20 | } else { 21 | res <- resolution_init 22 | result <- do_clustering(spe, resolution = res, ...) 23 | # Adjust cluster_ids extraction per method 24 | n_clust <- extract_nclust(result) 25 | if (n_clust > n_clust_target) { 26 | while (n_clust > n_clust_target && res > 1e-5) { 27 | rb <- res 28 | res <- res/resolution_update 29 | result <- do_clustering(spe, resolution = res, ...) 30 | n_clust <- extract_nclust(result) 31 | } 32 | lb <- res 33 | } else if (n_clust < n_clust_target) { 34 | while (n_clust < n_clust_target) { 35 | lb <- res 36 | res <- res*resolution_update 37 | result <- do_clustering(spe, resolution = res, ...) 38 | n_clust <- extract_nclust(result) 39 | } 40 | rb <- res 41 | } 42 | if (n_clust == n_clust_target) { lb <- rb <- res } 43 | } 44 | 45 | i <- 0 46 | while ((rb - lb > tolerance || lb == rb) && i < num_rs) { 47 | mid <- sqrt(lb * rb) # bisect on a log scale (geometric mean of the bounds) 48 | message("Resolution: ", mid) 49 | result <- do_clustering(spe, resolution = mid, ...) 50 | n_clust <- extract_nclust(result) 51 | if (n_clust == n_clust_target || lb == rb) break 52 | if (n_clust > n_clust_target) { 53 | rb <- mid 54 | } else { 55 | lb <- mid 56 | } 57 | i <- i + 1 58 | } 59 | 60 | # Warning if target not met 61 | if (n_clust != n_clust_target) { 62 | warning(sprintf("Warning: n_clust = %d not found in binary search, returning best approximation with res = %f and n_clust = %d. (rb = %f, lb = %f, i = %d)", n_clust_target, mid, n_clust, rb, lb, i)) 63 | } 64 | return(result) 65 | } -------------------------------------------------------------------------------- /metric/Davies-Bouldin/Davies-Bouldin.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Author_and_contribution: Niklas Mueller-Boetticher; created template 4 | # Author_and_contribution: Kirti Biharie; implemented Davies-Bouldin score 5 | 6 | import argparse 7 | 8 | parser = argparse.ArgumentParser(description="Calculate Davies-Bouldin Score (scikit-learn)") 9 | 10 | parser.add_argument( 11 | "-l", "--labels", help="Labels from domain clustering.", required=True 12 | ) 13 | parser.add_argument("-g", "--ground_truth", help="Groundtruth labels.", required=False) 14 | parser.add_argument( 15 | "-e", 16 | "--embedding", 17 | help="Embedding of points in latent space. Potential usage for metrics without groundtruth.", 18 | required=False, 19 | ) 20 | parser.add_argument( 21 | "-c", 22 | "--config", 23 | help="Optional config file (json) used to pass additional parameters.", 24 | required=False, 25 | ) # format should be json 26 | parser.add_argument("-o", "--out_file", help="Output file.", required=True) 27 | 28 | args = parser.parse_args() 29 | 30 | # Use these filepaths as input 31 | label_file = args.labels 32 | 33 | if args.ground_truth is not None: 34 | groundtruth_file = args.ground_truth 35 | if args.embedding is not None: 36 | embedding_file = args.embedding 37 | if args.config is not None: 38 | config_file = args.config 39 | 40 | 41 | ## Your code goes here 42 | if args.embedding is None: 43 | raise Exception("Embeddings needed to calculate the Davies-Bouldin Score") 44 | 45 | import pandas as pd 46 | import sklearn.metrics 47 | 48 | embeddings = pd.read_table(embedding_file, index_col=0) 49 | labels = pd.read_table(label_file, index_col=0)["label"].astype("category").cat.codes 50 | 51 | common_index = labels.index.intersection(embeddings.index) 52 | embeddings = embeddings.loc[common_index] 53 | labels = labels.loc[common_index] 54 | 55 | metric = sklearn.metrics.davies_bouldin_score(embeddings, labels) 56 | 57 | ## Write output 58 | from pathlib import Path 59 | 60 | Path(args.out_file).parent.mkdir(parents=True, exist_ok=True) 61 | 62 | with open(args.out_file, "w") as file: 63 | file.write(f"{metric:.5e}\n") -------------------------------------------------------------------------------- /metric/Calinski-Harabasz/Calinski-Harabasz.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Author_and_contribution: Niklas Mueller-Boetticher; created template 4 | # Author_and_contribution: Kirti Biharie; implemented Calinski-Harabasz score 5 | 6 | import argparse 7 | 8 | parser = argparse.ArgumentParser(description="Calculate Calinski-Harabasz Score (scikit-learn)") 9 | 10 | parser.add_argument( 11 | "-l", "--labels", help="Labels from domain clustering.", required=True 12 | ) 13 | parser.add_argument("-g", "--ground_truth", help="Groundtruth labels.", required=False) 14 | parser.add_argument( 15 | "-e", 16 | "--embedding", 17 | help="Embedding of points in latent space. 
Potential usage for metrics without groundtruth.", 18 | required=False, 19 | ) 20 | parser.add_argument( 21 | "-c", 22 | "--config", 23 | help="Optional config file (json) used to pass additional parameters.", 24 | required=False, 25 | ) # format should be json 26 | parser.add_argument("-o", "--out_file", help="Output file.", required=True) 27 | 28 | args = parser.parse_args() 29 | 30 | # Use these filepaths as input 31 | label_file = args.labels 32 | 33 | if args.ground_truth is not None: 34 | groundtruth_file = args.ground_truth 35 | if args.embedding is not None: 36 | embedding_file = args.embedding 37 | if args.config is not None: 38 | config_file = args.config 39 | 40 | 41 | ## Your code goes here 42 | if args.embedding is None: 43 | raise Exception("Embeddings needed to calculate the Calinski-Harabasz Score") 44 | 45 | import pandas as pd 46 | import sklearn.metrics 47 | 48 | embeddings = pd.read_table(embedding_file, index_col=0) 49 | labels = pd.read_table(label_file, index_col=0)["label"].astype("category").cat.codes 50 | 51 | common_index = labels.index.intersection(embeddings.index) 52 | embeddings = embeddings.loc[common_index] 53 | labels = labels.loc[common_index] 54 | 55 | metric = sklearn.metrics.calinski_harabasz_score(embeddings, labels) 56 | 57 | ## Write output 58 | from pathlib import Path 59 | 60 | Path(args.out_file).parent.mkdir(parents=True, exist_ok=True) 61 | 62 | with open(args.out_file, "w") as file: 63 | file.write(f"{metric:.5e}\n") -------------------------------------------------------------------------------- /metric/FMI/FMI.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Author_and_contribution: Niklas Mueller-Boetticher; created template 4 | # Author_and_contribution: Kirti Biharie; implemented fmi score 5 | 6 | import argparse 7 | 8 | parser = argparse.ArgumentParser(description="Calculate Fowlkes-Mallows index (scikit-learn)") 9 | 10 | parser.add_argument( 11 | "-l", "--labels", help="Labels from domain clustering.", required=True 12 | ) 13 | parser.add_argument("-g", "--ground_truth", help="Groundtruth labels.", required=False) 14 | parser.add_argument( 15 | "-e", 16 | "--embedding", 17 | help="Embedding of points in latent space. 
Potential usage for metrics without groundtruth.", 18 | required=False, 19 | ) 20 | parser.add_argument( 21 | "-c", 22 | "--config", 23 | help="Optional config file (json) used to pass additional parameters.", 24 | required=False, 25 | ) # format should be json 26 | parser.add_argument("-o", "--out_file", help="Output file.", required=True) 27 | 28 | args = parser.parse_args() 29 | 30 | # Use these filepaths as input 31 | label_file = args.labels 32 | 33 | if args.ground_truth is not None: 34 | groundtruth_file = args.ground_truth 35 | if args.embedding is not None: 36 | embedding_file = args.embedding 37 | if args.config is not None: 38 | config_file = args.config 39 | 40 | 41 | ## Your code goes here 42 | if args.ground_truth is None: 43 | raise Exception("Groundtruth labels needed to calculate the Fowlkes-Mallows index") 44 | 45 | import pandas as pd 46 | import sklearn.metrics 47 | 48 | ground_truth = pd.read_table(groundtruth_file, index_col=0)["label"].astype("category").cat.codes 49 | labels = pd.read_table(label_file, index_col=0)["label"].astype("category").cat.codes 50 | 51 | common_index = labels.index.intersection(ground_truth.index) 52 | ground_truth = ground_truth.loc[common_index] 53 | labels = labels.loc[common_index] 54 | 55 | metric = sklearn.metrics.fowlkes_mallows_score(ground_truth, labels) 56 | 57 | ## Write output 58 | from pathlib import Path 59 | 60 | Path(args.out_file).parent.mkdir(parents=True, exist_ok=True) 61 | 62 | with open(args.out_file, "w") as file: 63 | file.write(f"{metric:.5e}\n") -------------------------------------------------------------------------------- /metric/Completeness/Completeness.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Author_and_contribution: Niklas Mueller-Boetticher; created template 4 | # Author_and_contribution: Kirti Biharie; implemented completeness score 5 | 6 | import argparse 7 | 8 | parser = argparse.ArgumentParser(description="Calculate Completeness Score (scikit-learn)") 9 | 10 | parser.add_argument( 11 | "-l", "--labels", help="Labels from domain clustering.", required=True 12 | ) 13 | parser.add_argument("-g", "--ground_truth", help="Groundtruth labels.", required=False) 14 | parser.add_argument( 15 | "-e", 16 | "--embedding", 17 | help="Embedding of points in latent space. 
Potential usage for metrics without groundtruth.", 18 | required=False, 19 | ) 20 | parser.add_argument( 21 | "-c", 22 | "--config", 23 | help="Optional config file (json) used to pass additional parameters.", 24 | required=False, 25 | ) # format should be json 26 | parser.add_argument("-o", "--out_file", help="Output file.", required=True) 27 | 28 | args = parser.parse_args() 29 | 30 | # Use these filepaths as input 31 | label_file = args.labels 32 | 33 | if args.ground_truth is not None: 34 | groundtruth_file = args.ground_truth 35 | if args.embedding is not None: 36 | embedding_file = args.embedding 37 | if args.config is not None: 38 | config_file = args.config 39 | 40 | 41 | ## Your code goes here 42 | if args.ground_truth is None: 43 | raise Exception("Groundtruth labels needed to calculate the Completeness Score") 44 | 45 | import pandas as pd 46 | import sklearn.metrics 47 | 48 | ground_truth = pd.read_table(groundtruth_file, index_col=0)["label"].astype("category").cat.codes 49 | labels = pd.read_table(label_file, index_col=0)["label"].astype("category").cat.codes 50 | 51 | common_index = labels.index.intersection(ground_truth.index) 52 | ground_truth = ground_truth.loc[common_index] 53 | labels = labels.loc[common_index] 54 | 55 | metric = sklearn.metrics.completeness_score(ground_truth, labels) 56 | 57 | ## Write output 58 | from pathlib import Path 59 | 60 | Path(args.out_file).parent.mkdir(parents=True, exist_ok=True) 61 | 62 | with open(args.out_file, "w") as file: 63 | file.write(f"{metric:.5e}\n") -------------------------------------------------------------------------------- /metric/Homogeneity/Homogeneity.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Author_and_contribution: Niklas Mueller-Boetticher; created template 4 | # Author_and_contribution: Kirti Biharie; implemented homogeneity score 5 | 6 | import argparse 7 | 8 | parser = argparse.ArgumentParser(description="Calculate Homogeneity Score (scikit-learn)") 9 | 10 | parser.add_argument( 11 | "-l", "--labels", help="Labels from domain clustering.", required=True 12 | ) 13 | parser.add_argument("-g", "--ground_truth", help="Groundtruth labels.", required=False) 14 | parser.add_argument( 15 | "-e", 16 | "--embedding", 17 | help="Embedding of points in latent space. 
Potential usage for metrics without groundtruth.", 18 | required=False, 19 | ) 20 | parser.add_argument( 21 | "-c", 22 | "--config", 23 | help="Optional config file (json) used to pass additional parameters.", 24 | required=False, 25 | ) # format should be json 26 | parser.add_argument("-o", "--out_file", help="Output file.", required=True) 27 | 28 | args = parser.parse_args() 29 | 30 | # Use these filepaths as input 31 | label_file = args.labels 32 | 33 | if args.ground_truth is not None: 34 | groundtruth_file = args.ground_truth 35 | if args.embedding is not None: 36 | embedding_file = args.embedding 37 | if args.config is not None: 38 | config_file = args.config 39 | 40 | 41 | ## Your code goes here 42 | if args.ground_truth is None: 43 | raise Exception("Groundtruth labels needed to calculate the Homogeneity Score") 44 | 45 | import pandas as pd 46 | import sklearn.metrics 47 | 48 | ground_truth = pd.read_table(groundtruth_file, index_col=0)["label"].astype("category").cat.codes 49 | labels = pd.read_table(label_file, index_col=0)["label"].astype("category").cat.codes 50 | 51 | common_index = labels.index.intersection(ground_truth.index) 52 | ground_truth = ground_truth.loc[common_index] 53 | labels = labels.loc[common_index] 54 | 55 | metric = sklearn.metrics.homogeneity_score(ground_truth, labels) 56 | 57 | ## Write output 58 | from pathlib import Path 59 | 60 | Path(args.out_file).parent.mkdir(parents=True, exist_ok=True) 61 | 62 | with open(args.out_file, "w") as file: 63 | file.write(f"{metric:.5e}\n") 64 | -------------------------------------------------------------------------------- /metric/V_measure/V_measure.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Author_and_contribution: Niklas Mueller-Boetticher; created script 4 | 5 | import argparse 6 | 7 | parser = argparse.ArgumentParser(description="Calculate V-measure (scikit-learn)") 8 | 9 | parser.add_argument( 10 | "-l", "--labels", help="Labels from domain clustering.", required=True 11 | ) 12 | parser.add_argument("-g", "--ground_truth", help="Groundtruth labels.", required=False) 13 | parser.add_argument( 14 | "-e", 15 | "--embedding", 16 | help="Embedding of points in latent space. 
Potential usage for metrics without groundtruth.", 17 | required=False, 18 | ) 19 | parser.add_argument( 20 | "-c", 21 | "--config", 22 | help="Optional config file used to pass additional parameters.", 23 | required=False, 24 | ) 25 | parser.add_argument("-o", "--out_file", help="Output file.", required=True) 26 | 27 | args = parser.parse_args() 28 | 29 | # Use these filepaths as input 30 | label_file = args.labels 31 | 32 | if args.ground_truth is not None: 33 | groundtruth_file = args.ground_truth 34 | if args.config is not None: 35 | config_file = args.config 36 | 37 | 38 | ## Your code goes here 39 | if args.ground_truth is None: 40 | raise Exception("Groundtruth labels needed to calculate the V-measure.") 41 | 42 | if args.config is None: 43 | raise Exception("Config file not provided.") 44 | 45 | import json 46 | 47 | import pandas as pd 48 | import os 49 | from sklearn.metrics import v_measure_score 50 | 51 | with open(config_file, "r") as f: 52 | config = json.load(f) 53 | 54 | domains = pd.read_table(label_file, index_col=0)["label"].astype("category").cat.codes 55 | groundtruth = ( 56 | pd.read_table(groundtruth_file, index_col=0)["label"].astype("category").cat.codes 57 | ) 58 | common_index = domains.index.intersection(groundtruth.index) 59 | 60 | metric = v_measure_score( 61 | groundtruth.loc[common_index], domains.loc[common_index], beta=config["beta"] 62 | ) 63 | 64 | ## Write output 65 | out_file_path = args.out_file 66 | out_dir = os.path.dirname(out_file_path) 67 | os.makedirs(out_dir, exist_ok=True) 68 | 69 | with open(out_file_path, "w") as file: 70 | file.write(f"{metric:.5e}\n") 71 | -------------------------------------------------------------------------------- /workflows/05_aggregation.smk: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | from pathlib import Path 4 | from shared.functions import check_files_in_folder, get_git_directory, get_sample_dirs 5 | 6 | configfile: "path_config.yaml" 7 | configfile: "excute_config.yaml" 8 | 9 | GIT_DIR = Path(get_git_directory(config)) 10 | 11 | # Resolve the dataset directory and the selected datasets 12 | DATASET_DIR = Path(config["DATASET_DIR"]) 13 | datasets_selected = config["datasets_selected"] 14 | 15 | def generate_input_files(data_dir): 16 | 17 | # Function to check for the existence of domains.tsv in a sample folder 18 | def has_domains(sample_path): 19 | return any( 20 | os.path.isfile(os.path.join(root, "domains.tsv")) 21 | for root, dirs, files in os.walk(sample_path) 22 | ) 23 | 24 | 25 | 26 | # one aggregation target per sample directory that contains clustering results 27 | aggregate_files = [ 28 | f"{sample}/combined_methods.tsv" 29 | for sample in get_sample_dirs(data_dir) 30 | # reuse the has_domains helper defined above 31 | # instead of duplicating the os.walk() check 32 | if has_domains(sample) 33 | 34 | ] 35 | 36 | return aggregate_files 37 | 38 | 39 | def generate_all_input(wildcards): 40 | all_input = [] 41 | 42 | for dataset in datasets_selected: 43 | data_dir = DATASET_DIR / dataset 44 | if not data_dir.is_dir(): 45 | continue 46 | 47 | all_input += generate_input_files(data_dir=data_dir) 48 | 49 | return all_input 50 | 51 | rule all: 52 | input: 53 | generate_all_input, 54 | 55 | 56 | rule aggregate_nclusters: 57 | input: 58 | results_folder=DATASET_DIR / "{dataset}" / "{sample}", 59 | script=GIT_DIR / "consensus" / "01_Results_Aggregation" / "Results_Aggregation.py", 60 | output: 61 | file=DATASET_DIR / "{dataset}" / "{sample}" / "combined_methods.tsv", 62 | wildcard_constraints:
63 | dataset="[a-zA-Z0-9_-]+", 64 | sample="[a-zA-Z0-9_-]+", 65 | conda: 66 | GIT_DIR / "consensus" / "01_Results_Aggregation" / "Results_Aggregation.yaml" 67 | shell: 68 | """ 69 | {input.script} \ 70 | -i {input.results_folder} \ 71 | -o {output.file} \ 72 | """ -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | ![Logo](img/logo.png) 2 | 3 | # SACCELERATOR 4 | 5 | SACCELERATOR is the outcome of SpaceHack 2.0, a community-driven project to (not) benchmark domain identification methods for spatially-resolved transcriptomics data. 6 | 7 | ## SACCELERATOR - a flexible framework for applying spatially aware clustering methods 8 | 9 | Spatial omics have transformed tissue architecture and cellular heterogeneity analysis by integrating molecular data with spatial localization. In spatially resolved transcriptomics, identifying spatial domains is critical for analysis of anatomical regions within heterogeneous datasets and understanding tissue function. Since 2020, more than 50 spatially aware clustering methods have been developed for this task. However, the reliability of existing benchmarks is undermined by their narrow focus on Visium and brain tissue datasets, as well as the dependence on questionable ground truth annotations. Here, we implemented a consensus framework that surpasses traditional benchmarking practices. 10 | 11 | Our framework comprises a community-driven benchmark-like platform that streamlines data formatting, method integration, and metric evaluation while accommodating new methods and datasets. Currently, the platform includes 22 spatially aware clustering methods across 15 datasets spanning 9 technologies and diverse tissue types. The benchmark approach uncovered significant limitations in generalizability and reproducibility where methods that perform well on healthy tissues often falter on cancer samples. We also found that anatomical labels commonly used as ground truths are often biased, potentially error-prone, and in some cases, unsuitable for benchmarking efforts. 12 | 13 | In light of these issues, we adopt a flexible expert-in-the-loop consensus-driven approach. This goes beyond traditional ensemble/consensus methods, and allows researchers to interact with intermediate results to determine which tools should be used to generate a consensus. We believe that the inclusion of an expert-in-the-loop is critical to ensure that the computational analysis matches the biological question at hand, and we believe that when the focus of the analysis is to uncover novel biological discoveries, tissue experts are accessible more often than not. 14 | 15 | ## Citation 16 | 17 | If you are using SACCELERATOR please cite 18 | 19 | > Sun, J. et al. Beyond benchmarking: an expert-guided consensus approach to spatially aware clustering. bioRxiv https://doi.org/10.1101/2025.06.23.660861 (2025). 
20 | -------------------------------------------------------------------------------- /metric/MCC/MCC.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Author_and_contribution: Niklas Mueller-Boetticher; created template 4 | # Author_and_contribution: Thomas Chartrand; created script 5 | 6 | import argparse 7 | 8 | parser = argparse.ArgumentParser(description="Calculate Matthew's correlation coefficient (scikit-learn)") 9 | 10 | parser.add_argument( 11 | "-l", "--labels", help="Labels from domain clustering.", required=True 12 | ) 13 | parser.add_argument("-g", "--ground_truth", help="Groundtruth labels.", required=False) 14 | parser.add_argument( 15 | "-e", 16 | "--embedding", 17 | help="Embedding of points in latent space. Potential usage for metrics without groundtruth.", 18 | required=False, 19 | ) 20 | parser.add_argument( 21 | "-c", 22 | "--config", 23 | help="Optional config file used to pass additional parameters.", 24 | required=False, 25 | ) 26 | parser.add_argument("-o", "--out_file", help="Output file.", required=True) 27 | parser.add_argument( 28 | "--matched_labels", 29 | help="Flag indicating ground-truth and clustering labels have already been matched.", 30 | action='store_true', 31 | ) 32 | 33 | args = parser.parse_args() 34 | 35 | # Use these filepaths as input 36 | label_file = args.labels 37 | 38 | if args.ground_truth is not None: 39 | groundtruth_file = args.ground_truth 40 | if args.config is not None: 41 | config_file = args.config 42 | 43 | 44 | ## Your code goes here 45 | if args.ground_truth is None: 46 | raise Exception("Groundtruth labels needed.") 47 | 48 | import pandas as pd 49 | from scipy.optimize import linear_sum_assignment 50 | from sklearn.metrics import matthews_corrcoef 51 | 52 | domains = pd.read_table(label_file, index_col=0)["label"].astype("category").cat.codes 53 | groundtruth = ( 54 | pd.read_table(groundtruth_file, index_col=0)["label"].astype("category").cat.codes 55 | ) 56 | common_index = domains.index.intersection(groundtruth.index) 57 | groundtruth = groundtruth.loc[common_index] 58 | domains = domains.loc[common_index] 59 | 60 | if not args.matched_labels: 61 | contingency_table = pd.crosstab(domains, groundtruth) 62 | row_ind, col_ind = linear_sum_assignment(contingency_table, maximize=True) 63 | domains = domains.map(dict(zip(row_ind, col_ind))) 64 | 65 | metric = matthews_corrcoef(groundtruth, domains) 66 | 67 | ## Write output 68 | from pathlib import Path 69 | 70 | Path(args.out_file).parent.mkdir(parents=True, exist_ok=True) 71 | 72 | with open(args.out_file, "w") as file: 73 | file.write(f"{metric:.5e}\n") 74 | -------------------------------------------------------------------------------- /preprocessing/transformation/log1p.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Author_and_contribution: Niklas Mueller-Boetticher; created script 4 | 5 | import argparse 6 | 7 | # TODO adjust description 8 | parser = argparse.ArgumentParser(description="log1p transformation using scanpy") 9 | 10 | parser.add_argument( 11 | "-c", "--coordinates", help="Path to coordinates (as tsv).", required=True 12 | ) 13 | parser.add_argument("-m", "--matrix", help="Path to counts (as mtx).", required=True) 14 | parser.add_argument( 15 | "-f", "--features", help="Path to features (as tsv).", required=True 16 | ) 17 | parser.add_argument( 18 | "-o", "--observations", help="Path to observations (as tsv).", 
required=True 19 | ) 20 | parser.add_argument("-d", "--out_dir", help="Output directory.", required=True) 21 | parser.add_argument( 22 | "--config", 23 | help="Optional config file (json) used to pass additional parameters.", 24 | required=False, 25 | ) 26 | 27 | args = parser.parse_args() 28 | 29 | from pathlib import Path 30 | 31 | out_dir = Path(args.out_dir) 32 | 33 | # Output files 34 | transformed_counts_file = out_dir / "counts.mtx" 35 | # if additional output files are required write it also to out_dir 36 | 37 | # Use these filepaths as input ... 38 | coord_file = args.coordinates 39 | matrix_file = args.matrix 40 | feature_file = args.features 41 | observation_file = args.observations 42 | 43 | if args.config is not None: 44 | config_file = args.config 45 | 46 | 47 | # ... or AnnData if you want 48 | def get_anndata(args): 49 | # Untested template 50 | import anndata as ad 51 | import pandas as pd 52 | import scipy as sp 53 | 54 | X = sp.io.mmread(args.matrix) 55 | if sp.sparse.issparse(X): 56 | X = X.tocsr() 57 | observations = pd.read_table(args.observations, index_col=0) 58 | features = pd.read_table(args.features, index_col=0) 59 | coordinates = ( 60 | pd.read_table(args.coordinates, index_col=0) 61 | .loc[observations.index, :] 62 | .to_numpy() 63 | ) 64 | 65 | adata = ad.AnnData( 66 | X=X, obs=observations, var=features, obsm={"spatial": coordinates} 67 | ) 68 | 69 | return adata 70 | 71 | 72 | adata = get_anndata(args) 73 | 74 | ## Your code goes here 75 | import scanpy as sc 76 | 77 | sc.pp.normalize_total(adata) 78 | sc.pp.log1p(adata) 79 | 80 | transformed_counts = adata.X 81 | 82 | 83 | ## Write output 84 | import scipy as sp 85 | 86 | out_dir.mkdir(parents=True, exist_ok=True) 87 | sp.io.mmwrite(transformed_counts_file, transformed_counts, precision=5) 88 | -------------------------------------------------------------------------------- /metric/Entropy/Entropy.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Author_and_contribution: Niklas Mueller-Boetticher; created template 4 | # Author_and_contribution: Kirti Biharie; implemented Entropy 5 | 6 | import argparse 7 | 8 | parser = argparse.ArgumentParser(description="Calculate Shannon's Entropy") 9 | 10 | parser.add_argument( 11 | "-l", "--labels", help="Labels from domain clustering.", required=True 12 | ) 13 | parser.add_argument("-g", "--ground_truth", help="Groundtruth labels.", required=False) 14 | parser.add_argument( 15 | "-e", 16 | "--embedding", 17 | help="Embedding of points in latent space. 
Potential usage for metrics without groundtruth.", 18 | required=False, 19 | ) 20 | parser.add_argument( 21 | "-c", 22 | "--config", 23 | help="Optional config file (json) used to pass additional parameters.", 24 | required=False, 25 | ) # format should be json 26 | parser.add_argument("-o", "--out_file", help="Output file.", required=True) 27 | 28 | args = parser.parse_args() 29 | 30 | # Use these filepaths as input 31 | label_file = args.labels 32 | 33 | if args.ground_truth is not None: 34 | groundtruth_file = args.ground_truth 35 | if args.embedding is not None: 36 | embedding_file = args.embedding 37 | if args.config is not None: 38 | config_file = args.config 39 | 40 | 41 | ## Your code goes here 42 | if args.ground_truth is None: 43 | raise Exception("Groundtruth labels needed to calculate Shannon's Entropy") 44 | 45 | import pandas as pd 46 | import sklearn.metrics 47 | import math 48 | 49 | ground_truth = pd.read_table(groundtruth_file, index_col=0)["label"].astype("category").cat.codes 50 | labels = pd.read_table(label_file, index_col=0)["label"].astype("category").cat.codes 51 | 52 | common_index = labels.index.intersection(ground_truth.index) 53 | ground_truth = ground_truth.loc[common_index] 54 | labels = labels.loc[common_index] 55 | 56 | df = pd.concat([labels, ground_truth],axis=1) 57 | df.columns = ["pred", "true"] 58 | total_pred = df.groupby("pred").size() 59 | counts = df.groupby(["pred", "true"]).size() 60 | 61 | # For every predicted cluster 62 | metric = -sum((total_pred.loc[pred]/len(common_index)) * 63 | # For every groundtruth class: calculate Shannon's entropy 64 | sum((count/total_pred.loc[pred]) * math.log2(count/total_pred.loc[pred]) 65 | for count in counts.loc[pred]) 66 | for pred in df["pred"].unique()) 67 | 68 | ## Write output 69 | from pathlib import Path 70 | 71 | Path(args.out_file).parent.mkdir(parents=True, exist_ok=True) 72 | 73 | with open(args.out_file, "w") as file: 74 | file.write(f"{metric:.5e}\n") -------------------------------------------------------------------------------- /metric/LISI/LISI.r: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env Rscript 2 | 3 | # Author_and_contribution: Niklas Mueller-Boetticher; created template 4 | # Author_and_contribution: Kirti Biharie; implemented LISI score 5 | 6 | suppressPackageStartupMessages(library(optparse)) 7 | 8 | option_list <- list( 9 | make_option( 10 | c("-l", "--labels"), 11 | type = "character", default = NULL, 12 | help = "Labels from domain clustering." 13 | ), 14 | make_option( 15 | c("-g", "--ground_truth"), 16 | type = "character", default = NA, 17 | help = "Groundtruth labels." 18 | ), 19 | make_option( 20 | c("-e", "--embedding"), 21 | type = "character", default = NA, 22 | help = "Embedding of points in latent space. Potential usage for metrics without groundtruth." 23 | ), 24 | # format should be json 25 | make_option( 26 | c("-c", "--config"), 27 | type = "character", default = NA, 28 | help = "Optional config file (json) used to pass additional parameters." 29 | ), 30 | make_option( 31 | c("-o", "--out_file"), 32 | type = "character", default = NULL, 33 | help = "Output file." 
34 | ) 35 | ) 36 | 37 | description <- "Calculate LISI Score" 38 | 39 | opt_parser <- OptionParser( 40 | usage = description, 41 | option_list = option_list 42 | ) 43 | opt <- parse_args(opt_parser) 44 | 45 | # Use these filepaths as input 46 | label_file <- opt$labels 47 | 48 | if (!is.na(opt$ground_truth)) { 49 | groundtruth_file <- opt$ground_truth 50 | } 51 | if (!is.na(opt$embedding)) { 52 | embedding_file <- opt$embedding 53 | } 54 | if (!is.na(opt$config)) { 55 | config_file <- opt$config 56 | } 57 | 58 | 59 | ## Your code goes here 60 | library(lisi) 61 | library(rjson) 62 | 63 | if (is.na(opt$ground_truth)) { 64 | stop("Groundtruth labels needed to calculate the LISI Score") 65 | } 66 | 67 | if (is.na(opt$embedding)) { 68 | stop("Embeddings needed to calculate the LISI Score") 69 | } 70 | 71 | if (is.na(opt$config)) { 72 | stop("Config file not provided") 73 | } 74 | 75 | ground_truth <- read.delim(groundtruth_file, sep="\t", row.names=1) 76 | embeddings <- read.delim(embedding_file, sep="\t", row.names=1) 77 | config <- fromJSON(file=config_file) 78 | 79 | common_index <- intersect(rownames(ground_truth), rownames(embeddings)) 80 | ground_truth <- ground_truth[common_index,,drop=FALSE] 81 | embeddings <- embeddings[common_index,,drop=FALSE] 82 | 83 | metric <- mean(compute_lisi(embeddings, ground_truth, "label", perplexity=config$perplexity)[,"label"]) 84 | 85 | ## Write output 86 | outfile <- file(opt$out_file) 87 | dir.create(dirname(opt$out_file), showWarnings = FALSE, recursive = TRUE) 88 | 89 | writeLines(format(metric, digits = 6, scientific = TRUE), outfile) 90 | close(outfile) 91 | -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- 1 | # This CITATION.cff file was generated with cffinit. 2 | # Visit https://bit.ly/cffinit to generate yours today! 3 | 4 | cff-version: 1.2.0 5 | title: >- 6 | SpaceHack 2.0: an expert in the loop consensus driven 7 | framework for spatially aware clustering 8 | message: >- 9 | If you use this software, please cite it using the 10 | metadata from this file. 11 | type: software 12 | authors: 13 | - name: SpaceHack 2.0. Participants 14 | repository-code: 'https://github.com/SpatialHackathon/SpaceHack2023' 15 | url: 'https://spatialhackathon.github.io/past.html' 16 | abstract: >- 17 | Spatial omics have transformed tissue architecture and 18 | cellular heterogeneity analysis by integrating molecular 19 | data with spatial localization. In spatially resolved 20 | transcriptomics, identifying spatial domains is critical 21 | for analysis of anatomical regions within heterogeneous 22 | datasets and understanding tissue function. Since 2020, 23 | more than 50 spatially aware clustering methods have been 24 | developed for this task. However, the reliability of 25 | existing benchmarks is undermined by their narrow focus on 26 | Visium and brain tissue datasets, as well as the 27 | dependence on questionable ground truth annotations. Here, 28 | we implemented a consensus framework that surpasses 29 | traditional benchmarking practices. 30 | 31 | 32 | Our framework comprises a community-driven benchmark-like 33 | platform that streamlines data formatting, method 34 | integration, and metric evaluation while accommodating new 35 | methods and datasets. Currently, the platform includes 22 36 | spatially aware clustering methods across 15 datasets 37 | spanning 9 technologies and diverse tissue types. 
The 38 | benchmark approach uncovered significant limitations in 39 | generalizability and reproducibility: methods that 40 | perform well on healthy tissues often falter on cancer 41 | samples. We also found that anatomical labels commonly 42 | used as ground truths are often biased, potentially 43 | error-prone, and in some cases, unsuitable for 44 | benchmarking efforts. 45 | 46 | 47 | In light of these issues, we adopt a flexible 48 | expert-in-the-loop consensus-driven approach. This goes 49 | beyond traditional ensemble/consensus methods and allows 50 | researchers to interact with intermediate results to 51 | determine which tools should be used to generate a 52 | consensus. We believe that the inclusion of an 53 | expert-in-the-loop is critical to ensure that the 54 | computational analysis matches the biological question at 55 | hand, and that when the focus of the analysis 56 | is to uncover novel biological discoveries, tissue 57 | experts are accessible more often than not. 58 | license: MIT-0 59 | -------------------------------------------------------------------------------- /metric/jaccard/jaccard.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Author_and_contribution: Niklas Mueller-Boetticher; created template 4 | # Author_and_contribution: Thomas Chartrand; created script 5 | 6 | import argparse 7 | 8 | parser = argparse.ArgumentParser(description="Calculate Jaccard score (scikit-learn)") 9 | 10 | parser.add_argument( 11 | "-l", "--labels", help="Labels from domain clustering.", required=True 12 | ) 13 | parser.add_argument("-g", "--ground_truth", help="Groundtruth labels.", required=False) 14 | parser.add_argument( 15 | "-e", 16 | "--embedding", 17 | help="Embedding of points in latent space. 
Potential usage for metrics without groundtruth.", 18 | required=False, 19 | ) 20 | parser.add_argument( 21 | "-c", 22 | "--config", 23 | help="Optional config file used to pass additional parameters.", 24 | required=False, 25 | ) 26 | parser.add_argument("-o", "--out_file", help="Output file.", required=True) 27 | parser.add_argument( 28 | "--matched_labels", 29 | help="Flag indicating ground-truth and clustering labels have already been matched.", 30 | action='store_true', 31 | ) 32 | 33 | args = parser.parse_args() 34 | 35 | # Use these filepaths as input 36 | label_file = args.labels 37 | 38 | if args.ground_truth is not None: 39 | groundtruth_file = args.ground_truth 40 | if args.config is not None: 41 | config_file = args.config 42 | 43 | 44 | ## Your code goes here 45 | if args.ground_truth is None: 46 | raise Exception("Groundtruth labels needed.") 47 | 48 | import pandas as pd 49 | from scipy.optimize import linear_sum_assignment 50 | from sklearn.metrics import jaccard_score 51 | 52 | domains = pd.read_table(label_file, index_col=0)["label"].astype("category").cat.codes 53 | groundtruth = ( 54 | pd.read_table(groundtruth_file, index_col=0)["label"].astype("category").cat.codes 55 | ) 56 | common_index = domains.index.intersection(groundtruth.index) 57 | groundtruth = groundtruth.loc[common_index] 58 | domains = domains.loc[common_index] 59 | 60 | if not args.matched_labels: 61 | contingency_table = pd.crosstab(domains, groundtruth) 62 | row_ind, col_ind = linear_sum_assignment(contingency_table, maximize=True) 63 | domains = domains.map(dict(zip(row_ind, col_ind))) 64 | 65 | metric = jaccard_score(groundtruth, domains, average='weighted') 66 | 67 | domain_scores = jaccard_score(groundtruth, domains, average=None) 68 | domains_df = pd.DataFrame({ 69 | "cluster": ["all", *sorted(groundtruth.unique())], 70 | "jaccard_score": [metric, *domain_scores] 71 | }) 72 | 73 | ## Write output 74 | from pathlib import Path 75 | 76 | Path(args.out_file).parent.mkdir(parents=True, exist_ok=True) 77 | 78 | with open(args.out_file, "w") as file: 79 | file.write(domains_df.to_json(orient='records')) -------------------------------------------------------------------------------- /metric/README.md: -------------------------------------------------------------------------------- 1 | # Metric modules 2 | 3 | ## Implementing a new dataset module 4 | 5 | To implement a new metric follow the [Contribution guide](../CONTRIBUTING.md) and make sure you adopt all the necessary conventions specified in this document. 6 | 7 | For examples have a look 8 | [here for a method in Python](./ARI/) or 9 | [here for a method in R](./LISI/). 10 | 11 | ## Metric module layout and interface 12 | 13 | Metric modules require 3 files (see templates). '{metric}' in the file names should be 14 | replaced by the name of your module and all files placed in a subfolder of the same name. 15 | 16 | * `{metric}.yml`: dependencies of the metric module script following the format: 17 | ```yaml 18 | channels: 19 | - conda-forge 20 | dependencies: 21 | - anndata=0.10.3 22 | - gitpython=3.1.40 23 | ``` 24 | 25 | * `{metric}_optargs.json`: defining optional arguments for the workflow following the format: 26 | ```json 27 | { 28 | "groundtruth": true, # Does the metric need groundtruth labels? (boolean) 29 | "embedding": false, # Does the metric need embeddings? (boolean) 30 | "config_file": true # Does the metric take an additional config file? 
(boolean) 31 | } 32 | ``` 33 | 34 | Optionally, add the following if your metric requires it: 35 | 36 | ``` 37 | physical_coordinate: 38 | description: Does the metric take the physical coordinates of the sample? 39 | type: boolean 40 | ``` 41 | 42 | * `{metric}.py/.r`: metric module script. 43 | * Check the TODOs in the `metric.py` or `metric.r` [template](../templates/). 44 | * The command line arguments are fixed and should not be modified. 45 | * See further instructions below. 46 | 47 | 48 | ### Input Format 49 | 50 | * `Labels File (-l, --labels)`: Path to a file containing cluster labels. Format: Text file where each row corresponds to a label for a specific observation. 51 | 52 | Optional Files: 53 | 54 | * `Ground Truth File (-g, --ground_truth)`: Path to a file containing ground truth labels. Use this for metrics requiring true labels for comparison. 55 | * `Embedding File (-e, --embedding)`: Path to a file containing latent space embeddings. Useful for metrics that do not rely on ground truth labels. 56 | * `Config File (-c, --config)`: Path to an optional JSON file with additional parameters for metric calculation. 57 | 58 | ### Output Format 59 | 60 | The script writes the calculated metric to the specified output file (`-o, --out_file`) in scientific notation with five decimal places. 61 | 62 | ### Example usage of module scripts (Testing) 63 | 64 | ```sh 65 | python metric.py -l labels.txt -g ground_truth.txt -o result.txt 66 | ``` 67 | 68 | ### Add to workflow 69 | 70 | * Add your metric to the excute_config.yaml under `Metrics selected for execution`. 71 | * Add your metric scripts to the path_config.yaml under `metrics`. 72 | -------------------------------------------------------------------------------- /preprocessing/neighbors/delaunay_triangulation/delaunay_triangulation.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Author_and_contribution: Niklas Mueller-Boetticher; created script 4 | # Author_and_contribution: Qirong Mao; modified output file format 5 | 6 | import argparse 7 | 8 | # TODO adjust description 9 | parser = argparse.ArgumentParser( 10 | description="Neighbor definition using Delaunay triangulation" 11 | ) 12 | 13 | parser.add_argument( 14 | "-c", "--coordinates", help="Path to coordinates (as tsv).", required=True 15 | ) 16 | parser.add_argument( 17 | "-m", "--matrix", help="Path to (transformed) counts (as mtx).", required=True 18 | ) 19 | parser.add_argument( 20 | "-f", "--features", help="Path to features (as tsv).", required=True 21 | ) 22 | parser.add_argument( 23 | "-o", "--observations", help="Path to observations (as tsv).", required=True 24 | ) 25 | parser.add_argument("-d", "--out_dir", help="Output directory.", required=True) 26 | parser.add_argument( 27 | "--config", 28 | help="Optional config file (json) used to pass additional parameters.", 29 | required=False, 30 | ) 31 | 32 | args = parser.parse_args() 33 | 34 | # Output files 35 | from pathlib import Path 36 | 37 | out_dir = Path(args.out_dir) 38 | 39 | spatial_connectivities_file = out_dir / "spatial_connectivities.mtx" 40 | ##spatial_distances_file = out_dir / "spatial_distances.mtx" 41 | 42 | 43 | # Use these filepaths and inputs ... 44 | coord_file = args.coordinates 45 | matrix_file = args.matrix 46 | feature_file = args.features 47 | observation_file = args.observations 48 | 49 | 50 | # ... 
or AnnData if you want 51 | def get_anndata(args): 52 | # Untested template 53 | import anndata as ad 54 | import pandas as pd 55 | import scipy as sp 56 | 57 | X = sp.io.mmread(args.matrix) 58 | if sp.sparse.issparse(X): 59 | X = X.tocsr() 60 | observations = pd.read_table(args.observations, index_col=0) 61 | features = pd.read_table(args.features, index_col=0) 62 | coordinates = ( 63 | pd.read_table(args.coordinates, index_col=0) 64 | .loc[observations.index, :] 65 | .to_numpy() 66 | ) 67 | 68 | adata = ad.AnnData( 69 | X=X, obs=observations, var=features, obsm={"spatial": coordinates} 70 | ) 71 | 72 | return adata 73 | 74 | 75 | adata = get_anndata(args) 76 | 77 | ## Your code goes here 78 | import squidpy as sq 79 | 80 | sq.gr.spatial_neighbors(adata, delaunay=True, coord_type="generic") 81 | 82 | neighbors = adata.obsp["spatial_connectivities"].astype(int) 83 | ##distance = adata.obsp["spatial_distances"].astype(float) 84 | 85 | ## Write output 86 | import scipy as sp 87 | 88 | out_dir.mkdir(parents=True, exist_ok=True) 89 | 90 | sp.io.mmwrite(spatial_connectivities_file, neighbors) 91 | ##sp.io.mmwrite(spatial_distances_file, distance) 92 | -------------------------------------------------------------------------------- /metric/NMI/NMI.r: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env Rscript 2 | 3 | # Author_and_contribution: Niklas Mueller-Boetticher; created template 4 | # Author_and_contribution: Mark D. Robinson; coded the domain-specific F1 5 | 6 | suppressPackageStartupMessages(library(optparse)) 7 | 8 | # TODO adjust description 9 | option_list <- list( 10 | make_option( 11 | c("-l", "--labels"), 12 | type = "character", default = NULL, 13 | help = "Labels from domain clustering." 14 | ), 15 | make_option( 16 | c("-g", "--ground_truth"), 17 | type = "character", default = NA, 18 | help = "Groundtruth labels." 19 | ), 20 | make_option( 21 | c("-e", "--embedding"), 22 | type = "character", default = NA, 23 | help = "Embedding of points in latent space. Potential usage for metrics without groundtruth." 24 | ), 25 | # format should be json 26 | make_option( 27 | c("-c", "--config"), 28 | type = "character", default = NA, 29 | help = "Optional config file (json) used to pass additional parameters." 30 | ), 31 | make_option( 32 | c("-o", "--out_file"), 33 | type = "character", default = NULL, 34 | help = "Output file." 
35 | ) 36 | ) 37 | 38 | 39 | description <- "Calculate NMI (normalized mutual information)" 40 | 41 | opt_parser <- OptionParser( 42 | usage = description, 43 | option_list = option_list 44 | ) 45 | opt <- parse_args(opt_parser) 46 | 47 | # Use these filepaths as input 48 | label_file <- opt$labels 49 | 50 | if (!is.na(opt$ground_truth)) { 51 | groundtruth_file <- opt$ground_truth 52 | } 53 | if (!is.na(opt$embedding)) { 54 | embedding_file <- opt$embedding 55 | } 56 | if (!is.na(opt$config)) { 57 | config_file <- opt$config 58 | } 59 | 60 | 61 | ## Code for calculating metric goes here 62 | ## -------------------------------------- 63 | 64 | 65 | 66 | library(aricode) 67 | 68 | # # for testing - start 69 | # label_file <- "results/libd_dlpfc/Br5595_151670/SpaGCN/domains.tsv" 70 | # groundtruth_file <- "data/libd_dlpfc/Br5595_151670/labels.tsv" 71 | # outfile <- "NMI.txt" 72 | # # for testing - stop 73 | 74 | domains <- read.delim(label_file, sep="\t", row.names = 1) 75 | groundtruth <- read.delim(groundtruth_file, sep="\t", row.names = 1) 76 | 77 | rn <- intersect(rownames(domains), rownames(groundtruth)) 78 | 79 | # subset to common set 80 | domains <- domains[rn,,drop = FALSE] 81 | groundtruth <- groundtruth[rn,,drop = FALSE] 82 | 83 | metric <- NMI(domains$label, groundtruth$label) 84 | 85 | print(metric) 86 | 87 | ## Write output 88 | dir.create(dirname(opt$out_file), showWarnings = FALSE, recursive = TRUE) 89 | 90 | outfile <- file(opt$out_file) 91 | writeLines(format(metric, digits = 6, scientific = TRUE), outfile) 92 | close(outfile) 93 | -------------------------------------------------------------------------------- /preprocessing/feature_selection/highly_variable_genes_scanpy.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Author_and_contribution: Niklas Mueller-Boetticher; created script 4 | 5 | import argparse 6 | 7 | # TODO adjust description 8 | parser = argparse.ArgumentParser(description="HVG selection using scanpy") 9 | 10 | parser.add_argument( 11 | "-c", "--coordinates", help="Path to coordinates (as tsv).", required=True 12 | ) 13 | parser.add_argument( 14 | "-m", "--matrix", help="Path to (transformed) counts (as mtx).", required=True 15 | ) 16 | parser.add_argument( 17 | "-f", "--features", help="Path to features (as tsv).", required=True 18 | ) 19 | parser.add_argument( 20 | "-o", "--observations", help="Path to observations (as tsv).", required=True 21 | ) 22 | # parser.add_argument( 23 | # "-n", "--n_top_genes", help="Number of genes to keep.", required=False, type=int 24 | # ) 25 | parser.add_argument("-d", "--out_dir", help="Output directory.", required=True) 26 | parser.add_argument( 27 | "--config", 28 | help="Optional config file (json) used to pass additional parameters.", 29 | required=False, 30 | ) 31 | 32 | args = parser.parse_args() 33 | 34 | from pathlib import Path 35 | 36 | out_dir = Path(args.out_dir) 37 | 38 | # Output files 39 | feature_selection_file = out_dir / "features.tsv" 40 | # if additional output files are required write it also to out_dir 41 | 42 | # Use these filepaths and inputs ... 
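# (Illustrative note, not part of the original script: per the data module
# template later in this repo, observations.tsv / features.tsv / coordinates.tsv
# are tab-separated files whose first column is the observation barcode or
# feature id, and counts.mtx is an observations x features MatrixMarket matrix.
# The get_anndata helper below assembles these inputs into an AnnData object.)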
43 | coord_file = args.coordinates 44 | matrix_file = args.matrix 45 | feature_file = args.features 46 | observation_file = args.observations 47 | 48 | # if args.n_top_genes is not None: 49 | # n_top_genes = args.n_top_genes 50 | if args.config is not None: 51 | config_file = args.config 52 | 53 | 54 | # ... or AnnData if you want 55 | def get_anndata(args): 56 | # Untested template 57 | import anndata as ad 58 | import pandas as pd 59 | import scipy as sp 60 | 61 | X = sp.io.mmread(args.matrix) 62 | if sp.sparse.issparse(X): 63 | X = X.tocsr() 64 | observations = pd.read_table(args.observations, index_col=0) 65 | features = pd.read_table(args.features, index_col=0) 66 | coordinates = ( 67 | pd.read_table(args.coordinates, index_col=0) 68 | .loc[observations.index, :] 69 | .to_numpy() 70 | ) 71 | 72 | adata = ad.AnnData( 73 | X=X, obs=observations, var=features, obsm={"spatial": coordinates} 74 | ) 75 | 76 | return adata 77 | 78 | 79 | adata = get_anndata(args) 80 | 81 | ## Your code goes here 82 | import scanpy as sc 83 | 84 | features_df = adata.var.copy() 85 | 86 | sc.pp.highly_variable_genes(adata, flavor="seurat") 87 | 88 | features_df["selected"] = adata.var["highly_variable"] 89 | 90 | 91 | ## Write output 92 | out_dir.mkdir(parents=True, exist_ok=True) 93 | features_df.to_csv(feature_selection_file, sep="\t", index_label="") 94 | -------------------------------------------------------------------------------- /consensus/02_Smoothness_entropy/Smoothness_entropy.r: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env Rscript 2 | 3 | # Author_and_contribution: Jieran Sun & Mark Robinson; Create the script 4 | 5 | suppressPackageStartupMessages(library(optparse)) 6 | 7 | option_list <- list( 8 | make_option( 9 | c("-i", "--input_file"), 10 | type = "character", default = NULL, 11 | help = "Input containing the aggregated labels." 
12 | ), 13 | make_option( 14 | c("-c", "--coordinates"), 15 | type = "character", default = NULL, 16 | help = "file path to the spatial coordinates of the spots/cells" 17 | ), 18 | make_option( 19 | c("-o", "--output_file"), 20 | type = "character", default = NULL, 21 | help = "desired output file" 22 | ), 23 | make_option( 24 | c("-n", "--neighbors"), 25 | type = "integer", default = NULL, 26 | help = "Number of neighbors to calculate the smoothness" 27 | ), 28 | make_option( 29 | c("-s", "--seed"), 30 | type = "integer", default = NULL, 31 | help = "seed for neighboring algorithm" 32 | ) 33 | ) 34 | 35 | description <- "Calculate overall smoothness of the clustering" 36 | 37 | opt_parser <- OptionParser( 38 | usage = description, 39 | option_list = option_list 40 | ) 41 | opt <- parse_args(opt_parser) 42 | 43 | # Use these filepaths as input 44 | input_file <- opt$input_file 45 | output_file <- opt$output_file 46 | coord_file <- opt$coordinates 47 | neighbors <- ifelse(is.null(opt$neighbors), 6, opt$neighbors) 48 | seed <- ifelse(is.null(opt$seed), 2025, opt$seed) 49 | 50 | 51 | 52 | set.seed(seed) 53 | 54 | suppressPackageStartupMessages({ 55 | library(dbscan) 56 | }) 57 | 58 | ##### Define function 59 | calc_entropy <- function(u) { 60 | p <- u[u>0] 61 | p <- p/sum(p) 62 | -sum(p*log(p)) 63 | } 64 | 65 | spot_entropy <- function(spatial_coords, label, k) { 66 | suppressPackageStartupMessages(require(dbscan)) 67 | knns <- dbscan::kNN(spatial_coords, k=k) 68 | label <- as.factor(label) 69 | neighb_labels <- apply(knns$id, 2, function(u) label[u]) 70 | apply(neighb_labels, 71 | 1, function(u) calc_entropy(table(factor(u, levels=levels(label))))) 72 | } 73 | 74 | ##### Load files 75 | label_df <- read.delim(input_file, stringsAsFactors = FALSE, row.names = 1, numerals="no.loss") 76 | coord_df <- read.delim(coord_file, stringsAsFactors = FALSE, row.names = 1, numerals="no.loss") 77 | coord_df <- coord_df[row.names(label_df), ] 78 | 79 | # Calculate point-wise entropy with its neighbor 80 | spot_entropy_df <- apply(label_df, 2, 81 | function(u) spot_entropy(coord_df, u, neighbors)) 82 | 83 | # calculate colmeans and save it to a dataframe 84 | sm_df <- data.frame(smoothness = colMeans(spot_entropy_df)) 85 | 86 | dir.create(dirname(output_file), showWarnings = FALSE, recursive = TRUE) 87 | 88 | # Save the results 89 | write.table(sm_df, file = output_file, sep = "\t", col.names = NA, quote = FALSE) 90 | -------------------------------------------------------------------------------- /preprocessing/neighbors/radius/radius.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Author_and_contribution: Niklas Mueller-Boetticher; created script, 4 | # Author_and_contribution: Qirong Mao; implemented method 5 | 6 | import argparse 7 | 8 | # TODO adjust description 9 | parser = argparse.ArgumentParser( 10 | description="Neighbor definition based on the radius (only for generic coordinates)" 11 | ) 12 | 13 | parser.add_argument( 14 | "-c", "--coordinates", help="Path to coordinates (as tsv).", required=True 15 | ) 16 | 17 | parser.add_argument( 18 | "-m", "--matrix", help="Path to (transformed) counts (as mtx).", required=True 19 | ) 20 | 21 | parser.add_argument( 22 | "-f", "--features", help="Path to features (as tsv).", required=True 23 | ) 24 | 25 | parser.add_argument( 26 | "-o", "--observations", help="Path to observations (as tsv).", required=True 27 | ) 28 | 29 | parser.add_argument("-d", "--out_dir", help="Output directory.", 
required=True) 30 | 31 | parser.add_argument( 32 | "--config", 33 | help="Optional config file (json) used to pass additional parameters.", 34 | required=False, 35 | ) 36 | 37 | args = parser.parse_args() 38 | 39 | # Output files 40 | from pathlib import Path 41 | 42 | out_dir = Path(args.out_dir) 43 | 44 | spatial_connectivities_file = out_dir / "spatial_connectivities.mtx" 45 | ##spatial_distances_file = out_dir / "spatial_distances.mtx" 46 | 47 | # Use these filepaths and inputs ... 48 | coord_file = args.coordinates 49 | matrix_file = args.matrix 50 | feature_file = args.features 51 | observation_file = args.observations 52 | 53 | ## Loading radius parameters from config_file 54 | if args.config is not None: 55 | config_file = args.config 56 | 57 | import json 58 | 59 | with open(config_file) as f: 60 | parameters = json.load(f) 61 | 62 | radius = parameters["radius"] 63 | 64 | 65 | # ... or AnnData if you want 66 | def get_anndata(args): 67 | # Untested template 68 | import anndata as ad 69 | import pandas as pd 70 | import scipy as sp 71 | 72 | X = sp.io.mmread(args.matrix) 73 | if sp.sparse.issparse(X): 74 | X = X.tocsr() 75 | observations = pd.read_table(args.observations, index_col=0) 76 | features = pd.read_table(args.features, index_col=0) 77 | coordinates = ( 78 | pd.read_table(args.coordinates, index_col=0) 79 | .loc[observations.index, :] 80 | .to_numpy() 81 | ) 82 | 83 | adata = ad.AnnData( 84 | X=X, obs=observations, var=features, obsm={"spatial": coordinates} 85 | ) 86 | 87 | return adata 88 | 89 | 90 | adata = get_anndata(args) 91 | 92 | ## Your code goes here 93 | import squidpy as sq 94 | 95 | sq.gr.spatial_neighbors(adata, radius=radius, coord_type='generic') 96 | 97 | neighbors = adata.obsp["spatial_connectivities"].astype(int) 98 | ##distance = adata.obsp["spatial_distances"].astype(float) 99 | 100 | ## Write output 101 | import scipy as sp 102 | 103 | out_dir.mkdir(parents=True, exist_ok=True) 104 | 105 | sp.io.mmwrite(spatial_connectivities_file, neighbors) 106 | ##sp.io.mmwrite(spatial_distances_file, distance) 107 | -------------------------------------------------------------------------------- /preprocessing/neighbors/n_neighbourhood/n_neighbourhood.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Author_and_contribution: Niklas Mueller-Boetticher; created script, 4 | # Author_and_contribution: Qirong Mao; implemented method 5 | 6 | import argparse 7 | 8 | # TODO adjust description 9 | parser = argparse.ArgumentParser( 10 | description="Neighbor definition based on the number of neighbors (only for generic coordinates)" 11 | ) 12 | 13 | parser.add_argument( 14 | "-c", "--coordinates", help="Path to coordinates (as tsv).", required=True 15 | ) 16 | 17 | parser.add_argument( 18 | "-m", "--matrix", help="Path to (transformed) counts (as mtx).", required=True 19 | ) 20 | 21 | parser.add_argument( 22 | "-f", "--features", help="Path to features (as tsv).", required=True 23 | ) 24 | 25 | parser.add_argument( 26 | "-o", "--observations", help="Path to observations (as tsv).", required=True 27 | ) 28 | 29 | parser.add_argument("-d", "--out_dir", help="Output directory.", required=True) 30 | 31 | parser.add_argument( 32 | "--config", 33 | help="Optional config file (json) used to pass additional parameters.", 34 | required=False, 35 | ) 36 | 37 | args = parser.parse_args() 38 | 39 | # Output files 40 | from pathlib import Path 41 | 42 | out_dir = Path(args.out_dir) 43 | 44 | 
spatial_connectivities_file = out_dir / "spatial_connectivities.mtx" 45 | ##spatial_distances_file = out_dir / "spatial_distances.mtx" 46 | 47 | # Use these filepaths and inputs ... 48 | coord_file = args.coordinates 49 | matrix_file = args.matrix 50 | feature_file = args.features 51 | observation_file = args.observations 52 | 53 | ## Loading n_neighs parameter from config_file 54 | 55 | if args.config is not None: 56 | config_file = args.config 57 | 58 | import json 59 | 60 | with open(config_file) as f: 61 | parameters = json.load(f) 62 | 63 | n_neighs = parameters["n_neighs"] 64 | 65 | # ... or AnnData if you want 66 | def get_anndata(args): 67 | # Untested template 68 | import anndata as ad 69 | import pandas as pd 70 | import scipy as sp 71 | 72 | X = sp.io.mmread(args.matrix) 73 | if sp.sparse.issparse(X): 74 | X = X.tocsr() 75 | observations = pd.read_table(args.observations, index_col=0) 76 | features = pd.read_table(args.features, index_col=0) 77 | coordinates = ( 78 | pd.read_table(args.coordinates, index_col=0) 79 | .loc[observations.index, :] 80 | .to_numpy() 81 | ) 82 | 83 | adata = ad.AnnData( 84 | X=X, obs=observations, var=features, obsm={"spatial": coordinates} 85 | ) 86 | 87 | return adata 88 | 89 | 90 | adata = get_anndata(args) 91 | 92 | ## Your code goes here 93 | import squidpy as sq 94 | 95 | sq.gr.spatial_neighbors(adata, n_neighs=n_neighs, coord_type="generic") 96 | 97 | neighbors = adata.obsp["spatial_connectivities"].astype(int) 98 | ##distance = adata.obsp["spatial_distances"].astype(float) 99 | 100 | ## Write output 101 | import scipy as sp 102 | 103 | out_dir.mkdir(parents=True, exist_ok=True) 104 | 105 | sp.io.mmwrite(spatial_connectivities_file, neighbors) 106 | ##sp.io.mmwrite(spatial_distances_file, distance) 107 | -------------------------------------------------------------------------------- /preprocessing/neighbors/n_rings/n_rings.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Author_and_contribution: Niklas Mueller-Boetticher; created script, 4 | # Author_and_contribution: Qirong Mao; implemented method 5 | 6 | 7 | import argparse 8 | 9 | # TODO adjust description 10 | parser = argparse.ArgumentParser( 11 | description="Neighbor definition based on number of rings of neighbors (only for grid coordinates)" 12 | ) 13 | 14 | parser.add_argument( 15 | "-c", "--coordinates", help="Path to coordinates (as tsv).", required=True 16 | ) 17 | 18 | parser.add_argument( 19 | "-m", "--matrix", help="Path to (transformed) counts (as mtx).", required=True 20 | ) 21 | 22 | parser.add_argument( 23 | "-f", "--features", help="Path to features (as tsv).", required=True 24 | ) 25 | 26 | parser.add_argument( 27 | "-o", "--observations", help="Path to observations (as tsv).", required=True 28 | ) 29 | 30 | parser.add_argument("-d", "--out_dir", help="Output directory.", required=True) 31 | 32 | parser.add_argument( 33 | "--config", 34 | help="Optional config file (json) used to pass additional parameters.", 35 | required=False, 36 | ) 37 | 38 | args = parser.parse_args() 39 | 40 | # Output files 41 | from pathlib import Path 42 | 43 | out_dir = Path(args.out_dir) 44 | 45 | spatial_connectivities_file = out_dir / "spatial_connectivities.mtx" 46 | ##spatial_distances_file = out_dir / "spatial_distances.mtx" 47 | 48 | # Use these filepaths and inputs ... 
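# (Illustrative note: the optional config JSON read below is expected to
# provide the ring count; a hypothetical example config would be
# {"n_rings": 2} -- the value here is a placeholder, not a recommended setting.)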
49 | coord_file = args.coordinates 50 | matrix_file = args.matrix 51 | feature_file = args.features 52 | observation_file = args.observations 53 | 54 | 55 | ## Loading n_rings parameter from config_file 56 | if args.config is not None: 57 | config_file = args.config 58 | 59 | import json 60 | 61 | with open(config_file) as f: 62 | parameters = json.load(f) 63 | 64 | n_rings = parameters["n_rings"] 65 | 66 | 67 | # ... or AnnData if you want 68 | def get_anndata(args): 69 | # Untested template 70 | import anndata as ad 71 | import pandas as pd 72 | import scipy as sp 73 | 74 | X = sp.io.mmread(args.matrix) 75 | if sp.sparse.issparse(X): 76 | X = X.tocsr() 77 | observations = pd.read_table(args.observations, index_col=0) 78 | features = pd.read_table(args.features, index_col=0) 79 | coordinates = ( 80 | pd.read_table(args.coordinates, index_col=0) 81 | .loc[observations.index, :] 82 | .to_numpy() 83 | ) 84 | 85 | adata = ad.AnnData( 86 | X=X, obs=observations, var=features, obsm={"spatial": coordinates} 87 | ) 88 | 89 | return adata 90 | 91 | 92 | adata = get_anndata(args) 93 | 94 | ## Your code goes here 95 | import squidpy as sq 96 | 97 | sq.gr.spatial_neighbors(adata, n_rings=n_rings, coord_type="grid") 98 | 99 | neighbors = adata.obsp["spatial_connectivities"].astype(int) 100 | ##distance = adata.obsp["spatial_distances"].astype(float) 101 | 102 | ## Write output 103 | import scipy as sp 104 | 105 | out_dir.mkdir(parents=True, exist_ok=True) 106 | 107 | sp.io.mmwrite(spatial_connectivities_file, neighbors) 108 | ##sp.io.mmwrite(spatial_distances_file, distance) 109 | -------------------------------------------------------------------------------- /templates/README.md: -------------------------------------------------------------------------------- 1 | # SpaceHack - templates for modules 2 | 3 | This directory contains the templates that you can use to implement a new dataset, method or metric. 4 | 5 | For further instructions have a look in the corresponding directory: 6 | 7 | - [data](/data) 8 | - [method](/method) 9 | - [metric](/metric) 10 | - [consensus](/consensus) 11 | 12 | ### How to contribute a module 13 | 14 | Module contribution will be managed via GitHub. The steps to contribute a module are: 15 | 1. Create or claim a **GitHub issue** from the [SpaceHack issue board](https://github.com/SpatialHackathon/SpaceHack2023/issues) that describes the module you want to implement. There are currently 90 issues to claim, but if you come up with a new idea, please **create** a new issue, add the appropriate **tags**, and **assign** the task to yourself. 16 | 2. Add **metadata** to our metadata [spreadsheet](https://docs.google.com/spreadsheets/d/1QCeAF4yQG4bhZSGPQwwVBj_XF7ADY_2mK5xivAIfHsc/edit). Please fill in as much as you can as metadata is helpful! If you feel the need, please also add new columns or add additional notes. The metadata should be added to the appropriate tabs: 17 | - [datasets](https://docs.google.com/spreadsheets/d/1QCeAF4yQG4bhZSGPQwwVBj_XF7ADY_2mK5xivAIfHsc/edit#gid=1453488771) 18 | - [computational methods](https://docs.google.com/spreadsheets/d/1QCeAF4yQG4bhZSGPQwwVBj_XF7ADY_2mK5xivAIfHsc/edit#gid=0) 19 | - [evaluation metrics](https://docs.google.com/spreadsheets/d/1QCeAF4yQG4bhZSGPQwwVBj_XF7ADY_2mK5xivAIfHsc/edit#gid=4776337) 20 | - [simulations and technical evaluation](https://docs.google.com/spreadsheets/d/1QCeAF4yQG4bhZSGPQwwVBj_XF7ADY_2mK5xivAIfHsc/edit#gid=640974611) 21 | 3. 
Now you are ready to create a new git **[branch](https://learngitbranching.js.org/)**. Try to give your new branch an intuitive prefix such as `data_...`, `method_...`, `metric_...` or `consensus_...`. You can create a new branch in several ways: (i) [create a branch directly from the issue board](https://docs.github.com/en/issues/tracking-your-work-with-issues/creating-a-branch-for-an-issue) and then `git checkout` that branch, or (ii) via the command line: 22 | ``` 23 | # clone the template repository 24 | git clone https://github.com/SpatialHackathon/SpaceHack2023.git 25 | # create and switch to a new branch, e.g. for your method "X" 26 | git branch method_x_naveedishaque # try to make the branch name unique! 27 | git checkout method_x_naveedishaque 28 | # link the branch to the issue via the issue board: https://docs.github.com/en/issues/tracking-your-work-with-issues/linking-a-pull-request-to-an-issue 29 | ``` 30 | 4. Modify the files, filenames, and code in `templates/`, referring to the examples in the `data`, `method`, or `metric` subfolder. If your method requires a specific type of preprocessing, please reach out to the organisers! 31 | 5. Test. We are currently working on validators and automatic testing scripts... but this is tricky. Reach out to Niklas Mueller-Boetticher when you are ready to test! 32 | 6. Create a [pull request](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-a-pull-request?tool=cli) 33 | 7. Code review (by whom?) and merge your contributed module into the GitHub main branch! 34 | -------------------------------------------------------------------------------- /consensus/03_Consensus_kmode/Consensus_kmode.r: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env Rscript 2 | 3 | # Author_and_contribution: Jieran Sun & Mark Robinson; Create the script 4 | 5 | suppressPackageStartupMessages(library(optparse)) 6 | 7 | option_list <- list( 8 | make_option( 9 | c("-i", "--input_file"), 10 | type = "character", default = NULL, 11 | help = "Input containing the aggregated labels." 
12 | ), 13 | make_option( 14 | c("--seed"), 15 | type = "integer", default = NULL, 16 | help = "Seed for the consensus algorithm" 17 | ), 18 | make_option( 19 | c("-b", "--base_clusterings"), 20 | type = "character", default = NULL, 21 | help = "Path to base-clustering ranking file" 22 | ), 23 | make_option( 24 | c("--n_clusters"), 25 | type = "character", default = NULL, 26 | help = "Desired number of clusters in the consensus output" 27 | ), 28 | make_option( 29 | c("--n_bcs"), 30 | type = "integer", default = NULL, 31 | help = "Desired number of base clustering results fed into the algorithm" 32 | ), 33 | make_option( 34 | c("-o", "--output_file"), 35 | type = "character", default = NULL, 36 | help = "desired output file" 37 | ) 38 | ) 39 | 40 | description <- "Calculate consensus for selected BCs" 41 | 42 | opt_parser <- OptionParser( 43 | usage = description, 44 | option_list = option_list 45 | ) 46 | opt <- parse_args(opt_parser) 47 | 48 | # Use these filepaths as input 49 | input_file <- opt$input_file 50 | output_file <- opt$output_file 51 | bc_file <- opt$base_clusterings 52 | n_bcs <- ifelse(is.null(opt$n_bcs), 8, opt$n_bcs) 53 | n_clust <- ifelse(is.null(opt$n_clusters), "7", opt$n_clusters) 54 | seed <- opt$seed 55 | 56 | # Your code goes here 57 | suppressPackageStartupMessages({ 58 | library(diceR) 59 | }) 60 | 61 | label_df <- read.delim(input_file, stringsAsFactors = FALSE, row.names = 1, numerals="no.loss") 62 | bc_list <- read.delim(bc_file, stringsAsFactors = FALSE, row.names = 1, numerals="no.loss", check.names=FALSE)[[as.character(n_clust)]] 63 | bc_list <- bc_list[!is.na(bc_list)] 64 | 65 | if (length(bc_list) < n_bcs){ 66 | warning(sprintf("Not enough (%s) base clusterings (BCs) are available; using %s BCs instead.", n_bcs, length(bc_list))) 67 | } 68 | bc_list <- bc_list[1:min(n_bcs, length(bc_list))] 69 | 70 | label_selected <- label_df[, bc_list] 71 | 72 | # Make sure all cluster labels run from 1 to n without gaps (needed e.g. for SOTIP output) 73 | label_selected <- as.data.frame(lapply(label_selected, function(u){ 74 | unique_labels <- sort(unique(u)) 75 | if (all(unique_labels==seq_along(unique_labels))) { 76 | return(as.factor(u)) 77 | } else { 78 | # Count occurrences of each label 79 | freq <- table(u) 80 | rank_map <- rank(-freq, ties.method = "first") # Negative for descending order 81 | new_vec <- rank_map[as.character(u)] 82 | new_vec <- as.factor(as.numeric(new_vec)) 83 | return(new_vec) 84 | } 85 | })) 86 | 87 | kmode_vec <- diceR:::k_modes(label_selected, is.relabelled = FALSE, seed = seed) 88 | kmode_df <- data.frame(consensus_kmode=kmode_vec, row.names = row.names(label_selected)) 89 | 90 | dir.create(dirname(output_file), showWarnings = FALSE, recursive = TRUE) 91 | # Save the results 92 | write.table(kmode_df, file = output_file, sep = "\t", col.names = NA, quote = FALSE) 93 | -------------------------------------------------------------------------------- /consensus/03_Consensus_lca/Consensus_lca.r: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env Rscript 2 | 3 | # Author_and_contribution: Jieran Sun & Mark Robinson; Create the script 4 | 5 | suppressPackageStartupMessages(library(optparse)) 6 | 7 | option_list <- list( 8 | make_option( 9 | c("-i", "--input_file"), 10 | type = "character", default = NULL, 11 | help = "Input containing the aggregated labels." 
12 | ), 13 | make_option( 14 | c("--seed"), 15 | type = "integer", default = NULL, 16 | help = "Seed for the consensus algorithm" 17 | ), 18 | make_option( 19 | c("-b", "--base_clusterings"), 20 | type = "character", default = NULL, 21 | help = "Path to base-clustering ranking file" 22 | ), 23 | make_option( 24 | c("--n_clusters"), 25 | type = "integer", default = NULL, 26 | help = "Desired number of clusters in the consensus output" 27 | ), 28 | make_option( 29 | c("--n_bcs"), 30 | type = "integer", default = NULL, 31 | help = "Desired number of base clustering results fed into the algorithm" 32 | ), 33 | make_option( 34 | c("-o", "--output_file"), 35 | type = "character", default = NULL, 36 | help = "desired output file" 37 | ) 38 | ) 39 | 40 | description <- "Calculate consensus for selected BCs" 41 | 42 | opt_parser <- OptionParser( 43 | usage = description, 44 | option_list = option_list 45 | ) 46 | opt <- parse_args(opt_parser) 47 | 48 | # Use these filepaths as input 49 | input_file <- opt$input_file 50 | output_file <- opt$output_file 51 | bc_file <- opt$base_clusterings 52 | n_bcs <- ifelse(is.null(opt$n_bcs), 8, opt$n_bcs) 53 | n_clust <- ifelse(is.null(opt$n_clusters), "7", opt$n_clusters) 54 | seed <- opt$seed 55 | 56 | # Your code goes here 57 | suppressPackageStartupMessages({ 58 | library(diceR) 59 | }) 60 | 61 | 62 | 63 | label_df <- read.delim(input_file, stringsAsFactors = FALSE, row.names = 1, numerals="no.loss") 64 | bc_list <- read.delim(bc_file, stringsAsFactors = FALSE, row.names = 1, numerals="no.loss", check.names=FALSE)[[as.character(n_clust)]] 65 | bc_list <- bc_list[!is.na(bc_list)] 66 | 67 | if (length(bc_list) < n_bcs){ 68 | warning(sprintf("Not enough (%s) base clusterings (BCs) are available; using %s BCs instead.", n_bcs, length(bc_list))) 69 | } 70 | bc_list <- bc_list[1:min(n_bcs, length(bc_list))] 71 | 72 | label_selected <- label_df[, bc_list] 73 | 74 | # Make sure all cluster labels run from 1 to n without gaps (needed e.g. for SOTIP output) 75 | label_selected <- as.data.frame(lapply(label_selected, function(u){ 76 | unique_labels <- sort(unique(u)) 77 | if (all(unique_labels==seq_along(unique_labels))) { 78 | return(factor(u, levels = unique_labels)) 79 | } else { 80 | # Count occurrences of each label 81 | freq <- table(u) 82 | rank_map <- rank(-freq, ties.method = "first") # Negative for descending order 83 | new_vec <- rank_map[as.character(u)] 84 | new_vec <- factor(as.numeric(new_vec)) 85 | return(new_vec) 86 | } 87 | })) 88 | lca_vec <- diceR:::LCA(label_selected, is.relabelled = FALSE, seed = seed) 89 | lca_df <- data.frame(consensus_lca=lca_vec, row.names = row.names(label_selected)) 90 | 91 | dir.create(dirname(output_file), showWarnings = FALSE, recursive = TRUE) 92 | 93 | # Save the results 94 | write.table(lca_df, file = output_file, sep = "\t", col.names = NA, quote = FALSE) 95 | -------------------------------------------------------------------------------- /consensus/02_BC_ranking/BC_ranking.r: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env Rscript 2 | 3 | # Author_and_contribution: Jieran Sun & Mark Robinson; Create the script 4 | 5 | suppressPackageStartupMessages(library(optparse)) 6 | 7 | option_list <- list( 8 | make_option( 9 | c("-i", "--input_file"), 10 | type = "character", default = NULL, 11 | help = "Input containing the aggregated labels." 
12 | ), 13 | make_option( 14 | c("-o", "--output_file"), 15 | type = "character", default = NULL, 16 | help = "desired output file" 17 | ), 18 | make_option( 19 | c("--selection_metrics"), 20 | type = "character", default = NULL, 21 | help = "file containing the metric information for BC selection" 22 | ), 23 | make_option( 24 | c("-m", "--max_percentage"), 25 | type = "double", default = NULL, 26 | help = "maximal percentage of the largest class" 27 | ) 28 | ) 29 | 30 | description <- "Automatically select the base-clusterings based on different algorithms" 31 | 32 | opt_parser <- OptionParser( 33 | usage = description, 34 | option_list = option_list 35 | ) 36 | opt <- parse_args(opt_parser) 37 | 38 | # Use these filepaths as input 39 | input_file <- opt$input_file 40 | output_file <- opt$output_file 41 | smoothness_file <- opt$selection_metrics # only --selection_metrics is defined above; it is used as the smoothness input 42 | ari_file <- NULL # no --ari option is currently defined, so ARI-based selection is inactive 43 | max_percentage <- ifelse(is.null(opt$max_percentage), 0.8, opt$max_percentage) 44 | 45 | ##### Load files 46 | label_df <- read.delim(input_file, stringsAsFactors = FALSE, row.names = 1, numerals="no.loss") 47 | 48 | ##### Filter out class-imbalanced cases 49 | label_df <- label_df[, sapply(label_df, function(col) { 50 | max(table(col)) / length(col) <= max_percentage 51 | })] 52 | 53 | ##### Separate label_df by number of clusters 54 | n_clusters <- apply(label_df, 2, function(u){length(unique(u))}) 55 | label_lists <- split(names(n_clusters), n_clusters) 56 | 57 | ##### Select base-clusterings based on algorithms 58 | 59 | if (!is.null(ari_file)){ 60 | # ARI df is an nxn dataframe with n = number of result instances; each value is a cross-ARI 61 | selection_df <- read.delim(ari_file, stringsAsFactors = FALSE, row.names = 1, numerals="no.loss") 62 | s_mean <- colMeans(selection_df) 63 | } else { 64 | if (is.null(smoothness_file)){ 65 | consensus_dir <- file.path(dirname(input_file), "consensus") 66 | file_searched <- list.files(path = consensus_dir, pattern = "smoothness", 67 | full.names = TRUE, ignore.case = TRUE) 68 | if (length(file_searched) == 0){ 69 | stop("No smoothness entropy file found or defined.") 70 | } 71 | smoothness_file <- file_searched[1] 72 | } 73 | # Smoothness df is a one-column dataframe with row names referring to individual result instances 74 | selection_df <- read.delim(smoothness_file, stringsAsFactors = FALSE, row.names = 1, numerals="no.loss") 75 | # Negated, as high entropy means less smooth 76 | s_mean <- - rowMeans(selection_df) 77 | } 78 | 79 | d_length <- max(lengths(label_lists)) 80 | s_bc_list <- sapply(label_lists, function(nclu_names){ 81 | s_n <- s_mean[names(s_mean) %in% nclu_names] 82 | selected_names <- names(sort(s_n, decreasing = TRUE)) 83 | length(selected_names) <- d_length 84 | return(selected_names) 85 | }) 86 | 87 | result_df <- as.data.frame(s_bc_list) 88 | dir.create(dirname(output_file), showWarnings = FALSE, recursive = TRUE) 89 | 90 | # Save the results 91 | write.table(result_df, file = output_file, sep = "\t", col.names = NA, quote = FALSE) 92 | -------------------------------------------------------------------------------- /templates/consensus.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Author_and_contribution: Jieran Sun & Mark Robinson; implemented method 4 | # Author_and_contribution: Peiying Cai; created template 5 | # Author_and_contribution: ENTER YOUR NAME AND CONTRIBUTION HERE 6 | 7 | import argparse 8 | 9 | # TODO adjust description 10 | parser = 
argparse.ArgumentParser(description="Calculate consensus ... for selected BCs") 11 | 12 | parser.add_argument( 13 | "-i", "--input_file", help="Input containing the aggregated labels.", required=True) 14 | parser.add_argument( 15 | "--seed", type=int, default=None, help="Seed for random number generator") 16 | parser.add_argument( 17 | "-b", "--base_clusterings", help="Path to base-clustering ranking file", required=True) 18 | parser.add_argument( 19 | "-o", "--output_file", help="Desired output file", required=True) 20 | # TODO adjust default numbers in `n_bcs` and `n_clusters` 21 | # make sure that `n_clusters` exists among the column names of the base-clustering ranking file 22 | parser.add_argument( 23 | "--n_clusters", type=int, default=7, help="Desired number of clusters in the consensus output") 24 | parser.add_argument( 25 | "--n_bcs", type=int, default=8, help="Desired number of base clustering results fed into the algorithm") 26 | 27 | args = parser.parse_args() 28 | from pathlib import Path 29 | import pandas as pd 30 | import sys 31 | import warnings 32 | 33 | seed = args.seed 34 | output_file = args.output_file 35 | output_path = Path(args.output_file) 36 | 37 | # Read input label data 38 | label_df = pd.read_csv(args.input_file, sep="\t", index_col=0) 39 | 40 | # Read base clustering rankings 41 | bc_df = pd.read_csv(args.base_clusterings, sep="\t", index_col=0) 42 | 43 | n_clust_str = str(args.n_clusters) 44 | if n_clust_str not in bc_df.columns: 45 | sys.exit(f"Error: n_clusters={args.n_clusters} not found in base clustering file columns.") 46 | 47 | # bc_list stores all 'method_config_n_clust_label' entries matching n_clust 48 | bc_list = bc_df[n_clust_str].dropna().tolist() 49 | 50 | if len(bc_list) < args.n_bcs: 51 | warnings.warn(f"Not enough ({args.n_bcs}) base clusterings (BCs) are available, use {len(bc_list)} BCs instead.") 52 | bc_list = bc_list[:min(args.n_bcs, len(bc_list))] 53 | 54 | # Subset the label data to keep only the selected base clusterings 55 | label_selected = label_df[bc_list] 56 | 57 | # Make sure clusters are ranked 1 to n without jumps (SOTIP) 58 | def rank_labels(u): 59 | unique_labels = sorted(u.dropna().unique()) 60 | if unique_labels == list(range(1, len(unique_labels) + 1)): 61 | # Already consecutive starting from 1 62 | return u.astype('Int64') 63 | 64 | freq = u.dropna().value_counts() 65 | # Rank by descending frequency, ties.method='first' equivalent 66 | rank_map = {label: rank+1 for rank, label in enumerate(freq.index)} 67 | return u.map(rank_map).astype('Int64') 68 | 69 | label_selected = label_selected.apply(rank_labels, axis=0) 70 | 71 | 72 | # TODO set the seed, if the algorithm requires the seed elsewhere please pass it on 73 | import random 74 | 75 | random.seed(seed) 76 | # np.random.seed(seed) 77 | # torch.manual_seed(seed) 78 | 79 | ## Your code goes here 80 | # TODO 81 | # Input: label_selected (DataFrame) with samples as rows and base clusterings as columns 82 | 83 | # output_df = ... 
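# For orientation, a minimal majority-vote sketch that keeps this template
# runnable end-to-end. This is an illustrative assumption, not the required
# consensus algorithm (the diceR-based k-modes/LCA modules under consensus/
# are real implementations); replace it with your own method:
# each observation receives the label occurring most often across the
# selected base clusterings, with ties broken by the smallest label.
majority_vote = label_selected.mode(axis=1).iloc[:, 0]
output_df = pd.DataFrame({"consensus": majority_vote}, index=label_selected.index)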
84 | 85 | ## Write output 86 | output_path.parent.mkdir(parents=True, exist_ok=True) 87 | output_df.to_csv(output_file, sep="\t", index=True) 88 | 89 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as 6 | contributors and maintainers pledge to making participation in our project and 7 | our community a harassment-free experience for everyone, regardless of age, body 8 | size, disability, ethnicity, gender identity and expression, level of experience, 9 | education, socio-economic status, nationality, personal appearance, race, 10 | religion, or sexual identity and orientation. 11 | 12 | ## Our Standards 13 | 14 | Examples of behavior that contributes to creating a positive environment 15 | include: 16 | 17 | * Using welcoming and inclusive language 18 | * Being respectful of differing viewpoints and experiences 19 | * Gracefully accepting constructive criticism 20 | * Focusing on what is best for the community 21 | * Showing empathy towards other community members 22 | 23 | Examples of unacceptable behavior by participants include: 24 | 25 | * The use of sexualized language or imagery and unwelcome sexual attention or 26 | advances 27 | * Trolling, insulting/derogatory comments, and personal or political attacks 28 | * Public or private harassment 29 | * Publishing others' private information, such as a physical or electronic 30 | address, without explicit permission 31 | * Other conduct which could reasonably be considered inappropriate in a 32 | professional setting 33 | 34 | ## Our Responsibilities 35 | 36 | Project maintainers are responsible for clarifying the standards of acceptable 37 | behavior and are expected to take appropriate and fair corrective action in 38 | response to any instances of unacceptable behavior. 39 | 40 | Project maintainers have the right and responsibility to remove, edit, or 41 | reject comments, commits, code, wiki edits, issues, and other contributions 42 | that are not aligned to this Code of Conduct, or to ban temporarily or 43 | permanently any contributor for other behaviors that they deem inappropriate, 44 | threatening, offensive, or harmful. 45 | 46 | ## Scope 47 | 48 | This Code of Conduct applies both within project spaces and in public spaces 49 | when an individual is representing the project or its community. Examples of 50 | representing a project or community include using an official project e-mail 51 | address, posting via an official social media account, or acting as an appointed 52 | representative at an online or offline event. Representation of a project may be 53 | further defined and clarified by project maintainers. 54 | 55 | ## Enforcement 56 | 57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 58 | reported by contacting the project team at {{ email }}. All 59 | complaints will be reviewed and investigated and will result in a response that 60 | is deemed necessary and appropriate to the circumstances. The project team is 61 | obligated to maintain confidentiality with regard to the reporter of an incident. 62 | Further details of specific enforcement policies may be posted separately. 
63 | 64 | Project maintainers who do not follow or enforce the Code of Conduct in good 65 | faith may face temporary or permanent repercussions as determined by other 66 | members of the project's leadership. 67 | 68 | ## Attribution 69 | 70 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, 71 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html 72 | 73 | [homepage]: https://www.contributor-covenant.org 74 | -------------------------------------------------------------------------------- /preprocessing/dimensionality_reduction/PCA.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Author_and_contribution: Niklas Mueller-Boetticher; created script 4 | 5 | import argparse 6 | 7 | # TODO adjust description 8 | parser = argparse.ArgumentParser(description="PCA (with standard-scaling)") 9 | 10 | parser.add_argument( 11 | "-c", "--coordinates", help="Path to coordinates (as tsv).", required=True 12 | ) 13 | parser.add_argument( 14 | "-m", "--matrix", help="Path to (transformed) counts (as mtx).", required=True 15 | ) 16 | parser.add_argument( 17 | "-f", "--features", help="Path to features (as tsv).", required=True 18 | ) 19 | parser.add_argument( 20 | "-o", "--observations", help="Path to observations (as tsv).", required=True 21 | ) 22 | parser.add_argument( 23 | "-n", 24 | "--n_components", 25 | help="Number of components/factors to generate.", 26 | required=False, 27 | type=int, 28 | ) 29 | parser.add_argument("-d", "--out_dir", help="Output directory.", required=True) 30 | parser.add_argument( 31 | "--config", 32 | help="Optional config file (json) used to pass additional parameters.", 33 | required=False, 34 | ) 35 | parser.add_argument( 36 | "--seed", 37 | help="Seed for random state control on PCA.", 38 | required=True, 39 | ) 40 | 41 | args = parser.parse_args() 42 | 43 | # Output files 44 | from pathlib import Path 45 | 46 | out_dir = Path(args.out_dir) 47 | 48 | # Output files 49 | dim_red_file = out_dir / "dimensionality_reduction.tsv" 50 | # if additional output files are required write it also to out_dir 51 | 52 | # Use these filepaths as input ... 
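# (Illustrative invocation with placeholder paths and values, built from the
# arguments defined above; --config is optional and may be omitted:
#   python PCA.py -c coordinates.tsv -m counts.mtx -f features.tsv \
#       -o observations.tsv -n 20 -d out/pca_20 --seed 2023 )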
53 | coord_file = args.coordinates 54 | matrix_file = args.matrix 55 | feature_file = args.features 56 | observation_file = args.observations 57 | 58 | if args.n_components is not None: 59 | n_components = args.n_components 60 | if args.config is not None: 61 | config_file = args.config 62 | 63 | ## Your code goes here 64 | import pandas as pd 65 | import scipy as sp 66 | from sklearn.decomposition import PCA 67 | from sklearn.preprocessing import StandardScaler 68 | 69 | matrix = sp.io.mmread(matrix_file) 70 | if sp.sparse.issparse(matrix): 71 | matrix = matrix.tocsr() 72 | 73 | observations = pd.read_table(observation_file, index_col=0) 74 | features = pd.read_table(feature_file, index_col=0) 75 | 76 | # Filter features and observations 77 | if "selected" in observations.columns: 78 | matrix = matrix[observations["selected"].to_numpy().nonzero()[0], :] 79 | observations = observations.loc[lambda df: df["selected"]].index 80 | else: 81 | observations = observations.index 82 | if "selected" in features.columns: 83 | matrix = matrix[:, features["selected"].to_numpy().nonzero()[0]] 84 | features = features.loc[lambda df: df["selected"]].index 85 | else: 86 | features = features.index 87 | 88 | matrix = matrix.toarray() if sp.sparse.issparse(matrix) else matrix 89 | matrix = pd.DataFrame(matrix, columns=features, index=observations) 90 | 91 | scaler = StandardScaler().set_output(transform="pandas") 92 | matrix = scaler.fit_transform(matrix) 93 | 94 | pca = PCA(n_components=n_components, svd_solver="arpack", random_state = int(args.seed)).set_output(transform="pandas") 95 | dim_red_df = pca.fit_transform(matrix) 96 | 97 | 98 | ## Write output 99 | out_dir.mkdir(parents=True, exist_ok=True) 100 | dim_red_df.to_csv(dim_red_file, sep="\t", index_label="", float_format="%g") 101 | -------------------------------------------------------------------------------- /templates/data.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Author_and_contribution: Niklas Mueller-Boetticher; created template 4 | # Author_and_contribution: ENTER YOUR NAME AND CONTRIBUTION HERE 5 | 6 | import argparse 7 | 8 | # TODO adjust description 9 | parser = argparse.ArgumentParser(description="Load data for ...") 10 | 11 | parser.add_argument( 12 | "-o", "--out_dir", help="Output directory to write files to.", required=True 13 | ) 14 | 15 | args = parser.parse_args() 16 | 17 | 18 | from pathlib import Path 19 | 20 | import pandas as pd 21 | 22 | out_dir = Path(args.out_dir) 23 | 24 | # The folder structure should look like the following 25 | # out_dir 26 | # |_______sample_1 (sample name can be chosen freely) 27 | # |_____coordinates.tsv 28 | # |_____features.tsv 29 | # |_____observations.tsv 30 | # |_____counts.mtx (use scipy.io.mmwrite) 31 | # |_____labels.tsv (optional) 32 | # |_____H_E.(tiff/png/...) (optional) 33 | # |_____H_E.json (optional, required if H_E is provided) 34 | # |_______sample_2 35 | # | ... 36 | # |_______samples.tsv 37 | # |_______experiment.json 38 | # if additional output files are required write it also to out_dir 39 | 40 | 41 | ## Your code goes here 42 | # TODO 43 | # features_df = ... # DataFrame with index (gene-id/name) and n columns (?) 44 | # observations_df = ... # DataFrame with index (cell-id/barcode) and n columns (?) 45 | # coordinates_df = ... # DataFrame with index (cell-id/barcode) and 2/3 columns (x, y, z?) 46 | # counts = ... 
# array with #observations rows x #features columns 47 | # labels_df = None # optional, DataFrame with index (cell-id/barcode) and 1 column (label) 48 | # img = None # optional 49 | # technology = ... # i.e. "Visium", "ST", "MERSCOPE", "MERFISH", "Stereo-seq", "Slide-seq", "Xenium", "STARmap", "STARmap+", "osmFISH", "seqFISH" 50 | # samples_df = ... # DataFrame with information on samples. columns: (patient, sample, position, replicate, directory, n_clusters), columns can be NA 51 | 52 | # Make sure to use consistent indexes for the DataFrames 53 | # i.e. the index (not necessarily the order) of observations and coordinates should match 54 | # But the order of observations and features must match counts (observations x features) 55 | 56 | 57 | # Example how a sample could be written 58 | def write_sample( 59 | path, 60 | sample, 61 | coordinates_df, 62 | observations_df, 63 | features_df, 64 | counts, 65 | labels_df=None, 66 | img=None, 67 | ): 68 | if img is not None: 69 | # TODO write to image_file 70 | # H_E.json must contain the scale 71 | pass 72 | 73 | import scipy as sp 74 | 75 | sample_path = Path(path) / sample 76 | 77 | coordinates_df.to_csv(sample_path / "coordinates.tsv", sep="\t", index_label="") 78 | features_df.to_csv(sample_path / "features.tsv", sep="\t", index_label="") 79 | observations_df.to_csv(sample_path / "observations.tsv", sep="\t", index_label="") 80 | sp.io.mmwrite(sample_path / "counts.mtx", counts) 81 | 82 | if labels_df is not None: 83 | labels_df.columns = ["label"] 84 | labels_df.to_csv(sample_path / "labels.tsv", sep="\t", index_label="") 85 | 86 | 87 | ## Metadata files 88 | samples_df.loc[ 89 | :, ["patient", "sample", "position", "replicate", "directory", "n_clusters"] 90 | ].to_csv(out_dir / "samples.tsv", sep="\t", index_label="") 91 | 92 | import json 93 | 94 | with open(out_dir / "experiment.json", "w") as f: 95 | exp_info = {"technology": technology} 96 | json.dump(exp_info, f) 97 | -------------------------------------------------------------------------------- /metric/cluster-specific-silhouette/cluster-specific-silhouette.r: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env Rscript 2 | 3 | # Author_and_contribution: Niklas Mueller-Boetticher; created template 4 | # Author_and_contribution: Mark D. Robinson; coded the domain-specific F1 5 | 6 | suppressPackageStartupMessages(library(optparse)) 7 | 8 | # TODO adjust description 9 | option_list <- list( 10 | make_option( 11 | c("-l", "--labels"), 12 | type = "character", default = NULL, 13 | help = "Labels from domain clustering." 14 | ), 15 | make_option( 16 | c("-g", "--ground_truth"), 17 | type = "character", default = NA, 18 | help = "Groundtruth labels." 19 | ), 20 | make_option( 21 | c("-e", "--embedding"), 22 | type = "character", default = NA, 23 | help = "Embedding of points in latent space. Potential usage for metrics without groundtruth." 24 | ), 25 | # format should be json 26 | make_option( 27 | c("-c", "--config"), 28 | type = "character", default = NA, 29 | help = "Optional config file (json) used to pass additional parameters." 30 | ), 31 | make_option( 32 | c("-o", "--out_file"), 33 | type = "character", default = NULL, 34 | help = "Output file." 
--------------------------------------------------------------------------------
/metric/cluster-specific-silhouette/cluster-specific-silhouette.r:
--------------------------------------------------------------------------------
#!/usr/bin/env Rscript

# Author_and_contribution: Niklas Mueller-Boetticher; created template
# Author_and_contribution: Mark D. Robinson; coded the cluster-specific silhouette

suppressPackageStartupMessages(library(optparse))

option_list <- list(
  make_option(
    c("-l", "--labels"),
    type = "character", default = NULL,
    help = "Labels from domain clustering."
  ),
  make_option(
    c("-g", "--ground_truth"),
    type = "character", default = NA,
    help = "Groundtruth labels."
  ),
  make_option(
    c("-e", "--embedding"),
    type = "character", default = NA,
    help = "Embedding of points in latent space. Potential usage for metrics without groundtruth."
  ),
  # format should be json
  make_option(
    c("-c", "--config"),
    type = "character", default = NA,
    help = "Optional config file (json) used to pass additional parameters."
  ),
  make_option(
    c("-o", "--out_file"),
    type = "character", default = NULL,
    help = "Output file."
  )
)

description <- "Calculate cluster-specific silhouette widths (returns JSON with mean and median silhouette width per cluster)"

opt_parser <- OptionParser(
  usage = description,
  option_list = option_list
)
opt <- parse_args(opt_parser)

# Use these filepaths as input
label_file <- opt$labels

if (!is.na(opt$ground_truth)) {
  groundtruth_file <- opt$ground_truth
}
if (!is.na(opt$embedding)) {
  embedding_file <- opt$embedding
}
if (!is.na(opt$config)) {
  config_file <- opt$config
}


## Code for calculating metric goes here
## --------------------------------------
suppressPackageStartupMessages(library(cluster))
suppressPackageStartupMessages(library(jsonlite))

# # for testing - start
# label_file <- "~/scratch/SpaceHack2/method_results/LIBD_DLPFC/Br5292_151507/SpaGCN/domains.tsv"
# outfile <- "./cluster-specific-silhouette.json"
# embedding_file <- "~/scratch/SpaceHack2/method_results/LIBD_DLPFC/Br5292_151507/log1p/hvg/pca_20/dimensionality_reduction.tsv"
# groundtruth_file <- "data/libd_dlpfc/Br5595_151670/labels.tsv"
# # for testing - stop

domains <- read.delim(label_file, sep = "\t", row.names = 1)
embedding <- read.delim(embedding_file, sep = "\t", row.names = 1)

# subset to the observations present in both inputs
rn <- intersect(rownames(domains), rownames(embedding))
embedding <- embedding[rn, , drop = FALSE]
domains <- domains[rn, , drop = FALSE]

# silhouette() expects integer cluster codes, so recode the labels first
labs <- factor(domains$label)
cl <- as.integer(labs)

# calculate silhouette score on the embeddings per cell
sil <- silhouette(cl, dist = dist(embedding))

agg_median <- aggregate(sil[, "sil_width", drop = FALSE],
                        list(cluster = sil[, "cluster"]), FUN = median)
agg_mean <- aggregate(sil[, "sil_width", drop = FALSE],
                      list(cluster = sil[, "cluster"]), FUN = mean)

# map the integer codes back to the original cluster labels
df <- data.frame(cluster = levels(labs)[agg_mean$cluster],
                 mean_sil_width = agg_mean$sil_width,
                 median_sil_width = agg_median$sil_width)

## Write output
outfile <- opt$out_file
dir.create(dirname(outfile), showWarnings = FALSE, recursive = TRUE)
write_json(df, outfile)
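Because `jsonlite::write_json` serialises a data.frame as an array of per-row records, the metric file can be consumed directly downstream. A sketch of reading it back in Python (the file name is hypothetical):

```python
import json

import pandas as pd

# Each record corresponds to one cluster, with its mean and median silhouette width
with open("cluster-specific-silhouette.json") as f:  # hypothetical path
    records = json.load(f)

df = pd.DataFrame.from_records(records)
print(df[["cluster", "mean_sil_width", "median_sil_width"]])
```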
--------------------------------------------------------------------------------
/data/spatialDLPFC/spatialDLPFC.r:
--------------------------------------------------------------------------------
#!/usr/bin/env Rscript

# Author_and_contribution: Niklas Mueller-Boetticher; created template
# Author_and_contribution: Niklas Mueller-Boetticher; contributed code

suppressPackageStartupMessages(library(optparse))

option_list <- list(
  make_option(
    c("-o", "--out_dir"),
    type = "character", default = NULL,
    help = "Output directory to write files to."
  )
)

description <- "Load data for spatialDLPFC (10x Visium, human dorsolateral prefrontal cortex, via spatialLIBD)"

opt_parser <- OptionParser(
  usage = description,
  option_list = option_list
)
opt <- parse_args(opt_parser)

out_dir <- opt$out_dir

# The folder structure should look like the following
# out_dir
# |_______sample_1 (sample name can be chosen freely)
#         |_____coordinates.tsv
#         |_____features.tsv
#         |_____observations.tsv
#         |_____counts.mtx (use Matrix::writeMM)
#         |_____labels.tsv (optional)
#         |_____H_E.(tiff/png/...) (optional)
#         |_____H_E.json (optional, required if H_E is provided)
# |_______sample_2
#         | ...
# |_______samples.tsv
# |_______experiment.json
# If additional output files are required, write them to out_dir as well.


## Your code goes here
technology <- "Visium"

suppressPackageStartupMessages(library(spatialLIBD))
suppressPackageStartupMessages(library(magrittr))

write_tsv <- function(df, path) {
  write.table(df, path, sep = "\t", col.names = NA, quote = FALSE)
}

write_SpatialExperiment_to_folder <- function(spe, path, obs_col, assay_name = "counts") {
  dir.create(path, showWarnings = FALSE, recursive = TRUE)

  colData(spe)[obs_col] %>%
    as.data.frame() %>%
    write_tsv(file.path(path, "observations.tsv"))

  rowData(spe) %>%
    as.data.frame() %>%
    write_tsv(file.path(path, "features.tsv"))

  coords <- spatialCoords(spe)
  mode(coords) <- "integer"
  as.data.frame(coords) %>%
    dplyr::rename(x = "pxl_col_in_fullres", y = "pxl_row_in_fullres") %>%
    write_tsv(file.path(path, "coordinates.tsv"))

  # the assay is stored features x observations; transpose to observations x features
  assay(spe, assay_name) %>%
    t() %>%
    Matrix::writeMM(file.path(path, "counts.mtx"))
}

spe <- fetch_data("spatialDLPFC_Visium")

keep_cols <- c("sample_id", "subject", "position", "sex", "age", "row", "col")
colData(spe) <- colData(spe)[, keep_cols]

keep_rows <- c("gene_name", "gene_version", "source", "gene_type")
rowData(spe) <- rowData(spe)[, keep_rows]

for (sample in unique(colData(spe)$sample_id)) {
  spe_sample <- spe[, spe$sample_id == sample]
  write_SpatialExperiment_to_folder(
    spe_sample,
    file.path(out_dir, sample),
    obs_col = c("row", "col")
  )
}

samples_df <- colData(spe) %>%
  as.data.frame() %>%
  dplyr::mutate(replicate = NA) %>%
  dplyr::select(patient = subject, sample = sample_id, position, replicate, sex, age) %>%
  dplyr::distinct() %>%
  dplyr::mutate(directory = sample) %>%
  dplyr::mutate(n_clusters = NA) # part of the samples.tsv spec; may be NA


## Metadata files
row.names(samples_df) <- NULL
write.table(samples_df, file = file.path(out_dir, "samples.tsv"), sep = "\t", col.names = NA, quote = FALSE, na = "")

json <- file(file.path(out_dir, "experiment.json"))
writeLines(c(paste0('{"technology": "', technology, '"}')), json)
close(json)
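A quick way to sanity-check what the loader wrote is to verify that the Matrix Market file lines up with the metadata tables. A sketch in Python (the sample path is hypothetical):

```python
import pandas as pd
from scipy.io import mmread

# Hypothetical path: one sample folder written by the loader above
sample_dir = "out_dir/151507"

counts = mmread(f"{sample_dir}/counts.mtx")
observations = pd.read_table(f"{sample_dir}/observations.tsv", index_col=0)
features = pd.read_table(f"{sample_dir}/features.tsv", index_col=0)

# counts.mtx is written observations x features, so the shapes must line up
assert counts.shape == (len(observations), len(features))
```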
--------------------------------------------------------------------------------
/workflows/02_preprocessing.smk:
--------------------------------------------------------------------------------
import json
import os

from shared.functions import check_files_in_folder, get_git_directory, get_sample_dirs


configfile: "path_config.yaml"
configfile: "excute_config.yaml"


GIT_DIR = get_git_directory(config)
DATASETS = config.pop("datasets")
datasets_selected = config.pop("datasets_selected")


# For every sample folder that contains all required input files,
# add the corresponding output file to the target list.
def create_input(file_list, input_file_name, data_dir):
    input_files = []
    for sample_dir in get_sample_dirs(data_dir):
        if check_files_in_folder(sample_dir, file_list):
            input_files.append(sample_dir + input_file_name)
    return input_files


def create_quality_control_input(wildcards):
    file_list = ["coordinates.tsv", "counts.mtx", "features.tsv", "observations.tsv"]
    all_qc_files = []
    for dataset in datasets_selected:
        data_dir = config["DATASET_DIR"] + "/" + dataset
        if "experiment.json" in os.listdir(data_dir):
            all_qc_files += create_input(file_list, "/qc/counts.mtx", data_dir)
            all_qc_files += create_input(file_list, "/qc/features.tsv", data_dir)
            all_qc_files += create_input(file_list, "/qc/observations.tsv", data_dir)
            all_qc_files += create_input(file_list, "/qc/coordinates.tsv", data_dir)
    return all_qc_files


# Get the QC parameters from the dataset's optargs.json file, if it exists
def get_opt(wildcards):
    dataset = wildcards["dataset"]

    # default values
    opt = {"min_cells": 1, "min_genes": 1, "min_counts": 1}

    # Override the defaults with any customized values from optargs.json
    if "optargs" in DATASETS[dataset] and os.path.exists(GIT_DIR + DATASETS[dataset]["optargs"]):
        with open(GIT_DIR + DATASETS[dataset]["optargs"], "r") as file:
            opt_load = json.load(file)
        # only keys that are known QC parameters are taken over
        opt.update({k: v for k, v in opt_load.items() if k in opt})

    return opt


####################### Preprocessing #######################
rule all:
    input:
        create_quality_control_input,


rule quality_control:
    input:
        coordinates=config["DATASET_DIR"] + "/{dataset}/{sample}/coordinates.tsv",
        matrix=config["DATASET_DIR"] + "/{dataset}/{sample}/counts.mtx",
        features=config["DATASET_DIR"] + "/{dataset}/{sample}/features.tsv",
        observations=config["DATASET_DIR"] + "/{dataset}/{sample}/observations.tsv",
    output:
        dir=directory(config["DATASET_DIR"] + "/{dataset}/{sample}/qc"),
        counts=config["DATASET_DIR"] + "/{dataset}/{sample}/qc/counts.mtx",
        features=config["DATASET_DIR"] + "/{dataset}/{sample}/qc/features.tsv",
        observations=config["DATASET_DIR"] + "/{dataset}/{sample}/qc/observations.tsv",
        coordinates=config["DATASET_DIR"] + "/{dataset}/{sample}/qc/coordinates.tsv",
    conda:
        GIT_DIR + "preprocessing/quality_control/qc_scanpy.yml"
    params:
        opt=get_opt,
    shell:
        """
        python {GIT_DIR}preprocessing/quality_control/qc_scanpy.py \
            -c {input.coordinates} \
            -m {input.matrix} \
            -f {input.features} \
            -o {input.observations} \
            --min_genes {params.opt[min_genes]} \
            --min_cells {params.opt[min_cells]} \
            --min_counts {params.opt[min_counts]} \
            -d {output.dir}
        """
--------------------------------------------------------------------------------
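The merge in `get_opt` only lets an optargs.json override keys that already exist in the defaults, so unknown entries are silently ignored. A self-contained sketch of that behaviour (the loaded values are hypothetical):

```python
# Defaults used by the workflow; only these keys can be overridden
defaults = {"min_cells": 1, "min_genes": 1, "min_counts": 1}

# Hypothetical optargs.json content for one dataset
opt_load = {"min_genes": 10, "n_clusters": 7}  # n_clusters is not a QC key

defaults.update({k: v for k, v in opt_load.items() if k in defaults})
print(defaults)  # {'min_cells': 1, 'min_genes': 10, 'min_counts': 1}
```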