├── docs
    ├── CONTRIBUTING.md
    ├── about
    │   └── license.md
    ├── usage.md
    ├── modules
    │   ├── data.md
    │   ├── method.md
    │   ├── metric.md
    │   ├── consensus.md
    │   └── index.md
    ├── requirements.txt
    ├── img
    │   ├── logo.png
    │   └── favicon.ico
    └── index.md
├── metric
    ├── V_measure
    │   ├── config
    │   │   ├── config_2.json
    │   │   ├── config_3.json
    │   │   └── config_1.json
    │   ├── V_measure_optargs.json
    │   ├── V_measure.yml
    │   └── V_measure.py
    ├── LISI
    │   ├── config
    │   │   └── config_1.json
    │   ├── LISI_optargs.json
    │   ├── LISI.yml
    │   └── LISI.r
    ├── ARI
    │   ├── ARI_optargs.json
    │   ├── ARI.yml
    │   └── ARI.py
    ├── FMI
    │   ├── FMI_optargs.json
    │   ├── FMI.yml
    │   └── FMI.py
    ├── MCC
    │   ├── MCC_optargs.json
    │   ├── MCC.yaml
    │   └── MCC.py
    ├── NMI
    │   ├── NMI_optargs.json
    │   ├── NMI.yml
    │   └── NMI.r
    ├── PAS
    │   ├── PAS.yml
    │   └── PAS_optargs.json
    ├── CHAOS
    │   ├── CHAOS.yml
    │   └── CHAOS_optargs.json
    ├── Entropy
    │   ├── Entropy_optargs.json
    │   ├── Entropy.yml
    │   └── Entropy.py
    ├── jaccard
    │   ├── jaccard_optargs.json
    │   ├── jaccard.yaml
    │   └── jaccard.py
    ├── SpatialARI
    │   ├── SpatialARI_optargs.json
    │   ├── SpatialARI.yml
    │   └── SpatialARI_env.sh
    ├── Completeness
    │   ├── Completeness_optargs.json
    │   ├── Completeness.yml
    │   └── Completeness.py
    ├── Homogeneity
    │   ├── Homogeneity_optargs.json
    │   ├── Homogeneity.yml
    │   └── Homogeneity.py
    ├── Calinski-Harabasz
    │   ├── Calinski-Harabasz_optargs.json
    │   ├── Calinski-Harabasz.yml
    │   └── Calinski-Harabasz.py
    ├── Davies-Bouldin
    │   ├── Davies-Bouldin_optargs.json
    │   ├── Davies-Bouldin.yml
    │   └── Davies-Bouldin.py
    ├── domain-specific-f1
    │   ├── domain-specific-f1_optargs.json
    │   └── domain-specific-f1.yml
    ├── cluster-specific-silhouette
    │   ├── cluster-specific-silhouette_optargs.json
    │   ├── cluster-specific-silhouette.yml
    │   └── cluster-specific-silhouette.r
    └── README.md
├── preprocessing
    ├── neighbors
    │   ├── radius
    │   │   ├── config
    │   │   │   └── config_1.json
    │   │   ├── radius.yml
    │   │   └── radius.py
    │   ├── n_rings
    │   │   ├── config
    │   │   │   └── config_1.json
    │   │   ├── n_rings.yml
    │   │   └── n_rings.py
    │   ├── n_neighbourhood
    │   │   ├── config
    │   │   │   └── config_1.json
    │   │   ├── n_neighbourhood.yml
    │   │   └── n_neighbourhood.py
    │   └── delaunay_triangulation
    │   │   ├── delaunay_triangulation.yml
    │   │   └── delaunay_triangulation.py
    ├── transformation
    │   ├── log1p.yml
    │   └── log1p.py
    ├── quality_control
    │   └── qc_scanpy.yml
    ├── feature_selection
    │   ├── highly_variable_genes_scanpy.yml
    │   └── highly_variable_genes_scanpy.py
    ├── README.md
    ├── dimensionality_reduction
    │   ├── PCA.yml
    │   └── PCA.py
    ├── feature_selection_MoranI
    │   └── spatially_variable_genes_moransI.yml
    └── visualization
    │   ├── visualization.yml
    │   └── pdf_merge.py
├── method
    ├── DRSC
    │   ├── config
    │   │   ├── config_all_genes.json
    │   │   ├── config_default.json
    │   │   ├── config_FindSVGs.json
    │   │   └── config_FindVariableFeatures.json
    │   ├── DRSC_optargs.json
    │   ├── drsc_env.sh
    │   └── DRSC.yml
    ├── DeepST
    │   ├── config
    │   │   ├── config_2.json
    │   │   ├── config_3.json
    │   │   └── config_1.json
    │   ├── DeepST_optargs.json
    │   ├── DeepST.yml
    │   └── DeepST_env.sh
    ├── GraphST
    │   ├── config
    │   │   ├── config_1.json
    │   │   ├── config_3.json
    │   │   ├── config_4.json
    │   │   ├── config_2.json
    │   │   ├── config_dlpfc.json
    │   │   └── config_default.json
    │   ├── GraphST_optargs.json
    │   └── GraphST.yml
    ├── SpaceFlow
    │   ├── config
    │   │   ├── config_1.json
    │   │   ├── config_default.json
    │   │   └── config_seqfish.json
    │   ├── spaceflow_optargs.json
    │   └── spaceflow.yml
    ├── STAGATE
    │   ├── config
    │   │   ├── config_2.json
    │   │   ├── config_3.json
    │   │   ├── config_4.json
    │   │   ├── config_5.json
    │   │   ├── config_1.json
    │   │   ├── config_starmap.json
    │   │   ├── config_default.json
    │   │   └── config_slide_stereo.json
    │   ├── STAGATE_optargs.json
    │   └── STAGATE.yml
    ├── meringue
    │   ├── domains.tar.gz
    │   ├── config
    │   │   ├── config_1.json
    │   │   ├── config_2.json
    │   │   ├── config_mob.json
    │   │   ├── config_clusteringvignette.json
    │   │   └── config_default.json
    │   ├── meringue_optargs.json
    │   ├── meringue.yml
    │   └── meringue_env.sh
    ├── SC_MEB
    │   ├── config
    │   │   └── config_default.json
    │   ├── SC_MEB_optargs.json
    │   ├── scmeb_env.sh
    │   └── SC_MEB.yml
    ├── maple
    │   ├── config
    │   │   └── config_default.json
    │   ├── maple_optargs.json
    │   ├── maple.yml
    │   └── maple_env.sh
    ├── SOTIP
    │   ├── config
    │   │   ├── config_default.json
    │   │   ├── config_osmFISH.json
    │   │   ├── config_MIBI_TNBC.json
    │   │   ├── config_scMEP_CLCC.json
    │   │   └── config_Visium_dlpfc.json
    │   ├── sotip_optargs.json
    │   └── sotip.yml
    ├── precast
    │   ├── config
    │   │   ├── config_1.json
    │   │   ├── config_dlpfc.json
    │   │   ├── config_default.json
    │   │   └── config_BC.json
    │   ├── precast_optargs.json
    │   ├── precast_env.sh
    │   └── precast.yml
    ├── BANKSY
    │   ├── config
    │   │   ├── config_github.json
    │   │   ├── config_default.json
    │   │   ├── config_starmap.json
    │   │   └── config_dlpfc.json
    │   ├── banksy_optargs.json
    │   └── banksy_env.sh
    ├── BayesSpace
    │   ├── config
    │   │   ├── config_default.json
    │   │   ├── config_1.json
    │   │   ├── config_scc.json
    │   │   └── config_melanoma.json
    │   ├── BayesSpace_optargs.json
    │   └── BayesSpace.yml
    ├── Giotto
    │   ├── Giotto_optargs.json
    │   ├── config
    │   │   ├── config_visium.json
    │   │   ├── config_seqfish.json
    │   │   └── config_default.json
    │   └── Giotto_env.sh
    ├── SEDR
    │   ├── SEDR_optargs.json
    │   ├── config
    │   │   ├── config_1.json
    │   │   ├── config_2.json
    │   │   ├── config_dlpfc.json
    │   │   ├── config_default.json
    │   │   └── config_stereoseq.json
    │   └── SEDR.yml
    ├── bass
    │   ├── bass_optargs.json
    │   ├── config
    │   │   ├── config_2.json
    │   │   ├── config_1.json
    │   │   ├── config_3.json
    │   │   ├── config_dlpfc.json
    │   │   ├── config_default.json
    │   │   └── config_starmap.json
    │   └── bass_env.sh
    ├── conST
    │   ├── conST_optargs.json
    │   ├── config
    │   │   ├── config_Visium_dlpfc.json
    │   │   └── config_default.json
    │   └── conST.yml
    ├── seurat
    │   ├── config
    │   │   ├── config_leiden_spatial.json
    │   │   ├── config_leiden_VisiumHD.json
    │   │   ├── config_default.json
    │   │   └── config_leiden_CODEX.json
    │   └── seurat_optargs.json
    ├── SpiceMix
    │   ├── SpiceMix_optargs.json
    │   ├── SpiceMix.yml
    │   ├── config
    │   │   ├── config_default.json
    │   │   └── config_Visium_dlpfc.json
    │   └── README.md
    ├── spaGCN
    │   ├── spaGCN_optargs.json
    │   ├── spaGCN.yml
    │   └── config
    │   │   ├── config_1.json
    │   │   ├── config_2.json
    │   │   ├── config_3.json
    │   │   └── config_default.json
    ├── spatialGE
    │   ├── spatialGE_optargs.json
    │   ├── config
    │   │   ├── config_sct.json
    │   │   ├── config_default.json
    │   │   ├── config_highweight.json
    │   │   └── config_midweight.json
    │   └── spatialGE_env.sh
    ├── stardust
    │   ├── config
    │   │   ├── config_default.json
    │   │   └── config_tutorial.json
    │   ├── stardust_optargs.json
    │   └── stardust_env.sh
    ├── SCAN-IT
    │   ├── scanit_optargs.json
    │   ├── config
    │   │   ├── config_slideseq_mouse_cerebellum.json
    │   │   ├── config_default.json
    │   │   ├── config_slideseq_hippocampus.json
    │   │   ├── config_slideseq_mouse_olfactory_bulb.json
    │   │   └── config_seqFISH_mouse_SScortex.json
    │   └── scanit.yml
    ├── scanpy
    │   ├── scanpy_optargs.json
    │   ├── scanpy_env.yaml
    │   └── config
    │   │   ├── config_leiden_tutorial.json
    │   │   ├── config_leiden_MERFISH.json
    │   │   └── config_default.json
    ├── CellCharter
    │   ├── CellCharter_optargs.json
    │   ├── config
    │   │   ├── config_1.json
    │   │   ├── config_2.json
    │   │   ├── config_default.json
    │   │   └── config_dlpfc.json
    │   └── CellCharter.yml
    └── search_res.r
├── workflows
    ├── .gitignore
    ├── path_config_test.yaml
    ├── 01_download.smk
    ├── shared
    │   └── functions.py
    ├── excute_config_test.yaml
    ├── 05_aggregation.smk
    └── 02_preprocessing.smk
├── .all-contributorsrc
├── templates
    ├── data_optargs.json
    ├── metric_optargs.json
    ├── method_optargs.json
    ├── data_optargs.schema.yaml
    ├── metric_optargs.schema.yaml
    ├── method_optargs.schema.yaml
    ├── consensus_BC.py
    ├── metric.py
    ├── consensus_BC.r
    ├── metric.r
    ├── README.md
    ├── consensus.py
    └── data.py
├── data
    ├── SEA_AD_data
    │   ├── SEA_AD_data_optargs.json
    │   ├── SEA_AD_data.yml
    │   └── SEA_AD_LICENSE.txt
    ├── libd_dlpfc
    │   ├── libd_dlpfc_optargs.json
    │   └── libd_dlpfc.yml
    ├── osmfish_Ssp
    │   ├── osmfish_Ssp_optargs.json
    │   └── osmfish_Ssp.yml
    ├── spatialDLPFC
    │   ├── spatialDLPFC_optargs.json
    │   ├── README.md
    │   ├── spatialDLPFC.yml
    │   └── spatialDLPFC.r
    ├── sotip_simulation
    │   ├── sotip_simulation_optargs.json
    │   └── sotip.yml
    ├── visium_chicken_heart
    │   ├── chicken_heart_optargs.json
    │   └── chicken_heart.yml
    ├── mouse_kidney_coronal
    │   ├── mouse_kidney_coronal.yml
    │   └── mouse_kidney_coronal_optargs.json
    ├── cosmx_liver
    │   └── cosmx_liver.yml
    ├── xenium-breast-cancer
    │   ├── xenium-breast-cancer_optargs.json
    │   └── xenium-breast-cancer.yml
    ├── abc_atlas_wmb_thalamus
    │   ├── abc_atlas_wmb_thalamus_optargs.json
    │   ├── abc_atlas_wmb_thalamus.yml
    │   └── LICENSE.txt
    ├── STARmap-2018-mouse-cortex
    │   ├── STARmap-2018-mouse-cortex_optargs.json
    │   └── environment.yml
    ├── cosmx_lung
    │   └── cosmx_lung.yml
    ├── merfish_devheart
    │   └── merfish_devheart.yml
    ├── pachter_simulation
    │   ├── pachter_simulation.yml
    │   └── pachter_simulation.py
    ├── stereoseq_liver
    │   └── stereoseq_liver.yml
    ├── visium_breast_cancer_SEDR
    │   ├── visium_breast_cancer_SEDR_optargs.json
    │   └── visium_breast_cancer_SEDR.yml
    ├── mouse_brain_sagittal_anterior
    │   ├── mouse_brain_sagittal_anterior.yml
    │   └── mouse_brain_sagittal_anterior_optargs.json
    ├── mouse_brain_sagittal_posterior
    │   ├── mouse_brain_sagittal_posterior.yml
    │   └── mouse_brain_sagittal_posterior_optargs.json
    ├── xenium-mouse-brain-SergioSalas
    │   ├── xenium-mouse-brain-SergioSalas_optargs.json
    │   └── environment.yml
    ├── her2st-breast-cancer
    │   └── environment.yml
    ├── STARmap_plus
    │   └── STARmap_plus.yml
    ├── slideseq2_olfactory_bulb
    │   └── slideseq2_olfactory_bulb.yml
    ├── stereoseq_mouse_embryo
    │   └── stereoseq_mouse_embryo.yml
    ├── stereoseq_olfactory_bulb
    │   └── stereoseq_olfactory_bulb.yml
    ├── locus_coeruleus
    │   └── locus_coeruleus.yml
    ├── stereoseq_developing_Drosophila_embryos_larvae
    │   └── stereoseq_developing_Drosophila_embryos_larvae.yml
    ├── visium_hd_cancer_colon
    │   └── visium_hd_cancer_colon.yml
    └── xenium-ffpe-bc-idc
    │   └── xenium-ffpe-bc-idc.yml
├── consensus
    ├── 02_BC_ranking
    │   ├── BC_ranking.yaml
    │   └── BC_ranking.r
    ├── 01_Results_Aggregation
    │   └── Results_Aggregation.yaml
    ├── 02_Cross_method_ARI
    │   ├── Cross_method_ARI.yaml
    │   └── Cross_method_ARI.r
    ├── 03_Consensus_kmode
    │   ├── Consensus_kmode.yaml
    │   └── Consensus_kmode.r
    ├── 02_Smoothness_entropy
    │   ├── Smoothness_entropy.yaml
    │   └── Smoothness_entropy.r
    ├── 03_Consensus_weighted
    │   ├── networkanalysis-1.3.0.jar
    │   └── Consensus_weighted.yaml
    ├── 03_Cross_method_entropy
    │   └── Cross_method_entropy.yaml
    └── 03_Consensus_lca
    │   ├── Consensus_lca.yaml
    │   └── Consensus_lca.r
├── .github
    └── workflows
    │   ├── main.yml
    │   └── build_mkdocs.yml
├── mkdocs.yml
├── LICENSE.txt
├── CITATION.cff
└── CODE_OF_CONDUCT.md
/docs/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | ../CONTRIBUTING.md -------------------------------------------------------------------------------- /docs/about/license.md: -------------------------------------------------------------------------------- 1 | ../../LICENSE.txt
-------------------------------------------------------------------------------- /docs/usage.md: -------------------------------------------------------------------------------- 1 | ../workflows/README.md -------------------------------------------------------------------------------- /docs/modules/data.md: -------------------------------------------------------------------------------- 1 | ../../data/README.md -------------------------------------------------------------------------------- /docs/modules/method.md: -------------------------------------------------------------------------------- 1 | ../../method/README.md -------------------------------------------------------------------------------- /docs/modules/metric.md: -------------------------------------------------------------------------------- 1 | ../../metric/README.md -------------------------------------------------------------------------------- /metric/V_measure/config/config_2.json: -------------------------------------------------------------------------------- 1 | {"beta": 1} -------------------------------------------------------------------------------- /metric/V_measure/config/config_3.json: -------------------------------------------------------------------------------- 1 | {"beta": 2} -------------------------------------------------------------------------------- /docs/modules/consensus.md: -------------------------------------------------------------------------------- 1 | ../../consensus/README.md -------------------------------------------------------------------------------- /metric/LISI/config/config_1.json: -------------------------------------------------------------------------------- 1 | {"perplexity": 15} -------------------------------------------------------------------------------- /metric/V_measure/config/config_1.json: -------------------------------------------------------------------------------- 1 | {"beta": 0.5} -------------------------------------------------------------------------------- /docs/requirements.txt: -------------------------------------------------------------------------------- 1 | mkdocs ~= 1.6 2 | mkdocs-macros-plugin ~= 1.3 3 | -------------------------------------------------------------------------------- /preprocessing/neighbors/radius/config/config_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "radius":1 3 | } -------------------------------------------------------------------------------- /preprocessing/neighbors/n_rings/config/config_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "n_rings":1 3 | } -------------------------------------------------------------------------------- /preprocessing/neighbors/n_neighbourhood/config/config_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "n_neighs":6 3 | } -------------------------------------------------------------------------------- /method/DRSC/config/config_all_genes.json: -------------------------------------------------------------------------------- 1 | {"feature_method": "All genes", "source": "NA"} 2 | -------------------------------------------------------------------------------- /workflows/.gitignore: -------------------------------------------------------------------------------- 1 | notes.md 2 | .snakemake 3 | shared/__pycache__/ 4 | *_requirements.info -------------------------------------------------------------------------------- 
/method/DeepST/config/config_2.json: -------------------------------------------------------------------------------- 1 | { 2 | "spatial_type": "BallTree", 3 | "npcs": 200 4 | } 5 | -------------------------------------------------------------------------------- /method/DeepST/config/config_3.json: -------------------------------------------------------------------------------- 1 | { 2 | "spatial_type": "KDTree", 3 | "npcs": 200 4 | } 5 | 6 | -------------------------------------------------------------------------------- /docs/img/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SpatialHackathon/SACCELERATOR/HEAD/docs/img/logo.png -------------------------------------------------------------------------------- /method/DeepST/config/config_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "spatial_type": "LinearRegress", 3 | "npcs": 200 4 | } 5 | -------------------------------------------------------------------------------- /docs/img/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SpatialHackathon/SACCELERATOR/HEAD/docs/img/favicon.ico -------------------------------------------------------------------------------- /.all-contributorsrc: -------------------------------------------------------------------------------- 1 | { 2 | "projectName": "SpaceHack2023", 3 | "projectOwner": "SpatialHackathon" 4 | } 5 | -------------------------------------------------------------------------------- /method/GraphST/config/config_1.json: -------------------------------------------------------------------------------- 1 | {"method": "leiden", "refine": true, "radius": 100, "n_pcs": 20, "n_genes": 3000} -------------------------------------------------------------------------------- /method/GraphST/config/config_3.json: -------------------------------------------------------------------------------- 1 | {"method": "leiden", "refine": true, "radius": 50, "n_pcs": 20, "n_genes": 3000} -------------------------------------------------------------------------------- /method/GraphST/config/config_4.json: -------------------------------------------------------------------------------- 1 | {"method": "mclust", "refine": true, "radius": 100, "n_pcs": 20, "n_genes": 3000} -------------------------------------------------------------------------------- /method/SpaceFlow/config/config_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "n_genes": 3000, 3 | "n_pcs": 50, 4 | "n_neighbours": 15 5 | } -------------------------------------------------------------------------------- /templates/data_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "min_cells" : 10, 3 | "min_genes" : 20, 4 | "min_counts": 30 5 | } 6 | -------------------------------------------------------------------------------- /method/GraphST/config/config_2.json: -------------------------------------------------------------------------------- 1 | {"method": "leiden", "refine": false, "radius": 50, "n_pcs": 20, "n_genes": 3000} 2 | -------------------------------------------------------------------------------- /templates/metric_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "groundtruth": true, 3 | "embedding": false, 4 | "config_file": true 5 | } 
-------------------------------------------------------------------------------- /method/STAGATE/config/config_2.json: -------------------------------------------------------------------------------- 1 | {"method": "louvain", "model": "KNN", "k_cutoff": 10, "n_genes": 3000, "min_cells": 0} 2 | -------------------------------------------------------------------------------- /method/STAGATE/config/config_3.json: -------------------------------------------------------------------------------- 1 | {"method": "louvain", "model": "KNN", "k_cutoff": 15, "n_genes": 3000, "min_cells": 0} 2 | -------------------------------------------------------------------------------- /method/STAGATE/config/config_4.json: -------------------------------------------------------------------------------- 1 | {"method": "mclust", "model": "KNN", "k_cutoff": 10, "n_genes": 3000, "min_cells": 0} 2 | -------------------------------------------------------------------------------- /method/STAGATE/config/config_5.json: -------------------------------------------------------------------------------- 1 | {"method": "mclust", "model": "KNN", "k_cutoff": 15, "n_genes": 3000, "min_cells": 0} 2 | -------------------------------------------------------------------------------- /method/meringue/domains.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SpatialHackathon/SACCELERATOR/HEAD/method/meringue/domains.tar.gz -------------------------------------------------------------------------------- /metric/ARI/ARI_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "groundtruth": true, 3 | "embedding": false, 4 | "config_file": false 5 | } 6 | -------------------------------------------------------------------------------- /data/SEA_AD_data/SEA_AD_data_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "min_cells" : 1, 3 | "min_genes" : 1, 4 | "min_counts": 1 5 | } 6 | 7 | -------------------------------------------------------------------------------- /data/libd_dlpfc/libd_dlpfc_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "min_cells" : 1, 3 | "min_genes" : 1, 4 | "min_counts": 1 5 | } 6 | 7 | -------------------------------------------------------------------------------- /data/osmfish_Ssp/osmfish_Ssp_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "min_cells" : 1, 3 | "min_genes" : 1, 4 | "min_counts": 1 5 | } 6 | 7 | -------------------------------------------------------------------------------- /method/STAGATE/config/config_1.json: -------------------------------------------------------------------------------- 1 | {"method": "louvain", "model": "Radius", "rad_cutoff": 150, "n_genes": 3000, "min_cells": 0} 2 | -------------------------------------------------------------------------------- /metric/FMI/FMI_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "groundtruth": true, 3 | "embedding": false, 4 | "config_file": false 5 | } 6 | 7 | -------------------------------------------------------------------------------- /metric/LISI/LISI_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "groundtruth": true, 3 | "embedding": true, 4 | "config_file": true 5 | } 6 | 7 | 
-------------------------------------------------------------------------------- /metric/MCC/MCC_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "groundtruth": true, 3 | "embedding": false, 4 | "config_file": false 5 | } 6 | 7 | -------------------------------------------------------------------------------- /metric/NMI/NMI_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "groundtruth": true, 3 | "embedding": false, 4 | "config_file": false 5 | } 6 | 7 | -------------------------------------------------------------------------------- /metric/PAS/PAS.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - r-base=4.3.1 5 | - r-optparse=1.7.3 6 | - r-pdist=1.2.1 -------------------------------------------------------------------------------- /data/spatialDLPFC/spatialDLPFC_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "min_cells" : 1, 3 | "min_genes" : 1, 4 | "min_counts": 1 5 | } 6 | 7 | -------------------------------------------------------------------------------- /metric/ARI/ARI.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - python=3.12.0 5 | - pandas=2.1.1 6 | - scikit-learn=1.3.2 -------------------------------------------------------------------------------- /metric/CHAOS/CHAOS.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - r-base=4.3.1 5 | - r-optparse=1.7.3 6 | - r-pdist=1.2.1 -------------------------------------------------------------------------------- /metric/Entropy/Entropy_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "groundtruth": true, 3 | "embedding": false, 4 | "config_file": false 5 | } 6 | 7 | -------------------------------------------------------------------------------- /metric/FMI/FMI.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - python=3.12.0 5 | - pandas=2.1.1 6 | - scikit-learn=1.3.2 -------------------------------------------------------------------------------- /metric/MCC/MCC.yaml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - python=3.12.0 5 | - pandas=2.1.1 6 | - scikit-learn=1.3.2 -------------------------------------------------------------------------------- /metric/NMI/NMI.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - r-base=4.3.1 5 | - r-aricode=1.0.2 6 | - r-optparse=1.7.3 -------------------------------------------------------------------------------- /metric/jaccard/jaccard_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "groundtruth": true, 3 | "embedding": false, 4 | "config_file": false 5 | } 6 | 7 | -------------------------------------------------------------------------------- /preprocessing/transformation/log1p.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - python=3.9.18 5 | - scanpy=1.10.1 6 | 
-------------------------------------------------------------------------------- /consensus/02_BC_ranking/BC_ranking.yaml: -------------------------------------------------------------------------------- 1 | channels: 2 | - r 3 | - conda-forge 4 | dependencies: 5 | - r-base=4.4.2 6 | - r-optparse=1.7.5 -------------------------------------------------------------------------------- /data/sotip_simulation/sotip_simulation_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "min_cells" : 1, 3 | "min_genes" : 1, 4 | "min_counts": 1 5 | } 6 | 7 | -------------------------------------------------------------------------------- /data/visium_chicken_heart/chicken_heart_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "min_cells" : 1, 3 | "min_genes" : 1, 4 | "min_counts": 1 5 | } 6 | 7 | -------------------------------------------------------------------------------- /method/meringue/config/config_1.json: -------------------------------------------------------------------------------- 1 | {"min.reads": 100, "min.lib.size": 100, "k": 18, "alpha": 1, "beta": 1, "n_pcs": 5, "filterDist": 2} 2 | -------------------------------------------------------------------------------- /method/meringue/config/config_2.json: -------------------------------------------------------------------------------- 1 | {"min.reads": 100, "min.lib.size": 100, "k": 18, "alpha": 0, "beta": 0, "n_pcs": 5, "filterDist": 2} 2 | -------------------------------------------------------------------------------- /metric/Entropy/Entropy.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - python=3.12.0 5 | - pandas=2.1.1 6 | - scikit-learn=1.4.2 -------------------------------------------------------------------------------- /metric/SpatialARI/SpatialARI_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "groundtruth": true, 3 | "embedding": false, 4 | "config_file": false 5 | } 6 | -------------------------------------------------------------------------------- /metric/V_measure/V_measure_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "groundtruth": true, 3 | "embedding": false, 4 | "config_file": true 5 | } 6 | 7 | -------------------------------------------------------------------------------- /preprocessing/quality_control/qc_scanpy.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - python=3.9.18 5 | - scanpy=1.10.1 6 | -------------------------------------------------------------------------------- /consensus/01_Results_Aggregation/Results_Aggregation.yaml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - python=3.9.21 5 | - pandas=2.2.3 -------------------------------------------------------------------------------- /data/mouse_kidney_coronal/mouse_kidney_coronal.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - python=3.9.18 5 | - scanpy=1.10.1 6 | -------------------------------------------------------------------------------- /metric/Completeness/Completeness_optargs.json: -------------------------------------------------------------------------------- 1 | 
{ 2 | "groundtruth": true, 3 | "embedding": false, 4 | "config_file": false 5 | } 6 | 7 | -------------------------------------------------------------------------------- /metric/Homogeneity/Homogeneity_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "groundtruth": true, 3 | "embedding": false, 4 | "config_file": false 5 | } 6 | 7 | -------------------------------------------------------------------------------- /metric/V_measure/V_measure.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - python=3.12.0 5 | - pandas=2.1.1 6 | - scikit-learn=1.3.2 -------------------------------------------------------------------------------- /metric/jaccard/jaccard.yaml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - python=3.12.0 5 | - pandas=2.1.1 6 | - scikit-learn=1.3.2 -------------------------------------------------------------------------------- /data/cosmx_liver/cosmx_liver.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - python=3.9.18 5 | - pip 6 | - pip: 7 | - tiledbsc==0.1.5 -------------------------------------------------------------------------------- /data/mouse_kidney_coronal/mouse_kidney_coronal_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "min_cells" : 1, 3 | "min_genes" : 1, 4 | "min_counts": 1 5 | } 6 | 7 | -------------------------------------------------------------------------------- /data/xenium-breast-cancer/xenium-breast-cancer_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "min_cells" : 1, 3 | "min_genes" : 1, 4 | "min_counts": 1 5 | } 6 | 7 | -------------------------------------------------------------------------------- /metric/Calinski-Harabasz/Calinski-Harabasz_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "groundtruth": false, 3 | "embedding": true, 4 | "config_file": false 5 | } 6 | -------------------------------------------------------------------------------- /metric/Completeness/Completeness.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - python=3.12.0 5 | - pandas=2.1.1 6 | - scikit-learn=1.3.2 -------------------------------------------------------------------------------- /metric/Davies-Bouldin/Davies-Bouldin_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "groundtruth": false, 3 | "embedding": true, 4 | "config_file": false 5 | } 6 | 7 | -------------------------------------------------------------------------------- /metric/Homogeneity/Homogeneity.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - python=3.12.0 5 | - pandas=2.1.1 6 | - scikit-learn=1.3.2 -------------------------------------------------------------------------------- /data/abc_atlas_wmb_thalamus/abc_atlas_wmb_thalamus_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "min_cells" : 1, 3 | "min_genes" : 1, 4 | "min_counts": 1 5 | } 6 | 7 | 
-------------------------------------------------------------------------------- /method/SC_MEB/config/config_default.json: -------------------------------------------------------------------------------- 1 | {"n_pcs": 15, "n_genes": 2000, "source":"https://shufeyangyi2015310117.github.io/SC.MEB/articles/SC.MEB_CRC.html"} -------------------------------------------------------------------------------- /metric/Davies-Bouldin/Davies-Bouldin.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - python=3.12.0 5 | - pandas=2.1.1 6 | - scikit-learn=1.3.2 -------------------------------------------------------------------------------- /data/STARmap-2018-mouse-cortex/STARmap-2018-mouse-cortex_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "min_cells" : 1, 3 | "min_genes" : 1, 4 | "min_counts": 1 5 | } 6 | 7 | -------------------------------------------------------------------------------- /data/cosmx_lung/cosmx_lung.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - python=3.9.18 5 | - scanpy=1.10.1 6 | - r-base=4.3.3 7 | - rpy2=3.5.11 -------------------------------------------------------------------------------- /data/merfish_devheart/merfish_devheart.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - python=3.9.18 5 | - scanpy=1.10.1 6 | - openpyxl=3.1.2 7 | -------------------------------------------------------------------------------- /data/pachter_simulation/pachter_simulation.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - python=3.11.6 5 | - pip 6 | - pip: 7 | - pypdl==1.3.2 -------------------------------------------------------------------------------- /data/stereoseq_liver/stereoseq_liver.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - python=3.9.18 5 | - scanpy=1.10.1 6 | - openpyxl=3.1.2 7 | -------------------------------------------------------------------------------- /data/visium_breast_cancer_SEDR/visium_breast_cancer_SEDR_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "min_cells" : 1, 3 | "min_genes" : 1, 4 | "min_counts": 1 5 | } 6 | 7 | -------------------------------------------------------------------------------- /method/maple/config/config_default.json: -------------------------------------------------------------------------------- 1 | { 2 | "n_genes": 2000, 3 | "n_pcs": 8, 4 | "source": "https://carter-allen.github.io/stxBrain_multi_maple.html" 5 | } -------------------------------------------------------------------------------- /metric/Calinski-Harabasz/Calinski-Harabasz.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - python=3.12.0 5 | - pandas=2.1.1 6 | - scikit-learn=1.3.2 -------------------------------------------------------------------------------- /metric/domain-specific-f1/domain-specific-f1_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "groundtruth": true, 3 | "embedding": false, 4 | "config_file": false 5 | } 6 | 7 | 
-------------------------------------------------------------------------------- /preprocessing/feature_selection/highly_variable_genes_scanpy.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - python=3.9.18 5 | - scanpy=1.10.1 6 | -------------------------------------------------------------------------------- /data/mouse_brain_sagittal_anterior/mouse_brain_sagittal_anterior.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - python=3.9.18 5 | - scanpy=1.10.1 6 | -------------------------------------------------------------------------------- /data/mouse_brain_sagittal_anterior/mouse_brain_sagittal_anterior_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "min_cells" : 1, 3 | "min_genes" : 1, 4 | "min_counts": 1 5 | } 6 | 7 | -------------------------------------------------------------------------------- /data/mouse_brain_sagittal_posterior/mouse_brain_sagittal_posterior.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - python=3.9.18 5 | - scanpy=1.10.1 6 | -------------------------------------------------------------------------------- /metric/PAS/PAS_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "groundtruth": false, 3 | "embedding": false, 4 | "config_file": false, 5 | "physical_coordinate": true 6 | } 7 | 8 | -------------------------------------------------------------------------------- /preprocessing/README.md: -------------------------------------------------------------------------------- 1 | # Preprocessing 2 | 3 | Please contact one of the organisers for instructions should your method require any special preprocessing.
4 | -------------------------------------------------------------------------------- /preprocessing/neighbors/n_rings/n_rings.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - python=3.9.18 5 | - pip 6 | - pip: 7 | - squidpy==1.5.0 8 | -------------------------------------------------------------------------------- /preprocessing/neighbors/radius/radius.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - python=3.9.18 5 | - pip 6 | - pip: 7 | - squidpy==1.5.0 8 | -------------------------------------------------------------------------------- /data/mouse_brain_sagittal_posterior/mouse_brain_sagittal_posterior_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "min_cells" : 1, 3 | "min_genes" : 1, 4 | "min_counts": 1 5 | } 6 | 7 | -------------------------------------------------------------------------------- /data/xenium-mouse-brain-SergioSalas/xenium-mouse-brain-SergioSalas_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "min_cells" : 1, 3 | "min_genes" : 1, 4 | "min_counts": 1 5 | } 6 | 7 | -------------------------------------------------------------------------------- /method/SOTIP/config/config_default.json: -------------------------------------------------------------------------------- 1 | { 2 | "res": 1, 3 | "n_neighbours": 15, 4 | "source": "https://scanpy.readthedocs.io/en/stable/api/index.html" 5 | } 6 | -------------------------------------------------------------------------------- /method/precast/config/config_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "n_genes": 700, 3 | "method": "HVGs", 4 | "maxIter": 30, 5 | "postminspots":15, 6 | "postminfeatures":15 7 | } -------------------------------------------------------------------------------- /metric/CHAOS/CHAOS_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "groundtruth": false, 3 | "embedding": false, 4 | "config_file": false, 5 | "physical_coordinate": true 6 | } 7 | 8 | -------------------------------------------------------------------------------- /metric/LISI/LISI.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | - bioconda 4 | dependencies: 5 | - r-base=4.3.1 6 | - r-optparse=1.7.3 7 | - r-lisi=1.0 8 | - r-rjson=0.2.21 -------------------------------------------------------------------------------- /consensus/02_Cross_method_ARI/Cross_method_ARI.yaml: -------------------------------------------------------------------------------- 1 | channels: 2 | - r 3 | - conda-forge 4 | dependencies: 5 | - r-base=4.4.2 6 | - r-optparse=1.7.5 7 | - r-mclust=6.1.1 -------------------------------------------------------------------------------- /consensus/03_Consensus_kmode/Consensus_kmode.yaml: -------------------------------------------------------------------------------- 1 | channels: 2 | - r 3 | - conda-forge 4 | dependencies: 5 | - r-base=4.4.2 6 | - r-optparse=1.7.5 7 | - r-dicer=2.2.0 -------------------------------------------------------------------------------- /method/precast/config/config_dlpfc.json: -------------------------------------------------------------------------------- 1 | { 2 | "n_genes": 2000, 3 | "method": "HVGs", 4 | "maxIter": 30, 5 | 
"postminspots":15, 6 | "postminfeatures":15 7 | } -------------------------------------------------------------------------------- /metric/cluster-specific-silhouette/cluster-specific-silhouette_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "groundtruth": false, 3 | "embedding": true, 4 | "config_file": false 5 | } 6 | 7 | -------------------------------------------------------------------------------- /consensus/02_Smoothness_entropy/Smoothness_entropy.yaml: -------------------------------------------------------------------------------- 1 | channels: 2 | - r 3 | - conda-forge 4 | dependencies: 5 | - r-base=4.4.2 6 | - r-optparse=1.7.5 7 | - r-dbscan=1.2_0 -------------------------------------------------------------------------------- /consensus/03_Consensus_weighted/networkanalysis-1.3.0.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/SpatialHackathon/SACCELERATOR/HEAD/consensus/03_Consensus_weighted/networkanalysis-1.3.0.jar -------------------------------------------------------------------------------- /data/STARmap-2018-mouse-cortex/environment.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - anndata=0.10.3 5 | - gdown=4.7.3 6 | - scipy=1.11.4 7 | - pandas=2.1.4 8 | -------------------------------------------------------------------------------- /data/xenium-mouse-brain-SergioSalas/environment.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - anndata=0.10.3 5 | - gdown=4.6.0 6 | - scipy=1.11.4 7 | - pandas=2.1.4 -------------------------------------------------------------------------------- /method/precast/config/config_default.json: -------------------------------------------------------------------------------- 1 | { 2 | "n_genes": 2000, 3 | "method": "SPARK-X", 4 | "maxIter": 20, 5 | "postminspots":15, 6 | "postminfeatures":15 7 | } -------------------------------------------------------------------------------- /preprocessing/neighbors/n_neighbourhood/n_neighbourhood.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - python=3.9.18 5 | - pip 6 | - pip: 7 | - squidpy==1.5.0 8 | -------------------------------------------------------------------------------- /data/her2st-breast-cancer/environment.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - anndata=0.10.3 5 | - scipy=1.11.4 6 | - pandas=2.1.4 7 | - p7zip 8 | - gzip 9 | -------------------------------------------------------------------------------- /data/sotip_simulation/sotip.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - python=3.11.6 5 | - scipy=1.11.4 6 | - anndata=0.10.3 7 | - numpy=1.26.2 8 | - pandas=2.1.3 -------------------------------------------------------------------------------- /method/BANKSY/config/config_github.json: -------------------------------------------------------------------------------- 1 | {"method": "leiden", "lambda": 0.8, "k_geom": 15, "n_pcs": 20, "n_genes":1e500, "use_agf": true, "source": "https://github.com/prabhakarlab/Banksy"} -------------------------------------------------------------------------------- 
/method/BayesSpace/config/config_default.json: -------------------------------------------------------------------------------- 1 | { 2 | "n_genes":2000, 3 | "n_pcs": 15, 4 | "gamma": 3, 5 | "nrep": 50000, 6 | "reference": "Default is the same as dlpfc" 7 | } 8 | -------------------------------------------------------------------------------- /preprocessing/dimensionality_reduction/PCA.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - python=3.12.0 5 | - pandas=2.1.1 6 | - scikit-learn=1.3.2 7 | - scipy=1.11.3 -------------------------------------------------------------------------------- /consensus/03_Cross_method_entropy/Cross_method_entropy.yaml: -------------------------------------------------------------------------------- 1 | channels: 2 | - r 3 | - conda-forge 4 | dependencies: 5 | - r-base=4.4.2 6 | - r-optparse=1.7.5 7 | - r-clue=0.3_66 8 | -------------------------------------------------------------------------------- /data/SEA_AD_data/SEA_AD_data.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - numpy=1.26.2 5 | - pandas=2.1.4 6 | - anndata=0.10.3 7 | - scipy=1.11.4 8 | - boto3=1.33.13 9 | -------------------------------------------------------------------------------- /data/spatialDLPFC/README.md: -------------------------------------------------------------------------------- 1 | Currently there are no images included. 2 | 3 | Full-resolution images could not be found. It would be possible to download lower-resolution images at a later point. -------------------------------------------------------------------------------- /templates/method_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "matrix": "counts", 3 | "integrated_feature_selection": false, 4 | "image": true, 5 | "neighbors": false, 6 | "config_file": true 7 | } -------------------------------------------------------------------------------- /consensus/03_Consensus_lca/Consensus_lca.yaml: -------------------------------------------------------------------------------- 1 | channels: 2 | - r 3 | - conda-forge 4 | dependencies: 5 | - r-base=4.4.2 6 | - r-optparse=1.7.5 7 | - r-dicer=2.2.0 8 | - r-polca=1.6.0 9 | -------------------------------------------------------------------------------- /method/DRSC/DRSC_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "matrix": "counts", 3 | "integrated_feature_selection": false, 4 | "image": false, 5 | "neighbors": false, 6 | "config_file": false 7 | } 8 | -------------------------------------------------------------------------------- /method/DRSC/config/config_default.json: -------------------------------------------------------------------------------- 1 | {"feature_method": "FindSVGs", "n_genes": 2000, "source": "https://github.com/feiyoung/DR.SC/blob/64d95135ab79e3e42e86e4a2e31724d17bec29d5/R/main.R#L355"} 2 | -------------------------------------------------------------------------------- /method/Giotto/Giotto_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "matrix": "counts", 3 | "integrated_feature_selection": false, 4 | "image": false, 5 | "neighbors": false, 6 | "config_file": true 7 | } -------------------------------------------------------------------------------- /method/SEDR/SEDR_optargs.json:
-------------------------------------------------------------------------------- 1 | { 2 | "matrix": "counts", 3 | "integrated_feature_selection": false, 4 | "image": false, 5 | "neighbors": false, 6 | "config_file": true 7 | } 8 | -------------------------------------------------------------------------------- /method/SOTIP/sotip_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "matrix": "counts", 3 | "integrated_feature_selection": false, 4 | "image": false, 5 | "neighbors": false, 6 | "config_file": true 7 | } 8 | -------------------------------------------------------------------------------- /method/bass/bass_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "matrix": "counts", 3 | "integrated_feature_selection": false, 4 | "image": false, 5 | "neighbors": false, 6 | "config_file": true 7 | } 8 | -------------------------------------------------------------------------------- /method/conST/conST_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "matrix": "counts", 3 | "integrated_feature_selection": false, 4 | "image": true, 5 | "neighbors": false, 6 | "config_file": true 7 | } 8 | -------------------------------------------------------------------------------- /method/maple/maple_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "matrix": "counts", 3 | "integrated_feature_selection": false, 4 | "image": false, 5 | "neighbors": false, 6 | "config_file": true 7 | } 8 | -------------------------------------------------------------------------------- /method/meringue/config/config_mob.json: -------------------------------------------------------------------------------- 1 | {"min.reads": 100, "min.lib.size": 100, "k": 50, "alpha": 1, "beta": 1, "n_pcs": 5, "filterDist": 2.5, "source": "https://jef.works/MERINGUE/mOB_analysis"} 2 | -------------------------------------------------------------------------------- /method/precast/precast_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "matrix": "counts", 3 | "integrated_feature_selection": false, 4 | "image": false, 5 | "neighbors": false, 6 | "config_file": true 7 | } -------------------------------------------------------------------------------- /method/seurat/config/config_leiden_spatial.json: -------------------------------------------------------------------------------- 1 | { 2 | "algorithm": 4, 3 | "n_genes":2000, 4 | "n_pcs":30, 5 | "source": "https://satijalab.org/seurat/articles/spatial_vignette" 6 | } 7 | -------------------------------------------------------------------------------- /preprocessing/neighbors/delaunay_triangulation/delaunay_triangulation.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - python=3.9.18 5 | - pip 6 | - pip: 7 | - squidpy==1.5.0 8 | -------------------------------------------------------------------------------- /method/BANKSY/banksy_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "matrix": "counts", 3 | "integrated_feature_selection": false, 4 | "image": false, 5 | "neighbors": false, 6 | "config_file": true 7 | } 8 | -------------------------------------------------------------------------------- /method/DeepST/DeepST_optargs.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "matrix": "counts", 3 | "integrated_feature_selection": false, 4 | "image": false, 5 | "neighbors": false, 6 | "config_file": true 7 | } 8 | -------------------------------------------------------------------------------- /method/STAGATE/STAGATE_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "matrix": "counts", 3 | "integrated_feature_selection": false, 4 | "image": false, 5 | "neighbors": false, 6 | "config_file": false 7 | } 8 | -------------------------------------------------------------------------------- /method/STAGATE/config/config_starmap.json: -------------------------------------------------------------------------------- 1 | {"method": "mclust", "model": "Radius", "rad_cutoff": 400, "n_genes": 3000, "min_cells": 0, "source": "https://stagate.readthedocs.io/en/latest/T9_STARmap.html"} -------------------------------------------------------------------------------- /method/SpaceFlow/config/config_default.json: -------------------------------------------------------------------------------- 1 | { 2 | "n_pcs": 50, 3 | "n_neighbours": 50, 4 | "source": "https://github.com/hongleir/SpaceFlow/blob/master/SpaceFlow/SpaceFlow.py#L289#L110" 5 | } -------------------------------------------------------------------------------- /method/SpiceMix/SpiceMix_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "matrix": "counts", 3 | "integrated_feature_selection": false, 4 | "image": false, 5 | "neighbors": false, 6 | "config_file": true 7 | } -------------------------------------------------------------------------------- /method/seurat/seurat_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "matrix": "counts", 3 | "integrated_feature_selection": false, 4 | "image": false, 5 | "neighbors": false, 6 | "config_file": true 7 | } 8 | -------------------------------------------------------------------------------- /method/spaGCN/spaGCN_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "matrix": "counts", 3 | "integrated_feature_selection": false, 4 | "image": false, 5 | "neighbors": false, 6 | "config_file": true 7 | } 8 | -------------------------------------------------------------------------------- /method/spatialGE/spatialGE_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "matrix": "counts", 3 | "integrated_feature_selection": false, 4 | "image": false, 5 | "neighbors": false, 6 | "config_file": true 7 | } -------------------------------------------------------------------------------- /method/stardust/config/config_default.json: -------------------------------------------------------------------------------- 1 | { 2 | "method": "auto", 3 | "npcs": 10, 4 | "n_genes": 3000, 5 | "source": "https://github.com/InfOmics/stardust/blob/master/R/autoStardust.R" 6 | } 7 | -------------------------------------------------------------------------------- /method/stardust/stardust_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "matrix": "counts", 3 | "integrated_feature_selection": false, 4 | "image": false, 5 | "neighbors": false, 6 | "config_file": true 7 | } -------------------------------------------------------------------------------- /data/osmfish_Ssp/osmfish_Ssp.yml: 
-------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - python=3.11.6 5 | - scipy=1.11.4 6 | - anndata=0.10.3 7 | - numpy=1.26 8 | - pandas=2.1.3 9 | - loompy=3.0.6 -------------------------------------------------------------------------------- /method/SCAN-IT/scanit_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "matrix": "counts", 3 | "integrated_feature_selection": false, 4 | "image": false, 5 | "neighbors": false, 6 | "config_file": true 7 | } 8 | 9 | -------------------------------------------------------------------------------- /method/SOTIP/config/config_osmFISH.json: -------------------------------------------------------------------------------- 1 | { 2 | "n_neighbours": 500, 3 | "source": "https://github.com/TencentAILabHealthcare/SOTIP/blob/master/SOTIP_analysis/tutorial/osmFISH_cortex.ipynb" 4 | } 5 | -------------------------------------------------------------------------------- /method/STAGATE/config/config_default.json: -------------------------------------------------------------------------------- 1 | {"method": "mclust", "model": "Radius", "rad_cutoff": 150, "n_genes": 3000, "min_cells": 0, "source": "https://stagate.readthedocs.io/en/latest/T1_DLPFC.html"} 2 | -------------------------------------------------------------------------------- /method/SpaceFlow/spaceflow_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "matrix": "counts", 3 | "integrated_feature_selection": false, 4 | "image": false, 5 | "neighbors": false, 6 | "config_file": true 7 | } 8 | -------------------------------------------------------------------------------- /method/bass/config/config_2.json: -------------------------------------------------------------------------------- 1 | { 2 | "C": 20, 3 | "init_method": "mclust", 4 | "beta_method": "SW", 5 | "geneSelect": "hvg", 6 | "scaleFeature": false, 7 | "n_genes": 2000, 8 | "n_pc": 20 9 | } -------------------------------------------------------------------------------- /method/meringue/meringue_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "matrix": "counts", 3 | "integrated_feature_selection": false, 4 | "image": false, 5 | "neighbors": false, 6 | "config_file": true 7 | } 8 | -------------------------------------------------------------------------------- /method/scanpy/scanpy_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "matrix": "counts", 3 | "integrated_feature_selection": false, 4 | "image": false, 5 | "neighbors": false, 6 | "config_file": true 7 | } 8 | 9 | -------------------------------------------------------------------------------- /method/spatialGE/config/config_sct.json: -------------------------------------------------------------------------------- 1 | { 2 | "weight": 0.025, 3 | "n_genes": 2000, 4 | "method": "sct", 5 | "source": "https://fridleylab.github.io/spatialGE/reference/transform_data.html" 6 | } 7 | -------------------------------------------------------------------------------- /data/STARmap_plus/STARmap_plus.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - gdown=5.1.0 5 | - pandas=2.2.0 6 | - requests=2.31.0 7 | - numpy=1.26.4 8 | - python=3.12.2 9 | - scipy=1.12.0 
-------------------------------------------------------------------------------- /data/slideseq2_olfactory_bulb/slideseq2_olfactory_bulb.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - python=3.11.6 5 | - scipy=1.11.4 6 | - anndata=0.10.3 7 | - numpy=1.26.2 8 | - pandas=2.1.3 -------------------------------------------------------------------------------- /data/stereoseq_mouse_embryo/stereoseq_mouse_embryo.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - python=3.11.6 5 | - scipy=1.11.4 6 | - anndata=0.10.3 7 | - numpy=1.26.2 8 | - pandas=2.1.3 -------------------------------------------------------------------------------- /data/stereoseq_olfactory_bulb/stereoseq_olfactory_bulb.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - python=3.11.6 5 | - scipy=1.11.4 6 | - anndata=0.10.3 7 | - numpy=1.26.2 8 | - pandas=2.1.3 -------------------------------------------------------------------------------- /method/DRSC/config/config_FindSVGs.json: -------------------------------------------------------------------------------- 1 | {"feature_method": "FindSVGs", "n_genes": 480, "source": "https://github.com/feiyoung/DR.SC/blob/64d95135ab79e3e42e86e4a2e31724d17bec29d5/vignettes/DR.SC.DLPFC.Rmd#L88"} 2 | -------------------------------------------------------------------------------- /method/SEDR/config/config_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "n":6, 3 | "cluster_method": "mclust", 4 | "using_dec": false, 5 | "HVG": true, 6 | "Config_rationale": "Same as default, but turning off the dec" 7 | } -------------------------------------------------------------------------------- /method/SEDR/config/config_2.json: -------------------------------------------------------------------------------- 1 | { 2 | "n":12, 3 | "cluster_method": "mclust", 4 | "using_dec": false, 5 | "HVG": true, 6 | "Config_rationale": "Same as dlpfc, but turning off the dec" 7 | } -------------------------------------------------------------------------------- /method/bass/config/config_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "C": 20, 3 | "init_method": "mclust", 4 | "beta_method": "fix", 5 | "geneSelect": "sparkx", 6 | "scaleFeature": false, 7 | "n_genes": 3000, 8 | "n_pc": 20 9 | } -------------------------------------------------------------------------------- /method/bass/config/config_3.json: -------------------------------------------------------------------------------- 1 | { 2 | "C": 20, 3 | "init_method": "kmeans", 4 | "beta_method": "fix", 5 | "geneSelect": "sparkx", 6 | "scaleFeature": false, 7 | "n_genes": 3000, 8 | "n_pc": 20 9 | } -------------------------------------------------------------------------------- /method/meringue/meringue.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | - bioconda 4 | dependencies: 5 | - r-base=4.3.1 6 | - r-optparse=1.7.3 7 | - r-remotes=2.4.2 8 | - r-jsonlite=1.8.8 9 | - r-igraph=2.0.2 -------------------------------------------------------------------------------- /metric/cluster-specific-silhouette/cluster-specific-silhouette.yml: -------------------------------------------------------------------------------- 1 | 
channels: 2 | - conda-forge 3 | dependencies: 4 | - r-base=4.3.1 5 | - r-optparse=1.7.3 6 | - r-cluster=2.1.6 7 | - r-jsonlite=1.8.7 8 | -------------------------------------------------------------------------------- /method/BANKSY/config/config_default.json: -------------------------------------------------------------------------------- 1 | {"method": "leiden", "lambda": 0.8, "k_geom": 15, "n_pcs": 20, "n_genes":1e500, "use_agf": false, "source":"https://github.com/prabhakarlab/Banksy/blob/bioc/R/cluster.R#L82"} -------------------------------------------------------------------------------- /method/BayesSpace/config/config_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "n_genes":2000, 3 | "n_pcs": 7, 4 | "gamma": 3, 5 | "nrep": 10000, 6 | "Config_rationale": "Combination of nrep and nPC based on other 3 configs" 7 | } 8 | 9 | -------------------------------------------------------------------------------- /method/CellCharter/CellCharter_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "matrix": "counts", 3 | "integrated_feature_selection": false, 4 | "image": false, 5 | "neighbors": false, 6 | "config_file": true 7 | } 8 | 9 | -------------------------------------------------------------------------------- /method/SEDR/config/config_dlpfc.json: -------------------------------------------------------------------------------- 1 | { 2 | "n":12, 3 | "cluster_method": "mclust", 4 | "using_dec": true, 5 | "HVG": true, 6 | "reference":"https://sedr.readthedocs.io/en/latest/Tutorial1_Clustering.html" 7 | } 8 | -------------------------------------------------------------------------------- /method/meringue/config/config_clusteringvignette.json: -------------------------------------------------------------------------------- 1 | {"min.reads": 100, "min.lib.size": 100, "k": 50, "alpha": 0, "beta": 0, "n_pcs": 60, "filterDist": 2, "source": "https://jef.works/MERINGUE/spatial_clustering"} 2 | -------------------------------------------------------------------------------- /metric/domain-specific-f1/domain-specific-f1.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - r-base=4.3.1 5 | - r-mclust=6.0.0 6 | - r-clue=0.3_65 7 | - r-optparse=1.7.3 8 | - r-jsonlite=1.8.7 9 | -------------------------------------------------------------------------------- /data/locus_coeruleus/locus_coeruleus.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | - bioconda 4 | dependencies: 5 | - r-base=4.3.1 6 | - bioconductor-weberdivechalcdata=1.4.0 7 | - r-optparse=1.7.3 8 | - r-matrix=1.6.1 -------------------------------------------------------------------------------- /method/BayesSpace/config/config_scc.json: -------------------------------------------------------------------------------- 1 | { 2 | "n_genes":2000, 3 | "n_pcs": 15, 4 | "gamma": 3, 5 | "nrep": 10000, 6 | "reference": "https://www.ezstatconsulting.com/BayesSpace/articles/ji_SCC.html" 7 | } 8 | 9 | -------------------------------------------------------------------------------- /method/SOTIP/config/config_MIBI_TNBC.json: -------------------------------------------------------------------------------- 1 | { 2 | "res": 1, 3 | "n_neighbours": 800, 4 | "source": "https://github.com/TencentAILabHealthcare/SOTIP/blob/master/SOTIP_analysis/tutorial/scMEP_CLCC.ipynb" 5 | } 6 | 
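One quirk worth flagging in the BANKSY configs above: `"n_genes": 1e500` exceeds the largest representable IEEE-754 double, so JSON parsers such as Python's `json` module decode it as infinity — presumably a deliberate way of disabling the gene cutoff. A quick sketch of what a consumer actually sees:

```{python}
import json

cfg = json.loads('{"n_genes": 1e500}')
print(cfg["n_genes"])           # inf: 1e500 overflows to float("inf")
print(10**6 < cfg["n_genes"])   # True, so every realistic gene count passes
```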
-------------------------------------------------------------------------------- /method/SOTIP/config/config_scMEP_CLCC.json: -------------------------------------------------------------------------------- 1 | { 2 | "res": 1, 3 | "n_neighbours": 100, 4 | "source": "https://github.com/TencentAILabHealthcare/SOTIP/blob/master/SOTIP_analysis/tutorial/scMEP_CLCC.ipynb" 5 | } 6 | -------------------------------------------------------------------------------- /method/stardust/config/config_tutorial.json: -------------------------------------------------------------------------------- 1 | { 2 | "method": "weight", 3 | "npcs": 10, 4 | "n_genes": 3000, 5 | "weight": 0.75, 6 | "source":"https://github.com/InfOmics/stardust/blob/master/README.md" 7 | } 8 | -------------------------------------------------------------------------------- /preprocessing/feature_selection_MoranI/spatially_variable_genes_moransI.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - python=3.9.18 5 | - scanpy=1.10.1 6 | - pip 7 | - pip: 8 | - squidpy==1.5.0 9 | -------------------------------------------------------------------------------- /data/xenium-breast-cancer/xenium-breast-cancer.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | - anaconda 4 | dependencies: 5 | - anndata=0.10.3 6 | - gdown=4.7.1 7 | - pandas=2.1.4 8 | - openpyxl=3.0.10 9 | - requests=2.31.0 -------------------------------------------------------------------------------- /method/seurat/config/config_leiden_VisiumHD.json: -------------------------------------------------------------------------------- 1 | { 2 | "algorithm": 4, 3 | "n_genes":2000, 4 | "n_pcs":50, 5 | "source":"https://satijalab.org/seurat/articles/visiumhd_analysis_vignette#unsupervised-clustering" 6 | } 7 | -------------------------------------------------------------------------------- /data/abc_atlas_wmb_thalamus/abc_atlas_wmb_thalamus.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - numpy=1.24.4 5 | - pandas=2.0.3 6 | - anndata=0.9.1 7 | - scipy=1.10.1 8 | - boto3=1.33.7 9 | - nibabel=5.1.0 -------------------------------------------------------------------------------- /method/BANKSY/config/config_starmap.json: -------------------------------------------------------------------------------- 1 | {"method": "leiden", "lambda": 0.8, "k_geom": 30, "n_pcs": 50, "n_genes":1e500, "use_agf": false, "source": "https://prabhakarlab.github.io/Banksy/articles/domain-segment.html#running-banksy"} -------------------------------------------------------------------------------- /method/BayesSpace/config/config_melanoma.json: -------------------------------------------------------------------------------- 1 | { 2 | "n_genes":2000, 3 | "n_pcs": 7, 4 | "gamma": 3, 5 | "nrep": 50000, 6 | "reference":"https://www.ezstatconsulting.com/BayesSpace/articles/thrane_melanoma.html" 7 | } 8 | -------------------------------------------------------------------------------- /method/SC_MEB/SC_MEB_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "matrix": "counts", 3 | "integrated_feature_selection": false, 4 | "image": false, 5 | "neighbors": false, 6 | "config_file": true, 7 | "technology": ["Visium"] 8 | } -------------------------------------------------------------------------------- 
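The SC_MEB optargs file above adds an optional `"technology"` list on top of the usual keys (GraphST, further down in this dump, does the same). The sketch below shows how a scheduler might use it to skip incompatible datasets; `method_supports` is a hypothetical helper for illustration, not a function from this repository:

```{python}
import json
from pathlib import Path

def method_supports(optargs_path: str, technology: str) -> bool:
    """Hypothetical helper: a method without a "technology" list is
    assumed to run on every platform; otherwise the list is authoritative."""
    optargs = json.loads(Path(optargs_path).read_text())
    return technology in optargs.get("technology", [technology])

print(method_supports("method/SC_MEB/SC_MEB_optargs.json", "Visium"))      # True
print(method_supports("method/SC_MEB/SC_MEB_optargs.json", "Stereo-seq"))  # False
```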
/method/seurat/config/config_default.json: -------------------------------------------------------------------------------- 1 | { 2 | "algorithm": 4, 3 | "n_genes":3000, 4 | "n_pcs":50, 5 | "source": "https://satijalab.org/seurat/reference/sctransform, https://satijalab.org/seurat/reference/runpca" 6 | } 7 | -------------------------------------------------------------------------------- /method/spatialGE/config/config_default.json: -------------------------------------------------------------------------------- 1 | { 2 | "weight": 0.025, 3 | "n_genes": 2000, 4 | "source": "https://github.com/FridleyLab/spatialGE/blob/8440639d32b4bac83750f93c2c5ac020fc4c4791/R/STclust.R#L48C92-L48C105" 5 | } 6 | -------------------------------------------------------------------------------- /data/visium_breast_cancer_SEDR/visium_breast_cancer_SEDR.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - python=3.11.6 5 | - scipy=1.11.4 6 | - anndata=0.10.3 7 | - numpy=1.26.2 8 | - pandas=2.1.3 9 | - scanpy=1.9.6 -------------------------------------------------------------------------------- /method/BANKSY/config/config_dlpfc.json: -------------------------------------------------------------------------------- 1 | {"method": "leiden", "lambda": 0.2, "k_geom": 18, "n_pcs": 20, "n_genes": 2000, "use_agf": true, "source": "https://prabhakarlab.github.io/Banksy/articles/batch-correction.html#running-banksy"} 2 | -------------------------------------------------------------------------------- /method/DRSC/config/config_FindVariableFeatures.json: -------------------------------------------------------------------------------- 1 | {"feature_method": "FindVariableFeatures", "n_genes": 500, "source": "https://github.com/feiyoung/DR.SC/blob/64d95135ab79e3e42e86e4a2e31724d17bec29d5/vignettes/DR.SC.DLPFC.Rmd#L57"} 2 | -------------------------------------------------------------------------------- /method/SpaceFlow/config/config_seqfish.json: -------------------------------------------------------------------------------- 1 | { 2 | "n_genes": 3000, 3 | "n_pcs": 50, 4 | "n_neighbours": 50, 5 | "source":"https://github.com/hongleir/SpaceFlow/blob/master/tutorials/seqfish_mouse_embryogenesis.ipynb" 6 | } -------------------------------------------------------------------------------- /method/seurat/config/config_leiden_CODEX.json: -------------------------------------------------------------------------------- 1 | { 2 | "algorithm": 4, 3 | "n_genes":2000, 4 | "n_pcs":20, 5 | "source": "https://satijalab.org/seurat/articles/seurat5_spatial_vignette_2#human-lymph-node-akoya-codex-system" 6 | } 7 | -------------------------------------------------------------------------------- /method/BayesSpace/BayesSpace_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "matrix": "counts", 3 | "integrated_feature_selection": false, 4 | "image": false, 5 | "neighbors": false, 6 | "config_file": false, 7 | "technology": ["Visium"] 8 | } 9 | -------------------------------------------------------------------------------- /method/SEDR/config/config_default.json: -------------------------------------------------------------------------------- 1 | { 2 | "n":6, 3 | "cluster_method": "mclust", 4 | "using_dec": true, 5 | "HVG": true, 6 | "reference": "https://github.com/JinmiaoChenLab/SEDR/blob/master/SEDR/SEDR_model.py" 7 | } 8 | 
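Most method folders in this dump pair a `config/config_default.json` with named variants, and the JSON carries provenance keys (`source`, `source2`, `reference`, or `Config_rationale`) alongside the actual hyperparameters. Here is a hedged sketch of how a method script might load such a file — the `--config` argument name matches the SpiceMix README later in this dump, but the rest is illustrative only:

```{python}
import argparse
import json
from pathlib import Path

parser = argparse.ArgumentParser()
parser.add_argument("--config", default="config/config_default.json",
                    help="Path to a config_*.json file")
args = parser.parse_args()

config = json.loads(Path(args.config).read_text())
# Provenance keys document where values come from; they are not hyperparameters.
for key in ("source", "source2", "reference", "Config_rationale"):
    config.pop(key, None)
print(config)  # what remains is handed to the method
```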
-------------------------------------------------------------------------------- /method/SEDR/config/config_stereoseq.json: -------------------------------------------------------------------------------- 1 | { 2 | "n":6, 3 | "cluster_method": "mclust", 4 | "using_dec": true, 5 | "HVG": false, 6 | "reference": "https://sedr.readthedocs.io/en/latest/Tutorial4_Stereo-seq.html" 7 | } 8 | -------------------------------------------------------------------------------- /method/spaGCN/spaGCN.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - python=3.8.18 5 | - anndata=0.9.2 6 | - numpy=1.24.4 7 | - pandas=2.0.3 8 | - pytorch=2.1.0 9 | - pip 10 | - pip: 11 | - SpaGCN==1.2.7 -------------------------------------------------------------------------------- /method/BayesSpace/BayesSpace.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | - bioconda 4 | dependencies: 5 | - r-base=4.3.1 6 | - r-optparse=1.7.3 7 | - bioconductor-bayesspace=1.10.1 8 | - r-matrix=1.6_1.1 9 | - r-irlba=2.3.5.1 10 | -------------------------------------------------------------------------------- /method/GraphST/config/config_dlpfc.json: -------------------------------------------------------------------------------- 1 | {"method": "mclust", "refine": true, "radius": 50, "n_pcs": 20, "n_genes": 3000, "source": "https://deepst-tutorials.readthedocs.io/en/latest/Tutorial%201_10X%20Visium.html#Spatial-clustering-and-refinement"} -------------------------------------------------------------------------------- /method/spaGCN/config/config_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "method": "louvain", 3 | "refine": false, 4 | "alpha": 1, 5 | "p": 0.5, 6 | "n_pcs": 50, 7 | "n_neighbors": 10, 8 | "Config_rationale": "Un-refined default configuration" 9 | } -------------------------------------------------------------------------------- /method/spaGCN/config/config_2.json: -------------------------------------------------------------------------------- 1 | { 2 | "method": "kmeans", 3 | "refine": true, 4 | "alpha": 1, 5 | "p": 0.5, 6 | "n_pcs": 50, 7 | "n_neighbors": 10, 8 | "Config_rationale": "Different clustering methods (K-means)" 9 | } -------------------------------------------------------------------------------- /method/spatialGE/config/config_highweight.json: -------------------------------------------------------------------------------- 1 | { 2 | "weight": 0.2, 3 | "n_genes": 2000, 4 | "source": "https://fridleylab.github.io/spatialGE/articles/basic_functions_vignette.html#unsupervised-spatially-informed-clustering-stclust" 5 | } 6 | -------------------------------------------------------------------------------- /method/spatialGE/config/config_midweight.json: -------------------------------------------------------------------------------- 1 | { 2 | "weight": 0.05, 3 | "n_genes": 2000, 4 | "source": "https://fridleylab.github.io/spatialGE/articles/basic_functions_vignette.html#unsupervised-spatially-informed-clustering-stclust" 5 | } 6 | -------------------------------------------------------------------------------- /data/stereoseq_developing_Drosophila_embryos_larvae/stereoseq_developing_Drosophila_embryos_larvae.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - python=3.11.6 5 | - scipy=1.11.4 6 | - anndata=0.10.3 7 
| - numpy=1.26.2 8 | - pandas=2.1.3 -------------------------------------------------------------------------------- /method/SOTIP/config/config_Visium_dlpfc.json: -------------------------------------------------------------------------------- 1 | { 2 | "n_pcs": 100, 3 | "res": 2, 4 | "n_neighbours": 200, 5 | "source": "https://github.com/TencentAILabHealthcare/SOTIP/blob/master/SOTIP_analysis/Visium_Cortex/SDM_Visium_cortex.ipynb" 6 | } 7 | -------------------------------------------------------------------------------- /method/spaGCN/config/config_3.json: -------------------------------------------------------------------------------- 1 | { 2 | "method": "kmeans", 3 | "refine": false, 4 | "alpha": 1, 5 | "p": 0.5, 6 | "n_pcs": 50, 7 | "n_neighbors": 10, 8 | "Config_rationale": "Un-refined + Different clustering methods" 9 | } -------------------------------------------------------------------------------- /preprocessing/visualization/visualization.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - python=3.9.18 5 | - scanpy=1.10.1 6 | - seaborn=0.13.2 7 | - matplotlib=3.8.3 8 | - PyPDF2=2.11.1 9 | - typing_extensions=4.10.0 10 | -------------------------------------------------------------------------------- /method/GraphST/GraphST_optargs.json: -------------------------------------------------------------------------------- 1 | { 2 | "matrix": "counts", 3 | "integrated_feature_selection": false, 4 | "image": false, 5 | "neighbors": false, 6 | "config_file": true, 7 | "technology": ["Visium", "Stereo-seq", "Slideseq2"] 8 | } 9 | -------------------------------------------------------------------------------- /method/SpiceMix/SpiceMix.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - python=3.8.18 5 | - pip 6 | - pip: 7 | - --extra-index-url https://download.pytorch.org/whl/cu117 8 | - torch==1.13.1+cu117 9 | - popari==0.0.71 10 | -------------------------------------------------------------------------------- /method/conST/config/config_Visium_dlpfc.json: -------------------------------------------------------------------------------- 1 | { 2 | "k": 20, 3 | "min_cells": 5, 4 | "use_img": false, 5 | "using_mask": false, 6 | "refinement": true, 7 | "source": "https://github.com/ys-zong/conST/blob/main/conST_cluster.ipynb" 8 | } 9 | -------------------------------------------------------------------------------- /method/meringue/config/config_default.json: -------------------------------------------------------------------------------- 1 | {"min.reads": 1, "min.lib.size": 1, "k": 50, "alpha": 1, "beta": 1, "n_pcs": 5, "filterDist": 2, "source": "https://github.com/JEFworks-Lab/MERINGUE/blob/ca9e2ccabd95680d9ca0b323a8a507c038f2ea13/R/cluster.R#L130"} 2 | -------------------------------------------------------------------------------- /method/scanpy/scanpy_env.yaml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - python>=3.10 5 | - numpy=1.26.2 6 | - scipy=1.11.4 7 | - pillow=10.1.0 8 | - anndata=0.10.3 9 | - leidenalg=0.10.1 10 | - louvain=0.8.2 11 | - scanpy=1.10.0 -------------------------------------------------------------------------------- /consensus/03_Consensus_weighted/Consensus_weighted.yaml: -------------------------------------------------------------------------------- 1 | channels: 2 | - r 3 | - conda-forge 4 
| dependencies: 5 | - r-base=4.4.2 6 | - r-optparse=1.7.5 7 | - r-matrix=1.6_5 8 | - r-dplyr=1.1.4 9 | - r-future.apply=1.11.3 10 | - r-igraph=2.0.3 11 | -------------------------------------------------------------------------------- /data/visium_hd_cancer_colon/visium_hd_cancer_colon.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - python=3.11.6 5 | - scipy=1.11.4 6 | - pip 7 | - pip: 8 | - spatialdata==0.1.2 9 | - spatialdata-io==0.1.2 10 | - pypdl==1.3.2 11 | -------------------------------------------------------------------------------- /method/precast/config/config_BC.json: -------------------------------------------------------------------------------- 1 | { 2 | "n_genes": 2000, 3 | "method": "SPARK-X", 4 | "maxIter": 30, 5 | "postminspots":1, 6 | "postminfeatures":10, 7 | "reference": "https://feiyoung.github.io/PRECAST/articles/PRECAST.BreastCancer.html" 8 | } -------------------------------------------------------------------------------- /method/STAGATE/config/config_slide_stereo.json: -------------------------------------------------------------------------------- 1 | {"method": "louvain", "model": "Radius", "rad_cutoff": 50, "n_genes": 3000, "min_cells": 50, "source": "https://stagate.readthedocs.io/en/latest/T3_Slide-seqV2.html", "source2": "https://stagate.readthedocs.io/en/latest/T4_Stereo.html"} -------------------------------------------------------------------------------- /method/maple/maple.yml: -------------------------------------------------------------------------------- 1 | name: maple_env 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | dependencies: 6 | - r-base=4.3.1 7 | - r-optparse=1.7.3 8 | - r-jsonlite=1.8.8 9 | - r-Seurat=4.4.0 10 | - bioconductor-SpatialExperiment=1.12.0 11 | - r-remotes=2.4.2 -------------------------------------------------------------------------------- /data/xenium-ffpe-bc-idc/xenium-ffpe-bc-idc.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - nodefaults 3 | - conda-forge 4 | - bioconda 5 | dependencies: 6 | - python==3.11 7 | - pip 8 | - pip: 9 | - pypdl==1.3.2 10 | - spatialdata==0.3.0 11 | - spatialdata-io==0.1.7 12 | -------------------------------------------------------------------------------- /method/spaGCN/config/config_default.json: -------------------------------------------------------------------------------- 1 | { 2 | "method": "louvain", 3 | "refine": true, 4 | "alpha": 1, 5 | "p": 0.5, 6 | "n_pcs": 50, 7 | "n_neighbors": 10, 8 | "reference": "https://github.com/jianhuupenn/SpaGCN/blob/master/tutorial/tutorial.ipynb" 9 | } -------------------------------------------------------------------------------- /metric/SpatialARI/SpatialARI.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | - bioconda 4 | dependencies: 5 | - r-base=4.3.1 6 | - r-optparse=1.7.3 7 | - r-rjson=0.2.21 8 | - r-remotes=2.4.2.1 9 | - r-aricode=1.0.3 10 | - bioconductor-bluster=1.12.0 11 | - r-spdep=0.6_13 -------------------------------------------------------------------------------- /method/Giotto/config/config_visium.json: -------------------------------------------------------------------------------- 1 | { 2 | "betas": [0, 1, 6], 3 | "beta": 2, 4 | "k": 4, 5 | "n_pcs": 10, 6 | "n_genes": 100, 7 | "bin_method": "rank", 8 | "source": "https://drieslab.github.io/Giotto_website/articles/visium_mouse_kidney.html#spatial-genes" 9 | } 
10 | -------------------------------------------------------------------------------- /method/Giotto/config/config_seqfish.json: -------------------------------------------------------------------------------- 1 | { 2 | "betas": [28, 2, 3], 3 | "beta": 28, 4 | "k": 9, 5 | "n_pcs": 20, 6 | "n_genes": 100, 7 | "bin_method": "kmeans", 8 | "source": "https://drieslab.github.io/Giotto_website/articles/seqfish_cortex.html#hmrf-spatial-domains" 9 | } 10 | -------------------------------------------------------------------------------- /method/GraphST/config/config_default.json: -------------------------------------------------------------------------------- 1 | {"method": "mclust", "refine": false, "radius": 50, "n_pcs": 20, "n_genes": 3000, "source": "https://github.com/JinmiaoChenLab/GraphST/blob/main/GraphST/preprocess.py#L99", "source2": "https://github.com/JinmiaoChenLab/GraphST/blob/main/GraphST/utils.py#L33#L64"} -------------------------------------------------------------------------------- /method/Giotto/config/config_default.json: -------------------------------------------------------------------------------- 1 | { 2 | "betas": [0, 2, 10], 3 | "beta": 10, 4 | "k": 10, 5 | "n_pcs": 10, 6 | "n_genes": 0, 7 | "bin_method": "kmeans", 8 | "source": "https://github.com/drieslab/Giotto/blob/a60fbfcaff30a2942f354e63267ae5894aa84cd4/R/python_hmrf.R#L38" 9 | } 10 | -------------------------------------------------------------------------------- /method/scanpy/config/config_leiden_tutorial.json: -------------------------------------------------------------------------------- 1 | { 2 | "n_genes": 2000, 3 | "n_neighbors" : 15, 4 | "clustering" : "leiden", 5 | "directed": false, 6 | "n_iterations":2, 7 | "source": "https://scanpy.readthedocs.io/en/stable/tutorials/spatial/basic-analysis.html#qc-and-preprocessing" 8 | } -------------------------------------------------------------------------------- /method/CellCharter/config/config_1.json: -------------------------------------------------------------------------------- 1 | { 2 | "min_counts": 3, 3 | "n_latent": 10, 4 | "n_layers": 4, 5 | "aggregations": "mean", 6 | "convergence_tolerance": 0.001, 7 | "covariance_regularization": 1e-06, 8 | "source": "https://cellcharter.readthedocs.io/en/latest/notebooks/codex_mouse_spleen.html" 9 | } 10 | -------------------------------------------------------------------------------- /method/CellCharter/config/config_2.json: -------------------------------------------------------------------------------- 1 | { 2 | "min_counts": 3, 3 | "n_latent": 10, 4 | "n_layers": 5, 5 | "aggregations": "mean", 6 | "convergence_tolerance": 0.001, 7 | "covariance_regularization": 1e-06, 8 | "source": "https://cellcharter.readthedocs.io/en/latest/notebooks/codex_mouse_spleen.html" 9 | } 10 | -------------------------------------------------------------------------------- /method/CellCharter/config/config_default.json: -------------------------------------------------------------------------------- 1 | { 2 | "min_counts": 3, 3 | "n_latent": 10, 4 | "n_layers": 3, 5 | "aggregations": "mean", 6 | "convergence_tolerance": 0.001, 7 | "covariance_regularization": 1e-06, 8 | "source": "https://cellcharter.readthedocs.io/en/latest/notebooks/codex_mouse_spleen.html" 9 | } 10 | -------------------------------------------------------------------------------- /data/libd_dlpfc/libd_dlpfc.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | - bioconda 4 | dependencies: 5 | - 
r-base=4.3.1 6 | - bioconductor-spatiallibd=1.12.0 7 | - r-optparse=1.7.3 8 | - r-dbplyr=2.3.4 9 | - r-dplyr=1.1.3 10 | - r-magrittr=2.0.3 11 | - r-purrr=1.0.2 12 | - r-stringr=1.5.0 13 | - r-tibble=3.2.1 -------------------------------------------------------------------------------- /data/spatialDLPFC/spatialDLPFC.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | - bioconda 4 | dependencies: 5 | - r-base=4.3.1 6 | - bioconductor-spatiallibd=1.12.0 7 | - r-optparse=1.7.3 8 | - r-dbplyr=2.3.4 9 | - r-dplyr=1.1.3 10 | - r-magrittr=2.0.3 11 | - r-purrr=1.0.2 12 | - r-stringr=1.5.0 13 | - r-tibble=3.2.1 -------------------------------------------------------------------------------- /data/visium_chicken_heart/chicken_heart.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - anndata=0.10.3 5 | - gitpython=3.1.40 6 | - pandas=2.1.4 7 | - pillow=10.1.0 8 | - pytest-shutil=1.7.0 9 | - python=3.9.18 10 | - re2=2023.06.02 11 | - scanpy=1.9.6 12 | - scipy=1.11.4 13 | - urllib3=2.1.0 14 | -------------------------------------------------------------------------------- /method/scanpy/config/config_leiden_MERFISH.json: -------------------------------------------------------------------------------- 1 | { 2 | "n_genes": 2000, 3 | "n_pcs":15, 4 | "n_neighbors" : 15, 5 | "clustering" : "leiden", 6 | "directed": false, 7 | "n_iterations":2, 8 | "source": "https://scanpy.readthedocs.io/en/stable/tutorials/spatial/basic-analysis.html#qc-and-preprocessing" 9 | } -------------------------------------------------------------------------------- /method/SCAN-IT/config/config_slideseq_mouse_cerebellum.json: -------------------------------------------------------------------------------- 1 | { 2 | "n_genes": 3000, 3 | "SomNode_k": 5, 4 | "knn_n_neighbours": 5, 5 | "n_h": 30, 6 | "n_epoch": 2000, 7 | "alpha_n_layers": 2, 8 | "n_neighbours": 15, 9 | "source": "https://github.com/zcang/SCAN-IT/blob/main/examples/Slide-seq" 10 | } -------------------------------------------------------------------------------- /method/SCAN-IT/config/config_default.json: -------------------------------------------------------------------------------- 1 | { 2 | "n_genes": 3000, 3 | "SomNode_k": 20, 4 | "knn_n_neighbours": 10, 5 | "n_h": 32, 6 | "n_epoch": 1000, 7 | "alpha_n_layers": 1, 8 | "n_neighbours": 15, 9 | "source": "https://github.com/zcang/SCAN-IT/blob/main/scanit/tools/_scanit_representation.py" 10 | } -------------------------------------------------------------------------------- /method/SCAN-IT/config/config_slideseq_hippocampus.json: -------------------------------------------------------------------------------- 1 | { 2 | "n_genes": 3000, 3 | "SomNode_k": 5, 4 | "knn_n_neighbours": 15, 5 | "n_h": 30, 6 | "n_epoch": 5000, 7 | "alpha_n_layers": 2, 8 | "n_neighbours": 15, 9 | "source": "https://github.com/zcang/SCAN-IT/blob/main/examples/Slide-seq/scanit.ipynb" 10 | } -------------------------------------------------------------------------------- /method/bass/config/config_dlpfc.json: -------------------------------------------------------------------------------- 1 | { 2 | "C": 20, 3 | "init_method": "mclust", 4 | "beta_method": "SW", 5 | "geneSelect": "sparkx", 6 | "scaleFeature": false, 7 | "n_genes": 3000, 8 | "n_pcs": 20, 9 | "burnin": 2000, 10 | "nsample": 10000, 11 | "source": "https://zhengli09.github.io/BASS-Analysis/DLPFC.html" 12 | } 13 | 
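BASS above ships dlpfc/default/starmap variants next to numbered ones, which is the general pattern: every method directory exposes its variants as `config/config_<name>.json`, and the workflow configs (see `path_config_test.yaml` near the end of this dump) reference them by that relative path. A minimal discovery sketch, assuming only the layout visible in this dump:

```{python}
from pathlib import Path

def list_configs(method_dir: str) -> dict:
    """Map each variant name to its path relative to the method directory,
    mirroring the name -> "config/<name>.json" convention of the workflow configs."""
    return {p.stem: str(p.relative_to(method_dir))
            for p in sorted(Path(method_dir, "config").glob("config_*.json"))}

print(list_configs("method/bass"))
# e.g. {"config_1": "config/config_1.json", ..., "config_starmap": "config/config_starmap.json"}
```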
-------------------------------------------------------------------------------- /method/SCAN-IT/config/config_slideseq_mouse_olfactory_bulb.json: -------------------------------------------------------------------------------- 1 | { 2 | "n_genes": 3000, 3 | "SomNode_k": 1, 4 | "knn_n_neighbours": 15, 5 | "n_h": 30, 6 | "n_epoch": 5000, 7 | "alpha_n_layers": 2, 8 | "n_neighbours": 15, 9 | "source": "https://github.com/zcang/SCAN-IT/blob/main/examples/Slide-seq/scanit.ipynb" 10 | } -------------------------------------------------------------------------------- /method/bass/config/config_default.json: -------------------------------------------------------------------------------- 1 | { 2 | "C": 20, 3 | "init_method": "kmeans", 4 | "beta_method": "SW", 5 | "geneSelect": "sparkx", 6 | "scaleFeature": true, 7 | "n_genes": 3000, 8 | "n_pcs": 20, 9 | "burnin": 2000, 10 | "nsample": 5000, 11 | "source": "https://zhengli09.github.io/BASS-Analysis/MERFISH.html" 12 | } 13 | -------------------------------------------------------------------------------- /method/bass/config/config_starmap.json: -------------------------------------------------------------------------------- 1 | { 2 | "C": 15, 3 | "init_method": "kmeans", 4 | "beta_method": "SW", 5 | "geneSelect": "sparkx", 6 | "scaleFeature": true, 7 | "n_genes": 3000, 8 | "n_pcs": 20, 9 | "burnin": 2000, 10 | "nsample": 5000, 11 | "source": "https://zhengli09.github.io/BASS-Analysis/STARmap.html" 12 | } 13 | -------------------------------------------------------------------------------- /method/conST/config/config_default.json: -------------------------------------------------------------------------------- 1 | { 2 | "k": 10, 3 | "min_cells": 3, 4 | "use_img": false, 5 | "using_mask": false, 6 | "refinement": false, 7 | "source1": "https://github.com/ys-zong/conST/blob/main/conST_cluster.ipynb", 8 | "source2": "https://github.com/ys-zong/conST/blob/main/src/utils_func.py#L51" 9 | } 10 | -------------------------------------------------------------------------------- /method/SCAN-IT/config/config_seqFISH_mouse_SScortex.json: -------------------------------------------------------------------------------- 1 | { 2 | "n_genes": 3000, 3 | "SomNode_k": 5, 4 | "knn_n_neighbours": 5, 5 | "n_h": 10, 6 | "n_epoch": 2000, 7 | "alpha_n_layers": 1, 8 | "n_neighbours": 10, 9 | "source": "https://github.com/zcang/SCAN-IT/blob/main/examples/seqFISH-mouse-SScortex/scanit.ipynb" 10 | } -------------------------------------------------------------------------------- /method/scanpy/config/config_default.json: -------------------------------------------------------------------------------- 1 | { 2 | "n_neighbors" : 15, 3 | "clustering" : "leiden", 4 | "source": "https://scanpy.readthedocs.io/en/stable/generated/scanpy.pp.pca.html,https://scanpy.readthedocs.io/en/stable/api/generated/scanpy.pp.neighbors.html, https://scanpy.readthedocs.io/en/stable/generated/scanpy.pp.highly_variable_genes.html" 5 | } -------------------------------------------------------------------------------- /method/CellCharter/config/config_dlpfc.json: -------------------------------------------------------------------------------- 1 | { 2 | "min_counts": 3, 3 | "n_genes": 5000, 4 | "n_latent": 5, 5 | "n_layers": 4, 6 | "aggregations": "mean", 7 | "convergence_tolerance": 0.001, 8 | "covariance_regularization": 1e-06, 9 | "source": "https://github.com/CSOgroup/cellcharter_analyses/blob/main/src/benchmarking/CellCharter/individual.py#R30" 10 | } 11 | 
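The CellCharter configs in this dump (config_1, config_2, config_default earlier, and config_dlpfc just above) differ only in a handful of keys — `n_layers` sweeps 3/4/5, and the dlpfc variant additionally changes `n_latent` and `n_genes`. Below is a small sketch that surfaces exactly which keys vary across a method's variants; purely illustrative tooling, not part of the repository:

```{python}
import json
from pathlib import Path

paths = sorted(Path("method/CellCharter/config").glob("config_*.json"))
configs = {p.stem: json.loads(p.read_text()) for p in paths}

# Collect every key used by any variant, then report those whose values differ.
keys = set().union(*configs.values()) - {"source", "reference", "Config_rationale"}
for key in sorted(keys):
    values = {name: cfg.get(key) for name, cfg in configs.items()}
    if len({repr(v) for v in values.values()}) > 1:
        print(key, values)  # for CellCharter: n_genes, n_latent, n_layers
```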
-------------------------------------------------------------------------------- /method/DRSC/drsc_env.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Create the DR.SC conda environment named drsc_env 4 | # conda env create -f DRSC.yml 5 | 6 | # Activate the environment 7 | # conda activate drsc_env 8 | 9 | # Install the required R packages 10 | Rscript -e "remotes::install_version(package = 'DR.SC', version = '3.3', repos = 'https://cran.uni-muenster.de/')" -------------------------------------------------------------------------------- /data/SEA_AD_data/SEA_AD_LICENSE.txt: -------------------------------------------------------------------------------- 1 | Seattle Alzheimer's Disease (https://portal.brain-map.org/explore/seattle-alzheimers-disease) MERSCOPE v1 MTG Dataset is licensed 2 | under the Allen Institute terms of use (see https://alleninstitute.org/citation-policy/ for the Allen Institute Citation Policy 3 | and https://alleninstitute.org/terms-of-use/ for the Allen Institute Terms of Use). 4 | -------------------------------------------------------------------------------- /method/spatialGE/spatialGE_env.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Create the spatialGE conda environment named spatialGE_env 4 | # conda env create -f spatialGE.yml -n spatialGE_env 5 | 6 | # Activate the environment 7 | # conda activate spatialGE_env 8 | 9 | # Install the required R packages 10 | Rscript -e "remotes::install_github('FridleyLab/spatialGE@1.2.0.0000')" 11 | 12 | -------------------------------------------------------------------------------- /method/stardust/stardust_env.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Create the stardust conda environment named stardust_env 4 | # conda env create -f stardust.yml 5 | 6 | # Activate the environment 7 | # source activate stardust_env 8 | 9 | # Install the required R packages 10 | Rscript -e "remotes::install_github('InfOmics/stardust', ref = 'f1b541704d4b4189b4daf4132289a084253349d9')" -------------------------------------------------------------------------------- /method/precast/precast_env.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Create the precast conda environment named precast_env 4 | # conda env create -f precast.yml 5 | 6 | # Activate the environment 7 | # conda activate precast_env 8 | 9 | # Install the required R packages 10 | Rscript -e "remotes::install_version(package = 'PRECAST', version = '1.6.3', repos = 'https://cran.uni-muenster.de/')" -------------------------------------------------------------------------------- /method/SpiceMix/config/config_default.json: -------------------------------------------------------------------------------- 1 | { 2 | "K": 15, 3 | "lambda_Sigma_x_inv": 1e-4, 4 | "device": "cuda:0", 5 | "dtype": "float64", 6 | "num_preiterations": 5, 7 | "num_iterations": 200, 8 | "preprocess": { 9 | "n_genes": null 10 | }, 11 | "source": "https://popari.readthedocs.io/en/latest/tutorial_gallery/preprocessing_demo.html" 12 | } 13 | -------------------------------------------------------------------------------- /method/SpiceMix/config/config_Visium_dlpfc.json: -------------------------------------------------------------------------------- 1 | { 2 | "K": 10, 3 | "lambda_Sigma_x_inv": 1e-6, 4 | "device": "cuda:0", 5 |
"dtype": "float64", 6 | "num_preiterations": 5, 7 | "num_iterations": 200, 8 | "preprocess": { 9 | "n_genes": 500 10 | }, 11 | "source": "https://github.com/ma-compbio/SpiceMix/blob/master/SpiceMix/main_Maynard2021.ipynb" 12 | } 13 | -------------------------------------------------------------------------------- /method/BANKSY/banksy_env.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Create the BANKSY conda environment named banksy_env 4 | # conda env create -f banksy.yml 5 | 6 | # Activate the environment 7 | # conda activate banksy_env 8 | 9 | # Install the required R packages 10 | Rscript -e "remotes::install_github('prabhakarlab/Banksy', dependencies = FALSE, ref = 'dbda6fde952e65f45409d9bca8e1f821746755cc')" 11 | 12 | -------------------------------------------------------------------------------- /metric/SpatialARI/SpatialARI_env.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Create the SpatialARI conda environment named SpatialARI_env 4 | # conda env create -f SpatialARI.yml -n SpatialARI_env 5 | 6 | # Activate the environment 7 | # conda activate SpatialARI_env 8 | 9 | # Install the required R packages 10 | Rscript -e "remotes::install_github('RoseYuan/ClusteringMetrics@5691a9e', dependencies=TRUE)" 11 | 12 | -------------------------------------------------------------------------------- /method/meringue/meringue_env.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Create the MERINGUE conda environment named scmeb_env 4 | # conda env create -f meringue.yml 5 | 6 | # Activate the environment 7 | # source activate meringue_env 8 | 9 | # Install the required R packages 10 | Rscript -e "remotes::install_github('JEFworks-Lab/MERINGUE', ref = 'ca9e2ccabd95680d9ca0b323a8a507c038f2ea13', build_vignettes = FALSE)" 11 | 12 | 13 | -------------------------------------------------------------------------------- /method/DRSC/DRSC.yml: -------------------------------------------------------------------------------- 1 | name: drsc_env 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | dependencies: 6 | - r-base=4.3.1 7 | - r-optparse=1.7.3 8 | - r-biocmanager=1.30.22 9 | - bioconductor-SingleCellExperiment=1.24.0 10 | - bioconductor-S4Vectors=0.40.2 11 | - r-crayon=1.5.2 12 | - r-Matrix=1.6.3 13 | - r-seurat=4.4.0 14 | - r-remotes=2.4.2 15 | - r-RcppArmadillo=0.12.6 16 | - r-igraph=1.5.1 17 | - r-curl=5.1.0 -------------------------------------------------------------------------------- /method/SC_MEB/scmeb_env.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Create the SC.MEB conda environment named scmeb_env 4 | # conda env create -f SC.MEB.yml 5 | 6 | # Activate the environment 7 | # source activate scmeb_env 8 | 9 | # Install the required R packages 10 | # conda run -n scmeb_env R -e "install.packages('SC.MEB')" 11 | Rscript -e "remotes::install_version(package = 'SC.MEB', version = '1.1', repos = 'https://cran.uni-muenster.de/')" 12 | 13 | -------------------------------------------------------------------------------- /method/SC_MEB/SC_MEB.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | - bioconda 4 | dependencies: 5 | - r-base=4.3.1 6 | - r-optparse=1.7.3 7 | - r-jsonlite=1.8.8 8 | - r-remotes=2.4.2 9 | - r-mclust=6.1 10 | - 
r-purrr=1.0.2 11 | - r-rcpparmadillo=0.12.8.3.0 12 | - r-BiocManager=1.30.22 13 | - bioconductor-SingleCellExperiment=1.24.0 14 | - bioconductor-BiocSingular=1.18.0 15 | - bioconductor-scater=1.30.1 16 | - bioconductor-scran=1.30.0 17 | -------------------------------------------------------------------------------- /method/maple/maple_env.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Create the maple conda environment named maple_env 4 | # conda env create -f maple.yml 5 | 6 | # Activate the environment 7 | # source activate maple_env 8 | 9 | # Install the required R packages 10 | Rscript -e "remotes::install_github('carter-allen/spruce', ref = '47b02300cc9a1d83213682bd78464115867d1763')" 11 | Rscript -e "remotes::install_github('carter-allen/maple', ref = 'b173e89a7bc82c6ae09c7e0709d09ed22082172d')" -------------------------------------------------------------------------------- /method/conST/conST.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | - pytorch 4 | - default 5 | dependencies: 6 | - python=3.9.16 7 | - torch=2.2.0 8 | - torchvision=0.17.0 9 | - torch_geometric=2.5.0 10 | - torch_scatter=2.1.2 11 | - torch_sparse=0.6.18 12 | - scikit-learn=1.4.1 13 | - umap-learn=0.5.5 14 | - scanpy=1.9.8 15 | - seaborn=0.13.2 16 | - scipy=1.12.0 17 | - networkx=3.2.1 18 | - pandas=2.2.0 19 | - anndata=0.10.5 20 | - timm=0.9.12 21 | - leidenalg=0.10.2 -------------------------------------------------------------------------------- /method/SOTIP/sotip.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - python=3.8.0 5 | - numpy=1.21.2 6 | - pandas=1.3.4 7 | - scipy=1.7.1 8 | - matplotlib=3.4.3 9 | - seaborn=0.11.2 10 | - scanpy=1.8.2 11 | - palettable=3.3.0 12 | - scikit-learn=1.0.1 13 | - networkx=2.6.3 14 | - shapely=1.8.0 15 | - pyemd=0.5.1 16 | - pip 17 | - git 18 | - pip: 19 | - squidpy==1.1.2 20 | - "git+https://github.com/TencentAILabHealthcare/SOTIP.git@d3b762cca2a527dd5bc51408924c807832c5e1bb#egg=sotip" -------------------------------------------------------------------------------- /method/bass/bass_env.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Create the BASS conda environment named bass_env 4 | # conda env create -f bass.yml -n bass_env 5 | 6 | # Activate the environment 7 | # source activate bass_env 8 | 9 | # Install the required R packages 10 | Rscript -e "remotes::install_github('xzhoulab/SPARK', ref = 'a8b4bf27b804604dfda53da42992f100b8e4e727', dependencies = FALSE)" 11 | Rscript -e "remotes::install_github('zhengli09/BASS', ref = '37980c94a99f4b01ad5fa63555b3c5ab8af82b7e', dependencies = FALSE)" 12 | 13 | 14 | -------------------------------------------------------------------------------- /method/precast/precast.yml: -------------------------------------------------------------------------------- 1 | name: precast_env 2 | channels: 3 | - conda-forge 4 | - bioconda 5 | dependencies: 6 | - r-base=4.3.1 7 | - r-optparse=1.7.3 8 | - r-jsonlite=1.8.8 9 | - r-biocmanager=1.30.22 10 | - bioconductor-SingleCellExperiment=1.24.0 11 | - bioconductor-S4Vectors=0.40.2 12 | - bioconductor-scater 13 | - r-ggpubr=0.6.0 14 | - r-gtools 15 | - r-crayon=1.5.2 16 | - r-Matrix=1.6.3 17 | - r-seurat=4.4.0 18 | - r-remotes=2.4.2 19 | - r-RcppArmadillo=0.12.6 20 | - r-igraph=1.5.1 21 | -
r-curl=5.1.0 22 | -------------------------------------------------------------------------------- /method/GraphST/GraphST.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - python=3.11.6 5 | - anndata=0.9.2 6 | - numpy=1.26.2 7 | - pandas=2.1.4 8 | - scanpy=1.9.6 9 | - pytorch=2.1.0 10 | - scipy=1.11.4 11 | - scikit-learn=1.3.2 12 | - scikit-misc=0.1.4 13 | - python-igraph=0.11.3 14 | - rpy2=3.5.11 15 | - tqdm=4.66.1 16 | - matplotlib-base=3.8.2 17 | - louvain=0.8.1 18 | - leidenalg=0.10.1 19 | - r-base=4.3.2 20 | - r-mclust=6.0.1 21 | - pip 22 | - pip: 23 | - GraphST==1.1.1 24 | - pot==0.9.1 25 | 26 | 27 | 28 | -------------------------------------------------------------------------------- /method/DeepST/DeepST.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | # - pytorch 4 | # - pyg 5 | - default 6 | dependencies: 7 | - python=3.9 8 | - scanpy=1.9.2 9 | - anndata==0.8.0 10 | - bokeh=2.3.1 11 | - h5py=3.8.0 12 | - imageio=2.9.0 13 | - leidenalg=0.9.1 14 | - matplotlib=3.7.0 15 | - numpy=1.23.5 16 | - python-igraph=0.10.4 17 | - python-louvain=0.15 18 | - scikit-learn=1.2.1 19 | - scikit-network=0.28.3 20 | - scipy=1.10.1 21 | - seaborn=0.12.2 22 | - tqdm=4.64.1 23 | - umap-learn=0.5.3 24 | - psutil=5.9.4 25 | - pandas=1.5.3 -------------------------------------------------------------------------------- /method/STAGATE/STAGATE.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | dependencies: 4 | - python=3.11.5 5 | - scanpy=1.9.6 6 | - tensorflow=2.14.0 7 | - anndata=0.10.3 8 | - numpy=1.26.2 9 | - pandas=2.1.4 10 | - scikit-learn=1.3.2 11 | - tqdm=4.66.1 12 | - scipy=1.11.4 13 | - python-igraph=0.11.3 14 | - louvain=0.8.1 15 | - matplotlib-base=3.8.2 16 | - rpy2=3.5.11 17 | - r-base=4.3.2 18 | - r-mclust=6.0.1 19 | - scikit-misc=0.1.4 20 | - pip 21 | - pip: 22 | - git+https://github.com/QIFEIDKN/STAGATE@48ce7f874c83a9f1f68187be00370181261ab7c5 23 | -------------------------------------------------------------------------------- /method/DeepST/DeepST_env.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Create the DeepST conda environment named deepst_env 4 | # conda env create -f DeepST.yml 5 | 6 | # Activate the environment 7 | # source activate deepst_env 8 | 9 | # Install the required Python packages (CPU builds of PyTorch and PyG) 10 | 11 | pip3 install torch==1.13.0 torchvision torchaudio --extra-index-url https://download.pytorch.org/whl/cpu --no-cache-dir #### CPU 12 | pip install pyg_lib torch_scatter torch_sparse torch_cluster torch_spline_conv torch_geometric -f https://data.pyg.org/whl/torch-1.13.0+cpu.html --no-cache-dir ### CPU 13 | -------------------------------------------------------------------------------- /method/SEDR/SEDR.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - conda-forge 3 | - pytorch 4 | - default 5 | dependencies: 6 | - python=3.11.3 7 | - pytorch=2.0.0 8 | - numpy=1.24.4 9 | - scanpy=1.9.6 10 | - python-igraph=0.11.3 11 | - leidenalg=0.10.1 12 | - louvain=0.8.1 13 | - anndata=0.9.1 14 | - rpy2=3.5.11 15 | - pandas=2.0.3 16 | - scipy=1.10.1 17 | - scikit-learn=1.2.2 18 | - tqdm=4.65 19 | - r-base=4.3.2 20 | - r-mclust=6.0.1 21 | - scikit-misc=0.1.4 22 | - pip 23 | - pip: 24 | -
git+https://github.com/JinmiaoChenLab/SEDR@8c273af3c520b663b2e83a4fbfdf462e0c7b8b7a -------------------------------------------------------------------------------- /.github/workflows/main.yml: -------------------------------------------------------------------------------- 1 | on: 2 | push: 3 | branches: 4 | - main 5 | - native 6 | 7 | jobs: 8 | contrib-readme-job: 9 | runs-on: ubuntu-latest 10 | name: A job to automate contrib in readme 11 | permissions: 12 | contents: write 13 | pull-requests: write 14 | steps: 15 | - name: Contribute List 16 | uses: akhilmhdh/contributors-readme-action@v2.3.10 17 | with: 18 | image_size: 75 19 | env: 20 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 21 | -------------------------------------------------------------------------------- /docs/modules/index.md: -------------------------------------------------------------------------------- 1 | # Modules 2 | 3 | The Spacehack 2.0 workflow consists of multiple module types: 4 | 5 | - [Datasets](data.md) to analyze 6 | - [Methods](method.md) to identify spatial domains 7 | - [Metrics](metric.md) to evaluate performance against a ground truth 8 | - [Consensus](consensus.md) to build a consensus clustering across multiple methods 9 | 10 | If you want to add a new module, make sure to follow the format and 11 | specification defined in its respective section. 12 | It is also worth having a look at the [Contribution guide](../CONTRIBUTING.md). 13 | 14 | ![Workflow](../img/workflow.svg) 15 | -------------------------------------------------------------------------------- /templates/data_optargs.schema.yaml: -------------------------------------------------------------------------------- 1 | $schema: "https://json-schema.org/draft/2020-12/schema" 2 | 3 | description: Which (optional) quality control parameters can the dataset use 4 | type: object 5 | properties: 6 | 7 | min_cells: 8 | description: What is the minimal number of cells in which a gene must be expressed to pass filtering? 9 | type: integer 10 | 11 | min_genes: 12 | description: What is the minimal number of genes a cell must express to pass filtering? 13 | type: integer 14 | 15 | min_counts: 16 | description: What is the minimum number of counts required for a cell to pass filtering? 17 | type: integer -------------------------------------------------------------------------------- /method/SpaceFlow/spaceflow.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - pytorch 3 | - conda-forge 4 | dependencies: 5 | - pytorch=1.13.1 6 | - torchaudio=0.13.1 7 | - torchvision=0.14.1 8 | - umap-learn=0.5.3 9 | - numpy=1.21.6 10 | - python=3.7.12 11 | - scikit-learn=1.0.2 12 | - scipy=1.7.3 13 | - mkl==2024.0 14 | - pip: 15 | - cmcrameri==1.7 16 | - gudhi==3.8.0 17 | - leidenalg==0.10.1 18 | - matplotlib==3.5.3 19 | - networkx==2.6.3 20 | - notebook==6.5.6 21 | - pandas==1.3.5 22 | - scanpy==1.9.3 23 | - spaceflow==1.0.4 24 | - squidpy==1.2.2 25 | - torch-geometric==2.3.1 26 | - torch-sparse==0.6.17 27 | - torch-scatter==2.1.1 -------------------------------------------------------------------------------- /templates/metric_optargs.schema.yaml: -------------------------------------------------------------------------------- 1 | $schema: "https://json-schema.org/draft/2020-12/schema" 2 | 3 | description: Which (optional) parameters can the metric use 4 | type: object 5 | properties: 6 | 7 | groundtruth: 8 | description: Does the metric need groundtruth labels?
9 | type: boolean 10 | 11 | embedding: 12 | description: Does the metric need embeddings? 13 | type: boolean 14 | 15 | config_file: 16 | description: Does the metric take an additional config file? 17 | type: boolean 18 | 19 | # Optional, only add when your metric requires this 20 | physical_coordinate: 21 | description: Does the metric take the physical coordinates of the sample? 22 | type: boolean -------------------------------------------------------------------------------- /data/abc_atlas_wmb_thalamus/LICENSE.txt: -------------------------------------------------------------------------------- 1 | ABC Atlas - Mouse Whole Brain by Allen Institute for Brain Science 2 | 3 | ABC Atlas - Mouse Whole Brain 4 | (https://knowledge.brain-map.org/data/LVDBJAW8BI5YSS1QUBG/collections) 5 | MERSCOPE v1 whole brain Data Collection is licensed under a 6 | Creative Commons Attribution 4.0 International License, and 7 | 10x scRNAseq whole brain Data Collection is licensed under a 8 | Creative Commons Attribution-NonCommercial 4.0 International License. 9 | 10 | See https://alleninstitute.org/citation-policy/ for the Allen Institute Citation 11 | Policy and https://alleninstitute.org/terms-of-use/ for the Allen Institute 12 | Terms of Use. 13 | 14 | See https://creativecommons.org/licenses/by/4.0/ and 15 | https://creativecommons.org/licenses/by-nc/4.0/ for a copy of each license. -------------------------------------------------------------------------------- /method/SCAN-IT/scanit.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - pyg 3 | - pytorch 4 | - anaconda 5 | - conda-forge 6 | dependencies: 7 | - python=3.10.14 8 | - umap-learn=0.5.5 9 | - gcc=12.1.0 10 | - anndata=0.10.3 11 | - gudhi=3.8.0 12 | - matplotlib=3.8.0 13 | # - torchvision=0.15.2 14 | #- torchaudio=0.12.1 #ytorch/noarch::pytorch-mutex-1.0-cuda; pytorch/linux-64::torchaudio-0.12.1-py310_cu116 <- 2.1.0 15 | - torchvision=0.16.0 16 | - torchaudio=2.1.0 17 | - cpuonly=2.0 18 | - pytorch=2.1.0 19 | - pyg=2.4.0 20 | - scanpy=1.9.6 21 | - scikit-learn=1.3.2 22 | - scipy=1.11.4 23 | - seaborn=0.12.2 24 | - statsmodels=0.14.0 25 | - numpy=1.26.2 26 | - pandas=2.1.4 27 | - networkx=2.6.3 28 | - leidenalg=0.10.1 29 | - somoclu=1.7.6 30 | - patsy=0.5.6 31 | - pip 32 | - pip: 33 | - somde==0.1.8 34 | - "git+https://github.com/zcang/SCAN-IT.git@ebf38949eea9348cd1791f392789a8a8c0ae1e47#egg=scanit" 35 | -------------------------------------------------------------------------------- /templates/method_optargs.schema.yaml: -------------------------------------------------------------------------------- 1 | $schema: "https://json-schema.org/draft/2020-12/schema" 2 | 3 | description: Which (optional) parameters can the method use 4 | type: object 5 | properties: 6 | 7 | matrix: 8 | description: What input does the method take? 9 | type: string 10 | enum: 11 | - counts 12 | - transform 13 | - dimensionality_reduction 14 | # - counts_or_transform 15 | 16 | integrated_feature_selection: 17 | description: Can the method use existing feature selections? 18 | type: boolean 19 | 20 | image: 21 | description: Can the method use H&E images? 22 | type: boolean 23 | 24 | neighbors: 25 | description: Can the method use existing neighbor definitions? 26 | type: boolean 27 | 28 | config_file: 29 | description: Does the method take an additional config file?
30 | type: boolean 31 | -------------------------------------------------------------------------------- /mkdocs.yml: -------------------------------------------------------------------------------- 1 | site_name: SACCELERATOR 2 | repo_url: https://github.com/SpatialHackathon/SACCELERATOR/ 3 | edit_uri: blob/native/docs/ 4 | copyright: Copyright 2025 SpaceHack 2.0 Organizing Committee 5 | 6 | nav: 7 | - Home: index.md 8 | - Usage: usage.md 9 | - Modules: 10 | - Overview: modules/index.md 11 | - Data: modules/data.md 12 | - Method: modules/method.md 13 | - Metric: modules/metric.md 14 | - Consensus: modules/consensus.md 15 | - Extending & Contributing: CONTRIBUTING.md 16 | - About: 17 | - SpaceHack: https://spatialhackathon.github.io/ 18 | - Repository: https://github.com/SpatialHackathon/SACCELERATOR 19 | - License: about/license.md 20 | 21 | theme: 22 | name: mkdocs 23 | highlightjs: true 24 | color_mode: auto 25 | user_color_mode_toggle: true 26 | 27 | plugins: 28 | - search 29 | - macros 30 | 31 | extra: 32 | repo_branch_url: https://github.com/SpatialHackathon/SACCELERATOR/tree/native 33 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | MIT No Attribution 2 | 3 | Copyright 2023 SpaceHack 2.0 Organizing Committee 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of this 6 | software and associated documentation files (the "Software"), to deal in the Software 7 | without restriction, including without limitation the rights to use, copy, modify, 8 | merge, publish, distribute, sublicense, and/or sell copies of the Software, and to 9 | permit persons to whom the Software is furnished to do so. 10 | 11 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, 12 | INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 13 | PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 14 | HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 15 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 16 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
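The three `templates/*_optargs.schema.yaml` files above spell out the optargs contract as JSON Schema (draft 2020-12). Below is a hedged validation sketch — it assumes `pyyaml` and `jsonschema` are installed, which this repository's environments do not pin:

```{python}
import json
from pathlib import Path

import yaml                      # assumption: pyyaml available
from jsonschema import validate  # assumption: jsonschema available

schema = yaml.safe_load(Path("templates/method_optargs.schema.yaml").read_text())
optargs = json.loads(Path("method/scanpy/scanpy_optargs.json").read_text())

validate(instance=optargs, schema=schema)  # raises ValidationError on mismatch
print("scanpy_optargs.json conforms to the method optargs schema")
```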
-------------------------------------------------------------------------------- /preprocessing/visualization/pdf_merge.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Author_and_contribution: Jieran Sun; Implemented visualization 4 | 5 | import argparse 6 | import shutil 7 | from pathlib import Path 8 | 9 | from PyPDF2 import PdfMerger 10 | 11 | parser = argparse.ArgumentParser(description="Merge all visualization PDFs into one PDF report") 12 | 13 | parser.add_argument("-d", "--out_dir", help="Output directory.", required=True) 14 | parser.add_argument("-p", "--pdfs", nargs="+", help="All input PDF files.", required=True) 15 | 16 | args = parser.parse_args() 17 | out_dir = Path(args.out_dir) 18 | 19 | # Create a merger object 20 | merger = PdfMerger() 21 | 22 | for pdf_file in args.pdfs: 23 | merger.append(pdf_file) 24 | # Remove the per-sample directory once its PDF has been appended 25 | pdf_file = Path(pdf_file) 26 | shutil.rmtree(pdf_file.parent, ignore_errors=True) 27 | 28 | # Write the merged output 29 | out_dir.mkdir(parents=True, exist_ok=True) 30 | merger.write(out_dir / "pp_report.pdf") 31 | merger.close() 32 | -------------------------------------------------------------------------------- /method/CellCharter/CellCharter.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - pytorch 3 | - conda-forge 4 | dependencies: 5 | - python=3.10.14 6 | - pytorch=1.12.1 7 | - torchvision=0.13.1 8 | - torchaudio=0.12.1 9 | - cudatoolkit=11.6 10 | - mkl=2024.0.0 11 | - scikit-misc=0.1.4 12 | # - torchaudio=0.12 13 | # - cudatoolkit 14 | # - scikit-learn=1.3.0 15 | # - rasterio=1.3.8 16 | # - urllib3=1.26.16 17 | # - typing-extensions=4.5.0 18 | # - numpy=1.23.4 19 | # - markdown-it-py=2.2.0 20 | # - torchmetrics=0.11.4 21 | # - scipy=1.10.1 22 | - scanpy=1.9.8 23 | # - pandas=2.2.1 24 | # - python-igraph=0.11.4 25 | # - igraph=0.10.10 26 | - pip 27 | - pip: 28 | - pyro-ppl==1.8.6 29 | - scvi-tools==0.20.3 30 | # - flax==0.7.2 31 | # - pycave==3.2.1 32 | # - jax==0.4.14 33 | # - jaxlib==0.4.14 34 | # - chex==0.1.7 35 | # - squidpy==1.3.0 36 | # - sknw==0.14 37 | - cellcharter==0.2.0 38 | # - spatialdata==0.1.0 39 | # - spatialdata-plot==0.2.0 40 | # - python-dateutil==2.9.0.post0 41 | -------------------------------------------------------------------------------- /workflows/path_config_test.yaml: -------------------------------------------------------------------------------- 1 | # This YAML file follows the structure below 2 | 3 | datasets: 4 | libd_dlpfc: 5 | env: data/libd_dlpfc/libd_dlpfc.yml 6 | script: data/libd_dlpfc/libd_dlpfc.r 7 | optargs: data/libd_dlpfc/libd_dlpfc_optargs.json 8 | 9 | neighbors_infos: 10 | delaunay_triangulation: 11 | script: preprocessing/neighbors/delaunay_triangulation/delaunay_triangulation.py 12 | env: preprocessing/neighbors/delaunay_triangulation/delaunay_triangulation.yml 13 | 14 | # Make sure each method's key is the same as its folder name 15 | methods: 16 | CellCharter: 17 | env: method/CellCharter/CellCharter.yml 18 | script: method/CellCharter/CellCharter.py 19 | optargs: method/CellCharter/CellCharter_optargs.json 20 | 21 | # All metrics as of 14.02.2024 22 | metrics: 23 | ARI: 24 | env: metric/ARI/ARI.yml 25 | script: metric/ARI/ARI.py 26 | optargs: metric/ARI/ARI_optargs.json 27 | 28 | # Make sure each config_files key matches the corresponding method name 29 | config_files: 30 | CellCharter: 31 | config_default: "config/config_default.json" 32 |
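Adding another module to the workflow only requires extending the mapping above; a sketch for a second `methods` entry, assuming the script inside the method folder follows the usual `{method}.py` naming convention (the GraphST script name is an assumption, not taken from this file):

```yaml
methods:
  CellCharter:
    env: method/CellCharter/CellCharter.yml
    script: method/CellCharter/CellCharter.py
    optargs: method/CellCharter/CellCharter_optargs.json
  GraphST:                                 # key must match the folder name under method/
    env: method/GraphST/GraphST.yml
    script: method/GraphST/GraphST.py      # assumed name; point this at the actual script file
    optargs: method/GraphST/GraphST_optargs.json
```

The same pattern applies to `datasets`, `metrics`, and `config_files`; the keys are used as wildcards by the Snakemake workflows, so they have to match the folder names exactly.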
-------------------------------------------------------------------------------- /.github/workflows/build_mkdocs.yml: -------------------------------------------------------------------------------- 1 | name: Build and deploy MkDocs to GitHub Pages 2 | 3 | on: 4 | pull_request: 5 | # Test build for PRs targeting default branch 6 | branches: 7 | - native 8 | push: 9 | # Deploy on push to default branch 10 | branches: 11 | - native 12 | 13 | permissions: 14 | contents: write 15 | 16 | jobs: 17 | build: 18 | runs-on: ubuntu-latest 19 | steps: 20 | - uses: actions/checkout@v4 21 | - uses: actions/setup-python@v5 22 | with: 23 | python-version: 3.13 24 | - name: Install dependencies 25 | run: | 26 | python -m pip install --upgrade pip 27 | pip install -r docs/requirements.txt 28 | 29 | - name: Build MkDocs site (strict) 30 | # Test build on PR to native branch 31 | if: github.event_name == 'pull_request' 32 | run: mkdocs build --strict 33 | 34 | - name: Deploy to GitHub Pages 35 | # Deploy on push to native branch 36 | if: github.event_name == 'push' && github.ref == 'refs/heads/native' 37 | run: mkdocs gh-deploy --force 38 | -------------------------------------------------------------------------------- /method/SpiceMix/README.md: -------------------------------------------------------------------------------- 1 | Here's an example of calling the `SpiceMix.py` script: 2 | ```{bash} 3 | python SpiceMix.py -m ~/scratch/SpaceHack2/data/LIBD_DLPFC/Br8100_151673/counts.mtx -c ~/scratch/SpaceHack2/data/LIBD_DLPFC/Br8100_151673/coordinates.tsv -o ~/scratch/SpaceHack2/data/LIBD_DLPFC/Br8100_151673/observations.tsv -d ./output_test --n_clusters 7 --seed 0 --config config/config_1.json 4 | ``` 5 | 6 | The config file should look something like this: 7 | ```{json} 8 | 9 | { 10 | "K": 15, 11 | "lambda_Sigma_x_inv": 1e-4, 12 | "device": "cuda:0", 13 | "dtype": "float64", 14 | "num_preiterations": 5, 15 | "num_iterations": 200 16 | } 17 | ``` 18 | If you want preprocessing to be done within the script (such as log normalization, HVG selection, neighborhood graph construction), specify the `preprocess` parameter: 19 | ```{json} 20 | 21 | { 22 | "K": 15, 23 | "lambda_Sigma_x_inv": 1e-4, 24 | "device": "cuda:0", 25 | "dtype": "float64", 26 | "num_preiterations": 5, 27 | "num_iterations": 200, 28 | "preprocess": { 29 | "hvgs": 3500 30 | } 31 | } 32 | ``` 33 | -------------------------------------------------------------------------------- /workflows/01_download.smk: -------------------------------------------------------------------------------- 1 | import os 2 | from shared.functions import get_git_directory 3 | 4 | # workflow specific setting 5 | configfile: "path_config.yaml" 6 | configfile: "excute_config.yaml" 7 | 8 | # Attach the specific github directory here 9 | GIT_DIR = get_git_directory(config) 10 | 11 | # Leave only datasets 12 | DATASETS = config.pop("datasets") 13 | datasets_selected = config["datasets_selected"] 14 | 15 | # Get all the dataset folder 16 | def get_all_input(wildcards): 17 | all_folder = [] 18 | for dataset in datasets_selected: 19 | all_folder.append(config["DATASET_DIR"] + "/" + dataset) 20 | return all_folder 21 | 22 | 23 | ############## starting snakemake pipelines ################## 24 | 25 | # Defining all output wanted from this snakemake 26 | rule all: 27 | input: 28 | get_all_input, 29 | 30 | rule download: 31 | output: 32 | dir=directory(config["DATASET_DIR"] + "/{dataset}"), 33 | conda: 34 | lambda wildcards: GIT_DIR + DATASETS[wildcards.dataset]["env"] 35 | params: 36 
| script=lambda wildcards: GIT_DIR + DATASETS[wildcards.dataset]["script"], 37 | shell: 38 | "{params.script} -o {output.dir}" 39 | -------------------------------------------------------------------------------- /method/Giotto/Giotto_env.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | # Create the Giotto conda environment named giotto_env 4 | #conda env create -f giotto.yml 5 | 6 | # Activate the environment 7 | #conda activate giotto_env 8 | 9 | # Install the required R packages 10 | Rscript -e "remotes::install_version('colorRamp2', version = '0.1.0', repos = 'https://cran.r-project.org/')" 11 | Rscript -e "remotes::install_bitbucket(repo = 'qzhudfci/smfishhmrf-r', ref='2ab48253591b2dd3c545e117c4256f92ecb287ee')" 12 | Rscript -e "remotes::install_version('irlba', version = '2.3.5.1', repos = 'https://cran.r-project.org/')" 13 | # Install Giotto packages 14 | Rscript -e "remotes::install_github('drieslab/GiottoUtils@v0.1.0', dependencies = FALSE)" # , ref = '7c8f0010de6c916228834823455f48ed5b3fa706')" 15 | Rscript -e "remotes::install_github('drieslab/GiottoClass@v0.1.0', dependencies = FALSE)" # , ref = 'fca6eb3f5ee6e8e7e9cfe8a0bb82721107f4872d')" 16 | Rscript -e "remotes::install_github('drieslab/GiottoData@v0.2.6.2', dependencies = FALSE)" # , ref = '50606245a01f151c6c308f3282f7b3fd87c67027')" 17 | Rscript -e "remotes::install_github('drieslab/GiottoVisuals@v0.1.0', dependencies = FALSE)" # , ref = '8a68d8840ba4724b9a6cbc223dc7d6ef6f88f050')" 18 | Rscript -e "remotes::install_github('drieslab/Giotto@v4.0.0', dependencies = FALSE)" # , ref = 'fc7a6a51efc6853ff43f6028d1cce9a6070537e2')" 19 | -------------------------------------------------------------------------------- /workflows/shared/functions.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pandas as pd 3 | 4 | 5 | def get_git_directory(config): 6 | if config.get("GIT_DIR") is not None: 7 | git_dir = config["GIT_DIR"] 8 | else: 9 | # Should change the SpaceHack directory. 
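# (resolution order: config["GIT_DIR"] from the workflow YAML first, then the GIT_DIR environment variable, then the default checkout path below)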
10 | git_dir = os.getenv("GIT_DIR", "/home/ubuntu/workspace/SpaceHack2023") 11 | 12 | if not git_dir.endswith("/"): 13 | git_dir += "/" 14 | return git_dir 15 | 16 | # Exclude hidden folders (e.g. the .ipynb checkpoints generated by the preprocessing scripts) and other unwanted directories 17 | def get_sample_dirs(data_dir): 18 | return [f.path for f in os.scandir(data_dir) if f.is_dir() and not f.name.startswith('.')] 19 | 20 | 21 | def check_files_in_folder(folder_path, file_list): 22 | # Get a list of all files in the folder 23 | files_in_folder = os.listdir(folder_path) 24 | # Check each file in the file_list 25 | for file in file_list: 26 | if file not in files_in_folder: 27 | return False 28 | return True 29 | 30 | 31 | def get_ncluster(file_path, sample, default_value=7): 32 | if not os.path.exists(file_path): 33 | return default_value 34 | try: 35 | df = pd.read_csv(file_path, sep="\t", index_col=0) 36 | df_filtered = df[df["directory"] == sample] 37 | return int(df_filtered["n_clusters"].mean()) 38 | except Exception: # fall back to the default if the file cannot be parsed 39 | return default_value 40 | -------------------------------------------------------------------------------- /templates/consensus_BC.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Author_and_contribution: Jieran Sun & Mark Robinson; implemented method 4 | # Author_and_contribution: Peiying Cai; created template 5 | # Author_and_contribution: ENTER YOUR NAME AND CONTRIBUTION HERE 6 | 7 | import argparse 8 | import pandas as pd 9 | from pathlib import Path 10 | 11 | # TODO adjust description 12 | description = "... to select base clusterings" 13 | 14 | parser = argparse.ArgumentParser(description=description) 15 | 16 | parser.add_argument( 17 | "-i", "--input_file", required=True, 18 | help="Input containing the aggregated labels." 19 | ) 20 | parser.add_argument( 21 | "-o", "--output_file", required=True, 22 | help="Desired output file." 23 | ) 24 | # TODO add additional arguments 25 | 26 | args = parser.parse_args() 27 | 28 | # Load input file 29 | label_df = pd.read_csv(args.input_file, sep="\t", index_col=0) 30 | 31 | ## Your code goes here 32 | # TODO 33 | # output_df: DataFrame with number of clusters as columns, clustering label names as values 34 | # Example: 35 | # 7 8 36 | # 0 method1_default_7_label method1_default_8_label 37 | # 1 method2_default_7_label method2_default_8_label 38 | # 2 method3_default_7_label method3_default_8_label 39 | 40 | ## Write output 41 | output_path = Path(args.output_file) 42 | output_path.parent.mkdir(parents=True, exist_ok=True) 43 | 44 | # Save the results 45 | output_df.to_csv(output_path, sep="\t", index=False) 46 | -------------------------------------------------------------------------------- /templates/metric.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Author_and_contribution: Niklas Mueller-Boetticher; created template 4 | # Author_and_contribution: ENTER YOUR NAME AND CONTRIBUTION HERE 5 | 6 | import argparse 7 | 8 | # TODO adjust description 9 | parser = argparse.ArgumentParser(description="Calculate metric ...") 10 | 11 | parser.add_argument( 12 | "-l", "--labels", help="Labels from domain clustering.", required=True 13 | ) 14 | parser.add_argument("-g", "--ground_truth", help="Groundtruth labels.", required=False) 15 | parser.add_argument( 16 | "-e", 17 | "--embedding", 18 | help="Embedding of points in latent space. Potential usage for metrics without groundtruth.",
19 | required=False, 20 | ) 21 | parser.add_argument( 22 | "-c", 23 | "--config", 24 | help="Optional config file (json) used to pass additional parameters.", 25 | required=False, 26 | ) # format should be json 27 | parser.add_argument("-o", "--out_file", help="Output file.", required=True) 28 | 29 | args = parser.parse_args() 30 | 31 | # Use these filepaths as input 32 | label_file = args.labels 33 | 34 | if args.ground_truth is not None: 35 | groundtruth_file = args.ground_truth 36 | if args.embedding is not None: 37 | embedding_file = args.embedding 38 | if args.config is not None: 39 | config_file = args.config 40 | 41 | 42 | ## Your code goes here 43 | # TODO 44 | # metric: float = ... output of the metric as float 45 | 46 | 47 | ## Write output 48 | from pathlib import Path 49 | 50 | Path(args.out_file).parent.mkdir(parents=True, exist_ok=True) 51 | 52 | with open(args.out_file, "w") as file: 53 | file.write(f"{metric:.5e}\n") 54 | -------------------------------------------------------------------------------- /templates/consensus_BC.r: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env Rscript 2 | 3 | # Author_and_contribution: Jieran Sun & Mark Robinson; implemented method 4 | # Author_and_contribution: Peiying Cai; created template 5 | # Author_and_contribution: ENTER YOUR NAME AND CONTRIBUTION HERE 6 | 7 | suppressPackageStartupMessages(library(optparse)) 8 | 9 | option_list <- list( 10 | make_option( 11 | c("-i", "--input_file"), 12 | type = "character", default = NULL, 13 | help = "Input containing the aggregated labels." 14 | ), 15 | make_option( 16 | c("-o", "--output_file"), 17 | type = "character", default = NULL, 18 | help = "Desired output file." 19 | ) 20 | # TODO add additional arguments 21 | ) 22 | 23 | # TODO adjust description 24 | description <- "... to select base clusterings" 25 | 26 | opt_parser <- OptionParser( 27 | usage = description, 28 | option_list = option_list 29 | ) 30 | opt <- parse_args(opt_parser) 31 | 32 | # Use these filepaths as input 33 | input_file <- opt$input_file 34 | output_file <- opt$output_file 35 | 36 | ##### Load files 37 | label_df <- read.delim(input_file, stringsAsFactors = FALSE, row.names = 1, numerals="no.loss") 38 | 39 | ## Your code goes here 40 | # TODO 41 | # output_df: data frame with number of clusters as column headers, and clustering label names as values 42 | # Example: 43 | # 7 8 44 | # method1_default_7_label method1_default_8_label 45 | # method2_default_7_label method2_default_8_label 46 | # method3_default_7_label method3_default_8_label 47 | 48 | 49 | ## Write output 50 | dir.create(dirname(output_file), showWarnings = FALSE, recursive = TRUE) 51 | 52 | # Save the results 53 | write.table(output_df, file = output_file, sep = "\t", col.names = NA, quote = FALSE) 54 | -------------------------------------------------------------------------------- /consensus/02_Cross_method_ARI/Cross_method_ARI.r: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env Rscript 2 | 3 | # Author_and_contribution: Jieran Sun & Mark Robinson; Created the script 4 | 5 | suppressPackageStartupMessages(library(optparse)) 6 | 7 | option_list <- list( 8 | make_option( 9 | c("-i", "--input_file"), 10 | type = "character", default = NULL, 11 | help = "Input containing the aggregated labels."
12 | ), 13 | make_option( 14 | c("-o", "--output_file"), 15 | type = "character", default = NULL, 16 | help = "Desired output file." 17 | ) 18 | ) 19 | 20 | description <- "Return cross-method ARI" 21 | 22 | opt_parser <- OptionParser( 23 | usage = description, 24 | option_list = option_list 25 | ) 26 | opt <- parse_args(opt_parser) 27 | 28 | # Use these filepaths as input 29 | input_file <- opt$input_file 30 | output_file <- opt$output_file 31 | 32 | #set.seed(seed) 33 | 34 | suppressPackageStartupMessages({ 35 | library(mclust) 36 | }) 37 | 38 | ##### Define ARI function 39 | calc_aris <- function(m, flavour="ARI") { 40 | a <- diag(ncol(m)) 41 | for(i in 1:(ncol(m)-1)) 42 | for(j in (i+1):ncol(m)) { 43 | if(flavour=="ARI") { 44 | # each unordered pair is computed once and mirrored 45 | a[i,j] <- a[j,i] <- mclust::adjustedRandIndex(m[,i], m[,j]) 46 | } else if(flavour=="sARI") { 47 | # TODO implement spatial ARI 48 | } 49 | } 50 | rownames(a) <- colnames(a) <- colnames(m) 51 | a 52 | } 53 | 54 | ##### Load files 55 | label_df <- read.delim(input_file, stringsAsFactors = FALSE, row.names = 1, numerals="no.loss") 56 | 57 | # Calculate cross-method ARI 58 | ari_mat <- calc_aris(label_df) 59 | 60 | dir.create(dirname(output_file), showWarnings = FALSE, recursive = TRUE) 61 | 62 | # Save the results 63 | write.table(ari_mat, file = output_file, sep = "\t", col.names = NA, quote = FALSE) 64 | -------------------------------------------------------------------------------- /metric/ARI/ARI.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Author_and_contribution: Niklas Mueller-Boetticher; created script 4 | 5 | import argparse 6 | 7 | parser = argparse.ArgumentParser( 8 | description="Calculate Adjusted Rand Index (scikit-learn)" 9 | ) 10 | 11 | parser.add_argument( 12 | "-l", "--labels", help="Labels from domain clustering.", required=True 13 | ) 14 | parser.add_argument("-g", "--ground_truth", help="Groundtruth labels.", required=False) 15 | parser.add_argument( 16 | "-e", 17 | "--embedding", 18 | help="Embedding of points in latent space. 
Potential usage for metrics without groundtruth.", 19 | required=False, 20 | ) 21 | parser.add_argument( 22 | "-c", 23 | "--config", 24 | help="Optional config file used to pass additional parameters.", 25 | required=False, 26 | ) 27 | parser.add_argument("-o", "--out_file", help="Output file.", required=True) 28 | 29 | args = parser.parse_args() 30 | 31 | # Use these filepaths as input 32 | label_file = args.labels 33 | 34 | if args.ground_truth is not None: 35 | groundtruth_file = args.ground_truth 36 | 37 | 38 | ## Your code goes here 39 | if args.ground_truth is None: 40 | raise Exception("Groundtruth labels needed to calculate the Adjusted Rand Index") 41 | 42 | import pandas as pd 43 | from sklearn.metrics import adjusted_rand_score 44 | 45 | domains = pd.read_table(label_file, index_col=0)["label"].astype("category").cat.codes 46 | groundtruth = ( 47 | pd.read_table(groundtruth_file, index_col=0)["label"].astype("category").cat.codes 48 | ) 49 | 50 | common_index = domains.index.intersection(groundtruth.index) 51 | 52 | metric = adjusted_rand_score(groundtruth.loc[common_index], domains.loc[common_index]) 53 | 54 | 55 | ## Write output 56 | from pathlib import Path 57 | 58 | Path(args.out_file).parent.mkdir(parents=True, exist_ok=True) 59 | 60 | with open(args.out_file, "w") as file: 61 | file.write(f"{metric:.5e}\n") 62 | -------------------------------------------------------------------------------- /workflows/excute_config_test.yaml: -------------------------------------------------------------------------------- 1 | # This yaml file contains all the parameters and settings for the workflow. 2 | # 1. Universal parameters: Used in all smk files, set once 3 | # 2. Process-specific parameters: some other parameters, if needed 4 | # 3. Datasets, methods and metrics: 5 | # select which ones to include in the workflow, comment out unwanted ones. 6 | # For methods, you can also specify cluster numbers for specific datasets. 7 | 8 | ###### Universal parameters ####### 9 | # Directories; modify based on your own setup 10 | GIT_DIR: /work/PRTNR/CHUV/DIR/rgottar1/spatial/Cluster_Benchmark/SpaceHack2023 11 | DATASET_DIR: /work/PRTNR/CHUV/DIR/rgottar1/spatial/Cluster_Benchmark/data 12 | SEED: 2023 13 | 14 | ###### Datasets selected for execution ####### 15 | datasets_selected: 16 | - "libd_dlpfc" 17 | 18 | ###### Methods selected for execution ####### 19 | methods_selected: 20 | - "CellCharter" 21 | 22 | # If some datasets specify the number of clusters, add it here 23 | n_clusters: 24 | libd_dlpfc: [9] 25 | 26 | ###### Metrics selected for execution ####### 27 | metrics_selected: 28 | - "ARI" 29 | 30 | ###### Base clustering selection parameters ####### 31 | # n_neighbors below is used as in scanpy (sc.pp.neighbors()). 32 | selection_criteria: 33 | - "Cross_method_ARI" 34 | - "Smoothness_entropy" 35 | - "Manual_selection" 36 | n_neighbors: 6 37 | 38 | ###### Consensus clustering parameters ####### 39 | bc_numbers: [8] # number of base clustering results 40 | consensus_algorithms: 41 | - "lca" 42 | - "kmode" 43 | - "weighted" 44 | # In case you need to re-define the desired cluster number, do it here. Otherwise the n_clusters value would be used
45 | n_clust_consensus: 46 | abc_atlas_wmb_thalamus: [16, 19, 20, 21, 24, 28, 32] 47 | 48 | # For weighted clustering 49 | lambda: null 50 | 51 | # For cross-method entropy 52 | cross_method_entropy: true -------------------------------------------------------------------------------- /data/pachter_simulation/pachter_simulation.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Made by Paul Kiessling pakiessling@ukaachen.de 4 | 5 | 6 | import os 7 | import argparse 8 | import tempfile 9 | import shutil 10 | from pypdl import Downloader 11 | 12 | LINKS = { 13 | "https://zenodo.org/records/13944111/files/concordex_sim.zip": "fb7c79fd9cec2c79e3b74fb50be40ff4" 14 | } 15 | 16 | 17 | def download_links(links, temp_dir): 18 | headers = { 19 | "User-Agent": "Mozilla/5.0 (Windows NT 6.1; Win64; x64; rv:47.0) Gecko/20100101 Firefox/47.0" 20 | } 21 | dl = Downloader(headers=headers) 22 | for link, checksum in links.items(): 23 | print(f"Downloading {link}") 24 | file = dl.start( 25 | url=link, 26 | file_path=temp_dir, 27 | segments=10, 28 | display=True, 29 | multithread=True, 30 | block=True, 31 | retries=3, 32 | ) 33 | if not file.validate_hash(checksum, "md5"): 34 | raise ValueError(f"File {file} is corrupted") 35 | 36 | 37 | 38 | 39 | def main(): 40 | # Set up command-line argument parser 41 | parser = argparse.ArgumentParser( 42 | description="Download the Pachter simulation data and convert it to SpaceHack format." 43 | ) 44 | 45 | # Add arguments for output folder 46 | parser.add_argument( 47 | "-o", "--out_dir", help="Output directory to write files to.", required=True 48 | ) 49 | 50 | # Parse the command-line arguments 51 | args = parser.parse_args() 52 | 53 | # Download and process 54 | with tempfile.TemporaryDirectory() as temp_dir: 55 | download_links(LINKS, temp_dir) 56 | for file in os.listdir(temp_dir): 57 | if file.endswith(".tar.gz") or file.endswith(".zip"): 58 | shutil.unpack_archive(os.path.join(temp_dir, file), args.out_dir) 59 | 60 | 61 | 62 | if __name__ == "__main__": 63 | main() 64 | -------------------------------------------------------------------------------- /templates/metric.r: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env Rscript 2 | 3 | # Author_and_contribution: Niklas Mueller-Boetticher; created template 4 | # Author_and_contribution: ENTER YOUR NAME AND CONTRIBUTION HERE 5 | 6 | suppressPackageStartupMessages(library(optparse)) 7 | 8 | # TODO adjust description 9 | option_list <- list( 10 | make_option( 11 | c("-l", "--labels"), 12 | type = "character", default = NULL, 13 | help = "Labels from domain clustering." 14 | ), 15 | make_option( 16 | c("-g", "--ground_truth"), 17 | type = "character", default = NA, 18 | help = "Groundtruth labels." 19 | ), 20 | make_option( 21 | c("-e", "--embedding"), 22 | type = "character", default = NA, 23 | help = "Embedding of points in latent space. Potential usage for metrics without groundtruth." 24 | ), 25 | # format should be json 26 | make_option( 27 | c("-c", "--config"), 28 | type = "character", default = NA, 29 | help = "Optional config file (json) used to pass additional parameters." 30 | ), 31 | make_option( 32 | c("-o", "--out_file"), 33 | type = "character", default = NULL, 34 | help = "Output file." 35 | ) 36 | ) 37 | 38 | # TODO adjust description 39 | description <- "Calculate metric ..."
40 | 41 | opt_parser <- OptionParser( 42 | usage = description, 43 | option_list = option_list 44 | ) 45 | opt <- parse_args(opt_parser) 46 | 47 | # Use these filepaths as input 48 | label_file <- opt$labels 49 | 50 | if (!is.na(opt$ground_truth)) { 51 | groundtruth_file <- opt$ground_truth 52 | } 53 | if (!is.na(opt$embedding)) { 54 | embedding_file <- opt$embedding 55 | } 56 | if (!is.na(opt$config)) { 57 | config_file <- opt$config 58 | } 59 | 60 | 61 | ## Your code goes here 62 | # TODO 63 | # metric = ... # output of the metric as float 64 | 65 | 66 | ## Write output 67 | outfile <- file(opt$out_file) 68 | dir.create(dirname(opt$out_file), showWarnings = FALSE, recursive = TRUE) 69 | 70 | writeLines(format(metric, digits = 6, scientific = TRUE), outfile) 71 | close(outfile) 72 | -------------------------------------------------------------------------------- /method/search_res.r: -------------------------------------------------------------------------------- 1 | binary_search <- function( 2 | spe, 3 | do_clustering, 4 | extract_nclust, 5 | n_clust_target, 6 | resolution_update = 2, 7 | resolution_init = 1, 8 | resolution_boundaries=NULL, 9 | num_rs = 100, 10 | tolerance = 1e-3, 11 | ...) { 12 | 13 | # Initialize boundaries 14 | lb <- rb <- NULL 15 | n_clust <- -1 16 | 17 | if (!is.null(resolution_boundaries)){ 18 | lb <- resolution_boundaries[1] 19 | rb <- resolution_boundaries[2] 20 | } else { 21 | res <- resolution_init 22 | result <- do_clustering(spe, resolution = res, ...) 23 | # Adjust cluster_ids extraction per method 24 | n_clust <- extract_nclust(result) 25 | if (n_clust > n_clust_target) { 26 | while (n_clust > n_clust_target && res > 1e-5) { 27 | rb <- res 28 | res <- res/resolution_update 29 | result <- do_clustering(spe, resolution = res, ...) 30 | n_clust <- extract_nclust(result) 31 | } 32 | lb <- res 33 | } else if (n_clust < n_clust_target) { 34 | while (n_clust < n_clust_target) { 35 | lb <- res 36 | res <- res*resolution_update 37 | result <- do_clustering(spe, resolution = res, ...) 38 | n_clust <- extract_nclust(result) 39 | } 40 | rb <- res 41 | } 42 | if (n_clust == n_clust_target) { lb <- rb <- res } 43 | } 44 | 45 | i <- 0 46 | while ((rb - lb > tolerance || lb == rb) && i < num_rs) { 47 | mid <- sqrt(lb * rb) # bisect on a log scale (geometric mean of the bounds) 48 | message("Resolution: ", mid) 49 | result <- do_clustering(spe, resolution = mid, ...) 50 | n_clust <- extract_nclust(result) 51 | if (n_clust == n_clust_target || lb == rb) break 52 | if (n_clust > n_clust_target) { 53 | rb <- mid 54 | } else { 55 | lb <- mid 56 | } 57 | i <- i + 1 58 | } 59 | 60 | # Warning if target not met 61 | if (n_clust != n_clust_target) { 62 | warning(sprintf("Warning: n_clust = %d not found in binary search, returning best approximation with res = %f and n_clust = %d. (rb = %f, lb = %f, i = %d)", n_clust_target, mid, n_clust, rb, lb, i)) 63 | } 64 | return(result) 65 | } -------------------------------------------------------------------------------- /metric/Davies-Bouldin/Davies-Bouldin.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Author_and_contribution: Niklas Mueller-Boetticher; created template 4 | # Author_and_contribution: Kirti Biharie; implemented Davies-Bouldin score 5 | 6 | import argparse 7 | 8 | parser = argparse.ArgumentParser(description="Calculate Davies-Bouldin Score (scikit-learn)") 9 | 10 | parser.add_argument( 11 | "-l", "--labels", help="Labels from domain clustering.", required=True 12 | ) 13 | parser.add_argument("-g", "--ground_truth", help="Groundtruth labels.", required=False) 14 | parser.add_argument( 15 | "-e", 16 | "--embedding", 17 | help="Embedding of points in latent space. Potential usage for metrics without groundtruth.", 18 | required=False, 19 | ) 20 | parser.add_argument( 21 | "-c", 22 | "--config", 23 | help="Optional config file (json) used to pass additional parameters.", 24 | required=False, 25 | ) # format should be json 26 | parser.add_argument("-o", "--out_file", help="Output file.", required=True) 27 | 28 | args = parser.parse_args() 29 | 30 | # Use these filepaths as input 31 | label_file = args.labels 32 | 33 | if args.ground_truth is not None: 34 | groundtruth_file = args.ground_truth 35 | if args.embedding is not None: 36 | embedding_file = args.embedding 37 | if args.config is not None: 38 | config_file = args.config 39 | 40 | 41 | ## Your code goes here 42 | if args.embedding is None: 43 | raise Exception("Embeddings needed to calculate the Davies-Bouldin Score") 44 | 45 | import pandas as pd 46 | import sklearn.metrics 47 | 48 | embeddings = pd.read_table(embedding_file, index_col=0) 49 | labels = pd.read_table(label_file, index_col=0)["label"].astype("category").cat.codes 50 | 51 | common_index = labels.index.intersection(embeddings.index) 52 | embeddings = embeddings.loc[common_index] 53 | labels = labels.loc[common_index] 54 | 55 | metric = sklearn.metrics.davies_bouldin_score(embeddings, labels) 56 | 57 | ## Write output 58 | from pathlib import Path 59 | 60 | Path(args.out_file).parent.mkdir(parents=True, exist_ok=True) 61 | 62 | with open(args.out_file, "w") as file: 63 | file.write(f"{metric:.5e}\n") -------------------------------------------------------------------------------- /metric/Calinski-Harabasz/Calinski-Harabasz.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Author_and_contribution: Niklas Mueller-Boetticher; created template 4 | # Author_and_contribution: Kirti Biharie; implemented Calinski-Harabasz score 5 | 6 | import argparse 7 | 8 | parser = argparse.ArgumentParser(description="Calculate Calinski-Harabasz Score (scikit-learn)") 9 | 10 | parser.add_argument( 11 | "-l", "--labels", help="Labels from domain clustering.", required=True 12 | ) 13 | parser.add_argument("-g", "--ground_truth", help="Groundtruth labels.", required=False) 14 | parser.add_argument( 15 | "-e", 16 | "--embedding", 17 | help="Embedding of points in latent space. 
Potential usage for metrics without groundtruth.", 18 | required=False, 19 | ) 20 | parser.add_argument( 21 | "-c", 22 | "--config", 23 | help="Optional config file (json) used to pass additional parameters.", 24 | required=False, 25 | ) # format should be json 26 | parser.add_argument("-o", "--out_file", help="Output file.", required=True) 27 | 28 | args = parser.parse_args() 29 | 30 | # Use these filepaths as input 31 | label_file = args.labels 32 | 33 | if args.ground_truth is not None: 34 | groundtruth_file = args.ground_truth 35 | if args.embedding is not None: 36 | embedding_file = args.embedding 37 | if args.config is not None: 38 | config_file = args.config 39 | 40 | 41 | ## Your code goes here 42 | if args.embedding is None: 43 | raise Exception("Embeddings needed to calculate the Calinski-Harabasz Score") 44 | 45 | import pandas as pd 46 | import sklearn.metrics 47 | 48 | embeddings = pd.read_table(embedding_file, index_col=0) 49 | labels = pd.read_table(label_file, index_col=0)["label"].astype("category").cat.codes 50 | 51 | common_index = labels.index.intersection(embeddings.index) 52 | embeddings = embeddings.loc[common_index] 53 | labels = labels.loc[common_index] 54 | 55 | metric = sklearn.metrics.calinski_harabasz_score(embeddings, labels) 56 | 57 | ## Write output 58 | from pathlib import Path 59 | 60 | Path(args.out_file).parent.mkdir(parents=True, exist_ok=True) 61 | 62 | with open(args.out_file, "w") as file: 63 | file.write(f"{metric:.5e}\n") -------------------------------------------------------------------------------- /metric/FMI/FMI.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Author_and_contribution: Niklas Mueller-Boetticher; created template 4 | # Author_and_contribution: Kirti Biharie; implemented fmi score 5 | 6 | import argparse 7 | 8 | parser = argparse.ArgumentParser(description="Calculate Fowlkes-Mallows index (scikit-learn)") 9 | 10 | parser.add_argument( 11 | "-l", "--labels", help="Labels from domain clustering.", required=True 12 | ) 13 | parser.add_argument("-g", "--ground_truth", help="Groundtruth labels.", required=False) 14 | parser.add_argument( 15 | "-e", 16 | "--embedding", 17 | help="Embedding of points in latent space. 
Potential usage for metrics without groundtruth.", 18 | required=False, 19 | ) 20 | parser.add_argument( 21 | "-c", 22 | "--config", 23 | help="Optional config file (json) used to pass additional parameters.", 24 | required=False, 25 | ) # format should be json 26 | parser.add_argument("-o", "--out_file", help="Output file.", required=True) 27 | 28 | args = parser.parse_args() 29 | 30 | # Use these filepaths as input 31 | label_file = args.labels 32 | 33 | if args.ground_truth is not None: 34 | groundtruth_file = args.ground_truth 35 | if args.embedding is not None: 36 | embedding_file = args.embedding 37 | if args.config is not None: 38 | config_file = args.config 39 | 40 | 41 | ## Your code goes here 42 | if args.ground_truth is None: 43 | raise Exception("Groundtruth labels needed to calculate the Fowlkes-Mallows index") 44 | 45 | import pandas as pd 46 | import sklearn.metrics 47 | 48 | ground_truth = pd.read_table(groundtruth_file, index_col=0)["label"].astype("category").cat.codes 49 | labels = pd.read_table(label_file, index_col=0)["label"].astype("category").cat.codes 50 | 51 | common_index = labels.index.intersection(ground_truth.index) 52 | ground_truth = ground_truth.loc[common_index] 53 | labels = labels.loc[common_index] 54 | 55 | metric = sklearn.metrics.fowlkes_mallows_score(ground_truth, labels) 56 | 57 | ## Write output 58 | from pathlib import Path 59 | 60 | Path(args.out_file).parent.mkdir(parents=True, exist_ok=True) 61 | 62 | with open(args.out_file, "w") as file: 63 | file.write(f"{metric:.5e}\n") -------------------------------------------------------------------------------- /metric/Completeness/Completeness.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Author_and_contribution: Niklas Mueller-Boetticher; created template 4 | # Author_and_contribution: Kirti Biharie; implemented completeness score 5 | 6 | import argparse 7 | 8 | parser = argparse.ArgumentParser(description="Calculate Completeness Score (scikit-learn)") 9 | 10 | parser.add_argument( 11 | "-l", "--labels", help="Labels from domain clustering.", required=True 12 | ) 13 | parser.add_argument("-g", "--ground_truth", help="Groundtruth labels.", required=False) 14 | parser.add_argument( 15 | "-e", 16 | "--embedding", 17 | help="Embedding of points in latent space. 
Potential usage for metrics without groundtruth.", 18 | required=False, 19 | ) 20 | parser.add_argument( 21 | "-c", 22 | "--config", 23 | help="Optional config file (json) used to pass additional parameters.", 24 | required=False, 25 | ) # format should be json 26 | parser.add_argument("-o", "--out_file", help="Output file.", required=True) 27 | 28 | args = parser.parse_args() 29 | 30 | # Use these filepaths as input 31 | label_file = args.labels 32 | 33 | if args.ground_truth is not None: 34 | groundtruth_file = args.ground_truth 35 | if args.embedding is not None: 36 | embedding_file = args.embedding 37 | if args.config is not None: 38 | config_file = args.config 39 | 40 | 41 | ## Your code goes here 42 | if args.ground_truth is None: 43 | raise Exception("Groundtruth labels needed to calculate the Completeness Score") 44 | 45 | import pandas as pd 46 | import sklearn.metrics 47 | 48 | ground_truth = pd.read_table(groundtruth_file, index_col=0)["label"].astype("category").cat.codes 49 | labels = pd.read_table(label_file, index_col=0)["label"].astype("category").cat.codes 50 | 51 | common_index = labels.index.intersection(ground_truth.index) 52 | ground_truth = ground_truth.loc[common_index] 53 | labels = labels.loc[common_index] 54 | 55 | metric = sklearn.metrics.completeness_score(ground_truth, labels) 56 | 57 | ## Write output 58 | from pathlib import Path 59 | 60 | Path(args.out_file).parent.mkdir(parents=True, exist_ok=True) 61 | 62 | with open(args.out_file, "w") as file: 63 | file.write(f"{metric:.5e}\n") -------------------------------------------------------------------------------- /metric/Homogeneity/Homogeneity.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Author_and_contribution: Niklas Mueller-Boetticher; created template 4 | # Author_and_contribution: Kirti Biharie; implemented homogeneity score 5 | 6 | import argparse 7 | 8 | parser = argparse.ArgumentParser(description="Calculate Homogeneity Score (scikit-learn)") 9 | 10 | parser.add_argument( 11 | "-l", "--labels", help="Labels from domain clustering.", required=True 12 | ) 13 | parser.add_argument("-g", "--ground_truth", help="Groundtruth labels.", required=False) 14 | parser.add_argument( 15 | "-e", 16 | "--embedding", 17 | help="Embedding of points in latent space. 
Potential usage for metrics without groundtruth.", 18 | required=False, 19 | ) 20 | parser.add_argument( 21 | "-c", 22 | "--config", 23 | help="Optional config file (json) used to pass additional parameters.", 24 | required=False, 25 | ) # format should be json 26 | parser.add_argument("-o", "--out_file", help="Output file.", required=True) 27 | 28 | args = parser.parse_args() 29 | 30 | # Use these filepaths as input 31 | label_file = args.labels 32 | 33 | if args.ground_truth is not None: 34 | groundtruth_file = args.ground_truth 35 | if args.embedding is not None: 36 | embedding_file = args.embedding 37 | if args.config is not None: 38 | config_file = args.config 39 | 40 | 41 | ## Your code goes here 42 | if args.ground_truth is None: 43 | raise Exception("Groundtruth labels needed to calculate the Homogeneity Score") 44 | 45 | import pandas as pd 46 | import sklearn.metrics 47 | 48 | ground_truth = pd.read_table(groundtruth_file, index_col=0)["label"].astype("category").cat.codes 49 | labels = pd.read_table(label_file, index_col=0)["label"].astype("category").cat.codes 50 | 51 | common_index = labels.index.intersection(ground_truth.index) 52 | ground_truth = ground_truth.loc[common_index] 53 | labels = labels.loc[common_index] 54 | 55 | metric = sklearn.metrics.homogeneity_score(ground_truth, labels) 56 | 57 | ## Write output 58 | from pathlib import Path 59 | 60 | Path(args.out_file).parent.mkdir(parents=True, exist_ok=True) 61 | 62 | with open(args.out_file, "w") as file: 63 | file.write(f"{metric:.5e}\n") 64 | -------------------------------------------------------------------------------- /metric/V_measure/V_measure.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Author_and_contribution: Niklas Mueller-Boetticher; created script 4 | 5 | import argparse 6 | 7 | parser = argparse.ArgumentParser(description="Calculate V-measure (scikit-learn)") 8 | 9 | parser.add_argument( 10 | "-l", "--labels", help="Labels from domain clustering.", required=True 11 | ) 12 | parser.add_argument("-g", "--ground_truth", help="Groundtruth labels.", required=False) 13 | parser.add_argument( 14 | "-e", 15 | "--embedding", 16 | help="Embedding of points in latent space. 
Potential usage for metrics without groundtruth.", 17 | required=False, 18 | ) 19 | parser.add_argument( 20 | "-c", 21 | "--config", 22 | help="Optional config file used to pass additional parameters.", 23 | required=False, 24 | ) 25 | parser.add_argument("-o", "--out_file", help="Output file.", required=True) 26 | 27 | args = parser.parse_args() 28 | 29 | # Use these filepaths as input 30 | label_file = args.labels 31 | 32 | if args.ground_truth is not None: 33 | groundtruth_file = args.ground_truth 34 | if args.config is not None: 35 | config_file = args.config 36 | 37 | 38 | ## Your code goes here 39 | if args.ground_truth is None: 40 | raise Exception("Groundtruth labels needed to calculate the V-measure.") 41 | 42 | if args.config is None: 43 | raise Exception("Config file not provided.") 44 | 45 | import json 46 | 47 | import pandas as pd 48 | import os 49 | from sklearn.metrics import v_measure_score 50 | 51 | with open(config_file, "r") as f: 52 | config = json.load(f) 53 | 54 | domains = pd.read_table(label_file, index_col=0)["label"].astype("category").cat.codes 55 | groundtruth = ( 56 | pd.read_table(groundtruth_file, index_col=0)["label"].astype("category").cat.codes 57 | ) 58 | common_index = domains.index.intersection(groundtruth.index) 59 | 60 | metric = v_measure_score( 61 | groundtruth.loc[common_index], domains.loc[common_index], beta=config["beta"] 62 | ) 63 | 64 | ## Write output 65 | out_file_path = args.out_file 66 | out_dir = os.path.dirname(out_file_path) 67 | os.makedirs(out_dir, exist_ok=True) 68 | 69 | with open(out_file_path, "w") as file: 70 | file.write(f"{metric:.5e}\n") 71 | -------------------------------------------------------------------------------- /workflows/05_aggregation.smk: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | from pathlib import Path 4 | from shared.functions import check_files_in_folder, get_git_directory, get_sample_dirs 5 | 6 | configfile: "path_config.yaml" 7 | configfile: "excute_config.yaml" 8 | 9 | GIT_DIR = Path(get_git_directory(config)) 10 | 11 | # Resolve the dataset directory and the selected datasets 12 | DATASET_DIR = Path(config["DATASET_DIR"]) 13 | datasets_selected = config["datasets_selected"] 14 | 15 | def generate_input_files(data_dir): 16 | 17 | # Function to check for the existence of domains.tsv in a sample folder 18 | def has_domains(sample_path): 19 | return any( 20 | os.path.isfile(os.path.join(root, "domains.tsv")) 21 | for root, dirs, files in os.walk(sample_path) 22 | ) 23 | 24 | 25 | 26 | # one aggregation target per sample directory that contains clustering results 27 | aggregate_files = [ 28 | f"{sample}/combined_methods.tsv" 29 | for sample in get_sample_dirs(data_dir) 30 | # reuse the has_domains helper defined above 31 | # instead of duplicating the os.walk() check 32 | if has_domains(sample) 33 | 34 | ] 35 | 36 | return aggregate_files 37 | 38 | 39 | def generate_all_input(wildcards): 40 | all_input = [] 41 | 42 | for dataset in datasets_selected: 43 | data_dir = DATASET_DIR / dataset 44 | if not data_dir.is_dir(): 45 | continue 46 | 47 | all_input += generate_input_files(data_dir=data_dir) 48 | 49 | return all_input 50 | 51 | rule all: 52 | input: 53 | generate_all_input, 54 | 55 | 56 | rule aggregate_nclusters: 57 | input: 58 | results_folder=DATASET_DIR / "{dataset}" / "{sample}", 59 | script=GIT_DIR / "consensus" / "01_Results_Aggregation" / "Results_Aggregation.py", 60 | output: 61 | file=DATASET_DIR / "{dataset}" / "{sample}" / "combined_methods.tsv", 62 | wildcard_constraints:
63 | dataset="[a-zA-Z0-9_-]+", 64 | sample="[a-zA-Z0-9_-]+", 65 | conda: 66 | GIT_DIR / "consensus" / "01_Results_Aggregation" / "Results_Aggregation.yaml" 67 | shell: 68 | """ 69 | {input.script} \ 70 | -i {input.results_folder} \ 71 | -o {output.file} \ 72 | """ -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | ![Logo](img/logo.png) 2 | 3 | # SACCELERATOR 4 | 5 | SACCELERATOR is the outcome of SpaceHack 2.0, a community-driven project to (not) benchmark domain identification methods for spatially-resolved transcriptomics data. 6 | 7 | ## SACCELERATOR - a flexible framework for applying spatially aware clustering methods 8 | 9 | Spatial omics have transformed tissue architecture and cellular heterogeneity analysis by integrating molecular data with spatial localization. In spatially resolved transcriptomics, identifying spatial domains is critical for analysis of anatomical regions within heterogeneous datasets and understanding tissue function. Since 2020, more than 50 spatially aware clustering methods have been developed for this task. However, the reliability of existing benchmarks is undermined by their narrow focus on Visium and brain tissue datasets, as well as the dependence on questionable ground truth annotations. Here, we implemented a consensus framework that surpasses traditional benchmarking practices. 10 | 11 | Our framework comprises a community-driven benchmark-like platform that streamlines data formatting, method integration, and metric evaluation while accommodating new methods and datasets. Currently, the platform includes 22 spatially aware clustering methods across 15 datasets spanning 9 technologies and diverse tissue types. The benchmark approach uncovered significant limitations in generalizability and reproducibility where methods that perform well on healthy tissues often falter on cancer samples. We also found that anatomical labels commonly used as ground truths are often biased, potentially error-prone, and in some cases, unsuitable for benchmarking efforts. 12 | 13 | In light of these issues, we adopt a flexible expert-in-the-loop consensus-driven approach. This goes beyond traditional ensemble/consensus methods, and allows researchers to interact with intermediate results to determine which tools should be used to generate a consensus. We believe that the inclusion of an expert-in-the-loop is critical to ensure that the computational analysis matches the biological question at hand, and we believe that when the focus of the analysis is to uncover novel biological discoveries, tissue experts are accessible more often than not. 14 | 15 | ## Citation 16 | 17 | If you are using SACCELERATOR please cite 18 | 19 | > Sun, J. et al. Beyond benchmarking: an expert-guided consensus approach to spatially aware clustering. bioRxiv https://doi.org/10.1101/2025.06.23.660861 (2025). 
20 | -------------------------------------------------------------------------------- /metric/MCC/MCC.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Author_and_contribution: Niklas Mueller-Boetticher; created template 4 | # Author_and_contribution: Thomas Chartrand; created script 5 | 6 | import argparse 7 | 8 | parser = argparse.ArgumentParser(description="Calculate Matthew's correlation coefficient (scikit-learn)") 9 | 10 | parser.add_argument( 11 | "-l", "--labels", help="Labels from domain clustering.", required=True 12 | ) 13 | parser.add_argument("-g", "--ground_truth", help="Groundtruth labels.", required=False) 14 | parser.add_argument( 15 | "-e", 16 | "--embedding", 17 | help="Embedding of points in latent space. Potential usage for metrics without groundtruth.", 18 | required=False, 19 | ) 20 | parser.add_argument( 21 | "-c", 22 | "--config", 23 | help="Optional config file used to pass additional parameters.", 24 | required=False, 25 | ) 26 | parser.add_argument("-o", "--out_file", help="Output file.", required=True) 27 | parser.add_argument( 28 | "--matched_labels", 29 | help="Flag indicating ground-truth and clustering labels have already been matched.", 30 | action='store_true', 31 | ) 32 | 33 | args = parser.parse_args() 34 | 35 | # Use these filepaths as input 36 | label_file = args.labels 37 | 38 | if args.ground_truth is not None: 39 | groundtruth_file = args.ground_truth 40 | if args.config is not None: 41 | config_file = args.config 42 | 43 | 44 | ## Your code goes here 45 | if args.ground_truth is None: 46 | raise Exception("Groundtruth labels needed.") 47 | 48 | import pandas as pd 49 | from scipy.optimize import linear_sum_assignment 50 | from sklearn.metrics import matthews_corrcoef 51 | 52 | domains = pd.read_table(label_file, index_col=0)["label"].astype("category").cat.codes 53 | groundtruth = ( 54 | pd.read_table(groundtruth_file, index_col=0)["label"].astype("category").cat.codes 55 | ) 56 | common_index = domains.index.intersection(groundtruth.index) 57 | groundtruth = groundtruth.loc[common_index] 58 | domains = domains.loc[common_index] 59 | 60 | if not args.matched_labels: 61 | contingency_table = pd.crosstab(domains, groundtruth) 62 | row_ind, col_ind = linear_sum_assignment(contingency_table, maximize=True) 63 | domains = domains.map(dict(zip(row_ind, col_ind))) 64 | 65 | metric = matthews_corrcoef(groundtruth, domains) 66 | 67 | ## Write output 68 | from pathlib import Path 69 | 70 | Path(args.out_file).parent.mkdir(parents=True, exist_ok=True) 71 | 72 | with open(args.out_file, "w") as file: 73 | file.write(f"{metric:.5e}\n") 74 | -------------------------------------------------------------------------------- /preprocessing/transformation/log1p.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Author_and_contribution: Niklas Mueller-Boetticher; created script 4 | 5 | import argparse 6 | 7 | # TODO adjust description 8 | parser = argparse.ArgumentParser(description="log1p transformation using scanpy") 9 | 10 | parser.add_argument( 11 | "-c", "--coordinates", help="Path to coordinates (as tsv).", required=True 12 | ) 13 | parser.add_argument("-m", "--matrix", help="Path to counts (as mtx).", required=True) 14 | parser.add_argument( 15 | "-f", "--features", help="Path to features (as tsv).", required=True 16 | ) 17 | parser.add_argument( 18 | "-o", "--observations", help="Path to observations (as tsv).", 
required=True 19 | ) 20 | parser.add_argument("-d", "--out_dir", help="Output directory.", required=True) 21 | parser.add_argument( 22 | "--config", 23 | help="Optional config file (json) used to pass additional parameters.", 24 | required=False, 25 | ) 26 | 27 | args = parser.parse_args() 28 | 29 | from pathlib import Path 30 | 31 | out_dir = Path(args.out_dir) 32 | 33 | # Output files 34 | transformed_counts_file = out_dir / "counts.mtx" 35 | # if additional output files are required write it also to out_dir 36 | 37 | # Use these filepaths as input ... 38 | coord_file = args.coordinates 39 | matrix_file = args.matrix 40 | feature_file = args.features 41 | observation_file = args.observations 42 | 43 | if args.config is not None: 44 | config_file = args.config 45 | 46 | 47 | # ... or AnnData if you want 48 | def get_anndata(args): 49 | # Untested template 50 | import anndata as ad 51 | import pandas as pd 52 | import scipy as sp 53 | 54 | X = sp.io.mmread(args.matrix) 55 | if sp.sparse.issparse(X): 56 | X = X.tocsr() 57 | observations = pd.read_table(args.observations, index_col=0) 58 | features = pd.read_table(args.features, index_col=0) 59 | coordinates = ( 60 | pd.read_table(args.coordinates, index_col=0) 61 | .loc[observations.index, :] 62 | .to_numpy() 63 | ) 64 | 65 | adata = ad.AnnData( 66 | X=X, obs=observations, var=features, obsm={"spatial": coordinates} 67 | ) 68 | 69 | return adata 70 | 71 | 72 | adata = get_anndata(args) 73 | 74 | ## Your code goes here 75 | import scanpy as sc 76 | 77 | sc.pp.normalize_total(adata) 78 | sc.pp.log1p(adata) 79 | 80 | transformed_counts = adata.X 81 | 82 | 83 | ## Write output 84 | import scipy as sp 85 | 86 | out_dir.mkdir(parents=True, exist_ok=True) 87 | sp.io.mmwrite(transformed_counts_file, transformed_counts, precision=5) 88 | -------------------------------------------------------------------------------- /metric/Entropy/Entropy.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Author_and_contribution: Niklas Mueller-Boetticher; created template 4 | # Author_and_contribution: Kirti Biharie; implemented Entropy 5 | 6 | import argparse 7 | 8 | parser = argparse.ArgumentParser(description="Calculate Shannon's Entropy") 9 | 10 | parser.add_argument( 11 | "-l", "--labels", help="Labels from domain clustering.", required=True 12 | ) 13 | parser.add_argument("-g", "--ground_truth", help="Groundtruth labels.", required=False) 14 | parser.add_argument( 15 | "-e", 16 | "--embedding", 17 | help="Embedding of points in latent space. 
Potential usage for metrics without groundtruth.", 18 | required=False, 19 | ) 20 | parser.add_argument( 21 | "-c", 22 | "--config", 23 | help="Optional config file (json) used to pass additional parameters.", 24 | required=False, 25 | ) # format should be json 26 | parser.add_argument("-o", "--out_file", help="Output file.", required=True) 27 | 28 | args = parser.parse_args() 29 | 30 | # Use these filepaths as input 31 | label_file = args.labels 32 | 33 | if args.ground_truth is not None: 34 | groundtruth_file = args.ground_truth 35 | if args.embedding is not None: 36 | embedding_file = args.embedding 37 | if args.config is not None: 38 | config_file = args.config 39 | 40 | 41 | ## Your code goes here 42 | if args.ground_truth is None: 43 | raise Exception("Groundtruth labels needed to calculate Shannon's Entropy") 44 | 45 | import pandas as pd 46 | import sklearn.metrics 47 | import math 48 | 49 | ground_truth = pd.read_table(groundtruth_file, index_col=0)["label"].astype("category").cat.codes 50 | labels = pd.read_table(label_file, index_col=0)["label"].astype("category").cat.codes 51 | 52 | common_index = labels.index.intersection(ground_truth.index) 53 | ground_truth = ground_truth.loc[common_index] 54 | labels = labels.loc[common_index] 55 | 56 | df = pd.concat([labels, ground_truth],axis=1) 57 | df.columns = ["pred", "true"] 58 | total_pred = df.groupby("pred").size() 59 | counts = df.groupby(["pred", "true"]).size() 60 | 61 | # For every predicted cluster 62 | metric = -sum((total_pred.loc[pred]/len(common_index)) * 63 | # For every groundtruth class: calculate Shannon's entropy 64 | sum((count/total_pred.loc[pred]) * math.log2(count/total_pred.loc[pred]) 65 | for count in counts.loc[pred]) 66 | for pred in df["pred"].unique()) 67 | 68 | ## Write output 69 | from pathlib import Path 70 | 71 | Path(args.out_file).parent.mkdir(parents=True, exist_ok=True) 72 | 73 | with open(args.out_file, "w") as file: 74 | file.write(f"{metric:.5e}\n") -------------------------------------------------------------------------------- /metric/LISI/LISI.r: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env Rscript 2 | 3 | # Author_and_contribution: Niklas Mueller-Boetticher; created template 4 | # Author_and_contribution: Kirti Biharie; implemented LISI score 5 | 6 | suppressPackageStartupMessages(library(optparse)) 7 | 8 | option_list <- list( 9 | make_option( 10 | c("-l", "--labels"), 11 | type = "character", default = NULL, 12 | help = "Labels from domain clustering." 13 | ), 14 | make_option( 15 | c("-g", "--ground_truth"), 16 | type = "character", default = NA, 17 | help = "Groundtruth labels." 18 | ), 19 | make_option( 20 | c("-e", "--embedding"), 21 | type = "character", default = NA, 22 | help = "Embedding of points in latent space. Potential usage for metrics without groundtruth." 23 | ), 24 | # format should be json 25 | make_option( 26 | c("-c", "--config"), 27 | type = "character", default = NA, 28 | help = "Optional config file (json) used to pass additional parameters." 29 | ), 30 | make_option( 31 | c("-o", "--out_file"), 32 | type = "character", default = NULL, 33 | help = "Output file." 
34 | ) 35 | ) 36 | 37 | description <- "Calculate LISI Score" 38 | 39 | opt_parser <- OptionParser( 40 | usage = description, 41 | option_list = option_list 42 | ) 43 | opt <- parse_args(opt_parser) 44 | 45 | # Use these filepaths as input 46 | label_file <- opt$labels 47 | 48 | if (!is.na(opt$ground_truth)) { 49 | groundtruth_file <- opt$ground_truth 50 | } 51 | if (!is.na(opt$embedding)) { 52 | embedding_file <- opt$embedding 53 | } 54 | if (!is.na(opt$config)) { 55 | config_file <- opt$config 56 | } 57 | 58 | 59 | ## Your code goes here 60 | library(lisi) 61 | library(rjson) 62 | 63 | if (is.na(opt$ground_truth)) { 64 | stop("Groundtruth labels needed to calculate the LISI Score") 65 | } 66 | 67 | if (is.na(opt$embedding)) { 68 | stop("Embeddings needed to calculate the LISI Score") 69 | } 70 | 71 | if (is.na(opt$config)) { 72 | stop("Config file not provided") 73 | } 74 | 75 | ground_truth <- read.delim(groundtruth_file, sep="\t", row.names=1) 76 | embeddings <- read.delim(embedding_file, sep="\t", row.names=1) 77 | config <- fromJSON(file=config_file) 78 | 79 | common_index <- intersect(rownames(ground_truth), rownames(embeddings)) 80 | ground_truth <- ground_truth[common_index,,drop=FALSE] 81 | embeddings <- embeddings[common_index,,drop=FALSE] 82 | 83 | metric <- mean(compute_lisi(embeddings, ground_truth, "label", perplexity=config$perplexity)[,"label"]) 84 | 85 | ## Write output 86 | outfile <- file(opt$out_file) 87 | dir.create(dirname(opt$out_file), showWarnings = FALSE, recursive = TRUE) 88 | 89 | writeLines(format(metric, digits = 6, scientific = TRUE), outfile) 90 | close(outfile) 91 | -------------------------------------------------------------------------------- /CITATION.cff: -------------------------------------------------------------------------------- 1 | # This CITATION.cff file was generated with cffinit. 2 | # Visit https://bit.ly/cffinit to generate yours today! 3 | 4 | cff-version: 1.2.0 5 | title: >- 6 | SpaceHack 2.0: an expert in the loop consensus driven 7 | framework for spatially aware clustering 8 | message: >- 9 | If you use this software, please cite it using the 10 | metadata from this file. 11 | type: software 12 | authors: 13 | - name: SpaceHack 2.0. Participants 14 | repository-code: 'https://github.com/SpatialHackathon/SpaceHack2023' 15 | url: 'https://spatialhackathon.github.io/past.html' 16 | abstract: >- 17 | Spatial omics have transformed tissue architecture and 18 | cellular heterogeneity analysis by integrating molecular 19 | data with spatial localization. In spatially resolved 20 | transcriptomics, identifying spatial domains is critical 21 | for analysis of anatomical regions within heterogeneous 22 | datasets and understanding tissue function. Since 2020, 23 | more than 50 spatially aware clustering methods have been 24 | developed for this task. However, the reliability of 25 | existing benchmarks is undermined by their narrow focus on 26 | Visium and brain tissue datasets, as well as the 27 | dependence on questionable ground truth annotations. Here, 28 | we implemented a consensus framework that surpasses 29 | traditional benchmarking practices. 30 | 31 | 32 | Our framework comprises a community-driven benchmark-like 33 | platform that streamlines data formatting, method 34 | integration, and metric evaluation while accommodating new 35 | methods and datasets. Currently, the platform includes 22 36 | spatially aware clustering methods across 15 datasets 37 | spanning 9 technologies and diverse tissue types. 
The 38 | benchmark approach uncovered significant limitations in 39 | generalizability and reproducibility: methods that 40 | perform well on healthy tissues often falter on cancer 41 | samples. We also found that anatomical labels commonly 42 | used as ground truths are often biased, potentially 43 | error-prone, and in some cases, unsuitable for 44 | benchmarking efforts. 45 | 46 | 47 | In light of these issues, we adopt a flexible 48 | expert-in-the-loop consensus-driven approach. This goes 49 | beyond traditional ensemble/consensus methods and allows 50 | researchers to interact with intermediate results to 51 | determine which tools should be used to generate a 52 | consensus. We believe that the inclusion of an 53 | expert-in-the-loop is critical to ensure that the 54 | computational analysis matches the biological question at 55 | hand, and that when the focus of the analysis 56 | is to uncover novel biological discoveries, tissue 57 | experts are accessible more often than not. 58 | license: MIT-0 59 | -------------------------------------------------------------------------------- /metric/jaccard/jaccard.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Author_and_contribution: Niklas Mueller-Boetticher; created template 4 | # Author_and_contribution: Thomas Chartrand; created script 5 | 6 | import argparse 7 | 8 | parser = argparse.ArgumentParser(description="Calculate Jaccard score (scikit-learn)") 9 | 10 | parser.add_argument( 11 | "-l", "--labels", help="Labels from domain clustering.", required=True 12 | ) 13 | parser.add_argument("-g", "--ground_truth", help="Groundtruth labels.", required=False) 14 | parser.add_argument( 15 | "-e", 16 | "--embedding", 17 | help="Embedding of points in latent space. 
Potential usage for metrics without groundtruth.", 18 | required=False, 19 | ) 20 | parser.add_argument( 21 | "-c", 22 | "--config", 23 | help="Optional config file used to pass additional parameters.", 24 | required=False, 25 | ) 26 | parser.add_argument("-o", "--out_file", help="Output file.", required=True) 27 | parser.add_argument( 28 | "--matched_labels", 29 | help="Flag indicating ground-truth and clustering labels have already been matched.", 30 | action='store_true', 31 | ) 32 | 33 | args = parser.parse_args() 34 | 35 | # Use these filepaths as input 36 | label_file = args.labels 37 | 38 | if args.ground_truth is not None: 39 | groundtruth_file = args.ground_truth 40 | if args.config is not None: 41 | config_file = args.config 42 | 43 | 44 | ## Your code goes here 45 | if args.ground_truth is None: 46 | raise Exception("Groundtruth labels needed.") 47 | 48 | import pandas as pd 49 | from scipy.optimize import linear_sum_assignment 50 | from sklearn.metrics import jaccard_score 51 | 52 | domains = pd.read_table(label_file, index_col=0)["label"].astype("category").cat.codes 53 | groundtruth = ( 54 | pd.read_table(groundtruth_file, index_col=0)["label"].astype("category").cat.codes 55 | ) 56 | common_index = domains.index.intersection(groundtruth.index) 57 | groundtruth = groundtruth.loc[common_index] 58 | domains = domains.loc[common_index] 59 | 60 | if not args.matched_labels: 61 | contingency_table = pd.crosstab(domains, groundtruth) 62 | row_ind, col_ind = linear_sum_assignment(contingency_table, maximize=True) 63 | domains = domains.map(dict(zip(row_ind, col_ind))) 64 | 65 | metric = jaccard_score(groundtruth, domains, average='weighted') 66 | 67 | domain_scores = jaccard_score(groundtruth, domains, average=None) 68 | domains_df = pd.DataFrame({ 69 | "cluster": ["all", *sorted(groundtruth.unique())], 70 | "jaccard_score": [metric, *domain_scores] 71 | }) 72 | 73 | ## Write output 74 | from pathlib import Path 75 | 76 | Path(args.out_file).parent.mkdir(parents=True, exist_ok=True) 77 | 78 | with open(args.out_file, "w") as file: 79 | file.write(domains_df.to_json(orient='records')) -------------------------------------------------------------------------------- /metric/README.md: -------------------------------------------------------------------------------- 1 | # Metric modules 2 | 3 | ## Implementing a new dataset module 4 | 5 | To implement a new metric follow the [Contribution guide](../CONTRIBUTING.md) and make sure you adopt all the necessary conventions specified in this document. 6 | 7 | For examples have a look 8 | [here for a method in Python](./ARI/) or 9 | [here for a method in R](./LISI/). 10 | 11 | ## Metric module layout and interface 12 | 13 | Metric modules require 3 files (see templates). '{metric}' in the file names should be 14 | replaced by the name of your module and all files placed in a subfolder of the same name. 15 | 16 | * `{metric}.yml`: dependencies of the metric module script following the format: 17 | ```yaml 18 | channels: 19 | - conda-forge 20 | dependencies: 21 | - anndata=0.10.3 22 | - gitpython=3.1.40 23 | ``` 24 | 25 | * `{metric}_optargs.json`: defining optional arguments for the workflow following the format: 26 | ```json 27 | { 28 | "groundtruth": true, # Does the metric need groundtruth labels? (boolean) 29 | "embedding": false, # Does the metric need embeddings? (boolean) 30 | "config_file": true # Does the metric take an additional config file? 
(boolean) 31 | } 32 | ``` 33 | 34 | Optionally, add the following if your metric requires it: 35 | 36 | ``` 37 | physical_coordinate: 38 | description: Does the metric take the physical coordinates of the sample? 39 | type: boolean 40 | ``` 41 | 42 | * `{metric}.py/.r`: metric module script. 43 | * Check the TODOs in the `metric.py` or `metric.r` [template](../templates/). 44 | * The command line arguments are fixed and should not be modified. 45 | * See further instructions below. 46 | 47 | 48 | ### Input Format 49 | 50 | * `Labels File (-l, --labels)`: Path to a file containing cluster labels. Format: Text file where each row corresponds to a label for a specific observation. 51 | 52 | Optional Files: 53 | 54 | * `Ground Truth File (-g, --ground_truth)`: Path to a file containing ground truth labels. Use this for metrics requiring true labels for comparison. 55 | * `Embedding File (-e, --embedding)`: Path to a file containing latent space embeddings. Useful for metrics that do not rely on ground truth labels. 56 | * `Config File (-c, --config)`: Path to an optional JSON file with additional parameters for metric calculation. 57 | 58 | ### Output Format 59 | 60 | The script writes the calculated metric to the specified output file (`-o, --out_file`) in scientific notation with five decimal places. 61 | 62 | ### Example usage of module scripts (Testing) 63 | 64 | ```sh 65 | python metric.py -l labels.txt -g ground_truth.txt -o result.txt 66 | ``` 67 | 68 | ### Add to workflow 69 | 70 | * Add your metric to the excute_config.yaml under `Metrics selected for execution`. 71 | * Add your metric scripts to the path_config.yaml under `metrics`. 72 | -------------------------------------------------------------------------------- /preprocessing/neighbors/delaunay_triangulation/delaunay_triangulation.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Author_and_contribution: Niklas Mueller-Boetticher; created script 4 | # Author_and_contribution: Qirong Mao; modified output file format 5 | 6 | import argparse 7 | 8 | # TODO adjust description 9 | parser = argparse.ArgumentParser( 10 | description="Neighbor definition using Delaunay triangulation" 11 | ) 12 | 13 | parser.add_argument( 14 | "-c", "--coordinates", help="Path to coordinates (as tsv).", required=True 15 | ) 16 | parser.add_argument( 17 | "-m", "--matrix", help="Path to (transformed) counts (as mtx).", required=True 18 | ) 19 | parser.add_argument( 20 | "-f", "--features", help="Path to features (as tsv).", required=True 21 | ) 22 | parser.add_argument( 23 | "-o", "--observations", help="Path to observations (as tsv).", required=True 24 | ) 25 | parser.add_argument("-d", "--out_dir", help="Output directory.", required=True) 26 | parser.add_argument( 27 | "--config", 28 | help="Optional config file (json) used to pass additional parameters.", 29 | required=False, 30 | ) 31 | 32 | args = parser.parse_args() 33 | 34 | # Output files 35 | from pathlib import Path 36 | 37 | out_dir = Path(args.out_dir) 38 | 39 | spatial_connectivities_file = out_dir / "spatial_connectivities.mtx" 40 | ##spatial_distances_file = out_dir / "spatial_distances.mtx" 41 | 42 | 43 | # Use these filepaths and inputs ... 44 | coord_file = args.coordinates 45 | matrix_file = args.matrix 46 | feature_file = args.features 47 | observation_file = args.observations 48 | 49 | 50 | # ... 
or AnnData if you want 51 | def get_anndata(args): 52 | # Untested template 53 | import anndata as ad 54 | import pandas as pd 55 | import scipy as sp 56 | 57 | X = sp.io.mmread(args.matrix) 58 | if sp.sparse.issparse(X): 59 | X = X.tocsr() 60 | observations = pd.read_table(args.observations, index_col=0) 61 | features = pd.read_table(args.features, index_col=0) 62 | coordinates = ( 63 | pd.read_table(args.coordinates, index_col=0) 64 | .loc[observations.index, :] 65 | .to_numpy() 66 | ) 67 | 68 | adata = ad.AnnData( 69 | X=X, obs=observations, var=features, obsm={"spatial": coordinates} 70 | ) 71 | 72 | return adata 73 | 74 | 75 | adata = get_anndata(args) 76 | 77 | ## Your code goes here 78 | import squidpy as sq 79 | 80 | sq.gr.spatial_neighbors(adata, delaunay=True, coord_type="generic") 81 | 82 | neighbors = adata.obsp["spatial_connectivities"].astype(int) 83 | ##distance = adata.obsp["spatial_distances"].astype(float) 84 | 85 | ## Write output 86 | import scipy as sp 87 | 88 | out_dir.mkdir(parents=True, exist_ok=True) 89 | 90 | sp.io.mmwrite(spatial_connectivities_file, neighbors) 91 | ##sp.io.mmwrite(spatial_distances_file, distance) 92 | -------------------------------------------------------------------------------- /metric/NMI/NMI.r: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env Rscript 2 | 3 | # Author_and_contribution: Niklas Mueller-Boetticher; created template 4 | # Author_and_contribution: Mark D. Robinson; coded the domain-specific F1 5 | 6 | suppressPackageStartupMessages(library(optparse)) 7 | 8 | # TODO adjust description 9 | option_list <- list( 10 | make_option( 11 | c("-l", "--labels"), 12 | type = "character", default = NULL, 13 | help = "Labels from domain clustering." 14 | ), 15 | make_option( 16 | c("-g", "--ground_truth"), 17 | type = "character", default = NA, 18 | help = "Groundtruth labels." 19 | ), 20 | make_option( 21 | c("-e", "--embedding"), 22 | type = "character", default = NA, 23 | help = "Embedding of points in latent space. Potential usage for metrics without groundtruth." 24 | ), 25 | # format should be json 26 | make_option( 27 | c("-c", "--config"), 28 | type = "character", default = NA, 29 | help = "Optional config file (json) used to pass additional parameters." 30 | ), 31 | make_option( 32 | c("-o", "--out_file"), 33 | type = "character", default = NULL, 34 | help = "Output file." 
35 | ) 36 | ) 37 | 38 | 39 | description <- "Calculate NMI (normalized mutual information)" 40 | 41 | opt_parser <- OptionParser( 42 | usage = description, 43 | option_list = option_list 44 | ) 45 | opt <- parse_args(opt_parser) 46 | 47 | # Use these filepaths as input 48 | label_file <- opt$labels 49 | 50 | if (!is.na(opt$ground_truth)) { 51 | groundtruth_file <- opt$ground_truth 52 | } 53 | if (!is.na(opt$embedding)) { 54 | embedding_file <- opt$embedding 55 | } 56 | if (!is.na(opt$config)) { 57 | config_file <- opt$config 58 | } 59 | 60 | 61 | ## Code for calculating metric goes here 62 | ## -------------------------------------- 63 | 64 | 65 | 66 | library(aricode) 67 | 68 | # # for testing - start 69 | # label_file <- "results/libd_dlpfc/Br5595_151670/SpaGCN/domains.tsv" 70 | # groundtruth_file <- "data/libd_dlpfc/Br5595_151670/labels.tsv" 71 | # outfile <- "NMI.txt" 72 | # # for testing - stop 73 | 74 | domains <- read.delim(label_file, sep="\t", row.names = 1) 75 | groundtruth <- read.delim(groundtruth_file, sep="\t", row.names = 1) 76 | 77 | rn <- intersect(rownames(domains), rownames(groundtruth)) 78 | 79 | # subset to common set 80 | domains <- domains[rn,,drop = FALSE] 81 | groundtruth <- groundtruth[rn,,drop = FALSE] 82 | 83 | metric <- NMI(domains$label, groundtruth$label) 84 | 85 | print(metric) 86 | 87 | ## Write output 88 | dir.create(dirname(opt$out_file), showWarnings = FALSE, recursive = TRUE) 89 | 90 | outfile <- file(opt$out_file) 91 | writeLines(format(metric, digits = 6, scientific = TRUE), outfile) 92 | close(outfile) 93 | -------------------------------------------------------------------------------- /preprocessing/feature_selection/highly_variable_genes_scanpy.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Author_and_contribution: Niklas Mueller-Boetticher; created script 4 | 5 | import argparse 6 | 7 | # TODO adjust description 8 | parser = argparse.ArgumentParser(description="HVG selection using scanpy") 9 | 10 | parser.add_argument( 11 | "-c", "--coordinates", help="Path to coordinates (as tsv).", required=True 12 | ) 13 | parser.add_argument( 14 | "-m", "--matrix", help="Path to (transformed) counts (as mtx).", required=True 15 | ) 16 | parser.add_argument( 17 | "-f", "--features", help="Path to features (as tsv).", required=True 18 | ) 19 | parser.add_argument( 20 | "-o", "--observations", help="Path to observations (as tsv).", required=True 21 | ) 22 | # parser.add_argument( 23 | # "-n", "--n_top_genes", help="Number of genes to keep.", required=False, type=int 24 | # ) 25 | parser.add_argument("-d", "--out_dir", help="Output directory.", required=True) 26 | parser.add_argument( 27 | "--config", 28 | help="Optional config file (json) used to pass additional parameters.", 29 | required=False, 30 | ) 31 | 32 | args = parser.parse_args() 33 | 34 | from pathlib import Path 35 | 36 | out_dir = Path(args.out_dir) 37 | 38 | # Output files 39 | feature_selection_file = out_dir / "features.tsv" 40 | # if additional output files are required write it also to out_dir 41 | 42 | # Use these filepaths and inputs ... 
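# (Illustrative note, not part of the original script: per the data module
# template later in this repo, observations.tsv / features.tsv / coordinates.tsv
# are tab-separated files whose first column is the observation barcode or
# feature id, and counts.mtx is an observations x features MatrixMarket matrix.
# The get_anndata helper below assembles these inputs into an AnnData object.)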
43 | coord_file = args.coordinates 44 | matrix_file = args.matrix 45 | feature_file = args.features 46 | observation_file = args.observations 47 | 48 | # if args.n_top_genes is not None: 49 | # n_top_genes = args.n_top_genes 50 | if args.config is not None: 51 | config_file = args.config 52 | 53 | 54 | # ... or AnnData if you want 55 | def get_anndata(args): 56 | # Untested template 57 | import anndata as ad 58 | import pandas as pd 59 | import scipy as sp 60 | 61 | X = sp.io.mmread(args.matrix) 62 | if sp.sparse.issparse(X): 63 | X = X.tocsr() 64 | observations = pd.read_table(args.observations, index_col=0) 65 | features = pd.read_table(args.features, index_col=0) 66 | coordinates = ( 67 | pd.read_table(args.coordinates, index_col=0) 68 | .loc[observations.index, :] 69 | .to_numpy() 70 | ) 71 | 72 | adata = ad.AnnData( 73 | X=X, obs=observations, var=features, obsm={"spatial": coordinates} 74 | ) 75 | 76 | return adata 77 | 78 | 79 | adata = get_anndata(args) 80 | 81 | ## Your code goes here 82 | import scanpy as sc 83 | 84 | features_df = adata.var.copy() 85 | 86 | sc.pp.highly_variable_genes(adata, flavor="seurat") 87 | 88 | features_df["selected"] = adata.var["highly_variable"] 89 | 90 | 91 | ## Write output 92 | out_dir.mkdir(parents=True, exist_ok=True) 93 | features_df.to_csv(feature_selection_file, sep="\t", index_label="") 94 | -------------------------------------------------------------------------------- /consensus/02_Smoothness_entropy/Smoothness_entropy.r: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env Rscript 2 | 3 | # Author_and_contribution: Jieran Sun & Mark Robinson; Create the script 4 | 5 | suppressPackageStartupMessages(library(optparse)) 6 | 7 | option_list <- list( 8 | make_option( 9 | c("-i", "--input_file"), 10 | type = "character", default = NULL, 11 | help = "Input containing the aggregated labels." 
12 | ), 13 | make_option( 14 | c("-c", "--coordinates"), 15 | type = "character", default = NULL, 16 | help = "file path to the spatial coordinates of the spots/cells" 17 | ), 18 | make_option( 19 | c("-o", "--output_file"), 20 | type = "character", default = NULL, 21 | help = "desired output file" 22 | ), 23 | make_option( 24 | c("-n", "--neighbors"), 25 | type = "integer", default = NULL, 26 | help = "Number of neighbors to calculate the smoothness" 27 | ), 28 | make_option( 29 | c("-s", "--seed"), 30 | type = "integer", default = NULL, 31 | help = "seed for neighboring algorithm" 32 | ) 33 | ) 34 | 35 | description <- "Calculate overall smoothness of the clustering" 36 | 37 | opt_parser <- OptionParser( 38 | usage = description, 39 | option_list = option_list 40 | ) 41 | opt <- parse_args(opt_parser) 42 | 43 | # Use these filepaths as input 44 | input_file <- opt$input_file 45 | output_file <- opt$output_file 46 | coord_file <- opt$coordinates 47 | neighbors <- ifelse(is.null(opt$neighbors), 6, opt$neighbors) 48 | seed <- ifelse(is.null(opt$seed), 2025, opt$seed) 49 | 50 | 51 | 52 | set.seed(seed) 53 | 54 | suppressPackageStartupMessages({ 55 | library(dbscan) 56 | }) 57 | 58 | ##### Define function 59 | calc_entropy <- function(u) { 60 | p <- u[u>0] 61 | p <- p/sum(p) 62 | -sum(p*log(p)) 63 | } 64 | 65 | spot_entropy <- function(spatial_coords, label, k) { 66 | suppressPackageStartupMessages(require(dbscan)) 67 | knns <- dbscan::kNN(spatial_coords, k=k) 68 | label <- as.factor(label) 69 | neighb_labels <- apply(knns$id, 2, function(u) label[u]) 70 | apply(neighb_labels, 71 | 1, function(u) calc_entropy(table(factor(u, levels=levels(label))))) 72 | } 73 | 74 | ##### Load files 75 | label_df <- read.delim(input_file, stringsAsFactors = FALSE, row.names = 1, numerals="no.loss") 76 | coord_df <- read.delim(coord_file, stringsAsFactors = FALSE, row.names = 1, numerals="no.loss") 77 | coord_df <- coord_df[row.names(label_df), ] 78 | 79 | # Calculate point-wise entropy with its neighbor 80 | spot_entropy_df <- apply(label_df, 2, 81 | function(u) spot_entropy(coord_df, u, neighbors)) 82 | 83 | # calculate colmeans and save it to a dataframe 84 | sm_df <- data.frame(smoothness = colMeans(spot_entropy_df)) 85 | 86 | dir.create(dirname(output_file), showWarnings = FALSE, recursive = TRUE) 87 | 88 | # Save the results 89 | write.table(sm_df, file = output_file, sep = "\t", col.names = NA, quote = FALSE) 90 | -------------------------------------------------------------------------------- /preprocessing/neighbors/radius/radius.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Author_and_contribution: Niklas Mueller-Boetticher; created script, 4 | # Author_and_contribution: Qirong Mao; implemented method 5 | 6 | import argparse 7 | 8 | # TODO adjust description 9 | parser = argparse.ArgumentParser( 10 | description="Neighbor definition based on the radius (only for generic coordinates)" 11 | ) 12 | 13 | parser.add_argument( 14 | "-c", "--coordinates", help="Path to coordinates (as tsv).", required=True 15 | ) 16 | 17 | parser.add_argument( 18 | "-m", "--matrix", help="Path to (transformed) counts (as mtx).", required=True 19 | ) 20 | 21 | parser.add_argument( 22 | "-f", "--features", help="Path to features (as tsv).", required=True 23 | ) 24 | 25 | parser.add_argument( 26 | "-o", "--observations", help="Path to observations (as tsv).", required=True 27 | ) 28 | 29 | parser.add_argument("-d", "--out_dir", help="Output directory.", 
required=True) 30 | 31 | parser.add_argument( 32 | "--config", 33 | help="Optional config file (json) used to pass additional parameters.", 34 | required=False, 35 | ) 36 | 37 | args = parser.parse_args() 38 | 39 | # Output files 40 | from pathlib import Path 41 | 42 | out_dir = Path(args.out_dir) 43 | 44 | spatial_connectivities_file = out_dir / "spatial_connectivities.mtx" 45 | ##spatial_distances_file = out_dir / "spatial_distances.mtx" 46 | 47 | # Use these filepaths and inputs ... 48 | coord_file = args.coordinates 49 | matrix_file = args.matrix 50 | feature_file = args.features 51 | observation_file = args.observations 52 | 53 | ## Loading radius parameters from config_file 54 | if args.config is not None: 55 | config_file = args.config 56 | 57 | import json 58 | 59 | with open(config_file) as f: 60 | parameters = json.load(f) 61 | 62 | radius = parameters["radius"] 63 | 64 | 65 | # ... or AnnData if you want 66 | def get_anndata(args): 67 | # Untested template 68 | import anndata as ad 69 | import pandas as pd 70 | import scipy as sp 71 | 72 | X = sp.io.mmread(args.matrix) 73 | if sp.sparse.issparse(X): 74 | X = X.tocsr() 75 | observations = pd.read_table(args.observations, index_col=0) 76 | features = pd.read_table(args.features, index_col=0) 77 | coordinates = ( 78 | pd.read_table(args.coordinates, index_col=0) 79 | .loc[observations.index, :] 80 | .to_numpy() 81 | ) 82 | 83 | adata = ad.AnnData( 84 | X=X, obs=observations, var=features, obsm={"spatial": coordinates} 85 | ) 86 | 87 | return adata 88 | 89 | 90 | adata = get_anndata(args) 91 | 92 | ## Your code goes here 93 | import squidpy as sq 94 | 95 | sq.gr.spatial_neighbors(adata, radius=radius, coord_type='generic') 96 | 97 | neighbors = adata.obsp["spatial_connectivities"].astype(int) 98 | ##distance = adata.obsp["spatial_distances"].astype(float) 99 | 100 | ## Write output 101 | import scipy as sp 102 | 103 | out_dir.mkdir(parents=True, exist_ok=True) 104 | 105 | sp.io.mmwrite(spatial_connectivities_file, neighbors) 106 | ##sp.io.mmwrite(spatial_distances_file, distance) 107 | -------------------------------------------------------------------------------- /preprocessing/neighbors/n_neighbourhood/n_neighbourhood.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Author_and_contribution: Niklas Mueller-Boetticher; created script, 4 | # Author_and_contribution: Qirong Mao; implemented method 5 | 6 | import argparse 7 | 8 | # TODO adjust description 9 | parser = argparse.ArgumentParser( 10 | description="Neighbor definition based on the number of neighbors (only for generic coordinates)" 11 | ) 12 | 13 | parser.add_argument( 14 | "-c", "--coordinates", help="Path to coordinates (as tsv).", required=True 15 | ) 16 | 17 | parser.add_argument( 18 | "-m", "--matrix", help="Path to (transformed) counts (as mtx).", required=True 19 | ) 20 | 21 | parser.add_argument( 22 | "-f", "--features", help="Path to features (as tsv).", required=True 23 | ) 24 | 25 | parser.add_argument( 26 | "-o", "--observations", help="Path to observations (as tsv).", required=True 27 | ) 28 | 29 | parser.add_argument("-d", "--out_dir", help="Output directory.", required=True) 30 | 31 | parser.add_argument( 32 | "--config", 33 | help="Optional config file (json) used to pass additional parameters.", 34 | required=False, 35 | ) 36 | 37 | args = parser.parse_args() 38 | 39 | # Output files 40 | from pathlib import Path 41 | 42 | out_dir = Path(args.out_dir) 43 | 44 | 
spatial_connectivities_file = out_dir / "spatial_connectivities.mtx" 45 | ##spatial_distances_file = out_dir / "spatial_distances.mtx" 46 | 47 | # Use these filepaths and inputs ... 48 | coord_file = args.coordinates 49 | matrix_file = args.matrix 50 | feature_file = args.features 51 | observation_file = args.observations 52 | 53 | ## Loading n_neighs parameter from config_file 54 | 55 | if args.config is not None: 56 | config_file = args.config 57 | 58 | import json 59 | 60 | with open(config_file) as f: 61 | parameters = json.load(f) 62 | 63 | n_neighs = parameters["n_neighs"] 64 | 65 | # ... or AnnData if you want 66 | def get_anndata(args): 67 | # Untested template 68 | import anndata as ad 69 | import pandas as pd 70 | import scipy as sp 71 | 72 | X = sp.io.mmread(args.matrix) 73 | if sp.sparse.issparse(X): 74 | X = X.tocsr() 75 | observations = pd.read_table(args.observations, index_col=0) 76 | features = pd.read_table(args.features, index_col=0) 77 | coordinates = ( 78 | pd.read_table(args.coordinates, index_col=0) 79 | .loc[observations.index, :] 80 | .to_numpy() 81 | ) 82 | 83 | adata = ad.AnnData( 84 | X=X, obs=observations, var=features, obsm={"spatial": coordinates} 85 | ) 86 | 87 | return adata 88 | 89 | 90 | adata = get_anndata(args) 91 | 92 | ## Your code goes here 93 | import squidpy as sq 94 | 95 | sq.gr.spatial_neighbors(adata, n_neighs=n_neighs, coord_type="generic") 96 | 97 | neighbors = adata.obsp["spatial_connectivities"].astype(int) 98 | ##distance = adata.obsp["spatial_distances"].astype(float) 99 | 100 | ## Write output 101 | import scipy as sp 102 | 103 | out_dir.mkdir(parents=True, exist_ok=True) 104 | 105 | sp.io.mmwrite(spatial_connectivities_file, neighbors) 106 | ##sp.io.mmwrite(spatial_distances_file, distance) 107 | -------------------------------------------------------------------------------- /preprocessing/neighbors/n_rings/n_rings.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Author_and_contribution: Niklas Mueller-Boetticher; created script, 4 | # Author_and_contribution: Qirong Mao; implemented method 5 | 6 | 7 | import argparse 8 | 9 | # TODO adjust description 10 | parser = argparse.ArgumentParser( 11 | description="Neighbor definition based on number of rings of neighbors (only for grid coordinates)" 12 | ) 13 | 14 | parser.add_argument( 15 | "-c", "--coordinates", help="Path to coordinates (as tsv).", required=True 16 | ) 17 | 18 | parser.add_argument( 19 | "-m", "--matrix", help="Path to (transformed) counts (as mtx).", required=True 20 | ) 21 | 22 | parser.add_argument( 23 | "-f", "--features", help="Path to features (as tsv).", required=True 24 | ) 25 | 26 | parser.add_argument( 27 | "-o", "--observations", help="Path to observations (as tsv).", required=True 28 | ) 29 | 30 | parser.add_argument("-d", "--out_dir", help="Output directory.", required=True) 31 | 32 | parser.add_argument( 33 | "--config", 34 | help="Optional config file (json) used to pass additional parameters.", 35 | required=False, 36 | ) 37 | 38 | args = parser.parse_args() 39 | 40 | # Output files 41 | from pathlib import Path 42 | 43 | out_dir = Path(args.out_dir) 44 | 45 | spatial_connectivities_file = out_dir / "spatial_connectivities.mtx" 46 | ##spatial_distances_file = out_dir / "spatial_distances.mtx" 47 | 48 | # Use these filepaths and inputs ... 
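# (Illustrative note: the optional config JSON read below is expected to
# provide the ring count; a hypothetical example config would be
# {"n_rings": 2} -- the value here is a placeholder, not a recommended setting.)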
49 | coord_file = args.coordinates 50 | matrix_file = args.matrix 51 | feature_file = args.features 52 | observation_file = args.observations 53 | 54 | 55 | ## Loading n_rings parameter from config_file 56 | if args.config is not None: 57 | config_file = args.config 58 | 59 | import json 60 | 61 | with open(config_file) as f: 62 | parameters = json.load(f) 63 | 64 | n_rings = parameters["n_rings"] 65 | 66 | 67 | # ... or AnnData if you want 68 | def get_anndata(args): 69 | # Untested template 70 | import anndata as ad 71 | import pandas as pd 72 | import scipy as sp 73 | 74 | X = sp.io.mmread(args.matrix) 75 | if sp.sparse.issparse(X): 76 | X = X.tocsr() 77 | observations = pd.read_table(args.observations, index_col=0) 78 | features = pd.read_table(args.features, index_col=0) 79 | coordinates = ( 80 | pd.read_table(args.coordinates, index_col=0) 81 | .loc[observations.index, :] 82 | .to_numpy() 83 | ) 84 | 85 | adata = ad.AnnData( 86 | X=X, obs=observations, var=features, obsm={"spatial": coordinates} 87 | ) 88 | 89 | return adata 90 | 91 | 92 | adata = get_anndata(args) 93 | 94 | ## Your code goes here 95 | import squidpy as sq 96 | 97 | sq.gr.spatial_neighbors(adata, n_rings=n_rings, coord_type="grid") 98 | 99 | neighbors = adata.obsp["spatial_connectivities"].astype(int) 100 | ##distance = adata.obsp["spatial_distances"].astype(float) 101 | 102 | ## Write output 103 | import scipy as sp 104 | 105 | out_dir.mkdir(parents=True, exist_ok=True) 106 | 107 | sp.io.mmwrite(spatial_connectivities_file, neighbors) 108 | ##sp.io.mmwrite(spatial_distances_file, distance) 109 | -------------------------------------------------------------------------------- /templates/README.md: -------------------------------------------------------------------------------- 1 | # SpaceHack - templates for modules 2 | 3 | This directory contains the templates that you can use to implement a new dataset, method or metric. 4 | 5 | For further instructions have a look in the corresponding directory: 6 | 7 | - [data](/data) 8 | - [method](/method) 9 | - [metric](/metric) 10 | - [consensus](/consensus) 11 | 12 | ### How to contribute a module 13 | 14 | Module contribution will be managed via GitHub. The steps to contribute a module are: 15 | 1. Create or claim a **GitHub issue** from the [SpaceHack issue board](https://github.com/SpatialHackathon/SpaceHack2023/issues) that describes the module you want to implement. There are currently 90 issues to claim, but if you come up with a new idea, please **create** a new issue, add the appropriate **tags**, and **assign** the task to yourself. 16 | 2. Add **metadata** to our metadata [spreadsheet](https://docs.google.com/spreadsheets/d/1QCeAF4yQG4bhZSGPQwwVBj_XF7ADY_2mK5xivAIfHsc/edit). Please fill in as much as you can as metadata is helpful! If you feel the need, please also add new columns or add additional notes. The metadata should be added to the appropriate tabs: 17 | - [datasets](https://docs.google.com/spreadsheets/d/1QCeAF4yQG4bhZSGPQwwVBj_XF7ADY_2mK5xivAIfHsc/edit#gid=1453488771) 18 | - [computational methods](https://docs.google.com/spreadsheets/d/1QCeAF4yQG4bhZSGPQwwVBj_XF7ADY_2mK5xivAIfHsc/edit#gid=0) 19 | - [evaluation metrics](https://docs.google.com/spreadsheets/d/1QCeAF4yQG4bhZSGPQwwVBj_XF7ADY_2mK5xivAIfHsc/edit#gid=4776337) 20 | - [simulations and technical evaluation](https://docs.google.com/spreadsheets/d/1QCeAF4yQG4bhZSGPQwwVBj_XF7ADY_2mK5xivAIfHsc/edit#gid=640974611) 21 | 3. 
Now you are ready to create a new git **[branch](https://learngitbranching.js.org/)**. Try to give your new branch an intuitive prefix such as `data_...`, `method_...`, `metric_...` or `consensus_...`. You can create a new branch in several ways: (i) [create a branch directly from the issue board](https://docs.github.com/en/issues/tracking-your-work-with-issues/creating-a-branch-for-an-issue) and then `git checkout` that branch, or (ii) via the command line: 22 | ``` 23 | # clone the template repository 24 | git clone https://github.com/SpatialHackathon/SpaceHack2023.git 25 | # create and switch to a new branch, e.g. for your method "X" 26 | git branch method_x_naveedishaque # try to make the branch name unique! 27 | git checkout method_x_naveedishaque 28 | # link the branch to the issue via the issue board: https://docs.github.com/en/issues/tracking-your-work-with-issues/linking-a-pull-request-to-an-issue 29 | ``` 30 | 4. Modify the files, filenames, and code in `templates/`, referring to the examples in the `data`, `method`, or `metric` subfolder. If your method requires a specific type of preprocessing, please reach out to the organisers! 31 | 5. Test. We are currently working on validators and automatic testing scripts... but this is tricky. Reach out to Niklas Mueller-Boetticher when you are ready to test! 32 | 6. Create a [pull request](https://docs.github.com/en/pull-requests/collaborating-with-pull-requests/proposing-changes-to-your-work-with-pull-requests/creating-a-pull-request?tool=cli) 33 | 7. Code review (by whom?) and merge your contributed module into the GitHub main branch! 34 | -------------------------------------------------------------------------------- /consensus/03_Consensus_kmode/Consensus_kmode.r: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env Rscript 2 | 3 | # Author_and_contribution: Jieran Sun & Mark Robinson; Create the script 4 | 5 | suppressPackageStartupMessages(library(optparse)) 6 | 7 | option_list <- list( 8 | make_option( 9 | c("-i", "--input_file"), 10 | type = "character", default = NULL, 11 | help = "Input containing the aggregated labels." 
12 | ), 13 | make_option( 14 | c("--seed"), 15 | type = "integer", default = NULL, 16 | help = "Seed for the consensus algorithm" 17 | ), 18 | make_option( 19 | c("-b", "--base_clusterings"), 20 | type = "character", default = NULL, 21 | help = "Path to base-clustering ranking file" 22 | ), 23 | make_option( 24 | c("--n_clusters"), 25 | type = "character", default = NULL, 26 | help = "Desired number of clusters in the consensus output" 27 | ), 28 | make_option( 29 | c("--n_bcs"), 30 | type = "integer", default = NULL, 31 | help = "Desired number of base clustering results fed into the algorithm" 32 | ), 33 | make_option( 34 | c("-o", "--output_file"), 35 | type = "character", default = NULL, 36 | help = "desired output file" 37 | ) 38 | ) 39 | 40 | description <- "Calculate consensus for selected BCs" 41 | 42 | opt_parser <- OptionParser( 43 | usage = description, 44 | option_list = option_list 45 | ) 46 | opt <- parse_args(opt_parser) 47 | 48 | # Use these filepaths as input 49 | input_file <- opt$input_file 50 | output_file <- opt$output_file 51 | bc_file <- opt$base_clusterings 52 | n_bcs <- ifelse(is.null(opt$n_bcs), 8, opt$n_bcs) 53 | n_clust <- ifelse(is.null(opt$n_clusters), "7", opt$n_clusters) 54 | seed <- opt$seed 55 | 56 | # Your code goes here 57 | suppressPackageStartupMessages({ 58 | library(diceR) 59 | }) 60 | 61 | label_df <- read.delim(input_file, stringsAsFactors = FALSE, row.names = 1, numerals="no.loss") 62 | bc_list <- read.delim(bc_file, stringsAsFactors = FALSE, row.names = 1, numerals="no.loss", check.names=FALSE)[[as.character(n_clust)]] 63 | bc_list <- bc_list[!is.na(bc_list)] 64 | 65 | if (length(bc_list) < n_bcs){ 66 | warning(sprintf("Not enough (%s) base clusterings (BCs) are available; using %s BCs instead.", n_bcs, length(bc_list))) 67 | } 68 | bc_list <- bc_list[1:min(n_bcs, length(bc_list))] 69 | 70 | label_selected <- label_df[, bc_list] 71 | 72 | # Make sure all cluster labels run from 1 to n without gaps (needed e.g. for SOTIP output) 73 | label_selected <- as.data.frame(lapply(label_selected, function(u){ 74 | unique_labels <- sort(unique(u)) 75 | if (all(unique_labels==seq_along(unique_labels))) { 76 | return(as.factor(u)) 77 | } else { 78 | # Count occurrences of each label 79 | freq <- table(u) 80 | rank_map <- rank(-freq, ties.method = "first") # Negative for descending order 81 | new_vec <- rank_map[as.character(u)] 82 | new_vec <- as.factor(as.numeric(new_vec)) 83 | return(new_vec) 84 | } 85 | })) 86 | 87 | kmode_vec <- diceR:::k_modes(label_selected, is.relabelled = FALSE, seed = seed) 88 | kmode_df <- data.frame(consensus_kmode=kmode_vec, row.names = row.names(label_selected)) 89 | 90 | dir.create(dirname(output_file), showWarnings = FALSE, recursive = TRUE) 91 | # Save the results 92 | write.table(kmode_df, file = output_file, sep = "\t", col.names = NA, quote = FALSE) 93 | -------------------------------------------------------------------------------- /consensus/03_Consensus_lca/Consensus_lca.r: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env Rscript 2 | 3 | # Author_and_contribution: Jieran Sun & Mark Robinson; Create the script 4 | 5 | suppressPackageStartupMessages(library(optparse)) 6 | 7 | option_list <- list( 8 | make_option( 9 | c("-i", "--input_file"), 10 | type = "character", default = NULL, 11 | help = "Input containing the aggregated labels." 
12 | ), 13 | make_option( 14 | c("--seed"), 15 | type = "integer", default = NULL, 16 | help = "Seed for the consensus algorithm" 17 | ), 18 | make_option( 19 | c("-b", "--base_clusterings"), 20 | type = "character", default = NULL, 21 | help = "Path to base-clustering ranking file" 22 | ), 23 | make_option( 24 | c("--n_clusters"), 25 | type = "integer", default = NULL, 26 | help = "Desired number of clusters in the consensus output" 27 | ), 28 | make_option( 29 | c("--n_bcs"), 30 | type = "integer", default = NULL, 31 | help = "Desired number of base clustering results fed into the algorithm" 32 | ), 33 | make_option( 34 | c("-o", "--output_file"), 35 | type = "character", default = NULL, 36 | help = "desired output file" 37 | ) 38 | ) 39 | 40 | description <- "Calculate consensus for selected BCs" 41 | 42 | opt_parser <- OptionParser( 43 | usage = description, 44 | option_list = option_list 45 | ) 46 | opt <- parse_args(opt_parser) 47 | 48 | # Use these filepaths as input 49 | input_file <- opt$input_file 50 | output_file <- opt$output_file 51 | bc_file <- opt$base_clusterings 52 | n_bcs <- ifelse(is.null(opt$n_bcs), 8, opt$n_bcs) 53 | n_clust <- ifelse(is.null(opt$n_clusters), "7", opt$n_clusters) 54 | seed <- opt$seed 55 | 56 | # Your code goes here 57 | suppressPackageStartupMessages({ 58 | library(diceR) 59 | }) 60 | 61 | 62 | 63 | label_df <- read.delim(input_file, stringsAsFactors = FALSE, row.names = 1, numerals="no.loss") 64 | bc_list <- read.delim(bc_file, stringsAsFactors = FALSE, row.names = 1, numerals="no.loss", check.names=FALSE)[[as.character(n_clust)]] 65 | bc_list <- bc_list[!is.na(bc_list)] 66 | 67 | if (length(bc_list) < n_bcs){ 68 | warning(sprintf("Not enough (%s) base clusterings (BCs) are available; using %s BCs instead.", n_bcs, length(bc_list))) 69 | } 70 | bc_list <- bc_list[1:min(n_bcs, length(bc_list))] 71 | 72 | label_selected <- label_df[, bc_list] 73 | 74 | # Make sure all cluster labels run from 1 to n without gaps (needed e.g. for SOTIP output) 75 | label_selected <- as.data.frame(lapply(label_selected, function(u){ 76 | unique_labels <- sort(unique(u)) 77 | if (all(unique_labels==seq_along(unique_labels))) { 78 | return(factor(u, levels = unique_labels)) 79 | } else { 80 | # Count occurrences of each label 81 | freq <- table(u) 82 | rank_map <- rank(-freq, ties.method = "first") # Negative for descending order 83 | new_vec <- rank_map[as.character(u)] 84 | new_vec <- factor(as.numeric(new_vec)) 85 | return(new_vec) 86 | } 87 | })) 88 | lca_vec <- diceR:::LCA(label_selected, is.relabelled = FALSE, seed = seed) 89 | lca_df <- data.frame(consensus_lca=lca_vec, row.names = row.names(label_selected)) 90 | 91 | dir.create(dirname(output_file), showWarnings = FALSE, recursive = TRUE) 92 | 93 | # Save the results 94 | write.table(lca_df, file = output_file, sep = "\t", col.names = NA, quote = FALSE) 95 | -------------------------------------------------------------------------------- /consensus/02_BC_ranking/BC_ranking.r: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env Rscript 2 | 3 | # Author_and_contribution: Jieran Sun & Mark Robinson; Create the script 4 | 5 | suppressPackageStartupMessages(library(optparse)) 6 | 7 | option_list <- list( 8 | make_option( 9 | c("-i", "--input_file"), 10 | type = "character", default = NULL, 11 | help = "Input containing the aggregated labels." 
12 | ), 13 | make_option( 14 | c("-o", "--output_file"), 15 | type = "character", default = NULL, 16 | help = "desired output file" 17 | ), 18 | make_option( 19 | c("--selection_metrics"), 20 | type = "character", default = NULL, 21 | help = "file containing the metric information for BC selection" 22 | ), 23 | make_option( 24 | c("-m", "--max_percentage"), 25 | type = "double", default = NULL, 26 | help = "maximal percentage of the largest class" 27 | ) 28 | ) 29 | 30 | description <- "Automatically select the base-clusterings based on different algorithms" 31 | 32 | opt_parser <- OptionParser( 33 | usage = description, 34 | option_list = option_list 35 | ) 36 | opt <- parse_args(opt_parser) 37 | 38 | # Use these filepaths as input 39 | input_file <- opt$input_file 40 | output_file <- opt$output_file 41 | smoothness_file <- opt$selection_metrics # only --selection_metrics is defined above; it is used as the smoothness input 42 | ari_file <- NULL # no --ari option is currently defined, so ARI-based selection is inactive 43 | max_percentage <- ifelse(is.null(opt$max_percentage), 0.8, opt$max_percentage) 44 | 45 | ##### Load files 46 | label_df <- read.delim(input_file, stringsAsFactors = FALSE, row.names = 1, numerals="no.loss") 47 | 48 | ##### Filter out class-imbalanced cases 49 | label_df <- label_df[, sapply(label_df, function(col) { 50 | max(table(col)) / length(col) <= max_percentage 51 | })] 52 | 53 | ##### Separate label_df by number of clusters 54 | n_clusters <- apply(label_df, 2, function(u){length(unique(u))}) 55 | label_lists <- split(names(n_clusters), n_clusters) 56 | 57 | ##### Select base-clusterings based on algorithms 58 | 59 | if (!is.null(ari_file)){ 60 | # ARI df is an nxn dataframe with n = number of result instances; each value is a cross-ARI 61 | selection_df <- read.delim(ari_file, stringsAsFactors = FALSE, row.names = 1, numerals="no.loss") 62 | s_mean <- colMeans(selection_df) 63 | } else { 64 | if (is.null(smoothness_file)){ 65 | consensus_dir <- file.path(dirname(input_file), "consensus") 66 | file_searched <- list.files(path = consensus_dir, pattern = "smoothness", 67 | full.names = TRUE, ignore.case = TRUE) 68 | if (length(file_searched) == 0){ 69 | stop("No smoothness entropy file found or defined.") 70 | } 71 | smoothness_file <- file_searched[1] 72 | } 73 | # Smoothness df is a one-column dataframe with row names referring to individual result instances 74 | selection_df <- read.delim(smoothness_file, stringsAsFactors = FALSE, row.names = 1, numerals="no.loss") 75 | # Negated, as high entropy means less smooth 76 | s_mean <- - rowMeans(selection_df) 77 | } 78 | 79 | d_length <- max(lengths(label_lists)) 80 | s_bc_list <- sapply(label_lists, function(nclu_names){ 81 | s_n <- s_mean[names(s_mean) %in% nclu_names] 82 | selected_names <- names(sort(s_n, decreasing = TRUE)) 83 | length(selected_names) <- d_length 84 | return(selected_names) 85 | }) 86 | 87 | result_df <- as.data.frame(s_bc_list) 88 | dir.create(dirname(output_file), showWarnings = FALSE, recursive = TRUE) 89 | 90 | # Save the results 91 | write.table(result_df, file = output_file, sep = "\t", col.names = NA, quote = FALSE) 92 | -------------------------------------------------------------------------------- /templates/consensus.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Author_and_contribution: Jieran Sun & Mark Robinson; implemented method 4 | # Author_and_contribution: Peiying Cai; created template 5 | # Author_and_contribution: ENTER YOUR NAME AND CONTRIBUTION HERE 6 | 7 | import argparse 8 | 9 | # TODO adjust description 10 | parser = 
argparse.ArgumentParser(description="Calculate consensus ... for selected BCs") 11 | 12 | parser.add_argument( 13 | "-i", "--input_file", help="Input containing the aggregated labels.", required=True) 14 | parser.add_argument( 15 | "--seed", type=int, default=None, help="Seed for random number generator") 16 | parser.add_argument( 17 | "-b", "--base_clusterings", help="Path to base-clustering ranking file", required=True) 18 | parser.add_argument( 19 | "-o", "--output_file", help="Desired output file", required=True) 20 | # TODO adjust default numbers in `n_bcs` and `n_clusters` 21 | # make sure that `n_clusters` exists among the column names of the base-clustering ranking file 22 | parser.add_argument( 23 | "--n_clusters", type=int, default=7, help="Desired number of clusters in the consensus output") 24 | parser.add_argument( 25 | "--n_bcs", type=int, default=8, help="Desired number of base clustering results fed into the algorithm") 26 | 27 | args = parser.parse_args() 28 | from pathlib import Path 29 | import pandas as pd 30 | import sys 31 | import warnings 32 | 33 | seed = args.seed 34 | output_file = args.output_file 35 | output_path = Path(args.output_file) 36 | 37 | # Read input label data 38 | label_df = pd.read_csv(args.input_file, sep="\t", index_col=0) 39 | 40 | # Read base clustering rankings 41 | bc_df = pd.read_csv(args.base_clusterings, sep="\t", index_col=0) 42 | 43 | n_clust_str = str(args.n_clusters) 44 | if n_clust_str not in bc_df.columns: 45 | sys.exit(f"Error: n_clusters={args.n_clusters} not found in base clustering file columns.") 46 | 47 | # bc_list stores all 'method_config_n_clust_label' entries matching n_clust 48 | bc_list = bc_df[n_clust_str].dropna().tolist() 49 | 50 | if len(bc_list) < args.n_bcs: 51 | warnings.warn(f"Not enough ({args.n_bcs}) base clusterings (BCs) are available, use {len(bc_list)} BCs instead.") 52 | bc_list = bc_list[:min(args.n_bcs, len(bc_list))] 53 | 54 | # Subset the label data to keep only the selected base clusterings 55 | label_selected = label_df[bc_list] 56 | 57 | # Make sure clusters are ranked 1 to n without jumps (SOTIP) 58 | def rank_labels(u): 59 | unique_labels = sorted(u.dropna().unique()) 60 | if unique_labels == list(range(1, len(unique_labels) + 1)): 61 | # Already consecutive starting from 1 62 | return u.astype('Int64') 63 | 64 | freq = u.dropna().value_counts() 65 | # Rank by descending frequency, ties.method='first' equivalent 66 | rank_map = {label: rank+1 for rank, label in enumerate(freq.index)} 67 | return u.map(rank_map).astype('Int64') 68 | 69 | label_selected = label_selected.apply(rank_labels, axis=0) 70 | 71 | 72 | # TODO set the seed, if the algorithm requires the seed elsewhere please pass it on 73 | import random 74 | 75 | random.seed(seed) 76 | # np.random.seed(seed) 77 | # torch.manual_seed(seed) 78 | 79 | ## Your code goes here 80 | # TODO 81 | # Input: label_selected (DataFrame) with samples as rows and base clusterings as columns 82 | 83 | # output_df = ... 
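# For orientation, a minimal majority-vote sketch that keeps this template
# runnable end-to-end. This is an illustrative assumption, not the required
# consensus algorithm (the diceR-based k-modes/LCA modules under consensus/
# are real implementations); replace it with your own method:
# each observation receives the label occurring most often across the
# selected base clusterings, with ties broken by the smallest label.
majority_vote = label_selected.mode(axis=1).iloc[:, 0]
output_df = pd.DataFrame({"consensus": majority_vote}, index=label_selected.index)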
84 | 85 | ## Write output 86 | output_path.parent.mkdir(parents=True, exist_ok=True) 87 | output_df.to_csv(output_file, sep="\t", index=True) 88 | 89 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as 6 | contributors and maintainers pledge to making participation in our project and 7 | our community a harassment-free experience for everyone, regardless of age, body 8 | size, disability, ethnicity, gender identity and expression, level of experience, 9 | education, socio-economic status, nationality, personal appearance, race, 10 | religion, or sexual identity and orientation. 11 | 12 | ## Our Standards 13 | 14 | Examples of behavior that contributes to creating a positive environment 15 | include: 16 | 17 | * Using welcoming and inclusive language 18 | * Being respectful of differing viewpoints and experiences 19 | * Gracefully accepting constructive criticism 20 | * Focusing on what is best for the community 21 | * Showing empathy towards other community members 22 | 23 | Examples of unacceptable behavior by participants include: 24 | 25 | * The use of sexualized language or imagery and unwelcome sexual attention or 26 | advances 27 | * Trolling, insulting/derogatory comments, and personal or political attacks 28 | * Public or private harassment 29 | * Publishing others' private information, such as a physical or electronic 30 | address, without explicit permission 31 | * Other conduct which could reasonably be considered inappropriate in a 32 | professional setting 33 | 34 | ## Our Responsibilities 35 | 36 | Project maintainers are responsible for clarifying the standards of acceptable 37 | behavior and are expected to take appropriate and fair corrective action in 38 | response to any instances of unacceptable behavior. 39 | 40 | Project maintainers have the right and responsibility to remove, edit, or 41 | reject comments, commits, code, wiki edits, issues, and other contributions 42 | that are not aligned to this Code of Conduct, or to ban temporarily or 43 | permanently any contributor for other behaviors that they deem inappropriate, 44 | threatening, offensive, or harmful. 45 | 46 | ## Scope 47 | 48 | This Code of Conduct applies both within project spaces and in public spaces 49 | when an individual is representing the project or its community. Examples of 50 | representing a project or community include using an official project e-mail 51 | address, posting via an official social media account, or acting as an appointed 52 | representative at an online or offline event. Representation of a project may be 53 | further defined and clarified by project maintainers. 54 | 55 | ## Enforcement 56 | 57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 58 | reported by contacting the project team at {{ email }}. All 59 | complaints will be reviewed and investigated and will result in a response that 60 | is deemed necessary and appropriate to the circumstances. The project team is 61 | obligated to maintain confidentiality with regard to the reporter of an incident. 62 | Further details of specific enforcement policies may be posted separately. 
63 | 64 | Project maintainers who do not follow or enforce the Code of Conduct in good 65 | faith may face temporary or permanent repercussions as determined by other 66 | members of the project's leadership. 67 | 68 | ## Attribution 69 | 70 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, 71 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html 72 | 73 | [homepage]: https://www.contributor-covenant.org 74 | -------------------------------------------------------------------------------- /preprocessing/dimensionality_reduction/PCA.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Author_and_contribution: Niklas Mueller-Boetticher; created script 4 | 5 | import argparse 6 | 7 | # TODO adjust description 8 | parser = argparse.ArgumentParser(description="PCA (with standard-scaling)") 9 | 10 | parser.add_argument( 11 | "-c", "--coordinates", help="Path to coordinates (as tsv).", required=True 12 | ) 13 | parser.add_argument( 14 | "-m", "--matrix", help="Path to (transformed) counts (as mtx).", required=True 15 | ) 16 | parser.add_argument( 17 | "-f", "--features", help="Path to features (as tsv).", required=True 18 | ) 19 | parser.add_argument( 20 | "-o", "--observations", help="Path to observations (as tsv).", required=True 21 | ) 22 | parser.add_argument( 23 | "-n", 24 | "--n_components", 25 | help="Number of components/factors to generate.", 26 | required=False, 27 | type=int, 28 | ) 29 | parser.add_argument("-d", "--out_dir", help="Output directory.", required=True) 30 | parser.add_argument( 31 | "--config", 32 | help="Optional config file (json) used to pass additional parameters.", 33 | required=False, 34 | ) 35 | parser.add_argument( 36 | "--seed", 37 | help="Seed for random state control on PCA.", 38 | required=True, 39 | ) 40 | 41 | args = parser.parse_args() 42 | 43 | # Output files 44 | from pathlib import Path 45 | 46 | out_dir = Path(args.out_dir) 47 | 48 | # Output files 49 | dim_red_file = out_dir / "dimensionality_reduction.tsv" 50 | # if additional output files are required write it also to out_dir 51 | 52 | # Use these filepaths as input ... 
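# (Illustrative invocation with placeholder paths and values, built from the
# arguments defined above; --config is optional and may be omitted:
#   python PCA.py -c coordinates.tsv -m counts.mtx -f features.tsv \
#       -o observations.tsv -n 20 -d out/pca_20 --seed 2023 )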
53 | coord_file = args.coordinates 54 | matrix_file = args.matrix 55 | feature_file = args.features 56 | observation_file = args.observations 57 | 58 | if args.n_components is not None: 59 | n_components = args.n_components 60 | if args.config is not None: 61 | config_file = args.config 62 | 63 | ## Your code goes here 64 | import pandas as pd 65 | import scipy as sp 66 | from sklearn.decomposition import PCA 67 | from sklearn.preprocessing import StandardScaler 68 | 69 | matrix = sp.io.mmread(matrix_file) 70 | if sp.sparse.issparse(matrix): 71 | matrix = matrix.tocsr() 72 | 73 | observations = pd.read_table(observation_file, index_col=0) 74 | features = pd.read_table(feature_file, index_col=0) 75 | 76 | # Filter features and observations 77 | if "selected" in observations.columns: 78 | matrix = matrix[observations["selected"].to_numpy().nonzero()[0], :] 79 | observations = observations.loc[lambda df: df["selected"]].index 80 | else: 81 | observations = observations.index 82 | if "selected" in features.columns: 83 | matrix = matrix[:, features["selected"].to_numpy().nonzero()[0]] 84 | features = features.loc[lambda df: df["selected"]].index 85 | else: 86 | features = features.index 87 | 88 | matrix = matrix.toarray() if sp.sparse.issparse(matrix) else matrix 89 | matrix = pd.DataFrame(matrix, columns=features, index=observations) 90 | 91 | scaler = StandardScaler().set_output(transform="pandas") 92 | matrix = scaler.fit_transform(matrix) 93 | 94 | pca = PCA(n_components=n_components, svd_solver="arpack", random_state = int(args.seed)).set_output(transform="pandas") 95 | dim_red_df = pca.fit_transform(matrix) 96 | 97 | 98 | ## Write output 99 | out_dir.mkdir(parents=True, exist_ok=True) 100 | dim_red_df.to_csv(dim_red_file, sep="\t", index_label="", float_format="%g") 101 | -------------------------------------------------------------------------------- /templates/data.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Author_and_contribution: Niklas Mueller-Boetticher; created template 4 | # Author_and_contribution: ENTER YOUR NAME AND CONTRIBUTION HERE 5 | 6 | import argparse 7 | 8 | # TODO adjust description 9 | parser = argparse.ArgumentParser(description="Load data for ...") 10 | 11 | parser.add_argument( 12 | "-o", "--out_dir", help="Output directory to write files to.", required=True 13 | ) 14 | 15 | args = parser.parse_args() 16 | 17 | 18 | from pathlib import Path 19 | 20 | import pandas as pd 21 | 22 | out_dir = Path(args.out_dir) 23 | 24 | # The folder structure should look like the following 25 | # out_dir 26 | # |_______sample_1 (sample name can be chosen freely) 27 | # |_____coordinates.tsv 28 | # |_____features.tsv 29 | # |_____observations.tsv 30 | # |_____counts.mtx (use scipy.io.mmwrite) 31 | # |_____labels.tsv (optional) 32 | # |_____H_E.(tiff/png/...) (optional) 33 | # |_____H_E.json (optional, required if H_E is provided) 34 | # |_______sample_2 35 | # | ... 36 | # |_______samples.tsv 37 | # |_______experiment.json 38 | # if additional output files are required write it also to out_dir 39 | 40 | 41 | ## Your code goes here 42 | # TODO 43 | # features_df = ... # DataFrame with index (gene-id/name) and n columns (?) 44 | # observations_df = ... # DataFrame with index (cell-id/barcode) and n columns (?) 45 | # coordinates_df = ... # DataFrame with index (cell-id/barcode) and 2/3 columns (x, y, z?) 46 | # counts = ... 
# array with #observations rows x #features columns 47 | # labels_df = None # optional, DataFrame with index (cell-id/barcode) and 1 column (label) 48 | # img = None # optional 49 | # technology = ... # i.e. "Visium", "ST", "MERSCOPE", "MERFISH", "Stereo-seq", "Slide-seq", "Xenium", "STARmap", "STARmap+", "osmFISH", "seqFISH" 50 | # samples_df = ... # DataFrame with information on samples. columns: (patient, sample, position, replicate, directory, n_clusters), columns can be NA 51 | 52 | # Make sure to use consistent indexes for the DataFrames 53 | # i.e. the index (not necessarily the order) of observations and coordinates should match 54 | # But the order of observations and features must match counts (observations x features) 55 | 56 | 57 | # Example how a sample could be written 58 | def write_sample( 59 | path, 60 | sample, 61 | coordinates_df, 62 | observations_df, 63 | features_df, 64 | counts, 65 | labels_df=None, 66 | img=None, 67 | ): 68 | if img is not None: 69 | # TODO write to image_file 70 | # H_E.json must contain the scale 71 | pass 72 | 73 | import scipy as sp 74 | 75 | sample_path = Path(path) / sample 76 | 77 | coordinates_df.to_csv(sample_path / "coordinates.tsv", sep="\t", index_label="") 78 | features_df.to_csv(sample_path / "features.tsv", sep="\t", index_label="") 79 | observations_df.to_csv(sample_path / "observations.tsv", sep="\t", index_label="") 80 | sp.io.mmwrite(sample_path / "counts.mtx", counts) 81 | 82 | if labels_df is not None: 83 | labels_df.columns = ["label"] 84 | labels_df.to_csv(sample_path / "labels.tsv", sep="\t", index_label="") 85 | 86 | 87 | ## Metadata files 88 | samples_df.loc[ 89 | :, ["patient", "sample", "position", "replicate", "directory", "n_clusters"] 90 | ].to_csv(out_dir / "samples.tsv", sep="\t", index_label="") 91 | 92 | import json 93 | 94 | with open(out_dir / "experiment.json", "w") as f: 95 | exp_info = {"technology": technology} 96 | json.dump(exp_info, f) 97 | -------------------------------------------------------------------------------- /metric/cluster-specific-silhouette/cluster-specific-silhouette.r: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env Rscript 2 | 3 | # Author_and_contribution: Niklas Mueller-Boetticher; created template 4 | # Author_and_contribution: Mark D. Robinson; coded the domain-specific F1 5 | 6 | suppressPackageStartupMessages(library(optparse)) 7 | 8 | # TODO adjust description 9 | option_list <- list( 10 | make_option( 11 | c("-l", "--labels"), 12 | type = "character", default = NULL, 13 | help = "Labels from domain clustering." 14 | ), 15 | make_option( 16 | c("-g", "--ground_truth"), 17 | type = "character", default = NA, 18 | help = "Groundtruth labels." 19 | ), 20 | make_option( 21 | c("-e", "--embedding"), 22 | type = "character", default = NA, 23 | help = "Embedding of points in latent space. Potential usage for metrics without groundtruth." 24 | ), 25 | # format should be json 26 | make_option( 27 | c("-c", "--config"), 28 | type = "character", default = NA, 29 | help = "Optional config file (json) used to pass additional parameters." 30 | ), 31 | make_option( 32 | c("-o", "--out_file"), 33 | type = "character", default = NULL, 34 | help = "Output file." 
--------------------------------------------------------------------------------
/metric/cluster-specific-silhouette/cluster-specific-silhouette.r:
--------------------------------------------------------------------------------
#!/usr/bin/env Rscript

# Author_and_contribution: Niklas Mueller-Boetticher; created template
# Author_and_contribution: Mark D. Robinson; coded the cluster-specific silhouette

suppressPackageStartupMessages(library(optparse))

option_list <- list(
  make_option(
    c("-l", "--labels"),
    type = "character", default = NULL,
    help = "Labels from domain clustering."
  ),
  make_option(
    c("-g", "--ground_truth"),
    type = "character", default = NA,
    help = "Groundtruth labels."
  ),
  make_option(
    c("-e", "--embedding"),
    type = "character", default = NA,
    help = "Embedding of points in latent space. Potential usage for metrics without groundtruth."
  ),
  # format should be json
  make_option(
    c("-c", "--config"),
    type = "character", default = NA,
    help = "Optional config file (json) used to pass additional parameters."
  ),
  make_option(
    c("-o", "--out_file"),
    type = "character", default = NULL,
    help = "Output file."
  )
)

description <- "Calculate cluster-specific silhouette widths (returns JSON with mean and median silhouette width per cluster)"

opt_parser <- OptionParser(
  usage = description,
  option_list = option_list
)
opt <- parse_args(opt_parser)

# Use these filepaths as input
label_file <- opt$labels

if (!is.na(opt$ground_truth)) {
  groundtruth_file <- opt$ground_truth
}
if (!is.na(opt$embedding)) {
  embedding_file <- opt$embedding
}
if (!is.na(opt$config)) {
  config_file <- opt$config
}


## Code for calculating metric goes here
## --------------------------------------
suppressPackageStartupMessages(library(cluster))
suppressPackageStartupMessages(library(jsonlite))

# # for testing - start
# label_file <- "~/scratch/SpaceHack2/method_results/LIBD_DLPFC/Br5292_151507/SpaGCN/domains.tsv"
# outfile <- "./cluster-specific-silhouette.json"
# embedding_file <- "~/scratch/SpaceHack2/method_results/LIBD_DLPFC/Br5292_151507/log1p/hvg/pca_20/dimensionality_reduction.tsv"
# groundtruth_file <- "data/libd_dlpfc/Br5595_151670/labels.tsv"
# # for testing - stop

domains <- read.delim(label_file, sep = "\t", row.names = 1)
embedding <- read.delim(embedding_file, sep = "\t", row.names = 1)

# subset to the observations present in both inputs
rn <- intersect(rownames(domains), rownames(embedding))
embedding <- embedding[rn, , drop = FALSE]
domains <- domains[rn, , drop = FALSE]

# silhouette() expects integer cluster codes, so recode the labels first
labs <- factor(domains$label)
cl <- as.integer(labs)

# calculate silhouette score on the embeddings per cell
sil <- silhouette(cl, dist = dist(embedding))

agg_median <- aggregate(sil[, "sil_width", drop = FALSE],
                        list(cluster = sil[, "cluster"]), FUN = median)
agg_mean <- aggregate(sil[, "sil_width", drop = FALSE],
                      list(cluster = sil[, "cluster"]), FUN = mean)

# map the integer codes back to the original cluster labels
df <- data.frame(cluster = levels(labs)[agg_mean$cluster],
                 mean_sil_width = agg_mean$sil_width,
                 median_sil_width = agg_median$sil_width)

## Write output
outfile <- opt$out_file
dir.create(dirname(outfile), showWarnings = FALSE, recursive = TRUE)
write_json(df, outfile)
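Because `jsonlite::write_json` serialises a data.frame as an array of per-row records, the metric file can be consumed directly downstream. A sketch of reading it back in Python (the file name is hypothetical):

```python
import json

import pandas as pd

# Each record corresponds to one cluster, with its mean and median silhouette width
with open("cluster-specific-silhouette.json") as f:  # hypothetical path
    records = json.load(f)

df = pd.DataFrame.from_records(records)
print(df[["cluster", "mean_sil_width", "median_sil_width"]])
```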
--------------------------------------------------------------------------------
/data/spatialDLPFC/spatialDLPFC.r:
--------------------------------------------------------------------------------
#!/usr/bin/env Rscript

# Author_and_contribution: Niklas Mueller-Boetticher; created template
# Author_and_contribution: Niklas Mueller-Boetticher; contributed code

suppressPackageStartupMessages(library(optparse))

option_list <- list(
  make_option(
    c("-o", "--out_dir"),
    type = "character", default = NULL,
    help = "Output directory to write files to."
  )
)

description <- "Load data for spatialDLPFC (10x Visium, human dorsolateral prefrontal cortex, via spatialLIBD)"

opt_parser <- OptionParser(
  usage = description,
  option_list = option_list
)
opt <- parse_args(opt_parser)

out_dir <- opt$out_dir

# The folder structure should look like the following
# out_dir
# |_______sample_1 (sample name can be chosen freely)
#         |_____coordinates.tsv
#         |_____features.tsv
#         |_____observations.tsv
#         |_____counts.mtx (use Matrix::writeMM)
#         |_____labels.tsv (optional)
#         |_____H_E.(tiff/png/...) (optional)
#         |_____H_E.json (optional, required if H_E is provided)
# |_______sample_2
#         | ...
# |_______samples.tsv
# |_______experiment.json
# If additional output files are required, write them to out_dir as well.


## Your code goes here
technology <- "Visium"

suppressPackageStartupMessages(library(spatialLIBD))
suppressPackageStartupMessages(library(magrittr))

write_tsv <- function(df, path) {
  write.table(df, path, sep = "\t", col.names = NA, quote = FALSE)
}

write_SpatialExperiment_to_folder <- function(spe, path, obs_col, assay_name = "counts") {
  dir.create(path, showWarnings = FALSE, recursive = TRUE)

  colData(spe)[obs_col] %>%
    as.data.frame() %>%
    write_tsv(file.path(path, "observations.tsv"))

  rowData(spe) %>%
    as.data.frame() %>%
    write_tsv(file.path(path, "features.tsv"))

  coords <- spatialCoords(spe)
  mode(coords) <- "integer"
  as.data.frame(coords) %>%
    dplyr::rename(x = "pxl_col_in_fullres", y = "pxl_row_in_fullres") %>%
    write_tsv(file.path(path, "coordinates.tsv"))

  # the assay is stored features x observations; transpose to observations x features
  assay(spe, assay_name) %>%
    t() %>%
    Matrix::writeMM(file.path(path, "counts.mtx"))
}

spe <- fetch_data("spatialDLPFC_Visium")

keep_cols <- c("sample_id", "subject", "position", "sex", "age", "row", "col")
colData(spe) <- colData(spe)[, keep_cols]

keep_rows <- c("gene_name", "gene_version", "source", "gene_type")
rowData(spe) <- rowData(spe)[, keep_rows]

for (sample in unique(colData(spe)$sample_id)) {
  spe_sample <- spe[, spe$sample_id == sample]
  write_SpatialExperiment_to_folder(
    spe_sample,
    file.path(out_dir, sample),
    obs_col = c("row", "col")
  )
}

samples_df <- colData(spe) %>%
  as.data.frame() %>%
  dplyr::mutate(replicate = NA) %>%
  dplyr::select(patient = subject, sample = sample_id, position, replicate, sex, age) %>%
  dplyr::distinct() %>%
  dplyr::mutate(directory = sample) %>%
  dplyr::mutate(n_clusters = NA) # part of the samples.tsv spec; may be NA


## Metadata files
row.names(samples_df) <- NULL
write.table(samples_df, file = file.path(out_dir, "samples.tsv"), sep = "\t", col.names = NA, quote = FALSE, na = "")

json <- file(file.path(out_dir, "experiment.json"))
writeLines(c(paste0('{"technology": "', technology, '"}')), json)
close(json)
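A quick way to sanity-check what the loader wrote is to verify that the Matrix Market file lines up with the metadata tables. A sketch in Python (the sample path is hypothetical):

```python
import pandas as pd
from scipy.io import mmread

# Hypothetical path: one sample folder written by the loader above
sample_dir = "out_dir/151507"

counts = mmread(f"{sample_dir}/counts.mtx")
observations = pd.read_table(f"{sample_dir}/observations.tsv", index_col=0)
features = pd.read_table(f"{sample_dir}/features.tsv", index_col=0)

# counts.mtx is written observations x features, so the shapes must line up
assert counts.shape == (len(observations), len(features))
```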
--------------------------------------------------------------------------------
/workflows/02_preprocessing.smk:
--------------------------------------------------------------------------------
import json
import os

from shared.functions import check_files_in_folder, get_git_directory, get_sample_dirs


configfile: "path_config.yaml"
configfile: "excute_config.yaml"


GIT_DIR = get_git_directory(config)
DATASETS = config.pop("datasets")
datasets_selected = config.pop("datasets_selected")


# For every sample folder that contains all required input files,
# add the corresponding output file to the target list.
def create_input(file_list, input_file_name, data_dir):
    input_files = []
    for sample_dir in get_sample_dirs(data_dir):
        if check_files_in_folder(sample_dir, file_list):
            input_files.append(sample_dir + input_file_name)
    return input_files


def create_quality_control_input(wildcards):
    file_list = ["coordinates.tsv", "counts.mtx", "features.tsv", "observations.tsv"]
    all_qc_files = []
    for dataset in datasets_selected:
        data_dir = config["DATASET_DIR"] + "/" + dataset
        if "experiment.json" in os.listdir(data_dir):
            all_qc_files += create_input(file_list, "/qc/counts.mtx", data_dir)
            all_qc_files += create_input(file_list, "/qc/features.tsv", data_dir)
            all_qc_files += create_input(file_list, "/qc/observations.tsv", data_dir)
            all_qc_files += create_input(file_list, "/qc/coordinates.tsv", data_dir)
    return all_qc_files


# Get the QC parameters from the dataset's optargs.json file, if it exists
def get_opt(wildcards):
    dataset = wildcards["dataset"]

    # default values
    opt = {"min_cells": 1, "min_genes": 1, "min_counts": 1}

    # Override the defaults with any customized values from optargs.json
    if "optargs" in DATASETS[dataset] and os.path.exists(GIT_DIR + DATASETS[dataset]["optargs"]):
        with open(GIT_DIR + DATASETS[dataset]["optargs"], "r") as file:
            opt_load = json.load(file)
        # only keys that are known QC parameters are taken over
        opt.update({k: v for k, v in opt_load.items() if k in opt})

    return opt


####################### Preprocessing #######################
rule all:
    input:
        create_quality_control_input,


rule quality_control:
    input:
        coordinates=config["DATASET_DIR"] + "/{dataset}/{sample}/coordinates.tsv",
        matrix=config["DATASET_DIR"] + "/{dataset}/{sample}/counts.mtx",
        features=config["DATASET_DIR"] + "/{dataset}/{sample}/features.tsv",
        observations=config["DATASET_DIR"] + "/{dataset}/{sample}/observations.tsv",
    output:
        dir=directory(config["DATASET_DIR"] + "/{dataset}/{sample}/qc"),
        counts=config["DATASET_DIR"] + "/{dataset}/{sample}/qc/counts.mtx",
        features=config["DATASET_DIR"] + "/{dataset}/{sample}/qc/features.tsv",
        observations=config["DATASET_DIR"] + "/{dataset}/{sample}/qc/observations.tsv",
        coordinates=config["DATASET_DIR"] + "/{dataset}/{sample}/qc/coordinates.tsv",
    conda:
        GIT_DIR + "preprocessing/quality_control/qc_scanpy.yml"
    params:
        opt=get_opt,
    shell:
        """
        python {GIT_DIR}preprocessing/quality_control/qc_scanpy.py \
            -c {input.coordinates} \
            -m {input.matrix} \
            -f {input.features} \
            -o {input.observations} \
            --min_genes {params.opt[min_genes]} \
            --min_cells {params.opt[min_cells]} \
            --min_counts {params.opt[min_counts]} \
            -d {output.dir}
        """
--------------------------------------------------------------------------------
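The merge in `get_opt` only lets an optargs.json override keys that already exist in the defaults, so unknown entries are silently ignored. A self-contained sketch of that behaviour (the loaded values are hypothetical):

```python
# Defaults used by the workflow; only these keys can be overridden
defaults = {"min_cells": 1, "min_genes": 1, "min_counts": 1}

# Hypothetical optargs.json content for one dataset
opt_load = {"min_genes": 10, "n_clusters": 7}  # n_clusters is not a QC key

defaults.update({k: v for k, v in opt_load.items() if k in defaults})
print(defaults)  # {'min_cells': 1, 'min_genes': 10, 'min_counts': 1}
```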