├── make_figures ├── figures │ └── png │ │ ├── figure 2 basics-1.png │ │ ├── figure s1 data_qc.png │ │ ├── figure s8 dcdfg-1.png │ │ ├── figure 1 schematic-1.png │ │ ├── figure s7 simulation-1.png │ │ ├── figure 3 all published-1.png │ │ ├── figure s2 basics supp-1.png │ │ ├── figure s4 preprocessing-1.png │ │ ├── figure s3 stratify targets-1.png │ │ ├── figure s6 stratified split-1.png │ │ ├── figure s9 all published details-1.png │ │ └── figure s5 why is the mean a strong baseline-1.png ├── simple_simulation.R ├── add_lit_review_to_data_collection.R ├── export_benchmark_results.sh ├── figure_2_demo.py ├── global_effects │ ├── dixit.txt │ ├── freimer.txt │ ├── replogle.txt │ ├── adamson.txt │ └── frangieh_IFNg_v3.txt ├── timeseries_differential_expression.py ├── psc_tf_due_diligence.py ├── figure_s1_timeseries.py ├── variance_decomposition.py ├── cross_dataset_correlations.py ├── figure_s1_effects.py ├── plotting_script_unused.R ├── .load_perturbation(dataset) └── replicate_correlations_and_stereotypical_effects.py ├── package.json ├── experiments ├── 1.4.1_1 │ └── metadata.json ├── 1.4.1_2 │ └── metadata.json ├── 1.0_10 │ └── metadata.json ├── 1.0_8 │ └── metadata.json ├── 1.0_13 │ └── metadata.json ├── 1.0_14 │ └── metadata.json ├── 1.0_2 │ └── metadata.json ├── 1.0_9 │ └── metadata.json ├── 1.4.2_13 │ └── metadata.json ├── 1.0_12 │ └── metadata.json ├── 1.0_15 │ └── metadata.json ├── 1.4.2_12 │ └── metadata.json ├── 1.4.2_2 │ └── metadata.json ├── 1.4.2_3 │ └── metadata.json ├── 1.4.2_4 │ └── metadata.json ├── 1.6.3_4 │ └── metadata.json ├── 1.6.3_7 │ └── metadata.json ├── 1.0_4 │ └── metadata.json ├── 1.0_5 │ └── metadata.json ├── 1.4.2_14 │ └── metadata.json ├── 1.6.3_2 │ └── metadata.json ├── 1.0_11 │ └── metadata.json ├── 1.0_3 │ └── metadata.json ├── 1.4.3_12 │ └── metadata.json ├── 1.6.3_3 │ └── metadata.json ├── 1.0_6 │ └── metadata.json ├── 1.4.3_7 │ └── metadata.json ├── 1.4.3_8 │ └── metadata.json ├── 1.6.3_5 │ └── metadata.json ├── 1.4.3_10 │ └── 
metadata.json ├── 1.4.3_2 │ └── metadata.json ├── 1.4.3_9 │ └── metadata.json ├── 1.6.3_6 │ └── metadata.json ├── 1.8.3_2 │ └── metadata.json ├── 1.8.3_7 │ └── metadata.json ├── 1.8.3_8 │ └── metadata.json ├── 1.8.3_9 │ └── metadata.json ├── 1.4.3_13 │ └── metadata.json ├── 1.4.3_14 │ └── metadata.json ├── 1.4.3_4 │ └── metadata.json ├── 1.8.2_7 │ └── metadata.json ├── 1.8.3_3 │ └── metadata.json ├── 1.4.3_15 │ └── metadata.json ├── 1.4.3_3 │ └── metadata.json ├── 1.8.3_5 │ └── metadata.json ├── 1.4.3_5 │ └── metadata.json ├── 1.8.3_6 │ └── metadata.json ├── 1.8.2_2 │ └── metadata.json ├── 1.8.2_4 │ └── metadata.json ├── 1.8.2_5 │ └── metadata.json ├── 1.4.3_6 │ └── metadata.json ├── 1.4.3_11 │ └── metadata.json ├── 1.8.2_3 │ └── metadata.json ├── 1.0_0 │ └── metadata.json ├── 1.4.2_8 │ └── metadata.json ├── 1.6.1_4 │ └── metadata.json ├── 1.8.4_1 │ └── metadata.json ├── 1.4.2_10 │ └── metadata.json ├── 1.4.2_11 │ └── metadata.json ├── 1.4.2_6 │ └── metadata.json ├── 1.4.2_5 │ └── metadata.json ├── 1.6.1_19 │ └── metadata.json ├── 1.6.3_8 │ └── metadata.json ├── 1.6.1_3 │ └── metadata.json ├── 1.6.1_2 │ └── metadata.json ├── 1.6.3_1 │ └── metadata.json ├── 1.3.3_7 │ └── metadata.json ├── 1.3.3_8 │ └── metadata.json ├── 1.3.3_9 │ └── metadata.json ├── 1.3.3_10 │ └── metadata.json ├── 1.6.1_6 │ └── metadata.json ├── 1.8.4_7 │ └── metadata.json ├── 1.3.3_3 │ └── metadata.json ├── 1.3.3_5 │ └── metadata.json ├── 1.8.4_2 │ └── metadata.json ├── 1.8.4_8 │ └── metadata.json ├── 1.8.4_9 │ └── metadata.json ├── 1.3.3_6 │ └── metadata.json ├── 1.8.4_3 │ └── metadata.json ├── 1.8.4_5 │ └── metadata.json ├── 1.8.4_6 │ └── metadata.json ├── 1.9_3 │ └── metadata.json ├── 1.9_0 │ └── metadata.json ├── 1.9_4 │ └── metadata.json ├── 1.6.1_16 │ └── metadata.json ├── 1.6.1_7 │ └── metadata.json ├── 1.9_2 │ └── metadata.json ├── 1.2.2_8 │ └── metadata.json ├── 1.6.1_10 │ └── metadata.json ├── 1.6.1_11 │ └── metadata.json ├── 1.6.1_13 │ └── metadata.json ├── 1.6.1_14 │ └── 
metadata.json ├── 1.6.1_15 │ └── metadata.json ├── 1.6.1_9 │ └── metadata.json ├── 1.2.2_12 │ └── metadata.json ├── 1.2.2_7 │ └── metadata.json ├── 1.6.1_17 │ └── metadata.json ├── 1.6.1_8 │ └── metadata.json ├── 1.2.2_10 │ └── metadata.json ├── 1.2.2_13 │ └── metadata.json ├── 1.2.2_2 │ └── metadata.json ├── 1.2.2_9 │ └── metadata.json ├── 1.2.2_3 │ └── metadata.json ├── 1.2.2_5 │ └── metadata.json ├── 1.6.1_12 │ └── metadata.json ├── 1.2.2_11 │ └── metadata.json ├── 1.3.1_2 │ └── metadata.json ├── 1.2.2_6 │ └── metadata.json ├── 1.4.4_7 │ └── metadata.json ├── 1.4.4_8 │ └── metadata.json ├── 1.4.4_3 │ └── metadata.json ├── 1.4.4_4 │ └── metadata.json ├── 1.4.4_5 │ └── metadata.json ├── 1.4.4_6 │ └── metadata.json ├── 1.3.2_5 │ └── metadata.json ├── 1.3.2_4 │ └── metadata.json ├── 1.5.1_4 │ └── metadata.json ├── 1.3.2_3 │ └── metadata.json ├── 1.5.1_5 │ └── metadata.json ├── 1.5.1_3 │ └── metadata.json ├── 1.5.1_6 │ └── metadata.json ├── 1.2.2_16 │ └── metadata.json ├── 1.3.2_2 │ └── metadata.json ├── 1.0_7 │ └── metadata.json ├── 1.2.2_17 │ └── metadata.json ├── 1.5.1_1 │ └── metadata.json ├── 1.5.1_2 │ └── metadata.json ├── 1.8.5_1 │ └── metadata.json ├── 1.2.2_19 │ └── metadata.json ├── 1.1.1_1 │ └── metadata.json ├── 1.2.2_20 │ └── metadata.json ├── 1.2.2_18 │ └── metadata.json ├── 1.2.2_21 │ └── metadata.json ├── ggrn_docker_backend_celloracle │ └── metadata.json ├── 1.8.5_2 │ └── metadata.json ├── 1.8.5_7 │ └── metadata.json ├── 1.8.5_8 │ └── metadata.json ├── 1.8.5_9 │ └── metadata.json ├── 1.8.5_3 │ └── metadata.json ├── test │ └── metadata.json ├── 1.8.5_5 │ └── metadata.json ├── 1.8.5_6 │ └── metadata.json ├── ggrn_docker_backend │ └── metadata.json ├── 1.4.2_1 │ └── metadata.json ├── 1.3.3_2 │ └── metadata.json ├── 1.3.2_1 │ └── metadata.json ├── 1.2.2_1 │ └── metadata.json ├── 1.2.2_15 │ └── metadata.json ├── 1.2.2_22 │ └── metadata.json ├── 1.4.1_3 │ └── metadata.json ├── 1.4.1_4 │ └── metadata.json ├── 1.1.2_1 │ └── metadata.json ├── 1.6.3_0 │ └── 
metadata.json ├── 1.3.1_1 │ └── metadata.json ├── 1.5.2_0 │ └── metadata.json ├── 1.8.3_1 │ └── metadata.json ├── 1.3.3_1 │ └── metadata.json ├── 1.2.2_14 │ └── metadata.json ├── 1.8.2_1 │ └── metadata.json ├── 1.6.1_18 │ └── metadata.json ├── 1.3.2_10 │ └── metadata.json ├── 1.3.2_6 │ └── metadata.json ├── 1.3.2_9 │ └── metadata.json ├── 1.3.2_7 │ └── metadata.json ├── 1.3.2_8 │ └── metadata.json ├── 1.0_1 │ └── metadata.json ├── singularity_demo │ └── metadata.json ├── 1.4.5_1 │ └── metadata.json ├── 1.4.4_1 │ └── metadata.json ├── 1.9_1 │ └── metadata.json ├── 1.5.1_0 │ └── metadata.json ├── 1.6.1_1 │ └── metadata.json ├── 1.4.3_1 │ └── metadata.json ├── 1.4.4_2 │ └── metadata.json ├── 5_0 │ └── metadata.json └── 1.4.1_0 │ └── metadata.json ├── environment ├── preprint version 1 commit hashes used.txt ├── conda_inputs.yaml ├── install_minimal.sh ├── install.sh └── install.md ├── .gitignore ├── run_experiments.sh ├── license.md ├── gather_experiment_metadata.py ├── README.md └── guiding_questions.txt /make_figures/figures/png/figure 2 basics-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ekernf01/perturbation_benchmarking/HEAD/make_figures/figures/png/figure 2 basics-1.png -------------------------------------------------------------------------------- /make_figures/figures/png/figure s1 data_qc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ekernf01/perturbation_benchmarking/HEAD/make_figures/figures/png/figure s1 data_qc.png -------------------------------------------------------------------------------- /make_figures/figures/png/figure s8 dcdfg-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ekernf01/perturbation_benchmarking/HEAD/make_figures/figures/png/figure s8 dcdfg-1.png 
-------------------------------------------------------------------------------- /make_figures/figures/png/figure 1 schematic-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ekernf01/perturbation_benchmarking/HEAD/make_figures/figures/png/figure 1 schematic-1.png -------------------------------------------------------------------------------- /make_figures/figures/png/figure s7 simulation-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ekernf01/perturbation_benchmarking/HEAD/make_figures/figures/png/figure s7 simulation-1.png -------------------------------------------------------------------------------- /make_figures/figures/png/figure 3 all published-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ekernf01/perturbation_benchmarking/HEAD/make_figures/figures/png/figure 3 all published-1.png -------------------------------------------------------------------------------- /make_figures/figures/png/figure s2 basics supp-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ekernf01/perturbation_benchmarking/HEAD/make_figures/figures/png/figure s2 basics supp-1.png -------------------------------------------------------------------------------- /make_figures/figures/png/figure s4 preprocessing-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ekernf01/perturbation_benchmarking/HEAD/make_figures/figures/png/figure s4 preprocessing-1.png -------------------------------------------------------------------------------- /make_figures/figures/png/figure s3 stratify targets-1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ekernf01/perturbation_benchmarking/HEAD/make_figures/figures/png/figure s3 stratify targets-1.png -------------------------------------------------------------------------------- /make_figures/figures/png/figure s6 stratified split-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ekernf01/perturbation_benchmarking/HEAD/make_figures/figures/png/figure s6 stratified split-1.png -------------------------------------------------------------------------------- /make_figures/figures/png/figure s9 all published details-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ekernf01/perturbation_benchmarking/HEAD/make_figures/figures/png/figure s9 all published details-1.png -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "dependencies": { 3 | "bin": "^0.0.0", 4 | "canvas": "^2.11.0", 5 | "vega": "^5.22.1", 6 | "vega-cli": "^5.22.1", 7 | "vega-lite": "^5.6.0" 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /experiments/1.4.1_1/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.4.1_1", 3 | "is_active": true, 4 | "refers_to": "1.4.1_0", 5 | "perturbation_dataset": "fantom4", 6 | "visualization_embedding": "X_pca" 7 | } -------------------------------------------------------------------------------- /make_figures/figures/png/figure s5 why is the mean a strong baseline-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ekernf01/perturbation_benchmarking/HEAD/make_figures/figures/png/figure s5 why is the mean a strong baseline-1.png 
-------------------------------------------------------------------------------- /experiments/1.4.1_2/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.4.1_2", 3 | "is_active": true, 4 | "refers_to": "1.4.1_0", 5 | "perturbation_dataset": "BETS_A549", 6 | "visualization_embedding": "X_pca" 7 | } -------------------------------------------------------------------------------- /experiments/1.0_10/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.0_10", 3 | "nickname": "ml methods", 4 | "readme": "Like 1.0_1 but replogle3 instead of nakatake.", 5 | "refers_to": "1.0_1", 6 | "is_active": true, 7 | "perturbation_dataset": "replogle3" 8 | } 9 | 10 | -------------------------------------------------------------------------------- /experiments/1.0_8/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.0_8", 3 | "nickname": "ml methods", 4 | "readme": "Like 1.0_1 but replogle instead of nakatake.", 5 | "refers_to": "1.0_1", 6 | "is_active": true, 7 | "perturbation_dataset": "replogle" 8 | } 9 | 10 | -------------------------------------------------------------------------------- /environment/preprint version 1 commit hashes used.txt: -------------------------------------------------------------------------------- 1 | geneformer_embeddings@2335437 2 | ggrn@47545673b 3 | ggrn_backend2@40b755b78d 4 | load_networks@768809c 5 | load_perturbations@50732922a 6 | perturbation_benchmarking@f2be496e652 7 | perturbation_benchmarking_package@b8219aceae 8 | 9 | -------------------------------------------------------------------------------- /experiments/1.0_13/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.0_13", 3 | "nickname": "ml methods", 4 | "readme": "Like 1.0_1 but different data instead of nakatake.", 5 | 
"refers_to": "1.0_1", 6 | "is_active": true, 7 | "perturbation_dataset": "joung" 8 | } 9 | 10 | -------------------------------------------------------------------------------- /experiments/1.0_14/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.0_14", 3 | "nickname": "ml methods", 4 | "readme": "Like 1.0_1 but different data instead of nakatake.", 5 | "refers_to": "1.0_1", 6 | "is_active": true, 7 | "perturbation_dataset": "dixit" 8 | } 9 | 10 | -------------------------------------------------------------------------------- /experiments/1.0_2/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.0_2", 3 | "nickname": "ml methods", 4 | "readme": "Like 1.0_1 but replogle2 instead of nakatake.", 5 | "refers_to": "1.0_1", 6 | "is_active": true, 7 | "perturbation_dataset": "replogle2" 8 | } 9 | 10 | -------------------------------------------------------------------------------- /experiments/1.0_9/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.0_9", 3 | "nickname": "ml methods", 4 | "readme": "Like 1.0_1 but replogle4 instead of nakatake.", 5 | "refers_to": "1.0_1", 6 | "is_active": true, 7 | "perturbation_dataset": "replogle4" 8 | } 9 | 10 | -------------------------------------------------------------------------------- /experiments/1.4.2_13/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.4.2_13", 3 | "nickname": "GEARS", 4 | "readme": "Test of GEARS on other datasets.", 5 | "refers_to": "1.4.2_1", 6 | "perturbation_dataset": "frangieh_IFNg_v2", 7 | "num_genes":1000 8 | } 9 | 10 | 11 | -------------------------------------------------------------------------------- /experiments/1.0_12/metadata.json: -------------------------------------------------------------------------------- 1 
| { 2 | "unique_id": "1.0_12", 3 | "nickname": "ml methods", 4 | "readme": "Like 1.0_1 but different data instead of nakatake.", 5 | "refers_to": "1.0_1", 6 | "is_active": true, 7 | "perturbation_dataset": "norman" 8 | } 9 | 10 | -------------------------------------------------------------------------------- /experiments/1.0_15/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.0_15", 3 | "nickname": "ml methods", 4 | "readme": "Like 1.0_1 but different data instead of nakatake.", 5 | "refers_to": "1.0_1", 6 | "is_active": true, 7 | "perturbation_dataset": "adamson" 8 | } 9 | 10 | -------------------------------------------------------------------------------- /experiments/1.4.2_12/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.4.2_12", 3 | "nickname": "GEARS", 4 | "readme": "Test of GEARS on other datasets.", 5 | "refers_to": "1.4.2_1", 6 | "perturbation_dataset": "frangieh_IFNg_v1", 7 | "num_genes":1000 8 | } 9 | 10 | 11 | -------------------------------------------------------------------------------- /experiments/1.4.2_2/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.4.2_2", 3 | "nickname": "GEARS", 4 | "readme": "Test of GEARS on their preferred demo datasets.", 5 | "refers_to": "1.4.2_1", 6 | "num_genes":10000, 7 | "perturbation_dataset": "adamson" 8 | } 9 | 10 | 11 | -------------------------------------------------------------------------------- /experiments/1.4.2_3/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.4.2_3", 3 | "nickname": "GEARS", 4 | "readme": "Test of GEARS on their preferred demo datasets.", 5 | "refers_to": "1.4.2_1", 6 | "perturbation_dataset": "dixit", 7 | "num_genes":10000 8 | } 9 | 10 | 11 | 
-------------------------------------------------------------------------------- /experiments/1.4.2_4/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.4.2_4", 3 | "nickname": "GEARS", 4 | "readme": "Test of GEARS on their preferred demo datasets.", 5 | "refers_to": "1.4.2_1", 6 | "num_genes":10000, 7 | "perturbation_dataset": "norman" 8 | } 9 | 10 | 11 | -------------------------------------------------------------------------------- /experiments/1.6.3_4/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.6.3_4", 3 | "nickname": "timeseries pilot", 4 | "question": "1.6.3", 5 | "refers_to": "1.6.3_0", 6 | "eligible_regulators": "tf", 7 | "species": "zebrafish", 8 | "perturbation_dataset": "saunders_axial_mesoderm" 9 | } -------------------------------------------------------------------------------- /experiments/1.6.3_7/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.6.3_7", 3 | "nickname": "timeseries pilot", 4 | "question": "1.6.3", 5 | "refers_to": "1.6.3_0", 6 | "eligible_regulators": "tf", 7 | "species": "zebrafish", 8 | "perturbation_dataset": "saunders_pigment_cells" 9 | } -------------------------------------------------------------------------------- /experiments/1.0_4/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.0_4", 3 | "nickname": "ml methods", 4 | "readme": "Like 1.0_1 (various sklearn methods) but using only the GFP controls.", 5 | "refers_to": "1.0_1", 6 | "is_active": false, 7 | "control_subtype": "Emerald" 8 | } 9 | 10 | -------------------------------------------------------------------------------- /experiments/1.0_5/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.0_5", 3 | "nickname": 
"ml methods", 4 | "readme": "Like 1.0_1 but replogle2_tf_only instead of nakatake.", 5 | "refers_to": "1.0_1", 6 | "is_active": true, 7 | "perturbation_dataset": "replogle2_tf_only" 8 | } 9 | 10 | -------------------------------------------------------------------------------- /experiments/1.4.2_14/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.4.2_14", 3 | "nickname": "GEARS", 4 | "readme": "Test of GEARS on other datasets.", 5 | "refers_to": "1.4.2_1", 6 | "perturbation_dataset": "nakatake_simulated_scrna", 7 | "num_genes":1000 8 | } 9 | 10 | 11 | -------------------------------------------------------------------------------- /experiments/1.6.3_2/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.6.3_2", 3 | "nickname": "timeseries pilot", 4 | "question": "1.6.3", 5 | "refers_to": "1.6.3_0", 6 | "eligible_regulators": "tf", 7 | "species": "human", 8 | "perturbation_dataset": "fantom4" 9 | } 10 | 11 | 12 | -------------------------------------------------------------------------------- /experiments/1.0_11/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.0_11", 3 | "nickname": "ml methods", 4 | "readme": "Like 1.0_1 but different data instead of nakatake.", 5 | "refers_to": "1.0_1", 6 | "is_active": true, 7 | "perturbation_dataset": "nakatake_simulated_scrna" 8 | } 9 | 10 | -------------------------------------------------------------------------------- /experiments/1.0_3/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.0_3", 3 | "nickname": "ml methods", 4 | "readme": "Like 1.0_1 but frangieh pseudobulk data instead of nakatake.", 5 | "refers_to": "1.0_1", 6 | "is_active": true, 7 | "perturbation_dataset": "frangieh_IFNg_v3" 8 | } 9 | 10 | 
-------------------------------------------------------------------------------- /experiments/1.4.3_12/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.4.3_12", 3 | "nickname": "base_network", 4 | "readme": "Network experiment but on joung", 5 | "question": "1.4.3", 6 | "is_active": true, 7 | "refers_to": "1.4.3_1", 8 | "perturbation_dataset": "joung" 9 | } 10 | 11 | -------------------------------------------------------------------------------- /experiments/1.6.3_3/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.6.3_3", 3 | "nickname": "timeseries pilot", 4 | "question": "1.6.3", 5 | "refers_to": "1.6.3_0", 6 | "eligible_regulators": "tf", 7 | "species": "human", 8 | "perturbation_dataset": "BETS_A549" 9 | } 10 | 11 | 12 | -------------------------------------------------------------------------------- /experiments/1.0_6/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.0_6", 3 | "nickname": "ml methods", 4 | "readme": "Like 1.0_1 but replogle2_large_effect instead of nakatake.", 5 | "refers_to": "1.0_1", 6 | "is_active": true, 7 | "perturbation_dataset": "replogle2_large_effect" 8 | } 9 | 10 | -------------------------------------------------------------------------------- /experiments/1.4.3_7/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.4.3_7", 3 | "nickname": "base_network", 4 | "readme": "Network experiment but on freimer", 5 | "question": "1.4.3", 6 | "is_active": true, 7 | "refers_to": "1.4.3_1", 8 | "perturbation_dataset": "freimer" 9 | } 10 | 11 | -------------------------------------------------------------------------------- /experiments/1.4.3_8/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": 
"1.4.3_8", 3 | "nickname": "base_network", 4 | "readme": "Network experiment but on replogle", 5 | "question": "1.4.3", 6 | "is_active": true, 7 | "refers_to": "1.4.3_1", 8 | "perturbation_dataset": "replogle" 9 | } 10 | 11 | -------------------------------------------------------------------------------- /experiments/1.6.3_5/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.6.3_5", 3 | "nickname": "timeseries pilot", 4 | "question": "1.6.3", 5 | "refers_to": "1.6.3_0", 6 | "eligible_regulators": "tf", 7 | "species": "zebrafish", 8 | "perturbation_dataset": "saunders_blood" 9 | } 10 | 11 | 12 | -------------------------------------------------------------------------------- /experiments/1.4.3_10/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.4.3_10", 3 | "nickname": "base_network", 4 | "readme": "Network experiment but on replogle3", 5 | "question": "1.4.3", 6 | "is_active": true, 7 | "refers_to": "1.4.3_1", 8 | "perturbation_dataset": "replogle3" 9 | } 10 | 11 | -------------------------------------------------------------------------------- /experiments/1.4.3_2/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.4.3_2", 3 | "nickname": "base_network", 4 | "readme": "Network experiment but on replogle2", 5 | "question": "1.4.3", 6 | "is_active": true, 7 | "refers_to": "1.4.3_1", 8 | "perturbation_dataset": "replogle2" 9 | } 10 | 11 | -------------------------------------------------------------------------------- /experiments/1.4.3_9/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.4.3_9", 3 | "nickname": "base_network", 4 | "readme": "Network experiment but on replogle4", 5 | "question": "1.4.3", 6 | "is_active": true, 7 | "refers_to": "1.4.3_1", 8 | "perturbation_dataset": 
"replogle4" 9 | } 10 | 11 | -------------------------------------------------------------------------------- /experiments/1.6.3_6/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.6.3_6", 3 | "nickname": "timeseries pilot", 4 | "question": "1.6.3", 5 | "refers_to": "1.6.3_0", 6 | "eligible_regulators": "tf", 7 | "species": "zebrafish", 8 | "perturbation_dataset": "saunders_endoderm" 9 | } 10 | 11 | 12 | -------------------------------------------------------------------------------- /experiments/1.8.3_2/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.8.3_2", 3 | "nickname": "gene_selection", 4 | "readme": "Gene-selection experiment but with a different dataset.", 5 | "question": "1.8.3", 6 | "is_active": true, 7 | "refers_to": "1.8.3_1", 8 | "perturbation_dataset": "replogle2" 9 | } -------------------------------------------------------------------------------- /experiments/1.8.3_7/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.8.3_7", 3 | "nickname": "gene_selection", 4 | "readme": "Gene-selection experiment but with a different dataset.", 5 | "question": "1.8.3", 6 | "is_active": true, 7 | "refers_to": "1.8.3_1", 8 | "perturbation_dataset": "freimer" 9 | } -------------------------------------------------------------------------------- /experiments/1.8.3_8/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.8.3_8", 3 | "nickname": "gene_selection", 4 | "readme": "Gene-selection experiment but with a different dataset.", 5 | "question": "1.8.3", 6 | "is_active": true, 7 | "refers_to": "1.8.3_1", 8 | "perturbation_dataset": "replogle" 9 | } -------------------------------------------------------------------------------- /experiments/1.8.3_9/metadata.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.8.3_9", 3 | "nickname": "gene_selection", 4 | "readme": "Gene-selection experiment but with a different dataset.", 5 | "question": "1.8.3", 6 | "is_active": true, 7 | "refers_to": "1.8.3_1", 8 | "perturbation_dataset": "replogle4" 9 | } -------------------------------------------------------------------------------- /experiments/1.4.3_13/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.4.3_13", 3 | "nickname": "base_network", 4 | "readme": "Network experiment but on a different dataset", 5 | "question": "1.4.3", 6 | "is_active": true, 7 | "refers_to": "1.4.3_1", 8 | "perturbation_dataset": "norman" 9 | } 10 | 11 | -------------------------------------------------------------------------------- /experiments/1.4.3_14/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.4.3_14", 3 | "nickname": "base_network", 4 | "readme": "Network experiment but on a different dataset", 5 | "question": "1.4.3", 6 | "is_active": true, 7 | "refers_to": "1.4.3_1", 8 | "perturbation_dataset": "dixit" 9 | } 10 | 11 | -------------------------------------------------------------------------------- /experiments/1.4.3_4/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.4.3_4", 3 | "nickname": "base_network", 4 | "readme": "Networks experiment but only using GFP controls.", 5 | "question": "1.4.3", 6 | "is_active": false, 7 | "refers_to": "1.4.3_1", 8 | "control_subtype": "Emerald" 9 | } 10 | 11 | -------------------------------------------------------------------------------- /experiments/1.8.2_7/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.8.2_7", 3 | "nickname": "how_much_averaging", 4 | "readme": 
"Replicate-averaging experiment but with a different dataset.", 5 | "question": "1.8.2", 6 | "is_active": true, 7 | "refers_to": "1.8.2_1", 8 | "perturbation_dataset": "freimer" 9 | } -------------------------------------------------------------------------------- /experiments/1.8.3_3/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.8.3_3", 3 | "nickname": "gene_selection", 4 | "readme": "Gene-selection experiment but with a different dataset.", 5 | "question": "1.8.3", 6 | "is_active": true, 7 | "refers_to": "1.8.3_1", 8 | "perturbation_dataset": "frangieh_IFNg_v3" 9 | } -------------------------------------------------------------------------------- /experiments/1.4.3_15/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.4.3_15", 3 | "nickname": "base_network", 4 | "readme": "Network experiment but on a different dataset", 5 | "question": "1.4.3", 6 | "is_active": true, 7 | "refers_to": "1.4.3_1", 8 | "perturbation_dataset": "adamson" 9 | } 10 | 11 | -------------------------------------------------------------------------------- /experiments/1.4.3_3/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.4.3_3", 3 | "nickname": "base_network", 4 | "readme": "Network experiment but on frangieh_IFNg_v3", 5 | "question": "1.4.3", 6 | "is_active": true, 7 | "refers_to": "1.4.3_1", 8 | "perturbation_dataset": "frangieh_IFNg_v3" 9 | } 10 | 11 | -------------------------------------------------------------------------------- /experiments/1.8.3_5/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.8.3_5", 3 | "nickname": "gene_selection", 4 | "readme": "Gene-selection experiment but with a different dataset.", 5 | "question": "1.8.3", 6 | "is_active": true, 7 | "refers_to": "1.8.3_1", 8 | 
"perturbation_dataset": "replogle2_tf_only" 9 | } -------------------------------------------------------------------------------- /experiments/1.4.3_5/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.4.3_5", 3 | "nickname": "base_network", 4 | "readme": "Network experiment but on replogle2_tf_only", 5 | "question": "1.4.3", 6 | "is_active": true, 7 | "refers_to": "1.4.3_1", 8 | "perturbation_dataset": "replogle2_tf_only" 9 | } 10 | 11 | -------------------------------------------------------------------------------- /experiments/1.8.3_6/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.8.3_6", 3 | "nickname": "gene_selection", 4 | "readme": "Gene-selection experiment but with a different dataset.", 5 | "question": "1.8.3", 6 | "is_active": true, 7 | "refers_to": "1.8.3_1", 8 | "perturbation_dataset": "replogle2_large_effect" 9 | } -------------------------------------------------------------------------------- /experiments/1.8.2_2/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.8.2_2", 3 | "nickname": "how_much_averaging", 4 | "readme": "Replicate-averaging experiment but with a different dataset.", 5 | "question": "1.8.2", 6 | "is_active": true, 7 | "refers_to": "1.8.2_1", 8 | "perturbation_dataset": "adamson" 9 | } 10 | -------------------------------------------------------------------------------- /experiments/1.8.2_4/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.8.2_4", 3 | "nickname": "how_much_averaging", 4 | "readme": "Replicate-averaging experiment but with a different dataset.", 5 | "question": "1.8.2", 6 | "is_active": true, 7 | "refers_to": "1.8.2_1", 8 | "perturbation_dataset": "dixit" 9 | } 10 | 
-------------------------------------------------------------------------------- /experiments/1.8.2_5/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.8.2_5", 3 | "nickname": "how_much_averaging", 4 | "readme": "Replicate-averaging experiment but with a different dataset.", 5 | "question": "1.8.2", 6 | "is_active": true, 7 | "refers_to": "1.8.2_1", 8 | "perturbation_dataset": "norman" 9 | } 10 | -------------------------------------------------------------------------------- /experiments/1.4.3_6/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.4.3_6", 3 | "nickname": "base_network", 4 | "readme": "Network experiment but on replogle2_large_effect", 5 | "question": "1.4.3", 6 | "is_active": true, 7 | "refers_to": "1.4.3_1", 8 | "perturbation_dataset": "replogle2_large_effect" 9 | } 10 | 11 | -------------------------------------------------------------------------------- /experiments/1.4.3_11/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.4.3_11", 3 | "nickname": "base_network", 4 | "readme": "Network experiment but on nakatake_simulated_scrna", 5 | "question": "1.4.3", 6 | "is_active": true, 7 | "refers_to": "1.4.3_1", 8 | "perturbation_dataset": "nakatake_simulated_scrna" 9 | } 10 | 11 | -------------------------------------------------------------------------------- /experiments/1.8.2_3/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.8.2_3", 3 | "nickname": "how_much_averaging", 4 | "readme": "Replicate-averaging experiment but with a different dataset.", 5 | "question": "1.8.2", 6 | "is_active": true, 7 | "refers_to": "1.8.2_1", 8 | "perturbation_dataset": "frangieh_IFNg_v2" 9 | } 10 | -------------------------------------------------------------------------------- 
/experiments/1.0_0/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.0_0", 3 | "nickname": "ml methods tiny", 4 | "readme": "Like 1.0_1 but faster / smaller, mostly for realistic testing.", 5 | "refers_to": "1.0_1", 6 | "num_genes": 1000, 7 | "regression_method":[ 8 | "mean", 9 | "RidgeCV" 10 | ] 11 | } 12 | 13 | -------------------------------------------------------------------------------- /experiments/1.4.2_8/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.4.2_8", 3 | "is_active": false, 4 | "nickname": "GEARS", 5 | "readme": "Test of GEARS on other datasets.", 6 | "refers_to": "1.4.2_1", 7 | "desired_heldout_fraction": 0.2, 8 | "perturbation_dataset": "norman", 9 | "num_genes":10000 10 | } 11 | 12 | 13 | -------------------------------------------------------------------------------- /experiments/1.6.1_4/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.6.1_4", 3 | "nickname": "dcdfg", 4 | "readme": "Like experiment 1.6.1_1, but different data.", 5 | "refers_to": "1.6.1_1", 6 | "is_active": true, 7 | "perturbation_dataset": "joung", 8 | "network_datasets": { 9 | "dense":{} 10 | } 11 | } 12 | 13 | -------------------------------------------------------------------------------- /experiments/1.8.4_1/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.8.4_1", 3 | "nickname": "stratified_split", 4 | "readme": "Network experiment but with a different data split.", 5 | "question": "1.4.3", 6 | "is_active": true, 7 | "refers_to": "1.4.3_1", 8 | "type_of_split": "stratified", 9 | "merge_replicates": false 10 | } -------------------------------------------------------------------------------- /experiments/1.4.2_10/metadata.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.4.2_10", 3 | "is_active": false, 4 | "nickname": "GEARS", 5 | "readme": "Test of GEARS on other datasets.", 6 | "refers_to": "1.4.2_1", 7 | "desired_heldout_fraction": 0.2, 8 | "perturbation_dataset": "nakatake", 9 | "num_genes":6000 10 | } 11 | 12 | 13 | -------------------------------------------------------------------------------- /experiments/1.4.2_11/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.4.2_11", 3 | "nickname": "GEARS", 4 | "readme": "Test of GEARS on other datasets.", 5 | "refers_to": "1.4.2_1", 6 | "is_active" : true, 7 | "type_of_split": "genetic_interaction", 8 | "perturbation_dataset": "norman", 9 | "num_genes":10000 10 | } 11 | 12 | 13 | -------------------------------------------------------------------------------- /experiments/1.4.2_6/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.4.2_6", 3 | "is_active": false, 4 | "nickname": "GEARS", 5 | "readme": "Test of GEARS on their preferred demo datasets.", 6 | "refers_to": "1.4.2_1", 7 | "desired_heldout_fraction": 0.2, 8 | "perturbation_dataset": "dixit", 9 | "num_genes":10000 10 | } 11 | 12 | 13 | -------------------------------------------------------------------------------- /experiments/1.4.2_5/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.4.2_5", 3 | "is_active": false, 4 | "nickname": "GEARS", 5 | "readme": "Test of GEARS on their preferred demo datasets.", 6 | "refers_to": "1.4.2_1", 7 | "desired_heldout_fraction": 0.2, 8 | "perturbation_dataset": "adamson", 9 | "num_genes":10000 10 | } 11 | 12 | 13 | -------------------------------------------------------------------------------- /experiments/1.6.1_19/metadata.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.6.1_19", 3 | "nickname": "dcdfg", 4 | "readme": "Exact repeat of 1.6.1_15, to check if the code is stochastic.", 5 | "refers_to": "1.6.1_1", 6 | "is_active": false, 7 | "perturbation_dataset": "dixit", 8 | "network_datasets": { 9 | "dense":{} 10 | } 11 | } 12 | 13 | -------------------------------------------------------------------------------- /experiments/1.6.3_8/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.6.3_8", 3 | "nickname": "timeseries pilot", 4 | "question": "1.6.3", 5 | "refers_to": "1.6.3_0", 6 | "eligible_regulators": "tf", 7 | "species": "mouse", 8 | "matching_method": "user", 9 | "matching_method_for_evaluation": "user", 10 | "perturbation_dataset": "paul2" 11 | } -------------------------------------------------------------------------------- /experiments/1.6.1_3/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.6.1_3", 3 | "nickname": "dcdfg", 4 | "readme": "Like experiment 1.6.1_1, but data preprocessed differently.", 5 | "refers_to": "1.6.1_1", 6 | "is_active": true, 7 | "perturbation_dataset": "frangieh_IFNg_v3", 8 | "network_datasets": { 9 | "dense":{} 10 | } 11 | } 12 | 13 | -------------------------------------------------------------------------------- /experiments/1.6.1_2/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.6.1_2", 3 | "nickname": "dcdfg", 4 | "readme": "Like experiment 1.6.1_1, but uses data preprocessed differently.", 5 | "refers_to": "1.6.1_1", 6 | "is_active": true, 7 | "perturbation_dataset": "frangieh_IFNg_v2", 8 | "network_datasets": { 9 | "dense":{} 10 | } 11 | } 12 | 13 | -------------------------------------------------------------------------------- /experiments/1.6.3_1/metadata.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.6.3_1", 3 | "nickname": "timeseries pilot", 4 | "question": "1.6.3", 5 | "refers_to": "1.6.3_0", 6 | "eligible_regulators": "tf", 7 | "species": "mouse", 8 | "matching_method": "user", 9 | "matching_method_for_evaluation": "user", 10 | "perturbation_dataset": "paul1" 11 | } 12 | 13 | 14 | -------------------------------------------------------------------------------- /experiments/1.3.3_7/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.3.3_7", 3 | "nickname": "TransferLearning", 4 | "readme": "Q1.3.3 is about whether we can learn causal effects by pretraining a big fat transformer on a big fat collection of scRNA data.", 5 | "question": "1.3.3", 6 | "is_active": true, 7 | "refers_to": "1.3.3_1", 8 | "perturbation_dataset": "freimer" 9 | } -------------------------------------------------------------------------------- /experiments/1.3.3_8/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.3.3_8", 3 | "nickname": "TransferLearning", 4 | "readme": "Q1.3.3 is about whether we can learn causal effects by pretraining a big fat transformer on a big fat collection of scRNA data.", 5 | "question": "1.3.3", 6 | "is_active": true, 7 | "refers_to": "1.3.3_1", 8 | "perturbation_dataset": "replogle" 9 | } -------------------------------------------------------------------------------- /experiments/1.3.3_9/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.3.3_9", 3 | "nickname": "TransferLearning", 4 | "readme": "Q1.3.3 is about whether we can learn causal effects by pretraining a big fat transformer on a big fat collection of scRNA data.", 5 | "question": "1.3.3", 6 | "is_active": true, 7 | "refers_to": "1.3.3_1", 8 | "perturbation_dataset": "replogle4" 9 | } 
-------------------------------------------------------------------------------- /experiments/1.3.3_10/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.3.3_10", 3 | "nickname": "TransferLearning", 4 | "readme": "Q1.3.3 is about whether we can learn causal effects by pretraining a big fat transformer on a big fat collection of scRNA data.", 5 | "question": "1.3.3", 6 | "is_active": true, 7 | "refers_to": "1.3.3_1", 8 | "perturbation_dataset": "replogle3" 9 | } -------------------------------------------------------------------------------- /experiments/1.6.1_6/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.6.1_6", 3 | "nickname": "dcdfg", 4 | "readme": "Like experiment 1.6.1_1, but with nakatake. Note: only 1000 genes selected, as in 1.6.1_1.", 5 | "refers_to": "1.6.1_1", 6 | "is_active": true, 7 | "perturbation_dataset": "nakatake", 8 | "network_datasets": { 9 | "dense":{} 10 | } 11 | } 12 | 13 | -------------------------------------------------------------------------------- /experiments/1.8.4_7/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.8.4_7", 3 | "nickname": "stratified_split", 4 | "readme": "Network experiment but with a different data split.", 5 | "question": "1.4.3", 6 | "is_active": true, 7 | "refers_to": "1.4.3_1", 8 | "type_of_split": "stratified", 9 | "perturbation_dataset": "freimer", 10 | "merge_replicates": false 11 | } -------------------------------------------------------------------------------- /experiments/1.3.3_3/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.3.3_3", 3 | "nickname": "TransferLearning", 4 | "readme": "Q1.3.3 is about whether we can learn causal effects by pretraining a big fat transformer on a big fat collection of scRNA data.", 5 | 
"question": "1.3.3", 6 | "is_active": true, 7 | "refers_to": "1.3.3_1", 8 | "perturbation_dataset": "frangieh_IFNg_v3" 9 | } -------------------------------------------------------------------------------- /experiments/1.3.3_5/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.3.3_5", 3 | "nickname": "TransferLearning", 4 | "readme": "Q1.3.3 is about whether we can learn causal effects by pretraining a big fat transformer on a big fat collection of scRNA data.", 5 | "question": "1.3.3", 6 | "is_active": true, 7 | "refers_to": "1.3.3_1", 8 | "perturbation_dataset": "replogle2_tf_only" 9 | } -------------------------------------------------------------------------------- /experiments/1.8.4_2/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.8.4_2", 3 | "nickname": "stratified_split", 4 | "readme": "Network experiment but with a different data split.", 5 | "question": "1.4.3", 6 | "is_active": true, 7 | "refers_to": "1.4.3_1", 8 | "type_of_split": "stratified", 9 | "perturbation_dataset": "replogle2", 10 | "merge_replicates": false 11 | } -------------------------------------------------------------------------------- /experiments/1.8.4_8/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.8.4_8", 3 | "nickname": "stratified_split", 4 | "readme": "Network experiment but with a different data split.", 5 | "question": "1.4.3", 6 | "is_active": true, 7 | "refers_to": "1.4.3_1", 8 | "type_of_split": "stratified", 9 | "perturbation_dataset": "replogle", 10 | "merge_replicates": false 11 | } -------------------------------------------------------------------------------- /experiments/1.8.4_9/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.8.4_9", 3 | "nickname": "stratified_split", 4 | 
"readme": "Network experiment but with a different data split.", 5 | "question": "1.4.3", 6 | "is_active": true, 7 | "refers_to": "1.4.3_1", 8 | "type_of_split": "stratified", 9 | "perturbation_dataset": "replogle4", 10 | "merge_replicates": false 11 | } -------------------------------------------------------------------------------- /experiments/1.3.3_6/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.3.3_6", 3 | "nickname": "TransferLearning", 4 | "readme": "Q1.3.3 is about whether we can learn causal effects by pretraining a big fat transformer on a big fat collection of scRNA data.", 5 | "question": "1.3.3", 6 | "is_active": true, 7 | "refers_to": "1.3.3_1", 8 | "perturbation_dataset": "replogle2_large_effect" 9 | } -------------------------------------------------------------------------------- /experiments/1.8.4_3/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.8.4_3", 3 | "nickname": "stratified_split", 4 | "readme": "Network experiment but with a different data split.", 5 | "question": "1.4.3", 6 | "is_active": true, 7 | "refers_to": "1.4.3_1", 8 | "type_of_split": "stratified", 9 | "perturbation_dataset": "frangieh_IFNg_v3", 10 | "merge_replicates": false 11 | } -------------------------------------------------------------------------------- /experiments/1.8.4_5/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.8.4_5", 3 | "nickname": "stratified_split", 4 | "readme": "Network experiment but with a different data split.", 5 | "question": "1.4.3", 6 | "is_active": true, 7 | "refers_to": "1.4.3_1", 8 | "type_of_split": "stratified", 9 | "perturbation_dataset": "replogle2_tf_only", 10 | "merge_replicates": false 11 | } -------------------------------------------------------------------------------- /experiments/1.8.4_6/metadata.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.8.4_6", 3 | "nickname": "stratified_split", 4 | "readme": "Network experiment but with a different data split.", 5 | "question": "1.4.3", 6 | "is_active": true, 7 | "refers_to": "1.4.3_1", 8 | "type_of_split": "stratified", 9 | "perturbation_dataset": "replogle2_large_effect", 10 | "merge_replicates": false 11 | } -------------------------------------------------------------------------------- /experiments/1.9_3/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.9_3", 3 | "nickname": "base_network_simulation", 4 | "refers_to": "1.9_1", 5 | "readme": "Testing different network structures like in experiment 1.4.3_1, but with simulated data based on a known network.", 6 | "question": "1.9", 7 | "is_active": true, 8 | "perturbation_dataset": "simulation_TrueNetwork=gtex_rna_S=1_NoiseSD=0" 9 | } -------------------------------------------------------------------------------- /experiments/1.9_0/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.9_0", 3 | "nickname": "base_network_simulation", 4 | "refers_to": "1.9_1", 5 | "readme": "Testing different network structures like in experiment 1.4.3_1, but with simulated data based on known networks.", 6 | "question": "1.9", 7 | "is_active": true, 8 | "perturbation_dataset": "simulation_TrueNetwork=MARA_FANTOM4_S=1_NoiseSD=0" 9 | } -------------------------------------------------------------------------------- /experiments/1.9_4/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.9_4", 3 | "nickname": "base_network_simulation", 4 | "refers_to": "1.9_1", 5 | "readme": "Testing different network structures like in experiment 1.4.3_1, but with simulated data based on a known network.", 6 | "question": "1.9", 7 | 
"is_active": true, 8 | "perturbation_dataset": "simulation_TrueNetwork=celloracle_human_S=1_NoiseSD=0" 9 | } -------------------------------------------------------------------------------- /make_figures/simple_simulation.R: -------------------------------------------------------------------------------- 1 | set.seed(0) 2 | X = matrix(rnorm(10000), ncol = 100) 3 | control_indices = 1 4 | train_indices = 1:50 5 | test_indices = 51:100 6 | baseline_predictor = colMeans(X[train_indices, ]) 7 | correlations = c() 8 | for(i in test_indices){ 9 | correlations[i-50] = cor(baseline_predictor - X[control_indices, ], X[i, ] - X[control_indices, ]) 10 | } 11 | mean(correlations) 12 | # 0.649 -------------------------------------------------------------------------------- /experiments/1.6.1_16/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.6.1_16", 3 | "nickname": "dcdfg", 4 | "readme": "Like experiment 1.6.1_1, but different data. Note: only 1000 genes selected, as in 1.6.1_1.", 5 | "refers_to": "1.6.1_1", 6 | "is_active": true, 7 | "perturbation_dataset": "nakatake_simulated_scrna", 8 | "network_datasets": { 9 | "dense":{} 10 | } 11 | } 12 | 13 | -------------------------------------------------------------------------------- /experiments/1.6.1_7/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.6.1_7", 3 | "nickname": "dcdfg", 4 | "readme": "Like experiment 1.6.1_1, but different data and starting_expression. 
Note: only 1000 genes selected, as in 1.6.1_1.", 5 | "refers_to": "1.6.1_1", 6 | "is_active": true, 7 | "perturbation_dataset": "replogle", 8 | "network_datasets": { 9 | "dense":{} 10 | } 11 | } 12 | 13 | -------------------------------------------------------------------------------- /experiments/1.9_2/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.9_2", 3 | "nickname": "base_network_simulation", 4 | "refers_to": "1.9_1", 5 | "readme": "Testing different network structures like in experiment 1.4.3_1, but with simulated data based on a known network.", 6 | "question": "1.9", 7 | "is_active": true, 8 | "perturbation_dataset": "simulation_TrueNetwork=cellnet_human_Hugene_S=1_NoiseSD=0" 9 | } -------------------------------------------------------------------------------- /experiments/1.2.2_8/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.2.2_8", 3 | "nickname": "matching_and_timescale", 4 | "readme": "Testing whether we should use the steady-state assumption or match to controls, and whether we should predict after one, a few, or many time-steps.", 5 | "question": "1.2.2", 6 | "is_active": true, 7 | "refers_to": "1.2.2_1", 8 | "perturbation_dataset": "replogle" 9 | 10 | } -------------------------------------------------------------------------------- /experiments/1.6.1_10/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.6.1_10", 3 | "nickname": "dcdfg", 4 | "readme": "Like experiment 1.6.1_1, but different data and starting_expression. 
Note: only 1000 genes selected, as in 1.6.1_1.", 5 | "refers_to": "1.6.1_1", 6 | "is_active": true, 7 | "perturbation_dataset": "replogle4", 8 | "network_datasets": { 9 | "dense":{} 10 | } 11 | } 12 | 13 | -------------------------------------------------------------------------------- /experiments/1.6.1_11/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.6.1_11", 3 | "nickname": "dcdfg", 4 | "readme": "Like experiment 1.6.1_1, but different data and starting_expression. Note: only 1000 genes selected, as in 1.6.1_1.", 5 | "refers_to": "1.6.1_1", 6 | "is_active": true, 7 | "perturbation_dataset": "freimer", 8 | "network_datasets": { 9 | "dense":{} 10 | } 11 | } 12 | 13 | -------------------------------------------------------------------------------- /experiments/1.6.1_13/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.6.1_13", 3 | "nickname": "dcdfg", 4 | "readme": "Like experiment 1.6.1_1, but different data and starting_expression. Note: only 1000 genes selected, as in 1.6.1_1.", 5 | "refers_to": "1.6.1_1", 6 | "is_active": true, 7 | "perturbation_dataset": "norman", 8 | "network_datasets": { 9 | "dense":{} 10 | } 11 | } 12 | 13 | -------------------------------------------------------------------------------- /experiments/1.6.1_14/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.6.1_14", 3 | "nickname": "dcdfg", 4 | "readme": "Like experiment 1.6.1_1, but different data and starting_expression. 
Note: only 1000 genes selected, as in 1.6.1_1.", 5 | "refers_to": "1.6.1_1", 6 | "is_active": true, 7 | "perturbation_dataset": "adamson", 8 | "network_datasets": { 9 | "dense":{} 10 | } 11 | } 12 | 13 | -------------------------------------------------------------------------------- /experiments/1.6.1_15/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.6.1_15", 3 | "nickname": "dcdfg", 4 | "readme": "Like experiment 1.6.1_1, but different data and starting_expression. Note: only 1000 genes selected, as in 1.6.1_1.", 5 | "refers_to": "1.6.1_1", 6 | "is_active": true, 7 | "perturbation_dataset": "dixit", 8 | "network_datasets": { 9 | "dense":{} 10 | } 11 | } 12 | 13 | -------------------------------------------------------------------------------- /experiments/1.6.1_9/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.6.1_9", 3 | "nickname": "dcdfg", 4 | "readme": "Like experiment 1.6.1_1, but different data and starting_expression. 
Note: only 1000 genes selected, as in 1.6.1_1.", 5 | "refers_to": "1.6.1_1", 6 | "is_active": true, 7 | "perturbation_dataset": "replogle3", 8 | "network_datasets": { 9 | "dense":{} 10 | } 11 | } 12 | 13 | -------------------------------------------------------------------------------- /experiments/1.2.2_12/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.2.2_12", 3 | "nickname": "matching_and_timescale", 4 | "readme": "Testing whether we should use the steady-state assumption or match to controls, and whether we should predict after one, a few, or many time-steps.", 5 | "question": "1.2.2", 6 | "is_active": true, 7 | "refers_to": "1.2.2_1", 8 | "perturbation_dataset": "joung" 9 | } 10 | 11 | -------------------------------------------------------------------------------- /experiments/1.2.2_7/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.2.2_7", 3 | "nickname": "matching_and_timescale", 4 | "readme": "Testing whether we should use the steady-state assumption or match to controls, and whether we should predict after one, a few, or many time-steps.", 5 | "question": "1.2.2", 6 | "is_active": true, 7 | "refers_to": "1.2.2_1", 8 | "perturbation_dataset": "freimer" 9 | } 10 | 11 | -------------------------------------------------------------------------------- /experiments/1.6.1_17/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.6.1_17", 3 | "nickname": "dcdfg", 4 | "readme": "Like experiment 1.6.1_1, but different data and starting_expression. 
Note: only 1000 genes selected, as in 1.6.1_1.", 5 | "refers_to": "1.6.1_1", 6 | "is_active": true, 7 | "perturbation_dataset": "replogle2", 8 | "network_datasets": { 9 | "dense":{} 10 | } 11 | } 12 | 13 | -------------------------------------------------------------------------------- /experiments/1.6.1_8/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.6.1_8", 3 | "nickname": "dcdfg", 4 | "readme": "Like experiment 1.6.1_1, but different data and starting_expression. Note: only 1000 genes selected, as in 1.6.1_1.", 5 | "refers_to": "1.6.1_1", 6 | "is_active": true, 7 | "perturbation_dataset": "replogle2_tf_only", 8 | "network_datasets": { 9 | "dense":{} 10 | } 11 | } 12 | 13 | -------------------------------------------------------------------------------- /experiments/1.2.2_10/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.2.2_10", 3 | "nickname": "matching_and_timescale", 4 | "readme": "Testing whether we should use the steady-state assumption or match to controls, and whether we should predict after one, a few, or many time-steps.", 5 | "question": "1.2.2", 6 | "is_active": true, 7 | "refers_to": "1.2.2_1", 8 | "perturbation_dataset": "replogle3" 9 | } 10 | 11 | -------------------------------------------------------------------------------- /experiments/1.2.2_13/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.2.2_13", 3 | "nickname": "matching_and_timescale", 4 | "readme": "Testing whether we should use the steady-state assumption or match to controls, and whether we should predict after one, a few, or many time-steps.", 5 | "question": "1.2.2", 6 | "is_active": true, 7 | "refers_to": "1.2.2_1", 8 | "perturbation_dataset": "norman" 9 | } 10 | 11 | -------------------------------------------------------------------------------- 
/experiments/1.2.2_2/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.2.2_2", 3 | "nickname": "matching_and_timescale", 4 | "readme": "Testing whether we should use the steady-state assumption or match to controls, and whether we should predict after one, a few, or many time-steps.", 5 | "question": "1.2.2", 6 | "is_active": true, 7 | "refers_to": "1.2.2_1", 8 | "perturbation_dataset": "replogle2" 9 | } 10 | 11 | -------------------------------------------------------------------------------- /experiments/1.2.2_9/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.2.2_9", 3 | "nickname": "matching_and_timescale", 4 | "readme": "Testing whether we should use the steady-state assumption or match to controls, and whether we should predict after one, a few, or many time-steps.", 5 | "question": "1.2.2", 6 | "is_active": true, 7 | "refers_to": "1.2.2_1", 8 | "perturbation_dataset": "replogle4" 9 | } 10 | 11 | -------------------------------------------------------------------------------- /experiments/1.2.2_3/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.2.2_3", 3 | "nickname": "matching_and_timescale", 4 | "readme": "Testing whether we should use the steady-state assumption or match to controls, and whether we should predict after one, a few, or many time-steps.", 5 | "question": "1.2.2", 6 | "is_active": true, 7 | "refers_to": "1.2.2_1", 8 | "perturbation_dataset": "frangieh_IFNg_v3" 9 | } 10 | 11 | -------------------------------------------------------------------------------- /experiments/1.2.2_5/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.2.2_5", 3 | "nickname": "matching_and_timescale", 4 | "readme": "Testing whether we should use the steady-state assumption or match to 
controls, and whether we should predict after one, a few, or many time-steps.", 5 | "question": "1.2.2", 6 | "is_active": true, 7 | "refers_to": "1.2.2_1", 8 | "perturbation_dataset": "replogle2_tf_only" 9 | } 10 | 11 | -------------------------------------------------------------------------------- /experiments/1.6.1_12/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.6.1_12", 3 | "nickname": "dcdfg", 4 | "readme": "Like experiment 1.6.1_1, but different data and starting_expression. Note: only 1000 genes selected, as in 1.6.1_1.", 5 | "refers_to": "1.6.1_1", 6 | "is_active": true, 7 | "perturbation_dataset": "replogle2_large_effect", 8 | "network_datasets": { 9 | "dense":{} 10 | } 11 | } 12 | 13 | -------------------------------------------------------------------------------- /experiments/1.2.2_11/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.2.2_11", 3 | "nickname": "matching_and_timescale", 4 | "readme": "Testing whether we should use the steady-state assumption or match to controls, and whether we should predict after one, a few, or many time-steps.", 5 | "question": "1.2.2", 6 | "is_active": true, 7 | "refers_to": "1.2.2_1", 8 | "perturbation_dataset": "nakatake_simulated_scrna" 9 | } 10 | 11 | -------------------------------------------------------------------------------- /experiments/1.3.1_2/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.3.1_2", 3 | "nickname": "CellTypeSpecificRegressionCMAP", 4 | "readme": "This experiment uses the same logic as experiment 1.3.1_1, with different networks and perturbation data.", 5 | "refers_to": "1.3.1_1", 6 | "is_active": false, 7 | "perturbation_dataset": "cmap", 8 | "network_datasets": { 9 | "celloracle_human":{} 10 | } 11 | } 12 | 13 | 
-------------------------------------------------------------------------------- /experiments/1.2.2_6/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.2.2_6", 3 | "nickname": "matching_and_timescale", 4 | "readme": "Testing whether we should use the steady-state assumption or match to controls, and whether we should predict after one, a few, or many time-steps.", 5 | "question": "1.2.2", 6 | "is_active": true, 7 | "refers_to": "1.2.2_1", 8 | "perturbation_dataset": "replogle2_large_effect" 9 | 10 | } 11 | 12 | -------------------------------------------------------------------------------- /experiments/1.4.4_7/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.4.4_7", 3 | "nickname": "network_only", 4 | "readme": "Are network-connected genes enriched for perturbation responses? This experiment uses network structure alone for prediction, with no training data and all perturbations reserved for evaluation.", 5 | "question": "1.4.4", 6 | "is_active": true, 7 | "refers_to": "1.4.4_1", 8 | "perturbation_dataset": "replogle4" 9 | } 10 | 11 | -------------------------------------------------------------------------------- /experiments/1.4.4_8/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.4.4_8", 3 | "nickname": "network_only", 4 | "readme": "Are network-connected genes enriched for perturbation responses? 
This experiment uses network structure alone for prediction, with no training data and all perturbations reserved for evaluation.", 5 | "question": "1.4.4", 6 | "is_active": true, 7 | "refers_to": "1.4.4_2", 8 | "perturbation_dataset": "replogle4" 9 | } 10 | 11 | -------------------------------------------------------------------------------- /experiments/1.4.4_3/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.4.4_3", 3 | "nickname": "network_only", 4 | "readme": "Are network-connected genes enriched for perturbation responses? This experiment uses network structure alone for prediction, with no training data and all perturbations reserved for evaluation.", 5 | "question": "1.4.4", 6 | "is_active": true, 7 | "refers_to": "1.4.4_1", 8 | "perturbation_dataset": "frangieh_IFNg_v3" 9 | } 10 | 11 | -------------------------------------------------------------------------------- /experiments/1.4.4_4/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.4.4_4", 3 | "nickname": "network_only", 4 | "readme": "Are network-connected genes enriched for perturbation responses? This experiment uses network structure alone for prediction, with no training data and all perturbations reserved for evaluation.", 5 | "question": "1.4.4", 6 | "is_active": true, 7 | "refers_to": "1.4.4_2", 8 | "perturbation_dataset": "frangieh_IFNg_v3" 9 | } 10 | 11 | -------------------------------------------------------------------------------- /experiments/1.4.4_5/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.4.4_5", 3 | "nickname": "network_only", 4 | "readme": "Are network-connected genes enriched for perturbation responses? 
This experiment uses network structure alone for prediction, with no training data and all perturbations reserved for evaluation.", 5 | "question": "1.4.4", 6 | "is_active": true, 7 | "refers_to": "1.4.4_1", 8 | "perturbation_dataset": "replogle2_large_effect" 9 | } 10 | 11 | -------------------------------------------------------------------------------- /experiments/1.4.4_6/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.4.4_6", 3 | "nickname": "network_only", 4 | "readme": "Are network-connected genes enriched for perturbation responses? This experiment uses network structure alone for prediction, with no training data and all perturbations reserved for evaluation.", 5 | "question": "1.4.4", 6 | "is_active": true, 7 | "refers_to": "1.4.4_2", 8 | "perturbation_dataset": "replogle2_large_effect" 9 | } 10 | 11 | -------------------------------------------------------------------------------- /experiments/1.3.2_5/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.3.2_5", 3 | "nickname": "cellTypeSpecificCSNets", 4 | "readme": "This experiment uses the same logic as experiment 1.3.2_1, with different networks and perturbation data.", 5 | "refers_to": "1.3.2_1", 6 | "is_active": true, 7 | "perturbation_dataset": "nakatake", 8 | "default_level": "hIPS.parquet", 9 | "network_datasets": { 10 | "empty": {}, 11 | "dense": {}, 12 | "csnets":{} 13 | } 14 | } 15 | 16 | -------------------------------------------------------------------------------- /experiments/1.3.2_4/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.3.2_4", 3 | "nickname": "cellTypeSpecificANANSE", 4 | "readme": "This experiment uses the same logic as experiment 1.3.2_1, with different networks and perturbation data.", 5 | "refers_to": "1.3.2_1", 6 | "is_active": true, 7 | "perturbation_dataset": "nakatake", 
8 | "default_level": "hIPS.parquet", 9 | "network_datasets": { 10 | "empty": {}, 11 | "dense": {}, 12 | "ANANSE_0.5":{} 13 | } 14 | } 15 | 16 | -------------------------------------------------------------------------------- /experiments/1.5.1_4/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.5.1_4", 3 | "nickname": "timeseries pilot", 4 | "readme": "Direct comparison of published timeseries methods", 5 | "question": "1.5.1", 6 | "refers_to": "1.5.1_0", 7 | "perturbation_dataset": "saunders_blood", 8 | "visualization_embedding": "X_umap", 9 | "eligible_regulators": "tf", 10 | "species": "zebrafish", 11 | "network_datasets": { 12 | "celloracle_zebrafish": { "do_aggregate_subnets": true } 13 | } 14 | } 15 | 16 | 17 | -------------------------------------------------------------------------------- /experiments/1.3.2_3/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.3.2_3", 3 | "nickname": "cellTypeSpecificCellNetHugene", 4 | "readme": "This experiment uses the same logic as experiment 1.3.2_1, with different networks and perturbation data.", 5 | "refers_to": "1.3.2_1", 6 | "is_active": true, 7 | "perturbation_dataset": "nakatake", 8 | "default_level": "hIPS.parquet", 9 | "network_datasets": { 10 | "empty": {}, 11 | "dense": {}, 12 | "cellnet_human_Hugene":{} 13 | } 14 | } 15 | 16 | -------------------------------------------------------------------------------- /experiments/1.5.1_5/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.5.1_5", 3 | "nickname": "timeseries pilot", 4 | "readme": "Direct comparison of published timeseries methods", 5 | "question": "1.5.1", 6 | "refers_to": "1.5.1_0", 7 | "perturbation_dataset": "saunders_endoderm", 8 | "visualization_embedding": "X_umap", 9 | "eligible_regulators": "tf", 10 | "species": "zebrafish", 11 | 
"network_datasets": { 12 | "celloracle_zebrafish": { "do_aggregate_subnets": true } 13 | } 14 | } 15 | 16 | 17 | -------------------------------------------------------------------------------- /experiments/1.5.1_3/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.5.1_3", 3 | "nickname": "timeseries pilot", 4 | "readme": "Direct comparison of published timeseries methods", 5 | "question": "1.5.1", 6 | "refers_to": "1.5.1_0", 7 | "perturbation_dataset": "saunders_axial_mesoderm", 8 | "visualization_embedding": "X_umap", 9 | "eligible_regulators": "tf", 10 | "species": "zebrafish", 11 | "network_datasets": { 12 | "celloracle_zebrafish": { "do_aggregate_subnets": true } 13 | } 14 | } 15 | 16 | 17 | -------------------------------------------------------------------------------- /experiments/1.5.1_6/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.5.1_6", 3 | "nickname": "timeseries pilot", 4 | "readme": "Direct comparison of published timeseries methods", 5 | "question": "1.5.1", 6 | "refers_to": "1.5.1_0", 7 | "perturbation_dataset": "saunders_pigment_cells", 8 | "eligible_regulators": "tf", 9 | "visualization_embedding": "X_umap", 10 | "species": "zebrafish", 11 | "network_datasets": { 12 | "celloracle_zebrafish": { "do_aggregate_subnets": true } 13 | } 14 | } 15 | 16 | 17 | -------------------------------------------------------------------------------- /make_figures/add_lit_review_to_data_collection.R: -------------------------------------------------------------------------------- 1 | # This is a single-use script to help me transfer literature review notes to a better permanent home. 
2 | 3 | # Endoderm 4 | lit_review = readxl::read_excel("timeseries_figures/definitive_endoderm_ps_vs_screen_top_30_manually_annotated.xlsx") 5 | lit_review = lit_review[c("perturbation", "Included in literature review?", "Cell type affected", "PMID", "Notes")] %>% 6 | dplyr::distinct() %>% 7 | write.csv("../../perturbation_data/perturbations/definitive_endoderm/lit_review.csv") 8 | 9 | # TO DO: blood -------------------------------------------------------------------------------- /experiments/1.2.2_16/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.2.2_16", 3 | "nickname": "matching_and_timescale", 4 | "readme": "Testing whether we should use the steady-state assumption or match to controls, and whether we should predict after one, a few, or many time-steps.", 5 | "question": "1.2.2", 6 | "is_active": true, 7 | "refers_to": "1.2.2_14", 8 | "perturbation_dataset": "fantom4", 9 | "eligible_regulators": "human_tfs", 10 | "network_datasets": { 11 | "celloracle_human": { "do_aggregate_subnets": true } 12 | } 13 | } -------------------------------------------------------------------------------- /experiments/1.3.2_2/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.3.2_2", 3 | "nickname": "cellTypeSpecificCellNetHg1332", 4 | "readme": "This experiment uses the same logic as experiment 1.3.2_1, with different networks and perturbation data.", 5 | "refers_to": "1.3.2_1", 6 | "is_active": true, 7 | "merge_replicates": true, 8 | "perturbation_dataset": "nakatake", 9 | "default_level": "hIPS.parquet", 10 | "network_datasets": { 11 | "empty": {}, 12 | "dense": {}, 13 | "cellnet_human_Hg1332":{} 14 | } 15 | } 16 | 17 | -------------------------------------------------------------------------------- /experiments/1.0_7/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": 
"1.0_7", 3 | "nickname": "ml methods", 4 | "readme": "Like 1.0_1 but freimer instead of nakatake.", 5 | "refers_to": "1.0_1", 6 | "is_active": true, 7 | "perturbation_dataset": "freimer", 8 | "expand": "ladder", 9 | "kwargs": [ 10 | {}, 11 | {}, 12 | {}, 13 | {}, 14 | {}, 15 | {}, 16 | {}, 17 | {}, 18 | {}, 19 | {}, 20 | {}, 21 | { 22 | "pca_dim": 3 23 | } 24 | ] 25 | } 26 | 27 | -------------------------------------------------------------------------------- /experiments/1.2.2_17/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.2.2_17", 3 | "nickname": "matching_and_timescale", 4 | "readme": "Testing whether we should use the steady-state assumption or match to controls, and whether we should predict after one, a few, or many time-steps.", 5 | "question": "1.2.2", 6 | "is_active": true, 7 | "refers_to": "1.2.2_14", 8 | "perturbation_dataset": "BETS_A549", 9 | "eligible_regulators": "human_tfs", 10 | "network_datasets": { 11 | "celloracle_human": { "do_aggregate_subnets": true } 12 | } 13 | } 14 | 15 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | slurm* 2 | *.oracle 3 | *.parquet 4 | *.csv 5 | *.pdf 6 | *.svg 7 | *.html 8 | *.Rhistory 9 | GEARS_gene_set.pkl 10 | ggrn_gears_input/* 11 | geneformer_loom_data 12 | geneformer_tokenized_data 13 | geneformer_finetuned 14 | from_to_docker 15 | logs 16 | wandb 17 | err.txt 18 | out.txt 19 | stdout.txt 20 | start_time.txt 21 | finish_time.txt 22 | *__pycache__* 23 | *.pyc 24 | old_experiments/* 25 | experiments/*/outputs 26 | experiments/*/old_outputs 27 | experiments/*/outputs_old 28 | node_modules/* 29 | lightning_logs/* 30 | TODO.md 31 | node_modules 32 | make_figures/global_effects/* 33 | -------------------------------------------------------------------------------- /experiments/1.5.1_1/metadata.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.5.1_1", 3 | "nickname": "timeseries pilot", 4 | "readme": "Direct comparison of published timeseries methods", 5 | "question": "1.5.1", 6 | "refers_to": "1.5.1_0", 7 | "perturbation_dataset": "paul1", 8 | "visualization_embedding": "X_draw_graph_fa", 9 | "matching_method": "user", 10 | "matching_method_for_evaluation": "user", 11 | "eligible_regulators": "tf", 12 | "species": "mouse", 13 | "network_datasets": { 14 | "celloracle_mouse_atac_atlas": { "do_aggregate_subnets": true } 15 | } 16 | } 17 | 18 | 19 | -------------------------------------------------------------------------------- /experiments/1.5.1_2/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.5.1_2", 3 | "nickname": "timeseries pilot", 4 | "readme": "Direct comparison of published timeseries methods", 5 | "question": "1.5.1", 6 | "refers_to": "1.5.1_0", 7 | "perturbation_dataset": "paul2", 8 | "visualization_embedding": "X_draw_graph_fa", 9 | "eligible_regulators": "tf", 10 | "species": "mouse", 11 | "matching_method": "user", 12 | "matching_method_for_evaluation": "user", 13 | "network_datasets": { 14 | "celloracle_mouse_atac_atlas": { "do_aggregate_subnets": true } 15 | } 16 | } 17 | 18 | 19 | -------------------------------------------------------------------------------- /experiments/1.8.5_1/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.8.5_1", 3 | "nickname": "split_seed_sensitivity", 4 | "readme": "Network experiment but with a different data split.", 5 | "question": "1.4.3", 6 | "is_active": true, 7 | "refers_to": "1.4.3_1", 8 | "data_split_seed": [1,2,3], 9 | "merge_replicates": false, 10 | "network_prior": "restrictive", 11 | "network_datasets": { 12 | "empty": { "do_aggregate_subnets": true }, 13 | "dense": { "do_aggregate_subnets": true }, 
14 | "celloracle_human": { "do_aggregate_subnets": true } 15 | } 16 | } -------------------------------------------------------------------------------- /experiments/1.2.2_19/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.2.2_19", 3 | "nickname": "matching_and_timescale", 4 | "readme": "Testing whether we should use the steady-state assumption or match to controls, and whether we should predict after one, a few, or many time-steps.", 5 | "question": "1.2.2", 6 | "is_active": true, 7 | "refers_to": "1.2.2_14", 8 | "perturbation_dataset": "saunders_blood", 9 | "visualization_embedding": "X_umap", 10 | "eligible_regulators": "tf", 11 | "species": "zebrafish", 12 | "network_datasets": { 13 | "celloracle_zebrafish": { "do_aggregate_subnets": true } 14 | } 15 | } -------------------------------------------------------------------------------- /experiments/1.1.1_1/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.1.1_1", 3 | "nickname": "hyperparam_sweep", 4 | "readme": "This is a simple sweep over the regularization parameter for LASSO regression.", 5 | "question": "1.1.1", 6 | "is_active": true, 7 | "factor_varied": "alpha", 8 | "regression_method": "LASSO", 9 | "kwargs_to_expand": ["alpha"], 10 | "kwargs":{ 11 | "alpha": [0, 0.0000001, 0.000001, 0.00001, 0.0001, 0.001, 0.01, 0.01, 0.1, 1, 10, 100, 1000] 12 | }, 13 | "facet_by": null, 14 | "color_by": null, 15 | "perturbation_dataset": "nakatake", 16 | "eligible_regulators": "human_tfs" 17 | } -------------------------------------------------------------------------------- /experiments/1.2.2_20/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.2.2_20", 3 | "nickname": "matching_and_timescale", 4 | "readme": "Testing whether we should use the steady-state assumption or match to controls, and whether we 
should predict after one, a few, or many time-steps.", 5 | "question": "1.2.2", 6 | "is_active": true, 7 | "refers_to": "1.2.2_14", 8 | "perturbation_dataset": "saunders_endoderm", 9 | "visualization_embedding": "X_umap", 10 | "eligible_regulators": "tf", 11 | "species": "zebrafish", 12 | "network_datasets": { 13 | "celloracle_zebrafish": { "do_aggregate_subnets": true } 14 | } 15 | } -------------------------------------------------------------------------------- /experiments/1.2.2_18/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.2.2_18", 3 | "nickname": "matching_and_timescale", 4 | "readme": "Testing whether we should use the steady-state assumption or match to controls, and whether we should predict after one, a few, or many time-steps.", 5 | "question": "1.2.2", 6 | "is_active": true, 7 | "refers_to": "1.2.2_14", 8 | "perturbation_dataset": "saunders_axial_mesoderm", 9 | "visualization_embedding": "X_umap", 10 | "eligible_regulators": "tf", 11 | "species": "zebrafish", 12 | "network_datasets": { 13 | "celloracle_zebrafish": { "do_aggregate_subnets": true } 14 | } 15 | } -------------------------------------------------------------------------------- /experiments/1.2.2_21/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.2.2_21", 3 | "nickname": "matching_and_timescale", 4 | "readme": "Testing whether we should use the steady-state assumption or match to controls, and whether we should predict after one, a few, or many time-steps.", 5 | "question": "1.2.2", 6 | "is_active": true, 7 | "refers_to": "1.2.2_14", 8 | "perturbation_dataset": "saunders_pigment_cells", 9 | "visualization_embedding": "X_umap", 10 | "eligible_regulators": "tf", 11 | "species": "zebrafish", 12 | "network_datasets": { 13 | "celloracle_zebrafish": { "do_aggregate_subnets": true } 14 | } 15 | } 
-------------------------------------------------------------------------------- /make_figures/export_benchmark_results.sh: -------------------------------------------------------------------------------- 1 | 2 | mkdir ../../evaluation_results 3 | for experiment in `ls ../experiments` 4 | do 5 | mkdir -p ../../evaluation_results/experiments/${experiment}/outputs 6 | for path_to_copy in metadata.json outputs/conditions.csv outputs/evaluationPerPert.parquet outputs/evaluationPerTarget.parquet outputs/train_resources outputs/train_walltimes 7 | do 8 | cp -r ../experiments/${experiment}/${path_to_copy} ../../evaluation_results/experiments/${experiment}/${path_to_copy} 9 | done 10 | done 11 | cd .. && python gather_experiment_metadata.py 12 | cp ../all_experiments.tsv ../../evaluation_results/all_experiments.tsv -------------------------------------------------------------------------------- /experiments/ggrn_docker_backend_celloracle/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "ggrn_docker_backend_celloracle", 3 | "nickname": "ggrn_docker_backend_celloracle", 4 | "readme": "Test of the ggrn backend that runs a docker container with CO installed.", 5 | "question": "None", 6 | "is_active": true, 7 | "factor_varied": "regression_method", 8 | "color_by": null, 9 | "facet_by": null, 10 | "perturbation_dataset": "freimer", 11 | "pruning_parameter": 2000, 12 | "regression_method":"docker____ekernf01/ggrn_docker_backend_celloracle", 13 | "network_datasets": { 14 | "celloracle_human":{} 15 | } 16 | } 17 | 18 | -------------------------------------------------------------------------------- /experiments/1.8.5_2/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.8.5_2", 3 | "nickname": "split_seed_sensitivity", 4 | "readme": "Network experiment but with a different data split.", 5 | "question": "1.4.3", 6 | "is_active": true, 7 | 
"refers_to": "1.4.3_1", 8 | "data_split_seed": [1,2,3], 9 | "perturbation_dataset": "replogle2", 10 | "merge_replicates": false, 11 | "network_prior": "restrictive", 12 | "network_datasets": { 13 | "empty": { "do_aggregate_subnets": true }, 14 | "dense": { "do_aggregate_subnets": true }, 15 | "celloracle_human": { "do_aggregate_subnets": true } 16 | } 17 | } -------------------------------------------------------------------------------- /experiments/1.8.5_7/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.8.5_7", 3 | "nickname": "split_seed_sensitivity", 4 | "readme": "Network experiment but with a different data split.", 5 | "question": "1.4.3", 6 | "is_active": true, 7 | "refers_to": "1.4.3_1", 8 | "data_split_seed": [1,2,3], 9 | "perturbation_dataset": "freimer", 10 | "merge_replicates": false, 11 | "network_prior": "restrictive", 12 | "network_datasets": { 13 | "empty": { "do_aggregate_subnets": true }, 14 | "dense": { "do_aggregate_subnets": true }, 15 | "celloracle_human": { "do_aggregate_subnets": true } 16 | } 17 | } -------------------------------------------------------------------------------- /experiments/1.8.5_8/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.8.5_8", 3 | "nickname": "split_seed_sensitivity", 4 | "readme": "Network experiment but with a different data split.", 5 | "question": "1.4.3", 6 | "is_active": true, 7 | "refers_to": "1.4.3_1", 8 | "data_split_seed": [1,2,3], 9 | "perturbation_dataset": "replogle", 10 | "merge_replicates": false, 11 | "network_prior": "restrictive", 12 | "network_datasets": { 13 | "empty": { "do_aggregate_subnets": true }, 14 | "dense": { "do_aggregate_subnets": true }, 15 | "celloracle_human": { "do_aggregate_subnets": true } 16 | } 17 | } -------------------------------------------------------------------------------- /experiments/1.8.5_9/metadata.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.8.5_9", 3 | "nickname": "split_seed_sensitivity", 4 | "readme": "Network experiment but with a different data split.", 5 | "question": "1.4.3", 6 | "is_active": true, 7 | "refers_to": "1.4.3_1", 8 | "data_split_seed": [1,2,3], 9 | "perturbation_dataset": "replogle4", 10 | "merge_replicates": false, 11 | "network_prior": "restrictive", 12 | "network_datasets": { 13 | "empty": { "do_aggregate_subnets": true }, 14 | "dense": { "do_aggregate_subnets": true }, 15 | "celloracle_human": { "do_aggregate_subnets": true } 16 | } 17 | } -------------------------------------------------------------------------------- /experiments/1.8.5_3/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.8.5_3", 3 | "nickname": "split_seed_sensitivity", 4 | "readme": "Network experiment but with a different data split.", 5 | "question": "1.4.3", 6 | "is_active": true, 7 | "refers_to": "1.4.3_1", 8 | "data_split_seed": [1,2,3], 9 | "perturbation_dataset": "frangieh_IFNg_v3", 10 | "merge_replicates": false, 11 | "network_prior": "restrictive", 12 | "network_datasets": { 13 | "empty": { "do_aggregate_subnets": true }, 14 | "dense": { "do_aggregate_subnets": true }, 15 | "celloracle_human": { "do_aggregate_subnets": true } 16 | } 17 | } -------------------------------------------------------------------------------- /experiments/test/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "test", 3 | "nickname": "test", 4 | "readme": "This experiment is a sandbox meant to test new features of the benchmarking code.", 5 | "question": "1.1", 6 | "is_active": true, 7 | "factor_varied": "regression_method", 8 | "default_level": "mean", 9 | "color_by": null, 10 | "facet_by": null, 11 | "merge_replicates": true, 12 | "network_prior": "restrictive", 13 | "num_genes": 
500, 14 | "pruning_strategy": "none", 15 | "regression_method": ["mean", "RidgeCV"], 16 | "perturbation_dataset": "nakatake", 17 | "network_datasets": { 18 | "dense": {} 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /experiments/1.8.5_5/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.8.5_5", 3 | "nickname": "split_seed_sensitivity", 4 | "readme": "Network experiment but with a different data split.", 5 | "question": "1.4.3", 6 | "is_active": true, 7 | "refers_to": "1.4.3_1", 8 | "data_split_seed": [1,2,3], 9 | "perturbation_dataset": "replogle2_tf_only", 10 | "merge_replicates": false, 11 | "network_prior": "restrictive", 12 | "network_datasets": { 13 | "empty": { "do_aggregate_subnets": true }, 14 | "dense": { "do_aggregate_subnets": true }, 15 | "celloracle_human": { "do_aggregate_subnets": true } 16 | } 17 | } -------------------------------------------------------------------------------- /experiments/1.8.5_6/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.8.5_6", 3 | "nickname": "split_seed_sensitivity", 4 | "readme": "Network experiment but with a different data split.", 5 | "question": "1.4.3", 6 | "is_active": true, 7 | "refers_to": "1.4.3_1", 8 | "data_split_seed": [1,2,3], 9 | "perturbation_dataset": "replogle2_large_effect", 10 | "merge_replicates": false, 11 | "network_prior": "restrictive", 12 | "network_datasets": { 13 | "empty": { "do_aggregate_subnets": true }, 14 | "dense": { "do_aggregate_subnets": true }, 15 | "celloracle_human": { "do_aggregate_subnets": true } 16 | } 17 | } -------------------------------------------------------------------------------- /experiments/ggrn_docker_backend/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "ggrn_docker_backend", 3 | "nickname": 
"ggrn_docker_backend", 4 | "readme": "Test of the ggrn backend that runs a user-specified docker container.", 5 | "question": "None", 6 | "is_active": true, 7 | "factor_varied": "regression_method", 8 | "color_by": null, 9 | "facet_by": null, 10 | "perturbation_dataset": "nakatake", 11 | "kwargs": { 12 | "my_sweepy_hyperparameter": [0, 1], 13 | "my_constant_hyperparameter": 0 14 | }, 15 | "kwargs_to_expand": ["my_sweepy_hyperparameter"], 16 | "regression_method":"docker____ekernf01/ggrn_docker_backend_template" 17 | } 18 | 19 | -------------------------------------------------------------------------------- /run_experiments.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -l 2 | #SBATCH --job-name="ericBenchmarking" 3 | #SBATCH --partition=parallel 4 | #SBATCH --time=72:00:00 5 | #SBATCH --nodes=1 6 | #SBATCH --ntasks-per-node=16 7 | 8 | source "${HOME}/mambaforge/etc/profile.d/conda.sh" 9 | conda init 10 | conda activate ggrn 11 | 12 | 13 | for experiment in `ls -1 experiments | grep -E $1` 14 | do 15 | echo "Starting ${experiment}" 16 | echo "Monitor progress: 17 | less experiments/${experiment}/err.txt 18 | less experiments/${experiment}/stdout.txt 19 | " 20 | pereggrn --experiment_name $experiment --amount_to_do missing_models --verbosity 2 \ 21 | > experiments/$experiment/stdout.txt 2> experiments/$experiment/err.txt 22 | done 23 | -------------------------------------------------------------------------------- /experiments/1.4.2_1/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.4.2_1", 3 | "nickname": "GEARS", 4 | "readme": "Related Q asks, what's the best way of using a given network? GEARS has an interesting take on this. 
Here we test it out.", 5 | "question": "1.4.2", 6 | "is_active": true, 7 | "data_split_seed": [0, 1, 2], 8 | "regression_method":[ 9 | "median", 10 | "mean", 11 | "GEARS" 12 | ], 13 | "num_genes": 1000, 14 | "facet_by": null, 15 | "color_by": "data_split_seed", 16 | "factor_varied": "regression_method", 17 | "baseline_condition": 0, 18 | "predict_self" : true, 19 | "merge_replicates": false, 20 | "perturbation_dataset": "nakatake" 21 | } 22 | 23 | 24 | -------------------------------------------------------------------------------- /experiments/1.3.3_2/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.3.3_2", 3 | "nickname": "TransferLearning", 4 | "readme": "Q1.3.3 is about whether we can learn causal effects by pretraining a big fat transformer on a big fat collection of scRNA data.", 5 | "question": "1.3.3", 6 | "is_active": true, 7 | "refers_to": "1.3.3_1", 8 | "feature_extraction": [ 9 | "mrna", 10 | "mrna", 11 | "geneformer_model_/home/ekernf01/Desktop/jhu/research/projects/perturbation_prediction/cell_type_knowledge_transfer/perturbation_benchmarking/geneformer_finetuned/231119_geneformer_CellClassifier_L2048_B12_LR9.707511253364405e-05_LSlinear_WU296.3165900045724_E10_Oadamw_F2" 12 | ], 13 | "perturbation_dataset": "replogle2" 14 | } -------------------------------------------------------------------------------- /experiments/1.3.2_1/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.3.2_1", 3 | "nickname": "cellTypeSpecific", 4 | "readme": "Do networks inferred for the cell type of interest work better than global networks or networks from the wrong cell types? 
Tested here with ANANSE cell-type-specific networks, ESC versus others, on the nakatake ESC perturbation data.", 5 | "question": "1.3.2", 6 | "is_active": false, 7 | "factor_varied": "network", 8 | "facet_by": null, 9 | "color_by": null, 10 | "merge_replicates": true, 11 | "eligible_regulators": "human_tfs", 12 | "perturbation_dataset": "nakatake", 13 | "network_datasets": { 14 | "empty": {}, 15 | "dense": {}, 16 | "ANANSE_0.5":{} 17 | } 18 | } 19 | 20 | -------------------------------------------------------------------------------- /experiments/1.2.2_1/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.2.2_1", 3 | "nickname": "matching_and_timescale", 4 | "readme": "Testing whether we should use the steady-state assumption or match to controls, and whether we should predict after one, a few, or many time-steps.", 5 | "question": "1.2.2", 6 | "is_active": true, 7 | "eligible_regulators": "human_tfs", 8 | "data_split_seed": [0], 9 | "merge_replicates": true, 10 | "regression_method":[ 11 | "RidgeCV" 12 | ], 13 | "matching_method": ["steady_state", "closest"], 14 | "prediction_timescale": ["1","3","10"], 15 | "factor_varied": "matching_method", 16 | "color_by": "prediction_timescale", 17 | "facet_by": null, 18 | "perturbation_dataset": "nakatake" 19 | } -------------------------------------------------------------------------------- /experiments/1.2.2_15/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.2.2_15", 3 | "nickname": "matching_and_timescale", 4 | "readme": "Testing whether we should use the steady-state assumption or match to controls, and whether we should predict after one, a few, or many time-steps.", 5 | "question": "1.2.2", 6 | "is_active": true, 7 | "refers_to": "1.2.2_14", 8 | "perturbation_dataset": "paul1", 9 | "visualization_embedding": "X_draw_graph_fa", 10 | "eligible_regulators": "tf", 11 | "species": 
"mouse", 12 | "matching_method": ["steady_state", "closest", "optimal_transport", "random", "user"], 13 | "matching_method_for_evaluation": "user", 14 | "network_datasets": { 15 | "celloracle_mouse_atac_atlas": { "do_aggregate_subnets": true } 16 | } 17 | } -------------------------------------------------------------------------------- /experiments/1.2.2_22/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.2.2_22", 3 | "nickname": "matching_and_timescale", 4 | "readme": "Testing whether we should use the steady-state assumption or match to controls, and whether we should predict after one, a few, or many time-steps.", 5 | "question": "1.2.2", 6 | "is_active": true, 7 | "refers_to": "1.2.2_14", 8 | "perturbation_dataset": "paul2", 9 | "visualization_embedding": "X_draw_graph_fa", 10 | "eligible_regulators": "tf", 11 | "species": "mouse", 12 | "matching_method": ["steady_state", "closest", "optimal_transport", "random", "user"], 13 | "matching_method_for_evaluation": "user", 14 | "network_datasets": { 15 | "celloracle_mouse_atac_atlas": { "do_aggregate_subnets": true } 16 | } 17 | } -------------------------------------------------------------------------------- /experiments/1.4.1_3/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.4.1_3", 3 | "is_active": true, 4 | "refers_to": "1.4.1_0", 5 | "perturbation_dataset": "paul2", 6 | "visualization_embedding": "X_draw_graph_fa", 7 | "matching_method": "user", 8 | "matching_method_for_evaluation": "user", 9 | "eligible_regulators": "tfs", 10 | "species" : "mouse", 11 | "network_datasets": { 12 | "cellnet_mouse_mogene": { "do_aggregate_subnets": false }, 13 | "cellnet_mouse_4302": { "do_aggregate_subnets": false }, 14 | "empty": { "do_aggregate_subnets": true }, 15 | "dense": { "do_aggregate_subnets": true }, 16 | "celloracle_mouse": { "do_aggregate_subnets": true }, 17 | 
"celloracle_mouse_atac_atlas": { "do_aggregate_subnets": true } 18 | } 19 | } -------------------------------------------------------------------------------- /experiments/1.4.1_4/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.4.1_4", 3 | "is_active": true, 4 | "refers_to": "1.4.1_0", 5 | "perturbation_dataset": "paul1", 6 | "visualization_embedding": "X_draw_graph_fa", 7 | "matching_method": "user", 8 | "matching_method_for_evaluation": "user", 9 | "eligible_regulators": "tfs", 10 | "species" : "mouse", 11 | "network_datasets": { 12 | "cellnet_mouse_mogene": { "do_aggregate_subnets": false }, 13 | "cellnet_mouse_4302": { "do_aggregate_subnets": false }, 14 | "empty": { "do_aggregate_subnets": true }, 15 | "dense": { "do_aggregate_subnets": true }, 16 | "celloracle_mouse": { "do_aggregate_subnets": true }, 17 | "celloracle_mouse_atac_atlas": { "do_aggregate_subnets": true } 18 | } 19 | } -------------------------------------------------------------------------------- /experiments/1.1.2_1/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.1.2_1", 3 | "nickname": "pruning_nakatake", 4 | "readme": "This experiment compares models that use all genes as predictors versus models that only allow TFs to regulate other genes.", 5 | "question": "1.1.2", 6 | "is_active": false, 7 | "factor_varied": "only_tfs_are_regulators", 8 | "network_prior": "ignore", 9 | "eligible_regulators": ["human_tfs", "all"], 10 | "regression_method": ["mean", "RidgeCV"], 11 | "data_split_seed": [0,1,2], 12 | "facet_by": "data_split_seed", 13 | "color_by": "regression_method", 14 | "baseline_condition": [0,1,2,0,1,2,0,1,2,0,1,2], 15 | "merge_replicates": true, 16 | "perturbation_dataset": "nakatake", 17 | "network_datasets": { 18 | "dense":{} 19 | } 20 | } 21 | 22 | -------------------------------------------------------------------------------- 
/experiments/1.6.3_0/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.6.3_0", 3 | "nickname": "timeseries low rank", 4 | "readme": "Comparison of linear models with full-rank and low-rank transition matrices", 5 | "question": "1.6.3", 6 | "is_active": true, 7 | "facet_by": "type_of_split", 8 | "color_by": "regression_method", 9 | "factor_varied": "data_split_seed", 10 | "type_of_split": "timeseries", 11 | "num_genes": 2000, 12 | "regression_method": "autoregressive", 13 | "prediction_timescale": "1", 14 | "low_dimensional_structure" : "dynamics", 15 | "low_dimensional_training": ["pca", "supervised"], 16 | "low_dimensional_value": [2, 3, 5, 20, 100, 2000], 17 | "matching_method": "optimal_transport", 18 | "perturbation_dataset": "definitive_endoderm", 19 | "predict_self": true 20 | } -------------------------------------------------------------------------------- /make_figures/figure_2_demo.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import scanpy as sc 4 | import pereggrn_perturbations 5 | pereggrn_perturbations.set_data_path('../../perturbation_data/perturbations') 6 | import os 7 | import altair as alt 8 | 9 | print(os.listdir("../experiments/1.0_1/outputs")) 10 | X = pd.read_parquet("../experiments/1.0_1/outputs/evaluationPerPert.parquet") 11 | print(X.query("gene=='GATA3' & condition==0").T.to_csv()) 12 | 13 | # Metrics: 14 | # pearsonCorr,0.8628334731048072 15 | # spearmanCorr,0.6939872813635597 16 | # logFCNorm2,116.28345489501952 17 | # mae,0.24543215334415436 18 | # mse,1915.5604908063688 19 | # spearman,0.6103465858719491 20 | # proportion_correct_direction,0.8333679186553227 21 | # mse_top_20,207.51050154170025 22 | # mse_top_100,529.9619580687759 23 | # mse_top_200,752.8383862192422 24 | # cell_type_correct,0.0 -------------------------------------------------------------------------------- 
/experiments/1.3.1_1/metadata.json: -------------------------------------------------------------------------------- 1 | 2 | { 3 | "unique_id": "1.3.1_1", 4 | "nickname": "CellTypeSpecificRegression", 5 | "readme": "Q1.3 is about bias versus variance: does it work best to treat cell types as identical (high bias), separate (high variance), or similar (compromise)? This experiment (and sequels that refer to it) investigate the two extreme options by training one regression per cluster with either lots of clusters, or all data in one cluster. This experiment is currently not active and it may require some work to get it running again.", 6 | "question": "1.3.1", 7 | "is_active": false, 8 | "factor_varied": "num_clusters", 9 | "color_by": null, 10 | "facet_by": "network_prior", 11 | "merge_replicates": true, 12 | "perturbation_dataset": "nakatake", 13 | "network_datasets": { 14 | "celloracle_human":{} 15 | } 16 | } 17 | 18 | -------------------------------------------------------------------------------- /experiments/1.5.2_0/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.5.2_0", 3 | "nickname": "onesc", 4 | "readme": "This experiment allows colleagues in the Cahan Lab to systematically test a new method called oneSC.", 5 | "question": "1.8.1", 6 | "is_active": true, 7 | "facet_by": null, 8 | "color_by": "regression_method", 9 | "factor_varied": "regression_method", 10 | "type_of_split": "timeseries", 11 | "eligible_regulators": "all", 12 | "matching_method": "optimal_transport", 13 | "num_genes": 50, 14 | "expand": "ladder", 15 | "regression_method": [ 16 | "docker____ekernf01/ggrn_docker_backend_timeseries_baseline", 17 | "docker____ekernf01/ggrn_docker_backend_onesc" 18 | ], 19 | "prediction_timescale": [1, 2, 3, 4, 5, 10, 100, 1000], 20 | "expand_prediction_timescale": false, 21 | "perturbation_dataset": "definitive_endoderm" 22 | }
-------------------------------------------------------------------------------- /experiments/1.8.3_1/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.8.3_1", 3 | "nickname": "gene_selection", 4 | "readme": "Gene-selection experiment but with a different dataset.", 5 | "question": "1.8.3", 6 | "is_active": true, 7 | "factor_varied": "num_genes", 8 | "data_split_seed": [0], 9 | "color_by": "type_of_split", 10 | "type_of_split": ["interventional"], 11 | "facet_by": null, 12 | "merge_replicates": true, 13 | "regression_method": "RidgeCV", 14 | "perturbation_dataset": "nakatake", 15 | "eligible_regulators": "human_tfs", 16 | "num_genes": [500, 1000, 2000, 5000, 10000], 17 | "network_prior": "restrictive", 18 | "network_datasets": { 19 | "empty": { "do_aggregate_subnets": true }, 20 | "dense": { "do_aggregate_subnets": true }, 21 | "celloracle_human": { "do_aggregate_subnets": true } 22 | } 23 | } 24 | 25 | -------------------------------------------------------------------------------- /experiments/1.3.3_1/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.3.3_1", 3 | "nickname": "TransferLearning", 4 | "readme": "Q1.3.3 is about whether we can learn causal effects by pretraining a big fat transformer on a big fat collection of scRNA data.", 5 | "question": "1.3.3", 6 | "is_active": true, 7 | "regression_method":[ 8 | "mean", 9 | "median", 10 | "RidgeCV" 11 | ], 12 | "feature_extraction": [ 13 | "mrna", 14 | "mrna", 15 | "geneformer_hyperparam_finetune" 16 | ], 17 | "expand": "ladder", 18 | "eligible_regulators": "all", 19 | "predict_self": true, 20 | "data_split_seed": [0], 21 | "type_of_split": ["interventional"], 22 | "num_genes": 10000, 23 | "facet_by": null, 24 | "color_by": "type_of_split", 25 | "factor_varied": "regression_method", 26 | "baseline_condition": 0, 27 | "merge_replicates": true, 28 | "perturbation_dataset": 
"nakatake", 29 | "network_datasets": { 30 | "dense":{} 31 | } 32 | } 33 | 34 | -------------------------------------------------------------------------------- /experiments/1.2.2_14/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.2.2_14", 3 | "nickname": "matching_and_timescale", 4 | "readme": "Testing whether we should use the steady-state assumption or match to controls, and whether we should predict after one, a few, or many time-steps.", 5 | "question": "1.2.2", 6 | "is_active": true, 7 | "eligible_regulators": "human_tfs", 8 | "data_split_seed": [0], 9 | "type_of_split": "timeseries", 10 | "regression_method":[ 11 | "RidgeCV" 12 | ], 13 | "matching_method": ["steady_state", "closest", "optimal_transport", "random"], 14 | "num_genes": 2000, 15 | "prediction_timescale": "1,2,3,10", 16 | "cell_type_sharing_strategy": "distinct", 17 | "factor_varied": "matching_method", 18 | "color_by": "prediction_timescale", 19 | "facet_by": null, 20 | "perturbation_dataset": "definitive_endoderm", 21 | "visualization_embedding": "X_pca", 22 | "network_prior": "restrictive", 23 | "network_datasets": { 24 | "endoderm": { "do_aggregate_subnets": true } 25 | } 26 | } -------------------------------------------------------------------------------- /experiments/1.8.2_1/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.8.2_1", 3 | "nickname": "how_much_averaging", 4 | "readme": "Averaging within each perturbation reduces noise and focuses on biological variation due to perturbations. However, it hides potentially useful variation from sample to sample. 
This experiment tests whether one approach or the other tends to yield better expression forecasting results.", 5 | "question": "1.8.3", 6 | "is_active": true, 7 | "factor_varied": "network_datasets", 8 | "data_split_seed": [0], 9 | "color_by": "type_of_split", 10 | "type_of_split": ["interventional"], 11 | "facet_by": null, 12 | "merge_replicates": [true, false], 13 | "regression_method": "RidgeCV", 14 | "perturbation_dataset": "nakatake", 15 | "eligible_regulators": "human_tfs", 16 | "num_genes": [2000], 17 | "network_prior": "restrictive", 18 | "network_datasets": { 19 | "empty": { "do_aggregate_subnets": true }, 20 | "dense": { "do_aggregate_subnets": true }, 21 | "celloracle_human": { "do_aggregate_subnets": true } 22 | } 23 | } 24 | 25 | -------------------------------------------------------------------------------- /license.md: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Eric Kernfeld 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /environment/conda_inputs.yaml: -------------------------------------------------------------------------------- 1 | name: ggrn 2 | channels: 3 | - pytorch 4 | - pyg 5 | - conda-forge 6 | - bioconda 7 | - defaults 8 | - lingfeiwang 9 | dependencies: 10 | - pandas[version'>=2.0'] 11 | - python-duckdb 12 | - pytorch-lightning 13 | - scipy 14 | - scanpy 15 | - pytorch 16 | - torchtext 17 | - torchvision 18 | - torchaudio 19 | - numpy[version='<=1.23'] 20 | - cython 21 | - scikit-learn 22 | - pyarrow 23 | - python=3.9 24 | - jupyterlab 25 | - jupyter 26 | - ipywidgets 27 | - yaml 28 | - wandb 29 | - selenium=3.141.0 30 | - altair 31 | - altair_saver 32 | - dcor 33 | - scanpy 34 | - python-igraph 35 | - louvain 36 | - genomepy 37 | - goatools 38 | - python-annoy 39 | - pyreadr 40 | - regex 41 | - scikit-misc 42 | - matplotlib[version'>=3.4,<3.5'] 43 | - rpy2 44 | - anndata2ri 45 | - deprecated 46 | - datasets 47 | - gseapy 48 | - dictys 49 | - velocyto.py 50 | - pyg 51 | - tensorboard[version'>=2.4,<2.5'] 52 | - gimmemotifs[version'==0.17.1'] 53 | - transformers 54 | - accelerate 55 | - hyperopt 56 | - grpcio[version'<=1.49'] 57 | - memray 58 | - fa2 59 | -------------------------------------------------------------------------------- /experiments/1.6.1_18/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.6.1_18", 3 | "nickname": "dcdfg", 4 | "readme": "Parameter sweep for a less sparse NO-TEARS model.", 5 | "refers_to": "1.6.1_1", 6 | "is_active": true, 7 | "perturbation_dataset": "dixit", 8 | "network_datasets": { 9 | "dense":{} 10 | }, 11 
| "starting_expression": ["control"], 12 | "expand": "ladder", 13 | "regression_method":[ 14 | "mean", 15 | "median", 16 | 17 | "DCDFG-spectral_radius-linearlr-False", 18 | "DCDFG-spectral_radius-linearlr-False", 19 | "DCDFG-spectral_radius-linearlr-False", 20 | "DCDFG-spectral_radius-linearlr-False", 21 | "DCDFG-spectral_radius-linearlr-False", 22 | 23 | "DCDFG-spectral_radius-mlplr-False", 24 | "DCDFG-spectral_radius-mlplr-False", 25 | "DCDFG-spectral_radius-mlplr-False", 26 | "DCDFG-spectral_radius-mlplr-False", 27 | "DCDFG-spectral_radius-mlplr-False" 28 | ], 29 | "pruning_parameter": [ 30 | 0, 0, 31 | 0.0001, 0.001, 0.01, 0.1, 1, 32 | 0.0001, 0.001, 0.01, 0.1, 1 33 | ] 34 | } 35 | 36 | -------------------------------------------------------------------------------- /experiments/1.3.2_10/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.3.2_10", 3 | "nickname": "cellTypeSpecificFANTOM5CMAP", 4 | "readme": "This experiment uses the same logic as experiment 1.3.2_1, with different networks and perturbation data.", 5 | "refers_to": "1.3.2_1", 6 | "is_active": false, 7 | "perturbation_dataset": "cmap", 8 | "default_level": null, 9 | "network_datasets": { 10 | "empty": {}, 11 | "dense": {}, 12 | "magnum_compendium_394": { 13 | "subnets": [ 14 | "retinal_pigment_epithelial_cells.parquet", 15 | "chronic_myelogenous_leukemia_cml_cell_line.parquet", 16 | "teratocarcinoma_cell_line.parquet", 17 | "lung_adenocarcinoma_cell_line.parquet", 18 | "breast_carcinoma_cell_line.parquet", 19 | "embryonic_kidney_cell_line.parquet", 20 | "hepatocellular_carcinoma_cell_line.parquet", 21 | "epitheloid_cancer_cell_line.parquet", 22 | "acute_myeloid_leukemia_fab_m5_cell_line.parquet" 23 | ], 24 | "do_aggregate_subnets": false 25 | } 26 | } 27 | } 28 | 29 | -------------------------------------------------------------------------------- /experiments/1.3.2_6/metadata.json:
-------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.3.2_6", 3 | "nickname": "cellTypeSpecificFANTOM5Replogle1", 4 | "readme": "This experiment uses the same logic as experiment 1.3.2_1, with different networks and perturbation data.", 5 | "refers_to": "1.3.2_1", 6 | "is_active": true, 7 | "perturbation_dataset": "replogle", 8 | "merge_replicates": true, 9 | "network_datasets": { 10 | "empty": {}, 11 | "dense": {}, 12 | "magnum_compendium_394": { 13 | "subnets": [ 14 | "retinal_pigment_epithelial_cells.parquet", 15 | "chronic_myelogenous_leukemia_cml_cell_line.parquet", 16 | "teratocarcinoma_cell_line.parquet", 17 | "lung_adenocarcinoma_cell_line.parquet", 18 | "breast_carcinoma_cell_line.parquet", 19 | "embryonic_kidney_cell_line.parquet", 20 | "hepatocellular_carcinoma_cell_line.parquet", 21 | "epitheloid_cancer_cell_line.parquet", 22 | "acute_myeloid_leukemia_fab_m5_cell_line.parquet" 23 | ], 24 | "do_aggregate_subnets": false 25 | } 26 | } 27 | } 28 | 29 | -------------------------------------------------------------------------------- /experiments/1.3.2_9/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.3.2_9", 3 | "nickname": "cellTypeSpecificFANTOM5Replogle4", 4 | "readme": "This experiment uses the same logic as experiment 1.3.2_1, with different networks and perturbation data.", 5 | "refers_to": "1.3.2_1", 6 | "is_active": true, 7 | "merge_replicates": true, 8 | "perturbation_dataset": "replogle4", 9 | "network_datasets": { 10 | "empty": {}, 11 | "dense": {}, 12 | "magnum_compendium_394": { 13 | "subnets": [ 14 | "retinal_pigment_epithelial_cells.parquet", 15 | "chronic_myelogenous_leukemia_cml_cell_line.parquet", 16 | "teratocarcinoma_cell_line.parquet", 17 | "lung_adenocarcinoma_cell_line.parquet", 18 | "breast_carcinoma_cell_line.parquet", 19 | "embryonic_kidney_cell_line.parquet", 20 | "hepatocellular_carcinoma_cell_line.parquet", 21 |
"epitheloid_cancer_cell_line.parquet", 22 | "acute_myeloid_leukemia_fab_m5_cell_line.parquet" 23 | ], 24 | "do_aggregate_subnets": false 25 | } 26 | } 27 | } 28 | 29 | -------------------------------------------------------------------------------- /experiments/1.3.2_7/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.3.2_7", 3 | "nickname": "cellTypeSpecificFANTOM5Replogle2", 4 | "readme": "This experiment uses the same logic as experiment 1.3.2_1, with different networks and perturbation data.", 5 | "refers_to": "1.3.2_1", 6 | "is_active": true, 7 | "perturbation_dataset": "replogle2", 8 | "merge_replicates": true, 9 | "network_datasets": { 10 | "empty": {}, 11 | "dense": {}, 12 | "magnum_compendium_394": { 13 | "subnets": [ 14 | "retinal_pigment_epithelial_cells.parquet", 15 | "chronic_myelogenous_leukemia_cml_cell_line.parquet", 16 | "teratocarcinoma_cell_line.parquet", 17 | "lung_adenocarcinoma_cell_line.parquet", 18 | "breast_carcinoma_cell_line.parquet", 19 | "embryonic_kidney_cell_line.parquet", 20 | "hepatocellular_carcinoma_cell_line.parquet", 21 | "epitheloid_cancer_cell_line.parquet", 22 | "acute_myeloid_leukemia_fab_m5_cell_line.parquet" 23 | ], 24 | "do_aggregate_subnets": false 25 | } 26 | } 27 | } 28 | 29 | -------------------------------------------------------------------------------- /experiments/1.3.2_8/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.3.2_8", 3 | "nickname": "cellTypeSpecificFANTOM5Replogle3", 4 | "readme": "This experiment uses the same logic as experiment 1.3.2_1, with different networks and perturbation data.", 5 | "refers_to": "1.3.2_1", 6 | "is_active": true, 7 | "merge_replicates": true, 8 | "perturbation_dataset": "replogle3", 9 | "network_datasets": { 10 | "empty": {}, 11 | "dense": {}, 12 | "magnum_compendium_394": { 13 | "subnets": [ 14 | "retinal_pigment_epithelial_cells.parquet", 15 |
"chronic_myelogenous_leukemia_cml_cell_line.parquet", 16 | "teratocarcinoma_cell_line.parquet", 17 | "lung_adenocarcinoma_cell_line.parquet", 18 | "breast_carcinoma_cell_line.parquet", 19 | "embryonic_kidney_cell_line.parquet", 20 | "hepatocellular_carcinoma_cell_line.parquet", 21 | "epitheloid_cancer_cell_line.parquet", 22 | "acute_myeloid_leukemia_fab_m5_cell_line.parquet" 23 | ], 24 | "do_aggregate_subnets": false 25 | } 26 | } 27 | } 28 | 29 | -------------------------------------------------------------------------------- /experiments/1.0_1/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.0_1", 3 | "nickname": "ml methods", 4 | "readme": "We test a slate of regression methods to see if anything can beat ... the mean of the training data.", 5 | "question": "1.0", 6 | "data_split_seed": [0], 7 | "type_of_split": ["interventional"], 8 | "regression_method":[ 9 | "mean", 10 | "median", 11 | "GradientBoostingRegressor", 12 | "ExtraTreesRegressor", 13 | "KernelRidge", 14 | "RidgeCV", 15 | "RidgeCVExtraPenalty", 16 | "LassoCV", 17 | "ElasticNetCV", 18 | "OrthogonalMatchingPursuitCV", 19 | "BayesianRidge", 20 | "docker____ekernf01/ggrn_docker_backend_ahlmann_eltze" 21 | ], 22 | "num_genes": 10000, 23 | "eligible_regulators": "human_tfs", 24 | "is_active": true, 25 | "facet_by": null, 26 | "color_by": "type_of_split", 27 | "factor_varied": "regression_method", 28 | "visualization_embedding": "X_umap", 29 | "baseline_condition": 0, 30 | "merge_replicates": true, 31 | "perturbation_dataset": "nakatake", 32 | "network_datasets": { 33 | "dense":{} 34 | } 35 | } 36 | 37 | -------------------------------------------------------------------------------- /gather_experiment_metadata.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pandas as pd 3 | import pereggrn.experimenter as experimenter 4 | import pereggrn_networks 5 | import 
pereggrn_perturbations 6 | pereggrn_networks.set_grn_location("../network_collection/networks") 7 | pereggrn_perturbations.set_data_path("../perturbation_data/perturbations") 8 | all_active_experiments = [] 9 | for experiment in os.listdir("experiments"): 10 | try: 11 | all_active_experiments.append(pd.DataFrame( 12 | { 13 | k:experimenter.validate_metadata(experiment, input_folder = "experiments")[k] 14 | for k in ["nickname", "refers_to", "readme"] 15 | }, 16 | index = [experiment] 17 | )) 18 | except: 19 | all_active_experiments.append(pd.DataFrame( 20 | { 21 | k:"Could not validate the metadata -- likely an inactive experiment." 22 | for k in ["nickname", "refers_to", "readme"] 23 | }, 24 | index = [experiment] 25 | )) 26 | pd.concat(all_active_experiments).sort_index().to_csv("all_experiments.tsv", sep = "\t", index = True) 27 | print("Done. See results in all_experiments.tsv.") 28 | 29 | 30 | -------------------------------------------------------------------------------- /experiments/singularity_demo/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "singularity_demo", 3 | "nickname": "singularity_demo", 4 | "readme": "Trying to get backends working with singularity", 5 | "question": "1.8.1", 6 | "is_active": true, 7 | "facet_by": "type_of_split", 8 | "color_by": "regression_method", 9 | "factor_varied": "data_split_seed", 10 | "type_of_split": "timeseries", 11 | "eligible_regulators": "human_tfs", 12 | "num_genes": 2000, 13 | "expand": "ladder", 14 | "regression_method": [ 15 | 16 | "singularity____ekernf01/ggrn_docker_backend_sckinetics", 17 | "singularity____ekernf01/ggrn_docker_backend_dictys", 18 | 19 | "singularity____ekernf01/ggrn_docker_backend_celloracle", 20 | "singularity____ekernf01/ggrn_docker_backend_timeseries_baseline", 21 | "singularity____ekernf01/ggrn_docker_backend_prescient" 22 | ], 23 | "prediction_timescale": [ 24 | "1", "1", 25 | "1,2,3,5,10", "1,2,3,4", "1,2,3,4" 26 
| ], 27 | "network_datasets": { 28 | "celloracle_human":{} 29 | }, 30 | "matching_method": "optimal_transport", 31 | "perturbation_dataset": "definitive_endoderm" 32 | } 33 | 34 | 35 | -------------------------------------------------------------------------------- /environment/install_minimal.sh: -------------------------------------------------------------------------------- 1 | mkdir expression_forecasting_benchmarks 2 | cd expression_forecasting_benchmarks 3 | # Get data collections from Zenodo 4 | # accessory data, e.g. pLI and list of TF names 5 | wget https://zenodo.org/record/13345104/files/accessory_data.zip && unzip accessory_data.zip 6 | # perturbations 7 | wget https://zenodo.org/records/13785607/files/perturbation_data_minimal.zip && unzip perturbation_data_minimal.zip && mv perturbation_data_minimal perturbation_data 8 | # networks 9 | wget https://zenodo.org/records/13785607/files/network_collection_minimal.zip && unzip network_collection_minimal.zip && mv network_collection_minimal network_collection 10 | 11 | # Get experiment metadata and project folder layout 12 | git clone https://github.com/ekernf01/perturbation_benchmarking 13 | # Install python packages 14 | conda create -n ggrn_minimal python=3.9 15 | conda activate ggrn_minimal 16 | conda install -y pip 17 | pip install vl-convert-python 18 | pip install ray[tune] 19 | pip install pyarrow 20 | for p in pereggrn_networks pereggrn_perturbations ggrn pereggrn 21 | do 22 | pip install git+https://github.com/ekernf01/${p} --branch v2 23 | done 24 | echo "Installation has finished. 
Test your installation:" 25 | echo " conda activate ggrn_minimal" 26 | echo " cd perturbation_benchmarking" 27 | echo " pereggrn -h # see the help page" 28 | echo " pereggrn --output example_output --input experiments --experiment_name '1.0_0' --networks ../network_collection/networks --data ../perturbation_data/perturbations --amount_to_do models --no_skip_bad_runs" 29 | -------------------------------------------------------------------------------- /make_figures/global_effects/dixit.txt: -------------------------------------------------------------------------------- 1 | deg,mi,mean,norm2,median 2 | -999.0,1.910162315127668,0.5038446881503853,52.312931645534476,0.30094033544637744 3 | -999.0,1.9855050253844826,0.4916380547221156,52.56394543285953,0.29798986083162815 4 | -999.0,2.0302618566942527,0.4756141754671866,52.976473363440824,0.2845773643314999 5 | -999.0,2.0796098255382485,0.451007774304164,51.14938597693658,0.26872471232975415 6 | -999.0,2.0411439603906376,0.4667585060832745,52.93853598842698,0.28042979028179643 7 | -999.0,2.4129531576200356,0.27371691027161144,30.350002845250383,0.16881920648740878 8 | -999.0,2.3629876602850333,0.3063420993997507,33.46881620355791,0.1840810704980273 9 | -999.0,2.01893161752782,0.482825004038108,53.32670494361112,0.29113124258929723 10 | -999.0,2.393002815297529,0.27753547068215606,30.33618567283924,0.17437199764860553 11 | -999.0,2.4777122626670756,0.24799212287514608,26.89714683792748,0.16440947866471348 12 | -999.0,2.70443560791687,0.19210115416587883,21.429255506453305,0.11923406669115186 13 | -999.0,2.3644642097930713,0.29890899266547666,33.13344444757394,0.1838976806801691 14 | -999.0,2.3244762960247396,0.3169443810809533,34.45570125268245,0.19448842193074148 15 | -999.0,2.377290821787801,0.2958279112113571,32.629771020614584,0.18001584414493516 16 | -999.0,2.068408380861639,0.44690851398302267,50.68586283808235,0.2725523351475753 17 | 
-999.0,1.9784142893048147,0.502264491921718,53.931519971231076,0.29953037704410534 18 | -999.0,1.9605012824830634,0.48614706656257084,50.02607076605626,0.303037074808737 19 | -999.0,2.386350421230154,0.29610498570392146,32.9446849226944,0.1778979505850157 20 | -------------------------------------------------------------------------------- /experiments/1.4.5_1/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.4.4_1", 3 | "nickname": "network_only", 4 | "readme": "Are network-connected genes enriched for perturbation responses? This experiment uses network structure alone for prediction, with no training data and all perturbations reserved for evaluation.", 5 | "question": "1.4.4", 6 | "is_active": true, 7 | "factor_varied": "network_datasets", 8 | "type_of_split": "interventional", 9 | "data_split_seed": [0], 10 | "desired_heldout_fraction": [1], 11 | "color_by": null, 12 | "facet_by": null, 13 | "regression_method": "?????", 14 | "perturbation_dataset": "nakatake", 15 | "eligible_regulators": "all", 16 | "starting_expression": ["heldout"], 17 | "num_genes": 10000, 18 | "network_datasets": { 19 | "celloracle_human": { "do_aggregate_subnets": true }, 20 | "gtex_rna": { "do_aggregate_subnets": true }, 21 | "magnum_compendium_32": { "do_aggregate_subnets": true }, 22 | "magnum_compendium_ppi": { "do_aggregate_subnets": true }, 23 | "cellnet_human_Hg1332": { "do_aggregate_subnets": true }, 24 | "cellnet_human_Hugene": { "do_aggregate_subnets": true }, 25 | "MARA_FANTOM4": { "do_aggregate_subnets": true }, 26 | "STRING": { "do_aggregate_subnets": true }, 27 | "ANANSE_0.5": { "do_aggregate_subnets": true }, 28 | "ANANSE_tissue_0.5": { "do_aggregate_subnets": true }, 29 | "humanbase": { "do_aggregate_subnets": true } 30 | } 31 | } 32 | 33 | -------------------------------------------------------------------------------- /experiments/1.4.4_1/metadata.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.4.4_1", 3 | "nickname": "network_only", 4 | "readme": "Are network-connected genes enriched for perturbation responses? This experiment uses network structure alone for prediction, with no training data and all perturbations reserved for evaluation.", 5 | "question": "1.4.4", 6 | "is_active": true, 7 | "factor_varied": "network_datasets", 8 | "type_of_split": "interventional", 9 | "data_split_seed": [0], 10 | "desired_heldout_fraction": [1], 11 | "allowed_regulators_vs_network_regulators": "union", 12 | "color_by": null, 13 | "facet_by": null, 14 | "regression_method": "regulon", 15 | "perturbation_dataset": "nakatake", 16 | "eligible_regulators": "all", 17 | "num_genes": 10000, 18 | "network_datasets": { 19 | "celloracle_human": { "do_aggregate_subnets": true }, 20 | "gtex_rna": { "do_aggregate_subnets": true }, 21 | "magnum_compendium_32": { "do_aggregate_subnets": true }, 22 | "magnum_compendium_ppi": { "do_aggregate_subnets": true }, 23 | "cellnet_human_Hg1332": { "do_aggregate_subnets": true }, 24 | "cellnet_human_Hugene": { "do_aggregate_subnets": true }, 25 | "MARA_FANTOM4": { "do_aggregate_subnets": true }, 26 | "STRING": { "do_aggregate_subnets": true }, 27 | "ANANSE_0.5": { "do_aggregate_subnets": true }, 28 | "ANANSE_tissue_0.5": { "do_aggregate_subnets": true }, 29 | "humanbase": { "do_aggregate_subnets": true } 30 | } 31 | } 32 | 33 | -------------------------------------------------------------------------------- /experiments/1.9_1/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.9_1", 3 | "nickname": "base_network_simulation", 4 | "is_active": true, 5 | "readme": "Testing different network structures like in experiment 1.4.3_1, but with simulated data based on a known network. 
", 6 | "question": "1.9", 7 | "factor_varied": "network_datasets", 8 | "color_by": "type_of_split", 9 | "type_of_split": ["interventional"], 10 | "facet_by": null, 11 | "regression_method": "RidgeCV", 12 | "eligible_regulators": "all", 13 | "num_genes": 10000, 14 | "network_prior": "restrictive", 15 | "network_datasets": { 16 | "empty": { "do_aggregate_subnets": true }, 17 | "dense": { "do_aggregate_subnets": true }, 18 | "celloracle_human": { "do_aggregate_subnets": true }, 19 | "gtex_rna": { "do_aggregate_subnets": true }, 20 | "magnum_compendium_32": { "do_aggregate_subnets": true }, 21 | "magnum_compendium_ppi": { "do_aggregate_subnets": true }, 22 | "cellnet_human_Hg1332": { "do_aggregate_subnets": true }, 23 | "cellnet_human_Hugene": { "do_aggregate_subnets": true }, 24 | "MARA_FANTOM4": { "do_aggregate_subnets": true }, 25 | "STRING": { "do_aggregate_subnets": true }, 26 | "ANANSE_0.5": { "do_aggregate_subnets": true }, 27 | "ANANSE_tissue_0.5": { "do_aggregate_subnets": true }, 28 | "humanbase": { "do_aggregate_subnets": true } 29 | }, 30 | "matching_method": ["user"], 31 | "predict_self": true, 32 | "data_split_seed": [0,1,2], 33 | "prediction_timescale": 1, 34 | "perturbation_dataset": "simulation_TrueNetwork=cellnet_human_Hg1332_S=1_NoiseSD=0" 35 | } -------------------------------------------------------------------------------- /experiments/1.5.1_0/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.5.1_0", 3 | "nickname": "timeseries pilot", 4 | "readme": "Direct comparison of published timeseries methods", 5 | "question": "1.5.1", 6 | "is_active": true, 7 | "facet_by": "type_of_split", 8 | "color_by": "regression_method", 9 | "factor_varied": "data_split_seed", 10 | "type_of_split": "timeseries", 11 | "eligible_regulators": "human_tfs", 12 | "num_genes": 2000, 13 | "pruning_parameter": 2000, 14 | "expand": "ladder", 15 | "regression_method": [ 16 | "median", 17 | "mean", 18 | 
"RidgeCV", 19 | 20 | "docker____ekernf01/ggrn_docker_backend_sckinetics", 21 | "docker____ekernf01/ggrn_docker_backend_dictys", 22 | 23 | "docker____ekernf01/ggrn_docker_backend_celloracle", 24 | "docker____ekernf01/ggrn_docker_backend_timeseries_baseline", 25 | "docker____ekernf01/ggrn_docker_backend_prescient", 26 | "docker____ekernf01/ggrn_docker_backend_rnaforecaster" 27 | ], 28 | "prediction_timescale": [ 29 | "1", "1", "1,2,3,10", 30 | "1", "1", 31 | "1,2,3,5", "1,2,3,4", "1,2,3,4", "1,2,3,4" 32 | ], 33 | "cell_type_sharing_strategy": "distinct", 34 | "predict_self": [ 35 | false, false, false, 36 | false, false, 37 | false, false, false, true 38 | ], 39 | "kwargs": [ 40 | {}, {}, {}, 41 | {}, { "minimum_expression": 0.05 }, 42 | {}, {}, {}, {} 43 | ], 44 | "matching_method": "optimal_transport", 45 | "perturbation_dataset": "definitive_endoderm", 46 | "visualization_embedding": "X_pca", 47 | "network_datasets": { 48 | "endoderm": { "do_aggregate_subnets": true } 49 | }, 50 | "network_prior": "restrictive" 51 | } -------------------------------------------------------------------------------- /experiments/1.6.1_1/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.6.1_1", 3 | "nickname": "published methods", 4 | "readme": "Comparison of several published methods (originally focused on DCD-FG, hence the numbering after Q1.6).", 5 | "question": "1.6", 6 | "is_active": true, 7 | "facet_by": "starting_expression", 8 | "color_by": null, 9 | "factor_varied": "regression_method", 10 | "expand": "ladder", 11 | "kwargs": [ 12 | {}, 13 | {}, 14 | {}, 15 | {}, 16 | {}, 17 | {}, 18 | {}, 19 | {}, 20 | {}, 21 | {} 22 | ], 23 | "regression_method":[ 24 | "mean", 25 | "mean", 26 | "median", 27 | "median", 28 | "DCDFG-spectral_radius-linearlr-False", 29 | "DCDFG-spectral_radius-linearlr-False", 30 | "DCDFG-spectral_radius-mlplr-False" , 31 | "DCDFG-spectral_radius-mlplr-False" , 32 | "RidgeCV", 33 | 
"GEARS" 34 | ], 35 | "feature_extraction": [ 36 | "mrna", 37 | "mrna", 38 | "mrna", 39 | "mrna", 40 | "mrna", 41 | "mrna", 42 | "mrna", 43 | "mrna", 44 | "geneformer_hyperparam_finetune", 45 | "mrna" 46 | ], 47 | "predict_self": [ 48 | false, 49 | false, 50 | false, 51 | false, 52 | false, 53 | false, 54 | false, 55 | false, 56 | true, 57 | true 58 | ], 59 | "baseline_condition": 0, 60 | "merge_replicates": false, 61 | "perturbation_dataset": "frangieh_IFNg_v1", 62 | "num_genes": 1000, 63 | "starting_expression": [ 64 | "control", "heldout", 65 | "control", "heldout", 66 | "control", "heldout", 67 | "control", "heldout", 68 | "control", 69 | "control" 70 | ], 71 | "network_datasets": { 72 | "dense":{} 73 | } 74 | } 75 | 76 | -------------------------------------------------------------------------------- /make_figures/global_effects/freimer.txt: -------------------------------------------------------------------------------- 1 | deg,mi,mean,norm2,median 2 | 0.0,1.7343218862056689,0.19943972016478761,42.16134515105941,0.09792018098009196 3 | 51.0,1.675038488044664,0.23554822621933932,54.19274235980161,0.09527179002206303 4 | 10.0,1.8538888108379181,0.1759638531487992,40.833678096526164,0.07031262033430491 5 | 0.0,1.9558892996805826,0.13874365642647515,31.642192934222216,0.05591693109502488 6 | 0.0,1.9693811633459988,0.1412092087059149,32.88510834261646,0.05336172285389287 7 | 4.0,1.9256675359984494,0.15453063646363532,34.8604855165297,0.06749591141784311 8 | 0.0,2.0317514749918146,0.12513219543449508,30.84630393784162,0.043945878910136536 9 | 0.0,1.8973835565613582,0.17346884928883674,37.993859804646206,0.07909252330845157 10 | 1.0,1.9886678737179506,0.13537700064122346,32.389979596614204,0.050204269213690804 11 | 6.0,1.9810338022765186,0.1489560543717019,37.17668530146186,0.05115143921775177 12 | 17.0,1.717687025316958,0.21406147981694806,49.087489821323366,0.08944421383182051 13 | 0.0,1.8989885921295004,0.15265015458299994,35.56572306981687,0.06026849941076856 14 
| 4.0,1.8496164128167003,0.16461601442126586,37.547038757728295,0.07050215484510483 15 | 0.0,1.8954505998863567,0.16374288981899945,38.20787442150733,0.06554269694455271 16 | 4.0,1.911411814964397,0.16507045070989845,38.19916476743188,0.06351087037860026 17 | 23.0,1.6953779828055762,0.21809904406873054,48.8982368579704,0.09724737698332835 18 | 14.0,1.8347754156298597,0.18634992065106692,43.90672237759967,0.07674399865525601 19 | 1.0,1.8419721926257975,0.17449780062622644,39.97881022321453,0.0696136654723 20 | 0.0,2.046340688316985,0.12249124244804031,28.84714177458835,0.045549462287079054 21 | 1.0,1.9957782113815916,0.1462292994526221,34.403105547709714,0.05586016804821248 22 | 8.0,1.9201383488008854,0.16491124910620736,38.52060870601413,0.06592776566284995 23 | 8.0,1.7241701316419022,0.2075978526237536,46.08914444452816,0.08969086435808828 24 | 3.0,1.7997457523262692,0.1928282665887047,41.71048487408795,0.08552701148685775 25 | 7.0,1.8899789965283627,0.1696674008655979,39.87318625781164,0.0635335694135748 26 | -------------------------------------------------------------------------------- /experiments/1.4.3_1/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.4.3_1", 3 | "nickname": "base_network", 4 | "readme": "people have published big lists of TF-target or gene-gene relationships, often for GWAS interpretation or reprogramming. Existing benchmarks have limited information content and seldom compare these published network structures directly without introducing confounding factors. For instance, one might ask whether the networks used by CellNet, Mogrify, Irene, and CellOracle are of comparable value in predicting perturbation outcomes. Those methods have been compared, but they each involve many other components that may also affect the outcome, confounding the effect of network structure. 
This experiment benchmarks many networks using otherwise-equivalent methods to see how much each network helps predict held-out perturbations.", 5 | "question": "1.4.3", 6 | "is_active": true, 7 | "factor_varied": "network_datasets", 8 | "data_split_seed": [0], 9 | "color_by": "type_of_split", 10 | "type_of_split": ["interventional"], 11 | "facet_by": null, 12 | "merge_replicates": true, 13 | "regression_method": "RidgeCV", 14 | "perturbation_dataset": "nakatake", 15 | "eligible_regulators": "human_tfs", 16 | "num_genes": 10000, 17 | "visualization_embedding": "X_umap", 18 | "network_prior": "restrictive", 19 | "network_datasets": { 20 | "empty": { "do_aggregate_subnets": true }, 21 | "dense": { "do_aggregate_subnets": true }, 22 | "celloracle_human": { "do_aggregate_subnets": true }, 23 | "gtex_rna": { "do_aggregate_subnets": true }, 24 | "magnum_compendium_32": { "do_aggregate_subnets": true }, 25 | "magnum_compendium_ppi": { "do_aggregate_subnets": true }, 26 | "cellnet_human_Hg1332": { "do_aggregate_subnets": true }, 27 | "cellnet_human_Hugene": { "do_aggregate_subnets": true }, 28 | "MARA_FANTOM4": { "do_aggregate_subnets": true }, 29 | "STRING": { "do_aggregate_subnets": true }, 30 | "ANANSE_0.5": { "do_aggregate_subnets": true }, 31 | "ANANSE_tissue_0.5": { "do_aggregate_subnets": true }, 32 | "humanbase": { "do_aggregate_subnets": true } 33 | } 34 | } 35 | 36 | -------------------------------------------------------------------------------- /make_figures/timeseries_differential_expression.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import scanpy as sc 4 | import anndata 5 | import pereggrn_perturbations 6 | pereggrn_perturbations.set_data_path('../../perturbation_data/perturbations') 7 | import sys 8 | import altair as alt 9 | import os 10 | 11 | dataset = 'definitive_endoderm' 12 | adata = pereggrn_perturbations.load_perturbation(dataset, is_timeseries=True) 13 | 
# Top-30 differentially expressed transcription factors for each timeseries
# dataset. `adata` still holds the dataset loaded immediately before this point.
os.makedirs("timeseries_plots", exist_ok=True)  # DataFrame.to_csv does not create missing directories
adata.obs['timepoint'] = adata.obs['timepoint'].astype('str')
# NOTE(review): presumably records the log base used by scanpy when it computes
# fold changes -- confirm against how this dataset was normalized.
adata.uns["log1p"]["base"] = 2
# The TF list is identical for every human contrast, so it is read once here
# and reused by the fantom4 section below instead of being re-read per iteration.
human_tf = pd.read_csv('../../accessory_data/tf_lists/human.txt', header=None)
for celltype in ["endoderm", "mesendoderm"]:
    # Contrast each derived cell type against the pluripotent cells.
    sc.tl.rank_genes_groups(adata, "cell_type", groups=[celltype], reference='pluripotent', method='wilcoxon')
    X = sc.get.rank_genes_groups_df(adata, group=[celltype])
    X = X.query("names in @human_tf[0].values")  # keep transcription factors only
    X = X.query("pvals_adj < 0.05")
    # Re-bind rather than sort in place: X is a filtered frame, and mutating it
    # in place can raise pandas' SettingWithCopyWarning.
    X = X.sort_values('scores', ascending=False)
    X.head(30).to_csv(f"timeseries_plots/top30_differential_expression_{celltype}.csv")

dataset = 'fantom4'
adata = pereggrn_perturbations.load_perturbation(dataset, is_timeseries=True)
adata.obs['timepoint'] = adata.obs['timepoint'].astype('str')
# Contrast the '96.0' timepoint against the '0.0' timepoint.
sc.tl.rank_genes_groups(adata, "timepoint", groups=['96.0'], reference='0.0', method='wilcoxon')
X = sc.get.rank_genes_groups_df(adata, group='96.0')
X = X.query("names in @human_tf[0].values")
# NOTE(review): unlike the other contrasts, no adjusted-p-value filter is
# applied here and the ranking uses logfoldchanges -- confirm this is intended.
X = X.sort_values('logfoldchanges', ascending=False)
X.head(30).to_csv("timeseries_plots/top30_differential_expression_fantom4.csv")

dataset = 'paul1'
adata = pereggrn_perturbations.load_perturbation(dataset, is_timeseries=True)
mouse_tf = pd.read_csv('../../accessory_data/tf_lists/mouse.txt', header=None)
# Collapse fine-grained cell types into three coarse groups; cell types that
# are not listed in the mapping become NaN.
adata.obs["supertype"] = adata.obs["cell_type"].map({
    "MEP": "ME",
    "Erythroids": "ME",
    "Megakaryocytes": "ME",
    "DC": "DC",
    "GMP": "GM",
    "late_GMP": "GM",
    "Monocytes": "GM",
    "Granulocytes": "GM"
})
for st in ["GM","ME", "DC"]:
    # Each coarse group is contrasted against all remaining cells.
    sc.tl.rank_genes_groups(adata, "supertype", groups=[st], reference='rest', method='wilcoxon')
    X = sc.get.rank_genes_groups_df(adata, group=[st])
    X = X.query("names in @mouse_tf[0].values")
    X = X.query("pvals_adj < 0.05")
    # (the sort and CSV export for this loop follow directly after this span)
X.sort_values('logfoldchanges', ascending=False, inplace=True) 53 | X.head(30).to_csv(f"timeseries_plots/top30_differential_expression_paul_{st}.csv") -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## A systematic comparison of computational methods for expression forecasting with [PEREGGRN](https://github.com/ekernf01/pereggrn) 2 | 3 | This repo contains benchmark experiments to evaluate various strategies for predicting gene expression after knockout, knockdown, or overexpression. 4 | 5 | ![image](https://github.com/ekernf01/perturbation_benchmarking/assets/5271803/ae7a5c86-dca6-49be-b048-743f8e110a18) 6 | 7 | - For context and key results, see our [preprint](https://www.biorxiv.org/content/10.1101/2023.07.28.551039v2). 8 | - Install everything using [these instructions](https://github.com/ekernf01/perturbation_benchmarking/blob/main/environment/install.md). 9 | - To repeat our experiments or run your own, see the [pereggrn](https://github.com/ekernf01/pereggrn) benchmarking software ([tutorial](https://github.com/ekernf01/pereggrn/blob/main/docs/tutorial.md), how to [add your own method](https://github.com/ekernf01/pereggrn/blob/main/docs/how_to.md#how-to-evaluate-a-new-method)). 10 | - If there's something you cannot find, go ahead and file a github issue -- with your input, we hope to improve the project. 11 | 12 | ### Related infrastructure 13 | 14 | This project is tightly coupled with our collections of data, our GGRN package for dynamic models of gene regulatory networks, and our PEREGGRN package containing benchmarking infrastructure. 15 | 16 | - Install everything using [these instructions](https://github.com/ekernf01/perturbation_benchmarking/blob/main/environment/install.md). 17 | - Perturbation data, the network collection, and some accessory data (e.g. a list of TF's) are on Zenodo with DOI `10.5281/zenodo.15115945`. 
18 | - Our code expects each of those three folders to be unzipped and placed adjacent to this repo. 19 | - Use our [perturbation loader](https://github.com/ekernf01/pereggrn_perturbations) and [network loader](https://github.com/ekernf01/pereggrn_networks) to easily access and validate data from Python. 20 | - [GGRN](https://github.com/ekernf01/ggrn), the Grammar of Gene Regulatory Networks, offers flexible combination of different features for regulatory network inference. 21 | - [PEREGGRN](https://github.com/ekernf01/pereggrn), PErturbation Response Evaluation via a Grammar of Gene Regulatory Networks, helps conduct the experiments that are specified in this repo. 22 | - To interact with the evaluation results and see the source data for our figures, [download (1GB)](https://zenodo.org/records/15115945/files/perturbation_benchmarking.zip?download=1) them from Zenodo (DOI: 10.5281/zenodo.15115945). 23 | - Certain additional experiments are implemented in [our fork of DCD-FG](https://github.com/ekernf01/dcdfg). 24 | 25 | 26 | -------------------------------------------------------------------------------- /experiments/1.4.4_2/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.4.4_2", 3 | "nickname": "network_only", 4 | "readme": "Are network-connected genes enriched for perturbation responses? 
This experiment uses network structure alone for prediction, with no training data and all perturbations reserved for evaluation.", 5 | "question": "1.4.4", 6 | "is_active": true, 7 | "factor_varied": "network_datasets", 8 | "type_of_split": "interventional", 9 | "data_split_seed": [0], 10 | "desired_heldout_fraction": [1], 11 | "allowed_regulators_vs_network_regulators": "union", 12 | "color_by": null, 13 | "facet_by": null, 14 | "regression_method": "regulon", 15 | "perturbation_dataset": "nakatake", 16 | "eligible_regulators": "all", 17 | "num_genes": 10000, 18 | "network_datasets": { 19 | "gtex_rna": { "do_aggregate_subnets": false }, 20 | "cellnet_human_Hg1332": { "do_aggregate_subnets": false }, 21 | "cellnet_human_Hugene": { "do_aggregate_subnets": false }, 22 | "ANANSE_0.5": { "do_aggregate_subnets": false }, 23 | "ANANSE_tissue_0.5": { "do_aggregate_subnets": false }, 24 | "humanbase": { "do_aggregate_subnets": false }, 25 | "magnum_compendium_394": { 26 | "subnets": [ 27 | "retinal_pigment_epithelial_cells.parquet", 28 | "chronic_myelogenous_leukemia_cml_cell_line.parquet", 29 | "teratocarcinoma_cell_line.parquet", 30 | "lung_adenocarcinoma_cell_line.parquet", 31 | "breast_carcinoma_cell_line.parquet", 32 | "embryonic_kidney_cell_line.parquet", 33 | "hepatocellular_carcinoma_cell_line.parquet", 34 | "epitheloid_cancer_cell_line.parquet", 35 | "acute_myeloid_leukemia_fab_m5_cell_line.parquet", 36 | "cd8+_t_cells.parquet", 37 | "adult_t-cell_leukemia_cell_line.parquet", 38 | "cd4+cd25-cd45ra-_memory_conventional_t_cells.parquet", 39 | "cd4+cd25+cd45ra+_naive_regulatory_t_cells.parquet", 40 | "cd4+_t_cells.parquet", 41 | "chronic_lymphocytic_leukemia_t-cll_cell_line.parquet", 42 | "cd4+cd25+cd45ra-_memory_regulatory_t_cells.parquet", 43 | "cd4+cd25-cd45ra+_naive_conventional_t_cells.parquet", 44 | "nk_t_cell_leukemia_cell_line.parquet", 45 | "melanoma_cell_line.parquet", 46 | "melanocyte.parquet", 47 | "skin_fetal.parquet", 48 | 
"iris_pigment_epithelial_cells.parquet", 49 | "retina_adult.parquet" 50 | ], 51 | "do_aggregate_subnets": false 52 | } 53 | } 54 | } -------------------------------------------------------------------------------- /experiments/5_0/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "5_0", 3 | "nickname": "scaling", 4 | "readme": "How do different methods scale in practice?", 5 | "question": "1.0", 6 | "is_active": true, 7 | "data_split_seed": [0], 8 | "type_of_split": ["interventional"], 9 | "expand": "ladder", 10 | "regression_method":[ 11 | "mean", "mean", 12 | "median", "median", 13 | "GradientBoostingRegressor", "GradientBoostingRegressor", 14 | "ExtraTreesRegressor", "ExtraTreesRegressor", 15 | "KernelRidge", "KernelRidge", 16 | "RidgeCV", "RidgeCV", 17 | "RidgeCVExtraPenalty", "RidgeCVExtraPenalty", 18 | "LassoCV", "LassoCV", 19 | "ElasticNetCV", "ElasticNetCV", 20 | "OrthogonalMatchingPursuitCV", "OrthogonalMatchingPursuitCV", 21 | "BayesianRidge", "BayesianRidge", 22 | "DCDFG-spectral_radius-linearlr-False", "DCDFG-spectral_radius-linearlr-False", 23 | "DCDFG-spectral_radius-mlplr-False", "DCDFG-spectral_radius-mlplr-False", 24 | 25 | "GEARS", "GEARS", "RidgeCV", "RidgeCV" 26 | ], 27 | "feature_extraction": [ 28 | "mrna", "mrna", "mrna", "mrna", "mrna", "mrna", "mrna", "mrna", "mrna", "mrna", 29 | "mrna", "mrna", "mrna", "mrna", "mrna", "mrna", "mrna", "mrna", "mrna", "mrna", 30 | "mrna", "mrna", "mrna", "mrna", "mrna", "mrna", 31 | 32 | "mrna", "mrna", "geneformer", "geneformer" 33 | ], 34 | "predict_self": [ 35 | false, false, false, false, false, false, false, false, false, false, 36 | false, false, false, false, false, false, false, false, false, false, 37 | false, false, false, false, false, false, 38 | 39 | true, true, true, true 40 | ], 41 | "num_genes": [ 42 | 500, 1000, 500, 1000, 500, 1000, 500, 1000, 500, 1000, 43 | 500, 1000, 500, 1000, 500, 1000, 500, 1000, 500, 1000, 44 | 500, 
1000, 500, 1000, 500, 1000, 45 | 46 | 500, 1000, 500, 1000 47 | ], 48 | "eligible_regulators": "all", 49 | "facet_by": null, 50 | "color_by": "num_genes", 51 | "factor_varied": "regression_method", 52 | "merge_replicates": true, 53 | "perturbation_dataset": "frangieh_IFNg_v1", 54 | "network_datasets": { 55 | "dense":{} 56 | } 57 | } 58 | 59 | -------------------------------------------------------------------------------- /environment/install.sh: -------------------------------------------------------------------------------- 1 | # This script sets up a new box (for us, usually an AWS EC2 instance) to run benchmarking analyses. 2 | 3 | # Get data collections from Zenodo 4 | sudo apt install unzip 5 | # accessory data, e.g. pLI and list of TF names 6 | wget https://zenodo.org/record/13345104/files/accessory_data.zip && unzip accessory_data.zip & 7 | # perturbations 8 | wget https://zenodo.org/record/13345104/files/perturbation_data.zip && unzip perturbation_data.zip & 9 | # networks 10 | wget https://zenodo.org/record/13345104/files/network_collection.zip && unzip network_collection.zip & 11 | 12 | # Get mamba 13 | wget "https://github.com/conda-forge/miniforge/releases/latest/download/Mambaforge-$(uname)-$(uname -m).sh" 14 | bash Mambaforge-$(uname)-$(uname -m).sh -b 15 | source "${HOME}/mambaforge/etc/profile.d/conda.sh" 16 | 17 | # Set up Conda env 18 | # If you have a GPU, you can use conda_list_explicit_gpu.txt. 19 | mamba create --name ggrn --file perturbation_benchmarking/environment/conda_list_explicit.txt 20 | conda activate ggrn 21 | # Why --no-deps? Makes sure every version is pinned explicitly and is compatible with the other packages. 22 | pip install vl-convert-python==1.4.0 --no-deps 23 | pip install git+https://github.com/snap-stanford/GEARS@df09d7a --no-deps 24 | # PRESCIENT and CO are now used thru docker, but I am leaving this alone for backwards compatibiliy. 
pip install celloracle==0.12.0 --no-deps
pip install prescient==0.1.0 --no-deps
pip install geomloss==0.2.3 --no-deps
pip install git+https://github.com/bowang-lab/scFormer@2df344a --no-deps
pip install 'scib>=1.0.3' --no-deps
pip install biomart==0.9.2 --no-deps
pip install msgpack==1.0.8 --no-deps
# Requirement specifiers containing '>' or '[' must be quoted: unquoted,
# `tensorboardX>=1.9` is parsed by the shell as "install tensorboardX and
# redirect stdout to a file named '=1.9'", silently dropping the version bound,
# and `ray[tune]` is subject to filename globbing.
pip install 'tensorboardX>=1.9' --no-deps
pip install 'ray[tune]==2.6.2' --no-deps
pip install scrublet==0.2.3 --no-deps
pip install pot==0.9.3 --no-deps
pip install wot==1.0.8.post2 --no-deps

# We need a specific version of Geneformer. We use `git lfs pull` because we need certain model files locally.
sudo apt install git-lfs
echo "Cloning geneformer -- this could take a long time."
git lfs install
git clone https://huggingface.co/ctheodoris/Geneformer
# Run the checkout/install in a subshell so the caller's working directory is
# untouched even if a step fails, and so later steps are skipped when `cd` or
# the checkout fails (otherwise `pip install .` would run in the wrong directory).
(
    cd Geneformer &&
    git checkout 50e921d &&
    pip install . --no-deps &&
    git lfs pull
)

# Install our packages
for p in pereggrn_networks pereggrn_perturbations pereggrn ggrn ggrn_backend2 ggrn_backend3 geneformer_embeddings
do
    # https, matching every other URL in this script
    git clone "https://github.com/ekernf01/${p}" --branch v3
    pip install -e "${p}" --no-deps
done

echo "The package installation has finished, but the data download and unzip may still be running in the background, so it may not work right away."
57 | echo "Test your installation:" 58 | echo " conda activate ggrn" 59 | echo " pereggrn -h # see the help page" 60 | echo " pereggrn --experiment_name '1.0_0' --amount_to_do models --no_skip_bad_runs # Run a simple benchmark " 61 | -------------------------------------------------------------------------------- /make_figures/psc_tf_due_diligence.py: -------------------------------------------------------------------------------- 1 | import scanpy as sc 2 | import pandas as pd 3 | import numpy as np 4 | import pereggrn_perturbations 5 | from pereggrn import experimenter 6 | from scipy.stats import rankdata 7 | pereggrn_perturbations.set_data_path("../../perturbation_data/perturbations") 8 | 9 | # This scripe cross-references TF perturbation responses with known targets from Boyer et al. 2005. 10 | 11 | distance_to_targets = pd.read_csv("boyer2005targets.csv")[["GENE", "SOX2", "NANOG", "E2F4"]] 12 | distance_to_targets = 13 | predictions = sc.read_h5ad("../experiments/1.0_1/outputs/predictions/7.h5ad") # condition 7 is LassoCV 14 | observed = pereggrn_perturbations.load_perturbation("nakatake") 15 | observed = experimenter.averageWithinPerturbation(observed) 16 | top_genes = {} 17 | targets = list(set(distance_to_targets["GENE"].unique()).intersection(predictions.var_names)) 18 | overlap = pd.DataFrame(index = targets, columns = ["predicted_and_observed", "predicted_and_boyer", "boyer_and_observed"]) 19 | for target in targets: 20 | top_genes[target] = {} 21 | predicted_logfc = (predictions[ :, target].X.mean(axis=1) - observed[observed.obs["is_control"], target].X.mean()) 22 | observed_logfc = (observed[predictions.obs_names, target].X.mean(axis=1) - observed[observed.obs["is_control"], target].X.mean()) 23 | logfc = pd.DataFrame( 24 | { 25 | "observed_logfc": observed_logfc, 26 | "predicted_logfc": predicted_logfc, 27 | "observed_absolute_logfc": np.abs(observed_logfc), 28 | "predicted_absolute_logfc": np.abs(predicted_logfc), 29 | }, 30 | index = 
predictions.obs_names, 31 | ) 32 | top_genes[target]["predicted"] = logfc.query("@rankdata(-predicted_absolute_logfc)<=5").index 33 | top_genes[target]["observed"] = logfc.query("@rankdata(-observed_absolute_logfc)<=5").index 34 | top_genes[target]["boyer"] = distance_to_targets.query("GENE==@target").T[[False, True, True, True]].set_axis(['distance'], axis = 1).query("distance!='-'").index.values 35 | overlap.loc[target, "observed_top"] = top_genes[target]["observed"][0] 36 | try: 37 | overlap.loc[target, "predicted_top"] = top_genes[target]["predicted"][0] 38 | except IndexError: 39 | overlap.loc[target, "predicted_top"] = "" 40 | overlap.loc[target, "predicted"] = len(top_genes[target]["predicted"]) 41 | overlap.loc[target, "boyer"] = len(top_genes[target]["boyer"]) 42 | overlap.loc[target, "observed"] = len(top_genes[target]["observed"]) 43 | overlap.loc[target, "predicted_and_observed"] = len(set(top_genes[target]["predicted"]).intersection(top_genes[target]["observed"])) 44 | overlap.loc[target, "predicted_and_boyer"] = len(set(top_genes[target]["predicted"]).intersection(top_genes[target]["boyer"])) 45 | overlap.loc[target, "boyer_and_observed"] = len(set(top_genes[target]["boyer"]).intersection(top_genes[target]["observed"])) 46 | 47 | 48 | [overlap.value_counts(c) for c in overlap.columns] 49 | 50 | -------------------------------------------------------------------------------- /make_figures/figure_s1_timeseries.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import scanpy as sc 4 | import anndata 5 | import pereggrn_perturbations 6 | pereggrn_perturbations.set_data_path('../../perturbation_data/perturbations') 7 | import sys 8 | import altair as alt 9 | import os 10 | 11 | sys.path.append("../../perturbation_data/setup/") # access our reusable data ingestion code 12 | import ingestion 13 | import global_effects 14 | effects = [] 15 | for dataset in [ 16 | 
'definitive_endoderm', 17 | 'fantom4', 18 | 'BETS_A549', 19 | ]: 20 | print(dataset) 21 | adata = pereggrn_perturbations.load_perturbation(dataset) 22 | pt = adata.obs["perturbation_type"][0] 23 | uns = adata.uns.copy() 24 | try: 25 | if adata.X.sum() == adata.raw.X.sum(): # We filled in log1p normalized data into the .raw slot for datasets obtained from GEARS. 26 | adata.raw = anndata.AnnData(X = np.exp(adata.raw.X.toarray()) - 1) 27 | adata = ingestion.aggregate_by_perturbation(adata, group_by = ["perturbation"], use_raw = True) 28 | sc.pp.normalize_total(adata) 29 | except: 30 | pass 31 | adata.uns = uns 32 | adata = ingestion.describe_perturbation_effect(adata, perturbation_type = pt) 33 | consistency = ingestion.checkConsistency(adata, pt) 34 | adata.obs["logFC"] = consistency[1] 35 | print("Consistency:") 36 | print(pd.Series(consistency[0]).value_counts()) 37 | fname = "global_effects/" + dataset + ".txt" 38 | os.makedirs("global_effects", exist_ok = True) 39 | global_effects.quantifyEffect(adata, fname = fname, withDEG = False, withMI = False, pseudocount = 1) 40 | obs = adata.obs[['perturbation', 'is_control', 'logFC', 'logFCNorm2', 'logFCMean', 'expression_level_after_perturbation', 'perturbation_type']].copy() 41 | 42 | obs.loc[:, "dataset"] = dataset 43 | effects.append(obs) 44 | 45 | effects = pd.concat(effects) 46 | effects = effects.query("~is_control") 47 | effects = effects.query("logFC != -999") 48 | effects["guide"] = 0 49 | alt.data_transformers.disable_max_rows() 50 | chart = alt.Chart(effects).transform_density( 51 | density='logFC', 52 | groupby=['dataset', 'perturbation_type'], 53 | as_=['logFC', 'density'], 54 | extent=[effects['logFC'].min(), effects['logFC'].max()], 55 | counts=False 56 | ).mark_area(opacity=0.75).encode( 57 | x=alt.X('logFC:Q', title='logFC'), 58 | y=alt.Y('density:Q', title='Density'), 59 | color='dataset:N' 60 | ).properties( 61 | width=200, 62 | height=60 63 | ) 64 | vline = 
alt.Chart(effects).mark_rule(color='black').encode( 65 | x='guide:Q' 66 | ) 67 | chart = (chart + vline).facet( 68 | row="perturbation_type:N", 69 | ) 70 | chart.save('timeseries_plots/fig_effects.svg') 71 | 72 | 73 | chart = alt.Chart(effects).mark_circle(size=10).encode( 74 | x=alt.X('logFC:Q', title='logFC of perturbed gene\'s RNA'), 75 | y=alt.Y('logFCMean:Q', title='Mean absolute logFC, all genes'), 76 | color=alt.Color('dataset:N') 77 | ).properties( 78 | width=200, 79 | height=170 80 | ) 81 | chart.save('timeseries_plots/fig_effects2.svg') 82 | -------------------------------------------------------------------------------- /experiments/1.4.1_0/metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "unique_id": "1.4.1_0", 3 | "nickname": "timeseries celltype networks", 4 | "readme": "Comparing cell type specific versus universal networks for timeseries prediction", 5 | "question": "1.4.1", 6 | "is_active": true, 7 | "facet_by": "type_of_split", 8 | "color_by": "network_datasets", 9 | "factor_varied": "data_split_seed", 10 | "type_of_split": "timeseries", 11 | "eligible_regulators": "tfs", 12 | "num_genes": 2000, 13 | "regression_method": "RidgeCV", 14 | "prediction_timescale": ["1,2,3,10"], 15 | "matching_method": "optimal_transport", 16 | "network_prior": "restrictive", 17 | "perturbation_dataset": "definitive_endoderm", 18 | "visualization_embedding": "X_pca", 19 | "network_datasets": { 20 | "endoderm": { "do_aggregate_subnets": true }, 21 | "gtex_rna": { "do_aggregate_subnets": false }, 22 | "cellnet_human_Hg1332": { "do_aggregate_subnets": false }, 23 | "cellnet_human_Hugene": { "do_aggregate_subnets": false }, 24 | "ANANSE_0.5": { "do_aggregate_subnets": false }, 25 | "ANANSE_tissue_0.5": { "do_aggregate_subnets": false }, 26 | "humanbase": { "do_aggregate_subnets": false }, 27 | "magnum_compendium_394": { 28 | "subnets": [ 29 | "retinal_pigment_epithelial_cells.parquet", 30 | 
"chronic_myelogenous_leukemia_cml_cell_line.parquet", 31 | "teratocarcinoma_cell_line.parquet", 32 | "lung_adenocarcinoma_cell_line.parquet", 33 | "breast_carcinoma_cell_line.parquet", 34 | "embryonic_kidney_cell_line.parquet", 35 | "hepatocellular_carcinoma_cell_line.parquet", 36 | "epitheloid_cancer_cell_line.parquet", 37 | "acute_myeloid_leukemia_fab_m5_cell_line.parquet", 38 | "cd8+_t_cells.parquet", 39 | "adult_t-cell_leukemia_cell_line.parquet", 40 | "cd4+cd25-cd45ra-_memory_conventional_t_cells.parquet", 41 | "cd4+cd25+cd45ra+_naive_regulatory_t_cells.parquet", 42 | "cd4+_t_cells.parquet", 43 | "chronic_lymphocytic_leukemia_t-cll_cell_line.parquet", 44 | "cd4+cd25+cd45ra-_memory_regulatory_t_cells.parquet", 45 | "cd4+cd25-cd45ra+_naive_conventional_t_cells.parquet", 46 | "nk_t_cell_leukemia_cell_line.parquet", 47 | "melanoma_cell_line.parquet", 48 | "melanocyte.parquet", 49 | "skin_fetal.parquet", 50 | "iris_pigment_epithelial_cells.parquet", 51 | "retina_adult.parquet", 52 | "blood_adult.parquet", 53 | "cord_blood_derived_cell_line.parquet", 54 | "whole_blood_ribopure.parquet", 55 | "peripheral_blood_mononuclear_cells.parquet", 56 | "multipotent_cord_blood_unrestricted_somatic_stem_cells.parquet" 57 | ], 58 | "do_aggregate_subnets": false 59 | }, 60 | "empty": { "do_aggregate_subnets": true }, 61 | "dense": { "do_aggregate_subnets": true } 62 | } 63 | } 64 | 65 | 66 | -------------------------------------------------------------------------------- /make_figures/variance_decomposition.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import scanpy as sc 4 | import anndata 5 | import pereggrn_perturbations 6 | pereggrn_perturbations.set_data_path('../../perturbation_data/perturbations') 7 | import sys 8 | import altair as alt 9 | import os 10 | 11 | sys.path.append("../../perturbation_data/setup/") 12 | import ingestion 13 | import global_effects 14 | 15 | def 
decompose_variance(adata, gene_perturbed, dataset): 16 | try: 17 | depth = np.random.choice( a = np.array(adata.raw.X.sum(1)).reshape(-1), size = 1000, replace = True ) 18 | pre_log_scale = np.expm1(adata[0,:].X).sum() 19 | fraction_of_rna_mapping_to_this_gene = np.expm1(adata[:,gene_perturbed].X).mean()/pre_log_scale 20 | poisson_raw_counts = np.array([np.random.poisson( lam = l ) for l in fraction_of_rna_mapping_to_this_gene*depth]) 21 | resampled = np.log1p(pre_log_scale*(poisson_raw_counts / depth)) 22 | poisson = np.var(resampled) 23 | except: 24 | poisson = np.nan 25 | control = np.var(ingestion.try_toarray(adata[adata.obs["is_control"],gene_perturbed].X)) 26 | others = np.var(ingestion.try_toarray(adata[adata.obs["perturbation"]!=gene_perturbed,gene_perturbed].X)) 27 | this = np.var(ingestion.try_toarray(adata[adata.obs["is_control"] | (adata.obs["perturbation"]==gene_perturbed),gene_perturbed].X)) 28 | return pd.DataFrame({ 29 | "poisson": poisson, 30 | "control": control, 31 | "others": others, 32 | "this": this, 33 | "gene_perturbed": gene_perturbed, 34 | "dataset": dataset, 35 | }, index = [0]) 36 | 37 | os.makedirs("variance_decomposition", exist_ok = True) 38 | all_variance_decomposition = [] 39 | for dataset in [ 40 | 'nakatake', 41 | 'freimer', 42 | 'replogle', 43 | 'replogle2', 44 | 'replogle3', 45 | 'replogle4', 46 | 'frangieh_IFNg_v1', 47 | 'frangieh_IFNg_v2', 48 | 'frangieh_IFNg_v3', 49 | 'dixit', 50 | 'adamson', 51 | 'norman', 52 | ]: 53 | print(dataset) 54 | try: 55 | variance_decomposition = pd.read_csv(f"variance_decomposition/{dataset}.csv") 56 | except FileNotFoundError: 57 | adata = pereggrn_perturbations.load_perturbation(dataset) 58 | pt = adata.obs["perturbation_type"][0] 59 | variance_decomposition = [] 60 | for gene_perturbed in adata.uns["perturbed_and_measured_genes"]: 61 | variance_decomposition.append(decompose_variance(adata, gene_perturbed, dataset)) 62 | variance_decomposition = pd.concat(variance_decomposition) 63 | 
import itertools as it

import numpy as np
import pandas as pd
import scipy as sp


def cross_correlate_expression(data1, data2):
    """Correlate perturbation effects between two datasets, one row per shared perturbation.

    For every perturbation label present in both datasets (excluding any label carried by a
    control observation in either dataset), the mean expression of the perturbed observations
    minus the mean expression of that dataset's controls is computed over the genes the two
    datasets share, and the two resulting vectors are correlated.

    Parameters
    ----------
    data1, data2 : AnnData-like
        Must provide ``.var.index`` (gene names), ``.obs.is_control``, ``.obs.perturbation``,
        ``.X`` and subsetting via ``data[rows]`` / ``data[:, genes]``.
        NOTE(review): the difference of means is only a log fold change if ``.X`` is
        log-transformed, as the original inline comment states -- confirm for new datasets.

    Returns
    -------
    pandas.DataFrame
        Columns ``Pearson``, ``Spearman``, ``Perturbation``; rows sorted by perturbation
        label. Empty (columns only) when there is no shared perturbation or when fewer than
        two genes are shared, since a correlation is undefined in that case.
    """
    # Explicit submodule import: `import scipy as sp` alone does not guarantee that
    # `sp.stats` is loaded on every SciPy version.
    from scipy import stats

    columns = ['Pearson', 'Spearman', 'Perturbation']

    # Subset data to genes common in both datasets (sorted for a reproducible gene order).
    common_genes = sorted(set(data1.var.index) & set(data2.var.index), key=str)
    if len(common_genes) < 2:
        # pearsonr raises on vectors shorter than 2; report "nothing comparable" instead.
        return pd.DataFrame(columns=columns)
    data1 = data1[:, common_genes].copy()
    data2 = data2[:, common_genes].copy()

    # Compute baseline shared gene expressions
    ctrl1 = np.asarray(data1[data1.obs.is_control].X.mean(axis=0)).squeeze()
    ctrl2 = np.asarray(data2[data2.obs.is_control].X.mean(axis=0)).squeeze()
    ctrl1n = data1[data1.obs.is_control].obs.perturbation.unique()
    ctrl2n = data2[data2.obs.is_control].obs.perturbation.unique()

    # Focus on the shared genetic perturbations. Parentheses make the intended grouping
    # explicit ('-' binds tighter than '&'); sorting makes the row order reproducible.
    shared = set(data1.obs.perturbation) & set(data2.obs.perturbation)
    common_perts = sorted(shared - set(ctrl1n) - set(ctrl2n), key=str)

    correlations = list()
    for p in common_perts:
        trt1 = np.asarray(data1[data1.obs.perturbation == p].X.mean(axis=0)).squeeze()
        trt2 = np.asarray(data2[data2.obs.perturbation == p].X.mean(axis=0)).squeeze()
        lfc1 = trt1 - ctrl1
        lfc2 = trt2 - ctrl2  # Log Fold Change - X is log-transformed
        correlations.append([
            # Element 0 of the result is the correlation coefficient (same value as
            # `.statistic`, but also available on older SciPy releases).
            stats.pearsonr(lfc1, lfc2)[0],
            stats.spearmanr(lfc1, lfc2)[0],
            p
        ])
    return pd.DataFrame(correlations, columns=columns)


# Driver: load the datasets, correlate every pair within each perturbation modality and
# draw box plots. Guarded so that importing this file neither loads data nor writes files.
if __name__ == "__main__":
    import os

    import altair as alt
    import pereggrn_perturbations

    pereggrn_perturbations.set_data_path('../../perturbation_data/perturbations')

    nakatake = pereggrn_perturbations.load_perturbation('nakatake')
    joung = pereggrn_perturbations.load_perturbation('joung')
    replogle2 = pereggrn_perturbations.load_perturbation('replogle2')
    replogle3 = pereggrn_perturbations.load_perturbation('replogle3')
    dixit = pereggrn_perturbations.load_perturbation('dixit')
    adamson = pereggrn_perturbations.load_perturbation('adamson')

    # Explicit name -> data lookup instead of eval() on the dataset names.
    datasets = {
        'nakatake': nakatake,
        'joung': joung,
        'replogle2': replogle2,
        'replogle3': replogle3,
        'dixit': dixit,
        'adamson': adamson,
    }

    CRISPRi = ['replogle2', 'replogle3', 'dixit', 'adamson']
    CRISPRiCorrelations = list()
    for d1, d2 in it.combinations(CRISPRi, r=2):
        corrs = cross_correlate_expression(datasets[d1], datasets[d2])
        corrs['Dataset 1'] = d1
        corrs['Dataset 2'] = d2
        CRISPRiCorrelations.append(corrs)
        print(d1, d2)
    CRISPRiCorrelations = pd.concat(CRISPRiCorrelations)

    OE = ['nakatake', 'joung']
    OECorrelations = list()
    for d1, d2 in it.combinations(OE, r=2):
        corrs = cross_correlate_expression(datasets[d1], datasets[d2])
        corrs['Dataset 1'] = d1
        corrs['Dataset 2'] = d2
        OECorrelations.append(corrs)
        print(d1, d2)
    OECorrelations = pd.concat(OECorrelations)

    CRISPRiCorrelationsLong = CRISPRiCorrelations.melt(id_vars=['Dataset 1', 'Dataset 2', 'Perturbation'],
                                                       value_vars=['Pearson', 'Spearman'],
                                                       var_name='Correlation Type',
                                                       value_name='Value')
    OECorrelationsLong = OECorrelations.melt(id_vars=['Dataset 1', 'Dataset 2', 'Perturbation'],
                                             value_vars=['Pearson', 'Spearman'],
                                             var_name='Correlation Type',
                                             value_name='Value')
    AllCorrelations = pd.concat([CRISPRiCorrelationsLong, OECorrelationsLong])
    AllCorrelations["Datasets"] = AllCorrelations["Dataset 1"] + " vs " + AllCorrelations["Dataset 2"]
    AllCorrelations = AllCorrelations.query("Datasets!='dixit vs adamson'")
    alt.data_transformers.disable_max_rows()
    chart = alt.Chart(
        AllCorrelations
    ).mark_boxplot(
    ).encode(
        x = "Datasets:N",
        y = "Value:Q",
        color = "Correlation Type:N",
        xOffset = "Correlation Type:N",
    ).properties(
        width=400,
        height=200,
        title = "Cross-dataset correlations"
    )
    os.makedirs("plots", exist_ok=True)  # chart.save does not create missing directories
    chart.save("plots/cross_dataset_correlations.svg")
ridge regression, LASSO, kernel regression, neural nets, boosted trees/random forests)? 5 | 1.1. How dense are the network structures that best predict expression following new perturbations? 6 | 1.1.1. How harshly should we prune features? 7 | 1.1.2. Should we allow non-TF regulators? 8 | 1.2. How does handling of time affect performance? 9 | 1.2.1. For dynamic models, is RNA velocity better or worse than modeling based on sample collection time? 10 | 1.2.2. Is it better to match each treatment to the nearest control (estimating total effects), or match each treatment to itself and assume steady state (estimating direct effects)? Is it better to predict results after a single iteration of the model, or a few, or many (steady state)? How do these decisions interact? 11 | 1.3. How much are causal effects or causal structures shared across different cell types? 12 | 1.3.1. Do estimators treating cell types as "separate", "shared", or "similar" work best? 13 | 1.3.2. See 1.4.1 14 | 1.3.3. Can transfer learning or pre-training approaches such as GeneFormer improve causal effect predictions? 15 | 1.4. About existing drafts of causal networks affecting transcription: 16 | 1.4.0. Do most regulators have similar effects across all their targets? 17 | 1.4.1. Do cell-type-specific draft networks work better on the corresponding cell types? 18 | 1.4.2. What’s the best way to use a given network? Does GEARS beat causal inference approaches? 19 | 1.4.3. Do some sources of network structures work better than others? 20 | 1.4.4. Even if we can't get quantitative fold estimates, do networks predict which genes will change and which stay the same? 21 | 1.4.5. Given gene expression, do existing networks predict which genes were perturbed (DoRothEA copycat)? 22 | 1.5. How do existing methods compare on common tasks? 23 | 1.5.1. How do CellOracle, scKinetics, Dictys, PRESCIENT, RNAForecaster, and simple baselines compare? 24 | 1.5.2. How does OneSC perform in systematic tests? 
25 | 1.6. What method of imposing low-rank structure works best, if any? 26 | 1.6.1. Does DCD-FG work? 27 | 1.6.2. Leaving aside causal inference or held-out perturbations, does low-rank structure also help learn fold changes for perturbations, as in FR-Perturb? 28 | 1.7. What method of measuring TF activity works best? 29 | 1.8. What types of data contain more useful signal? How do mundane details (e.g. data splitting) affect apparent performance? 30 | 1.8.1. Which is more useful: lots of perturbations, or wild-type time-series data? 31 | 1.8.2. Does pseudobulk aggregation or metacell aggregation or averaging of replicates hurt performance? 32 | 1.8.3. How does variable gene selection affect apparent performance? 33 | 1.8.4. Is the main problem statistical generalization, or causal identification? Specifically, is it harder when the perturbations in the test set do not appear in the training set, or is it just as hard with a simple random split? 34 | 1.8.5. How do different data splits affect performance (50-50 vs 90-10, different seeds)? 35 | 1.8.6. Some evaluations require revealing all the test data to the predictor -- for instance, any evaluation of heldout data log likelihood. Does this make the task substantially easier? 36 | 1.9. Why does everything fail? Would similar evaluations work if cascading effects were much larger than noise, or if models were correctly specified? 37 | 2. Different model assumptions imply different amounts of perturbations are needed to identify network structure. What do our results imply about identifiability? 38 | 3. Is it possible to obtain calibrated predictive intervals for expression profiles after unseen perturbations? 39 | 3.1. What are the biggest drivers of uncertainty? 40 | 3.1.1. Measurement noise? 41 | 3.1.2. Network structure? 42 | 3.1.3. Causal effect size & direction? 43 | 3.1.4. Systematic errors such as samples failing sequencing or off-target CRISPR effects 44 | 4. 
import os
import sys

import numpy as np
import pandas as pd
import scanpy as sc
import anndata
import pereggrn_perturbations
pereggrn_perturbations.set_data_path('../../perturbation_data/perturbations')
import altair as alt
from scipy.stats import rankdata as rank

sys.path.append("../../perturbation_data/setup/") # access our data ingestion module, which is not currently pip-installable
import ingestion
import global_effects

# NOTE(review): the loop nesting below was reconstructed from a copy of this script in which
# indentation had been lost; confirm it against the original file before merging.

effects = []

# Display order (and color-scale domain) of the datasets in both figures.
DATASET_ORDER = [
    "nakatake",
    "joung",
    "norman",
    "replogle1",
    "replogle3",
    "replogle4",
    "adamson",
    "replogle2",
    "freimer",
    "dixit",
    "frangieh_IFNg_v2",
]

# Both sections below write into these folders; create them so a fresh checkout works.
os.makedirs("global_effects", exist_ok=True)
os.makedirs("plots", exist_ok=True)

# Top-n genes consistency across replicates: for each perturbation, the Jaccard index between
# the sets of the n genes with the largest |expression - control mean| in each replicate.
for dataset in ["nakatake", "freimer", "frangieh_IFNg_v3", "replogle1"]:
    print(dataset)
    adata = pereggrn_perturbations.load_perturbation(dataset)
    baseline = adata.X[adata.obs["is_control"].to_numpy(), :].mean(axis = 0)
    for n in [20, 100, 200]:
        # Reset per n so that no value computed for a previous n can leak into this mean.
        intersection = {}
        union = {}
        jaccard = {}
        for perturbation in adata.obs["perturbation"].unique():
            intersection[perturbation] = set(adata.var_names)
            union[perturbation] = set()
            for i in adata.obs.query("perturbation == @perturbation").index:
                if adata.obs["is_control"].loc[i]:
                    continue
                # Flatten so the boolean mask is 1-D whatever matrix type .X is.
                logfc = np.asarray(adata[i, :].X - baseline).ravel()
                top_n_genes = set(adata.var_names[rank(-np.abs(logfc)) <= n])
                intersection[perturbation] = intersection[perturbation].intersection(top_n_genes)
                union[perturbation] = union[perturbation].union(top_n_genes)
            # Labels whose observations are all controls contribute no gene sets; skip them
            # rather than dividing by zero.
            if union[perturbation]:
                jaccard[perturbation] = len(intersection[perturbation]) / len(union[perturbation])
        print(n)
        print(np.array(list(jaccard.values())).mean())

# Effect size and direction
for dataset in DATASET_ORDER:
    print(dataset)
    adata = pereggrn_perturbations.load_perturbation(dataset)
    # .iloc[0]: take the first row by position; obs is indexed by labels, not integers.
    pt = adata.obs["perturbation_type"].iloc[0]
    uns = adata.uns.copy()
    try:
        if adata.X.sum() == adata.raw.X.sum(): # We filled in log1p normalized data into the .raw slot for datasets obtained from GEARS.
            adata.raw = anndata.AnnData(X = np.exp(adata.raw.X.toarray()) - 1)
        adata = ingestion.aggregate_by_perturbation(adata, group_by = ["perturbation"], use_raw = True)
        sc.pp.normalize_total(adata)
    except Exception as e:
        # Still best effort (e.g. datasets without usable raw counts), but say so instead of
        # silently continuing with un-aggregated data; Ctrl-C is no longer swallowed.
        print(f"Could not aggregate {dataset} by perturbation; continuing with it as loaded: {e!r}")
    adata.uns = uns
    adata = ingestion.describe_perturbation_effect(adata, perturbation_type = pt)
    consistency = ingestion.checkConsistency(adata, pt)
    adata.obs["logFC"] = consistency[1]
    print("Consistency:")
    print(pd.Series(consistency[0]).value_counts())
    fname = "global_effects/" + dataset + ".txt"
    global_effects.quantifyEffect(adata, fname = fname, withDEG = False, withMI = False, pseudocount = 1)
    obs = adata.obs[['perturbation', 'is_control', 'logFC', 'logFCNorm2', 'logFCMean', 'expression_level_after_perturbation', 'perturbation_type']].copy()

    obs.loc[:, "dataset"] = dataset
    effects.append(obs)

effects = pd.concat(effects)
effects = effects.query("~is_control")
# -999 is filtered out as a placeholder. NOTE(review): presumably it marks perturbations
# whose logFC could not be computed -- confirm in ingestion.checkConsistency.
effects = effects.query("logFC != -999").copy()  # copy: a column is added on the next line
effects["guide"] = 0  # x position of the vertical reference line
alt.data_transformers.disable_max_rows()
chart = alt.Chart(effects).transform_density(
    density='logFC',
    groupby=['dataset', 'perturbation_type'],
    as_=['logFC', 'density'],
    extent=[effects['logFC'].min(), effects['logFC'].max()],
    counts=False
).mark_area(opacity=0.75).encode(
    x=alt.X('logFC:Q', title='logFC'),
    y=alt.Y('density:Q', title='Density'),
    color=alt.Color('dataset:N', scale=alt.Scale(domain=DATASET_ORDER, scheme = "dark2")) # replace with your desired order
).properties(
    width=200,
    height=60
)
vline = alt.Chart(effects).mark_rule(color='black').encode(
    x='guide:Q'
)
chart = (chart + vline).facet(
    row="perturbation_type:N",
)
chart.save('plots/fig_effects.svg')


chart = alt.Chart(effects).mark_circle(size=10).encode(
    x=alt.X('logFC:Q', title='logFC of perturbed gene\'s RNA'),
    y=alt.Y('logFCMean:Q', title='Mean absolute logFC, all genes'),
    color=alt.Color('dataset:N', scale=alt.Scale(domain=DATASET_ORDER, scheme = "dark2")) # replace with your desired order
).properties(
    width=200,
    height=100
)
chart = chart.facet(
    row="perturbation_type:N",
).resolve_scale(y='independent')
chart.save('plots/fig_effects2.svg')
0.0,2.6151125199638443,0.08006957468736565,17.552997344902433,0.030616784868386607 12 | 1.0,2.176322591807747,0.10706080320792109,22.92449815832578,0.04440849678227909 13 | 0.0,2.6655085847419473,0.07177084460139224,16.114358973094127,0.025979662309170398 14 | 0.0,2.622250569057808,0.06794705949446159,14.710025011737287,0.02628586120148789 15 | 0.0,2.4093025288723746,0.07356037164173564,16.193252426744046,0.028669752282965148 16 | 0.0,2.5532375238639036,0.07083735444318189,15.3273041966959,0.028080493564683277 17 | 0.0,2.6929299135049423,0.07005117096980484,16.217615159266217,0.02632713837306155 18 | 4.0,2.4316239224900285,0.12049612542234536,25.333510947706223,0.04772982519871703 19 | 0.0,2.2114906224160804,0.10901283522241911,21.254330364743346,0.04965254326157386 20 | 0.0,2.7540662723576963,0.0758180737306825,17.456677748981953,0.027243412285100597 21 | 0.0,2.4142981922921405,0.07749321709100794,16.587726989664553,0.031969599075122285 22 | 1.0,2.503459253406378,0.08899747768076602,19.399294681009472,0.03396729560137224 23 | 0.0,2.2869240140038567,0.0931245679974095,19.451142179248002,0.039343541404328594 24 | 0.0,2.540360125521646,0.09077471971848658,19.39568265930997,0.03631894476984457 25 | 0.0,2.6700745619904196,0.06637567537442006,15.062996944039243,0.024810393462022223 26 | 0.0,2.52498257077897,0.0777778239105776,16.436681535373335,0.03454566934990724 27 | 0.0,2.378222112648026,0.07358529732288872,15.25238353229441,0.030891321883759114 28 | 0.0,2.7250309522420455,0.06040585854428375,13.604706537920304,0.022381497134956244 29 | 0.0,1.500060540764355,0.15786745086780943,28.42930995382543,0.08512573705969635 30 | 7.0,1.8647697256385587,0.11059686425731534,21.68691535448334,0.05119773652271695 31 | 0.0,2.425943948510006,0.08011129440892355,17.67664559012215,0.031156963927202713 32 | 1.0,2.2550772581169563,0.11998042053370019,24.86143297958747,0.05152616434880946 33 | 54.0,1.8939861666516866,0.1318370395217529,23.82429140264389,0.06519928801964855 34 | 
0.0,2.422898732389288,0.07932533938208546,16.60126406040305,0.0355124462001576 35 | 0.0,2.622363746838139,0.0753744298133296,16.849940503019837,0.028329388078202518 36 | 0.0,1.9524384274211086,0.1160466781533375,22.371578128082675,0.05325435950769616 37 | 0.0,2.2861635973115515,0.0979858584359881,19.616881954554533,0.04490560643830245 38 | 0.0,2.7176246369039534,0.057826564849600075,13.201582738556043,0.02148734152363047 39 | 0.0,2.300267766714472,0.09937908237044964,20.803438442217914,0.042829227048800295 40 | 3.0,2.3908362969831214,0.11192811376779843,22.783656608232356,0.0471706120296902 41 | 0.0,2.620373288439854,0.07786323598412682,17.818366699631007,0.028500890621481725 42 | 0.0,2.6345536288636024,0.06681063482790979,14.405803824984352,0.026443984176409054 43 | 0.0,2.782683527054495,0.06956382325805772,15.748230761025967,0.02569063260740482 44 | 2.0,2.6399668948008452,0.08289351032798649,18.791098363854722,0.02927904338580615 45 | 1.0,2.2533229235641747,0.09717749542996486,19.24037245231768,0.0431825915750651 46 | 38.0,2.1241585995274197,0.1293421259325743,23.565835319518047,0.062369558478899204 47 | 0.0,2.6103993735116644,0.0838894380212173,18.451538890948708,0.03193665866933697 48 | 0.0,2.644998649442768,0.07416858606639103,16.968667339537053,0.028507650754719012 49 | 0.0,2.76630450699615,0.05712219823283664,13.090239329751714,0.02083094261160019 50 | 0.0,2.731051453749715,0.07562789805828966,16.836151719705583,0.02852407301772497 51 | 0.0,2.240996350495207,0.1140636073145056,22.89587659448213,0.04990984568367461 52 | 35.0,1.852761638394906,0.15870331397918336,27.635287564807932,0.08327207977346303 53 | 0.0,2.6210457475559297,0.07115245919931588,16.112115807116844,0.028170699945515486 54 | 15.0,2.2787776058116176,0.10455931069427943,19.960056707886725,0.04696374762042114 55 | 0.0,2.573763781743705,0.08197435178401422,17.917952572128662,0.03136883747518937 56 | 0.0,2.587972611847775,0.07220116457801849,16.233346320347362,0.0274326827262376 57 | 
0.0,2.5812635513205446,0.06833862970091643,14.577992283911522,0.027544027881836845 58 | 0.0,2.653495107185987,0.11190221021212549,24.25920414237376,0.04192543787242102 59 | 0.0,2.7215462189138004,0.0786076340661232,17.317861907221353,0.02873018454633397 60 | 0.0,2.5293173955825203,0.0730886386363198,15.26428710173191,0.031940094417080164 61 | 0.0,2.5876814020787835,0.06862864493382613,15.277351496735193,0.024767700354738464 62 | 0.0,2.6048379142212115,0.06711990132212776,15.097690420801136,0.025109119358669968 63 | 3.0,1.8464455469278533,0.13677045030443505,25.91788697629064,0.0660582637417314 64 | 0.0,2.666669405370624,0.07515900872460457,17.151767292167396,0.027366483388955844 65 | -------------------------------------------------------------------------------- /environment/install.md: -------------------------------------------------------------------------------- 1 | ### Installation 2 | 3 | We use Conda + Mamba + Docker to manage most dependencies. We offer either a minimal install, which may work cross-platform but lacks access to many GRN methods, or an exact install, which can reproduce all our results but only works on Ubuntu 20.04. 4 | 5 | ### Hardware 6 | 7 | Certain models nominally require GPU's, but we have been able to run most experiments using a CPU, sometimes by making minimal changes to Pytorch code. See the [GGRN repo](https://github.com/ekernf01/ggrn) for details on GPU requirements of specific methods. To install with GPU available, we recommend you use the exact or minimal install above; activate the environment; and then install a gpu version of PyTorch `2.x.x`. 8 | 9 | 50GB of disk space and 64GB of RAM is enough resources to run most experiments. Certain tree-based models or large datasets (Norman especially) may require more RAM. The more benchmarks you run, the more predictions are saved and the more disk space is occupied. To re-run all experiments, we would recommend 250GB disk space. 
10 | 11 | ### Minimal install 12 | 13 | On a different operating system or environment, reproducing our results exactly is infeasible. However, you should be able to carry out many of our experiments, or your own new experiments, even without all dependencies. Use the commands in [`install_minimal.sh`](https://github.com/ekernf01/perturbation_benchmarking/blob/main/environment/install_minimal.sh). They will install Python packages in a new conda environment, and they will download about 1 GB of example data from Zenodo. 14 | 15 | Some notes: 16 | 17 | - If you want the development version of our packages, you can remove the tag `@v2` from the end of the pip install commands. 18 | - For this minimal installation, we do not download all the data or networks. We only include a couple of examples. You can download the full network and data collections from Zenodo [DOI: 10.5281/zenodo.8071808](https://doi.org/10.5281/zenodo.8071808). 19 | - We require wget, git, and conda to be installed already. If the data download doesn't work with `wget`, you can easily rephrase it to use `curl` instead, or you can download the data manually using a web browser. 20 | - This doesn't try to install all dependencies, so some backends may be unavailable. 21 | - This doesn't try to install Docker or Singularity. If you want access to containerized methods via ggrn, you need to install Docker yourself. 22 | - This install code is written in bash and tested on Ubuntu Linux. On a Mac, the default shell is zsh, and you may need to run a bash shell (just type `bash`) for everything to work. We are not able to support Windows. 23 | 24 | ### Exact install 25 | 26 | To reproduce our computing environment exactly, you can start with a bash shell on a clean Linux box (we have tested Rocky Linux release 8.8, Ubuntu 20.04, and Ubuntu 22.04). Use the commands in [install.sh](https://github.com/ekernf01/perturbation_benchmarking/blob/main/environment/install.sh).
This will install mamba and many Python packages, and it will download all our data (20 GB) from Zenodo. 27 | 28 | ```bash 29 | git clone https://github.com/ekernf01/perturbation_benchmarking 30 | source perturbation_benchmarking/environment/install.sh 31 | ``` 32 | 33 | Our install script doesn't try to install Docker or Singularity. If you want access to containerized methods via ggrn, you need to install Docker yourself. We cannot support you in this step; if it doesn't work, you will need to go to the official Docker instructions or another source. The following worked for us on an Amazon EC2 instance running Ubuntu 22.04. 34 | 35 | ```bash 36 | sudo apt-get update 37 | sudo apt-get install ca-certificates curl 38 | sudo install -m 0755 -d /etc/apt/keyrings 39 | sudo curl -fsSL https://download.docker.com/linux/ubuntu/gpg -o /etc/apt/keyrings/docker.asc 40 | sudo chmod a+r /etc/apt/keyrings/docker.asc 41 | echo \ 42 | "deb [arch=$(dpkg --print-architecture) signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/ubuntu \ 43 | $(. /etc/os-release && echo "$VERSION_CODENAME") stable" | \ 44 | sudo tee /etc/apt/sources.list.d/docker.list > /dev/null 45 | sudo apt-get update 46 | sudo apt-get install docker-ce docker-ce-cli containerd.io docker-buildx-plugin docker-compose-plugin 47 | # Now configure it so you don't need sudo for every docker command. 48 | # https://askubuntu.com/questions/477551/how-can-i-use-docker-without-sudo 49 | sudo usermod -aG docker $USER 50 | sg docker -c "bash" 51 | ``` 52 | 53 | ### How to check the installation 54 | 55 | **Warning**: data download and unzip will still be running in the background after the installer finishes. It is a ~20GB download. This means **the experiments may not work immediately.** If you see no `network_collection` or `perturbation_data` folders, then you need to wait for the download+unzip to finish.
library(ggplot2)
library(dplyr)
library(stringr)
library(arrow)
library(magrittr)
library(rjson)
# NOTE(review): machine-specific working directory; the relative paths below depend on it.
setwd("/home/ekernf01/Desktop/jhu/research/projects/perturbation_prediction/cell_type_knowledge_transfer/perturbation_benchmarking/make_figures/")
source("plotting_functions.R")

# networks-only
# For experiments 1.4.4_1 .. 1.4.4_8, draw two box plots per perturbation dataset, one box per
# network: (1) fold change of targets vs non-targets, (2) -log10 p-value of the same contrast.
{
  cell_type_matching = rjson::fromJSON(file = "../../accessory_data/cell_type_matching.json")

  # Returns TRUE when `subnetwork` (a "<network> <subnetwork>" string) is listed in
  # cell_type_matching$networks for the cell type of `perturbation_dataset`, FALSE otherwise
  # (including when the lookup fails because a key is missing).
  do_subnetworks_match = function(perturbation_dataset, subnetwork){
    x = cell_type_matching$celltypes[perturbation_dataset]
    x %<>% unlist
    # If x is from a network that does not include any relevant subnetwork, the key might be missing from cell_type_matching$networks.
    # If x is from a network that does include a relevant subnetwork, this code runs as expected: check if this subnetwork is the relevant one.
    try(
      {
        x = cell_type_matching$networks[x][[1]]
        # Converts a nested list of vectors to a tidy dataframe: {a:[1,2], b:2} becomes [[a,a,b], [1,2,2]].
        x = data.frame(network = Reduce(c, mapply(rep, names(x), sapply(x, length))),
                       subnetwork = Reduce(c, x))
        x = paste(x$network, x$subnetwork)
        return(subnetwork %in% x)
      },
      silent = TRUE
    )
    return(FALSE)
  }

  X = collect_experiments(paste0("1.4.4_", 1:8))
  X$cell_types_match = mapply(do_subnetworks_match,
                              X$perturbation_dataset,
                              X$network_datasets)
  X <- X %>% mutate(chart_x = paste(regression_method, starting_expression, sep = "_"))
  X$perturbation_dataset %<>% gsub("γ", "g", .)
  X %<>% make_the_usual_labels_nice()
  my_levels = unique(c("frangieh\nIFNg v1", "frangieh\nIFNg v2", "frangieh\nIFNg v3", "nakatake", "nakatake\nscrna\nsimulated", X$perturbation_dataset))
  X$perturbation_dataset %<>% factor(levels = my_levels)

  # Strip file-format suffixes, then split "<source> <tissue>" into its two parts.
  X$network_datasets %<>% gsub(".parquet", "", .)
  X$network_datasets %<>% gsub(".csv_converted", "", .)
  X$network_datasets %<>% gsub("_top_filtered", "", .)
  X$network_source = X$network_datasets %>%
    strsplit(" ") %>%
    sapply(extract2, 1)
  X$network_tissue = X$network_datasets %>%
    paste("all") %>%   # guarantees a second token for networks without a tissue
    strsplit(" ") %>%
    sapply(extract2, 2) %>%
    tolower %>%
    gsub("_", " ", .) %>%
    gsub("b lymphocyte", "bcell", .) %>%
    gsub(" memory", "", .) %>%
    gsub(" regulatory", "", .) %>%
    gsub(" conventional", "", .) %>%
    gsub(" naive", "", .) %>%
    gsub("retinal pigment epithelial", "rpe", .) %>%
    gsub("chronic lymphocytic leukemia", "", .) %>%
    gsub("chronic myelogenous leukemia", "", .) %>%
    gsub("suprapubic", "", .) %>%
    gsub("lower leg", "", .) %>%
    gsub("brain .*", "brain", .) %>%
    gsub("cell line", "", .) %>%
    gsub("muscleskel", "muscle", .) %>%
    gsub("pancreatic", "pancreas", .)

  # Networks that come as a single graph are grouped under the source "other" and
  # labelled by their own name.
  single_networks = c("celloracle_human", "magnum_compendium_ppi" , "MARA_FANTOM4" , "STRING" , "magnum_compendium_32" )
  X$network_tissue[X$network_source %in% single_networks] = X$network_source[X$network_source %in% single_networks]
  X$network_source[X$network_source %in% single_networks] = "other"
  X$network_pretty = paste(
    as.integer(as.factor(X$network_source)),
    X$network_tissue
  )

  for(dataset in X$perturbation_dataset %>% unique){
    current_X = subset(X, perturbation_dataset == dataset)

    # --- Plot 1: fold change, networks ordered by median fold change -------------------
    networks_by_fc = current_X %>%
      dplyr::group_by(network_pretty) %>%
      dplyr::summarise(fc_targets_vs_non_targets = median(fc_targets_vs_non_targets, na.rm = TRUE)) %>%
      dplyr::arrange(fc_targets_vs_non_targets)
    current_X$network_pretty %<>% factor(levels = networks_by_fc$network_pretty)
    # NOTE(review): geom_vline(xintercept = 0) on a discrete x axis is kept from the
    # original; a horizontal reference line at y = 0 may have been intended.
    # Every layer is joined with `+`: previously the chain broke after ggtitle(dataset),
    # so the y label and the monospace axis theme were never applied.
    fc_plot = ggplot(current_X) +
      geom_boxplot(outlier.shape = NA,
                   aes(color = cell_types_match,
                       x = network_pretty, y = pmax(pmin(fc_targets_vs_non_targets, 0.5), -0.5))) +
      theme(axis.text.x = element_text(angle = 90, hjust = 0, vjust = 0.5)) +
      facet_wrap(~network_source, scales = "free", nrow = 1) +
      geom_vline(xintercept = 0) +
      ggtitle(dataset, subtitle = "Perturbation response enrichment of known regulons") +
      ylab("Log fold change in target genes minus\nlog fold change in other genes") +
      theme(axis.text.x = element_text(family = "mono", face = "bold"))
    # Own file name: previously both plots were saved to the same path, so this one was
    # always overwritten by the p-value plot below.
    ggsave(filename = paste0("plots/fig_network_only_fc_", dataset, ".pdf"),
           plot = fc_plot, width = 14, height = 8)

    # --- Plot 2: p-values, networks ordered by median -log10 p-value ---------------------
    networks_by_pvalue = current_X %>%
      dplyr::group_by(network_pretty) %>%
      dplyr::summarise(fc_targets_vs_non_targets = median(-log10(pvalue_targets_vs_non_targets + 0.00001), na.rm = TRUE)) %>%
      dplyr::arrange(fc_targets_vs_non_targets)
    current_X$network_pretty %<>% factor(levels = networks_by_pvalue$network_pretty)
    pvalue_plot = ggplot(current_X) +
      geom_boxplot(outlier.shape = NA,
                   aes(color = cell_types_match,
                       x = network_pretty, y = -log10(pvalue_targets_vs_non_targets))) +
      theme(axis.text.x = element_text(angle = 90, hjust = 0, vjust = 0.5)) +
      facet_wrap(~network_source, scales = "free", nrow = 1) +
      geom_vline(xintercept = 0) +
      ylab("-Log10 p-value of H0: \ntarget genes have same fc as non-targets") +
      theme(axis.text.x = element_text(family = "mono", face = "bold")) +
      ggtitle(dataset, subtitle = "Perturbation response enrichment of known regulons")
    # Keeps the original file name, which is what the script used to leave on disk.
    ggsave(filename = paste0("plots/fig_network_only_", dataset, ".pdf"),
           plot = pvalue_plot, width = 14, height = 8)
  }
}
-999.0,1.9589046428696335,0.7092332669238077,64.25641029381931,0.4728460125737106 14 | -999.0,2.0239483293011618,0.6610630176157726,60.07961506021345,0.49264387641819385 15 | -999.0,1.9927488041133377,0.6680741053147355,61.353261239606795,0.48381930671564566 16 | -999.0,1.9702931917985653,0.6798582035878346,60.41057181469964,0.4588274136423368 17 | -999.0,1.949990384254539,0.6851844616866452,61.859357483675794,0.48092650456738073 18 | -999.0,2.0091737561777085,0.6152866812185548,55.58070705065005,0.46150747235591183 19 | -999.0,1.9952716613485404,0.7517209348508542,67.95510829544779,0.5488176996145346 20 | -999.0,1.8350210793114705,0.7220683781231743,64.30537670566792,0.5117125758313197 21 | -999.0,1.934779431043772,0.6828190171396469,61.41311215649594,0.4906493981049376 22 | -999.0,1.7382237625562553,0.817524375024505,71.85912146725195,0.59915400242542 23 | -999.0,1.6948172069470762,0.8726550971649565,76.71886205280724,0.6271573285286236 24 | -999.0,1.8496267502299086,0.7531653868964031,67.73920612348142,0.5432100768221629 25 | -999.0,1.4829375753295577,1.0760398323870752,92.69790857348158,0.813174067012588 26 | -999.0,1.8290744463017794,0.7820287049066247,69.9348735353462,0.5689592408257427 27 | -999.0,2.0229068283250085,0.6217493260455793,54.81808795575437,0.4761566215333669 28 | -999.0,2.0727350941126788,0.6484616342485798,58.40361354564193,0.4984418819141664 29 | -999.0,2.038279364228695,0.6005967555295818,54.756859194032366,0.448899013773742 30 | -999.0,1.976331308541767,0.6436719076466788,57.32857310541398,0.4913584000433753 31 | -999.0,1.5980561439624257,0.8792795831511191,76.44724345620084,0.6366285728161758 32 | -999.0,1.827423668933483,0.7639845420687541,68.86716178932963,0.5270872540014772 33 | -999.0,1.856882599249974,0.7462203360439567,70.54588043374459,0.5061468722330975 34 | -999.0,1.5843348053958297,0.8877348034055554,79.12083691468126,0.658159985896507 35 | -999.0,2.0056607398726873,0.6320341827213461,56.0288615867708,0.4701156590765587 36 | 
-999.0,1.680331746299358,0.8079042196258024,72.19668736925499,0.6067361030675495 37 | -999.0,1.9868161965263593,0.6493278656441207,59.510334833379105,0.4727059859736925 38 | -999.0,2.022586790853879,0.6297152474727035,58.59943625631716,0.46548651347819087 39 | -999.0,2.017948905415038,0.6436480398721556,58.16848055875927,0.47816793037761274 40 | -999.0,1.7783122662071587,0.7655476052788972,68.38704440064329,0.5549992420906138 41 | -999.0,1.9002998575478416,0.666107005728057,62.24487008664547,0.44963131773855436 42 | -999.0,2.051551411144596,0.5755306665788776,53.17461578160182,0.4153700345349749 43 | -999.0,1.8522913786486102,0.6867574195109631,62.6082692425542,0.4758279142934275 44 | -999.0,1.674924463408369,0.887557565763358,79.0910983036986,0.653446133013394 45 | -999.0,2.0352477897881167,0.6402697900974206,57.64754269091291,0.47100440739524785 46 | -999.0,1.9453672397720758,0.7074309896057229,63.05250134908849,0.5280533258235164 47 | -999.0,2.0195727708802185,0.6275857507788403,57.10860402522137,0.4610243650677387 48 | -999.0,1.7444121505098404,0.9041010686822314,82.74469599947018,0.579391314398791 49 | -999.0,1.5561107953590811,0.8529119794266775,77.51840953551712,0.6056435501482237 50 | -999.0,2.0067665577522655,0.6428413359359157,57.60144862493335,0.48208258837511014 51 | -999.0,1.5473442483709072,1.0158661501964203,87.14837394524783,0.7582703428681146 52 | -999.0,1.972796266435563,0.6153464663609847,55.23828315058759,0.4601298124006118 53 | -999.0,1.6075974200251246,0.9013498694128812,80.2621626752748,0.6423521064253392 54 | -999.0,2.016025772148362,0.6412250043558921,58.70573002668311,0.4773238446705782 55 | -999.0,1.8285098315480641,0.7733560050414434,69.2577640119751,0.5752548469312637 56 | -999.0,2.007001709023173,0.6313707566518987,56.77168360665907,0.46491770105035685 57 | -999.0,1.9969532596836115,0.6766822743927187,61.25948444713688,0.49986743084497454 58 | -999.0,2.0196044351223956,0.645648783573736,58.20585426394427,0.48336565771978957 59 | 
-999.0,1.9890964547206846,0.6361012024105648,56.92477259004084,0.46606493500972623 60 | -999.0,2.0383875774184,0.6200481666915277,55.52307621947284,0.46735982803242004 61 | -999.0,1.9703385440297472,0.6904410142836638,61.621854049130796,0.528849962846023 62 | -999.0,2.0027914312900488,0.6636116608143904,59.38580827597671,0.480498868946735 63 | -999.0,1.9688634014749224,0.6344534694829904,55.7638353318526,0.4884345387235139 64 | -999.0,1.9869045205321627,0.6847391675076052,62.30583572727342,0.49393583539597313 65 | -999.0,1.999007973625627,0.6722282655335571,60.067934797969436,0.5071774554965496 66 | -999.0,1.9466837111137507,0.6456105131845772,57.86628688328466,0.4813283336181945 67 | -999.0,1.933105008214174,0.6839355388447359,61.52780909189652,0.5179922604357907 68 | -999.0,2.0194061624692834,0.6604749536308715,59.64462100249662,0.4890410367999095 69 | -999.0,2.0309355530605693,0.6477285046351372,59.17648118237441,0.47539778418427436 70 | -999.0,1.978658091244812,0.7072666568225668,63.64067759590031,0.5298945111661153 71 | -999.0,1.9528116584688289,0.6755024065877345,61.606664494996615,0.4882026854558382 72 | -999.0,1.723341402851696,0.8377561720884014,75.47088304026562,0.589329456794277 73 | -999.0,1.9840901542586111,0.6608379058386858,58.96199200965332,0.49967958222853615 74 | -999.0,1.9603468468118306,0.6649667557057085,59.19913690481098,0.4919745295139954 75 | -999.0,1.9995247413225532,0.665538116851975,59.22881605126685,0.48987785236759285 76 | -999.0,1.728397438071219,0.8869082108702844,78.15361303971969,0.6574513331952943 77 | -999.0,1.9843334345369592,0.6314043497278519,56.872528860048064,0.47442205321815445 78 | -999.0,2.008455219064121,0.6201323316378644,56.508523837755774,0.45662131491016256 79 | -999.0,2.0236181289879474,0.6070675056113858,55.18154319004647,0.45450842804290015 80 | -999.0,1.9548984852536293,0.6977743954006845,61.24813191979027,0.5515170960491625 81 | -999.0,2.0390745765898006,0.6079028022474319,55.11654738751056,0.47661355764047303 82 
| -999.0,1.9529017760890959,0.7358623005727601,64.9717899833591,0.5193232885049748 83 | -------------------------------------------------------------------------------- /make_figures/.load_perturbation(dataset): -------------------------------------------------------------------------------- 1 | Help on module ingestion: 2 | 3 | NAME 4 | ingestion 5 | 6 | FUNCTIONS 7 | aggregate_by_perturbation(adata: anndata._core.anndata.AnnData, group_by: list, use_raw=True) 8 | Compute pseudo-bulk expression by adding raw counts. 9 | 10 | Args: 11 | adata (anndata.AnnData): Object with raw counts in adata.raw.X 12 | group_by (list of str): names of categorical columns in adata.obs to group by. Typically starts with "perturbation". 13 | 14 | Returns: 15 | anndata.AnnData: Pseudo-bulk expression 16 | 17 | checkConsistency(adata: anndata._core.anndata.AnnData, perturbationType: str = 'overexpression', group: str = None, verbose: bool = False, do_return_pval=False, show_plots=False) 18 | Check whether the gene that was perturbed is actually 19 | measured to be higher (if overexpressed) or lower (if knocked 20 | down) or nearly zero (if knocked out). 21 | If an observation is a control or if the perturbed gene is not measured, 'N/A' is labeled. 22 | If a perturbagen's expression is higher or lower than the median control (matching 23 | the direction of intended perturbation), it is labeled True. Otherwise, False. 24 | 25 | Args: 26 | adata (anndata.AnnData): the object to operate on. adata.X is expected to be normalized but not log-transformed. 27 | It is expected to be a dense array, not a sparse e.g. scipy CSR. 28 | perturbationType (str): one of {"overexpression", "knockout", "knockdown"} 29 | group (str, default None): a column in adata.obs to indicate sub-group of 30 | the treatment and the control. 
31 | verbose (bool): show a swarmplot noting the difference between the control 32 | and the treatment, if the perturbation direction and expression 33 | level are discordant. 34 | 35 | checkPerturbationEffectMetricCorrelation(adata: anndata._core.anndata.AnnData, metrics) 36 | Compute correlation between different measures of global effect size 37 | 38 | computeCorrelation(adata: anndata._core.anndata.AnnData, verbose: bool = False, group: str = None) 39 | Compute the correlation between biological replicates on the scale of log fold change. For each 40 | set of perturbations, the final correlation score is the median of 41 | correlation between all pair-wise combinations of perturbation expression 42 | and control expression. Both Spearman and Pearson correlation are 43 | computed. 44 | 45 | This assumes the existence of "is_control" in adata.obs. 46 | 47 | convert_ens_to_symbol(ensembl_ids, gtf, strip_version=False) 48 | Convert ensembl gene id's (incomprehensible) into Entrez gene symbols (e.g. GAPDH) 49 | 50 | Args: 51 | 52 | - gtf: path to a GTF file with transcript annotations, e.g. Gencode V35. 53 | - ensembl_ids: iterable with inputs. 54 | - strip_version: ensembl ID's can be like 'ENSG01234.5' or like 'ENSG01234'. The '.5' is the version, i.e. the number of times this id has changed. Sometimes you want to strip this off (strip_version = True). More on ensembl ID's: 55 | 56 | https://useast.ensembl.org/Help/Faq?id=488#:~:text=An%20Ensembl%20stable%20ID%20consists,(version).&text=The%20second%20part%20is%20a,(object%20type)(identifier). 
57 | 58 | describe_perturbation_effect(adata: anndata._core.anndata.AnnData, perturbation_type, multiple_genes_hit: bool = None) -> anndata._core.anndata.AnnData 59 | Add details about perturbation's effect on the targeted genes 60 | 61 | Args: 62 | adata (anndata.AnnData): A perturbation dataset 63 | perturbation_type (typing.Union): one of {"overexpression", "knockout", "knockdown"}, or if mixed, an iterable of length equal to n_samples. 64 | multiple_genes_hit: Set to True if there are observations with multiple genes perturbed. 65 | Raises: 66 | ValueError: Triggered by invalid perturbation types. 67 | 68 | Returns: 69 | anndata.AnnData: adata with columns filled in for 'expression_level_after_perturbation' and 'perturbation_type' 70 | 71 | deseq2Normalization(counts_df) 72 | Equivalent to DESeq2:::counts.DESeqDataSet; counts(x, normalized=T) 73 | 74 | deseq2_size_factors(counts_df) 75 | Calculate DESeq size factors 76 | median of ratio to reference sample (geometric mean of all samples) 77 | 78 | https://github.com/broadinstitute/pyqtl/blob/master/qtl/norm.py 79 | References: 80 | [1] Anders & Huber, 2010 81 | [2] R functions: 82 | DESeq::estimateSizeFactorsForMatrix 83 | 84 | read_cmap(expression_file, gene_metadata, instance_metadata) 85 | Read a dataset in CMAP's HDF-based gctx format, returning an AnnData object. 86 | 87 | simplify_categorical(x: pandas.core.frame.DataFrame, column: str, max_categories: int = 20, filler: str = 'other', new_column: str = None) 88 | Mark less frequent categories as other. Accepts and returns a dataframe. 89 | 90 | simulate_single_cells(adata: anndata._core.anndata.AnnData, num_cells: int, counts_per_cell: int) 91 | Split bulk RNA samples into a simulated homogeneous population of cells. 
92 | 93 | Args: 94 | adata (anndata.AnnData): bulk RNA perturbation data 95 | num_cells (int): how many cells to split each sample into 96 | counts_per_cell (int): how many reads or UMIs to simulate per cell 97 | 98 | Returns: 99 | anndata.AnnData: perturbation data with the same variables and num_cells times as many samples 100 | 101 | ttrryy__ttooaarrrraayy(x) 102 | 103 | vviissuuaalliizzeeLLooggFFCC(fc, pval=None, show_plots=False) 104 | 105 | vviissuuaalliizzeePPeerrttuurrbbaattiioonnEEffffeecctt(adata, metrics, TFDict, EpiDict) 106 | Visualize effect size versus type of perturbation, e.g. TF versus non-TF 107 | 108 | vviissuuaalliizzeePPeerrttuurrbbaattiioonnMMeettaaddaattaa(adata: anndata._core.anndata.AnnData, x: str, y: str, style=None, hue=None, markers=None, xlim=None, s=30) 109 | Plot characteristics of each perturbation, e.g. correlation between replicates or global effect size. 110 | 111 | FFIILLEE 112 | /home/ekernf01/Desktop/jhu/research/projects/perturbation_prediction/cell_type_knowledge_transfer/perturbation_data/setup/ingestion.py 113 | 114 | -------------------------------------------------------------------------------- /make_figures/replicate_correlations_and_stereotypical_effects.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | import pereggrn_perturbations 4 | from pereggrn import experimenter 5 | import altair as alt 6 | import re 7 | import os 8 | pereggrn_perturbations.set_data_path('../../perturbation_data/perturbations') 9 | DATASET_ORDER = [ 10 | "nakatake", 11 | "nakatake\nscrna\nsimulated", 12 | "joung", 13 | "norman", 14 | "replogle1", 15 | "replogle3", 16 | "replogle4", 17 | "adamson", 18 | "replogle2", 19 | "replogle2 large effect", 20 | "replogle2 tf only", 21 | "replogle2_large_effect", 22 | "replogle2_tf_only", 23 | "replogle2\nlarge effect", 24 | "replogle2\nlarge\neffect", 25 | "replogle2\ntf only", 26 | "freimer", 27 | "dixit", 28 | 
"frangieh_IFNg_v1", 29 | "frangieh\nIFNg v1", 30 | "frangieh\nIFNg\nv1", 31 | "frangieh IFNg v1", 32 | "frangieh_IFNg_v2", 33 | "frangieh\nIFNg v2", 34 | "frangieh\nIFNg\nv2", 35 | "frangieh IFNg v2", 36 | "frangieh_IFNg_v3", 37 | "frangieh\nIFNg v3", 38 | "frangieh\nIFNg\nv3", 39 | "frangieh IFNg v3" 40 | ] 41 | 42 | # this eccentric code keeps the datasets in a consistent order across figures. 43 | datasets_used = [d for d in DATASET_ORDER if d in [ 44 | "nakatake", 45 | "replogle1", 46 | "replogle2", 47 | "replogle2\ntf only", 48 | "replogle2\nlarge effect", 49 | "replogle3", 50 | "replogle4", 51 | "joung", 52 | "freimer", 53 | "frangieh IFNg v3", 54 | "norman", 55 | "adamson", 56 | "dixit", 57 | ]] 58 | 59 | # See GEARS paper fig. S11 60 | # I used this interactively, but it is not used in the final version of the paper. 61 | norman_blacklist = { 62 | "IKZF3", 63 | "PRDM1", 64 | "PTPN1", 65 | "C3orf72", 66 | "NIT1", 67 | "RREB1", 68 | "CDKN1C", 69 | "CNN1", 70 | "PTPN13", 71 | "JUN", 72 | "ZBTB1", 73 | } 74 | 75 | # This is for enrichment analysis of genes with high logFC between controls and the average of the rest of the data. 76 | os.makedirs("control_vs_all_perts", exist_ok=True) 77 | for dataset in datasets_used: 78 | print(f"Processing dataset: {dataset}") 79 | adata = pereggrn_perturbations.load_perturbation(re.sub("\n| ", "_", dataset)) 80 | controls = adata.obs_names[adata.obs["is_control"]] 81 | treateds = adata.obs_names[~adata.obs["is_control"]] 82 | lfc = adata[treateds, :].X.mean(axis=0) - adata[controls, :].X.mean(axis=0) 83 | lfc = np.array(lfc).flatten() 84 | lfc = pd.DataFrame({"lfc": lfc, "gene": adata.var_names}).sort_values("lfc") 85 | lfc.head(100).to_csv(f"control_vs_all_perts/bottom_genes_{dataset}.csv") 86 | lfc.tail(100).to_csv(f"control_vs_all_perts/top_genes_{dataset}.csv") 87 | 88 | # This is for a figure testing if independent control-treatment pairs have consistent log fold changes when the treatment is the same. 
# For every perturbation with at least two samples, draw two of them at random,
# compute each sample's log fold change against the mean control profile, and
# record the Pearson correlation between the two log-fold-change vectors.
correlations = dict()
# Seeded generator so the sampled replicate pairs (and hence the figure) are reproducible.
rng = np.random.default_rng(0)
for dataset in datasets_used:
    adata = pereggrn_perturbations.load_perturbation(re.sub("\n| ", "_", dataset))
    controls = adata.obs_names[adata.obs["is_control"]]
    # Flatten to 1-D so the result of .mean() subtracts and correlates the
    # same way whatever array type X is (the first loop in this file
    # flattens for the same reason).
    control_expression = np.asarray(adata[controls, :].X.mean(axis=0)).ravel()
    rows = []
    for p in adata.obs["perturbation"].unique():
        replicates = adata.obs.index[adata.obs["perturbation"] == p]
        if len(replicates) < 2:
            # no replicates :[
            continue
        treatment1, treatment2 = rng.choice(replicates, size=2, replace=False)
        if adata.obs.loc[[treatment1, treatment2], "is_control"].any():
            # Don't include controls as treatments, even if they have a weird name like 'CAG-rtTA35-IH'
            continue
        lfc1 = np.asarray(adata[treatment1, :].X.mean(axis=0)).ravel() - control_expression
        lfc2 = np.asarray(adata[treatment2, :].X.mean(axis=0)).ravel() - control_expression
        rows.append({
            "correlation": np.corrcoef(lfc1, lfc2)[0, 1],
            "dataset": dataset,
            "perturbation": p,
        })
    # Explicit columns keep the frame well-formed even when no pair was found.
    correlations[dataset] = pd.DataFrame(rows, columns=["correlation", "dataset", "perturbation"])

correlations = pd.concat(correlations.values())
# corrcoef yields NaN when either vector is constant; drop those rows before plotting.
correlations = correlations.loc[~correlations["correlation"].isna(), :]
print("Lowest correlations:")
print(correlations.sort_values("correlation").head(10))
print("Numbers of replicates:")
print(correlations["dataset"].value_counts())
box_plot = alt.Chart(correlations).mark_boxplot(color = "black").encode(
    x=alt.X('dataset:N', title='', sort = datasets_used),
    y=alt.Y('correlation:Q', title='Pearson correlation between log FC from independent treatments'),
).properties(
    title='Replicate correlations',
)
# Horizontal reference line at zero correlation.
red_line = alt.Chart(pd.DataFrame({'y': [0]})).mark_rule(color='red').encode(
    y='y:Q'
)

# Combine the
box plot and the red line 131 | final_chart = box_plot + red_line 132 | 133 | # Save the chart 134 | final_chart.save("plots/genome_wide_logfc_correlation.svg") 135 | 136 | # This is for a figure testing if independent control-treatment pairs have consistent log fold changes even across different treatments. 137 | 138 | datasets_used = [d for d in DATASET_ORDER if d in [ # this eccentric code keeps the datasets in a consistent order across figures. 139 | "nakatake", 140 | "replogle1", 141 | "replogle2", 142 | "replogle2\ntf only", 143 | "replogle2\nlarge effect", 144 | "replogle3", 145 | "replogle4", 146 | "joung", 147 | "freimer" 148 | ]] 149 | num_pairs = 100 150 | correlations = dict() 151 | for dataset in datasets_used: 152 | adata = pereggrn_perturbations.load_perturbation(re.sub("\n| ", "_", dataset)) 153 | correlations[dataset] = pd.DataFrame(index = range(num_pairs), columns = ["correlation", "dataset"]) 154 | i=0 155 | while i < num_pairs: 156 | control1 = np.random.choice(adata.obs_names[adata.obs["is_control"]]) 157 | control2 = np.random.choice(adata.obs_names[adata.obs["is_control"]]) 158 | treatment1 = np.random.choice(adata.obs.loc[~adata.obs["is_control"], "perturbation"]) 159 | treatment2 = np.random.choice(adata.obs.loc[~adata.obs["is_control"], "perturbation"]) 160 | if treatment1==treatment2: 161 | continue 162 | if control1==control2: 163 | continue 164 | lfc1 = adata[control1].X.mean(axis=0) - adata[adata.obs["perturbation"]==treatment1].X.mean(axis=0) 165 | lfc2 = adata[control2].X.mean(axis=0) - adata[adata.obs["perturbation"]==treatment2].X.mean(axis=0) 166 | correlations[dataset].loc[i, "correlation"] = np.corrcoef(lfc1, lfc2)[0, 1] 167 | correlations[dataset].loc[i, "dataset"] = dataset 168 | i = i + 1 169 | correlations = pd.concat(correlations.values()) 170 | box_plot = alt.Chart(correlations).mark_boxplot(color = "black").encode( 171 | x=alt.X('dataset:N', title='', sort = datasets_used), 172 | y=alt.Y('correlation:Q', title='Pearson 
correlation between log FC within 100 independent control-treatment pairs'), 173 | ).properties( 174 | title='Correlations by dataset', 175 | ) 176 | red_line = alt.Chart(pd.DataFrame({'y': [0]})).mark_rule(color='red').encode( 177 | y='y:Q' 178 | ) 179 | 180 | # Combine the box plot and the red line 181 | final_chart = box_plot + red_line 182 | 183 | # Save the chart 184 | final_chart.save("plots/stereotypical_responses.svg") 185 | -------------------------------------------------------------------------------- /make_figures/global_effects/frangieh_IFNg_v3.txt: -------------------------------------------------------------------------------- 1 | deg,mi,mean,norm2,median 2 | 0.0,1.8640777755412477,1.5609329866998158,199.4889866614173,1.5781911104919941 3 | 0.0,2.2381782310639156,1.5564469075932919,195.37887534668164,1.5753619003456205 4 | 0.0,2.2146129900918607,1.55387206964863,195.12431583019907,1.572317066612451 5 | 0.0,2.17864857726856,1.5502577618885491,195.15675427216254,1.571027818437378 6 | 0.0,2.143805661124758,1.5415083491298502,194.26777568511886,1.565942632867314 7 | 0.0,2.1616418787607525,1.5424024594928842,194.14200699678977,1.569884079539798 8 | 0.0,2.091400328116583,1.5469775280898237,195.39082977570428,1.5747049875440255 9 | 0.0,2.1926744656597017,1.5608596898537956,196.17649290039483,1.5758254662918882 10 | 0.0,2.1567343549140965,1.5650105652992246,196.9810360237488,1.5827025167134265 11 | 0.0,2.2268631647167627,1.5519783931717825,194.9262772999478,1.5729230242628418 12 | 0.0,1.9083184207749087,1.5222697612760214,194.4373118943775,1.550593464708645 13 | 0.0,2.149422410667196,1.5392542935981004,193.97333882326117,1.5608210821100927 14 | 0.0,2.21148604629312,1.5666434999331502,196.70284397013413,1.5795388880897012 15 | 0.0,2.0063769936579625,1.525228861670856,193.53789348173277,1.5477483913971124 16 | 0.0,2.142495553126543,1.534509983342969,193.41342573926613,1.5554688938353964 17 | 
0.0,1.8485792334911988,1.4899423138441248,190.35177059499426,1.5155990721321806 18 | 0.0,2.1231270032310547,1.541349667999332,194.30697260052753,1.561486847479887 19 | 0.0,2.1340725720831424,1.546294606032218,194.97147511138792,1.5683646486024583 20 | 0.0,2.2446588325246086,1.5623723518062338,196.06174733634003,1.5770216589154689 21 | 0.0,2.177385304838936,1.5478919762354075,194.77235644615556,1.5671498260045515 22 | 0.0,2.137426054458756,1.5503454918715107,195.3734166350972,1.5663646175213937 23 | 0.0,2.1076097974365418,1.558895246909559,196.6611550837507,1.5796028618030715 24 | 0.0,2.265773660982213,1.5463120420780834,193.95632845087187,1.5658130529843945 25 | 0.0,2.1139825111613684,1.5412720163156162,194.56389567959533,1.564791555845289 26 | 0.0,1.415685342313327,1.405580766200538,194.31861736424278,1.3602309221984261 27 | 0.0,2.087598892522954,1.5402147821081675,194.52740382550317,1.5604597994048544 28 | 0.0,1.8510104523288886,1.5062488164002255,192.48318965271915,1.5177931629164139 29 | 0.0,2.1234203490603583,1.5543498809044658,195.73130687365915,1.5718041451355966 30 | 0.0,2.1215835910210528,1.5397446675282571,194.30678975491705,1.5629532834016175 31 | 0.0,2.1108168980619126,1.565319161411174,197.13144848394558,1.582658132552802 32 | 0.0,2.1357522726466915,1.5414106242670618,194.1715346343759,1.5645937044023777 33 | 0.0,2.182388591495276,1.5609345887006152,196.23827570258575,1.5750510522307217 34 | 0.0,2.184889890754682,1.5638902387877258,196.7063106246717,1.5829483811202238 35 | 0.0,2.1581864657997376,1.5569068285744685,196.03154361251327,1.5687567884944296 36 | 0.0,2.1364393112605864,1.562613548728406,196.83498177962963,1.5820432375634383 37 | 0.0,2.148673690985009,1.546425620499626,194.85703701720652,1.5648129689736225 38 | 0.0,1.9415834740694804,1.5459645754313236,196.75077582805162,1.5610250928779343 39 | 0.0,1.8361447747468795,1.560322620699423,199.66746227032564,1.5662067089104021 40 | 
0.0,2.13869411989653,1.570424760448402,197.7835706771045,1.5889761549055694 41 | 0.0,2.193614661642634,1.544623271715702,194.19198847334093,1.5677133876934737 42 | 0.0,2.245068641740921,1.5658308249739197,196.45173938548572,1.5829112716002596 43 | 0.0,2.154032028855592,1.56165374630263,196.75305058460683,1.5792645561811411 44 | 0.0,2.1886111324660185,1.5493526551414794,194.82306283818133,1.5693485477334077 45 | 0.0,2.157687531835807,1.5579216189638376,196.22474598266695,1.5756410131151717 46 | 0.0,2.10261525186135,1.5408640983874744,194.61664219306303,1.5629619786836157 47 | 0.0,2.1984340576171832,1.5377352652151677,193.36428496681577,1.5576610492034721 48 | 0.0,2.026423254465132,1.5448108191356402,195.66685920458838,1.5670222107141463 49 | 0.0,1.9740747933618819,1.5391376944403437,195.71288151301516,1.568027871944885 50 | 0.0,2.155652742303728,1.5559045198438703,195.89936409991617,1.5699470176881383 51 | 0.0,1.646400479754516,1.515089296619962,198.02852272428197,1.5120570619167317 52 | 0.0,1.9912384923285922,1.525964098125269,193.71475766218472,1.5474136252160302 53 | 0.0,2.013124700838948,1.5452507794095025,195.86909924769154,1.5705184527122131 54 | 0.0,1.5185687940215273,1.4831225745256398,197.18336414414497,1.4791928631633757 55 | 0.0,2.0126105646808057,1.5695954000737264,198.50693076797475,1.5787671685704154 56 | 0.0,2.1508967320830856,1.55688314547353,196.08418092978462,1.577116390813067 57 | 0.0,2.142483910617741,1.5625690040209503,196.8885207558313,1.574741117521875 58 | 0.0,2.0703794142750196,1.5482914072608331,195.73410952279326,1.5675275634646697 59 | 0.0,1.4384651376526867,1.4281337091413215,195.29146796121395,1.3561108694172535 60 | 0.0,2.162085793333776,1.5529603138247763,195.49730860400592,1.5732664440822757 61 | 0.0,2.2625088917034875,1.570442028611116,196.9230360203464,1.585241176356665 62 | 0.0,2.078323240553724,1.547451412344719,195.68090736721518,1.5763017567141577 63 | 
0.0,2.2806125505915307,1.5540507887338513,194.85596970158656,1.568189682507506 64 | 0.0,2.0370231231124682,1.547338301703512,195.94346507709105,1.5712998474610274 65 | 0.0,1.645333124590206,1.4998445768487476,196.18719896733361,1.519768098521916 66 | 0.0,2.083480856444875,1.5373446908167252,194.29045989983356,1.5620324044482161 67 | 0.0,2.2168321913794373,1.5487188161243786,194.56348086444126,1.5691249594744308 68 | 0.0,2.121720310089325,1.5376791203052502,193.81073381107933,1.5601841707997217 69 | 0.0,2.0263778677198268,1.536791967307301,194.67200725431542,1.5609471400162183 70 | 0.0,2.12281088384689,1.5438380193375987,194.68726469072567,1.5613545813159133 71 | 0.0,2.1655282295791833,1.5514898751311164,195.27435375304844,1.5701866057852747 72 | 0.0,1.9698977448149457,1.5483071400933763,196.6593573948986,1.5660242540257614 73 | 0.0,2.211111560470838,1.552942821633374,195.11211160343956,1.5692227649907298 74 | 0.0,1.8925633364572512,1.5099657221344673,193.2098644070017,1.5378067848811237 75 | 0.0,2.2329806404979715,1.561646227176743,196.07998011291474,1.577544728765365 76 | 0.0,1.932581241189833,1.5637396892245976,199.2857959672798,1.5785682209965772 77 | 0.0,1.9884012416036378,1.54257371472264,195.90619144575348,1.5626696596500922 78 | 0.0,2.0854399207017966,1.560007839941076,196.91610245539837,1.574321313707657 79 | 0.0,2.051918099759816,1.547937310125488,195.805379708888,1.569007553057012 80 | 0.0,2.180962217080367,1.5542753368941402,195.5032006529314,1.5737854607689523 81 | 0.0,2.0008385104951243,1.5594144893351818,197.60987128564545,1.5740475424871678 82 | 0.0,1.444989872195319,1.411348588538996,192.47441633541158,1.406311168786681 83 | 0.0,2.1229153302538792,1.5387922632032018,194.11973625164177,1.5627768393160022 84 | 0.0,2.2097709127458875,1.5450657393731981,194.19935625463415,1.5674390930166782 85 | 0.0,1.8539703772374319,1.6061603662587003,204.58572212209245,1.637949543765706 86 | 
0.0,1.983342528007456,1.6027114324898186,202.62395956223804,1.625357931746318 87 | 0.0,1.5307977256206562,1.5089235045521172,200.19753595708247,1.5122452229075352 88 | 0.0,2.1522226470115298,1.5620301414972428,196.73107449446087,1.5805386268525883 89 | 0.0,2.0047772048870023,1.540411638202022,195.40913242619797,1.567571576916326 90 | 0.0,2.1328941484679826,1.5490660615669747,195.22898615963723,1.5734127614645266 91 | 0.0,1.9068323333274704,1.5854450292101352,201.20729765411272,1.6148566609105695 92 | 0.0,1.9408611731592433,1.6154995145390716,204.63659432390375,1.6390101940336206 93 | 0.0,1.883510473067413,1.5324767698541615,195.88453761119865,1.5505516563263808 94 | 0.0,2.228971397448362,1.5616555102125103,196.03457521235217,1.5749355465309498 95 | 0.0,2.1967887189198265,1.54873531551021,194.66415182438772,1.5690105597328485 96 | 0.0,2.0798270863101327,1.5481242184122967,195.45975613828253,1.56627418816735 97 | 0.0,2.160358117621133,1.5448997808198106,194.35859412496424,1.5636346555472325 98 | 0.0,2.23395005336625,1.5684884461288386,196.86755039384283,1.5878436667337712 99 | 0.0,2.0878533060432334,1.5463923111524376,195.372820848338,1.5699116915471563 100 | 0.0,2.060805118562739,1.5529261098483382,196.45974974040328,1.575149763186573 101 | 0.0,2.1852108711151663,1.5537795520530573,195.33739994157963,1.5673692056664343 102 | 0.0,2.1723956840688214,1.5410405201946737,193.89711378657466,1.5629236160801026 103 | 0.0,2.1624195251909413,1.5475789514410003,194.82016725886558,1.568032621774313 104 | 0.0,2.0354585234984484,1.5188927705879876,192.36077571022926,1.5389618795041367 105 | 0.0,2.314670027953225,1.572605157225235,196.9004402955716,1.5858425661991622 106 | 0.0,2.1883470281653365,1.5612987149448965,196.27897552026596,1.579306925991565 107 | 0.0,1.9051920193767289,1.4914015202328919,190.60867099558462,1.5026761521341778 108 | 0.0,2.0246838792004067,1.5411527660237252,195.27137876489357,1.5644692725655591 109 | 
0.0,1.5099178911014048,1.47995025382225,196.4747953615395,1.4806169283389723 110 | 0.0,1.4771256138313278,1.4631864825076955,196.08452507242382,1.4429037548897312 111 | 0.0,1.8760693326819093,1.5209786657496862,194.7605046706265,1.5470790679371103 112 | 0.0,2.125850944619379,1.5420893253847805,194.43031476821017,1.5628876526637296 113 | 0.0,2.102146433738809,1.5585068390944619,196.6185261672021,1.5798999214577591 114 | 0.0,2.1598454095443245,1.5526100991586054,195.46109768232776,1.5720211673940145 115 | 0.0,1.8042287046922387,1.5171228197889328,195.54491993849456,1.536775072138354 116 | 0.0,2.0763786450300525,1.5481833853488909,195.55984905330055,1.5690829674366582 117 | 0.0,2.1186284207655506,1.5473479668061056,194.8013755920857,1.5629967636456727 118 | 0.0,2.1198114886154613,1.5429854734515656,194.55923629587397,1.565548364006912 119 | 0.0,2.169343041302846,1.551425505489008,195.1944610738628,1.569942451505478 120 | 0.0,2.274963584737022,1.5656366749692452,196.26420432759627,1.5820869172247243 121 | 0.0,1.8446331337189035,1.5279904847437138,196.20108351194324,1.5518236001678176 122 | 0.0,1.5439144357588774,1.5535761858648764,204.6463168705661,1.5510111903439598 123 | 0.0,1.6794377222856218,1.517095329213177,197.45913845634124,1.537604208660047 124 | 0.0,1.8956130468865373,1.5454190279012916,197.45924105261773,1.560811639495884 125 | 0.0,2.084316984259716,1.5403456665761686,194.722976383967,1.5531142135872757 126 | 0.0,1.7838343091296807,1.5347511987171807,197.71940894337837,1.5574776801357277 127 | 0.0,1.5791435901431778,1.4669246652338017,193.9073958107502,1.479681230430519 128 | 0.0,1.934234259454923,1.5403941939989703,196.1069055267106,1.5501171023858566 129 | 0.0,2.0365987777890777,1.545911034678182,195.80842417558142,1.5656023454305925 130 | 0.0,2.231803397549866,1.5671170997572619,196.7197092480343,1.5829647298896372 131 | 0.0,2.0894025097241067,1.5483521400399898,195.5572400231854,1.5670255742976025 132 | 
0.0,2.0818271393018324,1.5279270572794672,193.02902788318465,1.5533094128647558 133 | 0.0,1.5277103925377589,1.44095498812414,193.4678854514472,1.3858780585375379 134 | 0.0,2.0569673937760005,1.5402442261374258,194.90389865369,1.5638912846276043 135 | 0.0,2.1196826740155754,1.5510783908474288,195.57200072476016,1.5719270009568445 136 | 0.0,2.147735312075022,1.5562321072355592,195.68866512797405,1.5733503568007277 137 | 0.0,2.1970969346970284,1.5452486941452033,194.15781106348393,1.5627433267211983 138 | 0.0,1.90784636046063,1.5335951103181935,195.4021003601159,1.552574322971833 139 | 0.0,2.089718383433232,1.5579241573402456,196.76210783706733,1.5745865252198137 140 | 0.0,1.5338245175505505,1.4801049835269258,196.09806846140276,1.4777718468140926 141 | 0.0,1.5308564546094194,1.3525228356574912,193.27090950490745,1.3193996730822846 142 | 0.0,1.5740577681329069,1.519394116265145,200.22614521142762,1.5109289163727921 143 | 0.0,2.2081093772223452,1.5418842645016535,193.84098315418208,1.5648391281572702 144 | 0.0,2.158177853294038,1.5653843071799556,197.090758091642,1.5862503173350966 145 | 0.0,2.0803415089071353,1.5408799990974118,194.57653704456138,1.5620267655820013 146 | 0.0,2.1404350468012394,1.5533705921847842,195.7559663197285,1.5725786221332942 147 | 0.0,2.26432898137135,1.5522891819303313,194.76016761530136,1.567694073438641 148 | 0.0,2.1380670718146373,1.5457834156194816,194.7552176609661,1.5698965331240486 149 | 0.0,2.148562792196919,1.5501633152454923,195.28620977123848,1.5680156606521105 150 | 0.0,2.0640027592937678,1.5595367461321166,197.1046049370052,1.5773436281839779 151 | 0.0,1.9932499822399365,1.5352175756273032,194.60104408996276,1.5525199849708773 152 | 0.0,2.1526367527154306,1.550473562593641,195.2744890381642,1.575663595946704 153 | 0.0,2.055216784492464,1.5409194924980316,195.0036174412795,1.563945518194417 154 | 0.0,1.8091797714249829,1.5268951675031466,196.22278438034814,1.5466601451185265 155 | 
0.0,2.040230128855401,1.540350868108229,195.0585128544913,1.5626756665942685 156 | 0.0,2.2871549400801134,1.567064822886356,196.38189758161954,1.578833953285653 157 | 0.0,2.1972096438085535,1.551962229765701,195.0511175356856,1.568072582965678 158 | 0.0,2.2134806169440115,1.5407181638311975,193.66522447500836,1.5635672171368549 159 | 0.0,2.19983356493271,1.5574078966211726,195.85337542383704,1.5772312587748345 160 | 0.0,1.893275252973158,1.5480731863258566,197.921888322951,1.5736067258312068 161 | 0.0,2.206737681843113,1.5553667273548109,195.40959842211802,1.5736292234807383 162 | 0.0,2.184137916042091,1.5666278500826556,196.9174009458554,1.581896104794101 163 | 0.0,2.176280143009812,1.5528729668552068,195.26156914783107,1.5729586392853119 164 | 0.0,2.1379025140711483,1.560572213202117,196.63125329266384,1.5761261153704718 165 | 0.0,2.196993960309918,1.55282370090618,195.27822173922985,1.5736670096851357 166 | 0.0,2.1319891530903425,1.5429302892424888,194.55391823892364,1.5677271376576394 167 | 0.0,2.097896118991125,1.5552981188022972,196.21999803583267,1.5761179859402965 168 | 0.0,2.046249138843732,1.5443778328025561,195.157552635341,1.5582929677659882 169 | 0.0,1.8850594527676086,1.6040034659609628,203.80393468967938,1.632870602719037 170 | 0.0,2.0284295881276098,1.5455540634437015,195.51017503299556,1.5671990385138497 171 | 0.0,2.2179642919768616,1.5508818085102298,194.88435403268508,1.5691804406525827 172 | 0.0,2.1684226449454256,1.553239808917264,195.5320778305028,1.5741744189817077 173 | 0.0,2.1582494316044363,1.5436382196705518,194.4885708865621,1.5692245883109626 174 | 0.0,2.1294479453574633,1.5455015726838326,194.89203792700417,1.570972237834125 175 | 0.0,2.176103195754049,1.5551445796728127,195.60683472967204,1.5786037948247857 176 | 0.0,2.1943146792388486,1.561915744534123,196.36636401906836,1.5785805990560702 177 | 0.0,1.822766444361539,1.5057347897988325,193.85929823740096,1.5292511955784214 178 | 
0.0,2.168893277275868,1.543970541663893,194.32813142687357,1.5657897243740715 179 | 0.0,1.9831645986439677,1.5416768152123117,195.9416089280911,1.5615482473431592 180 | 0.0,2.109807952977918,1.55108106082843,195.80492140493843,1.5721647966489303 181 | 0.0,2.1264764288467894,1.550727616247975,195.43408241786932,1.5691919140016939 182 | 0.0,1.5523206302514265,1.508428003572635,198.1678179429505,1.5120452450619024 183 | 0.0,2.1448644580884344,1.5555607805576206,195.9932274995643,1.577986050468815 184 | 0.0,2.1005359651462374,1.5548282866601129,196.35332306469067,1.5760821883909935 185 | 0.0,2.15470270549886,1.5574981376851567,196.11394128318008,1.5724905987839823 186 | 0.0,2.175010482738152,1.5432376877535128,194.28607121259859,1.5605623043932368 187 | 0.0,1.5224836407887739,1.472653882651759,195.36682904709403,1.4718125579522716 188 | 0.0,2.0801032596431868,1.5397058742447813,194.71982518235208,1.5618416510577042 189 | 0.0,2.1972788586802516,1.5437038722319774,194.07166205738181,1.56697246476462 190 | 0.0,2.1932624723251157,1.5544561018926852,195.4312600624217,1.5738567292170003 191 | 0.0,2.045184730299986,1.5628419483484843,197.2900739488058,1.579576675594899 192 | 0.0,1.8880620927252965,0.9635258228901327,128.05342258383,0.9789828926428183 193 | 0.0,2.0846155294096533,1.5291712339438326,193.35462153105976,1.55459497254967 194 | 0.0,1.853229487638607,1.5340635997555556,196.4838058902042,1.5478681489558024 195 | 0.0,1.7401475392447134,1.5215835638751685,197.10886143383803,1.5424434097034552 196 | 0.0,2.0437962682133666,1.5484623658914356,196.01709980964904,1.5709140057625275 197 | 0.0,2.158723268827176,1.5517773640361345,195.4835719000345,1.5674839750329848 198 | 0.0,2.202084184989831,1.5593504415085588,195.93439439434738,1.5757596981647635 199 | --------------------------------------------------------------------------------