├── LICENSE ├── PUMA.png ├── README.md ├── addendum.md ├── analysis ├── .Rhistory ├── 01-PUMA_feature_transform.ipynb ├── 02-dataset_filtering.ipynb ├── 03-generate_splits_and_UMAP.ipynb ├── 04-late_fusion.ipynb ├── 05-tables_for_HitRates_plot.ipynb ├── 06-Hit-Rates_EF.ipynb ├── 07-assay_modality_AUC_plots.ipynb ├── 08-exceed_auc.ipynb ├── 09-Venn_diagrams.ipynb ├── 10-AUC_comparison.ipynb ├── 11-Figure_auc.Rmd └── 12-Boxplots.ipynb ├── assay_data ├── assay_matrix_discrete_270_assays.csv ├── assay_metadata.csv ├── assay_metadata_expanded.csv ├── bioassay_aids.csv ├── broad_ids.txt ├── extract_assay_metadata.R └── smiles.txt ├── misc ├── compound_analysis.npz └── compounds16978to16170.npy ├── plots ├── auc.svg ├── late_fusion_box.svg ├── similarities │ ├── GEcv0.svg │ ├── GEcv1.svg │ ├── GEcv2.svg │ ├── GEcv3.svg │ ├── GEcv4.svg │ ├── MOBCcv0.svg │ ├── MOBCcv1.svg │ ├── MOBCcv2.svg │ ├── MOBCcv3.svg │ ├── MOBCcv4.svg │ ├── SCcv0.svg │ ├── SCcv1.svg │ ├── SCcv2.svg │ ├── SCcv3.svg │ ├── SCcv4.svg │ └── similarities_final_p.svg ├── single_modalities_box.svg ├── venn_combined_modalities_70.svg ├── venn_combined_modalities_90.svg ├── venn_single_modalities_70.svg └── venn_single_modalities_90.svg ├── predictions ├── scaffold_mean_AUC.csv ├── scaffold_mean_EF.csv ├── scaffold_median_AUC.csv └── scaffold_median_EF.csv ├── python ├── EF_calc.py ├── find_pains.py ├── fingerprint_similarity.py ├── mean_and_median.py ├── plot_all_sim.py ├── plots_m.py ├── prauc.py ├── retrospective.py └── scaffold_split.py ├── requirements.txt ├── scripts ├── single_task │ ├── merge_predictions.py │ ├── single_assay_prediction.py │ └── single_assay_training.py ├── starter.py ├── train_chemprop_combined_models.sh ├── train_chemprop_models_single_models.sh └── train_chemprop_models_single_models_balanced.sh └── splitting ├── GE_clusters_size_constrained.npz ├── MOBC_clusters_size_constrained.npz ├── cross_validation_indicies.npz └── scaffold_based_split.npz /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CaicedoLab/2023_Moshkov_NatComm/HEAD/LICENSE -------------------------------------------------------------------------------- /PUMA.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CaicedoLab/2023_Moshkov_NatComm/HEAD/PUMA.png -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CaicedoLab/2023_Moshkov_NatComm/HEAD/README.md -------------------------------------------------------------------------------- /addendum.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CaicedoLab/2023_Moshkov_NatComm/HEAD/addendum.md -------------------------------------------------------------------------------- /analysis/.Rhistory: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CaicedoLab/2023_Moshkov_NatComm/HEAD/analysis/.Rhistory -------------------------------------------------------------------------------- /analysis/01-PUMA_feature_transform.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CaicedoLab/2023_Moshkov_NatComm/HEAD/analysis/01-PUMA_feature_transform.ipynb -------------------------------------------------------------------------------- /analysis/02-dataset_filtering.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CaicedoLab/2023_Moshkov_NatComm/HEAD/analysis/02-dataset_filtering.ipynb -------------------------------------------------------------------------------- /analysis/03-generate_splits_and_UMAP.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CaicedoLab/2023_Moshkov_NatComm/HEAD/analysis/03-generate_splits_and_UMAP.ipynb -------------------------------------------------------------------------------- /analysis/04-late_fusion.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CaicedoLab/2023_Moshkov_NatComm/HEAD/analysis/04-late_fusion.ipynb -------------------------------------------------------------------------------- /analysis/05-tables_for_HitRates_plot.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CaicedoLab/2023_Moshkov_NatComm/HEAD/analysis/05-tables_for_HitRates_plot.ipynb -------------------------------------------------------------------------------- /analysis/06-Hit-Rates_EF.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CaicedoLab/2023_Moshkov_NatComm/HEAD/analysis/06-Hit-Rates_EF.ipynb -------------------------------------------------------------------------------- /analysis/07-assay_modality_AUC_plots.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CaicedoLab/2023_Moshkov_NatComm/HEAD/analysis/07-assay_modality_AUC_plots.ipynb -------------------------------------------------------------------------------- /analysis/08-exceed_auc.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CaicedoLab/2023_Moshkov_NatComm/HEAD/analysis/08-exceed_auc.ipynb -------------------------------------------------------------------------------- /analysis/09-Venn_diagrams.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CaicedoLab/2023_Moshkov_NatComm/HEAD/analysis/09-Venn_diagrams.ipynb -------------------------------------------------------------------------------- /analysis/10-AUC_comparison.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CaicedoLab/2023_Moshkov_NatComm/HEAD/analysis/10-AUC_comparison.ipynb -------------------------------------------------------------------------------- /analysis/11-Figure_auc.Rmd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CaicedoLab/2023_Moshkov_NatComm/HEAD/analysis/11-Figure_auc.Rmd -------------------------------------------------------------------------------- /analysis/12-Boxplots.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CaicedoLab/2023_Moshkov_NatComm/HEAD/analysis/12-Boxplots.ipynb -------------------------------------------------------------------------------- /assay_data/assay_matrix_discrete_270_assays.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CaicedoLab/2023_Moshkov_NatComm/HEAD/assay_data/assay_matrix_discrete_270_assays.csv -------------------------------------------------------------------------------- /assay_data/assay_metadata.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CaicedoLab/2023_Moshkov_NatComm/HEAD/assay_data/assay_metadata.csv -------------------------------------------------------------------------------- /assay_data/assay_metadata_expanded.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CaicedoLab/2023_Moshkov_NatComm/HEAD/assay_data/assay_metadata_expanded.csv -------------------------------------------------------------------------------- /assay_data/bioassay_aids.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CaicedoLab/2023_Moshkov_NatComm/HEAD/assay_data/bioassay_aids.csv -------------------------------------------------------------------------------- /assay_data/broad_ids.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CaicedoLab/2023_Moshkov_NatComm/HEAD/assay_data/broad_ids.txt -------------------------------------------------------------------------------- /assay_data/extract_assay_metadata.R: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CaicedoLab/2023_Moshkov_NatComm/HEAD/assay_data/extract_assay_metadata.R -------------------------------------------------------------------------------- /assay_data/smiles.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CaicedoLab/2023_Moshkov_NatComm/HEAD/assay_data/smiles.txt -------------------------------------------------------------------------------- /misc/compound_analysis.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CaicedoLab/2023_Moshkov_NatComm/HEAD/misc/compound_analysis.npz -------------------------------------------------------------------------------- /misc/compounds16978to16170.npy: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CaicedoLab/2023_Moshkov_NatComm/HEAD/misc/compounds16978to16170.npy -------------------------------------------------------------------------------- /plots/auc.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CaicedoLab/2023_Moshkov_NatComm/HEAD/plots/auc.svg -------------------------------------------------------------------------------- /plots/late_fusion_box.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CaicedoLab/2023_Moshkov_NatComm/HEAD/plots/late_fusion_box.svg -------------------------------------------------------------------------------- /plots/similarities/GEcv0.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CaicedoLab/2023_Moshkov_NatComm/HEAD/plots/similarities/GEcv0.svg -------------------------------------------------------------------------------- /plots/similarities/GEcv1.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CaicedoLab/2023_Moshkov_NatComm/HEAD/plots/similarities/GEcv1.svg -------------------------------------------------------------------------------- /plots/similarities/GEcv2.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CaicedoLab/2023_Moshkov_NatComm/HEAD/plots/similarities/GEcv2.svg -------------------------------------------------------------------------------- /plots/similarities/GEcv3.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CaicedoLab/2023_Moshkov_NatComm/HEAD/plots/similarities/GEcv3.svg -------------------------------------------------------------------------------- /plots/similarities/GEcv4.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CaicedoLab/2023_Moshkov_NatComm/HEAD/plots/similarities/GEcv4.svg -------------------------------------------------------------------------------- /plots/similarities/MOBCcv0.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CaicedoLab/2023_Moshkov_NatComm/HEAD/plots/similarities/MOBCcv0.svg -------------------------------------------------------------------------------- /plots/similarities/MOBCcv1.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CaicedoLab/2023_Moshkov_NatComm/HEAD/plots/similarities/MOBCcv1.svg -------------------------------------------------------------------------------- /plots/similarities/MOBCcv2.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CaicedoLab/2023_Moshkov_NatComm/HEAD/plots/similarities/MOBCcv2.svg -------------------------------------------------------------------------------- /plots/similarities/MOBCcv3.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CaicedoLab/2023_Moshkov_NatComm/HEAD/plots/similarities/MOBCcv3.svg -------------------------------------------------------------------------------- /plots/similarities/MOBCcv4.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CaicedoLab/2023_Moshkov_NatComm/HEAD/plots/similarities/MOBCcv4.svg -------------------------------------------------------------------------------- /plots/similarities/SCcv0.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CaicedoLab/2023_Moshkov_NatComm/HEAD/plots/similarities/SCcv0.svg -------------------------------------------------------------------------------- /plots/similarities/SCcv1.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CaicedoLab/2023_Moshkov_NatComm/HEAD/plots/similarities/SCcv1.svg -------------------------------------------------------------------------------- /plots/similarities/SCcv2.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CaicedoLab/2023_Moshkov_NatComm/HEAD/plots/similarities/SCcv2.svg -------------------------------------------------------------------------------- /plots/similarities/SCcv3.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CaicedoLab/2023_Moshkov_NatComm/HEAD/plots/similarities/SCcv3.svg -------------------------------------------------------------------------------- /plots/similarities/SCcv4.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CaicedoLab/2023_Moshkov_NatComm/HEAD/plots/similarities/SCcv4.svg -------------------------------------------------------------------------------- /plots/similarities/similarities_final_p.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CaicedoLab/2023_Moshkov_NatComm/HEAD/plots/similarities/similarities_final_p.svg -------------------------------------------------------------------------------- /plots/single_modalities_box.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CaicedoLab/2023_Moshkov_NatComm/HEAD/plots/single_modalities_box.svg -------------------------------------------------------------------------------- /plots/venn_combined_modalities_70.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CaicedoLab/2023_Moshkov_NatComm/HEAD/plots/venn_combined_modalities_70.svg -------------------------------------------------------------------------------- /plots/venn_combined_modalities_90.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CaicedoLab/2023_Moshkov_NatComm/HEAD/plots/venn_combined_modalities_90.svg -------------------------------------------------------------------------------- /plots/venn_single_modalities_70.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CaicedoLab/2023_Moshkov_NatComm/HEAD/plots/venn_single_modalities_70.svg -------------------------------------------------------------------------------- /plots/venn_single_modalities_90.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CaicedoLab/2023_Moshkov_NatComm/HEAD/plots/venn_single_modalities_90.svg -------------------------------------------------------------------------------- /predictions/scaffold_mean_AUC.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CaicedoLab/2023_Moshkov_NatComm/HEAD/predictions/scaffold_mean_AUC.csv -------------------------------------------------------------------------------- /predictions/scaffold_mean_EF.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CaicedoLab/2023_Moshkov_NatComm/HEAD/predictions/scaffold_mean_EF.csv -------------------------------------------------------------------------------- /predictions/scaffold_median_AUC.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CaicedoLab/2023_Moshkov_NatComm/HEAD/predictions/scaffold_median_AUC.csv -------------------------------------------------------------------------------- /predictions/scaffold_median_EF.csv: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CaicedoLab/2023_Moshkov_NatComm/HEAD/predictions/scaffold_median_EF.csv -------------------------------------------------------------------------------- /python/EF_calc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CaicedoLab/2023_Moshkov_NatComm/HEAD/python/EF_calc.py -------------------------------------------------------------------------------- /python/find_pains.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CaicedoLab/2023_Moshkov_NatComm/HEAD/python/find_pains.py -------------------------------------------------------------------------------- /python/fingerprint_similarity.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CaicedoLab/2023_Moshkov_NatComm/HEAD/python/fingerprint_similarity.py -------------------------------------------------------------------------------- /python/mean_and_median.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CaicedoLab/2023_Moshkov_NatComm/HEAD/python/mean_and_median.py -------------------------------------------------------------------------------- /python/plot_all_sim.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CaicedoLab/2023_Moshkov_NatComm/HEAD/python/plot_all_sim.py -------------------------------------------------------------------------------- /python/plots_m.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CaicedoLab/2023_Moshkov_NatComm/HEAD/python/plots_m.py -------------------------------------------------------------------------------- /python/prauc.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CaicedoLab/2023_Moshkov_NatComm/HEAD/python/prauc.py -------------------------------------------------------------------------------- /python/retrospective.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CaicedoLab/2023_Moshkov_NatComm/HEAD/python/retrospective.py -------------------------------------------------------------------------------- /python/scaffold_split.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CaicedoLab/2023_Moshkov_NatComm/HEAD/python/scaffold_split.py -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CaicedoLab/2023_Moshkov_NatComm/HEAD/requirements.txt -------------------------------------------------------------------------------- /scripts/single_task/merge_predictions.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CaicedoLab/2023_Moshkov_NatComm/HEAD/scripts/single_task/merge_predictions.py -------------------------------------------------------------------------------- /scripts/single_task/single_assay_prediction.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CaicedoLab/2023_Moshkov_NatComm/HEAD/scripts/single_task/single_assay_prediction.py -------------------------------------------------------------------------------- /scripts/single_task/single_assay_training.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CaicedoLab/2023_Moshkov_NatComm/HEAD/scripts/single_task/single_assay_training.py -------------------------------------------------------------------------------- /scripts/starter.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CaicedoLab/2023_Moshkov_NatComm/HEAD/scripts/starter.py -------------------------------------------------------------------------------- /scripts/train_chemprop_combined_models.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CaicedoLab/2023_Moshkov_NatComm/HEAD/scripts/train_chemprop_combined_models.sh -------------------------------------------------------------------------------- /scripts/train_chemprop_models_single_models.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CaicedoLab/2023_Moshkov_NatComm/HEAD/scripts/train_chemprop_models_single_models.sh -------------------------------------------------------------------------------- /scripts/train_chemprop_models_single_models_balanced.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CaicedoLab/2023_Moshkov_NatComm/HEAD/scripts/train_chemprop_models_single_models_balanced.sh -------------------------------------------------------------------------------- /splitting/GE_clusters_size_constrained.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CaicedoLab/2023_Moshkov_NatComm/HEAD/splitting/GE_clusters_size_constrained.npz -------------------------------------------------------------------------------- /splitting/MOBC_clusters_size_constrained.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CaicedoLab/2023_Moshkov_NatComm/HEAD/splitting/MOBC_clusters_size_constrained.npz -------------------------------------------------------------------------------- /splitting/cross_validation_indicies.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CaicedoLab/2023_Moshkov_NatComm/HEAD/splitting/cross_validation_indicies.npz -------------------------------------------------------------------------------- /splitting/scaffold_based_split.npz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CaicedoLab/2023_Moshkov_NatComm/HEAD/splitting/scaffold_based_split.npz --------------------------------------------------------------------------------