├── __init__.py ├── code ├── __init__.py ├── models │ ├── __init__.py │ ├── kmeans │ │ └── __init__.py │ └── distributions │ │ ├── __init__.py │ │ ├── exponential.py │ │ ├── normal.py │ │ └── gamma.py └── cross_validation │ └── __init__.py ├── plots ├── __init__.py ├── time_toy │ ├── nmf_icm_times.txt~ │ ├── nmf_np_times.txt~ │ ├── nmf_vb_times.txt~ │ ├── nmtf_np_times.txt~ │ ├── nmf_gibbs_performances.txt~ │ ├── nmf_icm_performances.txt~ │ ├── nmf_np_performances.txt~ │ ├── nmf_vb_performances.txt~ │ ├── nmtf_gibbs_performances.txt~ │ ├── nmtf_np_performances.txt~ │ └── nmtf_vb_performances.txt~ ├── time_Sanger │ ├── nmf_gibbs_times.txt~ │ ├── nmf_icm_times.txt~ │ ├── nmf_np_times.txt~ │ ├── nmf_vb_times.txt~ │ ├── nmtf_icm_times.txt~ │ ├── nmf_icm_performances.txt~ │ ├── nmf_np_performances.txt~ │ ├── nmf_vb_performances.txt~ │ ├── nmtf_gibbs_times.txt~ │ ├── nmf_gibbs_performances.txt~ │ ├── nmtf_gibbs_performances.txt~ │ └── nmtf_icm_performances.txt~ ├── graphs_toy │ ├── legend.pdf │ ├── legend.png │ ├── mse_nmf_times.pdf │ ├── mse_nmf_times.png │ ├── mse_nmtf_times.pdf │ ├── mse_nmtf_times.png │ ├── mse_nmf_noise_test.pdf │ ├── mse_nmf_noise_test.png │ ├── mse_nmf_convergences.pdf │ ├── mse_nmf_convergences.png │ ├── mse_nmtf_noise_test.pdf │ ├── mse_nmtf_noise_test.png │ ├── aic_nmf_model_selection.pdf │ ├── aic_nmf_model_selection.png │ ├── mse_nmf_model_selection.pdf │ ├── mse_nmf_model_selection.png │ ├── mse_nmtf_convergences.pdf │ ├── mse_nmtf_convergences.png │ ├── aic_nmtf_model_selection.pdf │ ├── aic_nmtf_model_selection.png │ ├── elbo_nmf_model_selection.png │ ├── mse_nmtf_model_selection.pdf │ ├── mse_nmtf_model_selection.png │ ├── aic_nmtf_greedy_model_selection.pdf │ ├── aic_nmtf_greedy_model_selection.png │ ├── mse_nmf_missing_values_predictions.pdf │ ├── mse_nmf_missing_values_predictions.png │ ├── mse_nmtf_missing_values_predictions.pdf │ └── mse_nmtf_missing_values_predictions.png └── graphs_Sanger │ ├── mse_Sanger_nmf_times.pdf │ ├── mse_Sanger_nmf_times.png │ ├── mse_Sanger_nmtf_times.pdf │ ├── mse_Sanger_nmtf_times.png │ ├── mse_Sanger_nmf_convergences.pdf │ ├── mse_Sanger_nmf_convergences.png │ ├── mse_Sanger_nmtf_convergences.pdf │ ├── mse_Sanger_nmtf_convergences.png │ ├── aic_Sanger_line_model_selection.pdf │ ├── aic_Sanger_line_model_selection.png │ ├── aic_Sanger_greedy_model_selection.pdf │ └── aic_Sanger_greedy_model_selection.png ├── tests ├── __init__.py ├── code │ ├── __init__.py │ └── distributions │ │ ├── __init__.py │ │ ├── test_gamma.py │ │ ├── test_truncated_normal_vector.py │ │ └── test_truncated_normal.py └── grid_search │ └── __init__.py ├── data_toy ├── __init__.py ├── bnmf │ └── __init__.py └── bnmtf │ ├── __init__.py │ └── S.txt ├── experiments ├── __init__.py ├── experiments_ccle │ ├── __init__.py │ └── cross_validation │ │ ├── ccle_ec_gibbs_nmf │ │ ├── results.txt │ │ └── linesearch_xval_gibbs.py │ │ ├── ccle_ec_np_nmf │ │ ├── fold_1.txt │ │ ├── fold_3.txt │ │ ├── fold_7.txt │ │ ├── results.txt │ │ ├── np_nmf_nested_xval.py │ │ ├── fold_9.txt │ │ ├── fold_5.txt │ │ ├── fold_6.txt │ │ ├── fold_2.txt │ │ ├── fold_8.txt │ │ ├── fold_10.txt │ │ └── fold_4.txt │ │ ├── ccle_ec_np_nmtf │ │ ├── fold_10.txt │ │ ├── fold_3.txt │ │ ├── fold_8.txt │ │ ├── fold_9.txt │ │ ├── results.txt │ │ └── np_nmtf_nested_xval.py │ │ ├── ccle_ic_np_nmf │ │ ├── results.txt │ │ ├── np_nmf_nested_xval.py │ │ ├── fold_7.txt │ │ ├── fold_6.txt │ │ ├── fold_8.txt │ │ ├── fold_1.txt │ │ ├── fold_2.txt │ │ ├── fold_3.txt │ │ ├── fold_5.txt │ │ ├── fold_10.txt │ │ ├── 
fold_4.txt │ │ └── fold_9.txt │ │ ├── ccle_ic_np_nmtf │ │ ├── results.txt │ │ └── np_nmtf_nested_xval.py │ │ ├── ccle_ec_vb_nmf │ │ └── linesearch_xval_vb.py │ │ ├── ccle_ic_vb_nmf │ │ └── linesearch_xval_vb.py │ │ ├── ccle_ec_icm_nmf │ │ └── linesearch_xval_icm.py │ │ ├── ccle_ic_icm_nmf │ │ └── linesearch_xval_icm.py │ │ ├── ccle_ic_gibbs_nmf │ │ └── linesearch_xval_gibbs.py │ │ ├── ccle_ic_vb_nmtf │ │ └── greedysearch_xval_vb.py │ │ ├── ccle_ec_vb_nmtf │ │ └── greedysearch_xval_vb.py │ │ ├── ccle_ic_icm_nmtf │ │ └── greedysearch_xval_icm.py │ │ ├── ccle_ec_icm_nmtf │ │ └── greedysearch_xval_icm.py │ │ ├── ccle_ic_gibbs_nmtf │ │ └── greedysearch_xval_gibbs.py │ │ └── ccle_ec_gibbs_nmtf │ │ └── greedysearch_xval_gibbs.py ├── experiments_gdsc │ ├── __init__.py │ ├── time │ │ ├── __init__.py │ │ ├── nmf_np_time.py │ │ ├── nmtf_np_time.py │ │ ├── nmf_vb_time.py │ │ ├── nmf_icm_time.py │ │ ├── nmf_gibbs_time.py │ │ ├── nmtf_vb_time.py │ │ └── nmtf_gibbs_time.py │ ├── convergence │ │ └── __init__.py │ ├── cross_validation │ │ ├── __init__.py │ │ ├── kbmf │ │ │ ├── .Rhistory │ │ │ ├── kbmf_regression_test.R │ │ │ ├── kbmf1mkl1mkl │ │ │ │ ├── kbmf1mkl1mkl_supervised_regression_variational_test.R │ │ │ │ ├── kbmf1mkl1mkl_semisupervised_regression_variational_test.R │ │ │ │ ├── kbmf1mkl1mkl_supervised_classification_variational_test.R │ │ │ │ ├── kbmf1mkl1mkl_semisupervised_classification_variational_test.R │ │ │ │ ├── kbmf1mkl1mkl_supervised_regression_variational_test.m │ │ │ │ ├── kbmf1mkl1mkl_semisupervised_regression_variational_test.m │ │ │ │ ├── kbmf1mkl1mkl_supervised_classification_variational_test.m │ │ │ │ └── kbmf1mkl1mkl_semisupervised_classification_variational_test.m │ │ │ ├── run_nested_cross_val_kbmf.R~ │ │ │ ├── run_nested_cross_val_kbmf.R │ │ │ ├── nested_cross_val_kbmf.R~ │ │ │ ├── nested_cross_val_kbmf.R │ │ │ ├── run_cross_val_kbmf.R~ │ │ │ ├── run_cross_val_kbmf.R │ │ │ ├── run_kbmf.R~ │ │ │ └── run_kbmf.R │ │ ├── np_nmf │ │ │ ├── __init__.py │ │ │ ├── results.txt │ │ │ ├── np_nmf_nested_xval.py │ │ │ └── np_nmf_xval.py │ │ ├── np_nmtf │ │ │ ├── __init__.py │ │ │ ├── results.txt │ │ │ ├── np_nmtf_xval.py │ │ │ ├── np_nmtf_nested_xval.py │ │ │ ├── fold_1.txt │ │ │ ├── fold_10.txt │ │ │ ├── fold_2.txt │ │ │ ├── fold_4.txt │ │ │ ├── fold_5.txt │ │ │ ├── fold_7.txt │ │ │ ├── fold_8.txt │ │ │ ├── fold_9.txt │ │ │ ├── fold_3.txt │ │ │ └── fold_6.txt │ │ ├── vb_nmf │ │ │ ├── linesearch_xval_vb.py │ │ │ └── results.txt │ │ ├── icm_nmf │ │ │ ├── linesearch_xval_icm.py │ │ │ └── results.txt │ │ ├── gibbs_nmf │ │ │ ├── linesearch_xval_gibbs.py │ │ │ └── results.txt │ │ ├── vb_nmtf │ │ │ └── greedysearch_xval_vb.py │ │ ├── icm_nmtf │ │ │ └── greedysearch_xval_icm.py │ │ └── gibbs_nmtf │ │ │ └── greedysearch_xval_gibbs.py │ └── model_selection │ │ └── __init__.py └── experiments_toy │ ├── convergence │ └── __init__.py │ ├── grid_search │ └── run_line_search_bnmf_gibbs.py │ └── time │ ├── nmtf_np_time.py │ └── nmf_np_time.py ├── data_drug_sensitivity ├── __init__.py ├── ccle │ ├── __init__.py │ ├── drugs.txt │ └── load_data.py └── gdsc │ ├── __init__.py │ ├── notes │ ├── notes~ │ └── drug_names_sorted_filtered ├── images ├── mf_mtf.pdf └── mf_mtf.png └── .gitignore /__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /code/__init__.py: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /plots/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /code/models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /data_toy/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /experiments/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/code/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /data_toy/bnmf/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /data_toy/bnmtf/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /code/cross_validation/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /code/models/kmeans/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /data_drug_sensitivity/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /plots/time_toy/nmf_icm_times.txt~: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /plots/time_toy/nmf_np_times.txt~: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /plots/time_toy/nmf_vb_times.txt~: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /plots/time_toy/nmtf_np_times.txt~: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/grid_search/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /code/models/distributions/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /data_drug_sensitivity/ccle/__init__.py: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /data_drug_sensitivity/gdsc/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /plots/time_Sanger/nmf_gibbs_times.txt~: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /plots/time_Sanger/nmf_icm_times.txt~: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /plots/time_Sanger/nmf_np_times.txt~: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /plots/time_Sanger/nmf_vb_times.txt~: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /plots/time_Sanger/nmtf_icm_times.txt~: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/code/distributions/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /experiments/experiments_ccle/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /experiments/experiments_gdsc/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /plots/time_Sanger/nmf_icm_performances.txt~: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /plots/time_Sanger/nmf_np_performances.txt~: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /plots/time_Sanger/nmf_vb_performances.txt~: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /plots/time_Sanger/nmtf_gibbs_times.txt~: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /plots/time_toy/nmf_gibbs_performances.txt~: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /plots/time_toy/nmf_icm_performances.txt~: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /plots/time_toy/nmf_np_performances.txt~: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /plots/time_toy/nmf_vb_performances.txt~: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /plots/time_toy/nmtf_gibbs_performances.txt~: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /plots/time_toy/nmtf_np_performances.txt~: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /plots/time_toy/nmtf_vb_performances.txt~: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /experiments/experiments_gdsc/time/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /plots/time_Sanger/nmf_gibbs_performances.txt~: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /plots/time_Sanger/nmtf_gibbs_performances.txt~: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /plots/time_Sanger/nmtf_icm_performances.txt~: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /experiments/experiments_gdsc/convergence/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /experiments/experiments_toy/convergence/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /experiments/experiments_gdsc/cross_validation/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /experiments/experiments_gdsc/model_selection/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /experiments/experiments_gdsc/cross_validation/kbmf/.Rhistory: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /experiments/experiments_gdsc/cross_validation/np_nmf/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /experiments/experiments_gdsc/cross_validation/np_nmtf/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /experiments/experiments_ccle/cross_validation/ccle_ec_gibbs_nmf/results.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /images/mf_mtf.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ThomasBrouwer/BNMTF/HEAD/images/mf_mtf.pdf -------------------------------------------------------------------------------- /images/mf_mtf.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThomasBrouwer/BNMTF/HEAD/images/mf_mtf.png -------------------------------------------------------------------------------- /plots/graphs_toy/legend.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThomasBrouwer/BNMTF/HEAD/plots/graphs_toy/legend.pdf -------------------------------------------------------------------------------- /plots/graphs_toy/legend.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThomasBrouwer/BNMTF/HEAD/plots/graphs_toy/legend.png -------------------------------------------------------------------------------- /plots/graphs_toy/mse_nmf_times.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThomasBrouwer/BNMTF/HEAD/plots/graphs_toy/mse_nmf_times.pdf -------------------------------------------------------------------------------- /plots/graphs_toy/mse_nmf_times.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThomasBrouwer/BNMTF/HEAD/plots/graphs_toy/mse_nmf_times.png -------------------------------------------------------------------------------- /plots/graphs_toy/mse_nmtf_times.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThomasBrouwer/BNMTF/HEAD/plots/graphs_toy/mse_nmtf_times.pdf -------------------------------------------------------------------------------- /plots/graphs_toy/mse_nmtf_times.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThomasBrouwer/BNMTF/HEAD/plots/graphs_toy/mse_nmtf_times.png -------------------------------------------------------------------------------- /plots/graphs_toy/mse_nmf_noise_test.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThomasBrouwer/BNMTF/HEAD/plots/graphs_toy/mse_nmf_noise_test.pdf -------------------------------------------------------------------------------- /plots/graphs_toy/mse_nmf_noise_test.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThomasBrouwer/BNMTF/HEAD/plots/graphs_toy/mse_nmf_noise_test.png -------------------------------------------------------------------------------- /plots/graphs_toy/mse_nmf_convergences.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThomasBrouwer/BNMTF/HEAD/plots/graphs_toy/mse_nmf_convergences.pdf -------------------------------------------------------------------------------- /plots/graphs_toy/mse_nmf_convergences.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThomasBrouwer/BNMTF/HEAD/plots/graphs_toy/mse_nmf_convergences.png -------------------------------------------------------------------------------- /plots/graphs_toy/mse_nmtf_noise_test.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ThomasBrouwer/BNMTF/HEAD/plots/graphs_toy/mse_nmtf_noise_test.pdf -------------------------------------------------------------------------------- /plots/graphs_toy/mse_nmtf_noise_test.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThomasBrouwer/BNMTF/HEAD/plots/graphs_toy/mse_nmtf_noise_test.png -------------------------------------------------------------------------------- /plots/graphs_Sanger/mse_Sanger_nmf_times.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThomasBrouwer/BNMTF/HEAD/plots/graphs_Sanger/mse_Sanger_nmf_times.pdf -------------------------------------------------------------------------------- /plots/graphs_Sanger/mse_Sanger_nmf_times.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThomasBrouwer/BNMTF/HEAD/plots/graphs_Sanger/mse_Sanger_nmf_times.png -------------------------------------------------------------------------------- /plots/graphs_toy/aic_nmf_model_selection.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThomasBrouwer/BNMTF/HEAD/plots/graphs_toy/aic_nmf_model_selection.pdf -------------------------------------------------------------------------------- /plots/graphs_toy/aic_nmf_model_selection.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThomasBrouwer/BNMTF/HEAD/plots/graphs_toy/aic_nmf_model_selection.png -------------------------------------------------------------------------------- /plots/graphs_toy/mse_nmf_model_selection.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThomasBrouwer/BNMTF/HEAD/plots/graphs_toy/mse_nmf_model_selection.pdf -------------------------------------------------------------------------------- /plots/graphs_toy/mse_nmf_model_selection.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThomasBrouwer/BNMTF/HEAD/plots/graphs_toy/mse_nmf_model_selection.png -------------------------------------------------------------------------------- /plots/graphs_toy/mse_nmtf_convergences.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThomasBrouwer/BNMTF/HEAD/plots/graphs_toy/mse_nmtf_convergences.pdf -------------------------------------------------------------------------------- /plots/graphs_toy/mse_nmtf_convergences.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThomasBrouwer/BNMTF/HEAD/plots/graphs_toy/mse_nmtf_convergences.png -------------------------------------------------------------------------------- /plots/graphs_Sanger/mse_Sanger_nmtf_times.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThomasBrouwer/BNMTF/HEAD/plots/graphs_Sanger/mse_Sanger_nmtf_times.pdf -------------------------------------------------------------------------------- /plots/graphs_Sanger/mse_Sanger_nmtf_times.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThomasBrouwer/BNMTF/HEAD/plots/graphs_Sanger/mse_Sanger_nmtf_times.png 
-------------------------------------------------------------------------------- /plots/graphs_toy/aic_nmtf_model_selection.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThomasBrouwer/BNMTF/HEAD/plots/graphs_toy/aic_nmtf_model_selection.pdf -------------------------------------------------------------------------------- /plots/graphs_toy/aic_nmtf_model_selection.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThomasBrouwer/BNMTF/HEAD/plots/graphs_toy/aic_nmtf_model_selection.png -------------------------------------------------------------------------------- /plots/graphs_toy/elbo_nmf_model_selection.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThomasBrouwer/BNMTF/HEAD/plots/graphs_toy/elbo_nmf_model_selection.png -------------------------------------------------------------------------------- /plots/graphs_toy/mse_nmtf_model_selection.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThomasBrouwer/BNMTF/HEAD/plots/graphs_toy/mse_nmtf_model_selection.pdf -------------------------------------------------------------------------------- /plots/graphs_toy/mse_nmtf_model_selection.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThomasBrouwer/BNMTF/HEAD/plots/graphs_toy/mse_nmtf_model_selection.png -------------------------------------------------------------------------------- /plots/graphs_Sanger/mse_Sanger_nmf_convergences.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThomasBrouwer/BNMTF/HEAD/plots/graphs_Sanger/mse_Sanger_nmf_convergences.pdf -------------------------------------------------------------------------------- /plots/graphs_Sanger/mse_Sanger_nmf_convergences.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThomasBrouwer/BNMTF/HEAD/plots/graphs_Sanger/mse_Sanger_nmf_convergences.png -------------------------------------------------------------------------------- /plots/graphs_Sanger/mse_Sanger_nmtf_convergences.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThomasBrouwer/BNMTF/HEAD/plots/graphs_Sanger/mse_Sanger_nmtf_convergences.pdf -------------------------------------------------------------------------------- /plots/graphs_Sanger/mse_Sanger_nmtf_convergences.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThomasBrouwer/BNMTF/HEAD/plots/graphs_Sanger/mse_Sanger_nmtf_convergences.png -------------------------------------------------------------------------------- /plots/graphs_toy/aic_nmtf_greedy_model_selection.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThomasBrouwer/BNMTF/HEAD/plots/graphs_toy/aic_nmtf_greedy_model_selection.pdf -------------------------------------------------------------------------------- /plots/graphs_toy/aic_nmtf_greedy_model_selection.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThomasBrouwer/BNMTF/HEAD/plots/graphs_toy/aic_nmtf_greedy_model_selection.png 
-------------------------------------------------------------------------------- /plots/graphs_Sanger/aic_Sanger_line_model_selection.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThomasBrouwer/BNMTF/HEAD/plots/graphs_Sanger/aic_Sanger_line_model_selection.pdf -------------------------------------------------------------------------------- /plots/graphs_Sanger/aic_Sanger_line_model_selection.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThomasBrouwer/BNMTF/HEAD/plots/graphs_Sanger/aic_Sanger_line_model_selection.png -------------------------------------------------------------------------------- /plots/graphs_toy/mse_nmf_missing_values_predictions.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThomasBrouwer/BNMTF/HEAD/plots/graphs_toy/mse_nmf_missing_values_predictions.pdf -------------------------------------------------------------------------------- /plots/graphs_toy/mse_nmf_missing_values_predictions.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThomasBrouwer/BNMTF/HEAD/plots/graphs_toy/mse_nmf_missing_values_predictions.png -------------------------------------------------------------------------------- /plots/graphs_toy/mse_nmtf_missing_values_predictions.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThomasBrouwer/BNMTF/HEAD/plots/graphs_toy/mse_nmtf_missing_values_predictions.pdf -------------------------------------------------------------------------------- /plots/graphs_toy/mse_nmtf_missing_values_predictions.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThomasBrouwer/BNMTF/HEAD/plots/graphs_toy/mse_nmtf_missing_values_predictions.png -------------------------------------------------------------------------------- /plots/graphs_Sanger/aic_Sanger_greedy_model_selection.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThomasBrouwer/BNMTF/HEAD/plots/graphs_Sanger/aic_Sanger_greedy_model_selection.pdf -------------------------------------------------------------------------------- /plots/graphs_Sanger/aic_Sanger_greedy_model_selection.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThomasBrouwer/BNMTF/HEAD/plots/graphs_Sanger/aic_Sanger_greedy_model_selection.png -------------------------------------------------------------------------------- /experiments/experiments_gdsc/cross_validation/kbmf/kbmf_regression_test.R: -------------------------------------------------------------------------------- 1 | # Mehmet Gonen (mehmet.gonen@gmail.com) 2 | 3 | kbmf_regression_test <- function(Kx, Kz, state) { 4 | prediction <- state$parameters$test_function(drop(Kx), drop(Kz), state) 5 | } -------------------------------------------------------------------------------- /experiments/experiments_ccle/cross_validation/ccle_ec_np_nmf/fold_1.txt: -------------------------------------------------------------------------------- 1 | Tried parameters {'K': 1} but got exception: Failed to generate folds for training and test data, 1000 attempts.. 
2 | Tried parameters {'K': 2} but got exception: Failed to generate folds for training and test data, 1000 attempts.. 3 | Tried parameters {'K': 3} but got exception: Failed to generate folds for training and test data, 1000 attempts.. 4 | -------------------------------------------------------------------------------- /experiments/experiments_ccle/cross_validation/ccle_ec_np_nmf/fold_3.txt: -------------------------------------------------------------------------------- 1 | Tried parameters {'K': 1} but got exception: Failed to generate folds for training and test data, 1000 attempts.. 2 | Tried parameters {'K': 2} but got exception: Failed to generate folds for training and test data, 1000 attempts.. 3 | Tried parameters {'K': 3} but got exception: Failed to generate folds for training and test data, 1000 attempts.. 4 | -------------------------------------------------------------------------------- /experiments/experiments_ccle/cross_validation/ccle_ec_np_nmf/fold_7.txt: -------------------------------------------------------------------------------- 1 | Tried parameters {'K': 1} but got exception: Failed to generate folds for training and test data, 1000 attempts.. 2 | Tried parameters {'K': 2} but got exception: Failed to generate folds for training and test data, 1000 attempts.. 3 | Tried parameters {'K': 3} but got exception: Failed to generate folds for training and test data, 1000 attempts.. 4 | -------------------------------------------------------------------------------- /experiments/experiments_ccle/cross_validation/ccle_ec_np_nmtf/fold_10.txt: -------------------------------------------------------------------------------- 1 | Tried parameters {'K': 1, 'L': 1} but got exception: Failed to generate folds for training and test data, 1000 attempts.. 2 | Tried parameters {'K': 1, 'L': 2} but got exception: Failed to generate folds for training and test data, 1000 attempts.. 3 | Tried parameters {'K': 2, 'L': 1} but got exception: Failed to generate folds for training and test data, 1000 attempts.. 4 | Tried parameters {'K': 2, 'L': 2} but got exception: Failed to generate folds for training and test data, 1000 attempts.. 5 | -------------------------------------------------------------------------------- /experiments/experiments_ccle/cross_validation/ccle_ec_np_nmtf/fold_3.txt: -------------------------------------------------------------------------------- 1 | Tried parameters {'K': 1, 'L': 1} but got exception: Failed to generate folds for training and test data, 1000 attempts.. 2 | Tried parameters {'K': 1, 'L': 2} but got exception: Failed to generate folds for training and test data, 1000 attempts.. 3 | Tried parameters {'K': 2, 'L': 1} but got exception: Failed to generate folds for training and test data, 1000 attempts.. 4 | Tried parameters {'K': 2, 'L': 2} but got exception: Failed to generate folds for training and test data, 1000 attempts.. 5 | -------------------------------------------------------------------------------- /experiments/experiments_ccle/cross_validation/ccle_ec_np_nmtf/fold_8.txt: -------------------------------------------------------------------------------- 1 | Tried parameters {'K': 1, 'L': 1} but got exception: Failed to generate folds for training and test data, 1000 attempts.. 2 | Tried parameters {'K': 1, 'L': 2} but got exception: Failed to generate folds for training and test data, 1000 attempts.. 3 | Tried parameters {'K': 2, 'L': 1} but got exception: Failed to generate folds for training and test data, 1000 attempts.. 
4 | Tried parameters {'K': 2, 'L': 2} but got exception: Failed to generate folds for training and test data, 1000 attempts.. 5 | -------------------------------------------------------------------------------- /experiments/experiments_ccle/cross_validation/ccle_ec_np_nmtf/fold_9.txt: -------------------------------------------------------------------------------- 1 | Tried parameters {'K': 1, 'L': 1} but got exception: Failed to generate folds for training and test data, 1000 attempts.. 2 | Tried parameters {'K': 1, 'L': 2} but got exception: Failed to generate folds for training and test data, 1000 attempts.. 3 | Tried parameters {'K': 2, 'L': 1} but got exception: Failed to generate folds for training and test data, 1000 attempts.. 4 | Tried parameters {'K': 2, 'L': 2} but got exception: Failed to generate folds for training and test data, 1000 attempts.. 5 | -------------------------------------------------------------------------------- /data_drug_sensitivity/ccle/drugs.txt: -------------------------------------------------------------------------------- 1 | 17aag 17-AAG 2 | aew541 AEW541 3 | azd0530 AZD0530 4 | azd6244 AZD6244 5 | erlotinib Erlotinib 6 | irinotecan Irinotecan 7 | l685458 L-685458 8 | lapatinib Lapatinib 9 | lbw242 LBW242 10 | nilotinib Nilotinib 11 | nutlin3 Nutlin-3 12 | paclitaxel Paclitaxel 13 | panobinostat Panobinostat 14 | pd0325901 PD-0325901 15 | pd0332991 PD-0332991 16 | pf2341066 PF2341066 17 | pha665752 PHA-665752 18 | plx4720 PLX4720 19 | raf265 RAF265 20 | sorafenib Sorafenib 21 | tae684 TAE684 22 | tki258 TKI258 23 | topotecan Topotecan 24 | zd6474 ZD-6474 25 | -------------------------------------------------------------------------------- /data_toy/bnmtf/S.txt: -------------------------------------------------------------------------------- 1 | 3.119652900231379422e-01 4.667713213280465512e-01 4.149760289926684997e-01 1.193043027963739816e-01 1.841317074265872655e+00 2 | 4.974795265023240209e-01 2.058768233004351078e+00 1.875517966446155760e-01 8.465283062509249001e-01 7.220983298177682252e-01 3 | 1.155660063996406084e+00 3.094347422574944328e+00 6.029326566297028567e-01 5.619143373047055245e-01 5.926448256085022903e-01 4 | 4.845264910112820900e-01 8.674386568923128094e-01 3.845620514887508445e+00 2.844154789202054112e-01 7.828094354652509468e-01 5 | 1.300921478072263104e+00 3.764568502857092436e-01 7.217656694934858341e-02 1.887483058053026852e+00 5.789976327985083548e-01 6 | -------------------------------------------------------------------------------- /code/models/distributions/exponential.py: -------------------------------------------------------------------------------- 1 | """ 2 | Class representing an exponential distribution, allowing us to sample from it. 3 | """ 4 | from numpy.random import exponential 5 | 6 | # Exponential draws 7 | def exponential_draw(lambdax): 8 | scale = 1.0 / lambdax 9 | return exponential(scale=scale,size=None) 10 | 11 | ''' 12 | # Do 1000 draws and plot them 13 | import matplotlib.pyplot as plt 14 | import numpy as np 15 | scale = 2. 
16 | s = [exponential_draw(1./scale) for i in range(0,1000)] 17 | s2 = np.random.exponential(scale, 1000) 18 | count, bins, ignored = plt.hist(s, 50, normed=True) 19 | count, bins, ignored = plt.hist(s2, 50, normed=True) 20 | plt.show() 21 | ''' -------------------------------------------------------------------------------- /code/models/distributions/normal.py: -------------------------------------------------------------------------------- 1 | """ 2 | Class representing a normal distribution, allowing us to sample from it. 3 | """ 4 | from numpy.random import normal 5 | import numpy, math 6 | 7 | # Draw a value x ~ Normal(mu, 1/tau), where tau is the precision (so sigma = 1/sqrt(tau)) 8 | def normal_draw(mu,tau): 9 | sigma = numpy.float64(1.0) / math.sqrt(tau) 10 | return normal(loc=mu,scale=sigma,size=None) 11 | 12 | 13 | ''' 14 | # Do 1000 draws and plot them 15 | import matplotlib.pyplot as plt 16 | import numpy as np 17 | mu = -1. 18 | tau = 4. 19 | sigma = 1./2. 20 | s = [normal_draw(mu,tau) for i in range(0,1000)] 21 | s2 = np.random.normal(mu,sigma, 1000) 22 | count, bins, ignored = plt.hist(s, 50, normed=True) 23 | count, bins, ignored = plt.hist(s2, 50, normed=True) 24 | plt.show() 25 | ''' -------------------------------------------------------------------------------- /experiments/experiments_gdsc/cross_validation/np_nmf/results.txt: -------------------------------------------------------------------------------- 1 | Average performances: {'R^2': 0.8082101768976138, 'MSE': 2.251110677994558, 'Rp': 0.8994506246266909}. 2 | All performances: {'R^2': [0.8133682117581214, 0.798979194046697, 0.8051355068238598, 0.8090828602116515, 0.8178068200269057, 0.8105171037995288, 0.8002399738798882, 0.7997607151839987, 0.8160747071552853, 0.8111366760902006], 'MSE': [2.2521197525864687, 2.2748992448875924, 2.2942175862307983, 2.2556573686657075, 2.1739052328898292, 2.2227894183419878, 2.3208767637860492, 2.3416493427979819, 2.1392407721599365, 2.2357512975992306], 'Rp': [0.90219032607860539, 0.89474794719228812, 0.89784042356311322, 0.89984829753427398, 0.9045679937026978, 0.90062265439506262, 0.89497500946862962, 0.89493733015674526, 0.90371517822333047, 0.9010610859521605]}. 3 | -------------------------------------------------------------------------------- /experiments/experiments_gdsc/cross_validation/np_nmtf/results.txt: -------------------------------------------------------------------------------- 1 | Average performances: {'R^2': 0.8077440039100411, 'MSE': 2.2576486936637714, 'Rp': 0.89914824429384355}. 2 | All performances: {'R^2': [0.7957199747101262, 0.8141723551198858, 0.8056171442590625, 0.8116290674813398, 0.8108989639498128, 0.8071070614930603, 0.8041122553537439, 0.812358444747342, 0.8127652701427079, 0.8030595018433286], 'MSE': [2.3753205049125281, 2.1919391613602976, 2.2938850389890315, 2.17790199886428, 2.2413691490156138, 2.2693811279944294, 2.3605569584646107, 2.2303183150776187, 2.1364651293176347, 2.299349552641671], 'Rp': [0.89230313136198702, 0.90276056743084043, 0.89814196546978564, 0.90130647586264789, 0.9010626799620397, 0.89878885253115048, 0.89711511071416949, 0.90150884819951982, 0.90193388078270109, 0.89656093062359299]}. 3 | -------------------------------------------------------------------------------- /experiments/experiments_ccle/cross_validation/ccle_ic_np_nmf/results.txt: -------------------------------------------------------------------------------- 1 | Average performances: {'R^2': 0.5834383321227549, 'MSE': 4.6836813431881321, 'Rp': 0.76539038468678466}.
2 | All performances: {'R^2': [0.5581154055756108, 0.5976144301819742, 0.6074602591964648, 0.60317826238292, 0.5580933330115285, 0.6056417388078126, 0.5417251115941935, 0.5638098628847992, 0.5993572553570361, 0.5993876622352095], 'MSE': [4.9761831411253876, 4.601164367855695, 4.1604958665433571, 4.4068545475314576, 4.7952719987422787, 4.5976626695669811, 5.0297510989555079, 4.9786459996680481, 4.6775970019259292, 4.6131867399666815], 'Rp': [0.74918609043742923, 0.77505321719834175, 0.78001264369772305, 0.77808776416730452, 0.74977787790685602, 0.77855419896769817, 0.73887575798487093, 0.75294195132016772, 0.77499177897479554, 0.7764225662126597]}. 3 | -------------------------------------------------------------------------------- /experiments/experiments_ccle/cross_validation/ccle_ec_np_nmf/results.txt: -------------------------------------------------------------------------------- 1 | Average performances: {'R^2': 0.2781758379686691, 'MSE': 8.0471891476130519, 'Rp': 0.54543041945837778}. 2 | All performances: {'R^2': [0.2893664139798344, 0.3675563339460708, 0.25980035553357406, 0.23938271705899428, 0.2530976731048762, 0.29012022286183825, 0.3538687087841409, 0.2760378760205108, 0.262746922851533, 0.18978115554531882], 'MSE': [8.0648790807580379, 6.9006784036206179, 8.1409961811992009, 8.2999731998977939, 8.2065025131542164, 8.1708120827606479, 7.1793657752559277, 7.9072408051910985, 8.2407904301235817, 9.3606530041694036], 'Rp': [0.55540920431152441, 0.60921980844748336, 0.52761474543693132, 0.51591801537692683, 0.5236675142267132, 0.55021414609128683, 0.60009224481417844, 0.54631672706180834, 0.53941318669187432, 0.48643860212505091]}. 3 | -------------------------------------------------------------------------------- /experiments/experiments_ccle/cross_validation/ccle_ec_np_nmtf/results.txt: -------------------------------------------------------------------------------- 1 | Average performances: {'R^2': 0.27452751064922026, 'MSE': 8.0761164553596867, 'Rp': 0.54280291631635058}. 2 | All performances: {'R^2': [0.25462406025751205, 0.28060743426965606, 0.2586817089274871, 0.183234123218048, 0.33228159578412275, 0.33928544150672446, 0.2614317329721215, 0.31697648925716626, 0.29233928029354916, 0.22581324000581526], 'MSE': [8.4875837684602278, 7.7879390718853401, 8.4309347691568135, 9.1382102849973688, 7.3126838761894719, 7.5807817178720285, 7.9856521963029952, 7.7495817839986261, 8.0086247560326136, 8.2791723287013728], 'Rp': [0.51992973113537488, 0.5514566825341487, 0.5279403053176901, 0.48668451107067162, 0.58224134947774886, 0.58787052204193668, 0.5345849839602469, 0.57042001347817517, 0.5613693249217635, 0.50553173922574979]}. 3 | -------------------------------------------------------------------------------- /experiments/experiments_ccle/cross_validation/ccle_ic_np_nmtf/results.txt: -------------------------------------------------------------------------------- 1 | Average performances: {'R^2': 0.5852741068439489, 'MSE': 4.6642022787566386, 'Rp': 0.76667607772042845}. 
2 | All performances: {'R^2': [0.5612166455312766, 0.5776112491459862, 0.5918545629428087, 0.5582129959667665, 0.6031838913058494, 0.6212836918766113, 0.5973625605929315, 0.5914757901910388, 0.5644144495125436, 0.5861252313736759], 'MSE': [5.0659262098767446, 4.8075073376466833, 4.6609715083992764, 4.8552683532599499, 4.4969035768920209, 4.3985206643584442, 4.5046485520163779, 4.5133760738818456, 4.8092333428351868, 4.5296671683998628], 'Rp': [0.75076620323159227, 0.76105736566014892, 0.77141186726015187, 0.74946287320262439, 0.77766010330458246, 0.78844764519390043, 0.77456026739973793, 0.77018933558005898, 0.75590853879080033, 0.76729657758068637]}. 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | .Python 10 | env/ 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | *.egg-info/ 23 | .installed.cfg 24 | *.egg 25 | 26 | # PyInstaller 27 | # Usually these files are written by a python script from a template 28 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 29 | *.manifest 30 | *.spec 31 | 32 | # Installer logs 33 | pip-log.txt 34 | pip-delete-this-directory.txt 35 | 36 | # Unit test / coverage reports 37 | htmlcov/ 38 | .tox/ 39 | .coverage 40 | .coverage.* 41 | .cache 42 | nosetests.xml 43 | coverage.xml 44 | *,cover 45 | 46 | # Translations 47 | *.mo 48 | *.pot 49 | 50 | # Django stuff: 51 | *.log 52 | 53 | # Sphinx documentation 54 | docs/_build/ 55 | 56 | # PyBuilder 57 | target/ 58 | -------------------------------------------------------------------------------- /experiments/experiments_gdsc/cross_validation/kbmf/kbmf1mkl1mkl/kbmf1mkl1mkl_supervised_regression_variational_test.R: -------------------------------------------------------------------------------- 1 | # Mehmet Gonen (mehmet.gonen@gmail.com) 2 | 3 | kbmf1mkl1mkl_supervised_regression_variational_test <- function(Kx, Kz, state) { 4 | Nx <- dim(Kx)[2] 5 | Px <- dim(Kx)[3] 6 | Nz <- dim(Kz)[2] 7 | Pz <- dim(Kz)[3] 8 | R <- dim(state$Ax$mu)[2] 9 | 10 | Gx <- list(mu = array(0, c(R, Nx, Px))) 11 | for (m in 1:Px) { 12 | Gx$mu[,,m] <- crossprod(state$Ax$mu, Kx[,,m]) 13 | } 14 | Hx <- list(mu = matrix(0, R, Nx)) 15 | for (m in 1:Px) { 16 | Hx$mu <- Hx$mu + state$ex$mu[m] * Gx$mu[,,m] 17 | } 18 | 19 | Gz <- list(mu = array(0, c(R, Nz, Pz))) 20 | for (n in 1:Pz) { 21 | Gz$mu[,,n] <- crossprod(state$Az$mu, Kz[,,n]) 22 | } 23 | Hz <- list(mu = matrix(0, R, Nz)) 24 | for (n in 1:Pz) { 25 | Hz$mu <- Hz$mu + state$ez$mu[n] * Gz$mu[,,n] 26 | } 27 | 28 | Y <- list(mu = crossprod(Hx$mu, Hz$mu)) 29 | 30 | prediction <- list(Gx = Gx, Hx = Hx, Gz = Gz, Hz = Hz, Y = Y) 31 | } -------------------------------------------------------------------------------- /experiments/experiments_gdsc/cross_validation/kbmf/kbmf1mkl1mkl/kbmf1mkl1mkl_semisupervised_regression_variational_test.R: -------------------------------------------------------------------------------- 1 | # Mehmet Gonen (mehmet.gonen@gmail.com) 2 | 3 | kbmf1mkl1mkl_semisupervised_regression_variational_test <- function(Kx, Kz, state) { 4 | Nx <- dim(Kx)[2] 5 | Px <- dim(Kx)[3] 6 | Nz <- dim(Kz)[2] 7 | Pz <- dim(Kz)[3] 8 | R <- dim(state$Ax$mu)[2] 9 | 10 | Gx <- list(mu = array(0, c(R, Nx, Px))) 11 
| for (m in 1:Px) { 12 | Gx$mu[,,m] <- crossprod(state$Ax$mu, Kx[,,m]) 13 | } 14 | Hx <- list(mu = matrix(0, R, Nx)) 15 | for (m in 1:Px) { 16 | Hx$mu <- Hx$mu + state$ex$mu[m] * Gx$mu[,,m] 17 | } 18 | 19 | Gz <- list(mu = array(0, c(R, Nz, Pz))) 20 | for (n in 1:Pz) { 21 | Gz$mu[,,n] <- crossprod(state$Az$mu, Kz[,,n]) 22 | } 23 | Hz <- list(mu = matrix(0, R, Nz)) 24 | for (n in 1:Pz) { 25 | Hz$mu <- Hz$mu + state$ez$mu[n] * Gz$mu[,,n] 26 | } 27 | 28 | Y <- list(mu = crossprod(Hx$mu, Hz$mu)) 29 | 30 | prediction <- list(Gx = Gx, Hx = Hx, Gz = Gz, Hz = Hz, Y = Y) 31 | } -------------------------------------------------------------------------------- /experiments/experiments_gdsc/cross_validation/kbmf/kbmf1mkl1mkl/kbmf1mkl1mkl_supervised_classification_variational_test.R: -------------------------------------------------------------------------------- 1 | # Mehmet Gonen (mehmet.gonen@gmail.com) 2 | 3 | kbmf1mkl1mkl_supervised_classification_variational_test <- function(Kx, Kz, state) { 4 | Nx <- dim(Kx)[2] 5 | Px <- dim(Kx)[3] 6 | Nz <- dim(Kz)[2] 7 | Pz <- dim(Kz)[3] 8 | R <- dim(state$Ax$mu)[2] 9 | 10 | Gx <- list(mu = array(0, c(R, Nx, Px))) 11 | for (m in 1:Px) { 12 | Gx$mu[,,m] <- crossprod(state$Ax$mu, Kx[,,m]) 13 | } 14 | Hx <- list(mu = matrix(0, R, Nx)) 15 | for (m in 1:Px) { 16 | Hx$mu <- Hx$mu + state$ex$mu[m] * Gx$mu[,,m] 17 | } 18 | 19 | Gz <- list(mu = array(0, c(R, Nz, Pz))) 20 | for (n in 1:Pz) { 21 | Gz$mu[,,n] <- crossprod(state$Az$mu, Kz[,,n]) 22 | } 23 | Hz <- list(mu = matrix(0, R, Nz)) 24 | for (n in 1:Pz) { 25 | Hz$mu <- Hz$mu + state$ez$mu[n] * Gz$mu[,,n] 26 | } 27 | 28 | F <- list(mu = crossprod(Hx$mu, Hz$mu)) 29 | 30 | prediction <- list(Gx = Gx, Hx = Hx, Gz = Gz, Hz = Hz, F = F) 31 | } -------------------------------------------------------------------------------- /experiments/experiments_gdsc/cross_validation/kbmf/kbmf1mkl1mkl/kbmf1mkl1mkl_semisupervised_classification_variational_test.R: -------------------------------------------------------------------------------- 1 | # Mehmet Gonen (mehmet.gonen@gmail.com) 2 | 3 | kbmf1mkl1mkl_semisupervised_classification_variational_test <- function(Kx, Kz, state) { 4 | Nx <- dim(Kx)[2] 5 | Px <- dim(Kx)[3] 6 | Nz <- dim(Kz)[2] 7 | Pz <- dim(Kz)[3] 8 | R <- dim(state$Ax$mu)[2] 9 | 10 | Gx <- list(mu = array(0, c(R, Nx, Px))) 11 | for (m in 1:Px) { 12 | Gx$mu[,,m] <- crossprod(state$Ax$mu, Kx[,,m]) 13 | } 14 | Hx <- list(mu = matrix(0, R, Nx)) 15 | for (m in 1:Px) { 16 | Hx$mu <- Hx$mu + state$ex$mu[m] * Gx$mu[,,m] 17 | } 18 | 19 | Gz <- list(mu = array(0, c(R, Nz, Pz))) 20 | for (n in 1:Pz) { 21 | Gz$mu[,,n] <- crossprod(state$Az$mu, Kz[,,n]) 22 | } 23 | Hz <- list(mu = matrix(0, R, Nz)) 24 | for (n in 1:Pz) { 25 | Hz$mu <- Hz$mu + state$ez$mu[n] * Gz$mu[,,n] 26 | } 27 | 28 | F <- list(mu = crossprod(Hx$mu, Hz$mu)) 29 | 30 | prediction <- list(Gx = Gx, Hx = Hx, Gz = Gz, Hz = Hz, F = F) 31 | } -------------------------------------------------------------------------------- /experiments/experiments_gdsc/cross_validation/kbmf/kbmf1mkl1mkl/kbmf1mkl1mkl_supervised_regression_variational_test.m: -------------------------------------------------------------------------------- 1 | % Mehmet Gonen (mehmet.gonen@gmail.com) 2 | 3 | function prediction = kbmf1mkl1mkl_supervised_regression_variational_test(Kx, Kz, state) 4 | Nx = size(Kx, 2); 5 | Px = size(Kx, 3); 6 | Nz = size(Kz, 2); 7 | Pz = size(Kz, 3); 8 | R = size(state.Ax.mu, 2); 9 | 10 | prediction.Gx.mu = zeros(R, Nx, Px); 11 | for m = 1:Px 12 | prediction.Gx.mu(:, :, m) = 
state.Ax.mu' * Kx(:, :, m); 13 | end 14 | prediction.Hx.mu = zeros(R, Nx); 15 | for m = 1:Px 16 | prediction.Hx.mu = prediction.Hx.mu + state.ex.mu(m) * prediction.Gx.mu(:, :, m); 17 | end 18 | 19 | prediction.Gz.mu = zeros(R, Nz, Pz); 20 | for n = 1:Pz 21 | prediction.Gz.mu(:, :, n) = state.Az.mu' * Kz(:, :, n); 22 | end 23 | prediction.Hz.mu = zeros(R, Nz); 24 | for n = 1:Pz 25 | prediction.Hz.mu = prediction.Hz.mu + state.ez.mu(n) * prediction.Gz.mu(:, :, n); 26 | end 27 | 28 | prediction.Y.mu = prediction.Hx.mu' * prediction.Hz.mu; 29 | end -------------------------------------------------------------------------------- /experiments/experiments_gdsc/cross_validation/kbmf/kbmf1mkl1mkl/kbmf1mkl1mkl_semisupervised_regression_variational_test.m: -------------------------------------------------------------------------------- 1 | % Mehmet Gonen (mehmet.gonen@gmail.com) 2 | 3 | function prediction = kbmf1mkl1mkl_semisupervised_regression_variational_test(Kx, Kz, state) 4 | Nx = size(Kx, 2); 5 | Px = size(Kx, 3); 6 | Nz = size(Kz, 2); 7 | Pz = size(Kz, 3); 8 | R = size(state.Ax.mu, 2); 9 | 10 | prediction.Gx.mu = zeros(R, Nx, Px); 11 | for m = 1:Px 12 | prediction.Gx.mu(:, :, m) = state.Ax.mu' * Kx(:, :, m); 13 | end 14 | prediction.Hx.mu = zeros(R, Nx); 15 | for m = 1:Px 16 | prediction.Hx.mu = prediction.Hx.mu + state.ex.mu(m) * prediction.Gx.mu(:, :, m); 17 | end 18 | 19 | prediction.Gz.mu = zeros(R, Nz, Pz); 20 | for n = 1:Pz 21 | prediction.Gz.mu(:, :, n) = state.Az.mu' * Kz(:, :, n); 22 | end 23 | prediction.Hz.mu = zeros(R, Nz); 24 | for n = 1:Pz 25 | prediction.Hz.mu = prediction.Hz.mu + state.ez.mu(n) * prediction.Gz.mu(:, :, n); 26 | end 27 | 28 | prediction.Y.mu = prediction.Hx.mu' * prediction.Hz.mu; 29 | end -------------------------------------------------------------------------------- /experiments/experiments_gdsc/cross_validation/kbmf/kbmf1mkl1mkl/kbmf1mkl1mkl_supervised_classification_variational_test.m: -------------------------------------------------------------------------------- 1 | % Mehmet Gonen (mehmet.gonen@gmail.com) 2 | 3 | function prediction = kbmf1mkl1mkl_supervised_classification_variational_test(Kx, Kz, state) 4 | Nx = size(Kx, 2); 5 | Px = size(Kx, 3); 6 | Nz = size(Kz, 2); 7 | Pz = size(Kz, 3); 8 | R = size(state.Ax.mu, 2); 9 | 10 | prediction.Gx.mu = zeros(R, Nx, Px); 11 | for m = 1:Px 12 | prediction.Gx.mu(:, :, m) = state.Ax.mu' * Kx(:, :, m); 13 | end 14 | prediction.Hx.mu = zeros(R, Nx); 15 | for m = 1:Px 16 | prediction.Hx.mu = prediction.Hx.mu + state.ex.mu(m) * prediction.Gx.mu(:, :, m); 17 | end 18 | 19 | prediction.Gz.mu = zeros(R, Nz, Pz); 20 | for n = 1:Pz 21 | prediction.Gz.mu(:, :, n) = state.Az.mu' * Kz(:, :, n); 22 | end 23 | prediction.Hz.mu = zeros(R, Nz); 24 | for n = 1:Pz 25 | prediction.Hz.mu = prediction.Hz.mu + state.ez.mu(n) * prediction.Gz.mu(:, :, n); 26 | end 27 | 28 | prediction.F.mu = prediction.Hx.mu' * prediction.Hz.mu; 29 | end -------------------------------------------------------------------------------- /experiments/experiments_gdsc/cross_validation/kbmf/kbmf1mkl1mkl/kbmf1mkl1mkl_semisupervised_classification_variational_test.m: -------------------------------------------------------------------------------- 1 | % Mehmet Gonen (mehmet.gonen@gmail.com) 2 | 3 | function prediction = kbmf1mkl1mkl_semisupervised_classification_variational_test(Kx, Kz, state) 4 | Nx = size(Kx, 2); 5 | Px = size(Kx, 3); 6 | Nz = size(Kz, 2); 7 | Pz = size(Kz, 3); 8 | R = size(state.Ax.mu, 2); 9 | 10 | prediction.Gx.mu = zeros(R, Nx, Px); 11 | 
for m = 1:Px 12 | prediction.Gx.mu(:, :, m) = state.Ax.mu' * Kx(:, :, m); 13 | end 14 | prediction.Hx.mu = zeros(R, Nx); 15 | for m = 1:Px 16 | prediction.Hx.mu = prediction.Hx.mu + state.ex.mu(m) * prediction.Gx.mu(:, :, m); 17 | end 18 | 19 | prediction.Gz.mu = zeros(R, Nz, Pz); 20 | for n = 1:Pz 21 | prediction.Gz.mu(:, :, n) = state.Az.mu' * Kz(:, :, n); 22 | end 23 | prediction.Hz.mu = zeros(R, Nz); 24 | for n = 1:Pz 25 | prediction.Hz.mu = prediction.Hz.mu + state.ez.mu(n) * prediction.Gz.mu(:, :, n); 26 | end 27 | 28 | prediction.F.mu = prediction.Hx.mu' * prediction.Hz.mu; 29 | end -------------------------------------------------------------------------------- /tests/code/distributions/test_gamma.py: -------------------------------------------------------------------------------- 1 | """ 2 | Test the class for Gamma draws and expectations in gamma.py. 3 | """ 4 | 5 | import sys, os 6 | project_location = os.path.dirname(__file__)+"/../../../" 7 | sys.path.append(project_location) 8 | 9 | from BNMTF.code.models.distributions.gamma import gamma_draw, gamma_expectation, gamma_expectation_log, gamma_mode 10 | 11 | def test_expectation(): 12 | alpha = 2.0 13 | beta = 3.0 14 | 15 | expectation = 2.0 / 3.0 16 | assert gamma_expectation(alpha,beta) == expectation 17 | 18 | def test_expectation_log(): 19 | alpha = 2.0 20 | beta = 3.0 21 | 22 | expectation_log = -0.67582795356964265 # digamma(2) - log_e (3) in Wolfram Alpha 23 | assert gamma_expectation_log(alpha,beta) == expectation_log 24 | 25 | # Test a draw - simply verify it is >= 0. 26 | def test_draw(): 27 | alpha = 2.0 28 | beta = 3.0 29 | for i in range(0,100): 30 | assert gamma_draw(alpha,beta) >= 0.0 31 | 32 | # Test the mode, (alpha-1)/beta 33 | def test_mode(): 34 | alpha = 2.0 35 | beta = 3.0 36 | mode = 1./3. 37 | assert gamma_mode(alpha,beta) == mode -------------------------------------------------------------------------------- /data_drug_sensitivity/ccle/load_data.py: -------------------------------------------------------------------------------- 1 | """ 2 | Helper function for reading in the CCLE dataset, splitting into data X and mask M. 3 | Returns: 4 | X Drug sensitivity values (original) 5 | M Mask of known vs unknown values 6 | """ 7 | 8 | import numpy 9 | 10 | import os 11 | 12 | folder_ccle = os.path.dirname(__file__)+"/" 13 | ccle_ic_file = folder_ccle+"ic50.txt" 14 | ccle_ec_file = folder_ccle+"ec50.txt" 15 | 16 | def load_ccle(ic50=True, delim='\t'): 17 | filelocation = (ccle_ic_file if ic50 else ccle_ec_file) 18 | data = numpy.genfromtxt(filelocation, delimiter=delim, missing_values=[numpy.nan]) 19 | I, J = data.shape 20 | 21 | # Construct the mask matrix, and replace any nan values by 0 22 | new_data, mask = numpy.zeros((I,J)), numpy.zeros((I,J)) 23 | for i in range(0,I): 24 | for j in range(0,J): 25 | if not numpy.isnan(data[i,j]): 26 | new_data[i,j] = data[i,j] 27 | mask[i,j] = 1. 28 | return new_data, mask 29 | 30 | ''' 31 | X, M = load_ccle(ic50=False) 32 | (I,J)= X.shape 33 | print I,J 34 | print I*J, M.sum(), M.sum()/(I*J) 35 | ''' -------------------------------------------------------------------------------- /experiments/experiments_ccle/cross_validation/ccle_ec_np_nmf/np_nmf_nested_xval.py: -------------------------------------------------------------------------------- 1 | """ 2 | Run the nested cross-validation for the NMF class, on the CCLE EC50 dataset.
3 | """ 4 | 5 | import sys, os 6 | project_location = os.path.dirname(__file__)+"/../../../../../" 7 | sys.path.append(project_location) 8 | 9 | from BNMTF.code.models.nmf_np import NMF 10 | from BNMTF.code.cross_validation.nested_matrix_cross_validation import MatrixNestedCrossValidation 11 | from BNMTF.data_drug_sensitivity.ccle.load_data import load_ccle 12 | 13 | 14 | # Settings 15 | standardised = False 16 | train_config = { 17 | 'iterations' : 2000, 18 | 'init_UV' : 'exponential', 19 | 'expo_prior' : 0.1 20 | } 21 | K_range = [1,2,3] 22 | no_threads = 5 23 | no_folds = 10 24 | output_file = "./results.txt" 25 | files_nested_performances = ["./fold_%s.txt" % fold for fold in range(1,no_folds+1)] 26 | 27 | # Construct the parameter search 28 | parameter_search = [{'K':K} for K in K_range] 29 | 30 | # Load in the CCLE EC50 dataset 31 | R,M = load_ccle(ic50=False) 32 | 33 | # Run the cross-validation framework 34 | #random.seed(42) 35 | #numpy.random.seed(9000) 36 | nested_crossval = MatrixNestedCrossValidation( 37 | method=NMF, 38 | X=R, 39 | M=M, 40 | K=no_folds, 41 | P=no_threads, 42 | parameter_search=parameter_search, 43 | train_config=train_config, 44 | file_performance=output_file, 45 | files_nested_performances=files_nested_performances 46 | ) 47 | nested_crossval.run() 48 | -------------------------------------------------------------------------------- /experiments/experiments_ccle/cross_validation/ccle_ic_np_nmf/np_nmf_nested_xval.py: -------------------------------------------------------------------------------- 1 | """ 2 | Run the nested cross-validation for the NMF class, on the CCLE IC50 dataset. 3 | """ 4 | 5 | import sys, os 6 | project_location = os.path.dirname(__file__)+"/../../../../../" 7 | sys.path.append(project_location) 8 | 9 | from BNMTF.code.models.nmf_np import NMF 10 | from BNMTF.code.cross_validation.nested_matrix_cross_validation import MatrixNestedCrossValidation 11 | from BNMTF.data_drug_sensitivity.ccle.load_data import load_ccle 12 | 13 | 14 | # Settings 15 | standardised = False 16 | train_config = { 17 | 'iterations' : 2000, 18 | 'init_UV' : 'exponential', 19 | 'expo_prior' : 0.1 20 | } 21 | K_range = [1,2,3] 22 | no_threads = 2 23 | no_folds = 10 24 | output_file = "./results.txt" 25 | files_nested_performances = ["./fold_%s.txt" % fold for fold in range(1,no_folds+1)] 26 | 27 | # Construct the parameter search 28 | parameter_search = [{'K':K} for K in K_range] 29 | 30 | # Load in the CCLE IC50 dataset 31 | R,M = load_ccle(ic50=True) 32 | 33 | # Run the cross-validation framework 34 | #random.seed(42) 35 | #numpy.random.seed(9000) 36 | nested_crossval = MatrixNestedCrossValidation( 37 | method=NMF, 38 | X=R, 39 | M=M, 40 | K=no_folds, 41 | P=no_threads, 42 | parameter_search=parameter_search, 43 | train_config=train_config, 44 | file_performance=output_file, 45 | files_nested_performances=files_nested_performances 46 | ) 47 | nested_crossval.run() 48 | -------------------------------------------------------------------------------- /experiments/experiments_gdsc/cross_validation/np_nmf/np_nmf_nested_xval.py: -------------------------------------------------------------------------------- 1 | """ 2 | Run the nested cross-validation for the NMTF class, on the Sanger dataset. 
3 | """ 4 | 5 | import sys, os 6 | project_location = os.path.dirname(__file__)+"/../../../../../" 7 | sys.path.append(project_location) 8 | 9 | import numpy, random 10 | from BNMTF.code.models.nmf_np import NMF 11 | from BNMTF.code.cross_validation.nested_matrix_cross_validation import MatrixNestedCrossValidation 12 | from BNMTF.data_drug_sensitivity.gdsc.load_data import load_gdsc 13 | 14 | 15 | # Settings 16 | standardised = False 17 | train_config = { 18 | 'iterations' : 2000, 19 | 'init_UV' : 'exponential', 20 | 'expo_prior' : 0.1 21 | } 22 | K_range = [6,8,10,12,14] 23 | no_folds = 10 24 | output_file = "./results.txt" 25 | files_nested_performances = ["./fold_%s.txt" % fold for fold in range(1,no_folds+1)] 26 | 27 | # Construct the parameter search 28 | parameter_search = [{'K':K} for K in K_range] 29 | 30 | # Load in the Sanger dataset 31 | (_,X_min,M,_,_,_,_) = load_gdsc(standardised=standardised,sep=',') 32 | 33 | # Run the cross-validation framework 34 | random.seed(42) 35 | numpy.random.seed(9000) 36 | nested_crossval = MatrixNestedCrossValidation( 37 | method=NMF, 38 | X=X_min, 39 | M=M, 40 | K=no_folds, 41 | P=5, 42 | parameter_search=parameter_search, 43 | train_config=train_config, 44 | file_performance=output_file, 45 | files_nested_performances=files_nested_performances 46 | ) 47 | nested_crossval.run() 48 | -------------------------------------------------------------------------------- /code/models/distributions/gamma.py: -------------------------------------------------------------------------------- 1 | """ 2 | Class representing a gamma distribution, allowing us to sample from it, 3 | and compute the expectation and the expectation of the log. 4 | """ 5 | import math 6 | from scipy.special import psi as digamma 7 | from numpy.random import gamma 8 | 9 | 10 | # Gamma draws 11 | def gamma_draw(alpha,beta): 12 | shape = float(alpha) 13 | scale = 1.0 / float(beta) 14 | return gamma(shape=shape,scale=scale,size=None) 15 | 16 | # Gamma expectation 17 | def gamma_expectation(alpha,beta): 18 | alpha, beta = float(alpha), float(beta) 19 | return alpha / beta 20 | 21 | # Gamma variance 22 | def gamma_expectation_log(alpha,beta): 23 | alpha, beta = float(alpha), float(beta) 24 | return digamma(alpha) - math.log(beta) 25 | 26 | # Gamma mode 27 | def gamma_mode(alpha,beta): 28 | alpha, beta = float(alpha), float(beta) 29 | return (alpha-1) / beta 30 | 31 | 32 | ''' 33 | # Do 1000 draws and plot them 34 | import matplotlib.pyplot as plt 35 | import scipy.special as sps 36 | import numpy as np 37 | shape, scale = 2., 2. # mean and dispersion 38 | s = [gamma_draw(shape,1.0/scale) for i in range(0,1000)] 39 | s2 = np.random.gamma(shape, scale, 1000) 40 | count, bins, ignored = plt.hist(s, 50, normed=True) 41 | count, bins, ignored = plt.hist(s2, 50, normed=True) 42 | y = bins**(shape-1)*(np.exp(-bins/scale) / 43 | (sps.gamma(shape)*scale**shape)) 44 | plt.plot(bins, y, linewidth=2, color='r') 45 | plt.show() 46 | ''' -------------------------------------------------------------------------------- /experiments/experiments_ccle/cross_validation/ccle_ec_vb_nmf/linesearch_xval_vb.py: -------------------------------------------------------------------------------- 1 | """ 2 | Run the cross validation with line search for model selection using VB-NMF on 3 | the CCLE EC50 dataset. 
4 | """ 5 | 6 | import sys, os 7 | project_location = os.path.dirname(__file__)+"/../../../../../" 8 | sys.path.append(project_location) 9 | 10 | import numpy, random 11 | from BNMTF.code.models.bnmf_vb_optimised import bnmf_vb_optimised 12 | from BNMTF.code.cross_validation.line_search_cross_validation import LineSearchCrossValidation 13 | from BNMTF.data_drug_sensitivity.ccle.load_data import load_ccle 14 | 15 | 16 | # Settings 17 | standardised = False 18 | iterations = 1000 19 | init_UV = 'random' 20 | 21 | K_range = [1,2,3] 22 | no_folds = 10 23 | restarts = 1 24 | 25 | quality_metric = 'AIC' 26 | output_file = "./results.txt" 27 | 28 | alpha, beta = 1., 1. 29 | lambdaU = 1./10. 30 | lambdaV = 1./10. 31 | priors = { 'alpha':alpha, 'beta':beta, 'lambdaU':lambdaU, 'lambdaV':lambdaV } 32 | 33 | # Load in the CCLE EC50 dataset 34 | R,M = load_ccle(ic50=False) 35 | 36 | # Run the cross-validation framework 37 | #random.seed(42) 38 | #numpy.random.seed(9000) 39 | nested_crossval = LineSearchCrossValidation( 40 | classifier=bnmf_vb_optimised, 41 | R=R, 42 | M=M, 43 | values_K=K_range, 44 | folds=no_folds, 45 | priors=priors, 46 | init_UV=init_UV, 47 | iterations=iterations, 48 | restarts=restarts, 49 | quality_metric=quality_metric, 50 | file_performance=output_file 51 | ) 52 | nested_crossval.run() 53 | -------------------------------------------------------------------------------- /experiments/experiments_ccle/cross_validation/ccle_ic_vb_nmf/linesearch_xval_vb.py: -------------------------------------------------------------------------------- 1 | """ 2 | Run the cross validation with line search for model selection using VB-NMF on 3 | the CCLE IC50 dataset. 4 | """ 5 | 6 | import sys, os 7 | project_location = os.path.dirname(__file__)+"/../../../../../" 8 | sys.path.append(project_location) 9 | 10 | import numpy, random 11 | from BNMTF.code.models.bnmf_vb_optimised import bnmf_vb_optimised 12 | from BNMTF.code.cross_validation.line_search_cross_validation import LineSearchCrossValidation 13 | from BNMTF.data_drug_sensitivity.ccle.load_data import load_ccle 14 | 15 | 16 | # Settings 17 | standardised = False 18 | iterations = 1000 19 | init_UV = 'random' 20 | 21 | K_range = [3,4,5,6,7] 22 | no_folds = 10 23 | restarts = 1 24 | 25 | quality_metric = 'AIC' 26 | output_file = "./results.txt" 27 | 28 | alpha, beta = 1., 1. 29 | lambdaU = 1./10. 30 | lambdaV = 1./10. 31 | priors = { 'alpha':alpha, 'beta':beta, 'lambdaU':lambdaU, 'lambdaV':lambdaV } 32 | 33 | # Load in the CCLE IC50 dataset 34 | R,M = load_ccle(ic50=True) 35 | 36 | # Run the cross-validation framework 37 | #random.seed(42) 38 | #numpy.random.seed(9000) 39 | nested_crossval = LineSearchCrossValidation( 40 | classifier=bnmf_vb_optimised, 41 | R=R, 42 | M=M, 43 | values_K=K_range, 44 | folds=no_folds, 45 | priors=priors, 46 | init_UV=init_UV, 47 | iterations=iterations, 48 | restarts=restarts, 49 | quality_metric=quality_metric, 50 | file_performance=output_file 51 | ) 52 | nested_crossval.run() 53 | -------------------------------------------------------------------------------- /experiments/experiments_ccle/cross_validation/ccle_ec_icm_nmf/linesearch_xval_icm.py: -------------------------------------------------------------------------------- 1 | """ 2 | Run the cross validation with line search for model selection using VB-NMF on 3 | the CCLE EC50 dataset. 
4 | """ 5 | 6 | import sys, os 7 | project_location = os.path.dirname(__file__)+"/../../../../../" 8 | sys.path.append(project_location) 9 | 10 | from BNMTF.code.models.nmf_icm import nmf_icm 11 | from BNMTF.code.cross_validation.line_search_cross_validation import LineSearchCrossValidation 12 | from BNMTF.data_drug_sensitivity.ccle.load_data import load_ccle 13 | 14 | 15 | # Settings 16 | standardised = False 17 | iterations = 1000 18 | init_UV = 'random' 19 | 20 | K_range = [1,2,3,4,5,6,7,8,9,10] 21 | no_folds = 10 22 | restarts = 1 23 | 24 | quality_metric = 'AIC' 25 | output_file = "./results.txt" 26 | 27 | alpha, beta = 1., 1. 28 | lambdaU = 1./10. 29 | lambdaV = 1./10. 30 | priors = { 'alpha':alpha, 'beta':beta, 'lambdaU':lambdaU, 'lambdaV':lambdaV } 31 | 32 | minimum_TN = 0.01 33 | 34 | # Load in the CCLE EC50 dataset 35 | R,M = load_ccle(ic50=False) 36 | 37 | # Run the cross-validation framework 38 | #random.seed(42) 39 | #numpy.random.seed(9000) 40 | nested_crossval = LineSearchCrossValidation( 41 | classifier=nmf_icm, 42 | R=R, 43 | M=M, 44 | values_K=K_range, 45 | folds=no_folds, 46 | priors=priors, 47 | init_UV=init_UV, 48 | iterations=iterations, 49 | restarts=restarts, 50 | quality_metric=quality_metric, 51 | file_performance=output_file 52 | ) 53 | nested_crossval.run(minimum_TN=minimum_TN) 54 | -------------------------------------------------------------------------------- /experiments/experiments_ccle/cross_validation/ccle_ic_icm_nmf/linesearch_xval_icm.py: -------------------------------------------------------------------------------- 1 | """ 2 | Run the cross validation with line search for model selection using VB-NMF on 3 | the CCLE IC50 dataset. 4 | """ 5 | 6 | import sys, os 7 | project_location = os.path.dirname(__file__)+"/../../../../../" 8 | sys.path.append(project_location) 9 | 10 | from BNMTF.code.models.nmf_icm import nmf_icm 11 | from BNMTF.code.cross_validation.line_search_cross_validation import LineSearchCrossValidation 12 | from BNMTF.data_drug_sensitivity.ccle.load_data import load_ccle 13 | 14 | 15 | # Settings 16 | standardised = False 17 | iterations = 1000 18 | init_UV = 'random' 19 | 20 | K_range = [1,2,3,4,5,6,7,8,9,10] 21 | no_folds = 10 22 | restarts = 1 23 | 24 | quality_metric = 'AIC' 25 | output_file = "./results.txt" 26 | 27 | alpha, beta = 1., 1. 28 | lambdaU = 1./10. 29 | lambdaV = 1./10. 30 | priors = { 'alpha':alpha, 'beta':beta, 'lambdaU':lambdaU, 'lambdaV':lambdaV } 31 | 32 | minimum_TN = 0.01 33 | 34 | # Load in the CCLE IC50 dataset 35 | R,M = load_ccle(ic50=True) 36 | 37 | # Run the cross-validation framework 38 | #random.seed(42) 39 | #numpy.random.seed(9000) 40 | nested_crossval = LineSearchCrossValidation( 41 | classifier=nmf_icm, 42 | R=R, 43 | M=M, 44 | values_K=K_range, 45 | folds=no_folds, 46 | priors=priors, 47 | init_UV=init_UV, 48 | iterations=iterations, 49 | restarts=restarts, 50 | quality_metric=quality_metric, 51 | file_performance=output_file 52 | ) 53 | nested_crossval.run(minimum_TN=minimum_TN) 54 | -------------------------------------------------------------------------------- /data_drug_sensitivity/gdsc/notes: -------------------------------------------------------------------------------- 1 | Data from Sanger's Genomics of Drug Sensitivity in Cancer project. 2 | http://www.cancerrxgene.org/downloads/ 3 | 4 | *** ic50_excl_empty_filtered_cell_lines_drugs.txt *** 5 | The original file from website is called gdsc_manova_input_w5.csv (this file has all the genetic info etc filtered, leaving only the IC50 values). 
6 | We removed row with drug AZD6482 (PubChem id 44137675) as there are two columns with that drug (removed the first one). 7 | Filtered rows 57 and 635 because they had only 2 resp. 1 drug tested. 8 | Only included those cancer cell lines that have features available (en_input_w5.csv). The cancer cell lines are also ordered alphabetically (so that they align perfectly with the cell line kernels/constraint matrices). 9 | Also removed the drug Cisplatin (PubChem id 84691) and reordered columns alphabetically (so that they align perfectly with the drug and cancer line kernels/constraint matrices). 10 | Finally, removed one value (-36.485443) as this was an extreme outlier (next lowest value around -17). 11 | We end up with: 622 cell lines, 138 drugs 12 | 13 | *** ic50_excl_empty_filtered_cell_lines_drugs_standardised.txt *** 14 | As above, but standardised the cell lines (so each row has mean 0, std 1). 15 | We end up with: 622 cell lines, 138 drugs. 16 | 17 | *** /kernels/ *** 18 | The different similarity kernels, based on drug and cell line features. 19 | For binary features we use a Jaccard kernel. 20 | For real-valued features we use a Gaussian kernel with as the kernel standard deviation value: (no. features) / 4. 21 | -------------------------------------------------------------------------------- /experiments/experiments_gdsc/cross_validation/np_nmf/np_nmf_xval.py: -------------------------------------------------------------------------------- 1 | """ 2 | Run the nested cross-validation for the NMTF class, on the Sanger dataset. 3 | 4 | Since we want to find co-clusters of significantly higher/lower drug sensitivity 5 | values, we should use the unstandardised Sanger dataset. 6 | """ 7 | 8 | import sys, os 9 | project_location = os.path.dirname(__file__)+"/../../../../../" 10 | sys.path.append(project_location) 11 | 12 | import numpy, random 13 | from BNMTF.code.nmf_np import NMF 14 | from BNMTF.cross_validation.matrix_cross_validation import MatrixCrossValidation 15 | from BNMTF.data_drug_sensitivity.gdsc.load_data import load_gdsc 16 | 17 | 18 | # Settings 19 | standardised = False 20 | train_config = { 21 | 'iterations' : 1000, 22 | 'init_UV' : 'exponential', 23 | 'expo_prior' : 0.1 24 | } 25 | K_range = range(2,10+1,2) 26 | no_folds = 10 27 | output_file = "./results.txt" 28 | files_nested_performances = ["./fold_%s.txt" % fold for fold in range(1,no_folds+1)] 29 | 30 | # Construct the parameter search 31 | parameter_search = [{'K':K} for K in K_range] 32 | 33 | # Load in the Sanger dataset 34 | (_,X_min,M,_,_,_,_) = load_gdsc(standardised=standardised,sep=',') 35 | 36 | # Run the cross-validation framework 37 | random.seed(42) 38 | numpy.random.seed(9000) 39 | nested_crossval = MatrixCrossValidation( 40 | method=NMF, 41 | X=X_min, 42 | M=M, 43 | K=no_folds, 44 | parameter_search=parameter_search, 45 | train_config=train_config, 46 | file_performance=output_file 47 | ) 48 | nested_crossval.run() 49 | -------------------------------------------------------------------------------- /experiments/experiments_ccle/cross_validation/ccle_ic_gibbs_nmf/linesearch_xval_gibbs.py: -------------------------------------------------------------------------------- 1 | """ 2 | Run the cross validation with line search for model selection using VB-NMF on 3 | the CCLE IC50 dataset. 
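Aside on the kernel construction described in data_drug_sensitivity/gdsc/notes above (a Jaccard kernel for binary features, and a Gaussian kernel with standard deviation (no. features)/4 for real-valued features): a minimal sketch of how such kernels could be computed. The function names and the input convention (rows = cell lines or drugs, columns = features) are assumptions for illustration; the repository's own kernel scripts are not included in this dump.

import numpy

def jaccard_kernel(F):
    # Jaccard similarity between rows of a binary feature matrix F.
    F = numpy.asarray(F, dtype=float)
    intersection = F.dot(F.T)
    row_sums = F.sum(axis=1)
    union = row_sums[:, None] + row_sums[None, :] - intersection
    return intersection / numpy.maximum(union, 1e-10)   # rows with no features at all get similarity 0

def gaussian_kernel(F):
    # Gaussian (RBF) kernel between rows of a real-valued feature matrix F,
    # with kernel standard deviation sigma = (no. features) / 4 as in the notes.
    F = numpy.asarray(F, dtype=float)
    sigma = F.shape[1] / 4.0
    sq_dists = ((F[:, None, :] - F[None, :, :]) ** 2).sum(axis=2)
    return numpy.exp(-sq_dists / (2.0 * sigma ** 2))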
4 | """ 5 | 6 | import sys, os 7 | project_location = os.path.dirname(__file__)+"/../../../../../" 8 | sys.path.append(project_location) 9 | 10 | from BNMTF.code.models.bnmf_gibbs_optimised import bnmf_gibbs_optimised 11 | from BNMTF.code.cross_validation.line_search_cross_validation import LineSearchCrossValidation 12 | from BNMTF.data_drug_sensitivity.ccle.load_data import load_ccle 13 | 14 | 15 | # Settings 16 | iterations = 1000 17 | burn_in = 900 18 | thinning = 2 19 | init_UV = 'random' 20 | 21 | K_range = [3,4,5,6] 22 | no_folds = 10 23 | restarts = 1 24 | 25 | quality_metric = 'AIC' 26 | output_file = "./results.txt" 27 | 28 | alpha, beta = 1., 1. 29 | lambdaU = 1./10. 30 | lambdaV = 1./10. 31 | priors = { 'alpha':alpha, 'beta':beta, 'lambdaU':lambdaU, 'lambdaV':lambdaV } 32 | 33 | # Load in the CCLE IC50 dataset 34 | R,M = load_ccle(ic50=True) 35 | 36 | # Run the cross-validation framework 37 | #random.seed(42) 38 | #numpy.random.seed(9000) 39 | nested_crossval = LineSearchCrossValidation( 40 | classifier=bnmf_gibbs_optimised, 41 | R=R, 42 | M=M, 43 | values_K=K_range, 44 | folds=no_folds, 45 | priors=priors, 46 | init_UV=init_UV, 47 | iterations=iterations, 48 | restarts=restarts, 49 | quality_metric=quality_metric, 50 | file_performance=output_file 51 | ) 52 | nested_crossval.run(burn_in=burn_in,thinning=thinning) 53 | -------------------------------------------------------------------------------- /data_drug_sensitivity/gdsc/notes~: -------------------------------------------------------------------------------- 1 | Data from Sanger's Genomics of Drug Sensitivity in Cancer project. 2 | http://www.cancerrxgene.org/downloads/ 3 | 4 | 5 | *** ic50_excl_empty_filtered_cell_lines_drugs.txt *** 6 | The original file from website is called gdsc_manova_input_w5.csv (this file has all the genetic info etc filtered, leaving only the IC50 values). 7 | We removed row with drug AZD6482 (PubChem id 44137675) as there are two columns with that drug (removed the first one). 8 | Filtered rows 57 and 635 because they had only 2 resp. 1 drug tested. 9 | Only included those cancer cell lines that have features available (en_input_w5.csv). The cancer cell lines are also ordered alphabetically (so that they align perfectly with the cell line kernels/constraint matrices). 10 | Also removed the drug Cisplatin (PubChem id 84691) and reordered columns alphabetically (so that they align perfectly with the drug and cancer line kernels/constraint matrices). 11 | Finally, removed one value (-36.485443) as this was an extreme outlier (next lowest value around -17). 12 | We end up with: 622 cell lines, 138 drugs 13 | 14 | *** ic50_excl_empty_filtered_cell_lines_drugs_standardised.txt *** 15 | As above, but standardised the cell lines (so each row has mean 0, std 1). 16 | We end up with: 622 cell lines, 138 drugs. 17 | 18 | *** /kernels/ *** 19 | The different similarity kernels, based on drug and cell line features. 20 | For binary features we use a Jaccard kernel. 21 | For real-valued features we use a Gaussian kernel with as the kernel standard deviation value: (no. features) / 4. 22 | -------------------------------------------------------------------------------- /experiments/experiments_ccle/cross_validation/ccle_ec_gibbs_nmf/linesearch_xval_gibbs.py: -------------------------------------------------------------------------------- 1 | """ 2 | Run the cross validation with line search for model selection using VB-NMF on 3 | the CCLE EC50 dataset. 
4 | """ 5 | 6 | import sys, os 7 | project_location = os.path.dirname(__file__)+"/../../../../../" 8 | sys.path.append(project_location) 9 | 10 | from BNMTF.code.models.bnmf_gibbs_optimised import bnmf_gibbs_optimised 11 | from BNMTF.code.cross_validation.line_search_cross_validation import LineSearchCrossValidation 12 | from BNMTF.data_drug_sensitivity.ccle.load_data import load_ccle 13 | 14 | 15 | # Settings 16 | iterations = 1000 17 | burn_in = 900 18 | thinning = 2 19 | init_UV = 'random' 20 | 21 | K_range = [1,2,3] 22 | no_folds = 10 23 | restarts = 1 24 | 25 | quality_metric = 'AIC' 26 | output_file = "./results_test.txt" 27 | 28 | alpha, beta = 1., 1. 29 | lambdaU = 1./10. 30 | lambdaV = 1./10. 31 | priors = { 'alpha':alpha, 'beta':beta, 'lambdaU':lambdaU, 'lambdaV':lambdaV } 32 | 33 | # Load in the CCLE EC50 dataset 34 | R,M = load_ccle(ic50=False) 35 | 36 | # Run the cross-validation framework 37 | #random.seed(42) 38 | #numpy.random.seed(9000) 39 | nested_crossval = LineSearchCrossValidation( 40 | classifier=bnmf_gibbs_optimised, 41 | R=R, 42 | M=M, 43 | values_K=K_range, 44 | folds=no_folds, 45 | priors=priors, 46 | init_UV=init_UV, 47 | iterations=iterations, 48 | restarts=restarts, 49 | quality_metric=quality_metric, 50 | file_performance=output_file 51 | ) 52 | nested_crossval.run(burn_in=burn_in,thinning=thinning) 53 | -------------------------------------------------------------------------------- /experiments/experiments_ccle/cross_validation/ccle_ec_np_nmtf/np_nmtf_nested_xval.py: -------------------------------------------------------------------------------- 1 | """ 2 | Run the nested cross-validation for the NMTF class, on the CCLE EC50 dataset. 3 | """ 4 | 5 | import sys, os 6 | project_location = os.path.dirname(__file__)+"/../../../../../" 7 | sys.path.append(project_location) 8 | 9 | import itertools 10 | from BNMTF.code.models.nmtf_np import NMTF 11 | from BNMTF.code.cross_validation.nested_matrix_cross_validation import MatrixNestedCrossValidation 12 | from BNMTF.data_drug_sensitivity.ccle.load_data import load_ccle 13 | 14 | 15 | # Settings 16 | standardised = False 17 | train_config = { 18 | 'iterations' : 2000, 19 | 'init_FG' : 'kmeans', 20 | 'init_S' : 'exponential', 21 | 'expo_prior' : 0.1 22 | } 23 | K_range = [1,2] 24 | L_range = [1,2] 25 | no_threads = 5 26 | no_folds = 10 27 | output_file = "./results.txt" 28 | files_nested_performances = ["./fold_%s.txt" % fold for fold in range(1,no_folds+1)] 29 | 30 | # Construct the parameter search 31 | parameter_search = [{'K':K,'L':L} for (K,L) in itertools.product(K_range,L_range)] 32 | 33 | # Load in the CCLE IC50 dataset 34 | R,M = load_ccle(ic50=False) 35 | 36 | # Run the cross-validation framework 37 | #random.seed(42) 38 | #numpy.random.seed(9000) 39 | nested_crossval = MatrixNestedCrossValidation( 40 | method=NMTF, 41 | X=R, 42 | M=M, 43 | K=no_folds, 44 | P=no_threads, 45 | parameter_search=parameter_search, 46 | train_config=train_config, 47 | file_performance=output_file, 48 | files_nested_performances=files_nested_performances 49 | ) 50 | nested_crossval.run() 51 | -------------------------------------------------------------------------------- /experiments/experiments_ccle/cross_validation/ccle_ic_np_nmtf/np_nmtf_nested_xval.py: -------------------------------------------------------------------------------- 1 | """ 2 | Run the nested cross-validation for the NMTF class, on the CCLE IC50 dataset. 
3 | """ 4 | 5 | import sys, os 6 | project_location = os.path.dirname(__file__)+"/../../../../../" 7 | sys.path.append(project_location) 8 | 9 | import itertools 10 | from BNMTF.code.models.nmtf_np import NMTF 11 | from BNMTF.code.cross_validation.nested_matrix_cross_validation import MatrixNestedCrossValidation 12 | from BNMTF.data_drug_sensitivity.ccle.load_data import load_ccle 13 | 14 | 15 | # Settings 16 | standardised = False 17 | train_config = { 18 | 'iterations' : 2000, 19 | 'init_FG' : 'kmeans', 20 | 'init_S' : 'exponential', 21 | 'expo_prior' : 0.1 22 | } 23 | K_range = [1,2,3] 24 | L_range = [1,2,3] 25 | no_threads = 2 26 | no_folds = 10 27 | output_file = "./results.txt" 28 | files_nested_performances = ["./fold_%s.txt" % fold for fold in range(1,no_folds+1)] 29 | 30 | # Construct the parameter search 31 | parameter_search = [{'K':K,'L':L} for (K,L) in itertools.product(K_range,L_range)] 32 | 33 | # Load in the CCLE IC50 dataset 34 | R,M = load_ccle(ic50=True) 35 | 36 | # Run the cross-validation framework 37 | #random.seed(42) 38 | #numpy.random.seed(9000) 39 | nested_crossval = MatrixNestedCrossValidation( 40 | method=NMTF, 41 | X=R, 42 | M=M, 43 | K=no_folds, 44 | P=no_threads, 45 | parameter_search=parameter_search, 46 | train_config=train_config, 47 | file_performance=output_file, 48 | files_nested_performances=files_nested_performances 49 | ) 50 | nested_crossval.run() 51 | -------------------------------------------------------------------------------- /experiments/experiments_gdsc/cross_validation/np_nmtf/np_nmtf_xval.py: -------------------------------------------------------------------------------- 1 | """ 2 | Run the nested cross-validation for the NMTF class, on the Sanger dataset. 3 | 4 | Since we want to find co-clusters of significantly higher/lower drug sensitivity 5 | values, we should use the unstandardised Sanger dataset. 
6 | """ 7 | 8 | import sys, os 9 | project_location = os.path.dirname(__file__)+"/../../../../../" 10 | sys.path.append(project_location) 11 | 12 | import numpy, itertools, random 13 | from BNMTF.code.nmtf_np import NMTF 14 | from BNMTF.cross_validation.matrix_cross_validation import MatrixCrossValidation 15 | from BNMTF.data_drug_sensitivity.gdsc.load_data import load_gdsc 16 | 17 | 18 | # Settings 19 | standardised = False 20 | train_config = { 21 | 'iterations' : 3000, 22 | 'init_FG' : 'kmeans', 23 | 'init_S' : 'exponential', 24 | 'expo_prior' : 0.1 25 | } 26 | K_range = [2,4,6,8,10] 27 | L_range = [2,4,6,8,10] 28 | P = 5 29 | no_folds = 5 30 | output_file = "./results.txt" 31 | files_nested_performances = ["./fold_%s.txt" % fold for fold in range(1,no_folds+1)] 32 | 33 | # Construct the parameter search 34 | parameter_search = [{'K':K,'L':L} for (K,L) in itertools.product(K_range,L_range)] 35 | 36 | # Load in the Sanger dataset 37 | (_,X_min,M,_,_,_,_) = load_Sanger(standardised=standardised) 38 | 39 | # Run the cross-validation framework 40 | random.seed(42) 41 | numpy.random.seed(9000) 42 | nested_crossval = MatrixCrossValidation( 43 | method=NMTF, 44 | X=X_min, 45 | M=M, 46 | K=no_folds, 47 | parameter_search=parameter_search, 48 | train_config=train_config, 49 | file_performance=output_file 50 | ) 51 | nested_crossval.run() -------------------------------------------------------------------------------- /experiments/experiments_ccle/cross_validation/ccle_ic_vb_nmtf/greedysearch_xval_vb.py: -------------------------------------------------------------------------------- 1 | """ 2 | Run the cross validation with greedy search for model selection using VB-NMTF 3 | on the CCLE IC50 dataset. 4 | """ 5 | 6 | import sys, os 7 | project_location = os.path.dirname(__file__)+"/../../../../../" 8 | sys.path.append(project_location) 9 | 10 | from BNMTF.code.models.bnmtf_vb_optimised import bnmtf_vb_optimised 11 | from BNMTF.code.cross_validation.greedy_search_cross_validation import GreedySearchCrossValidation 12 | from BNMTF.data_drug_sensitivity.ccle.load_data import load_ccle 13 | 14 | 15 | # Settings 16 | standardised = False 17 | iterations = 1000 18 | 19 | init_S = 'random' #'exp' # 20 | init_FG = 'kmeans' #'exp' # 21 | 22 | K_range = [4,5,6,7,8,9,10] 23 | L_range = [4,5,6,7,8,9,10] 24 | no_folds = 10 25 | restarts = 1 26 | 27 | quality_metric = 'AIC' 28 | output_file = "./results.txt" 29 | 30 | alpha, beta = 1., 1. 31 | lambdaF = 1./10. 32 | lambdaS = 1./10. 33 | lambdaG = 1./10. 
34 | priors = { 'alpha':alpha, 'beta':beta, 'lambdaF':lambdaF, 'lambdaS':lambdaS, 'lambdaG':lambdaG } 35 | 36 | # Load in the CCLE IC50 dataset 37 | R,M = load_ccle(ic50=True) 38 | 39 | # Run the cross-validation framework 40 | #random.seed(42) 41 | #numpy.random.seed(9000) 42 | nested_crossval = GreedySearchCrossValidation( 43 | classifier=bnmtf_vb_optimised, 44 | R=R, 45 | M=M, 46 | values_K=K_range, 47 | values_L=L_range, 48 | folds=no_folds, 49 | priors=priors, 50 | init_S=init_S, 51 | init_FG=init_FG, 52 | iterations=iterations, 53 | restarts=restarts, 54 | quality_metric=quality_metric, 55 | file_performance=output_file 56 | ) 57 | nested_crossval.run() 58 | -------------------------------------------------------------------------------- /experiments/experiments_ccle/cross_validation/ccle_ec_vb_nmtf/greedysearch_xval_vb.py: -------------------------------------------------------------------------------- 1 | """ 2 | Run the cross validation with greedy search for model selection using VB-NMTF 3 | on the CCLE EC50 dataset. 4 | """ 5 | 6 | import sys, os 7 | project_location = os.path.dirname(__file__)+"/../../../../../" 8 | sys.path.append(project_location) 9 | 10 | from BNMTF.code.models.bnmtf_vb_optimised import bnmtf_vb_optimised 11 | from BNMTF.code.cross_validation.greedy_search_cross_validation import GreedySearchCrossValidation 12 | from BNMTF.data_drug_sensitivity.ccle.load_data import load_ccle 13 | 14 | 15 | # Settings 16 | standardised = False 17 | iterations = 1000 18 | 19 | init_S = 'random' #'exp' # 20 | init_FG = 'kmeans' #'exp' # 21 | 22 | K_range = [1,2,3,4,5,6,7,8,9,10] 23 | L_range = [1,2,3,4,5,6,7,8,9,10] 24 | no_folds = 10 25 | restarts = 1 26 | 27 | quality_metric = 'AIC' 28 | output_file = "./results.txt" 29 | 30 | alpha, beta = 1., 1. 31 | lambdaF = 1./10. 32 | lambdaS = 1./10. 33 | lambdaG = 1./10. 34 | priors = { 'alpha':alpha, 'beta':beta, 'lambdaF':lambdaF, 'lambdaS':lambdaS, 'lambdaG':lambdaG } 35 | 36 | # Load in the CCLE EC50 dataset 37 | R,M = load_ccle(ic50=False) 38 | 39 | # Run the cross-validation framework 40 | #random.seed(42) 41 | #numpy.random.seed(9000) 42 | nested_crossval = GreedySearchCrossValidation( 43 | classifier=bnmtf_vb_optimised, 44 | R=R, 45 | M=M, 46 | values_K=K_range, 47 | values_L=L_range, 48 | folds=no_folds, 49 | priors=priors, 50 | init_S=init_S, 51 | init_FG=init_FG, 52 | iterations=iterations, 53 | restarts=restarts, 54 | quality_metric=quality_metric, 55 | file_performance=output_file 56 | ) 57 | nested_crossval.run() 58 | -------------------------------------------------------------------------------- /experiments/experiments_ccle/cross_validation/ccle_ic_icm_nmtf/greedysearch_xval_icm.py: -------------------------------------------------------------------------------- 1 | """ 2 | Run the cross validation with greedy search for model selection using VB-NMTF 3 | on the CCLE IC50 dataset. 
4 | """ 5 | 6 | import sys, os 7 | project_location = os.path.dirname(__file__)+"/../../../../../" 8 | sys.path.append(project_location) 9 | 10 | from BNMTF.code.models.nmtf_icm import nmtf_icm 11 | from BNMTF.code.cross_validation.greedy_search_cross_validation import GreedySearchCrossValidation 12 | from BNMTF.data_drug_sensitivity.ccle.load_data import load_ccle 13 | 14 | 15 | # Settings 16 | standardised = False 17 | iterations = 1000 18 | 19 | init_S = 'random' #'exp' # 20 | init_FG = 'kmeans' #'exp' # 21 | 22 | K_range = [4,5,6,7,8,9,10] 23 | L_range = [4,5,6,7,8,9,10] 24 | no_folds = 10 25 | restarts = 1 26 | 27 | quality_metric = 'AIC' 28 | output_file = "./results.txt" 29 | 30 | alpha, beta = 1., 1. 31 | lambdaF = 1./10. 32 | lambdaS = 1./10. 33 | lambdaG = 1./10. 34 | priors = { 'alpha':alpha, 'beta':beta, 'lambdaF':lambdaF, 'lambdaS':lambdaS, 'lambdaG':lambdaG } 35 | 36 | minimum_TN = 0.01 37 | 38 | # Load in the CCLE IC50 dataset 39 | R,M = load_ccle(ic50=True) 40 | 41 | # Run the cross-validation framework 42 | #random.seed(1) 43 | #numpy.random.seed(1) 44 | nested_crossval = GreedySearchCrossValidation( 45 | classifier=nmtf_icm, 46 | R=R, 47 | M=M, 48 | values_K=K_range, 49 | values_L=L_range, 50 | folds=no_folds, 51 | priors=priors, 52 | init_S=init_S, 53 | init_FG=init_FG, 54 | iterations=iterations, 55 | restarts=restarts, 56 | quality_metric=quality_metric, 57 | file_performance=output_file 58 | ) 59 | nested_crossval.run(minimum_TN=minimum_TN) 60 | -------------------------------------------------------------------------------- /experiments/experiments_ccle/cross_validation/ccle_ec_icm_nmtf/greedysearch_xval_icm.py: -------------------------------------------------------------------------------- 1 | """ 2 | Run the cross validation with greedy search for model selection using VB-NMTF 3 | on the CCLE EC50 dataset. 4 | """ 5 | 6 | import sys, os 7 | project_location = os.path.dirname(__file__)+"/../../../../../" 8 | sys.path.append(project_location) 9 | 10 | from BNMTF.code.models.nmtf_icm import nmtf_icm 11 | from BNMTF.code.cross_validation.greedy_search_cross_validation import GreedySearchCrossValidation 12 | from BNMTF.data_drug_sensitivity.ccle.load_data import load_ccle 13 | 14 | 15 | # Settings 16 | standardised = False 17 | iterations = 1000 18 | 19 | init_S = 'random' #'exp' # 20 | init_FG = 'kmeans' #'exp' # 21 | 22 | K_range = [1,2,3,4,5,6,7,8,9,10] 23 | L_range = [1,2,3,4,5,6,7,8,9,10] 24 | no_folds = 10 25 | restarts = 1 26 | 27 | quality_metric = 'AIC' 28 | output_file = "./results.txt" 29 | 30 | alpha, beta = 1., 1. 31 | lambdaF = 1./10. 32 | lambdaS = 1./10. 33 | lambdaG = 1./10. 
34 | priors = { 'alpha':alpha, 'beta':beta, 'lambdaF':lambdaF, 'lambdaS':lambdaS, 'lambdaG':lambdaG } 35 | 36 | minimum_TN = 0.01 37 | 38 | # Load in the CCLE EC50 dataset 39 | R,M = load_ccle(ic50=False) 40 | 41 | # Run the cross-validation framework 42 | #random.seed(1) 43 | #numpy.random.seed(1) 44 | nested_crossval = GreedySearchCrossValidation( 45 | classifier=nmtf_icm, 46 | R=R, 47 | M=M, 48 | values_K=K_range, 49 | values_L=L_range, 50 | folds=no_folds, 51 | priors=priors, 52 | init_S=init_S, 53 | init_FG=init_FG, 54 | iterations=iterations, 55 | restarts=restarts, 56 | quality_metric=quality_metric, 57 | file_performance=output_file 58 | ) 59 | nested_crossval.run(minimum_TN=minimum_TN) 60 | -------------------------------------------------------------------------------- /experiments/experiments_ccle/cross_validation/ccle_ic_gibbs_nmtf/greedysearch_xval_gibbs.py: -------------------------------------------------------------------------------- 1 | """ 2 | Run the cross validation with greedy search for model selection using VB-NMTF 3 | on the CCLE IC50 dataset. 4 | """ 5 | 6 | import sys, os 7 | project_location = os.path.dirname(__file__)+"/../../../../../" 8 | sys.path.append(project_location) 9 | 10 | from BNMTF.code.models.bnmtf_gibbs_optimised import bnmtf_gibbs_optimised 11 | from BNMTF.code.cross_validation.greedy_search_cross_validation import GreedySearchCrossValidation 12 | from BNMTF.data_drug_sensitivity.ccle.load_data import load_ccle 13 | 14 | 15 | # Settings 16 | standardised = False 17 | iterations = 1000 18 | burn_in = 900 19 | thinning = 2 20 | 21 | init_S = 'random' #'exp' # 22 | init_FG = 'kmeans' #'exp' # 23 | 24 | K_range = [4,5,6,7,8,9,10] 25 | L_range = [4,5,6,7,8,9,10] 26 | no_folds = 10 27 | restarts = 1 28 | 29 | quality_metric = 'AIC' 30 | output_file = "./results.txt" 31 | 32 | alpha, beta = 1., 1. 33 | lambdaF = 1./10. 34 | lambdaS = 1./10. 35 | lambdaG = 1./10. 36 | priors = { 'alpha':alpha, 'beta':beta, 'lambdaF':lambdaF, 'lambdaS':lambdaS, 'lambdaG':lambdaG } 37 | 38 | # Load in the CCLE IC50 dataset 39 | R,M = load_ccle(ic50=True) 40 | 41 | # Run the cross-validation framework 42 | #random.seed(1) 43 | #numpy.random.seed(1) 44 | nested_crossval = GreedySearchCrossValidation( 45 | classifier=bnmtf_gibbs_optimised, 46 | R=R, 47 | M=M, 48 | values_K=K_range, 49 | values_L=L_range, 50 | folds=no_folds, 51 | priors=priors, 52 | init_S=init_S, 53 | init_FG=init_FG, 54 | iterations=iterations, 55 | restarts=restarts, 56 | quality_metric=quality_metric, 57 | file_performance=output_file 58 | ) 59 | nested_crossval.run(burn_in=burn_in,thinning=thinning) 60 | -------------------------------------------------------------------------------- /experiments/experiments_ccle/cross_validation/ccle_ec_gibbs_nmtf/greedysearch_xval_gibbs.py: -------------------------------------------------------------------------------- 1 | """ 2 | Run the cross validation with greedy search for model selection using VB-NMTF 3 | on the CCLE EC50 dataset. 
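Aside on the GreedySearchCrossValidation scripts in this section: the class itself is not included in this dump. As an illustration only, one common greedy strategy over a (K, L) grid starts from the smallest values and repeatedly moves to the neighbouring point with the best quality metric (e.g. AIC, lower is better), stopping when no neighbour improves. The sketch below is hypothetical and may differ from the repository's actual implementation.

def greedy_search(evaluate_quality, K_range, L_range):
    # evaluate_quality(K, L) returns a quality value such as AIC (lower is better).
    K, L = K_range[0], L_range[0]
    best = evaluate_quality(K, L)
    while True:
        neighbours = [(K + 1, L), (K, L + 1), (K + 1, L + 1)]
        candidates = [(evaluate_quality(Kn, Ln), Kn, Ln)
                      for (Kn, Ln) in neighbours if Kn in K_range and Ln in L_range]
        if not candidates or min(candidates)[0] >= best:
            return K, L, best
        best, K, L = min(candidates)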
4 | """ 5 | 6 | import sys, os 7 | project_location = os.path.dirname(__file__)+"/../../../../../" 8 | sys.path.append(project_location) 9 | 10 | from BNMTF.code.models.bnmtf_gibbs_optimised import bnmtf_gibbs_optimised 11 | from BNMTF.code.cross_validation.greedy_search_cross_validation import GreedySearchCrossValidation 12 | from BNMTF.data_drug_sensitivity.ccle.load_data import load_ccle 13 | 14 | 15 | # Settings 16 | standardised = False 17 | iterations = 1000 18 | burn_in = 900 19 | thinning = 2 20 | 21 | init_S = 'random' #'exp' # 22 | init_FG = 'kmeans' #'exp' # 23 | 24 | K_range = [1,2,3,4,5,6,7,8,9,10] 25 | L_range = [1,2,3,4,5,6,7,8,9,10] 26 | no_folds = 10 27 | restarts = 1 28 | 29 | quality_metric = 'AIC' 30 | output_file = "./results.txt" 31 | 32 | alpha, beta = 1., 1. 33 | lambdaF = 1./10. 34 | lambdaS = 1./10. 35 | lambdaG = 1./10. 36 | priors = { 'alpha':alpha, 'beta':beta, 'lambdaF':lambdaF, 'lambdaS':lambdaS, 'lambdaG':lambdaG } 37 | 38 | # Load in the CCLE EC50 dataset 39 | R,M = load_ccle(ic50=False) 40 | 41 | # Run the cross-validation framework 42 | #random.seed(1) 43 | #numpy.random.seed(1) 44 | nested_crossval = GreedySearchCrossValidation( 45 | classifier=bnmtf_gibbs_optimised, 46 | R=R, 47 | M=M, 48 | values_K=K_range, 49 | values_L=L_range, 50 | folds=no_folds, 51 | priors=priors, 52 | init_S=init_S, 53 | init_FG=init_FG, 54 | iterations=iterations, 55 | restarts=restarts, 56 | quality_metric=quality_metric, 57 | file_performance=output_file 58 | ) 59 | nested_crossval.run(burn_in=burn_in,thinning=thinning) 60 | -------------------------------------------------------------------------------- /tests/code/distributions/test_truncated_normal_vector.py: -------------------------------------------------------------------------------- 1 | """ 2 | Test the class for Truncated Normal draws and expectations in truncated_normal_vector.py. 3 | """ 4 | 5 | import sys, os 6 | project_location = os.path.dirname(__file__)+"/../../../" 7 | sys.path.append(project_location) 8 | 9 | from BNMTF.code.models.distributions.truncated_normal_vector import TN_vector_draw, TN_vector_expectation, TN_vector_variance, TN_vector_mode 10 | from scipy.stats import norm 11 | import numpy 12 | 13 | def test_expectation(): 14 | # One normal case, one exponential approximation 15 | mu = [1.0, -1] 16 | tau = [3.0, 2000] 17 | sigma = [0.5773502691896258,0.022360679774997897] 18 | 19 | lambdav = ( norm.pdf( - mu[0] / sigma[0] ) ) / ( 1 - norm.cdf( - mu[0] / sigma[0] ) ) 20 | expectation = mu[0] + sigma[0] * lambdav 21 | assert numpy.array_equal(TN_vector_expectation(mu,tau), [expectation, 1./2000.]) 22 | 23 | def test_variance(): 24 | # One normal case, one exponential approximation 25 | mu = [1.0, -1] 26 | tau = [3.0, 2000] 27 | sigma = [0.5773502691896258,0.022360679774997897] 28 | 29 | lambdav = ( norm.pdf( - mu[0] / sigma[0] ) ) / ( 1 - norm.cdf( - mu[0] / sigma[0] ) ) 30 | variance = sigma[0]**2 * ( 1 - ( lambdav * ( lambdav + mu[0] / sigma[0] ) ) ) 31 | assert numpy.array_equal(TN_vector_variance(mu,tau), [variance, (1./2000.)**2]) 32 | 33 | # Test a draw - simply verify it is > 0. 
34 | # Also test whether we get inf for a very negative mean and high variance 35 | def test_draw(): 36 | # One normal case, and one when tau=0 - then draws should be inf, and hence return 0.0 37 | mu = [1.0, 0.32] 38 | tau = [3.0, 0.0] 39 | for i in range(0,100): 40 | v1,v2 = TN_vector_draw(mu,tau) 41 | assert v1 >= 0.0 and v2 == 0.0 42 | 43 | # Test the mode 44 | def test_mode(): 45 | # Positive mean 46 | mus = [1.0, -2.0] 47 | assert numpy.array_equal(TN_vector_mode(mus), [1.0, 0.0]) -------------------------------------------------------------------------------- /experiments/experiments_ccle/cross_validation/ccle_ec_np_nmf/fold_9.txt: -------------------------------------------------------------------------------- 1 | Tried parameters {'K': 1}. Average performances: {'R^2': nan, 'MSE': nan, 'Rp': nan}. 2 | All performances: {'R^2': [0.216611015910689, 0.22933311154207237, nan, 0.2578897031878836, 0.331604508102044, 0.2697618006011768, 0.2960951709801749, 0.27877041582910267, 0.2214366029449465, 0.2804182684113584], 'MSE': [8.4986860459330984, 8.5964614922072542, nan, 8.5121677002903731, 7.6477769705814671, 7.9285508271679408, 7.6131400065062351, 8.1400250543716748, 8.1792659908773242, 8.2557830299434443], 'Rp': [0.50867373747678257, 0.51234252231129318, nan, 0.53353692811687503, 0.58600869226509711, 0.54555429137926303, 0.56002232775843508, 0.54098479061009419, 0.51717368577490908, 0.55164102112309277]}. 3 | Tried parameters {'K': 2}. Average performances: {'R^2': nan, 'MSE': nan, 'Rp': nan}. 4 | All performances: {'R^2': [-622.7877969532341, 0.11866718131919385, 0.25720659597986006, nan, 0.1073351374391378, 0.1339953245715475, 0.17407769998419298, nan, 0.11432082256516418, 0.04864542858531151], 'MSE': [6848.0349188302898, 9.8672941726370471, 8.1186952386872093, nan, 10.079746954062026, 10.030552260655949, 8.808698754376044, nan, 9.7855561241850868, 10.733309285508104], 'Rp': [-0.021028385208208673, 0.49379477886605455, 0.54540536423306707, nan, 0.45766094883634423, 0.46705932389244287, 0.50272337567034409, nan, 0.47485890729545754, 0.43622611302790137]}. 5 | Tried parameters {'K': 3}. Average performances: {'R^2': nan, 'MSE': nan, 'Rp': nan}. 6 | All performances: {'R^2': [nan, -0.30128901246075346, nan, nan, -5.60373268052594, 0.0949469532562468, 0.01551745106717084, nan, -0.36250925441276416, -0.4932119464538114], 'MSE': [nan, 13.609745253502085, nan, nan, 76.421353615923834, 9.9988394995214769, 10.414351112018062, nan, 15.231313868648897, 15.712739700150539], 'Rp': [nan, 0.40763353038370881, nan, nan, 0.17483985605473276, 0.47663592771395846, 0.46636156907415749, nan, 0.38745567937050013, 0.36924602012392543]}. 7 | Best performances: {'R^2': nan, 'MSE': nan, 'Rp': nan}. Best parameters: {'K': 1}. 8 | -------------------------------------------------------------------------------- /experiments/experiments_ccle/cross_validation/ccle_ec_np_nmf/fold_5.txt: -------------------------------------------------------------------------------- 1 | Tried parameters {'K': 1}. Average performances: {'R^2': nan, 'MSE': nan, 'Rp': nan}. 
2 | All performances: {'R^2': [0.1951063526332698, 0.316439306419682, 0.23327128941659037, 0.30175896170468575, 0.2142841835316992, 0.29609455118365846, nan, 0.3419277760444793, 0.24574257705700353, nan], 'MSE': [9.3830407990764026, 7.5263625619577228, 8.5618716606822556, 7.8237129224716648, 8.6884840829642567, 7.7946395379214941, nan, 7.2419464425302946, 8.5037513334722501, nan], 'Rp': [0.49316405761232224, 0.57375170746955828, 0.50963314820943684, 0.56292971825455729, 0.50738977808977548, 0.55746303835286581, nan, 0.60180070406302111, 0.51315467505155044, nan]}. 3 | Tried parameters {'K': 2}. Average performances: {'R^2': nan, 'MSE': nan, 'Rp': nan}. 4 | All performances: {'R^2': [-1.171048530181305, 0.17458891333616122, nan, -6.983946877507161, 0.2084722168745612, 0.1354558151522567, -0.6329056207525043, 0.2097682986614563, nan, -0.37967075134692996], 'MSE': [24.699130185346856, 8.7077657693886135, nan, 89.263419623862603, 9.1881137220451858, 9.966194105810974, 17.892775374851659, 8.6585261045687663, nan, 15.710727465272999], 'Rp': [0.29534962587610775, 0.51276511781961609, nan, 0.1285628671805373, 0.52825710742940135, 0.47887037522434611, 0.35118586400298868, 0.51805479369074614, nan, 0.31378365310386303]}. 5 | Tried parameters {'K': 3}. Average performances: {'R^2': nan, 'MSE': nan, 'Rp': nan}. 6 | All performances: {'R^2': [-0.10314277468709032, -6.0921653957558055, -1.6700151618735837, -1.528039236344119, nan, -0.35292735925501484, -0.5718810602754036, -2276.398465328347, -360.54052317583694, nan], 'MSE': [12.864918483970719, 78.576740843638092, 30.663168649581646, 26.793135113840798, nan, 15.206494089831338, 17.675306146077091, 24317.680535005977, 4046.8792049399917, nan], 'Rp': [0.4228702940540261, 0.10975646394271045, 0.18479661662258651, 0.26681684928992894, nan, 0.40313808211268082, 0.33405360206444284, -0.020030370755298336, 0.064598843075654958, nan]}. 7 | Best performances: {'R^2': nan, 'MSE': nan, 'Rp': nan}. Best parameters: {'K': 1}. 8 | -------------------------------------------------------------------------------- /experiments/experiments_ccle/cross_validation/ccle_ec_np_nmf/fold_6.txt: -------------------------------------------------------------------------------- 1 | Tried parameters {'K': 1}. Average performances: {'R^2': nan, 'MSE': nan, 'Rp': nan}. 2 | All performances: {'R^2': [nan, 0.21430513502919502, 0.3468227875245955, 0.2714590712810081, 0.2743058214663774, -0.8018205361110275, 0.28330106292595114, 0.2804508379442078, nan, 0.2684107840979276], 'MSE': [nan, 8.7103107036285259, 7.4736931590160927, 7.9425008980092029, 7.8075668195354346, 20.042343074149425, 7.8943248408454219, 7.8541624076149636, nan, 8.1746314786815333], 'Rp': [nan, 0.5030448555822199, 0.59770635580789122, 0.53869718688815016, 0.54249148126495483, 0.30961441432529646, 0.54671815087531295, 0.53759309539724076, nan, 0.53933630252274423]}. 3 | Tried parameters {'K': 2}. Average performances: {'R^2': nan, 'MSE': nan, 'Rp': nan}. 
4 | All performances: {'R^2': [-10.574586141244735, -1.389365906618926, 0.1284951617868606, -0.6532257613373607, 0.26548647351581556, 0.08535354607001988, nan, 0.19519917191963965, 0.010138463016313604, 0.09993722409041916], 'MSE': [127.09304660766573, 26.00332411273774, 9.1928647934283152, 18.20398460274615, 8.1815853726180183, 10.763542481389488, nan, 8.8600967344552952, 11.085183888564529, 10.108906577294286], 'Rp': [0.083929115710765639, 0.2559405330829273, 0.48476312444207975, 0.33603090410971781, 0.55650541757382033, 0.44797117581445706, nan, 0.51567825096849229, 0.44585464672819869, 0.43696636140740319]}. 5 | Tried parameters {'K': 3}. Average performances: {'R^2': nan, 'MSE': nan, 'Rp': nan}. 6 | All performances: {'R^2': [-0.6553260799449678, nan, -1.0850670786351428, -689.7869815763883, -35501.60512548042, nan, -1.3136255833332835, nan, -1.1379935986609118, -0.5751721428616199], 'MSE': [18.521139937641482, nan, 23.261201666785105, 7472.8920848146545, 386406.65049337299, nan, 24.398293916690342, nan, 24.429166844167778, 17.143793627639749], 'Rp': [0.33110133766758371, nan, 0.29845543955805898, -0.02066184909799116, -0.027047100512401397, nan, 0.32654047243119572, nan, 0.28576413729628591, 0.36784454670170641]}. 7 | Best performances: {'R^2': nan, 'MSE': nan, 'Rp': nan}. Best parameters: {'K': 1}. 8 | -------------------------------------------------------------------------------- /experiments/experiments_gdsc/cross_validation/np_nmtf/np_nmtf_nested_xval.py: -------------------------------------------------------------------------------- 1 | """ 2 | Run the nested cross-validation for the NMTF class, on the Sanger dataset. 3 | """ 4 | 5 | import sys, os 6 | project_location = os.path.dirname(__file__)+"/../../../../../" 7 | sys.path.append(project_location) 8 | 9 | import numpy, itertools, random 10 | from BNMTF.code.models.nmtf_np import NMTF 11 | from BNMTF.code.cross_validation.nested_matrix_cross_validation import MatrixNestedCrossValidation 12 | from BNMTF.data_drug_sensitivity.gdsc.load_data import load_gdsc 13 | 14 | 15 | # Settings 16 | standardised = False 17 | train_config = { 18 | 'iterations' : 2000, 19 | 'init_FG' : 'kmeans', 20 | 'init_S' : 'exponential', 21 | 'expo_prior' : 0.1 22 | } 23 | P = 5 24 | no_folds = 10 25 | output_file = "./results.txt" 26 | files_nested_performances = ["./fold_%s.txt" % fold for fold in range(1,no_folds+1)] 27 | 28 | # Construct the parameter search 29 | parameter_search = [{'K':K,'L':L} for (K,L) in [(6,6), (8,8), (10,10)]] 30 | 31 | # Load in the Sanger dataset 32 | (_,X_min,M,_,_,_,_) = load_gdsc(standardised=standardised) 33 | 34 | # Run the cross-validation framework 35 | #random.seed(42) 36 | #numpy.random.seed(9000) 37 | nested_crossval = MatrixNestedCrossValidation( 38 | method=NMTF, 39 | X=X_min, 40 | M=M, 41 | K=no_folds, 42 | P=5, 43 | parameter_search=parameter_search, 44 | train_config=train_config, 45 | file_performance=output_file, 46 | files_nested_performances=files_nested_performances 47 | ) 48 | nested_crossval.run() 49 | 50 | """ 51 | Average performances: {'R^2': 0.7948758708329315, 'MSE': 2.3988138408823394, 'Rp': 0.89178480273294591}. 52 | All performances: {'R^2': [0.7990403667846077, 0.7974592552426493, 0.7971559801700554, 0.7908843325029544, 0.7898394194643907], 'MSE': [2.3352752661466534, 2.3572503168518866, 2.4163844950465756, 2.4334280833191895, 2.4517310430473933], 'Rp': [0.89405814991652022, 0.89333868839901787, 0.89316370564203762, 0.88952291745977685, 0.88884055224737657]}. 
53 | 54 | Average MSE: 2.3988 +- 0.0449 55 | Averagr R^2: 0.795 +- 0.004 56 | """ -------------------------------------------------------------------------------- /tests/code/distributions/test_truncated_normal.py: -------------------------------------------------------------------------------- 1 | """ 2 | Test the class for Truncated Normal draws and expectations in truncated_normal.py. 3 | """ 4 | 5 | import sys, os 6 | project_location = os.path.dirname(__file__)+"/../../../" 7 | sys.path.append(project_location) 8 | 9 | from BNMTF.code.models.distributions.truncated_normal import TN_draw, TN_expectation, TN_variance, TN_mode 10 | from scipy.stats import norm 11 | import numpy 12 | 13 | def test_expectation(): 14 | mu = 1.0 15 | tau = 3.0 16 | sigma = 0.5773502691896258 17 | 18 | lambdav = ( norm.pdf( - mu / sigma ) ) / ( 1 - norm.cdf( - mu / sigma ) ) 19 | expectation = mu + sigma * lambdav 20 | assert TN_expectation(mu,tau) == expectation 21 | 22 | # Also test that we get variance and exp of an Exp if mu is less than -30*sigma 23 | mu = -1. 24 | tau = 2000. 25 | assert TN_expectation(mu,tau) == 1./2000. 26 | 27 | 28 | def test_variance(): 29 | mu = 1.0 30 | tau = 3.0 31 | sigma = 0.5773502691896258 32 | 33 | lambdav = ( norm.pdf( - mu / sigma ) ) / ( 1 - norm.cdf( - mu / sigma ) ) 34 | variance = sigma**2 * ( 1 - ( lambdav * ( lambdav + mu / sigma ) ) ) 35 | assert TN_variance(mu,tau) == variance 36 | 37 | # Also test that we get variance and exp of an Exp if mu is less than -30*sigma 38 | mu = -1. 39 | tau = 2000. 40 | assert TN_variance(mu,tau) == (1./2000.)**2 41 | 42 | # Test a draw - simply verify it is > 0. 43 | # Also test whether we get inf for a very negative mean and high variance 44 | def test_draw(): 45 | mu = 1.0 46 | tau = 3.0 47 | for i in range(0,100): 48 | assert TN_draw(mu,tau) >= 0.0 49 | 50 | # Test everything is handled when tau = 0 - then draws should be inf, and hence return 0.0 51 | mu = 0.32 52 | tau = 0.0 53 | for i in range(0,100): 54 | assert TN_draw(mu,tau) == 0.0 55 | 56 | # Test the mode 57 | def test_mode(): 58 | # Positive mean 59 | mu = 1.0 60 | assert TN_mode(mu) == mu 61 | 62 | # Negative mean 63 | mu = -2.0 64 | assert TN_mode(mu) == 0. -------------------------------------------------------------------------------- /experiments/experiments_ccle/cross_validation/ccle_ec_np_nmf/fold_2.txt: -------------------------------------------------------------------------------- 1 | Tried parameters {'K': 1}. Average performances: {'R^2': nan, 'MSE': nan, 'Rp': nan}. 2 | All performances: {'R^2': [nan, 0.2338026674512549, 0.24267924746500213, 0.2985746728951062, 0.3439138150615887, 0.2054017340552602, 0.2581434876694477, 0.2810058579538135, 0.18815607426151892, 0.2627100412359329], 'MSE': [nan, 8.0682632161323298, 8.082164151585177, 7.7212090530315711, 7.6052527849646081, 9.0132883629946878, 8.175161024131949, 7.9449355744781682, 8.8800426511770194, 8.7053342310583872], 'Rp': [nan, 0.5203560701358898, 0.52977117817558295, 0.55682352912447286, 0.59177419501963513, 0.50257463819761383, 0.52992755846598638, 0.54362913606526708, 0.49035409310988592, 0.52776401905582515]}. 3 | Tried parameters {'K': 2}. Average performances: {'R^2': nan, 'MSE': nan, 'Rp': nan}. 
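Aside on the truncated-normal tests above (test_truncated_normal.py and test_truncated_normal_vector.py): the expectation and variance being tested are the standard moments of a normal distribution with mean mu and precision tau truncated to [0, inf). A minimal cross-check against scipy.stats.truncnorm, which parameterises the truncation bounds in standardised units; this snippet is illustrative and not one of the repository's files.

import numpy
from scipy.stats import truncnorm

mu, tau = 1.0, 3.0
sigma = 1.0 / numpy.sqrt(tau)
a, b = (0.0 - mu) / sigma, numpy.inf              # truncate to [0, inf)
print(truncnorm.mean(a, b, loc=mu, scale=sigma))  # should agree with TN_expectation(mu, tau)
print(truncnorm.var(a, b, loc=mu, scale=sigma))   # should agree with TN_variance(mu, tau)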
4 | All performances: {'R^2': [0.04419676032579323, nan, 0.13241529554268783, nan, 0.17083211691009592, 0.11015768624051969, -0.8861236370084338, 0.19676246581206613, 0.20490864015981958, 0.17146332368824513], 'MSE': [10.570809464373193, nan, 9.882166538940016, nan, 9.1712738223699404, 10.08957191600056, 22.14389377012699, 8.9157757459420175, 8.9353998702741126, 9.2894707483163845], 'Rp': [0.44046375507402102, nan, 0.46060627143916572, nan, 0.50996876567397387, 0.47963797008188425, 0.31075222674827291, 0.50736608025434837, 0.52106850777596314, 0.48942498851092314]}. 5 | Tried parameters {'K': 3}. Average performances: {'R^2': nan, 'MSE': nan, 'Rp': nan}. 6 | All performances: {'R^2': [-1.288956827542092, nan, -0.05552907508683691, nan, -259885.1400121829, -1.2009091814714075, -2.2520116244746298, -0.19843734625887466, -37907.93493393469, -1.6704726837818593], 'MSE': [27.860769248738279, nan, 10.727334198480079, nan, 2852996.905621931, 24.322556483089905, 35.494435132567318, 13.737167249203825, 426017.1245206889, 30.11831714079592], 'Rp': [0.20484999889655225, nan, 0.46510169681540581, nan, -0.028802496071943077, 0.29814811042808903, 0.19372292261481913, 0.36085875391294031, 0.014855762342711364, 0.21617475250238427]}. 7 | Best performances: {'R^2': nan, 'MSE': nan, 'Rp': nan}. Best parameters: {'K': 1}. 8 | -------------------------------------------------------------------------------- /experiments/experiments_gdsc/cross_validation/vb_nmf/linesearch_xval_vb.py: -------------------------------------------------------------------------------- 1 | """ 2 | Run the cross validation with line search for model selection using VB-NMF on 3 | the Sanger dataset. 4 | """ 5 | 6 | import sys, os 7 | project_location = os.path.dirname(__file__)+"/../../../../../" 8 | sys.path.append(project_location) 9 | 10 | import numpy, random 11 | from BNMTF.code.models.bnmf_vb_optimised import bnmf_vb_optimised 12 | from BNMTF.code.cross_validation.line_search_cross_validation import LineSearchCrossValidation 13 | from BNMTF.data_drug_sensitivity.gdsc.load_data import load_gdsc 14 | 15 | 16 | # Settings 17 | standardised = False 18 | iterations = 1000 19 | init_UV = 'random' 20 | 21 | K_range = [15,20,25,30] 22 | no_folds = 10 23 | restarts = 1 24 | 25 | quality_metric = 'AIC' 26 | output_file = "./results.txt" 27 | 28 | alpha, beta = 1., 1. 29 | lambdaU = 1./10. 30 | lambdaV = 1./10. 
31 | priors = { 'alpha':alpha, 'beta':beta, 'lambdaU':lambdaU, 'lambdaV':lambdaV } 32 | 33 | # Load in the Sanger dataset 34 | (_,X_min,M,_,_,_,_) = load_gdsc(standardised=standardised,sep=',') 35 | 36 | # Run the cross-validation framework 37 | #random.seed(42) 38 | #numpy.random.seed(9000) 39 | nested_crossval = LineSearchCrossValidation( 40 | classifier=bnmf_vb_optimised, 41 | R=X_min, 42 | M=M, 43 | values_K=K_range, 44 | folds=no_folds, 45 | priors=priors, 46 | init_UV=init_UV, 47 | iterations=iterations, 48 | restarts=restarts, 49 | quality_metric=quality_metric, 50 | file_performance=output_file 51 | ) 52 | nested_crossval.run() 53 | 54 | """ 55 | all_MSE = [2.2242309355503416, 2.3108126630384804, 2.4095896447817631, 2.2188694213830114, 2.4185938516134278, 2.1808748510586002, 2.2503432196374651, 2.2305023229025145, 2.3595465204422488, 2.2186318302878667] 56 | all_R2 = [0.8123419361488506, 0.8011409466575017, 0.7943028271877304, 0.8125046212085996, 0.7934881370166628, 0.8111969927756486, 0.8058878338360765, 0.811089129626958, 0.798953276136085, 0.8151865445946502] 57 | 58 | Average MSE: 2.2821995260695718 +- 0.0066998949966021598 59 | Average R^2: 0.80560922451887629 +- 5.8495363723835686e-05 60 | """ -------------------------------------------------------------------------------- /experiments/experiments_ccle/cross_validation/ccle_ec_np_nmf/fold_8.txt: -------------------------------------------------------------------------------- 1 | Tried parameters {'K': 1}. Average performances: {'R^2': nan, 'MSE': nan, 'Rp': nan}. 2 | All performances: {'R^2': [0.2191117879065282, 0.3025208761584226, 0.2335431843981689, 0.23230056670630927, 0.2698234452551742, 0.2237914673446625, 0.2349923420288721, nan, 0.2395934680562396, 0.3714262044867832], 'MSE': [8.5294450822462249, 7.8455410585413157, 8.6458871689201704, 8.6222047230900785, 8.2643080332090388, 8.1643384867735271, 8.220769247665789, nan, 7.7266809380121932, 7.6950694660610504], 'Rp': [0.51081940829159389, 0.56292089638728704, 0.51097352461126666, 0.52396126247043062, 0.53533794193009399, 0.51975939464576337, 0.51057358098857397, nan, 0.53897161493527534, 0.61552779573747873]}. 3 | Tried parameters {'K': 2}. Average performances: {'R^2': nan, 'MSE': nan, 'Rp': nan}. 4 | All performances: {'R^2': [nan, 0.21402336348228312, -0.9954016738729357, -0.05708216181933623, nan, 0.15983725469682797, 0.17734143313805995, 0.11493552555492692, 0.09913208752230518, 0.13745251757599408], 'MSE': [nan, 8.8866787037910804, 21.709065879083756, 12.574803111226991, nan, 9.9914215596752083, 8.0762880740899661, 9.5080242587996295, 10.190956716491094, 10.041978971360008], 'Rp': [nan, 0.51647947427073315, 0.29170858333600236, 0.39777416683641215, nan, 0.49135944242495899, 0.54227453286269667, 0.47669909015187717, 0.4596133072820941, 0.46539961616166731]}. 5 | Tried parameters {'K': 3}. Average performances: {'R^2': nan, 'MSE': nan, 'Rp': nan}. 6 | All performances: {'R^2': [-0.7688841094869072, -1.133058310558594, -2.6890824592103306, -5.683793153559361, -340.9197946230261, -0.32626659476204556, -178300.35916493824, nan, -6.665355573094988, nan], 'MSE': [18.81684489189961, 23.690856566159884, 42.460735167261419, 75.942538259218182, 3904.7656453702798, 15.064314751645929, 1864462.7642969301, nan, 84.415553270096979, nan], 'Rp': [0.29769511272608729, 0.26630387087269397, 0.23469363048225003, 0.12218533675310858, 0.058908368944496886, 0.3621226360166579, -0.030959163112430244, nan, 0.12457829508174811, nan]}. 
7 | Best performances: {'R^2': nan, 'MSE': nan, 'Rp': nan}. Best parameters: {'K': 1}. 8 | -------------------------------------------------------------------------------- /experiments/experiments_gdsc/cross_validation/icm_nmf/linesearch_xval_icm.py: -------------------------------------------------------------------------------- 1 | """ 2 | Run the cross validation with line search for model selection using VB-NMF on 3 | the Sanger dataset. 4 | """ 5 | 6 | import sys, os 7 | project_location = os.path.dirname(__file__)+"/../../../../../" 8 | sys.path.append(project_location) 9 | 10 | import numpy, random 11 | from BNMTF.code.models.nmf_icm import nmf_icm 12 | from BNMTF.code.cross_validation.line_search_cross_validation import LineSearchCrossValidation 13 | from BNMTF.data_drug_sensitivity.gdsc.load_data import load_gdsc 14 | 15 | 16 | # Settings 17 | standardised = False 18 | iterations = 1000 19 | init_UV = 'random' 20 | 21 | K_range = [15,20,25,30] 22 | no_folds = 10 23 | restarts = 1 24 | 25 | quality_metric = 'AIC' 26 | output_file = "./results.txt" 27 | 28 | alpha, beta = 1., 1. 29 | lambdaU = 1./10. 30 | lambdaV = 1./10. 31 | priors = { 'alpha':alpha, 'beta':beta, 'lambdaU':lambdaU, 'lambdaV':lambdaV } 32 | 33 | minimum_TN = 0.1 34 | 35 | # Load in the Sanger dataset 36 | (_,X_min,M,_,_,_,_) = load_gdsc(standardised=standardised) 37 | 38 | # Run the cross-validation framework 39 | #random.seed(42) 40 | #numpy.random.seed(9000) 41 | nested_crossval = LineSearchCrossValidation( 42 | classifier=nmf_icm, 43 | R=X_min, 44 | M=M, 45 | values_K=K_range, 46 | folds=no_folds, 47 | priors=priors, 48 | init_UV=init_UV, 49 | iterations=iterations, 50 | restarts=restarts, 51 | quality_metric=quality_metric, 52 | file_performance=output_file 53 | ) 54 | nested_crossval.run(minimum_TN=minimum_TN) 55 | 56 | """ 57 | all_MSE = [3.5039148405029135, 9.0622730084824674, 3.7009069757338917, 3.3451246835265178, 3.1147595748400358, 3.9037354439533258, 13.991970030783968, 3.1814210224127897, 3.2677197491020404, 12.460551868851933] 58 | all_R2 = [0.7072309782081623, 0.2162669348625822, 0.6853079551313846, 0.7144108917311998, 0.7341480430315861, 0.6671037956836574, -0.17013019643779437, 0.7288988508164431, 0.7201731755424339, -0.07478035943340289] 59 | 60 | Average MSE: 5.953237719818989 +- 16.165927731904752 61 | Average R^2: 0.49286300691362522 +- 0.11663176700952635 62 | """ -------------------------------------------------------------------------------- /data_drug_sensitivity/gdsc/drug_names_sorted_filtered: -------------------------------------------------------------------------------- 1 | 17-AAG 2 | 681640 3 | A-443654 4 | A-770041 5 | ABT-263 6 | ABT-888 7 | AG-014699 8 | AICAR 9 | AKT inhibitor VIII 10 | AMG-706 11 | AP-24534 12 | AS601245 13 | ATRA 14 | AUY922 15 | AZ628 16 | AZD-0530 17 | AZD-2281 18 | AZD6244 19 | AZD6482 20 | AZD7762 21 | AZD8055 22 | Axitinib 23 | BAY 61-3606 24 | BI-2536 25 | BIBW2992 26 | BIRB 0796 27 | BMS-509744 28 | BMS-536924 29 | BMS-708163 30 | BMS-754807 31 | BX-795 32 | Bexarotene 33 | Bicalutamide 34 | Bleomycin 35 | Bortezomib 36 | Bosutinib 37 | Bryostatin 1 38 | CCT007093 39 | CCT018159 40 | CEP-701 41 | CGP-082996 42 | CGP-60474 43 | CHIR-99021 44 | CI-1040 45 | CMK 46 | Camptothecin 47 | Cyclopamine 48 | Cytarabine 49 | DMOG 50 | Dasatinib 51 | Docetaxel 52 | Doxorubicin 53 | EHT 1864 54 | Elesclomol 55 | Embelin 56 | Epothilone B 57 | Erlotinib 58 | Etoposide 59 | FH535 60 | FTI-277 61 | GDC-0449 62 | GDC0941 63 | GNF-2 64 | GSK-1904529A 65 | GSK-650394 66 | 
GSK269962A 67 | GW 441756 68 | GW843682X 69 | Gefitinib 70 | Gemcitabine 71 | IPA-3 72 | Imatinib 73 | JNJ-26854165 74 | JNK Inhibitor VIII 75 | JNK-9L 76 | JW-7-52-1 77 | KIN001-135 78 | KU-55933 79 | LAQ824 80 | LFM-A13 81 | Lapatinib 82 | Lenalidomide 83 | MG-132 84 | MK-2206 85 | MS-275 86 | Methotrexate 87 | Midostaurin 88 | Mitomycin C 89 | NSC-87877 90 | NU-7441 91 | NVP-BEZ235 92 | NVP-TAE684 93 | Nilotinib 94 | Nutlin-3a 95 | OSI-906 96 | OSU-03012 97 | Obatoclax Mesylate 98 | PAC-1 99 | PD-0325901 100 | PD-0332991 101 | PD-173074 102 | PF-02341066 103 | PF-4708671 104 | PF-562271 105 | PHA-665752 106 | PLX4720 107 | Paclitaxel 108 | Parthenolide 109 | Pazopanib 110 | Pyrimethamine 111 | QS11 112 | RDEA119 113 | RO-3306 114 | Rapamycin 115 | Roscovitine 116 | S-Trityl-L-cysteine 117 | SB 216763 118 | SB590885 119 | SL 0101-1 120 | Salubrinal 121 | Shikonin 122 | Sorafenib 123 | Sunitinib 124 | TGX221 125 | TW 37 126 | Temsirolimus 127 | Thapsigargin 128 | Tipifarnib 129 | VX-680 130 | VX-702 131 | Vinblastine 132 | Vinorelbine 133 | Vorinostat 134 | WH-4-023 135 | WZ-1-84 136 | XMD8-85 137 | Z-LLNle-CHO 138 | ZM-447439 139 | -------------------------------------------------------------------------------- /experiments/experiments_gdsc/cross_validation/kbmf/run_nested_cross_val_kbmf.R~: -------------------------------------------------------------------------------- 1 | # Run the nested cross-validation for KBMF 2 | 3 | source("nested_cross_val_kbmf.R") 4 | K <- 10 5 | R_values <- c(5,6,7,8,9,10) 6 | 7 | Px <- 3 8 | Nx <- 622 9 | Pz <- 3 10 | Nz <- 138 11 | 12 | # Load in the drug sensitivity values 13 | folder_drug_sensitivity <- '/home/tab43/Documents/Projects/libraries/BNMTF/drug_sensitivity/data/gdsc/' 14 | name_drug_sensitivity <- 'ic50_excl_empty_filtered_cell_lines_drugs.txt' 15 | Y <- as.matrix(read.table(paste(folder_drug_sensitivity,name_drug_sensitivity,sep=''), 16 | header=TRUE, 17 | sep=',', 18 | colClasses=c(rep("NULL",3), rep("numeric",138)))) 19 | 20 | # Load in the kernels - X = cancer cell lines, Z = drugs 21 | folder_kernels <- '/home/tab43/Documents/Projects/libraries/BNMTF/drug_sensitivity/data/gdsc/kernels/' 22 | 23 | kernel_copy_variation <- as.matrix(read.table(paste(folder_kernels,'copy_variation.txt',sep=''),header=TRUE,sep='\t')) 24 | kernel_gene_expression <- as.matrix(read.table(paste(folder_kernels,'gene_expression.txt',sep=''),header=TRUE,sep='\t')) 25 | kernel_mutation <- as.matrix(read.table(paste(folder_kernels,'mutation.txt',sep=''),header=TRUE,sep='\t')) 26 | 27 | kernel_1d2d <- as.matrix(read.table(paste(folder_kernels,'1d2d_descriptors.txt',sep=''),header=TRUE,sep=',')) 28 | kernel_fingerprints<- as.matrix(read.table(paste(folder_kernels,'PubChem_fingerprints.txt',sep=''),header=TRUE,sep=',')) 29 | kernel_targets <- as.matrix(read.table(paste(folder_kernels,'targets.txt',sep=''),header=TRUE,sep=',')) 30 | 31 | Kx <- array(0, c(Nx, Nx, Px)) 32 | Kx[,, 1] <- kernel_copy_variation 33 | Kx[,, 2] <- kernel_gene_expression 34 | Kx[,, 3] <- kernel_mutation 35 | 36 | Kz <- array(0, c(Nz, Nz, Pz)) 37 | Kz[,, 1] <- kernel_1d2d 38 | Kz[,, 2] <- kernel_fingerprints 39 | Kz[,, 3] <- kernel_targets 40 | 41 | # Run the cross-validation 42 | kbmf_nested_cross_validation(Kx, Kz, Y, R_values, K) 43 | 44 | # R_values <- c(7,8,9) 45 | # MSE: 2.1906, 2.1993, 2.2380, 2.2522, 2.3098 46 | # R^2: 0.8108, 0.8109, 0.8073, 0.8120, 0.8021 47 | # Rp: 0.9005, 0.9005, 0.8986, 0.9011, 0.8958 48 | # Average performances: MSE=2.2380, R^2=0.8086, Rp=0.8993 49 | 
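Note: the MSE, R^2 and Rp values reported in the fold_*.txt logs and by the KBMF cross-validation scripts are all computed over the held-out (test) entries only, mirroring the MSE, R^2 = 1 - SS_res/SS_tot and Pearson-correlation lines in nested_cross_val_kbmf.R further down in this dump. Below is a minimal Python sketch of those three metrics for a partially observed matrix; R is the data matrix, R_pred the model's reconstruction and M_test a binary mask of test entries. The function and variable names are illustrative, not the repository's API.

import numpy

def heldout_performance(R, R_pred, M_test):
    # Evaluate only the entries held out for testing (M_test == 1).
    actual = R[M_test == 1]
    predicted = R_pred[M_test == 1]
    mse = numpy.mean((actual - predicted) ** 2)
    r2 = 1. - numpy.sum((actual - predicted) ** 2) / numpy.sum((actual - actual.mean()) ** 2)
    rp = numpy.corrcoef(actual, predicted)[0, 1]   # Pearson correlation, reported as Rp
    return {'MSE': mse, 'R^2': r2, 'Rp': rp}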
-------------------------------------------------------------------------------- /experiments/experiments_ccle/cross_validation/ccle_ec_np_nmf/fold_10.txt: -------------------------------------------------------------------------------- 1 | Tried parameters {'K': 1}. Average performances: {'R^2': nan, 'MSE': nan, 'Rp': nan}. 2 | All performances: {'R^2': [0.2990448024868735, nan, 0.2840769553627075, 0.26714246444465617, 0.11179458930986019, 0.28414563451824437, 0.2789897405789954, 0.23437852058959108, 0.23197749343682172, 0.19454331249373813], 'MSE': [7.2081285106701838, nan, 8.4540785671560474, 7.9755098743377015, 10.490060311415816, 7.7292357330047716, 8.2834217516744371, 8.1726540674248351, 8.8516732651546768, 8.7154462891311084], 'Rp': [0.56460215202757513, nan, 0.54849125263803944, 0.53843893380351859, 0.45073282179886515, 0.55572028232090409, 0.54613477312734926, 0.54132475362304144, 0.50771080346211717, 0.48526614285520114]}. 3 | Tried parameters {'K': 2}. Average performances: {'R^2': nan, 'MSE': nan, 'Rp': nan}. 4 | All performances: {'R^2': [0.17339316174401498, 0.12382636842374595, 0.09920910311542641, -0.698529354830483, 0.189725601465631, -0.27796723954820046, nan, 0.24436246653104365, 0.1367540667531495, -1.282101408415528], 'MSE': [9.6512734326581207, 10.110047661709734, 10.335585943906789, 18.476940433649386, 8.5770228973499112, 13.86498823666988, nan, 8.5054971891034548, 9.3803252970458857, 24.605894753348949], 'Rp': [0.49969350509944144, 0.46420457593582953, 0.46921142164619528, 0.29817919762006106, 0.51785697799272423, 0.38253978258581672, nan, 0.53981172599605909, 0.49286325986064006, 0.25750045194421955]}. 5 | Tried parameters {'K': 3}. Average performances: {'R^2': nan, 'MSE': nan, 'Rp': nan}. 6 | All performances: {'R^2': [0.04600338411794669, -0.07679370717699197, -6.898094240598942, -0.23883899157556043, -0.5859210306749381, nan, -28.72937659902503, -0.12351150816049028, -1.8911807711311281, nan], 'MSE': [10.926870945064891, 12.439280057723746, 86.053786844642929, 13.804728763778797, 15.933965745136792, nan, 335.52834016553817, 11.334188982594746, 33.640031796664339, nan], 'Rp': [0.48184819709055382, 0.42290700962235894, 0.14784010520281102, 0.37723161731746629, 0.37499801529072868, nan, 0.051046476971676892, 0.4162096954820968, 0.25129992903087628, nan]}. 7 | Best performances: {'R^2': nan, 'MSE': nan, 'Rp': nan}. Best parameters: {'K': 1}. 8 | -------------------------------------------------------------------------------- /experiments/experiments_ccle/cross_validation/ccle_ec_np_nmf/fold_4.txt: -------------------------------------------------------------------------------- 1 | Tried parameters {'K': 1}. Average performances: {'R^2': nan, 'MSE': nan, 'Rp': nan}. 2 | All performances: {'R^2': [0.3239066468360543, 0.26591082328875126, 0.277232407693747, 0.3002988912118617, 0.2230720732092316, 0.3016019016338344, 0.27359969988871646, 0.30245806011734355, nan, 0.2159285164272251], 'MSE': [7.2598155079118172, 8.4367043044885328, 8.1663588046295921, 8.0833222544490368, 9.0500330345001512, 8.0276501013862784, 7.980061121753228, 7.5103071477783869, nan, 8.7415995269791615], 'Rp': [0.58003173592262225, 0.5418780706142905, 0.54467016939444701, 0.5606218293239178, 0.50746679314807175, 0.56032425939131081, 0.54443797762797175, 0.56407691159055506, nan, 0.5016971427584187]}. 3 | Tried parameters {'K': 2}. Average performances: {'R^2': nan, 'MSE': nan, 'Rp': nan}. 
4 | All performances: {'R^2': [-5.091737569044002, 0.14848438000496866, 0.2336683179270983, 0.047599750811030406, 0.2375436580542296, 0.17234147867507055, nan, 0.18011945080160585, 0.20935282545199996, 0.24152107002501078], 'MSE': [69.11062766964352, 9.4084696891166733, 9.1935756756055103, 9.6688824290995576, 8.4776314656262617, 9.6605352430164988, nan, 8.3922350493859295, 9.0872003997111541, 8.7609250002282675], 'Rp': [0.16590956551300862, 0.49774626633210262, 0.53473711841463611, 0.45700815466190642, 0.53792917790183337, 0.50001532989741448, nan, 0.51238501859198027, 0.50713253504549305, 0.53691018555046077]}. 5 | Tried parameters {'K': 3}. Average performances: {'R^2': nan, 'MSE': nan, 'Rp': nan}. 6 | All performances: {'R^2': [-5.2634475331075015, -0.22918117651745296, -0.3166474856182855, nan, 0.0067056182688344235, -293.88343994992823, -27550.438457672837, -0.32233750286340146, -0.198384191064096, nan], 'MSE': [65.498625565971452, 14.231269176284863, 14.934270478137543, nan, 10.841791309191223, 3398.2560242145123, 312819.06982480449, 13.755389034937668, 13.163945172420226, nan], 'Rp': [0.18370774363564404, 0.39777648711936636, 0.3675755735405864, nan, 0.48444087358632931, -0.0040364822562375582, -0.023977524852361095, 0.37843878210640453, 0.38381454211200866, nan]}. 7 | Best performances: {'R^2': nan, 'MSE': nan, 'Rp': nan}. Best parameters: {'K': 1}. 8 | -------------------------------------------------------------------------------- /experiments/experiments_gdsc/cross_validation/kbmf/run_nested_cross_val_kbmf.R: -------------------------------------------------------------------------------- 1 | # Run the nested cross-validation for KBMF 2 | 3 | source("nested_cross_val_kbmf.R") 4 | K <- 10 5 | R_values <- c(6,8,10) 6 | 7 | Px <- 3 8 | Nx <- 622 9 | Pz <- 3 10 | Nz <- 138 11 | 12 | # Load in the drug sensitivity values 13 | folder_drug_sensitivity <- '/Users/thomasbrouwer/Documents/Projects/libraries/BNMTF/data_drug_sensitivity/gdsc/' 14 | name_drug_sensitivity <- 'ic50_excl_empty_filtered_cell_lines_drugs.txt' 15 | Y <- as.matrix(read.table(paste(folder_drug_sensitivity,name_drug_sensitivity,sep=''), 16 | header=TRUE, 17 | sep=',', 18 | colClasses=c(rep("NULL",3), rep("numeric",138)))) 19 | 20 | # Load in the kernels - X = cancer cell lines, Z = drugs 21 | folder_kernels <- '/Users/thomasbrouwer/Documents/Projects/libraries/BNMTF/data_drug_sensitivity/gdsc/kernels/' 22 | 23 | kernel_copy_variation <- as.matrix(read.table(paste(folder_kernels,'copy_variation.txt',sep=''),header=TRUE,sep='\t')) 24 | kernel_gene_expression <- as.matrix(read.table(paste(folder_kernels,'gene_expression.txt',sep=''),header=TRUE,sep='\t')) 25 | kernel_mutation <- as.matrix(read.table(paste(folder_kernels,'mutation.txt',sep=''),header=TRUE,sep='\t')) 26 | 27 | kernel_1d2d <- as.matrix(read.table(paste(folder_kernels,'1d2d_descriptors.txt',sep=''),header=TRUE,sep=',')) 28 | kernel_fingerprints<- as.matrix(read.table(paste(folder_kernels,'PubChem_fingerprints.txt',sep=''),header=TRUE,sep=',')) 29 | kernel_targets <- as.matrix(read.table(paste(folder_kernels,'targets.txt',sep=''),header=TRUE,sep=',')) 30 | 31 | Kx <- array(0, c(Nx, Nx, Px)) 32 | Kx[,, 1] <- kernel_copy_variation 33 | Kx[,, 2] <- kernel_gene_expression 34 | Kx[,, 3] <- kernel_mutation 35 | 36 | Kz <- array(0, c(Nz, Nz, Pz)) 37 | Kz[,, 1] <- kernel_1d2d 38 | Kz[,, 2] <- kernel_fingerprints 39 | Kz[,, 3] <- kernel_targets 40 | 41 | # Run the cross-validation 42 | kbmf_nested_cross_validation(Kx, Kz, Y, R_values, K) 43 | 44 | # R_values <- 
c(7,8,9) 45 | # MSE: 2.1906, 2.1993, 2.2380, 2.2522, 2.3098 46 | # R^2: 0.8108, 0.8109, 0.8073, 0.8120, 0.8021 47 | # Rp: 0.9005, 0.9005, 0.8986, 0.9011, 0.8958 48 | # Average performances: MSE=2.2380, R^2=0.8086, Rp=0.8993 49 | -------------------------------------------------------------------------------- /experiments/experiments_gdsc/cross_validation/kbmf/nested_cross_val_kbmf.R~: -------------------------------------------------------------------------------- 1 | # Nested cross-validation for the KBMF method. 2 | 3 | source("cross_val_kbmf.R") 4 | 5 | kbmf_nested_cross_validation <- function(Kx, Kz, Y, R_values, K) { 6 | # Split the dataset up into K folds 7 | sets = create_train_test_sets(split_dataset(Y, K), Y) 8 | training_sets = sets[[1]] 9 | test_sets = sets[[2]] 10 | 11 | MSEs = list() 12 | R2s = list() 13 | Rps = list() 14 | for (f in seq(training_sets)) { 15 | print(sprintf("FOLD %i. Now running cross-validation to find best R.", f)) 16 | train = training_sets[[f]] 17 | test = test_sets[[f]] 18 | 19 | # Run X-val on each training set 20 | results = kbmf_cross_validation(Kx, Kz, train, R_values, K) 21 | 22 | # Use the best value for R to train and evaluate on the test set 23 | best_R = results[[1]] 24 | state <- kbmf_regression_train(Kx, Kz, train, best_R) 25 | prediction <- kbmf_regression_test(Kx, Kz, state)$Y$mu 26 | 27 | MSE = mean((prediction - test)^2, na.rm=TRUE ) 28 | mean_test = mean( test, na.rm=TRUE ) 29 | R2 = 1 - ( sum( (test - prediction)^2, na.rm=TRUE ) / sum( (test - mean_test)^2, na.rm=TRUE ) ) 30 | mean_pred = mean( prediction, na.rm=TRUE ) 31 | Rp = cor(c(test),c(prediction),use='pairwise.complete.obs',method='pearson') 32 | #Rp = sum( (test - mean_test) * (prediction - mean_pred) , na.rm=TRUE ) / ( sqrt( sum( (test - mean_test)^2 , na.rm=TRUE ) ) * sqrt( sum( (prediction - mean_pred)^2 , na.rm=TRUE ) ) ) 33 | print(sprintf("Performance on fold %i: MSE=%.4f, R^2=%.4f, Rp=%.4f.", f,MSE,R2,Rp)) 34 | 35 | # Store the performance 36 | MSEs = c(MSEs,MSE) 37 | R2s = c(R2s,R2) 38 | Rps = c(Rps,Rp) 39 | } 40 | 41 | # Print all performances 42 | print(sprintf("All performances nested cross-validation: MSE=%.4f, R^2=%.4f, Rp=%.4f.",MSEs,R2s,Rps)) 43 | 44 | # Compute the average performances, and return that. 45 | average_MSE = mean(unlist(MSEs)) 46 | average_R2 = mean(unlist(R2s)) 47 | average_Rp = mean(unlist(Rps)) 48 | print(sprintf("Performances nested cross-validation: MSE=%.4f, R^2=%.4f, Rp=%.4f.",average_MSE,average_R2,average_Rp)) 49 | return(list(average_MSE, average_R2, average_Rp)) 50 | } 51 | 52 | 53 | -------------------------------------------------------------------------------- /experiments/experiments_gdsc/cross_validation/gibbs_nmf/linesearch_xval_gibbs.py: -------------------------------------------------------------------------------- 1 | """ 2 | Run the cross validation with line search for model selection using G-NMF on 3 | the Sanger dataset. 
4 | """ 5 | 6 | import sys, os 7 | project_location = os.path.dirname(__file__)+"/../../../../../" 8 | sys.path.append(project_location) 9 | 10 | import numpy, random 11 | from BNMTF.code.models.bnmf_gibbs_optimised import bnmf_gibbs_optimised 12 | from BNMTF.code.cross_validation.line_search_cross_validation import LineSearchCrossValidation 13 | from BNMTF.data_drug_sensitivity.gdsc.load_data import load_gdsc 14 | 15 | 16 | # Settings 17 | standardised = False 18 | iterations = 1000 19 | burn_in = 900 20 | thinning = 2 21 | init_UV = 'random' 22 | 23 | K_range = [15,20,25,30] 24 | no_folds = 10 25 | restarts = 1 26 | 27 | quality_metric = 'AIC' 28 | output_file = "./results.txt" 29 | 30 | alpha, beta = 1., 1. 31 | lambdaU = 1./10. 32 | lambdaV = 1./10. 33 | priors = { 'alpha':alpha, 'beta':beta, 'lambdaU':lambdaU, 'lambdaV':lambdaV } 34 | 35 | # Load in the Sanger dataset 36 | (_,X_min,M,_,_,_,_) = load_gdsc(standardised=standardised) 37 | 38 | # Run the cross-validation framework 39 | #random.seed(42) 40 | #numpy.random.seed(9000) 41 | nested_crossval = LineSearchCrossValidation( 42 | classifier=bnmf_gibbs_optimised, 43 | R=X_min, 44 | M=M, 45 | values_K=K_range, 46 | folds=no_folds, 47 | priors=priors, 48 | init_UV=init_UV, 49 | iterations=iterations, 50 | restarts=restarts, 51 | quality_metric=quality_metric, 52 | file_performance=output_file 53 | ) 54 | nested_crossval.run(burn_in=burn_in,thinning=thinning) 55 | 56 | """ 57 | all_MSE = [2.0115451703143985, 2.0532542729784833, 2.0454971069846226, 1.994656076757727, 2.0281421630490297, 2.0691704067461281, 2.0708801136454622, 2.1137440615703653, 2.1153688464049725, 2.0478097531374373] 58 | all_R2 = [0.8248485588294542, 0.8219514639515233, 0.8217549958515522, 0.8349672123366683, 0.830543344804296, 0.8229475100079148, 0.8234388009582426, 0.8228191950789238, 0.8195240616800068, 0.8266748390223762, ] 59 | 60 | Average MSE: 2.0550067971588626 +- 0.0013944347250178673 61 | Average R^2: 0.82494699825209561 +- 1.9408941387580883e-05 62 | """ -------------------------------------------------------------------------------- /experiments/experiments_gdsc/cross_validation/kbmf/nested_cross_val_kbmf.R: -------------------------------------------------------------------------------- 1 | # Nested cross-validation for the KBMF method. 2 | 3 | source("cross_val_kbmf.R") 4 | 5 | kbmf_nested_cross_validation <- function(Kx, Kz, Y, R_values, K) { 6 | # Split the dataset up into K folds 7 | sets = create_train_test_sets(split_dataset(Y, K), Y) 8 | training_sets = sets[[1]] 9 | test_sets = sets[[2]] 10 | 11 | MSEs = list() 12 | R2s = list() 13 | Rps = list() 14 | for (f in seq(training_sets)) { 15 | print(sprintf("FOLD %i. 
Now running cross-validation to find best R.", f)) 16 | train = training_sets[[f]] 17 | test = test_sets[[f]] 18 | 19 | # Run X-val on each training set 20 | results = kbmf_cross_validation(Kx, Kz, train, R_values, K) 21 | 22 | # Use the best value for R to train and evaluate on the test set 23 | best_R = results[[1]] 24 | state <- kbmf_regression_train(Kx, Kz, train, best_R) 25 | prediction <- kbmf_regression_test(Kx, Kz, state)$Y$mu 26 | 27 | MSE = mean((prediction - test)^2, na.rm=TRUE ) 28 | mean_test = mean( test, na.rm=TRUE ) 29 | R2 = 1 - ( sum( (test - prediction)^2, na.rm=TRUE ) / sum( (test - mean_test)^2, na.rm=TRUE ) ) 30 | mean_pred = mean( prediction, na.rm=TRUE ) 31 | Rp = cor(c(test),c(prediction),use='pairwise.complete.obs',method='pearson') 32 | #Rp = sum( (test - mean_test) * (prediction - mean_pred) , na.rm=TRUE ) / ( sqrt( sum( (test - mean_test)^2 , na.rm=TRUE ) ) * sqrt( sum( (prediction - mean_pred)^2 , na.rm=TRUE ) ) ) 33 | print(sprintf("Performance on fold %i: MSE=%.4f, R^2=%.4f, Rp=%.4f.", f,MSE,R2,Rp)) 34 | 35 | # Store the performance 36 | MSEs = c(MSEs,MSE) 37 | R2s = c(R2s,R2) 38 | Rps = c(Rps,Rp) 39 | } 40 | 41 | # Print all performances 42 | print(sprintf("All performances nested cross-validation: MSE=%.4f, R^2=%.4f, Rp=%.4f.",unlist(MSEs),unlist(R2s),unlist(Rps))) 43 | 44 | # Compute the average performances, and return that. 45 | average_MSE = mean(unlist(MSEs)) 46 | average_R2 = mean(unlist(R2s)) 47 | average_Rp = mean(unlist(Rps)) 48 | print(sprintf("Performances nested cross-validation: MSE=%.4f, R^2=%.4f, Rp=%.4f.",average_MSE,average_R2,average_Rp)) 49 | return(list(average_MSE, average_R2, average_Rp)) 50 | } 51 | 52 | 53 | -------------------------------------------------------------------------------- /experiments/experiments_gdsc/cross_validation/kbmf/run_cross_val_kbmf.R~: -------------------------------------------------------------------------------- 1 | # Run the cross-validation for KBMF 2 | 3 | source("cross_val_kbmf.R") 4 | K <- 5 5 | R_values <- c(2,4,6,8,10,12,14,16,18,20) 6 | 7 | Px <- 3 8 | Nx <- 622 9 | Pz <- 3 10 | Nz <- 138 11 | 12 | # Load in the drug sensitivity values 13 | folder_drug_sensitivity <- '/home/tab43/Dropbox/Biological databases/Sanger_drug_sensivitity/' 14 | name_drug_sensitivity <- 'ic50_excl_empty_filtered_cell_lines_drugs.txt' 15 | Y <- as.matrix(read.table(paste(folder_drug_sensitivity,name_drug_sensitivity,sep=''), 16 | header=TRUE, 17 | sep=',', 18 | colClasses=c(rep("NULL",3), rep("numeric",138)))) 19 | 20 | # Load in the kernels - X = cancer cell lines, Z = drugs 21 | folder_kernels <- './kernels/' 22 | 23 | kernel_copy_variation <- as.matrix(read.table(paste(folder_kernels,'copy_variation.txt',sep=''),header=TRUE,sep='\t')) 24 | kernel_gene_expression <- as.matrix(read.table(paste(folder_kernels,'gene_expression.txt',sep=''),header=TRUE,sep='\t')) 25 | kernel_mutation <- as.matrix(read.table(paste(folder_kernels,'mutation.txt',sep=''),header=TRUE,sep='\t')) 26 | 27 | kernel_1d2d <- as.matrix(read.table(paste(folder_kernels,'1d2d_descriptors.txt',sep=''),header=TRUE,sep=',')) 28 | kernel_fingerprints<- as.matrix(read.table(paste(folder_kernels,'PubChem_fingerprints.txt',sep=''),header=TRUE,sep=',')) 29 | kernel_targets <- as.matrix(read.table(paste(folder_kernels,'targets.txt',sep=''),header=TRUE,sep=',')) 30 | 31 | Kx <- array(0, c(Nx, Nx, Px)) 32 | Kx[,, 1] <- kernel_copy_variation 33 | Kx[,, 2] <- kernel_gene_expression 34 | Kx[,, 3] <- kernel_mutation 35 | 36 | Kz <- array(0, c(Nz, Nz, Pz)) 37 | Kz[,, 1] 
<- kernel_1d2d 38 | Kz[,, 2] <- kernel_fingerprints 39 | Kz[,, 3] <- kernel_targets 40 | 41 | # Run the cross-validation 42 | kbmf_cross_validation(Kx, Kz, Y, R_values, K) 43 | 44 | # Results (5 folds, 200 iterations): 45 | # R: 2 4 6 8 10 12 14 16 18 20 46 | # MSE: 2.832466 2.448098 2.294287 2.227165 2.243336 2.259782 2.283704 2.309363 2.335845 2.358715 47 | # R^2: 0.7578040 0.7906790 0.8038126 0.8095175 0.8081712 0.8067867 0.8047146 0.8025545 0.8002464 0.7983178 48 | # Rp: 0.8705774 0.8892419 0.8965853 0.8997967 0.8991491 0.8985184 0.8975142 0.8964987 0.8954419 0.8944387 49 | -------------------------------------------------------------------------------- /experiments/experiments_gdsc/cross_validation/vb_nmtf/greedysearch_xval_vb.py: -------------------------------------------------------------------------------- 1 | """ 2 | Run the cross validation with greedy search for model selection using Gibbs-NMTF 3 | on the Sanger dataset. 4 | """ 5 | 6 | import sys, os 7 | project_location = os.path.dirname(__file__)+"/../../../../../" 8 | sys.path.append(project_location) 9 | 10 | import numpy, random 11 | from BNMTF.code.models.bnmtf_vb_optimised import bnmtf_vb_optimised 12 | from BNMTF.code.cross_validation.greedy_search_cross_validation import GreedySearchCrossValidation 13 | from BNMTF.data_drug_sensitivity.gdsc.load_data import load_gdsc 14 | 15 | 16 | # Settings 17 | standardised = False 18 | iterations = 1000 19 | 20 | init_S = 'random' #'exp' # 21 | init_FG = 'kmeans' #'exp' # 22 | 23 | K_range = [5,6,7,8,9,10] 24 | L_range = [5,6,7,8,9,10] 25 | no_folds = 10 26 | restarts = 1 27 | 28 | quality_metric = 'AIC' 29 | output_file = "./results.txt" 30 | 31 | alpha, beta = 1., 1. 32 | lambdaF = 1./10. 33 | lambdaS = 1./10. 34 | lambdaG = 1./10. 35 | priors = { 'alpha':alpha, 'beta':beta, 'lambdaF':lambdaF, 'lambdaS':lambdaS, 'lambdaG':lambdaG } 36 | 37 | # Load in the Sanger dataset 38 | (_,X_min,M,_,_,_,_) = load_gdsc(standardised=standardised) 39 | 40 | # Run the cross-validation framework 41 | #random.seed(42) 42 | #numpy.random.seed(9000) 43 | nested_crossval = GreedySearchCrossValidation( 44 | classifier=bnmtf_vb_optimised, 45 | R=X_min, 46 | M=M, 47 | values_K=K_range, 48 | values_L=L_range, 49 | folds=no_folds, 50 | priors=priors, 51 | init_S=init_S, 52 | init_FG=init_FG, 53 | iterations=iterations, 54 | restarts=restarts, 55 | quality_metric=quality_metric, 56 | file_performance=output_file 57 | ) 58 | nested_crossval.run() 59 | 60 | """ 61 | all_MSE = [2.2811777476249415, 2.1782935772707153, 2.3760214934948851, 2.4070138866182651, 2.1679193763392863, 2.4351661211853344, 2.3531667160686407, 2.4375820084579578, 2.1737221434522502, 2.3957602752026799] 62 | all_R2 = [0.8004514561880776, 0.8095655871226215, 0.7982332012844026, 0.7939011733335062, 0.8135460410954071, 0.7914028391107459, 0.8050979272119902, 0.7964032435159856, 0.8102340265362746, 0.805071751458151] 63 | 64 | Average MSE: 2.3205823345714953 +- 0.011074845252916733 65 | Average R^2: 0.80239072468571615 +- 5.0165577464731684e-05 66 | """ -------------------------------------------------------------------------------- /experiments/experiments_gdsc/cross_validation/icm_nmtf/greedysearch_xval_icm.py: -------------------------------------------------------------------------------- 1 | """ 2 | Run the cross validation with greedy search for model selection using ICM-NMTF 3 | on the Sanger dataset. 
4 | """ 5 | 6 | import sys, os 7 | project_location = os.path.dirname(__file__)+"/../../../../../" 8 | sys.path.append(project_location) 9 | 10 | import numpy, random 11 | from BNMTF.code.models.nmtf_icm import nmtf_icm 12 | from BNMTF.code.cross_validation.greedy_search_cross_validation import GreedySearchCrossValidation 13 | from BNMTF.data_drug_sensitivity.gdsc.load_data import load_gdsc 14 | 15 | 16 | # Settings 17 | standardised = False 18 | iterations = 1000 19 | 20 | init_S = 'random' #'exp' # 21 | init_FG = 'kmeans' #'exp' # 22 | 23 | K_range = [5,6,7,8,9,10] 24 | L_range = [5,6,7,8,9,10] 25 | no_folds = 10 26 | restarts = 1 27 | 28 | quality_metric = 'AIC' 29 | output_file = "./results.txt" 30 | 31 | alpha, beta = 1., 1. 32 | lambdaF = 1./10. 33 | lambdaS = 1./10. 34 | lambdaG = 1./10. 35 | priors = { 'alpha':alpha, 'beta':beta, 'lambdaF':lambdaF, 'lambdaS':lambdaS, 'lambdaG':lambdaG } 36 | 37 | minimum_TN = 0.1 38 | 39 | # Load in the Sanger dataset 40 | (_,X_min,M,_,_,_,_) = load_gdsc(standardised=standardised,sep=',') 41 | 42 | # Run the cross-validation framework 43 | #random.seed(1) 44 | #numpy.random.seed(1) 45 | nested_crossval = GreedySearchCrossValidation( 46 | classifier=nmtf_icm, 47 | R=X_min, 48 | M=M, 49 | values_K=K_range, 50 | values_L=L_range, 51 | folds=no_folds, 52 | priors=priors, 53 | init_S=init_S, 54 | init_FG=init_FG, 55 | iterations=iterations, 56 | restarts=restarts, 57 | quality_metric=quality_metric, 58 | file_performance=output_file 59 | ) 60 | nested_crossval.run(minimum_TN=minimum_TN) 61 | 62 | """ 63 | all_MSE = [2.2020002331612534, 2.2364503149918011, 2.1611831576199534, 2.1569381861635395, 2.1530470452271864, 2.272519698528658, 2.1910498022580613, 2.2302383199950797, 2.1027416628364484, 2.283196008129782] 64 | all_R2 = [0.8068027775294401, 0.8122652321538621, 0.8155286993833876, 0.8151068635575036, 0.8227521825461013, 0.8062086302462692, 0.8136429679161671, 0.8113058601446024, 0.8152542609952846, 0.8080593057170452] 65 | 66 | Average MSE: 2.1989364428911764 +- 0.0029521290510586768 67 | Average R^2: 0.81269267801896627 +- 2.2283761452627026e-05 68 | """ -------------------------------------------------------------------------------- /experiments/experiments_gdsc/cross_validation/kbmf/run_cross_val_kbmf.R: -------------------------------------------------------------------------------- 1 | # Run the cross-validation for KBMF 2 | 3 | source("cross_val_kbmf.R") 4 | K <- 5 5 | R_values <- c(2,4,6,8,10,12,14,16,18,20) 6 | 7 | Px <- 3 8 | Nx <- 622 9 | Pz <- 3 10 | Nz <- 138 11 | 12 | # Load in the drug sensitivity values 13 | folder_drug_sensitivity <- '/Users/thomasbrouwer/Documents/Projects/libraries/BNMTF/data_drug_sensitivity/gdsc/' 14 | name_drug_sensitivity <- 'ic50_excl_empty_filtered_cell_lines_drugs.txt' 15 | Y <- as.matrix(read.table(paste(folder_drug_sensitivity,name_drug_sensitivity,sep=''), 16 | header=TRUE, 17 | sep=',', 18 | colClasses=c(rep("NULL",3), rep("numeric",138)))) 19 | 20 | # Load in the kernels - X = cancer cell lines, Z = drugs 21 | folder_kernels <- '/Users/thomasbrouwer/Documents/Projects/libraries/BNMTF/data_drug_sensitivity/gdsc/kernels/' 22 | 23 | kernel_copy_variation <- as.matrix(read.table(paste(folder_kernels,'copy_variation.txt',sep=''),header=TRUE,sep='\t')) 24 | kernel_gene_expression <- as.matrix(read.table(paste(folder_kernels,'gene_expression.txt',sep=''),header=TRUE,sep='\t')) 25 | kernel_mutation <- as.matrix(read.table(paste(folder_kernels,'mutation.txt',sep=''),header=TRUE,sep='\t')) 26 | 27 | kernel_1d2d <- 
as.matrix(read.table(paste(folder_kernels,'1d2d_descriptors.txt',sep=''),header=TRUE,sep=',')) 28 | kernel_fingerprints<- as.matrix(read.table(paste(folder_kernels,'PubChem_fingerprints.txt',sep=''),header=TRUE,sep=',')) 29 | kernel_targets <- as.matrix(read.table(paste(folder_kernels,'targets.txt',sep=''),header=TRUE,sep=',')) 30 | 31 | Kx <- array(0, c(Nx, Nx, Px)) 32 | Kx[,, 1] <- kernel_copy_variation 33 | Kx[,, 2] <- kernel_gene_expression 34 | Kx[,, 3] <- kernel_mutation 35 | 36 | Kz <- array(0, c(Nz, Nz, Pz)) 37 | Kz[,, 1] <- kernel_1d2d 38 | Kz[,, 2] <- kernel_fingerprints 39 | Kz[,, 3] <- kernel_targets 40 | 41 | # Run the cross-validation 42 | kbmf_cross_validation(Kx, Kz, Y, R_values, K) 43 | 44 | # Results (5 folds, 200 iterations): 45 | # R: 2 4 6 8 10 12 14 16 18 20 46 | # MSE: 2.832466 2.448098 2.294287 2.227165 2.243336 2.259782 2.283704 2.309363 2.335845 2.358715 47 | # R^2: 0.7578040 0.7906790 0.8038126 0.8095175 0.8081712 0.8067867 0.8047146 0.8025545 0.8002464 0.7983178 48 | # Rp: 0.8705774 0.8892419 0.8965853 0.8997967 0.8991491 0.8985184 0.8975142 0.8964987 0.8954419 0.8944387 49 | -------------------------------------------------------------------------------- /experiments/experiments_gdsc/cross_validation/gibbs_nmtf/greedysearch_xval_gibbs.py: -------------------------------------------------------------------------------- 1 | """ 2 | Run the cross validation with greedy search for model selection using VB-NMTF 3 | on the Sanger dataset. 4 | """ 5 | 6 | import sys, os 7 | project_location = os.path.dirname(__file__)+"/../../../../../" 8 | sys.path.append(project_location) 9 | 10 | import numpy, random 11 | from BNMTF.code.models.bnmtf_gibbs_optimised import bnmtf_gibbs_optimised 12 | from BNMTF.code.cross_validation.greedy_search_cross_validation import GreedySearchCrossValidation 13 | from BNMTF.data_drug_sensitivity.gdsc.load_data import load_gdsc 14 | 15 | 16 | # Settings 17 | standardised = False 18 | iterations = 1000 19 | burn_in = 900 20 | thinning = 2 21 | 22 | init_S = 'random' #'exp' # 23 | init_FG = 'kmeans' #'exp' # 24 | 25 | K_range = [5,6,7,8,9,10] 26 | L_range = [5,6,7,8,9,10] 27 | no_folds = 10 28 | restarts = 1 29 | 30 | quality_metric = 'AIC' 31 | output_file = "./results.txt" 32 | 33 | alpha, beta = 1., 1. 34 | lambdaF = 1./10. 35 | lambdaS = 1./10. 36 | lambdaG = 1./10. 
37 | priors = { 'alpha':alpha, 'beta':beta, 'lambdaF':lambdaF, 'lambdaS':lambdaS, 'lambdaG':lambdaG } 38 | 39 | # Load in the Sanger dataset 40 | (_,X_min,M,_,_,_,_) = load_gdsc(standardised=standardised) 41 | 42 | # Run the cross-validation framework 43 | #random.seed(1) 44 | #numpy.random.seed(1) 45 | nested_crossval = GreedySearchCrossValidation( 46 | classifier=bnmtf_gibbs_optimised, 47 | R=X_min, 48 | M=M, 49 | values_K=K_range, 50 | values_L=L_range, 51 | folds=no_folds, 52 | priors=priors, 53 | init_S=init_S, 54 | init_FG=init_FG, 55 | iterations=iterations, 56 | restarts=restarts, 57 | quality_metric=quality_metric, 58 | file_performance=output_file 59 | ) 60 | nested_crossval.run(burn_in=burn_in,thinning=thinning) 61 | 62 | """ 63 | all_MSE = [2.2840197244732074, 2.4010413568146909, 2.3867096829182866, 2.5140729100375911, 2.4161603588039613, 2.5768426948112859, 2.4258351325273564, 2.416620106102529, 2.2286332627076089, 2.3745461326347104] 64 | all_R2 = [0.8033980427153291, 0.798845320492358, 0.8023608504542508, 0.7847220094659351, 0.7846794714863345, 0.7881485488273184, 0.7940181660135461, 0.7954596533423378, 0.8057721746024293, 0.7961801714226922] 65 | 66 | Average MSE: 2.4024481361831223 +- 0.0089074472278596831 67 | Average R^2: 0.79535844088225327 +- 5.1591154270217092e-05 68 | """ -------------------------------------------------------------------------------- /experiments/experiments_gdsc/cross_validation/kbmf/run_kbmf.R~: -------------------------------------------------------------------------------- 1 | source("kbmf_regression_train.R") 2 | source("kbmf_regression_test.R") 3 | 4 | set.seed(1606) 5 | 6 | Px <- 3 7 | Nx <- 622 8 | Pz <- 3 9 | Nz <- 138 10 | 11 | # Load in the drug sensitivity values 12 | folder_drug_sensitivity <- '/home/tab43/Dropbox/Biological databases/Sanger_drug_sensivitity/' 13 | name_drug_sensitivity <- 'ic50_excl_empty_filtered_cell_lines_drugs.txt' 14 | Y <- as.matrix(read.table(paste(folder_drug_sensitivity,name_drug_sensitivity,sep=''), 15 | header=TRUE, 16 | sep=',', 17 | colClasses=c(rep("NULL",3), rep("numeric",138)))) 18 | 19 | print("Loaded data") 20 | 21 | # Load in the kernels - X = cancer cell lines, Z = drugs 22 | folder_kernels <- './kernels/' 23 | 24 | kernel_copy_variation <- as.matrix(read.table(paste(folder_kernels,'copy_variation.txt',sep=''),header=TRUE)) 25 | kernel_gene_expression <- as.matrix(read.table(paste(folder_kernels,'gene_expression.txt',sep=''),header=TRUE)) 26 | kernel_mutation <- as.matrix(read.table(paste(folder_kernels,'mutation.txt',sep=''),header=TRUE)) 27 | 28 | kernel_1d2d <- as.matrix(read.table(paste(folder_kernels,'1d2d_descriptors.txt',sep=''),header=TRUE)) 29 | kernel_fingerprints<- as.matrix(read.table(paste(folder_kernels,'PubChem_fingerprints.txt',sep=''),header=TRUE)) 30 | kernel_targets <- as.matrix(read.table(paste(folder_kernels,'targets.txt',sep=''),header=TRUE)) 31 | 32 | Kx <- array(0, c(Nx, Nx, Px)) 33 | Kx[,, 1] <- kernel_copy_variation 34 | Kx[,, 2] <- kernel_gene_expression 35 | Kx[,, 3] <- kernel_mutation 36 | 37 | Kz <- array(0, c(Nz, Nz, Pz)) 38 | Kz[,, 1] <- kernel_1d2d 39 | Kz[,, 2] <- kernel_fingerprints 40 | Kz[,, 3] <- kernel_targets 41 | 42 | print("Loaded kernels") 43 | 44 | # Train the model, and test the performance on the training data 45 | state <- kbmf_regression_train(Kx, Kz, Y, 10) 46 | prediction <- kbmf_regression_test(Kx, Kz, state) 47 | 48 | print("Trained model") 49 | #print(prediction$Y$mu) 50 | 51 | print(sprintf("MSE = %.4f", mean((prediction$Y$mu - Y)^2, na.rm=TRUE 
))) 52 | # R=5, 200 iterations: "MSE = 2.0170" 53 | # R=5, 1000 iterations: "MSE = 2.0131" 54 | # R=10, 100 iterations: "MSE = 1.5869" 55 | # R=10, 200 iterations: "MSE = 1.5736" 56 | # R=10, 1000 iterations: "MSE = 1.5644" 57 | 58 | print("kernel weights on X") 59 | print(state$ex$mu) 60 | 61 | print("kernel weights on Z") 62 | print(state$ez$mu) 63 | -------------------------------------------------------------------------------- /experiments/experiments_gdsc/time/nmf_np_time.py: -------------------------------------------------------------------------------- 1 | """ 2 | Run NMF NP on the Sanger dataset. 3 | 4 | We can plot the MSE, R2 and Rp as it converges, against time, on the entire dataset. 5 | 6 | We give flat priors (1/10). 7 | """ 8 | 9 | import sys, os 10 | project_location = os.path.dirname(__file__)+"/../../../../" 11 | sys.path.append(project_location) 12 | 13 | from BNMTF.code.models.nmf_np import NMF 14 | from BNMTF.data_drug_sensitivity.gdsc.load_data import load_gdsc 15 | 16 | import numpy, random, scipy, matplotlib.pyplot as plt 17 | 18 | ########## 19 | 20 | standardised = False #standardised Sanger or unstandardised 21 | 22 | repeats = 10 23 | 24 | iterations = 1000 25 | I, J, K = 622,138,25 26 | 27 | init_UV = 'exponential' 28 | expo_prior = 1/10. 29 | 30 | # Load in data 31 | (_,R,M,_,_,_,_) = load_gdsc(standardised=standardised) 32 | 33 | 34 | # Run the VB algorithm, times 35 | times_repeats = [] 36 | performances_repeats = [] 37 | for i in range(0,repeats): 38 | # Set all the seeds 39 | numpy.random.seed(0) 40 | 41 | # Run the classifier 42 | nmf = NMF(R,M,K) 43 | nmf.initialise(init_UV,expo_prior) 44 | nmf.run(iterations) 45 | 46 | # Extract the performances and timestamps across all iterations 47 | times_repeats.append(nmf.all_times) 48 | performances_repeats.append(nmf.all_performances) 49 | 50 | # Check whether seed worked: all performances should be the same 51 | assert all(numpy.array_equal(performances, performances_repeats[0]) for performances in performances_repeats), \ 52 | "Seed went wrong - performances not the same across repeats!" 53 | 54 | # Print out the performances, and the average times 55 | all_times_average = list(numpy.average(times_repeats, axis=0)) 56 | all_performances = performances_repeats[0] 57 | print "np_all_times_average = %s" % all_times_average 58 | print "np_all_performances = %s" % all_performances 59 | 60 | 61 | # Print all time plots, the average, and performance vs iterations 62 | plt.figure() 63 | plt.title("Performance against time") 64 | plt.ylim(0,10) 65 | for times in times_repeats: 66 | plt.plot(times, all_performances['MSE']) 67 | 68 | plt.figure() 69 | plt.title("Performance against average time") 70 | plt.plot(all_times_average, all_performances['MSE']) 71 | plt.ylim(0,10) 72 | 73 | plt.figure() 74 | plt.title("Performance against iteration") 75 | plt.plot(all_performances['MSE']) 76 | plt.ylim(0,10) -------------------------------------------------------------------------------- /experiments/experiments_gdsc/time/nmtf_np_time.py: -------------------------------------------------------------------------------- 1 | """ 2 | Run NMTF VB on the Sanger dataset. 3 | 4 | We can plot the MSE, R2 and Rp as it converges, against time, on the entire dataset. 5 | 6 | We give flat priors (1/10). 
7 | """ 8 | 9 | import sys, os 10 | project_location = os.path.dirname(__file__)+"/../../../../" 11 | sys.path.append(project_location) 12 | 13 | from BNMTF.code.models.nmtf_np import NMTF 14 | from BNMTF.data_drug_sensitivity.gdsc.load_data import load_gdsc 15 | 16 | import numpy, random, scipy, matplotlib.pyplot as plt 17 | 18 | ########## 19 | 20 | standardised = False #standardised Sanger or unstandardised 21 | 22 | repeats = 10 23 | 24 | iterations = 3000 25 | I, J, K, L = 622,138,5,5 26 | 27 | init_FG = 'kmeans' 28 | init_S = 'exponential' 29 | expo_prior = 1/10. 30 | 31 | 32 | # Load in data 33 | (_,R,M,_,_,_,_) = load_gdsc(standardised=standardised) 34 | 35 | 36 | # Run the VB algorithm, times 37 | times_repeats = [] 38 | performances_repeats = [] 39 | for i in range(0,repeats): 40 | # Set all the seeds 41 | numpy.random.seed(3) 42 | 43 | # Run the classifier 44 | nmtf = NMTF(R,M,K,L) 45 | nmtf.initialise(init_S,init_FG,expo_prior) 46 | nmtf.run(iterations) 47 | 48 | # Extract the performances and timestamps across all iterations 49 | times_repeats.append(nmtf.all_times) 50 | performances_repeats.append(nmtf.all_performances) 51 | 52 | # Check whether seed worked: all performances should be the same 53 | assert all(numpy.array_equal(performances, performances_repeats[0]) for performances in performances_repeats), \ 54 | "Seed went wrong - performances not the same across repeats!" 55 | 56 | # Print out the performances, and the average times 57 | all_times_average = list(numpy.average(times_repeats, axis=0)) 58 | all_performances = performances_repeats[0] 59 | print "np_all_times_average = %s" % all_times_average 60 | print "np_all_performances = %s" % all_performances 61 | 62 | 63 | # Print all time plots, the average, and performance vs iterations 64 | plt.figure() 65 | plt.title("Performance against time") 66 | plt.ylim(0,10) 67 | for times in times_repeats: 68 | plt.plot(times, all_performances['MSE']) 69 | 70 | plt.figure() 71 | plt.title("Performance against average time") 72 | plt.plot(all_times_average, all_performances['MSE']) 73 | plt.ylim(0,10) 74 | 75 | plt.figure() 76 | plt.title("Performance against iteration") 77 | plt.plot(all_performances['MSE']) 78 | plt.ylim(0,10) -------------------------------------------------------------------------------- /experiments/experiments_ccle/cross_validation/ccle_ic_np_nmf/fold_7.txt: -------------------------------------------------------------------------------- 1 | Tried parameters {'K': 1}. Average performances: {'R^2': 0.5879277621825882, 'MSE': 4.6483056218383441, 'Rp': 0.76860740296692032}. 2 | All performances: {'R^2': [0.577259234339829, 0.5833463091261437, 0.6149041456125593, 0.5898025177932728, 0.55860672372988, 0.5745341683829451, 0.5957712042436116, 0.5762771829791546, 0.6094814959381496, 0.599294639680336], 'MSE': [4.8035268087934542, 4.8000827101259524, 4.3313238479664218, 4.7140519136330727, 4.8268095706858523, 4.8200029259366355, 4.5121900719775088, 4.7059381118272876, 4.4222629949366468, 4.5468672625006104], 'Rp': [0.76276386620519177, 0.76523783674847645, 0.78485203898267386, 0.76985904705525532, 0.75139840714058859, 0.76119026200470352, 0.77282863943314861, 0.7608104637456734, 0.78188439586002323, 0.77524907249346875]}. 3 | Tried parameters {'K': 2}. Average performances: {'R^2': 0.5440261808488415, 'MSE': 5.140477511723387, 'Rp': 0.75155822300718567}. 
4 | All performances: {'R^2': [0.5622621093705895, 0.604290897715341, 0.5198859737379906, 0.5006860518100077, 0.45748105011968343, 0.5709438055178231, 0.525954316579423, 0.5771046016744266, 0.5881949508556492, 0.5334580511074805], 'MSE': [5.0024933828897531, 4.4244510154082857, 5.392161823770266, 5.4514362432235872, 6.0782833039389415, 4.9329734406986656, 5.207932661419048, 4.9367216222215369, 4.7526104617516323, 5.2257111619121464], 'Rp': [0.75925304721062148, 0.78344135770327183, 0.7357122966846944, 0.72415370791335465, 0.71964145886508413, 0.7656183361953397, 0.7442864253064454, 0.76667822318668466, 0.77435382688019949, 0.74244355012616026]}. 5 | Tried parameters {'K': 3}. Average performances: {'R^2': 0.5300656231834285, 'MSE': 5.2959240320842422, 'Rp': 0.74914101293736834}. 6 | All performances: {'R^2': [0.48646007976574757, 0.485360531552703, 0.5112726098686872, 0.5642481181273951, 0.5359567699385488, 0.5656812545752545, 0.4980766743214996, 0.560265197330502, 0.5305116556880487, 0.5628233406658987], 'MSE': [5.5716738777243275, 5.7739150964057977, 5.5264827337018341, 4.9673575725461454, 5.372463476573575, 5.0291068114559092, 5.6452705558564036, 5.1674838582082074, 4.9759528076530621, 4.9295335307171557], 'Rp': [0.73420104567619704, 0.74164762788583549, 0.73838720892043141, 0.76510029209809638, 0.74581746371787849, 0.76626325120432948, 0.72903149070225692, 0.76141893642684, 0.74695376119956847, 0.76258905154224976]}. 7 | Best performances: {'R^2': 0.5879277621825882, 'MSE': 4.6483056218383441, 'Rp': 0.76860740296692032}. Best parameters: {'K': 1}. 8 | -------------------------------------------------------------------------------- /experiments/experiments_ccle/cross_validation/ccle_ic_np_nmf/fold_6.txt: -------------------------------------------------------------------------------- 1 | Tried parameters {'K': 1}. Average performances: {'R^2': 0.581024004507962, 'MSE': 4.6884246861798404, 'Rp': 0.76442617749065556}. 2 | All performances: {'R^2': [0.582575546356901, 0.5876016712055097, 0.5887548576714253, 0.5566001094300044, 0.6069954425590554, 0.588999311487476, 0.5722034232893978, 0.5798703115188777, 0.5891488534628431, 0.557490518098129], 'MSE': [4.7566793634886153, 4.4781373469991905, 4.5936690701585414, 4.875819027975159, 4.5280645946080611, 4.7608122667043125, 4.6929273120578943, 4.86228000601806, 4.5944799012390396, 4.741377972549528], 'Rp': [0.76652501716451915, 0.76776449798920399, 0.76809514696410419, 0.74978311144862841, 0.78118050873778788, 0.76851936063816195, 0.75799844183635023, 0.76433110655244585, 0.76984517868854241, 0.75021940488681127]}. 3 | Tried parameters {'K': 2}. Average performances: {'R^2': 0.5455547379702661, 'MSE': 5.0891400260923749, 'Rp': 0.75018973680480672}. 4 | All performances: {'R^2': [0.5860501099020681, 0.5613122813367499, 0.5540251431607827, 0.600507677495075, 0.42884297599528753, 0.6018862241089442, 0.5490402640160589, 0.5142874194436797, 0.5940643026343987, 0.4655309816096165], 'MSE': [4.7246803767495109, 5.0783310812505329, 4.8366629006050266, 4.6150649889059183, 6.3228325500782585, 4.5056227939132008, 4.9966028428737275, 5.557253814649898, 4.3824873888425531, 5.8718615230551316], 'Rp': [0.76981234834133305, 0.76468027961905694, 0.75235665143954467, 0.78265528563711118, 0.68652034358133252, 0.77876264511458981, 0.75238070029485049, 0.73119475745325246, 0.77694856764329967, 0.7065857889236965]}. 5 | Tried parameters {'K': 3}. Average performances: {'R^2': 0.47908479493368966, 'MSE': 5.8336648578857604, 'Rp': 0.73002630406494329}. 
6 | All performances: {'R^2': [0.6096483534715216, 0.41090059190455375, 0.5194240040191034, 0.5987704587298991, 0.15525846498948015, 0.40607386515582156, 0.522760860088342, 0.553969411802216, 0.45747041225944274, 0.5565715269165163], 'MSE': [4.4228641636466417, 6.7364572840251213, 5.4165754858835475, 4.4862725389750899, 9.431576978577862, 6.6225308334688791, 5.2419607458298563, 4.873964639647304, 5.9456910506394749, 5.1587548581638289], 'Rp': [0.78810970616774012, 0.70016545354268056, 0.73482885264106956, 0.78524034029260303, 0.6217027199533306, 0.70333594787226461, 0.73956029670424606, 0.75817762696984381, 0.71000059561805995, 0.7591415008875948]}. 7 | Best performances: {'R^2': 0.581024004507962, 'MSE': 4.6884246861798404, 'Rp': 0.76442617749065556}. Best parameters: {'K': 1}. 8 | -------------------------------------------------------------------------------- /experiments/experiments_ccle/cross_validation/ccle_ic_np_nmf/fold_8.txt: -------------------------------------------------------------------------------- 1 | Tried parameters {'K': 1}. Average performances: {'R^2': 0.5853602207302742, 'MSE': 4.6545162989240199, 'Rp': 0.76710564661346348}. 2 | All performances: {'R^2': [0.600337918469428, 0.573331397872287, 0.6043869806051346, 0.6113688193879931, 0.5663643423872363, 0.6141076440975802, 0.5645178031465836, 0.5749526126276556, 0.584291256344347, 0.5599434323644956], 'MSE': [4.5300747716661816, 4.721944612372738, 4.4609305782621034, 4.4114397794491795, 4.7868751148584021, 4.4928473224843843, 4.7769813998723576, 4.7016550060279592, 4.7433288620757086, 4.9190855421711834], 'Rp': [0.77582073601777546, 0.76066465953121076, 0.77907555345923862, 0.78304384896513657, 0.75543980167621316, 0.78385359216690675, 0.75605340198454085, 0.76065196266995017, 0.76570854090011009, 0.75074436876355244]}. 3 | Tried parameters {'K': 2}. Average performances: {'R^2': 0.5639583441151805, 'MSE': 4.8966166052775586, 'Rp': 0.75993488334722958}. 4 | All performances: {'R^2': [0.5078927692074906, 0.5268354707499995, 0.5515753413059283, 0.536354771166293, 0.5604148644567482, 0.5708139906124785, 0.6017185660287829, 0.5848215824043537, 0.576649478901923, 0.6225066063178074], 'MSE': [5.6120999811630989, 5.2373774144973133, 5.0170985987675136, 5.1829283989883468, 4.844804767155761, 4.6912547894029952, 4.4773816901235763, 4.7189041605953497, 4.9198244864808096, 4.2644917656008134], 'Rp': [0.72844311497061687, 0.73848036508083192, 0.75937569520710646, 0.74686038769351348, 0.75760094750509055, 0.76354140405534954, 0.77931433526838589, 0.76973477516011646, 0.76280215632415538, 0.79319565220713006]}. 5 | Tried parameters {'K': 3}. Average performances: {'R^2': 0.5346870063876862, 'MSE': 5.2288809878579112, 'Rp': 0.74978741374870927}. 6 | All performances: {'R^2': [0.3923332662252925, 0.566563611154509, 0.5326006311525158, 0.53822728357004, 0.5305541957057447, 0.5920734268034531, 0.5680791559583926, 0.5574544305599982, 0.5302209959281297, 0.5387630668187867], 'MSE': [6.9526443798030426, 4.7986191239280096, 5.3200578974727391, 5.2482748276698903, 5.2941615982625905, 4.4519440150599667, 5.0607493338430558, 4.825760967355504, 5.2188974145988922, 5.1177003205854152], 'Rp': [0.67845433007918032, 0.76338920448432213, 0.74910387033866987, 0.75555971481198037, 0.75133255961401002, 0.77938248466473758, 0.76633986140239452, 0.75628540081140139, 0.74602327744378205, 0.75200343383661539]}. 7 | Best performances: {'R^2': 0.5853602207302742, 'MSE': 4.6545162989240199, 'Rp': 0.76710564661346348}. Best parameters: {'K': 1}. 
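Note on the nan entries: in the ccle_ec_* fold logs earlier in this dump, the 'Average performances' are nan whenever any single inner fold failed to produce a finite value, which is consistent with taking a plain (non-nan-aware) mean over the ten per-fold values; the ccle_ic_* logs in this directory have no failed folds, so their averages are finite. A small illustration of the difference (illustrative values, not taken from the repository's code):

import numpy

fold_R2 = [0.21, 0.30, numpy.nan, 0.23]   # per-fold R^2 with one failed inner fold
print(numpy.mean(fold_R2))      # nan    -- one failed fold hides the remaining results
print(numpy.nanmean(fold_R2))   # ~0.247 -- average over the finite folds only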
8 | -------------------------------------------------------------------------------- /experiments/experiments_ccle/cross_validation/ccle_ic_np_nmf/fold_1.txt: -------------------------------------------------------------------------------- 1 | Tried parameters {'K': 1}. Average performances: {'R^2': 0.5876766889244068, 'MSE': 4.6372934909068046, 'Rp': 0.76838326647708211}. 2 | All performances: {'R^2': [0.599292247517454, 0.5553816486925056, 0.5895305279203282, 0.5469604232036354, 0.5829484206762812, 0.5898158173589084, 0.615031525104127, 0.5969288867638178, 0.6149639327940273, 0.5859134592129829], 'MSE': [4.5518552138165704, 5.0561588185142643, 4.6595320210221916, 5.0027369105400901, 4.730939590816182, 4.5862513941888494, 4.4342406737876088, 4.5501021058735835, 4.3620302951501797, 4.4390878853585312], 'Rp': [0.77513584855549533, 0.74906084525131111, 0.7702088509627053, 0.74195781351701318, 0.76522345132598213, 0.77054466205577921, 0.78509026099167589, 0.77384652250168839, 0.78465205493495371, 0.76811235467421624]}. 3 | Tried parameters {'K': 2}. Average performances: {'R^2': 0.5684405477699104, 'MSE': 4.8508536204724146, 'Rp': 0.76211232573597165}. 4 | All performances: {'R^2': [0.5354947515555483, 0.6129768996894718, 0.534273094986979, 0.6016838649411256, 0.6034836909700101, 0.5915501645928221, 0.5693970490467506, 0.5436076034843629, 0.5840170850393012, 0.5079212733927316], 'MSE': [4.9971693043173033, 4.3473073736557266, 5.3227836262553225, 4.5238812641532817, 4.4045600213924843, 4.5469276306306794, 4.9234982790841499, 5.1453205427494364, 4.9108052919138308, 5.3862828705719226], 'Rp': [0.73934668523319558, 0.78479342432850219, 0.73859884692321753, 0.77836225419999761, 0.7838541106278657, 0.77080907621239958, 0.76257986766853325, 0.7465182436344806, 0.77677861522742919, 0.73948213330409573]}. 5 | Tried parameters {'K': 3}. Average performances: {'R^2': 0.5258591400909807, 'MSE': 5.3308248627323369, 'Rp': 0.74864257761688435}. 6 | All performances: {'R^2': [0.5403557693156795, 0.5244276099913511, 0.5490958695708181, 0.617616179919101, 0.5489326001942245, 0.47207287092832495, 0.5870963557462132, 0.36612627767646344, 0.5768125479761206, 0.4760553195915104], 'MSE': [4.9853699811180672, 5.1300878508389349, 5.2585904784054343, 4.331580494273588, 5.1645677146319287, 5.8848412211935273, 4.5745775597817717, 7.0901497936401077, 4.7942399464072629, 6.0942435870327394], 'Rp': [0.75058268114278237, 0.74878245554076073, 0.75658718936752911, 0.79743869882474439, 0.758096430987103, 0.72690496636868307, 0.77647866956279687, 0.67909142865690453, 0.76846193283200115, 0.72400132288553809]}. 7 | Best performances: {'R^2': 0.5876766889244068, 'MSE': 4.6372934909068046, 'Rp': 0.76838326647708211}. Best parameters: {'K': 1}. 8 | -------------------------------------------------------------------------------- /experiments/experiments_ccle/cross_validation/ccle_ic_np_nmf/fold_2.txt: -------------------------------------------------------------------------------- 1 | Tried parameters {'K': 1}. Average performances: {'R^2': 0.5809689379908838, 'MSE': 4.6975160815548165, 'Rp': 0.76441365810623052}. 
2 | All performances: {'R^2': [0.596742611584289, 0.5790176887687597, 0.5619720272875866, 0.6254530245678394, 0.5700294095857155, 0.5792499513034616, 0.5722305509085497, 0.5611597775730899, 0.5691534210788772, 0.5946809172506685], 'MSE': [4.6727931877252393, 4.8688764727858471, 4.9064877927464581, 4.2435826902101015, 4.8933021125324787, 4.9789504195628176, 4.589877948538442, 4.6067522837161992, 4.7521996309464081, 4.4623382767841813], 'Rp': [0.77355144277342069, 0.76254918658423654, 0.75282254867779197, 0.79115655427897091, 0.75679242317508444, 0.76416740752416334, 0.76037857730914982, 0.75290079033097201, 0.75660727874410127, 0.77321037166441431]}. 3 | Tried parameters {'K': 2}. Average performances: {'R^2': 0.5532422127174393, 'MSE': 5.0039542470660185, 'Rp': 0.7550932461048897}. 4 | All performances: {'R^2': [0.6240256930361567, 0.614184862372549, 0.5325851476066428, 0.6100724412139076, 0.5717304140258538, 0.48320296375804184, 0.5379400102999448, 0.4663539768949878, 0.5534539043679487, 0.5388727135983606], 'MSE': [4.3779771531769276, 4.5080722286515291, 5.181990565016199, 4.3613597366085104, 4.8644199624620406, 5.7086812364809738, 5.1043290318164889, 5.7950500455737952, 5.0243177232428753, 5.1133447876308491], 'Rp': [0.79443654297444422, 0.78664906869251294, 0.74039612156479562, 0.7872922374261726, 0.76526175129689333, 0.71905976937252869, 0.74224203883745876, 0.71186164890903081, 0.75689841193287422, 0.74683487004218574]}. 5 | Tried parameters {'K': 3}. Average performances: {'R^2': 0.5365183433511134, 'MSE': 5.194918072679946, 'Rp': 0.75031743625327896}. 6 | All performances: {'R^2': [0.6002946366225028, 0.5174440662737703, 0.5534646596815548, 0.5414027067354412, 0.5168045001565171, 0.4950109432188847, 0.4306832339825627, 0.5845608868913246, 0.5518575261714523, 0.5736602737771241], 'MSE': [4.603452367493218, 5.6672272796427912, 4.8056029025431366, 4.9898977417854118, 5.3988828530615418, 5.5696699338592071, 6.178795910405162, 4.8738915328936701, 5.0768532850573971, 4.7849069200579271], 'Rp': [0.78501080202362328, 0.73750623404435067, 0.75874175928522125, 0.74892170869320285, 0.74454414614944597, 0.7319907737039334, 0.69744540590647819, 0.77363198289226642, 0.75684196278601612, 0.76853958704825154]}. 7 | Best performances: {'R^2': 0.5809689379908838, 'MSE': 4.6975160815548165, 'Rp': 0.76441365810623052}. Best parameters: {'K': 1}. 8 | -------------------------------------------------------------------------------- /experiments/experiments_ccle/cross_validation/ccle_ic_np_nmf/fold_3.txt: -------------------------------------------------------------------------------- 1 | Tried parameters {'K': 1}. Average performances: {'R^2': 0.5770301203197727, 'MSE': 4.7894443774157569, 'Rp': 0.76146127673478115}. 2 | All performances: {'R^2': [0.6010545415747162, 0.5450404270636202, 0.5839096007429101, 0.5443570617047796, 0.5596978837901593, 0.6116829470550653, 0.6053307462122484, 0.5732459696735912, 0.5880728061638161, 0.55790921921682], 'MSE': [4.5359102473849031, 5.247801675530031, 4.7689725138311481, 5.1896942576069431, 4.9584102613525696, 4.3670881309616787, 4.3882274847437905, 4.8174369431861832, 4.6539276158916492, 4.9669746436686779], 'Rp': [0.77664433005756872, 0.74152078783154673, 0.76648774123750774, 0.74124638150159183, 0.75093212441962354, 0.78323160090600641, 0.77904124954224607, 0.75953645770092948, 0.767782616775049, 0.74818947737574149]}. 3 | Tried parameters {'K': 2}. Average performances: {'R^2': 0.5474678950032861, 'MSE': 5.1165503955437641, 'Rp': 0.75106904387558104}. 
4 | All performances: {'R^2': [0.5755624560028856, 0.5553739902883607, 0.5272667015874346, 0.560986817670751, 0.5556885404493047, 0.4737348485704289, 0.581356132422387, 0.5594058698934261, 0.5159240618466885, 0.5693795313011931], 'MSE': [4.7727690821124282, 5.1724155625670614, 5.284178043489498, 4.7691745977918059, 5.152245207026807, 5.8011313362430812, 4.9307184795163952, 4.9396089526874825, 5.4040770869420784, 4.9391856070610061], 'Rp': [0.76105984705123142, 0.75426665867871789, 0.73184462952653273, 0.7530663273769086, 0.75319978337988258, 0.72336520829875639, 0.7691918481185186, 0.75835901603272948, 0.74055491120849659, 0.76578220908403594]}. 5 | Tried parameters {'K': 3}. Average performances: {'R^2': 0.5090391366469726, 'MSE': 5.5594872181210135, 'Rp': 0.73651902139470926}. 6 | All performances: {'R^2': [0.5601536602887773, 0.537458568726672, 0.5303385278775352, 0.4248889255895516, 0.5942131169923975, 0.5083841689886551, 0.5593328174271724, 0.41500477941886815, 0.45364145480544704, 0.5069753463546505], 'MSE': [5.0360489781364546, 5.0673356651211501, 5.2873279715122976, 6.4924115601706927, 4.6071093886787926, 5.4791890558354828, 5.0574891961730417, 6.7263527424475429, 6.2487021173508595, 5.5929055057838077], 'Rp': [0.76152947121309589, 0.75156989000280661, 0.74789111911353068, 0.69715009430773589, 0.77993790694290355, 0.73208195041531576, 0.76209632124283844, 0.69076712897229486, 0.70859140512306396, 0.73357492661350565]}. 7 | Best performances: {'R^2': 0.5770301203197727, 'MSE': 4.7894443774157569, 'Rp': 0.76146127673478115}. Best parameters: {'K': 1}. 8 | -------------------------------------------------------------------------------- /experiments/experiments_ccle/cross_validation/ccle_ic_np_nmf/fold_5.txt: -------------------------------------------------------------------------------- 1 | Tried parameters {'K': 1}. Average performances: {'R^2': 0.5846610365587009, 'MSE': 4.6898896282770277, 'Rp': 0.76675663347109335}. 2 | All performances: {'R^2': [0.6073257724660781, 0.5714045658085589, 0.5525842383910535, 0.57797530044815, 0.6104915351562262, 0.590901065637566, 0.5629806650202323, 0.5783922341960113, 0.5972771880159924, 0.5972778004471413], 'MSE': [4.4000308059103057, 4.9032454934484138, 4.9655529357940029, 4.8023500347149914, 4.2600462971004616, 4.8025451126329477, 4.8155044264474371, 4.6918799649394014, 4.6646797949280829, 4.5930614168542263], 'Rp': [0.78037686106595316, 0.75795073268299051, 0.74735552101447378, 0.76318838561230851, 0.78279312127176393, 0.77073273335679782, 0.75364003118147782, 0.7623823415005333, 0.77387815064435628, 0.7752684563802783]}. 3 | Tried parameters {'K': 2}. Average performances: {'R^2': 0.5555205958339418, 'MSE': 5.0196218407795223, 'Rp': 0.75794630568809285}. 4 | All performances: {'R^2': [0.5986705797435277, 0.5725447152940277, 0.5735972486972993, 0.5922980566146359, 0.5492218059312644, 0.5452296787493598, 0.5384455734096758, 0.5574686880364181, 0.48859787592792836, 0.5391317359352801], 'MSE': [4.5458042065927726, 4.9138477241420579, 4.8550373472274941, 4.6883875145322715, 5.1319531335960509, 5.0850469248329579, 5.0858809000621346, 4.8509693243010288, 5.7704378438728758, 5.2688534886355765], 'Rp': [0.77804987318428909, 0.76482059713903239, 0.7641110809787357, 0.77685624910075035, 0.75402910443916393, 0.75434079238845686, 0.74867023318105475, 0.75868640114193797, 0.72885423918418757, 0.75104448614332053]}. 5 | Tried parameters {'K': 3}. Average performances: {'R^2': 0.5381335127604856, 'MSE': 5.2175838077036962, 'Rp': 0.7531017493453116}. 
6 | All performances: {'R^2': [0.27724838010817354, 0.47575253592450895, 0.6042550071267403, 0.5498834333186184, 0.5951677494521638, 0.595980744006377, 0.5454259477630615, 0.5680859626342947, 0.6030756066294463, 0.5664597606414714], 'MSE': [8.2677545365553726, 5.9593342175321968, 4.6134796358397976, 4.7789048746360967, 4.4783051788051225, 4.597679976967501, 5.0892585962016552, 5.0181274756472956, 4.4332132319508197, 4.9397803529011055], 'Rp': [0.64290965975240566, 0.72129000150440115, 0.78650462178537939, 0.75337712545559254, 0.78027392647473393, 0.77799022917147709, 0.75263405199985356, 0.76635875559513777, 0.78498153672608073, 0.76469758498805385]}. 7 | Best performances: {'R^2': 0.5846610365587009, 'MSE': 4.6898896282770277, 'Rp': 0.76675663347109335}. Best parameters: {'K': 1}. 8 | -------------------------------------------------------------------------------- /experiments/experiments_ccle/cross_validation/ccle_ic_np_nmf/fold_10.txt: -------------------------------------------------------------------------------- 1 | Tried parameters {'K': 1}. Average performances: {'R^2': 0.5803406447185868, 'MSE': 4.7078885097005738, 'Rp': 0.76379605584591315}. 2 | All performances: {'R^2': [0.579603443083555, 0.6005804373848687, 0.5519952686049137, 0.5845977486157223, 0.5646501197777706, 0.5805258509974865, 0.5958982988257917, 0.5764829973178376, 0.5874216712186578, 0.5816506113592641], 'MSE': [4.6965861333953818, 4.4835531529403028, 4.9632725890055243, 4.7166917403876587, 4.7387749094708784, 4.7702260185461114, 4.5905286326373718, 4.8892285987723154, 4.4779729799451502, 4.7520503419050479], 'Rp': [0.76451355516282282, 0.77654803769709513, 0.74530340229919556, 0.76608474578272823, 0.753099947551084, 0.76403263562658774, 0.77334660412123057, 0.76156912628924955, 0.76906149678356639, 0.76440100714557113]}. 3 | Tried parameters {'K': 2}. Average performances: {'R^2': 0.5582460522262854, 'MSE': 4.9521749955708767, 'Rp': 0.75878176743809145}. 4 | All performances: {'R^2': [0.6002283975157652, 0.5648988489376408, 0.5288317731450323, 0.5876991583021669, 0.5695053914342509, 0.5158685054621471, 0.6223486275909867, 0.5644654503622191, 0.6037742105908777, 0.4248401589217675], 'MSE': [4.510455524748628, 5.0160855326117773, 5.1889191682661799, 4.6409396205438194, 4.8293261417365647, 5.5031787829183338, 4.2658980519566816, 4.6827566024830869, 4.5589329862067105, 6.3252575442369796], 'Rp': [0.7812018033422401, 0.75861852300274013, 0.74199636471206309, 0.77424424823902938, 0.76264848012088771, 0.74076394622232378, 0.79247705727205875, 0.76083237673246684, 0.78328471533329358, 0.6917501594038109]}. 5 | Tried parameters {'K': 3}. Average performances: {'R^2': 0.4329433334443459, 'MSE': 6.3247875787367374, 'Rp': 0.72416487364093807}. 6 | All performances: {'R^2': [0.5460291329220222, 0.413501228874781, 0.5298650158542253, 0.5964544715775122, 0.3495967165337064, -0.2740151211630466, 0.6028597909457314, 0.49374686648078236, 0.5771150052374752, 0.49428022718026987], 'MSE': [5.3273495174309975, 6.7026409143961478, 5.3409689547010917, 4.5595672390546529, 7.1026232277483592, 13.796015005222822, 4.4527421791849182, 5.5209707273106234, 4.7606615154748217, 5.6843365068429454], 'Rp': [0.75442685031381562, 0.69297090085950808, 0.75254289792828277, 0.78287598675168502, 0.67054064480825293, 0.56363362041910103, 0.78946316479876466, 0.73511977023285835, 0.76958579311771291, 0.73048910717939897]}. 7 | Best performances: {'R^2': 0.5803406447185868, 'MSE': 4.7078885097005738, 'Rp': 0.76379605584591315}. Best parameters: {'K': 1}. 
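A minimal sketch (not from the repository code) of the selection rule these fold files appear to follow: the value of K with the lowest average inner-fold MSE is reported as the best parameter setting. In this fold MSE and R^2 pick the same winner, so the exact criterion cannot be told apart from the numbers alone; the averages below are copied from the lines above.

# Hypothetical helper, not part of the original experiment scripts.
average_mse_per_K = {1: 4.7078885097005738, 2: 4.9521749955708767, 3: 6.3247875787367374}
best_K = min(average_mse_per_K, key=average_mse_per_K.get)
print "Best parameters: {'K': %s}" % best_K   # -> {'K': 1}, matching the line above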
8 | -------------------------------------------------------------------------------- /experiments/experiments_ccle/cross_validation/ccle_ic_np_nmf/fold_4.txt: -------------------------------------------------------------------------------- 1 | Tried parameters {'K': 1}. Average performances: {'R^2': 0.5788156817785745, 'MSE': 4.7440811246210313, 'Rp': 0.7629034411674479}. 2 | All performances: {'R^2': [0.5865946267477808, 0.5682340542093237, 0.6405657551849558, 0.5558914710042061, 0.5670043375843827, 0.5905722975419019, 0.5336624908738525, 0.5881489780756943, 0.5561936314003003, 0.6012891751633459], 'MSE': [4.7310189056595675, 4.9262652924034809, 4.078611428468701, 4.9480842738700366, 4.9632690254352045, 4.5680891178987109, 5.0029879792509586, 4.5395749807895376, 5.1021455886878373, 4.5807646537462716], 'Rp': [0.76853549032506108, 0.75554023786979407, 0.80046278150270622, 0.75016397877247987, 0.75627106093525187, 0.77207529613176351, 0.73395092603827505, 0.76848577730261181, 0.74809329101748456, 0.77545557177905045]}. 3 | Tried parameters {'K': 2}. Average performances: {'R^2': 0.5640548013348997, 'MSE': 4.9090774431829489, 'Rp': 0.76033618931570346}. 4 | All performances: {'R^2': [0.5591867738843999, 0.5769773720679048, 0.6036642901221293, 0.5896394078779017, 0.4993774707670251, 0.5582261071997894, 0.5536381672917026, 0.6050414229982104, 0.5277384826785458, 0.5670585184613889], 'MSE': [4.9763470513526915, 4.7484115882418561, 4.6406823855772474, 4.7211084070290097, 5.6537600926757499, 5.0924254439601633, 4.9908846724672999, 4.4895556266834689, 5.2253164675977288, 4.552282696244272], 'Rp': [0.75478419301680699, 0.76677682930730073, 0.78153261290956566, 0.77392360435843688, 0.72999565811081368, 0.75663837396883904, 0.75489077612930477, 0.78557572354598892, 0.73864767577483503, 0.76059644603514232]}. 5 | Tried parameters {'K': 3}. Average performances: {'R^2': 0.5412661147927961, 'MSE': 5.1657000396164552, 'Rp': 0.75358807477043688}. 6 | All performances: {'R^2': [0.5171065887028614, 0.5319299208824039, 0.5657588725632409, 0.4522967347326965, 0.5339577715492605, 0.630899957475058, 0.5351876557931077, 0.6166468799327876, 0.5075291085998318, 0.5213476576967125], 'MSE': [5.5659857605392782, 5.3358875416476401, 4.8157225417783822, 6.2529764682260636, 5.2360154063686499, 4.1838807752400813, 5.2189353001875771, 4.4247158868862106, 5.3640065405248212, 5.2588741747658529], 'Rp': [0.75071349385273789, 0.74560685633134394, 0.76222151424792739, 0.71282192236323594, 0.75040099003864391, 0.79899703256848276, 0.74768017539212206, 0.79090949019367351, 0.73853311823996315, 0.73799615447623879]}. 7 | Best performances: {'R^2': 0.5788156817785745, 'MSE': 4.7440811246210313, 'Rp': 0.7629034411674479}. Best parameters: {'K': 1}. 8 | -------------------------------------------------------------------------------- /experiments/experiments_ccle/cross_validation/ccle_ic_np_nmf/fold_9.txt: -------------------------------------------------------------------------------- 1 | Tried parameters {'K': 1}. Average performances: {'R^2': 0.5788055478458782, 'MSE': 4.7121312940649487, 'Rp': 0.76286128789951202}. 
2 | All performances: {'R^2': [0.5498192902587136, 0.5762166605027792, 0.6110324022974549, 0.5444997420431797, 0.6231097469134808, 0.6074870756756103, 0.5149837093842093, 0.5639895803067945, 0.6038072954955447, 0.5931099755810159], 'MSE': [4.9442923281895155, 4.6906436044942641, 4.5244795709048873, 5.0131159011803197, 4.2831299847977853, 4.5348334058809101, 5.3621537165526156, 4.7908162635282636, 4.4369543960380353, 4.5408937690828957], 'Rp': [0.74411099958734939, 0.76197886591018704, 0.78232300957836753, 0.74147014284582091, 0.79035990384714816, 0.77999034751416141, 0.72408814244237696, 0.75447138223927834, 0.77824262300777369, 0.77157746202265509]}. 3 | Tried parameters {'K': 2}. Average performances: {'R^2': 0.5224721279471691, 'MSE': 5.3523066375974935, 'Rp': 0.74298171374638422}. 4 | All performances: {'R^2': [0.5656803685025703, 0.5316627325217892, 0.591140304932898, 0.5001650197048881, 0.275513848823845, 0.49630338479452907, 0.5627207692702139, 0.5932122698886195, 0.5358150474013654, 0.5725075336309726], 'MSE': [5.0108657241746206, 5.2898725076299131, 4.6990875884916168, 5.8241596904099895, 8.1960466072870393, 5.3807051959214851, 4.8257087538782848, 4.5235132636359667, 5.1194970392217582, 4.6536100053242579], 'Rp': [0.76081204172907613, 0.74576315161815898, 0.77273246575896393, 0.72250243664135116, 0.64471802089392816, 0.7280547744511483, 0.76084994198593825, 0.77853119799315895, 0.75121281185653632, 0.76464029453558136]}. 5 | Tried parameters {'K': 3}. Average performances: {'R^2': 0.49882383110553574, 'MSE': 5.6014365475089782, 'Rp': 0.7335897681845045}. 6 | All performances: {'R^2': [0.4979285826647162, 0.5078318349514966, 0.5370842929765497, 0.5261297199227486, 0.49478715983864985, 0.5181273024224665, 0.3635429875022478, 0.4866876708250858, 0.4870546421896771, 0.5690641177617197], 'MSE': [5.6022114477648666, 5.6940073275222449, 5.1270029611472649, 5.4075444999672087, 5.5400881267055171, 5.5134804710439438, 6.6435095542392357, 5.6669675134381006, 5.9697375950129841, 4.8498159782484098], 'Rp': [0.72790390670407468, 0.73671029243674768, 0.751870662799628, 0.74791505556489202, 0.7273977453187348, 0.75316697298500024, 0.67908787903285139, 0.72385549058070464, 0.72285980515217696, 0.76512987127023335]}. 7 | Best performances: {'R^2': 0.5788055478458782, 'MSE': 4.7121312940649487, 'Rp': 0.76286128789951202}. Best parameters: {'K': 1}. 8 | -------------------------------------------------------------------------------- /experiments/experiments_toy/grid_search/run_line_search_bnmf_gibbs.py: -------------------------------------------------------------------------------- 1 | """ 2 | Run the line search method for finding the best value for K for BNMF. 3 | We use the parameters for the true priors. 4 | 5 | The BIC tends to give overly simple models, preferring K=1 oftentimes. 6 | The log likelihood and AIC tend to peak at the true K if the correct priors are 7 | given (this has to do with converging to a good local minimum). 8 | 9 | If we give the wrong prior (true/5) we still obtain good convergence (with 10 | true*5 all values get pushed to 0, leading to terrible solutions), and we get 11 | a nice peak for the log likelihood and AIC around the true K. 
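For reference, the standard definitions (not taken from this code): with maximised log
likelihood L and P free parameters, AIC = 2*P - 2*L and BIC = P*ln(n) - 2*L, where n is the
number of observed entries in R. Lower values of both indicate a better trade-off between
fit and complexity; the ln(n) penalty is what makes the BIC prefer smaller K here.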
12 | """ 13 | 14 | import sys, os 15 | project_location = os.path.dirname(__file__)+"/../../../../" 16 | sys.path.append(project_location) 17 | 18 | from BNMTF.data_toy.bnmf.generate_bnmf import generate_dataset, try_generate_M 19 | from BNMTF.code.cross_validation.line_search_bnmf import LineSearch 20 | from BNMTF.code.models.bnmf_gibbs_optimised import bnmf_gibbs_optimised 21 | 22 | import numpy, matplotlib.pyplot as plt 23 | 24 | ########## 25 | 26 | restarts = 10 27 | iterations = 1000 28 | burn_in = 800 29 | thinning = 5 30 | 31 | I, J = 100, 80 32 | true_K = 10 33 | values_K = range(1,20+1) 34 | 35 | fraction_unknown = 0.1 36 | attempts_M = 100 37 | 38 | alpha, beta = 1., 1. #1., 1. 39 | tau = alpha / beta 40 | lambdaU = numpy.ones((I,true_K)) 41 | lambdaV = numpy.ones((J,true_K)) 42 | 43 | classifier = bnmf_gibbs_optimised 44 | initUV = 'random' 45 | 46 | # Generate data 47 | (_,_,_,_,R) = generate_dataset(I,J,true_K,lambdaU,lambdaV,tau) 48 | M = numpy.ones((I,J)) 49 | #M = try_generate_M(I,J,fraction_unknown,attempts_M) 50 | 51 | # Run the line search. The priors lambdaU and lambdaV need to be a single value (recall K is unknown) 52 | priors = { 'alpha':alpha, 'beta':beta, 'lambdaU':lambdaU[0,0]/10, 'lambdaV':lambdaV[0,0]/10 } 53 | line_search = LineSearch(classifier,values_K,R,M,priors,initUV,iterations,restarts) 54 | line_search.search(burn_in,thinning) 55 | 56 | # Plot the performances of all three metrics - but MSE separately 57 | metrics = ['loglikelihood', 'BIC', 'AIC', 'MSE'] 58 | for metric in metrics: 59 | plt.figure() 60 | plt.plot(values_K, line_search.all_values(metric), label=metric) 61 | plt.legend(loc=3) 62 | 63 | # Also print out all values in a dictionary 64 | all_values = {} 65 | for metric in metrics: 66 | all_values[metric] = line_search.all_values(metric) 67 | 68 | print "all_values = %s" % all_values 69 | 70 | ''' 71 | 72 | ''' -------------------------------------------------------------------------------- /experiments/experiments_gdsc/cross_validation/kbmf/run_kbmf.R: -------------------------------------------------------------------------------- 1 | source("kbmf_regression_train.R") 2 | source("kbmf_regression_test.R") 3 | 4 | set.seed(1606) 5 | 6 | Px <- 3 7 | Nx <- 622 8 | Pz <- 3 9 | Nz <- 138 10 | 11 | # Load in the drug sensitivity values 12 | folder_drug_sensitivity <- '/Users/thomasbrouwer/Documents/Projects/libraries/BNMTF/data_drug_sensitivity/gdsc/' 13 | name_drug_sensitivity <- 'ic50_excl_empty_filtered_cell_lines_drugs.txt' 14 | Y <- as.matrix(read.table(paste(folder_drug_sensitivity,name_drug_sensitivity,sep=''), 15 | header=TRUE, 16 | sep=',', 17 | colClasses=c(rep("NULL",3), rep("numeric",138)))) 18 | 19 | print("Loaded data") 20 | 21 | # Load in the kernels - X = cancer cell lines, Z = drugs 22 | folder_kernels <- '/Users/thomasbrouwer/Documents/Projects/libraries/BNMTF/data_drug_sensitivity/gdsc/kernels/' 23 | 24 | kernel_copy_variation <- as.matrix(read.table(paste(folder_kernels,'copy_variation.txt',sep=''),header=TRUE,sep='\t')) 25 | kernel_gene_expression <- as.matrix(read.table(paste(folder_kernels,'gene_expression.txt',sep=''),header=TRUE,sep='\t')) 26 | kernel_mutation <- as.matrix(read.table(paste(folder_kernels,'mutation.txt',sep=''),header=TRUE,sep='\t')) 27 | 28 | kernel_1d2d <- as.matrix(read.table(paste(folder_kernels,'1d2d_descriptors.txt',sep=''),header=TRUE,sep=',')) 29 | kernel_fingerprints<- as.matrix(read.table(paste(folder_kernels,'PubChem_fingerprints.txt',sep=''),header=TRUE,sep=',')) 30 | kernel_targets <- 
as.matrix(read.table(paste(folder_kernels,'targets.txt',sep=''),header=TRUE,sep=',')) 31 | 32 | Kx <- array(0, c(Nx, Nx, Px)) 33 | Kx[,, 1] <- kernel_copy_variation 34 | Kx[,, 2] <- kernel_gene_expression 35 | Kx[,, 3] <- kernel_mutation 36 | 37 | Kz <- array(0, c(Nz, Nz, Pz)) 38 | Kz[,, 1] <- kernel_1d2d 39 | Kz[,, 2] <- kernel_fingerprints 40 | Kz[,, 3] <- kernel_targets 41 | 42 | print("Loaded kernels") 43 | 44 | # Train the model, and test the performance on the training data 45 | state <- kbmf_regression_train(Kx, Kz, Y, 10) 46 | prediction <- kbmf_regression_test(Kx, Kz, state) 47 | 48 | print("Trained model") 49 | #print(prediction$Y$mu) 50 | 51 | print(sprintf("MSE = %.4f", mean((prediction$Y$mu - Y)^2, na.rm=TRUE ))) 52 | # R=5, 200 iterations: "MSE = 2.0170" 53 | # R=5, 1000 iterations: "MSE = 2.0131" 54 | # R=10, 100 iterations: "MSE = 1.5869" 55 | # R=10, 200 iterations: "MSE = 1.5736" 56 | # R=10, 1000 iterations: "MSE = 1.5644" 57 | 58 | print("kernel weights on X") 59 | print(state$ex$mu) 60 | 61 | print("kernel weights on Z") 62 | print(state$ez$mu) 63 | -------------------------------------------------------------------------------- /experiments/experiments_gdsc/cross_validation/np_nmtf/fold_1.txt: -------------------------------------------------------------------------------- 1 | Tried parameters {'K': 6, 'L': 6}. Average performances: {'R^2': 0.7962793111127799, 'MSE': 2.3942128786704253, 'Rp': 0.89255379031404714}. 2 | All performances: {'R^2': [0.7904854226278109, 0.7977480320423695, 0.7967836001359289, 0.7896255269460981, 0.8003652266849697, 0.8048060186925018, 0.7989542277743389, 0.7903809106476973, 0.7980343043693358, 0.7956098412067476], 'MSE': [2.422273364658484, 2.3630302114580588, 2.3672172193024279, 2.4448223591251672, 2.3511364821316394, 2.3274346177947707, 2.4149613416010665, 2.433361714709386, 2.4232179007302661, 2.3946735751929906], 'Rp': [0.88938815315203712, 0.89344672984968354, 0.89279324860415155, 0.88895013720269911, 0.89472480723056569, 0.89722386636305007, 0.89394445210840856, 0.88927457802807974, 0.893449912747178, 0.8923420178546172]}. 3 | Tried parameters {'K': 8, 'L': 8}. Average performances: {'R^2': 0.8040323686653522, 'MSE': 2.3026338827465, 'Rp': 0.89702020965730611}. 4 | All performances: {'R^2': [0.8006527841042816, 0.8103274513761641, 0.8089860752213169, 0.7992510418334184, 0.8033775036229067, 0.808599092203559, 0.8041182552951762, 0.8011531209246552, 0.8039437041995383, 0.7999146578725052], 'MSE': [2.4076049889234357, 2.2510060279166928, 2.2888403594696261, 2.2639094244927089, 2.2626514184189954, 2.263015092042711, 2.2966149451504321, 2.2578180945236928, 2.3526107090099173, 2.3822677675167911], 'Rp': [0.89516742029212826, 0.90032136294324194, 0.8995513518121524, 0.89453020572987596, 0.89650478845426862, 0.89955753916293113, 0.89723956718663933, 0.89566903124067365, 0.89683611491299475, 0.89482471483815451]}. 5 | Tried parameters {'K': 10, 'L': 10}. Average performances: {'R^2': 0.8044644076252941, 'MSE': 2.2972110493106284, 'Rp': 0.89737508381421827}. 
6 | All performances: {'R^2': [0.801553768283229, 0.8168281002065513, 0.8017482560228743, 0.8164645745982658, 0.7974793410175808, 0.8126752109488761, 0.7922738642286442, 0.8121128144351457, 0.7922437016984221, 0.8012644448133527], 'MSE': [2.3401138139820441, 2.2412933449738417, 2.3301598817292413, 2.1277149835149576, 2.4372391411811156, 2.256260196211139, 2.3860111097544703, 2.181558246411071, 2.3543859005330581, 2.3173738748153485], 'Rp': [0.89583190351821995, 0.90406019464110221, 0.89583155180606222, 0.90394382579429122, 0.89361333406802179, 0.90167499115860139, 0.89111621416927489, 0.90140803161731942, 0.89083874744861835, 0.89543204392067077]}. 7 | Best performances: {'R^2': 0.8044644076252941, 'MSE': 2.2972110493106284, 'Rp': 0.89737508381421827}. Best parameters: {'K': 10, 'L': 10}. 8 | -------------------------------------------------------------------------------- /experiments/experiments_gdsc/cross_validation/np_nmtf/fold_10.txt: -------------------------------------------------------------------------------- 1 | Tried parameters {'K': 6, 'L': 6}. Average performances: {'R^2': 0.7962027030089187, 'MSE': 2.3936326519001163, 'Rp': 0.89252895387061104}. 2 | All performances: {'R^2': [0.7959869706381066, 0.7951509497634806, 0.7969493235960402, 0.7917983478265285, 0.7996969448736383, 0.7945761499005599, 0.7896867883958768, 0.8010506447138536, 0.7973366253719005, 0.7997942850092019], 'MSE': [2.4125360112093301, 2.4321089495536086, 2.3785545457598629, 2.4278312020304433, 2.3925625440518412, 2.3547068077976792, 2.3686869906807329, 2.3720104723133737, 2.4085107533355119, 2.3888182422687745], 'Rp': [0.89247689970218513, 0.8918651487165179, 0.89293690822373861, 0.89014332519541384, 0.89434738602375319, 0.89179573129795087, 0.889038490013907, 0.89510910936092192, 0.8931810196232669, 0.89439552054845484]}. 3 | Tried parameters {'K': 8, 'L': 8}. Average performances: {'R^2': 0.8028743159206282, 'MSE': 2.3159767919964684, 'Rp': 0.89639111146093975}. 4 | All performances: {'R^2': [0.8019130426479211, 0.8039798797754616, 0.8017096416266156, 0.8045136679705778, 0.805594218026878, 0.8055794698377987, 0.8046734984279295, 0.8019207232836603, 0.8022474012860819, 0.796611616323357], 'MSE': [2.2756878597227019, 2.3261329010708769, 2.3246602151872202, 2.3359126338758314, 2.3291356177319114, 2.2818278400775198, 2.3200395012492949, 2.3388578608981754, 2.2875386837406597, 2.3399748064104879], 'Rp': [0.8957245125924933, 0.89681567482548652, 0.89566170837658976, 0.89711747083963589, 0.89773778255997994, 0.89793974646105512, 0.89758487382370156, 0.89600275232751192, 0.89629057551020386, 0.89303601729273985]}. 5 | Tried parameters {'K': 10, 'L': 10}. Average performances: {'R^2': 0.8056584869493152, 'MSE': 2.2828101914911216, 'Rp': 0.89804463141550228}. 6 | All performances: {'R^2': [0.7952093373746402, 0.8019811480395942, 0.8096655946682076, 0.7972032570401394, 0.8108057961654705, 0.8077523366359415, 0.7928828981339053, 0.8182132920277003, 0.810007866782938, 0.8128633426246147], 'MSE': [2.4014181521428939, 2.2994028205470158, 2.2961444437199772, 2.3258121356602111, 2.175861105126502, 2.2374753813735562, 2.4287027811997315, 2.1554886929719257, 2.2572489420291766, 2.2505474601402278], 'Rp': [0.8924534481072659, 0.89617879160321468, 0.90016992738919865, 0.8933114452597315, 0.90100948537653491, 0.89956761398233076, 0.89081967738690337, 0.90502044855233454, 0.90018260254792426, 0.90173287394958423]}. 7 | Best performances: {'R^2': 0.8056584869493152, 'MSE': 2.2828101914911216, 'Rp': 0.89804463141550228}. 
Best parameters: {'K': 10, 'L': 10}. 8 | -------------------------------------------------------------------------------- /experiments/experiments_gdsc/cross_validation/np_nmtf/fold_2.txt: -------------------------------------------------------------------------------- 1 | Tried parameters {'K': 6, 'L': 6}. Average performances: {'R^2': 0.7946522802147737, 'MSE': 2.4091382983018734, 'Rp': 0.89169311760322434}. 2 | All performances: {'R^2': [0.8000569040927532, 0.8091322941196964, 0.7957285744250913, 0.7901977707725168, 0.7915534329410909, 0.7945219387414444, 0.7954266072892766, 0.7877669115389623, 0.786394673269638, 0.7957436949572676], 'MSE': [2.3948155427310391, 2.2863440424424439, 2.3948497443038783, 2.4579662634655768, 2.454431927336242, 2.3738582922274305, 2.3919940020141639, 2.4595950303302856, 2.4739895279683979, 2.4035386101992748], 'Rp': [0.8946687142763815, 0.89968079972229509, 0.89230491886791075, 0.88914375470802232, 0.89010508005765621, 0.89175323078082958, 0.8919296638755716, 0.88797231037752344, 0.88720035247823026, 0.89217235088782243]}. 3 | Tried parameters {'K': 8, 'L': 8}. Average performances: {'R^2': 0.8009583033431479, 'MSE': 2.3354299140272849, 'Rp': 0.89527503027427147}. 4 | All performances: {'R^2': [0.8038189537325939, 0.8067094787430149, 0.799421147933612, 0.7940147353250736, 0.7954182713439355, 0.8112072748202167, 0.8033374435947094, 0.8008082361604596, 0.7962787608890061, 0.7985687308888577], 'MSE': [2.2775440014675103, 2.3073840327230726, 2.2251856978503253, 2.3944902356305415, 2.3830037099235617, 2.2085859675496602, 2.3910241568014086, 2.3743954219025736, 2.3728241080673667, 2.4198618083568322], 'Rp': [0.89689384936601291, 0.89861580020976839, 0.89486680983261369, 0.89134664287576593, 0.89208889220503118, 0.90084165616134937, 0.89653281875824209, 0.89498173965876215, 0.89252204709999583, 0.8940600465751728]}. 5 | Tried parameters {'K': 10, 'L': 10}. Average performances: {'R^2': 0.8029663502145356, 'MSE': 2.3120449560692848, 'Rp': 0.89650829367241669}. 6 | All performances: {'R^2': [0.8118443788980988, 0.7982761582334134, 0.8054113508909871, 0.8000401187474862, 0.7909248439189163, 0.8115191023401522, 0.802353454367045, 0.7974777338971236, 0.8046538800343781, 0.8071624808177548], 'MSE': [2.238817749613915, 2.3718311071675506, 2.321085138854412, 2.3069904316570242, 2.4384709383389773, 2.2660479738340973, 2.2465856242139819, 2.4028720226236793, 2.2737899950494542, 2.2539585793397547], 'Rp': [0.90126474846899507, 0.89383068465732296, 0.89782899235744995, 0.8951151917108997, 0.88987891254965101, 0.90116928601449775, 0.89640284958312444, 0.89340672124408593, 0.89718488657153039, 0.89900066356661046]}. 7 | Best performances: {'R^2': 0.8029663502145356, 'MSE': 2.3120449560692848, 'Rp': 0.89650829367241669}. Best parameters: {'K': 10, 'L': 10}. 8 | -------------------------------------------------------------------------------- /experiments/experiments_gdsc/cross_validation/np_nmtf/fold_4.txt: -------------------------------------------------------------------------------- 1 | Tried parameters {'K': 6, 'L': 6}. Average performances: {'R^2': 0.7950874521624522, 'MSE': 2.4099176058721867, 'Rp': 0.89188437812650834}. 
2 | All performances: {'R^2': [0.7934265600418325, 0.794397141121076, 0.7834791018577417, 0.7946554668516492, 0.7912707234891984, 0.7954116360248146, 0.8020378836631872, 0.8002171280111819, 0.797782279209116, 0.7981966013547238], 'MSE': [2.3972913804342868, 2.4462897570763844, 2.545852871423413, 2.4025244229289457, 2.3990153289118039, 2.3657112797051689, 2.3982445616113042, 2.2887828923566937, 2.4565279959010407, 2.3989355683728242], 'Rp': [0.8910131495949245, 0.8914621928743427, 0.88536410169325674, 0.89150482081937421, 0.88979551146025782, 0.89252186873863548, 0.8956392728458733, 0.89473361294657283, 0.89327820230196953, 0.8935310479898767]}. 3 | Tried parameters {'K': 8, 'L': 8}. Average performances: {'R^2': 0.8018644803855853, 'MSE': 2.3306768091956491, 'Rp': 0.89585244288485377}. 4 | All performances: {'R^2': [0.7998618381825771, 0.7984758980125775, 0.7975094166463333, 0.7993220682686236, 0.8067454463416195, 0.8126131749328396, 0.8012243311235306, 0.8004104174758595, 0.8039269011633705, 0.7985553117085221], 'MSE': [2.3616973653495599, 2.3922748088099079, 2.4469261165153782, 2.3685299013018382, 2.2846072647427667, 2.2208802602255195, 2.2528069487309055, 2.3108692766233925, 2.2387157946497718, 2.4294603550074498], 'Rp': [0.89468738306211437, 0.8939699469380411, 0.89333507283527014, 0.89438184405604759, 0.89863487780213458, 0.90158145610463758, 0.89542621542839362, 0.89547815412944198, 0.89724062710633579, 0.89378885138612052]}. 5 | Tried parameters {'K': 10, 'L': 10}. Average performances: {'R^2': 0.8051988981636494, 'MSE': 2.2906800918261339, 'Rp': 0.89775970520821491}. 6 | All performances: {'R^2': [0.8039768394265854, 0.7951560725701752, 0.7986521865506377, 0.8085563548487484, 0.8077833228146585, 0.8144529561676849, 0.7988272888585155, 0.8056176811720999, 0.8042356870845461, 0.8147305921428414], 'MSE': [2.2864434283187531, 2.4051797154234449, 2.2952099931550434, 2.28558345606428, 2.3286454151639648, 2.2131017894237646, 2.3558272688800757, 2.2807439502444762, 2.2758331563451724, 2.1802327452423627], 'Rp': [0.89714797830755455, 0.89204387677125041, 0.894204692685447, 0.89953630450523814, 0.89893001148813634, 0.9028470593014466, 0.89405400165104298, 0.89821175949161602, 0.89729245399739443, 0.90332891388302305]}. 7 | Best performances: {'R^2': 0.8051988981636494, 'MSE': 2.2906800918261339, 'Rp': 0.89775970520821491}. Best parameters: {'K': 10, 'L': 10}. 8 | -------------------------------------------------------------------------------- /experiments/experiments_gdsc/cross_validation/np_nmtf/fold_5.txt: -------------------------------------------------------------------------------- 1 | Tried parameters {'K': 6, 'L': 6}. Average performances: {'R^2': 0.7958402302389869, 'MSE': 2.3945373072588754, 'Rp': 0.89235187386994297}. 2 | All performances: {'R^2': [0.7940207279673263, 0.795837407255736, 0.7993871010255709, 0.7902867310992964, 0.792361322799282, 0.8071115770611526, 0.7881777543312496, 0.7970320689931153, 0.8003778802941676, 0.7938097315629732], 'MSE': [2.4081763119130284, 2.3678890233351226, 2.3789678487694337, 2.4541092846724992, 2.4239173652173802, 2.2585787225459888, 2.4508681036866693, 2.3780773014054111, 2.3869254806122329, 2.4378636304309866], 'Rp': [0.89139752102312586, 0.89266019778280037, 0.89444792022457265, 0.88927831495569287, 0.89034727241906342, 0.89848210168041975, 0.88820812803332394, 0.89278446280918677, 0.89477556881278963, 0.8911372509584542]}. 3 | Tried parameters {'K': 8, 'L': 8}. 
Average performances: {'R^2': 0.8019570441102051, 'MSE': 2.3232871438767111, 'Rp': 0.89584534414652794}. 4 | All performances: {'R^2': [0.8048247888815004, 0.798500936204051, 0.8066825644642783, 0.8018207342898751, 0.8130024542366234, 0.7983025758139942, 0.7992729913073152, 0.8053085345934738, 0.7944551253934088, 0.7973997359175296], 'MSE': [2.3183898000977181, 2.2842494373009536, 2.2495209058448578, 2.2800699453996081, 2.1817190188188023, 2.4009495614278324, 2.3823247222872133, 2.2912030018639888, 2.4160814299723405, 2.4283636157537987], 'Rp': [0.89742028268222795, 0.89395156221256966, 0.89870803685506662, 0.89560520110452202, 0.90205487880530766, 0.89395862285382088, 0.8942347332010061, 0.89767678873256274, 0.89168008020363809, 0.89316325481455783]}. 5 | Tried parameters {'K': 10, 'L': 10}. Average performances: {'R^2': 0.804128574013742, 'MSE': 2.2970066124616162, 'Rp': 0.89724386792145572}. 6 | All performances: {'R^2': [0.8018918940502309, 0.796376676917294, 0.8062295864621227, 0.8082060853516105, 0.8124761605307512, 0.796538157062373, 0.8134384566254184, 0.792980154343343, 0.8111628238974505, 0.8019857448968262], 'MSE': [2.3228671565079866, 2.4038097663148181, 2.3105097578576017, 2.2190218756207134, 2.1603316694782113, 2.3226214136563268, 2.2680059983564402, 2.3996616134702533, 2.230436034835257, 2.3328008385185544], 'Rp': [0.89596193285991099, 0.89294528687928565, 0.89823431276554377, 0.89925281985266425, 0.90203515241436072, 0.89295471952452565, 0.90251673601363913, 0.89130734779662391, 0.90140689845976385, 0.89582347264824025]}. 7 | Best performances: {'R^2': 0.804128574013742, 'MSE': 2.2970066124616162, 'Rp': 0.89724386792145572}. Best parameters: {'K': 10, 'L': 10}. 8 | -------------------------------------------------------------------------------- /experiments/experiments_gdsc/cross_validation/np_nmtf/fold_7.txt: -------------------------------------------------------------------------------- 1 | Tried parameters {'K': 6, 'L': 6}. Average performances: {'R^2': 0.7962373804844697, 'MSE': 2.3853968914479422, 'Rp': 0.89256292745410359}. 2 | All performances: {'R^2': [0.8015608956764622, 0.7876138719134953, 0.7935886607772044, 0.7892693423573884, 0.7987991598231307, 0.7938395728539742, 0.796096706569903, 0.8047219572561222, 0.7983943468917059, 0.7984892907253112], 'MSE': [2.3495799604902268, 2.4571317674208109, 2.4023851550842719, 2.4682236903080623, 2.3387523111291411, 2.4153742583515334, 2.403010174729153, 2.303202124129101, 2.3685595362115213, 2.3477499366256001], 'Rp': [0.89563064955076099, 0.88795531351128754, 0.8910866625222128, 0.88864332874497964, 0.89405702524284802, 0.89136444225606593, 0.89240422521857332, 0.89714012724208325, 0.89363408197011895, 0.8937134182821056]}. 3 | Tried parameters {'K': 8, 'L': 8}. Average performances: {'R^2': 0.801060277630849, 'MSE': 2.3288473103475309, 'Rp': 0.89532909472238364}. 4 | All performances: {'R^2': [0.8038257081318991, 0.7916609129375809, 0.7970632591201944, 0.8137899871206278, 0.803238527407148, 0.7991606776891241, 0.8001396576693462, 0.8014644315127357, 0.8017419185025294, 0.7985176962173043], 'MSE': [2.3377803883604593, 2.4865238573561879, 2.3508379593618489, 2.180610092879713, 2.3671274459237561, 2.3350184560180214, 2.3391118099968953, 2.3184257824042969, 2.3307482815339635, 2.2422890296401667], 'Rp': [0.89692183415336268, 0.89013716032794021, 0.89336956941142875, 0.90236924566559151, 0.89650782879980195, 0.89421733242733037, 0.89470885119314936, 0.89555373955483519, 0.8956627074752348, 0.89384267821515906]}. 
5 | Tried parameters {'K': 10, 'L': 10}. Average performances: {'R^2': 0.8049883317548986, 'MSE': 2.2830135555330942, 'Rp': 0.89766937945265668}. 6 | All performances: {'R^2': [0.8015963912749063, 0.8083878374418325, 0.8044650965318931, 0.8056131931880682, 0.8041182151113851, 0.8076909047647529, 0.7997312196829396, 0.8076027952025622, 0.8100207287101648, 0.8006569356404791], 'MSE': [2.325063405979956, 2.2633171437171256, 2.2128991414886454, 2.3228738184807427, 2.2825584293815857, 2.3202674699303674, 2.298570078595001, 2.2318234984901948, 2.176913347645943, 2.3958492216213787], 'Rp': [0.89576918933267324, 0.89976002518785425, 0.8974568840439372, 0.89792639689406428, 0.89746256458333418, 0.89894720356090274, 0.89491354387162947, 0.89906124420735756, 0.90027894167880318, 0.89511780116601025]}. 7 | Best performances: {'R^2': 0.8049883317548986, 'MSE': 2.2830135555330942, 'Rp': 0.89766937945265668}. Best parameters: {'K': 10, 'L': 10}. 8 | -------------------------------------------------------------------------------- /experiments/experiments_gdsc/cross_validation/np_nmtf/fold_8.txt: -------------------------------------------------------------------------------- 1 | Tried parameters {'K': 6, 'L': 6}. Average performances: {'R^2': 0.7964900224414253, 'MSE': 2.3861335874411442, 'Rp': 0.89266053002129664}. 2 | All performances: {'R^2': [0.794475052052582, 0.804578074407563, 0.7953904648899824, 0.7873804476088627, 0.8038215772603295, 0.7915910843091055, 0.8087018029665941, 0.7955745265118743, 0.7902222163759548, 0.7931649780314047], 'MSE': [2.4001096679360101, 2.2667472151286292, 2.4982148114733134, 2.423070768777603, 2.3450689330003085, 2.4274455551094198, 2.2297924090176267, 2.4038778476551328, 2.4566117654413899, 2.410396900872009], 'Rp': [0.89151606522610005, 0.8974139242351824, 0.89194020988267142, 0.88755902528355424, 0.89671448121692243, 0.88994643628806225, 0.89947889364857059, 0.89221660294436078, 0.88899551995073023, 0.89082414153681144]}. 3 | Tried parameters {'K': 8, 'L': 8}. Average performances: {'R^2': 0.7997221295979017, 'MSE': 2.3471753676047951, 'Rp': 0.89464267116036622}. 4 | All performances: {'R^2': [0.7879088633138132, 0.8057443062004819, 0.7995848586996388, 0.7980329678791787, 0.8009123378226934, 0.8152522376049086, 0.805901867285057, 0.7854884604825368, 0.794763828338703, 0.8036315683520067], 'MSE': [2.4311718989280275, 2.2865164856718647, 2.3057491685551024, 2.3677476950803777, 2.3398133747572234, 2.2163812952808297, 2.2606850213238907, 2.4693772984161018, 2.403906443901008, 2.3904049941335233], 'Rp': [0.88820254833377621, 0.89799051336381031, 0.89471959579938143, 0.89346168254896052, 0.89530942571427796, 0.90305398395656822, 0.89817901705708114, 0.88716150328664389, 0.89172818861625236, 0.89662025292690894]}. 5 | Tried parameters {'K': 10, 'L': 10}. Average performances: {'R^2': 0.8038687646008519, 'MSE': 2.2988967993000236, 'Rp': 0.89706053293886379}. 
6 | All performances: {'R^2': [0.7930382719289255, 0.79626905817854, 0.8099109552715251, 0.803207024194675, 0.8101198957306462, 0.7982475595755186, 0.8194113774034337, 0.8053253595948064, 0.8044065019807765, 0.7987516421496721], 'MSE': [2.3761997204140402, 2.3456636913819295, 2.3148546853832812, 2.3168488669180829, 2.2619395262550315, 2.3549028633139377, 2.1302607551241199, 2.25308533418259, 2.2934829666254726, 2.3417295834017482], 'Rp': [0.89149652076402786, 0.89280160729539848, 0.90038573819372447, 0.89667502415293232, 0.90016756417796462, 0.89385755993691185, 0.90550618152935503, 0.89800573707409492, 0.89738985216351108, 0.89431954410071768]}. 7 | Best performances: {'R^2': 0.8038687646008519, 'MSE': 2.2988967993000236, 'Rp': 0.89706053293886379}. Best parameters: {'K': 10, 'L': 10}. 8 | -------------------------------------------------------------------------------- /experiments/experiments_gdsc/cross_validation/np_nmtf/fold_9.txt: -------------------------------------------------------------------------------- 1 | Tried parameters {'K': 6, 'L': 6}. Average performances: {'R^2': 0.7963963800861193, 'MSE': 2.3982113726281038, 'Rp': 0.89261941324805338}. 2 | All performances: {'R^2': [0.7949888695361219, 0.8010306367630496, 0.7991311619876645, 0.8021350200758728, 0.7941086418523625, 0.7926981173442411, 0.7970076456232205, 0.7908113770604897, 0.7973274070849623, 0.7947249235332094], 'MSE': [2.4102765911482722, 2.380863298237792, 2.326736466464066, 2.3234155345370442, 2.5192119752188731, 2.4419849869237598, 2.2760263917564942, 2.4327715604823101, 2.4539669976714111, 2.4168599238410171], 'Rp': [0.89197025592442114, 0.89513612660321062, 0.8941939241831639, 0.89581558437995579, 0.8913423061525243, 0.89058054692061173, 0.89311627390117698, 0.88944378883149877, 0.89308531157496085, 0.89151001400900864]}. 3 | Tried parameters {'K': 8, 'L': 8}. Average performances: {'R^2': 0.8017290486819405, 'MSE': 2.3340448490642589, 'Rp': 0.8957243254129883}. 4 | All performances: {'R^2': [0.8012439730174359, 0.791160247067829, 0.7910476177400406, 0.8052265053167637, 0.8064183573714383, 0.7975723030110036, 0.8055658779512659, 0.8135958616158998, 0.8126150843212142, 0.7928446594065144], 'MSE': [2.3613559444606, 2.4063457737220504, 2.4063009256228467, 2.3123768640865232, 2.2920760535064297, 2.3099786587112159, 2.3349040594265476, 2.2392407012312754, 2.2315689466126272, 2.4463005632624761], 'Rp': [0.89531774678600895, 0.88980947409916711, 0.89004735023737214, 0.89770032281523282, 0.89829353614686802, 0.89342798544826008, 0.89779645136681052, 0.90205039669937126, 0.9018589305817627, 0.89094105994903061]}. 5 | Tried parameters {'K': 10, 'L': 10}. Average performances: {'R^2': 0.8032228166725975, 'MSE': 2.3174364968101586, 'Rp': 0.89675388823395452}. 6 | All performances: {'R^2': [0.8085749453675722, 0.7975557461175751, 0.7942203071834579, 0.7954438427412809, 0.8147565597668069, 0.8049120478829231, 0.8071341008675909, 0.8093371221337018, 0.8086980499584601, 0.7915954447066054], 'MSE': [2.234975086516183, 2.311453596756829, 2.4544707874414051, 2.4695313604543663, 2.2043290406101459, 2.2277643068806481, 2.3016331792080971, 2.2234639591739742, 2.3490233796334761, 2.3977202714264609], 'Rp': [0.89976177735888874, 0.89369662094192492, 0.8917313674280164, 0.89215001757957668, 0.90284235443305061, 0.89832614544033462, 0.89882408273539538, 0.90018019859040921, 0.89953461531877854, 0.89049170251316945]}. 7 | Best performances: {'R^2': 0.8032228166725975, 'MSE': 2.3174364968101586, 'Rp': 0.89675388823395452}. 
Best parameters: {'K': 10, 'L': 10}. 8 | -------------------------------------------------------------------------------- /experiments/experiments_gdsc/cross_validation/np_nmtf/fold_3.txt: -------------------------------------------------------------------------------- 1 | Tried parameters {'K': 6, 'L': 6}. Average performances: {'R^2': 0.7965397901626805, 'MSE': 2.3881568828950366, 'Rp': 0.89272365518691998}. 2 | All performances: {'R^2': [0.7937045534860062, 0.8123114155441549, 0.7918976004339093, 0.7855558434518486, 0.8004344378378311, 0.7985765932853583, 0.7985608771389182, 0.7972422786584668, 0.7939476696609845, 0.7931666321293271], 'MSE': [2.3641759944672165, 2.1898715021533013, 2.4983227204748935, 2.5346544382552052, 2.3421510098201006, 2.3334526276186573, 2.3682109295188614, 2.3898883103520006, 2.4134640308733806, 2.4473772654167489], 'Rp': [0.89121247262145231, 0.90132710125301474, 0.88997822486264555, 0.88653974153748771, 0.8950024865577576, 0.89385509123989759, 0.89399933466505688, 0.89307511773430603, 0.89139070308957835, 0.89085627830800274]}. 3 | Tried parameters {'K': 8, 'L': 8}. Average performances: {'R^2': 0.8008675620781804, 'MSE': 2.3369687378949195, 'Rp': 0.89524324374406949}. 4 | All performances: {'R^2': [0.8040702518483612, 0.7957541457139967, 0.8043670153189693, 0.7970107984254063, 0.7994914927552363, 0.7977877963977614, 0.7972950888811783, 0.8129681487450874, 0.8030142606189579, 0.7969166220768514], 'MSE': [2.3460168032678639, 2.3899380279689608, 2.2513577539230032, 2.4017201908785273, 2.2529659144084744, 2.3662195184231893, 2.3916965907331447, 2.1850075177708757, 2.358720827150032, 2.4260442344251287], 'Rp': [0.896951825128949, 0.89259869385738488, 0.89713641009809386, 0.89305138153040564, 0.89465924572324718, 0.89366571950265339, 0.89314072548487422, 0.90182016275119536, 0.89637745407477321, 0.89303081928911865]}. 5 | Tried parameters {'K': 10, 'L': 10}. Average performances: {'R^2': 0.8020843324781719, 'MSE': 2.3220336321770811, 'Rp': 0.89604381132040045}. 6 | All performances: {'R^2': [0.7966805567974335, 0.8034904045142354, 0.7990622945019292, 0.8055367169519014, 0.799049000125107, 0.7990725918556073, 0.8045158820824074, 0.8116612597747448, 0.8003302047569263, 0.8014444134214274], 'MSE': [2.366538762856337, 2.3029380258119412, 2.3007728352311276, 2.2849673222532236, 2.3788373894280221, 2.4082518971435269, 2.2629805132764318, 2.2674543428355669, 2.2582432908801553, 2.3893519420544784], 'Rp': [0.89305688274992512, 0.89663656581300721, 0.89441372763367488, 0.89773566622985834, 0.89439502769169721, 0.89457158570956796, 0.89757415641973803, 0.90131898987600501, 0.89495006836338398, 0.89578544271714688]}. 7 | Best performances: {'R^2': 0.8020843324781719, 'MSE': 2.3220336321770811, 'Rp': 0.89604381132040045}. Best parameters: {'K': 10, 'L': 10}. 8 | -------------------------------------------------------------------------------- /experiments/experiments_gdsc/cross_validation/np_nmtf/fold_6.txt: -------------------------------------------------------------------------------- 1 | Tried parameters {'K': 6, 'L': 6}. Average performances: {'R^2': 0.7952803369442216, 'MSE': 2.4026048701453222, 'Rp': 0.89201558726821273}. 
2 | All performances: {'R^2': [0.7977771201750432, 0.8039212527492999, 0.8006453852695992, 0.7842785135291789, 0.8010595676347045, 0.7843197592376037, 0.7991280193326178, 0.7996025576942166, 0.7888416267302656, 0.7932295670896878], 'MSE': [2.3331045323342203, 2.3965694439049181, 2.302505575036764, 2.4618875102975961, 2.3881187786815237, 2.5260202018624969, 2.3557077163595479, 2.3607984762269321, 2.4581769728542722, 2.4431594938949477], 'Rp': [0.89354344382185591, 0.89665266067193905, 0.89502649015246205, 0.88590397751995431, 0.89503694668305078, 0.88617005619008937, 0.89414819552702207, 0.8945155617897379, 0.88842947592867938, 0.89072906439733779]}. 3 | Tried parameters {'K': 8, 'L': 8}. Average performances: {'R^2': 0.8004171700687225, 'MSE': 2.3428801344649193, 'Rp': 0.89501776861235116}. 4 | All performances: {'R^2': [0.7918697033098796, 0.8065369167726792, 0.7964846281051495, 0.7997335942370991, 0.8086342523731586, 0.7998339835651985, 0.8064152232602552, 0.7960017293527646, 0.7991749548866869, 0.7994867148243546], 'MSE': [2.4486314638975251, 2.2576154930906593, 2.364139770278765, 2.3872472895361008, 2.2719223876318009, 2.3920607808680754, 2.2501439624653741, 2.3821015138581001, 2.3558835323844569, 2.3190551506383339], 'Rp': [0.89011112800870651, 0.89850781254851975, 0.89277991691190295, 0.89485025549589203, 0.89949730019392693, 0.89488008514924722, 0.89830075046921265, 0.89267834978320748, 0.89414456291272149, 0.89442752465017505]}. 5 | Tried parameters {'K': 10, 'L': 10}. Average performances: {'R^2': 0.8052883269730977, 'MSE': 2.2853406282207653, 'Rp': 0.89776655739064259}. 6 | All performances: {'R^2': [0.8081524404338646, 0.7937885698989963, 0.8072212312359672, 0.8085978650015806, 0.8038642392447332, 0.8074333955813097, 0.8071426704318253, 0.8053098764967874, 0.8111183562449857, 0.8002546251609262], 'MSE': [2.3111691792446067, 2.3750378134243428, 2.2813537761593516, 2.2375732860074868, 2.3039484171398312, 2.2817379778741946, 2.2886098494117908, 2.2393009911979633, 2.2379920990999973, 2.2966828926480871], 'Rp': [0.89927455899867881, 0.89147066068622771, 0.89891497871229142, 0.89967049370813446, 0.89706734855607184, 0.89884031750182958, 0.89860907091636288, 0.89778327572400185, 0.90071415857169124, 0.8953207105311356]}. 7 | Best performances: {'R^2': 0.8052883269730977, 'MSE': 2.2853406282207653, 'Rp': 0.89776655739064259}. Best parameters: {'K': 10, 'L': 10}. 8 | -------------------------------------------------------------------------------- /experiments/experiments_toy/time/nmtf_np_time.py: -------------------------------------------------------------------------------- 1 | """ 2 | Recover the toy dataset generated by example/generate_toy/bnmf/generate_bnmtf.py 3 | using the non-probabilistic NMTF, and plot the MSE against timestamps. 4 | 5 | We can plot the MSE, R2 and Rp as it converges, on the entire dataset. 6 | 7 | We have I=100, J=80, K=5, L=5, and no test data. 8 | """ 9 | 10 | import sys, os 11 | project_location = os.path.dirname(__file__)+"/../../../../" 12 | sys.path.append(project_location) 13 | 14 | from BNMTF.code.models.nmtf_np import NMTF 15 | 16 | import numpy, random, scipy, matplotlib.pyplot as plt 17 | 18 | ########## 19 | 20 | input_folder = project_location+"BNMTF/data_toy/bnmtf/" 21 | 22 | repeats = 10 23 | 24 | iterations = 10000 25 | I, J, K, L = 100,80,5,5 26 | 27 | init_FG = 'kmeans' 28 | init_S = 'exponential' 29 | expo_prior = 1/10. 
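# Clarifying comment (an assumption about NMTF.initialise in BNMTF/code/models/nmtf_np.py,
# not a statement from the original script): init_FG = 'kmeans' presumably initialises F and G
# by clustering the rows and columns of R, while init_S = 'exponential' presumably draws S from
# an Exponential distribution parameterised by expo_prior = 1/10, giving a fairly flat start.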
30 | 31 | # Load in data 32 | R = numpy.loadtxt(input_folder+"R.txt") 33 | M = numpy.ones((I,J)) 34 | 35 | # Run the VB algorithm, times 36 | times_repeats = [] 37 | performances_repeats = [] 38 | for i in range(0,repeats): 39 | # Set all the seeds 40 | numpy.random.seed(3) 41 | random.seed(4) 42 | scipy.random.seed(5) 43 | 44 | # Run the classifier 45 | nmtf = NMTF(R,M,K,L) 46 | nmtf.initialise(init_S,init_FG,expo_prior) 47 | nmtf.run(iterations) 48 | 49 | # Extract the performances and timestamps across all iterations 50 | times_repeats.append(nmtf.all_times) 51 | performances_repeats.append(nmtf.all_performances) 52 | 53 | # Check whether seed worked: all performances should be the same 54 | assert all([numpy.array_equal(performances, performances_repeats[0]) for performances in performances_repeats]), \ 55 | "Seed went wrong - performances not the same across repeats!" 56 | 57 | # Print out the performances, and the average times 58 | all_times_average = list(numpy.average(times_repeats, axis=0)) 59 | all_performances = performances_repeats[0] 60 | print "np_all_times_average = %s" % all_times_average 61 | print "np_all_performances = %s" % all_performances 62 | 63 | 64 | # Print all time plots, the average, and performance vs iterations 65 | plt.figure() 66 | plt.title("Performance against time") 67 | plt.ylim(0,10) 68 | for times in times_repeats: 69 | plt.plot(times, all_performances['MSE']) 70 | 71 | plt.figure() 72 | plt.title("Performance against average time") 73 | plt.plot(all_times_average, all_performances['MSE']) 74 | plt.ylim(0,10) 75 | 76 | plt.figure() 77 | plt.title("Performance against iteration") 78 | plt.plot(all_performances['MSE']) 79 | plt.ylim(0,10) -------------------------------------------------------------------------------- /experiments/experiments_gdsc/cross_validation/vb_nmf/results.txt: -------------------------------------------------------------------------------- 1 | All model fits for fold 1, metric AIC: [240290.16735229088, 238980.60715952617, 238049.02123880715, 239030.05666991285]. 2 | Best K for fold 1: 25. 3 | Performance: {'R^2': 0.8123419361488506, 'MSE': 2.2242309355503416, 'Rp': 0.90259104726287731}. 4 | 5 | All model fits for fold 2, metric AIC: [240229.80122119767, 239274.82620135765, 238160.91629605644, 237460.97778081335]. 6 | Best K for fold 2: 30. 7 | Performance: {'R^2': 0.8011409466575017, 'MSE': 2.3108126630384804, 'Rp': 0.89686786281647779}. 8 | 9 | All model fits for fold 3, metric AIC: [239983.14391001957, 238318.7796743041, 238267.80746191408, 237854.79107721607]. 10 | Best K for fold 3: 30. 11 | Performance: {'R^2': 0.7943028271877304, 'MSE': 2.4095896447817631, 'Rp': 0.89414010092697216}. 12 | 13 | All model fits for fold 4, metric AIC: [240174.32637766597, 239002.67747375579, 238210.09773381858, 238586.6912395501]. 14 | Best K for fold 4: 25. 15 | Performance: {'R^2': 0.8125046212085996, 'MSE': 2.2188694213830114, 'Rp': 0.90282141562178775}. 16 | 17 | All model fits for fold 5, metric AIC: [240212.19600782395, 238669.50000280215, 237874.41753096576, 237701.38034011592]. 18 | Best K for fold 5: 30. 19 | Performance: {'R^2': 0.7934881370166628, 'MSE': 2.4185938516134278, 'Rp': 0.89292623713793573}. 20 | 21 | All model fits for fold 6, metric AIC: [240173.43494869216, 238558.97209104756, 238422.99853905643, 237979.10762966136]. 22 | Best K for fold 6: 30. 23 | Performance: {'R^2': 0.8111969927756486, 'MSE': 2.1808748510586002, 'Rp': 0.90319019710203263}. 
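A minimal sketch (not from the repository code) of how each fold's 'Best K' relates to the AIC list above it: the K with the smallest AIC is kept. The grid of K values tried is not stated in this file; [15, 20, 25, 30] is an inference from the reported best values of 25 and 30, not a fact from the source.

values_K = [15, 20, 25, 30]   # assumed grid, see note above
fold_1_aic = [240290.16735229088, 238980.60715952617, 238049.02123880715, 239030.05666991285]
best_K = values_K[fold_1_aic.index(min(fold_1_aic))]
print "Best K for fold 1: %s." % best_K   # -> 25, matching the fold 1 entry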
24 | 25 | All model fits for fold 7, metric AIC: [239869.44683602237, 238353.65447191551, 238152.05580152466, 238180.82009855763]. 26 | Best K for fold 7: 25. 27 | Performance: {'R^2': 0.8058878338360765, 'MSE': 2.2503432196374651, 'Rp': 0.89931551399355991}. 28 | 29 | All model fits for fold 8, metric AIC: [240577.89475339543, 238771.80770711903, 237667.67713085565, 238088.97970950397]. 30 | Best K for fold 8: 25. 31 | Performance: {'R^2': 0.811089129626958, 'MSE': 2.2305023229025145, 'Rp': 0.90161087516091587}. 32 | 33 | All model fits for fold 9, metric AIC: [240287.46868564631, 238744.09024294608, 238219.47537998416, 238134.03195392119]. 34 | Best K for fold 9: 30. 35 | Performance: {'R^2': 0.798953276136085, 'MSE': 2.3595465204422488, 'Rp': 0.89633675449065164}. 36 | 37 | All model fits for fold 10, metric AIC: [240451.49121444131, 238581.40721198689, 237968.91066330951, 238284.25023562534]. 38 | Best K for fold 10: 25. 39 | Performance: {'R^2': 0.8151865445946502, 'MSE': 2.2186318302878667, 'Rp': 0.90445283498221596}. 40 | 41 | -------------------------------------------------------------------------------- /experiments/experiments_gdsc/cross_validation/gibbs_nmf/results.txt: -------------------------------------------------------------------------------- 1 | All model fits for fold 1, metric AIC: [243595.44979297026, 241968.19780653264, 241833.11276982535, 242525.46377454145]. 2 | Best K for fold 1: 25. 3 | Performance: {'R^2': 0.8248485588294542, 'MSE': 2.0115451703143985, 'Rp': 0.90908045103379365}. 4 | 5 | All model fits for fold 2, metric AIC: [243025.70626974344, 242168.69699825841, 242026.95270679001, 242134.08099919959]. 6 | Best K for fold 2: 25. 7 | Performance: {'R^2': 0.8219514639515233, 'MSE': 2.0532542729784833, 'Rp': 0.90755752809182066}. 8 | 9 | All model fits for fold 3, metric AIC: [243612.7997870651, 242259.96864944304, 242061.09393043793, 242475.63898623944]. 10 | Best K for fold 3: 25. 11 | Performance: {'R^2': 0.8217549958515522, 'MSE': 2.0454971069846226, 'Rp': 0.90714536611822205}. 12 | 13 | All model fits for fold 4, metric AIC: [243420.81499282017, 241914.29134586823, 241670.16193069995, 242280.64342551032]. 14 | Best K for fold 4: 25. 15 | Performance: {'R^2': 0.8349672123366683, 'MSE': 1.994656076757727, 'Rp': 0.91396299549995652}. 16 | 17 | All model fits for fold 5, metric AIC: [243255.89851395186, 242389.40115483353, 242100.97265390822, 242468.23189654254]. 18 | Best K for fold 5: 25. 19 | Performance: {'R^2': 0.830543344804296, 'MSE': 2.0281421630490297, 'Rp': 0.91179392839675322}. 20 | 21 | All model fits for fold 6, metric AIC: [243313.94572363066, 242350.42676329185, 242011.27639744786, 241870.62415823588]. 22 | Best K for fold 6: 30. 23 | Performance: {'R^2': 0.8229475100079148, 'MSE': 2.0691704067461281, 'Rp': 0.90832416407316152}. 24 | 25 | All model fits for fold 7, metric AIC: [243305.98134092687, 241888.00165122704, 241598.72213610105, 242204.50154172539]. 26 | Best K for fold 7: 25. 27 | Performance: {'R^2': 0.8234388009582426, 'MSE': 2.0708801136454622, 'Rp': 0.90786269315758972}. 28 | 29 | All model fits for fold 8, metric AIC: [243113.28511082294, 242329.20180314814, 241593.33037916449, 242287.90117164943]. 30 | Best K for fold 8: 25. 31 | Performance: {'R^2': 0.8228191950789238, 'MSE': 2.1137440615703653, 'Rp': 0.90778790396895936}. 32 | 33 | All model fits for fold 9, metric AIC: [243404.35296055069, 242122.98676400131, 241606.51841963449, 242404.48081945442]. 34 | Best K for fold 9: 25. 
35 | Performance: {'R^2': 0.8195240616800068, 'MSE': 2.1153688464049725, 'Rp': 0.90600658191475891}. 36 | 37 | All model fits for fold 10, metric AIC: [243643.31508766502, 242065.17131888881, 241698.58228740888, 242122.23747144494]. 38 | Best K for fold 10: 25. 39 | Performance: {'R^2': 0.8266748390223762, 'MSE': 2.0478097531374373, 'Rp': 0.90967154000028361}. 40 | 41 | -------------------------------------------------------------------------------- /experiments/experiments_gdsc/cross_validation/icm_nmf/results.txt: -------------------------------------------------------------------------------- 1 | All model fits for fold 1, metric AIC: [237857.19116199756, 233820.26668651064, 230754.84486460069, 228078.59118779475]. 2 | Best K for fold 1: 30. 3 | Performance: {'R^2': 0.7072309782081623, 'MSE': 3.5039148405029135, 'Rp': 0.85755329346470166}. 4 | 5 | All model fits for fold 2, metric AIC: [237857.19116199756, 233820.26668651064, 230754.84486460069, 228078.59118779475]. 6 | Best K for fold 2: 30. 7 | Performance: {'R^2': 0.2162669348625822, 'MSE': 9.0622730084824674, 'Rp': 0.6982606574842174}. 8 | 9 | All model fits for fold 3, metric AIC: [237857.19116199756, 233820.26668651064, 230754.84486460069, 228078.59118779475]. 10 | Best K for fold 3: 30. 11 | Performance: {'R^2': 0.6853079551313846, 'MSE': 3.7009069757338917, 'Rp': 0.84719052665866601}. 12 | 13 | All model fits for fold 4, metric AIC: [237857.19116199756, 233820.26668651064, 230754.84486460069, 228078.59118779475]. 14 | Best K for fold 4: 30. 15 | Performance: {'R^2': 0.7144108917311998, 'MSE': 3.3451246835265178, 'Rp': 0.86206595196591884}. 16 | 17 | All model fits for fold 5, metric AIC: [237857.19116199756, 233820.26668651064, 230754.84486460069, 228078.59118779475]. 18 | Best K for fold 5: 30. 19 | Performance: {'R^2': 0.7341480430315861, 'MSE': 3.1147595748400358, 'Rp': 0.86644930808200238}. 20 | 21 | All model fits for fold 6, metric AIC: [237857.19116199756, 233820.26668651064, 230754.84486460069, 228078.59118779475]. 22 | Best K for fold 6: 30. 23 | Performance: {'R^2': 0.6671037956836574, 'MSE': 3.9037354439533258, 'Rp': 0.83763274145781152}. 24 | 25 | All model fits for fold 7, metric AIC: [237857.19116199756, 233820.26668651064, 230754.84486460069, 228078.59118779475]. 26 | Best K for fold 7: 30. 27 | Performance: {'R^2': -0.17013019643779437, 'MSE': 13.991970030783968, 'Rp': 0.6226739536838739}. 28 | 29 | All model fits for fold 8, metric AIC: [237857.19116199756, 233820.26668651064, 230754.84486460069, 228078.59118779475]. 30 | Best K for fold 8: 30. 31 | Performance: {'R^2': 0.7288988508164431, 'MSE': 3.1814210224127897, 'Rp': 0.86428985480373288}. 32 | 33 | All model fits for fold 9, metric AIC: [237857.19116199756, 233820.26668651064, 230754.84486460069, 228078.59118779475]. 34 | Best K for fold 9: 30. 35 | Performance: {'R^2': 0.7201731755424339, 'MSE': 3.2677197491020404, 'Rp': 0.86193739061972197}. 36 | 37 | All model fits for fold 10, metric AIC: [237857.19116199756, 233820.26668651064, 230754.84486460069, 228078.59118779475]. 38 | Best K for fold 10: 30. 39 | Performance: {'R^2': -0.07478035943340289, 'MSE': 12.460551868851933, 'Rp': 0.64163764940452106}. 
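A quick summary sketch (not from the repository code) of the ten ICM folds above; the R^2 values are copied verbatim from the per-fold entries, and the negative results for folds 7 and 10 pull the mean down considerably.

import numpy
icm_fold_r2 = [0.7072309782081623, 0.2162669348625822, 0.6853079551313846, 0.7144108917311998,
               0.7341480430315861, 0.6671037956836574, -0.17013019643779437, 0.7288988508164431,
               0.7201731755424339, -0.07478035943340289]
print "mean R^2 across ICM folds = %.4f" % numpy.mean(icm_fold_r2)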
40 | 41 | -------------------------------------------------------------------------------- /experiments/experiments_toy/time/nmf_np_time.py: -------------------------------------------------------------------------------- 1 | """ 2 | Recover the toy dataset generated by example/generate_toy/bnmf/generate_bnmf.py 3 | using the non-probabilistic NMF, and plot the MSE against timestamps. 4 | 5 | We can plot the MSE, R2 and Rp as it converges, on the entire dataset. 6 | 7 | We have I=100, J=80, K=10, and no test data. 8 | We give flatter priors (1/10) than what was used to generate the data (1). 9 | """ 10 | 11 | import sys, os 12 | project_location = os.path.dirname(__file__)+"/../../../../" 13 | sys.path.append(project_location) 14 | 15 | from BNMTF.code.models.nmf_np import NMF 16 | 17 | import numpy, random, scipy, matplotlib.pyplot as plt 18 | 19 | ########## 20 | 21 | input_folder = project_location+"BNMTF/data_toy/bnmf/" 22 | 23 | repeats = 10 24 | 25 | iterations = 2000 26 | I, J, K = 100,80,10 27 | 28 | init_UV = 'exponential' 29 | expo_prior = 1/10. 30 | 31 | # Load in data 32 | R = numpy.loadtxt(input_folder+"R.txt") 33 | M = numpy.ones((I,J)) 34 | 35 | 36 | # Run the VB algorithm, times 37 | times_repeats = [] 38 | performances_repeats = [] 39 | for i in range(0,repeats): 40 | # Set all the seeds 41 | numpy.random.seed(0) 42 | random.seed(0) 43 | scipy.random.seed(0) 44 | 45 | # Run the classifier 46 | nmf = NMF(R,M,K) 47 | nmf.initialise(init_UV,expo_prior) 48 | nmf.run(iterations) 49 | 50 | # Extract the performances and timestamps across all iterations 51 | times_repeats.append(nmf.all_times) 52 | performances_repeats.append(nmf.all_performances) 53 | 54 | # Check whether seed worked: all performances should be the same 55 | assert all([numpy.array_equal(performances, performances_repeats[0]) for performances in performances_repeats]), \ 56 | "Seed went wrong - performances not the same across repeats!" 57 | 58 | # Print out the performances, and the average times 59 | all_times_average = list(numpy.average(times_repeats, axis=0)) 60 | all_performances = performances_repeats[0] 61 | print "np_all_times_average = %s" % all_times_average 62 | print "np_all_performances = %s" % all_performances 63 | 64 | 65 | # Print all time plots, the average, and performance vs iterations 66 | plt.figure() 67 | plt.title("Performance against time") 68 | plt.ylim(0,10) 69 | for times in times_repeats: 70 | plt.plot(times, all_performances['MSE']) 71 | 72 | plt.figure() 73 | plt.title("Performance against average time") 74 | plt.plot(all_times_average, all_performances['MSE']) 75 | plt.ylim(0,10) 76 | 77 | plt.figure() 78 | plt.title("Performance against iteration") 79 | plt.plot(all_performances['MSE']) 80 | plt.ylim(0,10) -------------------------------------------------------------------------------- /experiments/experiments_gdsc/time/nmf_vb_time.py: -------------------------------------------------------------------------------- 1 | """ 2 | Run NMF VB on the Sanger dataset. 3 | 4 | We can plot the MSE, R2 and Rp as it converges, against time, on the entire dataset. 5 | 6 | We give flat priors (1/10). 
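Concretely, judging from the settings below, 'flat priors (1/10)' refers to setting the prior
parameters lambdaU and lambdaV to 1/10 for every entry of U and V.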
7 | """ 8 | 9 | import sys, os 10 | project_location = os.path.dirname(__file__)+"/../../../../" 11 | sys.path.append(project_location) 12 | 13 | from BNMTF.code.models.bnmf_vb_optimised import bnmf_vb_optimised 14 | from BNMTF.data_drug_sensitivity.gdsc.load_data import load_gdsc 15 | 16 | import numpy, random, scipy, matplotlib.pyplot as plt 17 | 18 | ########## 19 | 20 | standardised = False #standardised Sanger or unstandardised 21 | 22 | repeats = 10 23 | 24 | iterations = 500 25 | init_UV = 'random' 26 | I, J, K = 622,138,25 27 | 28 | alpha, beta = 1., 1. #1., 1. 29 | lambdaU = numpy.ones((I,K))/10. 30 | lambdaV = numpy.ones((J,K))/10. 31 | priors = { 'alpha':alpha, 'beta':beta, 'lambdaU':lambdaU, 'lambdaV':lambdaV } 32 | 33 | # Load in data 34 | (_,R,M,_,_,_,_) = load_gdsc(standardised=standardised) 35 | 36 | 37 | # Run the VB algorithm, times 38 | times_repeats = [] 39 | performances_repeats = [] 40 | for i in range(0,repeats): 41 | # Set all the seeds 42 | numpy.random.seed(0) 43 | 44 | # Run the classifier 45 | BNMF = bnmf_vb_optimised(R,M,K,priors) 46 | BNMF.initialise(init_UV) 47 | BNMF.run(iterations) 48 | 49 | # Extract the performances and timestamps across all iterations 50 | times_repeats.append(BNMF.all_times) 51 | performances_repeats.append(BNMF.all_performances) 52 | 53 | # Check whether seed worked: all performances should be the same 54 | assert all(numpy.array_equal(performances, performances_repeats[0]) for performances in performances_repeats), \ 55 | "Seed went wrong - performances not the same across repeats!" 56 | 57 | # Print out the performances, and the average times 58 | vb_all_times_average = list(numpy.average(times_repeats, axis=0)) 59 | vb_all_performances = performances_repeats[0] 60 | print "vb_all_times_average = %s" % vb_all_times_average 61 | print "vb_all_performances = %s" % vb_all_performances 62 | 63 | 64 | # Print all time plots, the average, and performance vs iterations 65 | plt.figure() 66 | plt.title("Performance against time") 67 | plt.ylim(0,10) 68 | for times in times_repeats: 69 | plt.plot(times, vb_all_performances['MSE']) 70 | 71 | plt.figure() 72 | plt.title("Performance against average time") 73 | plt.plot(vb_all_times_average, vb_all_performances['MSE']) 74 | plt.ylim(0,10) 75 | 76 | plt.figure() 77 | plt.title("Performance against iteration") 78 | plt.plot(vb_all_performances['MSE']) 79 | plt.ylim(0,10) -------------------------------------------------------------------------------- /experiments/experiments_gdsc/time/nmf_icm_time.py: -------------------------------------------------------------------------------- 1 | """ 2 | Run NMF ICM on the Sanger dataset. 3 | 4 | We can plot the MSE, R2 and Rp as it converges, against time, on the entire dataset. 5 | 6 | We give flat priors (1/10). 7 | """ 8 | 9 | import sys, os 10 | project_location = os.path.dirname(__file__)+"/../../../../" 11 | sys.path.append(project_location) 12 | 13 | from BNMTF.code.models.nmf_icm import nmf_icm 14 | from BNMTF.data_drug_sensitivity.gdsc.load_data import load_gdsc 15 | 16 | import numpy, random, scipy, matplotlib.pyplot as plt 17 | 18 | ########## 19 | 20 | standardised = False #standardised Sanger or unstandardised 21 | 22 | repeats = 10 23 | 24 | iterations = 1000 25 | init_UV = 'random' 26 | I, J, K = 622,138,25 27 | 28 | minimum_TN = 0.1 29 | 30 | alpha, beta = 1., 1. #1., 1. 31 | lambdaU = numpy.ones((I,K))/10. 32 | lambdaV = numpy.ones((J,K))/10. 
33 | priors = { 'alpha':alpha, 'beta':beta, 'lambdaU':lambdaU, 'lambdaV':lambdaV }
34 | 
35 | # Load in data
36 | (_,R,M,_,_,_,_) = load_gdsc(standardised=standardised)
37 | 
38 | 
39 | # Run the ICM algorithm and time it
40 | times_repeats = []
41 | performances_repeats = []
42 | for i in range(0,repeats):
43 |     # Set the random seed
44 |     numpy.random.seed(0)
45 | 
46 |     # Run the model
47 |     nmf = nmf_icm(R,M,K,priors)
48 |     nmf.initialise(init_UV)
49 |     nmf.run(iterations,minimum_TN=minimum_TN)
50 | 
51 |     # Extract the performances and timestamps across all iterations
52 |     times_repeats.append(nmf.all_times)
53 |     performances_repeats.append(nmf.all_performances)
54 | 
55 | # Check whether the seed worked: all performances should be the same
56 | assert all(numpy.array_equal(performances, performances_repeats[0]) for performances in performances_repeats), \
57 |     "Seed went wrong - performances not the same across repeats!"
58 | 
59 | # Print out the performances, and the average times
60 | icm_all_times_average = list(numpy.average(times_repeats, axis=0))
61 | icm_all_performances = performances_repeats[0]
62 | print "icm_all_times_average = %s" % icm_all_times_average
63 | print "icm_all_performances = %s" % icm_all_performances
64 | 
65 | 
66 | # Plot performance against time (per repeat), against average time, and against iteration
67 | plt.figure()
68 | plt.title("Performance against time")
69 | plt.ylim(0,10)
70 | for times in times_repeats:
71 |     plt.plot(times, icm_all_performances['MSE'])
72 | 
73 | plt.figure()
74 | plt.title("Performance against average time")
75 | plt.plot(icm_all_times_average, icm_all_performances['MSE'])
76 | plt.ylim(0,10)
77 | 
78 | plt.figure()
79 | plt.title("Performance against iteration")
80 | plt.plot(icm_all_performances['MSE'])
81 | plt.ylim(0,10)
--------------------------------------------------------------------------------
/experiments/experiments_gdsc/time/nmf_gibbs_time.py:
--------------------------------------------------------------------------------
1 | """
2 | Run NMF Gibbs on the Sanger dataset.
3 | 
4 | We can plot the MSE, R2 and Rp as it converges, against time, on the entire dataset.
5 | 
6 | We give flat priors (1/10).
7 | """
8 | 
9 | import sys, os
10 | project_location = os.path.dirname(__file__)+"/../../../../"
11 | sys.path.append(project_location)
12 | 
13 | from BNMTF.code.models.bnmf_gibbs_optimised import bnmf_gibbs_optimised
14 | from BNMTF.data_drug_sensitivity.gdsc.load_data import load_gdsc
15 | 
16 | import numpy, random, scipy, matplotlib.pyplot as plt
17 | 
18 | ##########
19 | 
20 | standardised = False # standardised Sanger or unstandardised
21 | 
22 | repeats = 10
23 | 
24 | iterations = 500
25 | init_UV = 'random'
26 | I, J, K = 622,138,25
27 | 
28 | alpha, beta = 1., 1. #1., 1.
29 | lambdaU = numpy.ones((I,K))/10.
30 | lambdaV = numpy.ones((J,K))/10.
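# Note: the dimensions (I, J, K) and the Gamma(1,1) / exponential(1/10) priors match the VB and
# ICM runs above, so differences in the timing plots should reflect the inference method rather
# than the model being fitted.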
31 | priors = { 'alpha':alpha, 'beta':beta, 'lambdaU':lambdaU, 'lambdaV':lambdaV }
32 | 
33 | # Load in data
34 | (_,R,M,_,_,_,_) = load_gdsc(standardised=standardised)
35 | 
36 | 
37 | # Run the Gibbs sampler and time it
38 | times_repeats = []
39 | performances_repeats = []
40 | for i in range(0,repeats):
41 |     # Set the random seed
42 |     numpy.random.seed(0)
43 | 
44 |     # Run the model
45 |     BNMF = bnmf_gibbs_optimised(R,M,K,priors)
46 |     BNMF.initialise(init_UV)
47 |     BNMF.run(iterations)
48 | 
49 |     # Extract the performances and timestamps across all iterations
50 |     times_repeats.append(BNMF.all_times)
51 |     performances_repeats.append(BNMF.all_performances)
52 | 
53 | # Check whether the seed worked: all performances should be the same
54 | assert all(numpy.array_equal(performances, performances_repeats[0]) for performances in performances_repeats), \
55 |     "Seed went wrong - performances not the same across repeats!"
56 | 
57 | # Print out the performances, and the average times
58 | gibbs_all_times_average = list(numpy.average(times_repeats, axis=0))
59 | gibbs_all_performances = performances_repeats[0]
60 | print "gibbs_all_times_average = %s" % gibbs_all_times_average
61 | print "gibbs_all_performances = %s" % gibbs_all_performances
62 | 
63 | # Plot performance against time (per repeat), against average time, and against iteration
64 | plt.figure()
65 | plt.title("Performance against time")
66 | plt.ylim(0,10)
67 | for times in times_repeats:
68 |     plt.plot(times, gibbs_all_performances['MSE'])
69 | 
70 | plt.figure()
71 | plt.title("Performance against average time")
72 | plt.plot(gibbs_all_times_average, gibbs_all_performances['MSE'])
73 | plt.ylim(0,10)
74 | 
75 | plt.figure()
76 | plt.title("Performance against iteration")
77 | plt.plot(gibbs_all_performances['MSE'])
78 | plt.ylim(0,10)
--------------------------------------------------------------------------------
/experiments/experiments_gdsc/time/nmtf_vb_time.py:
--------------------------------------------------------------------------------
1 | """
2 | Run NMTF VB on the Sanger dataset.
3 | 
4 | We can plot the MSE, R2 and Rp as it converges, against time, on the entire dataset.
5 | 
6 | We give flat priors (1/10).
7 | """
8 | 
9 | import sys, os
10 | project_location = os.path.dirname(__file__)+"/../../../../"
11 | sys.path.append(project_location)
12 | 
13 | from BNMTF.code.models.bnmtf_vb_optimised import bnmtf_vb_optimised
14 | from BNMTF.data_drug_sensitivity.gdsc.load_data import load_gdsc
15 | 
16 | import numpy, random, scipy, matplotlib.pyplot as plt
17 | 
18 | ##########
19 | 
20 | standardised = False # standardised Sanger or unstandardised
21 | 
22 | repeats = 10
23 | 
24 | iterations = 1000
25 | init_FG = 'kmeans'
26 | init_S = 'random'
27 | I, J, K, L = 622,138,5,5
28 | 
29 | alpha, beta = 1., 1.
30 | lambdaF = numpy.ones((I,K))/10.
31 | lambdaS = numpy.ones((K,L))/10.
32 | lambdaG = numpy.ones((J,L))/10.
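# Tri-factorisation priors, gathered into a dict below: exponential(1/10) priors on the entries of
# F (I x K), S (K x L) and G (J x L), and a Gamma(1,1) prior on the noise precision. The model
# approximates R (I x J) by F S G^T, so the reconstruction is I x J; a purely illustrative shape
# check (not part of the experiment):
#   numpy.dot(numpy.ones((I,K)), numpy.dot(numpy.ones((K,L)), numpy.ones((J,L)).T)).shape  # (I, J)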
33 | priors = { 'alpha':alpha, 'beta':beta, 'lambdaF':lambdaF, 'lambdaS':lambdaS, 'lambdaG':lambdaG }
34 | 
35 | 
36 | # Load in data
37 | (_,R,M,_,_,_,_) = load_gdsc(standardised=standardised)
38 | 
39 | 
40 | # Run the VB algorithm and time it
41 | times_repeats = []
42 | performances_repeats = []
43 | for i in range(0,repeats):
44 |     # Set the random seed
45 |     numpy.random.seed(3)
46 | 
47 |     # Run the model
48 |     BNMTF = bnmtf_vb_optimised(R,M,K,L,priors)
49 |     BNMTF.initialise(init_S,init_FG)
50 |     BNMTF.run(iterations)
51 | 
52 |     # Extract the performances and timestamps across all iterations
53 |     times_repeats.append(BNMTF.all_times)
54 |     performances_repeats.append(BNMTF.all_performances)
55 | 
56 | # Check whether the seed worked: all performances should be the same
57 | assert all(numpy.array_equal(performances, performances_repeats[0]) for performances in performances_repeats), \
58 |     "Seed went wrong - performances not the same across repeats!"
59 | 
60 | # Print out the performances, and the average times
61 | vb_all_times_average = list(numpy.average(times_repeats, axis=0))
62 | vb_all_performances = performances_repeats[0]
63 | print "vb_all_times_average = %s" % vb_all_times_average
64 | print "vb_all_performances = %s" % vb_all_performances
65 | 
66 | 
67 | # Plot performance against time (per repeat), against average time, and against iteration
68 | plt.figure()
69 | plt.title("Performance against time")
70 | plt.ylim(0,10)
71 | for times in times_repeats:
72 |     plt.plot(times, vb_all_performances['MSE'])
73 | 
74 | plt.figure()
75 | plt.title("Performance against average time")
76 | plt.plot(vb_all_times_average, vb_all_performances['MSE'])
77 | plt.ylim(0,10)
78 | 
79 | plt.figure()
80 | plt.title("Performance against iteration")
81 | plt.plot(vb_all_performances['MSE'])
82 | plt.ylim(0,10)
--------------------------------------------------------------------------------
/experiments/experiments_gdsc/time/nmtf_gibbs_time.py:
--------------------------------------------------------------------------------
1 | """
2 | Run NMTF Gibbs on the Sanger dataset.
3 | 
4 | We can plot the MSE, R2 and Rp as it converges, against time, on the entire dataset.
5 | 
6 | We give flat priors (1/10).
7 | """
8 | 
9 | import sys, os
10 | project_location = os.path.dirname(__file__)+"/../../../../"
11 | sys.path.append(project_location)
12 | 
13 | from BNMTF.code.models.bnmtf_gibbs_optimised import bnmtf_gibbs_optimised
14 | from BNMTF.data_drug_sensitivity.gdsc.load_data import load_gdsc
15 | 
16 | import numpy, random, scipy, matplotlib.pyplot as plt
17 | 
18 | ##########
19 | 
20 | standardised = False # standardised Sanger or unstandardised
21 | 
22 | repeats = 10
23 | 
24 | iterations = 1000
25 | init_FG = 'kmeans'
26 | init_S = 'random'
27 | I, J, K, L = 622,138,5,5
28 | 
29 | alpha, beta = 1., 1.
30 | lambdaF = numpy.ones((I,K))/10.
31 | lambdaS = numpy.ones((K,L))/10.
32 | lambdaG = numpy.ones((J,L))/10.
33 | priors = { 'alpha':alpha, 'beta':beta, 'lambdaF':lambdaF, 'lambdaS':lambdaS, 'lambdaG':lambdaG }
34 | 
35 | # Load in data
36 | (_,R,M,_,_,_,_) = load_gdsc(standardised=standardised)
37 | 
38 | 
39 | # Run the Gibbs sampler and time it
40 | times_repeats = []
41 | performances_repeats = []
42 | for i in range(0,repeats):
43 |     # Set the random seed
44 |     numpy.random.seed(3)
45 | 
46 |     # Run the model
47 |     BNMTF = bnmtf_gibbs_optimised(R,M,K,L,priors)
48 |     BNMTF.initialise(init_S,init_FG)
49 |     BNMTF.run(iterations)
50 | 
51 |     # Extract the performances and timestamps across all iterations
52 |     times_repeats.append(BNMTF.all_times)
53 |     performances_repeats.append(BNMTF.all_performances)
54 | 
55 | # Check whether the seed worked: all performances should be the same
56 | assert all(numpy.array_equal(performances, performances_repeats[0]) for performances in performances_repeats), \
57 |     "Seed went wrong - performances not the same across repeats!"
58 | 
59 | # Print out the performances, and the average times
60 | gibbs_all_times_average = list(numpy.average(times_repeats, axis=0))
61 | gibbs_all_performances = performances_repeats[0]
62 | print "gibbs_all_times_average = %s" % gibbs_all_times_average
63 | print "gibbs_all_performances = %s" % gibbs_all_performances
64 | 
65 | 
66 | # Plot performance against time (per repeat), against average time, and against iteration
67 | plt.figure()
68 | plt.title("Performance against time")
69 | plt.ylim(0,10)
70 | for times in times_repeats:
71 |     plt.plot(times, gibbs_all_performances['MSE'])
72 | 
73 | plt.figure()
74 | plt.title("Performance against average time")
75 | plt.plot(gibbs_all_times_average, gibbs_all_performances['MSE'])
76 | plt.ylim(0,10)
77 | 
78 | plt.figure()
79 | plt.title("Performance against iteration")
80 | plt.plot(gibbs_all_performances['MSE'])
81 | plt.ylim(0,10)
--------------------------------------------------------------------------------