├── __init__.py ├── code ├── __init__.py ├── models │ ├── __init__.py │ ├── kmeans │ │ └── __init__.py │ └── distributions │ │ ├── __init__.py │ │ ├── exponential.py │ │ ├── normal.py │ │ └── gamma.py └── cross_validation │ └── __init__.py ├── plots ├── __init__.py ├── time_toy │ ├── nmf_icm_times.txt~ │ ├── nmf_np_times.txt~ │ ├── nmf_vb_times.txt~ │ ├── nmtf_np_times.txt~ │ ├── nmf_gibbs_performances.txt~ │ ├── nmf_icm_performances.txt~ │ ├── nmf_np_performances.txt~ │ ├── nmf_vb_performances.txt~ │ ├── nmtf_gibbs_performances.txt~ │ ├── nmtf_np_performances.txt~ │ └── nmtf_vb_performances.txt~ ├── time_Sanger │ ├── nmf_gibbs_times.txt~ │ ├── nmf_icm_times.txt~ │ ├── nmf_np_times.txt~ │ ├── nmf_vb_times.txt~ │ ├── nmtf_icm_times.txt~ │ ├── nmf_icm_performances.txt~ │ ├── nmf_np_performances.txt~ │ ├── nmf_vb_performances.txt~ │ ├── nmtf_gibbs_times.txt~ │ ├── nmf_gibbs_performances.txt~ │ ├── nmtf_gibbs_performances.txt~ │ └── nmtf_icm_performances.txt~ ├── graphs_toy │ ├── legend.pdf │ ├── legend.png │ ├── mse_nmf_times.pdf │ ├── mse_nmf_times.png │ ├── mse_nmtf_times.pdf │ ├── mse_nmtf_times.png │ ├── mse_nmf_noise_test.pdf │ ├── mse_nmf_noise_test.png │ ├── mse_nmf_convergences.pdf │ ├── mse_nmf_convergences.png │ ├── mse_nmtf_noise_test.pdf │ ├── mse_nmtf_noise_test.png │ ├── aic_nmf_model_selection.pdf │ ├── aic_nmf_model_selection.png │ ├── mse_nmf_model_selection.pdf │ ├── mse_nmf_model_selection.png │ ├── mse_nmtf_convergences.pdf │ ├── mse_nmtf_convergences.png │ ├── aic_nmtf_model_selection.pdf │ ├── aic_nmtf_model_selection.png │ ├── elbo_nmf_model_selection.png │ ├── mse_nmtf_model_selection.pdf │ ├── mse_nmtf_model_selection.png │ ├── aic_nmtf_greedy_model_selection.pdf │ ├── aic_nmtf_greedy_model_selection.png │ ├── mse_nmf_missing_values_predictions.pdf │ ├── mse_nmf_missing_values_predictions.png │ ├── mse_nmtf_missing_values_predictions.pdf │ └── mse_nmtf_missing_values_predictions.png └── graphs_Sanger │ ├── mse_Sanger_nmf_times.pdf │ ├── mse_Sanger_nmf_times.png │ ├── mse_Sanger_nmtf_times.pdf │ ├── mse_Sanger_nmtf_times.png │ ├── mse_Sanger_nmf_convergences.pdf │ ├── mse_Sanger_nmf_convergences.png │ ├── mse_Sanger_nmtf_convergences.pdf │ ├── mse_Sanger_nmtf_convergences.png │ ├── aic_Sanger_line_model_selection.pdf │ ├── aic_Sanger_line_model_selection.png │ ├── aic_Sanger_greedy_model_selection.pdf │ └── aic_Sanger_greedy_model_selection.png ├── tests ├── __init__.py ├── code │ ├── __init__.py │ └── distributions │ │ ├── __init__.py │ │ ├── test_gamma.py │ │ ├── test_truncated_normal_vector.py │ │ └── test_truncated_normal.py └── grid_search │ └── __init__.py ├── data_toy ├── __init__.py ├── bnmf │ └── __init__.py └── bnmtf │ ├── __init__.py │ └── S.txt ├── experiments ├── __init__.py ├── experiments_ccle │ ├── __init__.py │ └── cross_validation │ │ ├── ccle_ec_gibbs_nmf │ │ ├── results.txt │ │ └── linesearch_xval_gibbs.py │ │ ├── ccle_ec_np_nmf │ │ ├── fold_1.txt │ │ ├── fold_3.txt │ │ ├── fold_7.txt │ │ ├── results.txt │ │ ├── np_nmf_nested_xval.py │ │ ├── fold_9.txt │ │ ├── fold_5.txt │ │ ├── fold_6.txt │ │ ├── fold_2.txt │ │ ├── fold_8.txt │ │ ├── fold_10.txt │ │ └── fold_4.txt │ │ ├── ccle_ec_np_nmtf │ │ ├── fold_10.txt │ │ ├── fold_3.txt │ │ ├── fold_8.txt │ │ ├── fold_9.txt │ │ ├── results.txt │ │ └── np_nmtf_nested_xval.py │ │ ├── ccle_ic_np_nmf │ │ ├── results.txt │ │ ├── np_nmf_nested_xval.py │ │ ├── fold_7.txt │ │ ├── fold_6.txt │ │ ├── fold_8.txt │ │ ├── fold_1.txt │ │ ├── fold_2.txt │ │ ├── fold_3.txt │ │ ├── fold_5.txt │ │ ├── fold_10.txt │ │ ├── 
fold_4.txt │ │ └── fold_9.txt │ │ ├── ccle_ic_np_nmtf │ │ ├── results.txt │ │ └── np_nmtf_nested_xval.py │ │ ├── ccle_ec_vb_nmf │ │ └── linesearch_xval_vb.py │ │ ├── ccle_ic_vb_nmf │ │ └── linesearch_xval_vb.py │ │ ├── ccle_ec_icm_nmf │ │ └── linesearch_xval_icm.py │ │ ├── ccle_ic_icm_nmf │ │ └── linesearch_xval_icm.py │ │ ├── ccle_ic_gibbs_nmf │ │ └── linesearch_xval_gibbs.py │ │ ├── ccle_ic_vb_nmtf │ │ └── greedysearch_xval_vb.py │ │ ├── ccle_ec_vb_nmtf │ │ └── greedysearch_xval_vb.py │ │ ├── ccle_ic_icm_nmtf │ │ └── greedysearch_xval_icm.py │ │ ├── ccle_ec_icm_nmtf │ │ └── greedysearch_xval_icm.py │ │ ├── ccle_ic_gibbs_nmtf │ │ └── greedysearch_xval_gibbs.py │ │ └── ccle_ec_gibbs_nmtf │ │ └── greedysearch_xval_gibbs.py ├── experiments_gdsc │ ├── __init__.py │ ├── time │ │ ├── __init__.py │ │ ├── nmf_np_time.py │ │ ├── nmtf_np_time.py │ │ ├── nmf_vb_time.py │ │ ├── nmf_icm_time.py │ │ ├── nmf_gibbs_time.py │ │ ├── nmtf_vb_time.py │ │ └── nmtf_gibbs_time.py │ ├── convergence │ │ └── __init__.py │ ├── cross_validation │ │ ├── __init__.py │ │ ├── kbmf │ │ │ ├── .Rhistory │ │ │ ├── kbmf_regression_test.R │ │ │ ├── kbmf1mkl1mkl │ │ │ │ ├── kbmf1mkl1mkl_supervised_regression_variational_test.R │ │ │ │ ├── kbmf1mkl1mkl_semisupervised_regression_variational_test.R │ │ │ │ ├── kbmf1mkl1mkl_supervised_classification_variational_test.R │ │ │ │ ├── kbmf1mkl1mkl_semisupervised_classification_variational_test.R │ │ │ │ ├── kbmf1mkl1mkl_supervised_regression_variational_test.m │ │ │ │ ├── kbmf1mkl1mkl_semisupervised_regression_variational_test.m │ │ │ │ ├── kbmf1mkl1mkl_supervised_classification_variational_test.m │ │ │ │ └── kbmf1mkl1mkl_semisupervised_classification_variational_test.m │ │ │ ├── run_nested_cross_val_kbmf.R~ │ │ │ ├── run_nested_cross_val_kbmf.R │ │ │ ├── nested_cross_val_kbmf.R~ │ │ │ ├── nested_cross_val_kbmf.R │ │ │ ├── run_cross_val_kbmf.R~ │ │ │ ├── run_cross_val_kbmf.R │ │ │ ├── run_kbmf.R~ │ │ │ └── run_kbmf.R │ │ ├── np_nmf │ │ │ ├── __init__.py │ │ │ ├── results.txt │ │ │ ├── np_nmf_nested_xval.py │ │ │ └── np_nmf_xval.py │ │ ├── np_nmtf │ │ │ ├── __init__.py │ │ │ ├── results.txt │ │ │ ├── np_nmtf_xval.py │ │ │ ├── np_nmtf_nested_xval.py │ │ │ ├── fold_1.txt │ │ │ ├── fold_10.txt │ │ │ ├── fold_2.txt │ │ │ ├── fold_4.txt │ │ │ ├── fold_5.txt │ │ │ ├── fold_7.txt │ │ │ ├── fold_8.txt │ │ │ ├── fold_9.txt │ │ │ ├── fold_3.txt │ │ │ └── fold_6.txt │ │ ├── vb_nmf │ │ │ ├── linesearch_xval_vb.py │ │ │ └── results.txt │ │ ├── icm_nmf │ │ │ ├── linesearch_xval_icm.py │ │ │ └── results.txt │ │ ├── gibbs_nmf │ │ │ ├── linesearch_xval_gibbs.py │ │ │ └── results.txt │ │ ├── vb_nmtf │ │ │ └── greedysearch_xval_vb.py │ │ ├── icm_nmtf │ │ │ └── greedysearch_xval_icm.py │ │ └── gibbs_nmtf │ │ │ └── greedysearch_xval_gibbs.py │ └── model_selection │ │ └── __init__.py └── experiments_toy │ ├── convergence │ └── __init__.py │ ├── grid_search │ └── run_line_search_bnmf_gibbs.py │ └── time │ ├── nmtf_np_time.py │ └── nmf_np_time.py ├── data_drug_sensitivity ├── __init__.py ├── ccle │ ├── __init__.py │ ├── drugs.txt │ └── load_data.py └── gdsc │ ├── __init__.py │ ├── notes │ ├── notes~ │ └── drug_names_sorted_filtered ├── images ├── mf_mtf.pdf └── mf_mtf.png └── .gitignore /__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /code/__init__.py: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /plots/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /code/models/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /data_toy/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /experiments/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/code/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /data_toy/bnmf/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /data_toy/bnmtf/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /code/cross_validation/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /code/models/kmeans/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /data_drug_sensitivity/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /plots/time_toy/nmf_icm_times.txt~: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /plots/time_toy/nmf_np_times.txt~: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /plots/time_toy/nmf_vb_times.txt~: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /plots/time_toy/nmtf_np_times.txt~: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/grid_search/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /code/models/distributions/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /data_drug_sensitivity/ccle/__init__.py: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /data_drug_sensitivity/gdsc/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /plots/time_Sanger/nmf_gibbs_times.txt~: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /plots/time_Sanger/nmf_icm_times.txt~: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /plots/time_Sanger/nmf_np_times.txt~: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /plots/time_Sanger/nmf_vb_times.txt~: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /plots/time_Sanger/nmtf_icm_times.txt~: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/code/distributions/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /experiments/experiments_ccle/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /experiments/experiments_gdsc/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /plots/time_Sanger/nmf_icm_performances.txt~: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /plots/time_Sanger/nmf_np_performances.txt~: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /plots/time_Sanger/nmf_vb_performances.txt~: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /plots/time_Sanger/nmtf_gibbs_times.txt~: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /plots/time_toy/nmf_gibbs_performances.txt~: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /plots/time_toy/nmf_icm_performances.txt~: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /plots/time_toy/nmf_np_performances.txt~: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /plots/time_toy/nmf_vb_performances.txt~: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /plots/time_toy/nmtf_gibbs_performances.txt~: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /plots/time_toy/nmtf_np_performances.txt~: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /plots/time_toy/nmtf_vb_performances.txt~: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /experiments/experiments_gdsc/time/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /plots/time_Sanger/nmf_gibbs_performances.txt~: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /plots/time_Sanger/nmtf_gibbs_performances.txt~: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /plots/time_Sanger/nmtf_icm_performances.txt~: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /experiments/experiments_gdsc/convergence/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /experiments/experiments_toy/convergence/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /experiments/experiments_gdsc/cross_validation/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /experiments/experiments_gdsc/model_selection/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /experiments/experiments_gdsc/cross_validation/kbmf/.Rhistory: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /experiments/experiments_gdsc/cross_validation/np_nmf/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /experiments/experiments_gdsc/cross_validation/np_nmtf/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /experiments/experiments_ccle/cross_validation/ccle_ec_gibbs_nmf/results.txt: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /images/mf_mtf.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ThomasBrouwer/BNMTF/HEAD/images/mf_mtf.pdf -------------------------------------------------------------------------------- /images/mf_mtf.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThomasBrouwer/BNMTF/HEAD/images/mf_mtf.png -------------------------------------------------------------------------------- /plots/graphs_toy/legend.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThomasBrouwer/BNMTF/HEAD/plots/graphs_toy/legend.pdf -------------------------------------------------------------------------------- /plots/graphs_toy/legend.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThomasBrouwer/BNMTF/HEAD/plots/graphs_toy/legend.png -------------------------------------------------------------------------------- /plots/graphs_toy/mse_nmf_times.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThomasBrouwer/BNMTF/HEAD/plots/graphs_toy/mse_nmf_times.pdf -------------------------------------------------------------------------------- /plots/graphs_toy/mse_nmf_times.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThomasBrouwer/BNMTF/HEAD/plots/graphs_toy/mse_nmf_times.png -------------------------------------------------------------------------------- /plots/graphs_toy/mse_nmtf_times.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThomasBrouwer/BNMTF/HEAD/plots/graphs_toy/mse_nmtf_times.pdf -------------------------------------------------------------------------------- /plots/graphs_toy/mse_nmtf_times.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThomasBrouwer/BNMTF/HEAD/plots/graphs_toy/mse_nmtf_times.png -------------------------------------------------------------------------------- /plots/graphs_toy/mse_nmf_noise_test.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThomasBrouwer/BNMTF/HEAD/plots/graphs_toy/mse_nmf_noise_test.pdf -------------------------------------------------------------------------------- /plots/graphs_toy/mse_nmf_noise_test.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThomasBrouwer/BNMTF/HEAD/plots/graphs_toy/mse_nmf_noise_test.png -------------------------------------------------------------------------------- /plots/graphs_toy/mse_nmf_convergences.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThomasBrouwer/BNMTF/HEAD/plots/graphs_toy/mse_nmf_convergences.pdf -------------------------------------------------------------------------------- /plots/graphs_toy/mse_nmf_convergences.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThomasBrouwer/BNMTF/HEAD/plots/graphs_toy/mse_nmf_convergences.png -------------------------------------------------------------------------------- /plots/graphs_toy/mse_nmtf_noise_test.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/ThomasBrouwer/BNMTF/HEAD/plots/graphs_toy/mse_nmtf_noise_test.pdf -------------------------------------------------------------------------------- /plots/graphs_toy/mse_nmtf_noise_test.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThomasBrouwer/BNMTF/HEAD/plots/graphs_toy/mse_nmtf_noise_test.png -------------------------------------------------------------------------------- /plots/graphs_Sanger/mse_Sanger_nmf_times.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThomasBrouwer/BNMTF/HEAD/plots/graphs_Sanger/mse_Sanger_nmf_times.pdf -------------------------------------------------------------------------------- /plots/graphs_Sanger/mse_Sanger_nmf_times.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThomasBrouwer/BNMTF/HEAD/plots/graphs_Sanger/mse_Sanger_nmf_times.png -------------------------------------------------------------------------------- /plots/graphs_toy/aic_nmf_model_selection.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThomasBrouwer/BNMTF/HEAD/plots/graphs_toy/aic_nmf_model_selection.pdf -------------------------------------------------------------------------------- /plots/graphs_toy/aic_nmf_model_selection.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThomasBrouwer/BNMTF/HEAD/plots/graphs_toy/aic_nmf_model_selection.png -------------------------------------------------------------------------------- /plots/graphs_toy/mse_nmf_model_selection.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThomasBrouwer/BNMTF/HEAD/plots/graphs_toy/mse_nmf_model_selection.pdf -------------------------------------------------------------------------------- /plots/graphs_toy/mse_nmf_model_selection.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThomasBrouwer/BNMTF/HEAD/plots/graphs_toy/mse_nmf_model_selection.png -------------------------------------------------------------------------------- /plots/graphs_toy/mse_nmtf_convergences.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThomasBrouwer/BNMTF/HEAD/plots/graphs_toy/mse_nmtf_convergences.pdf -------------------------------------------------------------------------------- /plots/graphs_toy/mse_nmtf_convergences.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThomasBrouwer/BNMTF/HEAD/plots/graphs_toy/mse_nmtf_convergences.png -------------------------------------------------------------------------------- /plots/graphs_Sanger/mse_Sanger_nmtf_times.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThomasBrouwer/BNMTF/HEAD/plots/graphs_Sanger/mse_Sanger_nmtf_times.pdf -------------------------------------------------------------------------------- /plots/graphs_Sanger/mse_Sanger_nmtf_times.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThomasBrouwer/BNMTF/HEAD/plots/graphs_Sanger/mse_Sanger_nmtf_times.png 
-------------------------------------------------------------------------------- /plots/graphs_toy/aic_nmtf_model_selection.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThomasBrouwer/BNMTF/HEAD/plots/graphs_toy/aic_nmtf_model_selection.pdf -------------------------------------------------------------------------------- /plots/graphs_toy/aic_nmtf_model_selection.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThomasBrouwer/BNMTF/HEAD/plots/graphs_toy/aic_nmtf_model_selection.png -------------------------------------------------------------------------------- /plots/graphs_toy/elbo_nmf_model_selection.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThomasBrouwer/BNMTF/HEAD/plots/graphs_toy/elbo_nmf_model_selection.png -------------------------------------------------------------------------------- /plots/graphs_toy/mse_nmtf_model_selection.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThomasBrouwer/BNMTF/HEAD/plots/graphs_toy/mse_nmtf_model_selection.pdf -------------------------------------------------------------------------------- /plots/graphs_toy/mse_nmtf_model_selection.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThomasBrouwer/BNMTF/HEAD/plots/graphs_toy/mse_nmtf_model_selection.png -------------------------------------------------------------------------------- /plots/graphs_Sanger/mse_Sanger_nmf_convergences.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThomasBrouwer/BNMTF/HEAD/plots/graphs_Sanger/mse_Sanger_nmf_convergences.pdf -------------------------------------------------------------------------------- /plots/graphs_Sanger/mse_Sanger_nmf_convergences.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThomasBrouwer/BNMTF/HEAD/plots/graphs_Sanger/mse_Sanger_nmf_convergences.png -------------------------------------------------------------------------------- /plots/graphs_Sanger/mse_Sanger_nmtf_convergences.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThomasBrouwer/BNMTF/HEAD/plots/graphs_Sanger/mse_Sanger_nmtf_convergences.pdf -------------------------------------------------------------------------------- /plots/graphs_Sanger/mse_Sanger_nmtf_convergences.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThomasBrouwer/BNMTF/HEAD/plots/graphs_Sanger/mse_Sanger_nmtf_convergences.png -------------------------------------------------------------------------------- /plots/graphs_toy/aic_nmtf_greedy_model_selection.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThomasBrouwer/BNMTF/HEAD/plots/graphs_toy/aic_nmtf_greedy_model_selection.pdf -------------------------------------------------------------------------------- /plots/graphs_toy/aic_nmtf_greedy_model_selection.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThomasBrouwer/BNMTF/HEAD/plots/graphs_toy/aic_nmtf_greedy_model_selection.png 
-------------------------------------------------------------------------------- /plots/graphs_Sanger/aic_Sanger_line_model_selection.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThomasBrouwer/BNMTF/HEAD/plots/graphs_Sanger/aic_Sanger_line_model_selection.pdf -------------------------------------------------------------------------------- /plots/graphs_Sanger/aic_Sanger_line_model_selection.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThomasBrouwer/BNMTF/HEAD/plots/graphs_Sanger/aic_Sanger_line_model_selection.png -------------------------------------------------------------------------------- /plots/graphs_toy/mse_nmf_missing_values_predictions.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThomasBrouwer/BNMTF/HEAD/plots/graphs_toy/mse_nmf_missing_values_predictions.pdf -------------------------------------------------------------------------------- /plots/graphs_toy/mse_nmf_missing_values_predictions.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThomasBrouwer/BNMTF/HEAD/plots/graphs_toy/mse_nmf_missing_values_predictions.png -------------------------------------------------------------------------------- /plots/graphs_toy/mse_nmtf_missing_values_predictions.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThomasBrouwer/BNMTF/HEAD/plots/graphs_toy/mse_nmtf_missing_values_predictions.pdf -------------------------------------------------------------------------------- /plots/graphs_toy/mse_nmtf_missing_values_predictions.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThomasBrouwer/BNMTF/HEAD/plots/graphs_toy/mse_nmtf_missing_values_predictions.png -------------------------------------------------------------------------------- /plots/graphs_Sanger/aic_Sanger_greedy_model_selection.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThomasBrouwer/BNMTF/HEAD/plots/graphs_Sanger/aic_Sanger_greedy_model_selection.pdf -------------------------------------------------------------------------------- /plots/graphs_Sanger/aic_Sanger_greedy_model_selection.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ThomasBrouwer/BNMTF/HEAD/plots/graphs_Sanger/aic_Sanger_greedy_model_selection.png -------------------------------------------------------------------------------- /experiments/experiments_gdsc/cross_validation/kbmf/kbmf_regression_test.R: -------------------------------------------------------------------------------- 1 | # Mehmet Gonen (mehmet.gonen@gmail.com) 2 | 3 | kbmf_regression_test <- function(Kx, Kz, state) { 4 | prediction <- state$parameters$test_function(drop(Kx), drop(Kz), state) 5 | } -------------------------------------------------------------------------------- /experiments/experiments_ccle/cross_validation/ccle_ec_np_nmf/fold_1.txt: -------------------------------------------------------------------------------- 1 | Tried parameters {'K': 1} but got exception: Failed to generate folds for training and test data, 1000 attempts.. 
2 | Tried parameters {'K': 2} but got exception: Failed to generate folds for training and test data, 1000 attempts.. 3 | Tried parameters {'K': 3} but got exception: Failed to generate folds for training and test data, 1000 attempts.. 4 | -------------------------------------------------------------------------------- /experiments/experiments_ccle/cross_validation/ccle_ec_np_nmf/fold_3.txt: -------------------------------------------------------------------------------- 1 | Tried parameters {'K': 1} but got exception: Failed to generate folds for training and test data, 1000 attempts.. 2 | Tried parameters {'K': 2} but got exception: Failed to generate folds for training and test data, 1000 attempts.. 3 | Tried parameters {'K': 3} but got exception: Failed to generate folds for training and test data, 1000 attempts.. 4 | -------------------------------------------------------------------------------- /experiments/experiments_ccle/cross_validation/ccle_ec_np_nmf/fold_7.txt: -------------------------------------------------------------------------------- 1 | Tried parameters {'K': 1} but got exception: Failed to generate folds for training and test data, 1000 attempts.. 2 | Tried parameters {'K': 2} but got exception: Failed to generate folds for training and test data, 1000 attempts.. 3 | Tried parameters {'K': 3} but got exception: Failed to generate folds for training and test data, 1000 attempts.. 4 | -------------------------------------------------------------------------------- /experiments/experiments_ccle/cross_validation/ccle_ec_np_nmtf/fold_10.txt: -------------------------------------------------------------------------------- 1 | Tried parameters {'K': 1, 'L': 1} but got exception: Failed to generate folds for training and test data, 1000 attempts.. 2 | Tried parameters {'K': 1, 'L': 2} but got exception: Failed to generate folds for training and test data, 1000 attempts.. 3 | Tried parameters {'K': 2, 'L': 1} but got exception: Failed to generate folds for training and test data, 1000 attempts.. 4 | Tried parameters {'K': 2, 'L': 2} but got exception: Failed to generate folds for training and test data, 1000 attempts.. 5 | -------------------------------------------------------------------------------- /experiments/experiments_ccle/cross_validation/ccle_ec_np_nmtf/fold_3.txt: -------------------------------------------------------------------------------- 1 | Tried parameters {'K': 1, 'L': 1} but got exception: Failed to generate folds for training and test data, 1000 attempts.. 2 | Tried parameters {'K': 1, 'L': 2} but got exception: Failed to generate folds for training and test data, 1000 attempts.. 3 | Tried parameters {'K': 2, 'L': 1} but got exception: Failed to generate folds for training and test data, 1000 attempts.. 4 | Tried parameters {'K': 2, 'L': 2} but got exception: Failed to generate folds for training and test data, 1000 attempts.. 5 | -------------------------------------------------------------------------------- /experiments/experiments_ccle/cross_validation/ccle_ec_np_nmtf/fold_8.txt: -------------------------------------------------------------------------------- 1 | Tried parameters {'K': 1, 'L': 1} but got exception: Failed to generate folds for training and test data, 1000 attempts.. 2 | Tried parameters {'K': 1, 'L': 2} but got exception: Failed to generate folds for training and test data, 1000 attempts.. 3 | Tried parameters {'K': 2, 'L': 1} but got exception: Failed to generate folds for training and test data, 1000 attempts.. 
4 | Tried parameters {'K': 2, 'L': 2} but got exception: Failed to generate folds for training and test data, 1000 attempts.. 5 | -------------------------------------------------------------------------------- /experiments/experiments_ccle/cross_validation/ccle_ec_np_nmtf/fold_9.txt: -------------------------------------------------------------------------------- 1 | Tried parameters {'K': 1, 'L': 1} but got exception: Failed to generate folds for training and test data, 1000 attempts.. 2 | Tried parameters {'K': 1, 'L': 2} but got exception: Failed to generate folds for training and test data, 1000 attempts.. 3 | Tried parameters {'K': 2, 'L': 1} but got exception: Failed to generate folds for training and test data, 1000 attempts.. 4 | Tried parameters {'K': 2, 'L': 2} but got exception: Failed to generate folds for training and test data, 1000 attempts.. 5 | -------------------------------------------------------------------------------- /data_drug_sensitivity/ccle/drugs.txt: -------------------------------------------------------------------------------- 1 | 17aag 17-AAG 2 | aew541 AEW541 3 | azd0530 AZD0530 4 | azd6244 AZD6244 5 | erlotinib Erlotinib 6 | irinotecan Irinotecan 7 | l685458 L-685458 8 | lapatinib Lapatinib 9 | lbw242 LBW242 10 | nilotinib Nilotinib 11 | nutlin3 Nutlin-3 12 | paclitaxel Paclitaxel 13 | panobinostat Panobinostat 14 | pd0325901 PD-0325901 15 | pd0332991 PD-0332991 16 | pf2341066 PF2341066 17 | pha665752 PHA-665752 18 | plx4720 PLX4720 19 | raf265 RAF265 20 | sorafenib Sorafenib 21 | tae684 TAE684 22 | tki258 TKI258 23 | topotecan Topotecan 24 | zd6474 ZD-6474 25 | -------------------------------------------------------------------------------- /data_toy/bnmtf/S.txt: -------------------------------------------------------------------------------- 1 | 3.119652900231379422e-01 4.667713213280465512e-01 4.149760289926684997e-01 1.193043027963739816e-01 1.841317074265872655e+00 2 | 4.974795265023240209e-01 2.058768233004351078e+00 1.875517966446155760e-01 8.465283062509249001e-01 7.220983298177682252e-01 3 | 1.155660063996406084e+00 3.094347422574944328e+00 6.029326566297028567e-01 5.619143373047055245e-01 5.926448256085022903e-01 4 | 4.845264910112820900e-01 8.674386568923128094e-01 3.845620514887508445e+00 2.844154789202054112e-01 7.828094354652509468e-01 5 | 1.300921478072263104e+00 3.764568502857092436e-01 7.217656694934858341e-02 1.887483058053026852e+00 5.789976327985083548e-01 6 | -------------------------------------------------------------------------------- /code/models/distributions/exponential.py: -------------------------------------------------------------------------------- 1 | """ 2 | Class representing an exponential distribution, allowing us to sample from it. 3 | """ 4 | from numpy.random import exponential 5 | 6 | # Exponential draws 7 | def exponential_draw(lambdax): 8 | scale = 1.0 / lambdax 9 | return exponential(scale=scale,size=None) 10 | 11 | ''' 12 | # Do 1000 draws and plot them 13 | import matplotlib.pyplot as plt 14 | import numpy as np 15 | scale = 2. 
16 | s = [exponential_draw(1./scale) for i in range(0,1000)] 17 | s2 = np.random.exponential(scale, 1000) 18 | count, bins, ignored = plt.hist(s, 50, normed=True) 19 | count, bins, ignored = plt.hist(s2, 50, normed=True) 20 | plt.show() 21 | ''' -------------------------------------------------------------------------------- /code/models/distributions/normal.py: -------------------------------------------------------------------------------- 1 | """ 2 | Class representing a normal distribution, allowing us to sample from it. 3 | """ 4 | from numpy.random import normal 5 | import numpy, math 6 | 7 | # Draw a value x ~ Normal(mu, 1/tau), where tau is the precision (so sigma = 1/sqrt(tau)) 8 | def normal_draw(mu,tau): 9 | sigma = numpy.float64(1.0) / math.sqrt(tau) 10 | return normal(loc=mu,scale=sigma,size=None) 11 | 12 | 13 | ''' 14 | # Do 1000 draws and plot them 15 | import matplotlib.pyplot as plt 16 | import numpy as np 17 | mu = -1. 18 | tau = 4. 19 | sigma = 1./2. 20 | s = [normal_draw(mu,tau) for i in range(0,1000)] 21 | s2 = np.random.normal(mu,sigma, 1000) 22 | count, bins, ignored = plt.hist(s, 50, normed=True) 23 | count, bins, ignored = plt.hist(s2, 50, normed=True) 24 | plt.show() 25 | ''' -------------------------------------------------------------------------------- /experiments/experiments_gdsc/cross_validation/np_nmf/results.txt: -------------------------------------------------------------------------------- 1 | Average performances: {'R^2': 0.8082101768976138, 'MSE': 2.251110677994558, 'Rp': 0.8994506246266909}. 2 | All performances: {'R^2': [0.8133682117581214, 0.798979194046697, 0.8051355068238598, 0.8090828602116515, 0.8178068200269057, 0.8105171037995288, 0.8002399738798882, 0.7997607151839987, 0.8160747071552853, 0.8111366760902006], 'MSE': [2.2521197525864687, 2.2748992448875924, 2.2942175862307983, 2.2556573686657075, 2.1739052328898292, 2.2227894183419878, 2.3208767637860492, 2.3416493427979819, 2.1392407721599365, 2.2357512975992306], 'Rp': [0.90219032607860539, 0.89474794719228812, 0.89784042356311322, 0.89984829753427398, 0.9045679937026978, 0.90062265439506262, 0.89497500946862962, 0.89493733015674526, 0.90371517822333047, 0.9010610859521605]}. 3 | -------------------------------------------------------------------------------- /experiments/experiments_gdsc/cross_validation/np_nmtf/results.txt: -------------------------------------------------------------------------------- 1 | Average performances: {'R^2': 0.8077440039100411, 'MSE': 2.2576486936637714, 'Rp': 0.89914824429384355}. 2 | All performances: {'R^2': [0.7957199747101262, 0.8141723551198858, 0.8056171442590625, 0.8116290674813398, 0.8108989639498128, 0.8071070614930603, 0.8041122553537439, 0.812358444747342, 0.8127652701427079, 0.8030595018433286], 'MSE': [2.3753205049125281, 2.1919391613602976, 2.2938850389890315, 2.17790199886428, 2.2413691490156138, 2.2693811279944294, 2.3605569584646107, 2.2303183150776187, 2.1364651293176347, 2.299349552641671], 'Rp': [0.89230313136198702, 0.90276056743084043, 0.89814196546978564, 0.90130647586264789, 0.9010626799620397, 0.89878885253115048, 0.89711511071416949, 0.90150884819951982, 0.90193388078270109, 0.89656093062359299]}. 3 | -------------------------------------------------------------------------------- /experiments/experiments_ccle/cross_validation/ccle_ic_np_nmf/results.txt: -------------------------------------------------------------------------------- 1 | Average performances: {'R^2': 0.5834383321227549, 'MSE': 4.6836813431881321, 'Rp': 0.76539038468678466}.
2 | All performances: {'R^2': [0.5581154055756108, 0.5976144301819742, 0.6074602591964648, 0.60317826238292, 0.5580933330115285, 0.6056417388078126, 0.5417251115941935, 0.5638098628847992, 0.5993572553570361, 0.5993876622352095], 'MSE': [4.9761831411253876, 4.601164367855695, 4.1604958665433571, 4.4068545475314576, 4.7952719987422787, 4.5976626695669811, 5.0297510989555079, 4.9786459996680481, 4.6775970019259292, 4.6131867399666815], 'Rp': [0.74918609043742923, 0.77505321719834175, 0.78001264369772305, 0.77808776416730452, 0.74977787790685602, 0.77855419896769817, 0.73887575798487093, 0.75294195132016772, 0.77499177897479554, 0.7764225662126597]}. 3 | -------------------------------------------------------------------------------- /experiments/experiments_ccle/cross_validation/ccle_ec_np_nmf/results.txt: -------------------------------------------------------------------------------- 1 | Average performances: {'R^2': 0.2781758379686691, 'MSE': 8.0471891476130519, 'Rp': 0.54543041945837778}. 2 | All performances: {'R^2': [0.2893664139798344, 0.3675563339460708, 0.25980035553357406, 0.23938271705899428, 0.2530976731048762, 0.29012022286183825, 0.3538687087841409, 0.2760378760205108, 0.262746922851533, 0.18978115554531882], 'MSE': [8.0648790807580379, 6.9006784036206179, 8.1409961811992009, 8.2999731998977939, 8.2065025131542164, 8.1708120827606479, 7.1793657752559277, 7.9072408051910985, 8.2407904301235817, 9.3606530041694036], 'Rp': [0.55540920431152441, 0.60921980844748336, 0.52761474543693132, 0.51591801537692683, 0.5236675142267132, 0.55021414609128683, 0.60009224481417844, 0.54631672706180834, 0.53941318669187432, 0.48643860212505091]}. 3 | -------------------------------------------------------------------------------- /experiments/experiments_ccle/cross_validation/ccle_ec_np_nmtf/results.txt: -------------------------------------------------------------------------------- 1 | Average performances: {'R^2': 0.27452751064922026, 'MSE': 8.0761164553596867, 'Rp': 0.54280291631635058}. 2 | All performances: {'R^2': [0.25462406025751205, 0.28060743426965606, 0.2586817089274871, 0.183234123218048, 0.33228159578412275, 0.33928544150672446, 0.2614317329721215, 0.31697648925716626, 0.29233928029354916, 0.22581324000581526], 'MSE': [8.4875837684602278, 7.7879390718853401, 8.4309347691568135, 9.1382102849973688, 7.3126838761894719, 7.5807817178720285, 7.9856521963029952, 7.7495817839986261, 8.0086247560326136, 8.2791723287013728], 'Rp': [0.51992973113537488, 0.5514566825341487, 0.5279403053176901, 0.48668451107067162, 0.58224134947774886, 0.58787052204193668, 0.5345849839602469, 0.57042001347817517, 0.5613693249217635, 0.50553173922574979]}. 3 | -------------------------------------------------------------------------------- /experiments/experiments_ccle/cross_validation/ccle_ic_np_nmtf/results.txt: -------------------------------------------------------------------------------- 1 | Average performances: {'R^2': 0.5852741068439489, 'MSE': 4.6642022787566386, 'Rp': 0.76667607772042845}. 
2 | All performances: {'R^2': [0.5612166455312766, 0.5776112491459862, 0.5918545629428087, 0.5582129959667665, 0.6031838913058494, 0.6212836918766113, 0.5973625605929315, 0.5914757901910388, 0.5644144495125436, 0.5861252313736759], 'MSE': [5.0659262098767446, 4.8075073376466833, 4.6609715083992764, 4.8552683532599499, 4.4969035768920209, 4.3985206643584442, 4.5046485520163779, 4.5133760738818456, 4.8092333428351868, 4.5296671683998628], 'Rp': [0.75076620323159227, 0.76105736566014892, 0.77141186726015187, 0.74946287320262439, 0.77766010330458246, 0.78844764519390043, 0.77456026739973793, 0.77018933558005898, 0.75590853879080033, 0.76729657758068637]}. 3 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | .Python 10 | env/ 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | *.egg-info/ 23 | .installed.cfg 24 | *.egg 25 | 26 | # PyInstaller 27 | # Usually these files are written by a python script from a template 28 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 29 | *.manifest 30 | *.spec 31 | 32 | # Installer logs 33 | pip-log.txt 34 | pip-delete-this-directory.txt 35 | 36 | # Unit test / coverage reports 37 | htmlcov/ 38 | .tox/ 39 | .coverage 40 | .coverage.* 41 | .cache 42 | nosetests.xml 43 | coverage.xml 44 | *,cover 45 | 46 | # Translations 47 | *.mo 48 | *.pot 49 | 50 | # Django stuff: 51 | *.log 52 | 53 | # Sphinx documentation 54 | docs/_build/ 55 | 56 | # PyBuilder 57 | target/ 58 | -------------------------------------------------------------------------------- /experiments/experiments_gdsc/cross_validation/kbmf/kbmf1mkl1mkl/kbmf1mkl1mkl_supervised_regression_variational_test.R: -------------------------------------------------------------------------------- 1 | # Mehmet Gonen (mehmet.gonen@gmail.com) 2 | 3 | kbmf1mkl1mkl_supervised_regression_variational_test <- function(Kx, Kz, state) { 4 | Nx <- dim(Kx)[2] 5 | Px <- dim(Kx)[3] 6 | Nz <- dim(Kz)[2] 7 | Pz <- dim(Kz)[3] 8 | R <- dim(state$Ax$mu)[2] 9 | 10 | Gx <- list(mu = array(0, c(R, Nx, Px))) 11 | for (m in 1:Px) { 12 | Gx$mu[,,m] <- crossprod(state$Ax$mu, Kx[,,m]) 13 | } 14 | Hx <- list(mu = matrix(0, R, Nx)) 15 | for (m in 1:Px) { 16 | Hx$mu <- Hx$mu + state$ex$mu[m] * Gx$mu[,,m] 17 | } 18 | 19 | Gz <- list(mu = array(0, c(R, Nz, Pz))) 20 | for (n in 1:Pz) { 21 | Gz$mu[,,n] <- crossprod(state$Az$mu, Kz[,,n]) 22 | } 23 | Hz <- list(mu = matrix(0, R, Nz)) 24 | for (n in 1:Pz) { 25 | Hz$mu <- Hz$mu + state$ez$mu[n] * Gz$mu[,,n] 26 | } 27 | 28 | Y <- list(mu = crossprod(Hx$mu, Hz$mu)) 29 | 30 | prediction <- list(Gx = Gx, Hx = Hx, Gz = Gz, Hz = Hz, Y = Y) 31 | } -------------------------------------------------------------------------------- /experiments/experiments_gdsc/cross_validation/kbmf/kbmf1mkl1mkl/kbmf1mkl1mkl_semisupervised_regression_variational_test.R: -------------------------------------------------------------------------------- 1 | # Mehmet Gonen (mehmet.gonen@gmail.com) 2 | 3 | kbmf1mkl1mkl_semisupervised_regression_variational_test <- function(Kx, Kz, state) { 4 | Nx <- dim(Kx)[2] 5 | Px <- dim(Kx)[3] 6 | Nz <- dim(Kz)[2] 7 | Pz <- dim(Kz)[3] 8 | R <- dim(state$Ax$mu)[2] 9 | 10 | Gx <- list(mu = array(0, c(R, Nx, Px))) 11 
| for (m in 1:Px) { 12 | Gx$mu[,,m] <- crossprod(state$Ax$mu, Kx[,,m]) 13 | } 14 | Hx <- list(mu = matrix(0, R, Nx)) 15 | for (m in 1:Px) { 16 | Hx$mu <- Hx$mu + state$ex$mu[m] * Gx$mu[,,m] 17 | } 18 | 19 | Gz <- list(mu = array(0, c(R, Nz, Pz))) 20 | for (n in 1:Pz) { 21 | Gz$mu[,,n] <- crossprod(state$Az$mu, Kz[,,n]) 22 | } 23 | Hz <- list(mu = matrix(0, R, Nz)) 24 | for (n in 1:Pz) { 25 | Hz$mu <- Hz$mu + state$ez$mu[n] * Gz$mu[,,n] 26 | } 27 | 28 | Y <- list(mu = crossprod(Hx$mu, Hz$mu)) 29 | 30 | prediction <- list(Gx = Gx, Hx = Hx, Gz = Gz, Hz = Hz, Y = Y) 31 | } -------------------------------------------------------------------------------- /experiments/experiments_gdsc/cross_validation/kbmf/kbmf1mkl1mkl/kbmf1mkl1mkl_supervised_classification_variational_test.R: -------------------------------------------------------------------------------- 1 | # Mehmet Gonen (mehmet.gonen@gmail.com) 2 | 3 | kbmf1mkl1mkl_supervised_classification_variational_test <- function(Kx, Kz, state) { 4 | Nx <- dim(Kx)[2] 5 | Px <- dim(Kx)[3] 6 | Nz <- dim(Kz)[2] 7 | Pz <- dim(Kz)[3] 8 | R <- dim(state$Ax$mu)[2] 9 | 10 | Gx <- list(mu = array(0, c(R, Nx, Px))) 11 | for (m in 1:Px) { 12 | Gx$mu[,,m] <- crossprod(state$Ax$mu, Kx[,,m]) 13 | } 14 | Hx <- list(mu = matrix(0, R, Nx)) 15 | for (m in 1:Px) { 16 | Hx$mu <- Hx$mu + state$ex$mu[m] * Gx$mu[,,m] 17 | } 18 | 19 | Gz <- list(mu = array(0, c(R, Nz, Pz))) 20 | for (n in 1:Pz) { 21 | Gz$mu[,,n] <- crossprod(state$Az$mu, Kz[,,n]) 22 | } 23 | Hz <- list(mu = matrix(0, R, Nz)) 24 | for (n in 1:Pz) { 25 | Hz$mu <- Hz$mu + state$ez$mu[n] * Gz$mu[,,n] 26 | } 27 | 28 | F <- list(mu = crossprod(Hx$mu, Hz$mu)) 29 | 30 | prediction <- list(Gx = Gx, Hx = Hx, Gz = Gz, Hz = Hz, F = F) 31 | } -------------------------------------------------------------------------------- /experiments/experiments_gdsc/cross_validation/kbmf/kbmf1mkl1mkl/kbmf1mkl1mkl_semisupervised_classification_variational_test.R: -------------------------------------------------------------------------------- 1 | # Mehmet Gonen (mehmet.gonen@gmail.com) 2 | 3 | kbmf1mkl1mkl_semisupervised_classification_variational_test <- function(Kx, Kz, state) { 4 | Nx <- dim(Kx)[2] 5 | Px <- dim(Kx)[3] 6 | Nz <- dim(Kz)[2] 7 | Pz <- dim(Kz)[3] 8 | R <- dim(state$Ax$mu)[2] 9 | 10 | Gx <- list(mu = array(0, c(R, Nx, Px))) 11 | for (m in 1:Px) { 12 | Gx$mu[,,m] <- crossprod(state$Ax$mu, Kx[,,m]) 13 | } 14 | Hx <- list(mu = matrix(0, R, Nx)) 15 | for (m in 1:Px) { 16 | Hx$mu <- Hx$mu + state$ex$mu[m] * Gx$mu[,,m] 17 | } 18 | 19 | Gz <- list(mu = array(0, c(R, Nz, Pz))) 20 | for (n in 1:Pz) { 21 | Gz$mu[,,n] <- crossprod(state$Az$mu, Kz[,,n]) 22 | } 23 | Hz <- list(mu = matrix(0, R, Nz)) 24 | for (n in 1:Pz) { 25 | Hz$mu <- Hz$mu + state$ez$mu[n] * Gz$mu[,,n] 26 | } 27 | 28 | F <- list(mu = crossprod(Hx$mu, Hz$mu)) 29 | 30 | prediction <- list(Gx = Gx, Hx = Hx, Gz = Gz, Hz = Hz, F = F) 31 | } -------------------------------------------------------------------------------- /experiments/experiments_gdsc/cross_validation/kbmf/kbmf1mkl1mkl/kbmf1mkl1mkl_supervised_regression_variational_test.m: -------------------------------------------------------------------------------- 1 | % Mehmet Gonen (mehmet.gonen@gmail.com) 2 | 3 | function prediction = kbmf1mkl1mkl_supervised_regression_variational_test(Kx, Kz, state) 4 | Nx = size(Kx, 2); 5 | Px = size(Kx, 3); 6 | Nz = size(Kz, 2); 7 | Pz = size(Kz, 3); 8 | R = size(state.Ax.mu, 2); 9 | 10 | prediction.Gx.mu = zeros(R, Nx, Px); 11 | for m = 1:Px 12 | prediction.Gx.mu(:, :, m) = 
state.Ax.mu' * Kx(:, :, m); 13 | end 14 | prediction.Hx.mu = zeros(R, Nx); 15 | for m = 1:Px 16 | prediction.Hx.mu = prediction.Hx.mu + state.ex.mu(m) * prediction.Gx.mu(:, :, m); 17 | end 18 | 19 | prediction.Gz.mu = zeros(R, Nz, Pz); 20 | for n = 1:Pz 21 | prediction.Gz.mu(:, :, n) = state.Az.mu' * Kz(:, :, n); 22 | end 23 | prediction.Hz.mu = zeros(R, Nz); 24 | for n = 1:Pz 25 | prediction.Hz.mu = prediction.Hz.mu + state.ez.mu(n) * prediction.Gz.mu(:, :, n); 26 | end 27 | 28 | prediction.Y.mu = prediction.Hx.mu' * prediction.Hz.mu; 29 | end -------------------------------------------------------------------------------- /experiments/experiments_gdsc/cross_validation/kbmf/kbmf1mkl1mkl/kbmf1mkl1mkl_semisupervised_regression_variational_test.m: -------------------------------------------------------------------------------- 1 | % Mehmet Gonen (mehmet.gonen@gmail.com) 2 | 3 | function prediction = kbmf1mkl1mkl_semisupervised_regression_variational_test(Kx, Kz, state) 4 | Nx = size(Kx, 2); 5 | Px = size(Kx, 3); 6 | Nz = size(Kz, 2); 7 | Pz = size(Kz, 3); 8 | R = size(state.Ax.mu, 2); 9 | 10 | prediction.Gx.mu = zeros(R, Nx, Px); 11 | for m = 1:Px 12 | prediction.Gx.mu(:, :, m) = state.Ax.mu' * Kx(:, :, m); 13 | end 14 | prediction.Hx.mu = zeros(R, Nx); 15 | for m = 1:Px 16 | prediction.Hx.mu = prediction.Hx.mu + state.ex.mu(m) * prediction.Gx.mu(:, :, m); 17 | end 18 | 19 | prediction.Gz.mu = zeros(R, Nz, Pz); 20 | for n = 1:Pz 21 | prediction.Gz.mu(:, :, n) = state.Az.mu' * Kz(:, :, n); 22 | end 23 | prediction.Hz.mu = zeros(R, Nz); 24 | for n = 1:Pz 25 | prediction.Hz.mu = prediction.Hz.mu + state.ez.mu(n) * prediction.Gz.mu(:, :, n); 26 | end 27 | 28 | prediction.Y.mu = prediction.Hx.mu' * prediction.Hz.mu; 29 | end -------------------------------------------------------------------------------- /experiments/experiments_gdsc/cross_validation/kbmf/kbmf1mkl1mkl/kbmf1mkl1mkl_supervised_classification_variational_test.m: -------------------------------------------------------------------------------- 1 | % Mehmet Gonen (mehmet.gonen@gmail.com) 2 | 3 | function prediction = kbmf1mkl1mkl_supervised_classification_variational_test(Kx, Kz, state) 4 | Nx = size(Kx, 2); 5 | Px = size(Kx, 3); 6 | Nz = size(Kz, 2); 7 | Pz = size(Kz, 3); 8 | R = size(state.Ax.mu, 2); 9 | 10 | prediction.Gx.mu = zeros(R, Nx, Px); 11 | for m = 1:Px 12 | prediction.Gx.mu(:, :, m) = state.Ax.mu' * Kx(:, :, m); 13 | end 14 | prediction.Hx.mu = zeros(R, Nx); 15 | for m = 1:Px 16 | prediction.Hx.mu = prediction.Hx.mu + state.ex.mu(m) * prediction.Gx.mu(:, :, m); 17 | end 18 | 19 | prediction.Gz.mu = zeros(R, Nz, Pz); 20 | for n = 1:Pz 21 | prediction.Gz.mu(:, :, n) = state.Az.mu' * Kz(:, :, n); 22 | end 23 | prediction.Hz.mu = zeros(R, Nz); 24 | for n = 1:Pz 25 | prediction.Hz.mu = prediction.Hz.mu + state.ez.mu(n) * prediction.Gz.mu(:, :, n); 26 | end 27 | 28 | prediction.F.mu = prediction.Hx.mu' * prediction.Hz.mu; 29 | end -------------------------------------------------------------------------------- /experiments/experiments_gdsc/cross_validation/kbmf/kbmf1mkl1mkl/kbmf1mkl1mkl_semisupervised_classification_variational_test.m: -------------------------------------------------------------------------------- 1 | % Mehmet Gonen (mehmet.gonen@gmail.com) 2 | 3 | function prediction = kbmf1mkl1mkl_semisupervised_classification_variational_test(Kx, Kz, state) 4 | Nx = size(Kx, 2); 5 | Px = size(Kx, 3); 6 | Nz = size(Kz, 2); 7 | Pz = size(Kz, 3); 8 | R = size(state.Ax.mu, 2); 9 | 10 | prediction.Gx.mu = zeros(R, Nx, Px); 11 | 
for m = 1:Px 12 | prediction.Gx.mu(:, :, m) = state.Ax.mu' * Kx(:, :, m); 13 | end 14 | prediction.Hx.mu = zeros(R, Nx); 15 | for m = 1:Px 16 | prediction.Hx.mu = prediction.Hx.mu + state.ex.mu(m) * prediction.Gx.mu(:, :, m); 17 | end 18 | 19 | prediction.Gz.mu = zeros(R, Nz, Pz); 20 | for n = 1:Pz 21 | prediction.Gz.mu(:, :, n) = state.Az.mu' * Kz(:, :, n); 22 | end 23 | prediction.Hz.mu = zeros(R, Nz); 24 | for n = 1:Pz 25 | prediction.Hz.mu = prediction.Hz.mu + state.ez.mu(n) * prediction.Gz.mu(:, :, n); 26 | end 27 | 28 | prediction.F.mu = prediction.Hx.mu' * prediction.Hz.mu; 29 | end -------------------------------------------------------------------------------- /tests/code/distributions/test_gamma.py: -------------------------------------------------------------------------------- 1 | """ 2 | Test the class for Gamma draws and expectations in gamma.py. 3 | """ 4 | 5 | import sys, os 6 | project_location = os.path.dirname(__file__)+"/../../../" 7 | sys.path.append(project_location) 8 | 9 | from BNMTF.code.models.distributions.gamma import gamma_draw, gamma_expectation, gamma_expectation_log, gamma_mode 10 | 11 | def test_expectation(): 12 | alpha = 2.0 13 | beta = 3.0 14 | 15 | expectation = 2.0 / 3.0 16 | assert gamma_expectation(alpha,beta) == expectation 17 | 18 | def test_expectation_log(): 19 | alpha = 2.0 20 | beta = 3.0 21 | 22 | expectation_log = -0.67582795356964265 # digamma(2) - log_e (3) in Wolfram Alpha 23 | assert gamma_expectation_log(alpha,beta) == expectation_log 24 | 25 | # Test a draw - simply verify it is >= 0. 26 | def test_draw(): 27 | alpha = 2.0 28 | beta = 3.0 29 | for i in range(0,100): 30 | assert gamma_draw(alpha,beta) >= 0.0 31 | 32 | # Test the mode, (alpha-1)/beta 33 | def test_mode(): 34 | alpha = 2.0 35 | beta = 3.0 36 | mode = 1./3. 37 | assert gamma_mode(alpha,beta) == mode -------------------------------------------------------------------------------- /data_drug_sensitivity/ccle/load_data.py: -------------------------------------------------------------------------------- 1 | """ 2 | Helper function for reading in the CCLE dataset, splitting into data X and mask M. 3 | Returns: 4 | X Drug sensitivity values (original) 5 | M Mask of known vs unknown values 6 | """ 7 | 8 | import numpy 9 | 10 | import os 11 | 12 | folder_ccle = os.path.dirname(__file__)+"/" 13 | ccle_ic_file = folder_ccle+"ic50.txt" 14 | ccle_ec_file = folder_ccle+"ec50.txt" 15 | 16 | def load_ccle(ic50=True, delim='\t'): 17 | filelocation = (ccle_ic_file if ic50 else ccle_ec_file) 18 | data = numpy.genfromtxt(filelocation, delimiter=delim, missing_values=[numpy.nan]) 19 | I, J = data.shape 20 | 21 | # Construct the mask matrix, and replace any nan values by 0 22 | new_data, mask = numpy.zeros((I,J)), numpy.zeros((I,J)) 23 | for i in range(0,I): 24 | for j in range(0,J): 25 | if not numpy.isnan(data[i,j]): 26 | new_data[i,j] = data[i,j] 27 | mask[i,j] = 1. 28 | return new_data, mask 29 | 30 | ''' 31 | X, M = load_ccle(ic50=False) 32 | (I,J)= X.shape 33 | print I,J 34 | print I*J, M.sum(), M.sum()/(I*J) 35 | ''' -------------------------------------------------------------------------------- /experiments/experiments_ccle/cross_validation/ccle_ec_np_nmf/np_nmf_nested_xval.py: -------------------------------------------------------------------------------- 1 | """ 2 | Run the nested cross-validation for the NMF class, on the CCLE EC50 dataset.
3 | """ 4 | 5 | import sys, os 6 | project_location = os.path.dirname(__file__)+"/../../../../../" 7 | sys.path.append(project_location) 8 | 9 | from BNMTF.code.models.nmf_np import NMF 10 | from BNMTF.code.cross_validation.nested_matrix_cross_validation import MatrixNestedCrossValidation 11 | from BNMTF.data_drug_sensitivity.ccle.load_data import load_ccle 12 | 13 | 14 | # Settings 15 | standardised = False 16 | train_config = { 17 | 'iterations' : 2000, 18 | 'init_UV' : 'exponential', 19 | 'expo_prior' : 0.1 20 | } 21 | K_range = [1,2,3] 22 | no_threads = 5 23 | no_folds = 10 24 | output_file = "./results.txt" 25 | files_nested_performances = ["./fold_%s.txt" % fold for fold in range(1,no_folds+1)] 26 | 27 | # Construct the parameter search 28 | parameter_search = [{'K':K} for K in K_range] 29 | 30 | # Load in the CCLE EC50 dataset 31 | R,M = load_ccle(ic50=False) 32 | 33 | # Run the cross-validation framework 34 | #random.seed(42) 35 | #numpy.random.seed(9000) 36 | nested_crossval = MatrixNestedCrossValidation( 37 | method=NMF, 38 | X=R, 39 | M=M, 40 | K=no_folds, 41 | P=no_threads, 42 | parameter_search=parameter_search, 43 | train_config=train_config, 44 | file_performance=output_file, 45 | files_nested_performances=files_nested_performances 46 | ) 47 | nested_crossval.run() 48 | -------------------------------------------------------------------------------- /experiments/experiments_ccle/cross_validation/ccle_ic_np_nmf/np_nmf_nested_xval.py: -------------------------------------------------------------------------------- 1 | """ 2 | Run the nested cross-validation for the NMF class, on the CCLE IC50 dataset. 3 | """ 4 | 5 | import sys, os 6 | project_location = os.path.dirname(__file__)+"/../../../../../" 7 | sys.path.append(project_location) 8 | 9 | from BNMTF.code.models.nmf_np import NMF 10 | from BNMTF.code.cross_validation.nested_matrix_cross_validation import MatrixNestedCrossValidation 11 | from BNMTF.data_drug_sensitivity.ccle.load_data import load_ccle 12 | 13 | 14 | # Settings 15 | standardised = False 16 | train_config = { 17 | 'iterations' : 2000, 18 | 'init_UV' : 'exponential', 19 | 'expo_prior' : 0.1 20 | } 21 | K_range = [1,2,3] 22 | no_threads = 2 23 | no_folds = 10 24 | output_file = "./results.txt" 25 | files_nested_performances = ["./fold_%s.txt" % fold for fold in range(1,no_folds+1)] 26 | 27 | # Construct the parameter search 28 | parameter_search = [{'K':K} for K in K_range] 29 | 30 | # Load in the CCLE IC50 dataset 31 | R,M = load_ccle(ic50=True) 32 | 33 | # Run the cross-validation framework 34 | #random.seed(42) 35 | #numpy.random.seed(9000) 36 | nested_crossval = MatrixNestedCrossValidation( 37 | method=NMF, 38 | X=R, 39 | M=M, 40 | K=no_folds, 41 | P=no_threads, 42 | parameter_search=parameter_search, 43 | train_config=train_config, 44 | file_performance=output_file, 45 | files_nested_performances=files_nested_performances 46 | ) 47 | nested_crossval.run() 48 | -------------------------------------------------------------------------------- /experiments/experiments_gdsc/cross_validation/np_nmf/np_nmf_nested_xval.py: -------------------------------------------------------------------------------- 1 | """ 2 | Run the nested cross-validation for the NMTF class, on the Sanger dataset. 
3 | """ 4 | 5 | import sys, os 6 | project_location = os.path.dirname(__file__)+"/../../../../../" 7 | sys.path.append(project_location) 8 | 9 | import numpy, random 10 | from BNMTF.code.models.nmf_np import NMF 11 | from BNMTF.code.cross_validation.nested_matrix_cross_validation import MatrixNestedCrossValidation 12 | from BNMTF.data_drug_sensitivity.gdsc.load_data import load_gdsc 13 | 14 | 15 | # Settings 16 | standardised = False 17 | train_config = { 18 | 'iterations' : 2000, 19 | 'init_UV' : 'exponential', 20 | 'expo_prior' : 0.1 21 | } 22 | K_range = [6,8,10,12,14] 23 | no_folds = 10 24 | output_file = "./results.txt" 25 | files_nested_performances = ["./fold_%s.txt" % fold for fold in range(1,no_folds+1)] 26 | 27 | # Construct the parameter search 28 | parameter_search = [{'K':K} for K in K_range] 29 | 30 | # Load in the Sanger dataset 31 | (_,X_min,M,_,_,_,_) = load_gdsc(standardised=standardised,sep=',') 32 | 33 | # Run the cross-validation framework 34 | random.seed(42) 35 | numpy.random.seed(9000) 36 | nested_crossval = MatrixNestedCrossValidation( 37 | method=NMF, 38 | X=X_min, 39 | M=M, 40 | K=no_folds, 41 | P=5, 42 | parameter_search=parameter_search, 43 | train_config=train_config, 44 | file_performance=output_file, 45 | files_nested_performances=files_nested_performances 46 | ) 47 | nested_crossval.run() 48 | -------------------------------------------------------------------------------- /code/models/distributions/gamma.py: -------------------------------------------------------------------------------- 1 | """ 2 | Class representing a gamma distribution, allowing us to sample from it, 3 | and compute the expectation and the expectation of the log. 4 | """ 5 | import math 6 | from scipy.special import psi as digamma 7 | from numpy.random import gamma 8 | 9 | 10 | # Gamma draws 11 | def gamma_draw(alpha,beta): 12 | shape = float(alpha) 13 | scale = 1.0 / float(beta) 14 | return gamma(shape=shape,scale=scale,size=None) 15 | 16 | # Gamma expectation 17 | def gamma_expectation(alpha,beta): 18 | alpha, beta = float(alpha), float(beta) 19 | return alpha / beta 20 | 21 | # Gamma variance 22 | def gamma_expectation_log(alpha,beta): 23 | alpha, beta = float(alpha), float(beta) 24 | return digamma(alpha) - math.log(beta) 25 | 26 | # Gamma mode 27 | def gamma_mode(alpha,beta): 28 | alpha, beta = float(alpha), float(beta) 29 | return (alpha-1) / beta 30 | 31 | 32 | ''' 33 | # Do 1000 draws and plot them 34 | import matplotlib.pyplot as plt 35 | import scipy.special as sps 36 | import numpy as np 37 | shape, scale = 2., 2. # mean and dispersion 38 | s = [gamma_draw(shape,1.0/scale) for i in range(0,1000)] 39 | s2 = np.random.gamma(shape, scale, 1000) 40 | count, bins, ignored = plt.hist(s, 50, normed=True) 41 | count, bins, ignored = plt.hist(s2, 50, normed=True) 42 | y = bins**(shape-1)*(np.exp(-bins/scale) / 43 | (sps.gamma(shape)*scale**shape)) 44 | plt.plot(bins, y, linewidth=2, color='r') 45 | plt.show() 46 | ''' -------------------------------------------------------------------------------- /experiments/experiments_ccle/cross_validation/ccle_ec_vb_nmf/linesearch_xval_vb.py: -------------------------------------------------------------------------------- 1 | """ 2 | Run the cross validation with line search for model selection using VB-NMF on 3 | the CCLE EC50 dataset. 
4 | """ 5 | 6 | import sys, os 7 | project_location = os.path.dirname(__file__)+"/../../../../../" 8 | sys.path.append(project_location) 9 | 10 | import numpy, random 11 | from BNMTF.code.models.bnmf_vb_optimised import bnmf_vb_optimised 12 | from BNMTF.code.cross_validation.line_search_cross_validation import LineSearchCrossValidation 13 | from BNMTF.data_drug_sensitivity.ccle.load_data import load_ccle 14 | 15 | 16 | # Settings 17 | standardised = False 18 | iterations = 1000 19 | init_UV = 'random' 20 | 21 | K_range = [1,2,3] 22 | no_folds = 10 23 | restarts = 1 24 | 25 | quality_metric = 'AIC' 26 | output_file = "./results.txt" 27 | 28 | alpha, beta = 1., 1. 29 | lambdaU = 1./10. 30 | lambdaV = 1./10. 31 | priors = { 'alpha':alpha, 'beta':beta, 'lambdaU':lambdaU, 'lambdaV':lambdaV } 32 | 33 | # Load in the CCLE EC50 dataset 34 | R,M = load_ccle(ic50=False) 35 | 36 | # Run the cross-validation framework 37 | #random.seed(42) 38 | #numpy.random.seed(9000) 39 | nested_crossval = LineSearchCrossValidation( 40 | classifier=bnmf_vb_optimised, 41 | R=R, 42 | M=M, 43 | values_K=K_range, 44 | folds=no_folds, 45 | priors=priors, 46 | init_UV=init_UV, 47 | iterations=iterations, 48 | restarts=restarts, 49 | quality_metric=quality_metric, 50 | file_performance=output_file 51 | ) 52 | nested_crossval.run() 53 | -------------------------------------------------------------------------------- /experiments/experiments_ccle/cross_validation/ccle_ic_vb_nmf/linesearch_xval_vb.py: -------------------------------------------------------------------------------- 1 | """ 2 | Run the cross validation with line search for model selection using VB-NMF on 3 | the CCLE IC50 dataset. 4 | """ 5 | 6 | import sys, os 7 | project_location = os.path.dirname(__file__)+"/../../../../../" 8 | sys.path.append(project_location) 9 | 10 | import numpy, random 11 | from BNMTF.code.models.bnmf_vb_optimised import bnmf_vb_optimised 12 | from BNMTF.code.cross_validation.line_search_cross_validation import LineSearchCrossValidation 13 | from BNMTF.data_drug_sensitivity.ccle.load_data import load_ccle 14 | 15 | 16 | # Settings 17 | standardised = False 18 | iterations = 1000 19 | init_UV = 'random' 20 | 21 | K_range = [3,4,5,6,7] 22 | no_folds = 10 23 | restarts = 1 24 | 25 | quality_metric = 'AIC' 26 | output_file = "./results.txt" 27 | 28 | alpha, beta = 1., 1. 29 | lambdaU = 1./10. 30 | lambdaV = 1./10. 31 | priors = { 'alpha':alpha, 'beta':beta, 'lambdaU':lambdaU, 'lambdaV':lambdaV } 32 | 33 | # Load in the CCLE IC50 dataset 34 | R,M = load_ccle(ic50=True) 35 | 36 | # Run the cross-validation framework 37 | #random.seed(42) 38 | #numpy.random.seed(9000) 39 | nested_crossval = LineSearchCrossValidation( 40 | classifier=bnmf_vb_optimised, 41 | R=R, 42 | M=M, 43 | values_K=K_range, 44 | folds=no_folds, 45 | priors=priors, 46 | init_UV=init_UV, 47 | iterations=iterations, 48 | restarts=restarts, 49 | quality_metric=quality_metric, 50 | file_performance=output_file 51 | ) 52 | nested_crossval.run() 53 | -------------------------------------------------------------------------------- /experiments/experiments_ccle/cross_validation/ccle_ec_icm_nmf/linesearch_xval_icm.py: -------------------------------------------------------------------------------- 1 | """ 2 | Run the cross validation with line search for model selection using VB-NMF on 3 | the CCLE EC50 dataset. 
4 | """ 5 | 6 | import sys, os 7 | project_location = os.path.dirname(__file__)+"/../../../../../" 8 | sys.path.append(project_location) 9 | 10 | from BNMTF.code.models.nmf_icm import nmf_icm 11 | from BNMTF.code.cross_validation.line_search_cross_validation import LineSearchCrossValidation 12 | from BNMTF.data_drug_sensitivity.ccle.load_data import load_ccle 13 | 14 | 15 | # Settings 16 | standardised = False 17 | iterations = 1000 18 | init_UV = 'random' 19 | 20 | K_range = [1,2,3,4,5,6,7,8,9,10] 21 | no_folds = 10 22 | restarts = 1 23 | 24 | quality_metric = 'AIC' 25 | output_file = "./results.txt" 26 | 27 | alpha, beta = 1., 1. 28 | lambdaU = 1./10. 29 | lambdaV = 1./10. 30 | priors = { 'alpha':alpha, 'beta':beta, 'lambdaU':lambdaU, 'lambdaV':lambdaV } 31 | 32 | minimum_TN = 0.01 33 | 34 | # Load in the CCLE EC50 dataset 35 | R,M = load_ccle(ic50=False) 36 | 37 | # Run the cross-validation framework 38 | #random.seed(42) 39 | #numpy.random.seed(9000) 40 | nested_crossval = LineSearchCrossValidation( 41 | classifier=nmf_icm, 42 | R=R, 43 | M=M, 44 | values_K=K_range, 45 | folds=no_folds, 46 | priors=priors, 47 | init_UV=init_UV, 48 | iterations=iterations, 49 | restarts=restarts, 50 | quality_metric=quality_metric, 51 | file_performance=output_file 52 | ) 53 | nested_crossval.run(minimum_TN=minimum_TN) 54 | -------------------------------------------------------------------------------- /experiments/experiments_ccle/cross_validation/ccle_ic_icm_nmf/linesearch_xval_icm.py: -------------------------------------------------------------------------------- 1 | """ 2 | Run the cross validation with line search for model selection using VB-NMF on 3 | the CCLE IC50 dataset. 4 | """ 5 | 6 | import sys, os 7 | project_location = os.path.dirname(__file__)+"/../../../../../" 8 | sys.path.append(project_location) 9 | 10 | from BNMTF.code.models.nmf_icm import nmf_icm 11 | from BNMTF.code.cross_validation.line_search_cross_validation import LineSearchCrossValidation 12 | from BNMTF.data_drug_sensitivity.ccle.load_data import load_ccle 13 | 14 | 15 | # Settings 16 | standardised = False 17 | iterations = 1000 18 | init_UV = 'random' 19 | 20 | K_range = [1,2,3,4,5,6,7,8,9,10] 21 | no_folds = 10 22 | restarts = 1 23 | 24 | quality_metric = 'AIC' 25 | output_file = "./results.txt" 26 | 27 | alpha, beta = 1., 1. 28 | lambdaU = 1./10. 29 | lambdaV = 1./10. 30 | priors = { 'alpha':alpha, 'beta':beta, 'lambdaU':lambdaU, 'lambdaV':lambdaV } 31 | 32 | minimum_TN = 0.01 33 | 34 | # Load in the CCLE IC50 dataset 35 | R,M = load_ccle(ic50=True) 36 | 37 | # Run the cross-validation framework 38 | #random.seed(42) 39 | #numpy.random.seed(9000) 40 | nested_crossval = LineSearchCrossValidation( 41 | classifier=nmf_icm, 42 | R=R, 43 | M=M, 44 | values_K=K_range, 45 | folds=no_folds, 46 | priors=priors, 47 | init_UV=init_UV, 48 | iterations=iterations, 49 | restarts=restarts, 50 | quality_metric=quality_metric, 51 | file_performance=output_file 52 | ) 53 | nested_crossval.run(minimum_TN=minimum_TN) 54 | -------------------------------------------------------------------------------- /data_drug_sensitivity/gdsc/notes: -------------------------------------------------------------------------------- 1 | Data from Sanger's Genomics of Drug Sensitivity in Cancer project. 2 | http://www.cancerrxgene.org/downloads/ 3 | 4 | *** ic50_excl_empty_filtered_cell_lines_drugs.txt *** 5 | The original file from website is called gdsc_manova_input_w5.csv (this file has all the genetic info etc filtered, leaving only the IC50 values). 
6 | We removed row with drug AZD6482 (PubChem id 44137675) as there are two columns with that drug (removed the first one). 7 | Filtered rows 57 and 635 because they had only 2 resp. 1 drug tested. 8 | Only included those cancer cell lines that have features available (en_input_w5.csv). The cancer cell lines are also ordered alphabetically (so that they align perfectly with the cell line kernels/constraint matrices). 9 | Also removed the drug Cisplatin (PubChem id 84691) and reordered columns alphabetically (so that they align perfectly with the drug and cancer line kernels/constraint matrices). 10 | Finally, removed one value (-36.485443) as this was an extreme outlier (next lowest value around -17). 11 | We end up with: 622 cell lines, 138 drugs 12 | 13 | *** ic50_excl_empty_filtered_cell_lines_drugs_standardised.txt *** 14 | As above, but standardised the cell lines (so each row has mean 0, std 1). 15 | We end up with: 622 cell lines, 138 drugs. 16 | 17 | *** /kernels/ *** 18 | The different similarity kernels, based on drug and cell line features. 19 | For binary features we use a Jaccard kernel. 20 | For real-valued features we use a Gaussian kernel with as the kernel standard deviation value: (no. features) / 4. 21 | -------------------------------------------------------------------------------- /experiments/experiments_gdsc/cross_validation/np_nmf/np_nmf_xval.py: -------------------------------------------------------------------------------- 1 | """ 2 | Run the nested cross-validation for the NMTF class, on the Sanger dataset. 3 | 4 | Since we want to find co-clusters of significantly higher/lower drug sensitivity 5 | values, we should use the unstandardised Sanger dataset. 6 | """ 7 | 8 | import sys, os 9 | project_location = os.path.dirname(__file__)+"/../../../../../" 10 | sys.path.append(project_location) 11 | 12 | import numpy, random 13 | from BNMTF.code.nmf_np import NMF 14 | from BNMTF.cross_validation.matrix_cross_validation import MatrixCrossValidation 15 | from BNMTF.data_drug_sensitivity.gdsc.load_data import load_gdsc 16 | 17 | 18 | # Settings 19 | standardised = False 20 | train_config = { 21 | 'iterations' : 1000, 22 | 'init_UV' : 'exponential', 23 | 'expo_prior' : 0.1 24 | } 25 | K_range = range(2,10+1,2) 26 | no_folds = 10 27 | output_file = "./results.txt" 28 | files_nested_performances = ["./fold_%s.txt" % fold for fold in range(1,no_folds+1)] 29 | 30 | # Construct the parameter search 31 | parameter_search = [{'K':K} for K in K_range] 32 | 33 | # Load in the Sanger dataset 34 | (_,X_min,M,_,_,_,_) = load_gdsc(standardised=standardised,sep=',') 35 | 36 | # Run the cross-validation framework 37 | random.seed(42) 38 | numpy.random.seed(9000) 39 | nested_crossval = MatrixCrossValidation( 40 | method=NMF, 41 | X=X_min, 42 | M=M, 43 | K=no_folds, 44 | parameter_search=parameter_search, 45 | train_config=train_config, 46 | file_performance=output_file 47 | ) 48 | nested_crossval.run() 49 | -------------------------------------------------------------------------------- /experiments/experiments_ccle/cross_validation/ccle_ic_gibbs_nmf/linesearch_xval_gibbs.py: -------------------------------------------------------------------------------- 1 | """ 2 | Run the cross validation with line search for model selection using VB-NMF on 3 | the CCLE IC50 dataset. 
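Aside on the kernel construction described in data_drug_sensitivity/gdsc/notes above (a Jaccard kernel for binary features, and a Gaussian kernel with standard deviation (no. features)/4 for real-valued features): a minimal sketch of how such kernels could be computed. The function names and the input convention (rows = cell lines or drugs, columns = features) are assumptions for illustration; the repository's own kernel scripts are not included in this dump.

import numpy

def jaccard_kernel(F):
    # Jaccard similarity between rows of a binary feature matrix F.
    F = numpy.asarray(F, dtype=float)
    intersection = F.dot(F.T)
    row_sums = F.sum(axis=1)
    union = row_sums[:, None] + row_sums[None, :] - intersection
    return intersection / numpy.maximum(union, 1e-10)   # rows with no features at all get similarity 0

def gaussian_kernel(F):
    # Gaussian (RBF) kernel between rows of a real-valued feature matrix F,
    # with kernel standard deviation sigma = (no. features) / 4 as in the notes.
    F = numpy.asarray(F, dtype=float)
    sigma = F.shape[1] / 4.0
    sq_dists = ((F[:, None, :] - F[None, :, :]) ** 2).sum(axis=2)
    return numpy.exp(-sq_dists / (2.0 * sigma ** 2))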
4 | """ 5 | 6 | import sys, os 7 | project_location = os.path.dirname(__file__)+"/../../../../../" 8 | sys.path.append(project_location) 9 | 10 | from BNMTF.code.models.bnmf_gibbs_optimised import bnmf_gibbs_optimised 11 | from BNMTF.code.cross_validation.line_search_cross_validation import LineSearchCrossValidation 12 | from BNMTF.data_drug_sensitivity.ccle.load_data import load_ccle 13 | 14 | 15 | # Settings 16 | iterations = 1000 17 | burn_in = 900 18 | thinning = 2 19 | init_UV = 'random' 20 | 21 | K_range = [3,4,5,6] 22 | no_folds = 10 23 | restarts = 1 24 | 25 | quality_metric = 'AIC' 26 | output_file = "./results.txt" 27 | 28 | alpha, beta = 1., 1. 29 | lambdaU = 1./10. 30 | lambdaV = 1./10. 31 | priors = { 'alpha':alpha, 'beta':beta, 'lambdaU':lambdaU, 'lambdaV':lambdaV } 32 | 33 | # Load in the CCLE IC50 dataset 34 | R,M = load_ccle(ic50=True) 35 | 36 | # Run the cross-validation framework 37 | #random.seed(42) 38 | #numpy.random.seed(9000) 39 | nested_crossval = LineSearchCrossValidation( 40 | classifier=bnmf_gibbs_optimised, 41 | R=R, 42 | M=M, 43 | values_K=K_range, 44 | folds=no_folds, 45 | priors=priors, 46 | init_UV=init_UV, 47 | iterations=iterations, 48 | restarts=restarts, 49 | quality_metric=quality_metric, 50 | file_performance=output_file 51 | ) 52 | nested_crossval.run(burn_in=burn_in,thinning=thinning) 53 | -------------------------------------------------------------------------------- /data_drug_sensitivity/gdsc/notes~: -------------------------------------------------------------------------------- 1 | Data from Sanger's Genomics of Drug Sensitivity in Cancer project. 2 | http://www.cancerrxgene.org/downloads/ 3 | 4 | 5 | *** ic50_excl_empty_filtered_cell_lines_drugs.txt *** 6 | The original file from website is called gdsc_manova_input_w5.csv (this file has all the genetic info etc filtered, leaving only the IC50 values). 7 | We removed row with drug AZD6482 (PubChem id 44137675) as there are two columns with that drug (removed the first one). 8 | Filtered rows 57 and 635 because they had only 2 resp. 1 drug tested. 9 | Only included those cancer cell lines that have features available (en_input_w5.csv). The cancer cell lines are also ordered alphabetically (so that they align perfectly with the cell line kernels/constraint matrices). 10 | Also removed the drug Cisplatin (PubChem id 84691) and reordered columns alphabetically (so that they align perfectly with the drug and cancer line kernels/constraint matrices). 11 | Finally, removed one value (-36.485443) as this was an extreme outlier (next lowest value around -17). 12 | We end up with: 622 cell lines, 138 drugs 13 | 14 | *** ic50_excl_empty_filtered_cell_lines_drugs_standardised.txt *** 15 | As above, but standardised the cell lines (so each row has mean 0, std 1). 16 | We end up with: 622 cell lines, 138 drugs. 17 | 18 | *** /kernels/ *** 19 | The different similarity kernels, based on drug and cell line features. 20 | For binary features we use a Jaccard kernel. 21 | For real-valued features we use a Gaussian kernel with as the kernel standard deviation value: (no. features) / 4. 22 | -------------------------------------------------------------------------------- /experiments/experiments_ccle/cross_validation/ccle_ec_gibbs_nmf/linesearch_xval_gibbs.py: -------------------------------------------------------------------------------- 1 | """ 2 | Run the cross validation with line search for model selection using VB-NMF on 3 | the CCLE EC50 dataset. 
4 | """ 5 | 6 | import sys, os 7 | project_location = os.path.dirname(__file__)+"/../../../../../" 8 | sys.path.append(project_location) 9 | 10 | from BNMTF.code.models.bnmf_gibbs_optimised import bnmf_gibbs_optimised 11 | from BNMTF.code.cross_validation.line_search_cross_validation import LineSearchCrossValidation 12 | from BNMTF.data_drug_sensitivity.ccle.load_data import load_ccle 13 | 14 | 15 | # Settings 16 | iterations = 1000 17 | burn_in = 900 18 | thinning = 2 19 | init_UV = 'random' 20 | 21 | K_range = [1,2,3] 22 | no_folds = 10 23 | restarts = 1 24 | 25 | quality_metric = 'AIC' 26 | output_file = "./results_test.txt" 27 | 28 | alpha, beta = 1., 1. 29 | lambdaU = 1./10. 30 | lambdaV = 1./10. 31 | priors = { 'alpha':alpha, 'beta':beta, 'lambdaU':lambdaU, 'lambdaV':lambdaV } 32 | 33 | # Load in the CCLE EC50 dataset 34 | R,M = load_ccle(ic50=False) 35 | 36 | # Run the cross-validation framework 37 | #random.seed(42) 38 | #numpy.random.seed(9000) 39 | nested_crossval = LineSearchCrossValidation( 40 | classifier=bnmf_gibbs_optimised, 41 | R=R, 42 | M=M, 43 | values_K=K_range, 44 | folds=no_folds, 45 | priors=priors, 46 | init_UV=init_UV, 47 | iterations=iterations, 48 | restarts=restarts, 49 | quality_metric=quality_metric, 50 | file_performance=output_file 51 | ) 52 | nested_crossval.run(burn_in=burn_in,thinning=thinning) 53 | -------------------------------------------------------------------------------- /experiments/experiments_ccle/cross_validation/ccle_ec_np_nmtf/np_nmtf_nested_xval.py: -------------------------------------------------------------------------------- 1 | """ 2 | Run the nested cross-validation for the NMTF class, on the CCLE EC50 dataset. 3 | """ 4 | 5 | import sys, os 6 | project_location = os.path.dirname(__file__)+"/../../../../../" 7 | sys.path.append(project_location) 8 | 9 | import itertools 10 | from BNMTF.code.models.nmtf_np import NMTF 11 | from BNMTF.code.cross_validation.nested_matrix_cross_validation import MatrixNestedCrossValidation 12 | from BNMTF.data_drug_sensitivity.ccle.load_data import load_ccle 13 | 14 | 15 | # Settings 16 | standardised = False 17 | train_config = { 18 | 'iterations' : 2000, 19 | 'init_FG' : 'kmeans', 20 | 'init_S' : 'exponential', 21 | 'expo_prior' : 0.1 22 | } 23 | K_range = [1,2] 24 | L_range = [1,2] 25 | no_threads = 5 26 | no_folds = 10 27 | output_file = "./results.txt" 28 | files_nested_performances = ["./fold_%s.txt" % fold for fold in range(1,no_folds+1)] 29 | 30 | # Construct the parameter search 31 | parameter_search = [{'K':K,'L':L} for (K,L) in itertools.product(K_range,L_range)] 32 | 33 | # Load in the CCLE IC50 dataset 34 | R,M = load_ccle(ic50=False) 35 | 36 | # Run the cross-validation framework 37 | #random.seed(42) 38 | #numpy.random.seed(9000) 39 | nested_crossval = MatrixNestedCrossValidation( 40 | method=NMTF, 41 | X=R, 42 | M=M, 43 | K=no_folds, 44 | P=no_threads, 45 | parameter_search=parameter_search, 46 | train_config=train_config, 47 | file_performance=output_file, 48 | files_nested_performances=files_nested_performances 49 | ) 50 | nested_crossval.run() 51 | -------------------------------------------------------------------------------- /experiments/experiments_ccle/cross_validation/ccle_ic_np_nmtf/np_nmtf_nested_xval.py: -------------------------------------------------------------------------------- 1 | """ 2 | Run the nested cross-validation for the NMTF class, on the CCLE IC50 dataset. 
3 | """ 4 | 5 | import sys, os 6 | project_location = os.path.dirname(__file__)+"/../../../../../" 7 | sys.path.append(project_location) 8 | 9 | import itertools 10 | from BNMTF.code.models.nmtf_np import NMTF 11 | from BNMTF.code.cross_validation.nested_matrix_cross_validation import MatrixNestedCrossValidation 12 | from BNMTF.data_drug_sensitivity.ccle.load_data import load_ccle 13 | 14 | 15 | # Settings 16 | standardised = False 17 | train_config = { 18 | 'iterations' : 2000, 19 | 'init_FG' : 'kmeans', 20 | 'init_S' : 'exponential', 21 | 'expo_prior' : 0.1 22 | } 23 | K_range = [1,2,3] 24 | L_range = [1,2,3] 25 | no_threads = 2 26 | no_folds = 10 27 | output_file = "./results.txt" 28 | files_nested_performances = ["./fold_%s.txt" % fold for fold in range(1,no_folds+1)] 29 | 30 | # Construct the parameter search 31 | parameter_search = [{'K':K,'L':L} for (K,L) in itertools.product(K_range,L_range)] 32 | 33 | # Load in the CCLE IC50 dataset 34 | R,M = load_ccle(ic50=True) 35 | 36 | # Run the cross-validation framework 37 | #random.seed(42) 38 | #numpy.random.seed(9000) 39 | nested_crossval = MatrixNestedCrossValidation( 40 | method=NMTF, 41 | X=R, 42 | M=M, 43 | K=no_folds, 44 | P=no_threads, 45 | parameter_search=parameter_search, 46 | train_config=train_config, 47 | file_performance=output_file, 48 | files_nested_performances=files_nested_performances 49 | ) 50 | nested_crossval.run() 51 | -------------------------------------------------------------------------------- /experiments/experiments_gdsc/cross_validation/np_nmtf/np_nmtf_xval.py: -------------------------------------------------------------------------------- 1 | """ 2 | Run the nested cross-validation for the NMTF class, on the Sanger dataset. 3 | 4 | Since we want to find co-clusters of significantly higher/lower drug sensitivity 5 | values, we should use the unstandardised Sanger dataset. 
6 | """ 7 | 8 | import sys, os 9 | project_location = os.path.dirname(__file__)+"/../../../../../" 10 | sys.path.append(project_location) 11 | 12 | import numpy, itertools, random 13 | from BNMTF.code.nmtf_np import NMTF 14 | from BNMTF.cross_validation.matrix_cross_validation import MatrixCrossValidation 15 | from BNMTF.data_drug_sensitivity.gdsc.load_data import load_gdsc 16 | 17 | 18 | # Settings 19 | standardised = False 20 | train_config = { 21 | 'iterations' : 3000, 22 | 'init_FG' : 'kmeans', 23 | 'init_S' : 'exponential', 24 | 'expo_prior' : 0.1 25 | } 26 | K_range = [2,4,6,8,10] 27 | L_range = [2,4,6,8,10] 28 | P = 5 29 | no_folds = 5 30 | output_file = "./results.txt" 31 | files_nested_performances = ["./fold_%s.txt" % fold for fold in range(1,no_folds+1)] 32 | 33 | # Construct the parameter search 34 | parameter_search = [{'K':K,'L':L} for (K,L) in itertools.product(K_range,L_range)] 35 | 36 | # Load in the Sanger dataset 37 | (_,X_min,M,_,_,_,_) = load_Sanger(standardised=standardised) 38 | 39 | # Run the cross-validation framework 40 | random.seed(42) 41 | numpy.random.seed(9000) 42 | nested_crossval = MatrixCrossValidation( 43 | method=NMTF, 44 | X=X_min, 45 | M=M, 46 | K=no_folds, 47 | parameter_search=parameter_search, 48 | train_config=train_config, 49 | file_performance=output_file 50 | ) 51 | nested_crossval.run() -------------------------------------------------------------------------------- /experiments/experiments_ccle/cross_validation/ccle_ic_vb_nmtf/greedysearch_xval_vb.py: -------------------------------------------------------------------------------- 1 | """ 2 | Run the cross validation with greedy search for model selection using VB-NMTF 3 | on the CCLE IC50 dataset. 4 | """ 5 | 6 | import sys, os 7 | project_location = os.path.dirname(__file__)+"/../../../../../" 8 | sys.path.append(project_location) 9 | 10 | from BNMTF.code.models.bnmtf_vb_optimised import bnmtf_vb_optimised 11 | from BNMTF.code.cross_validation.greedy_search_cross_validation import GreedySearchCrossValidation 12 | from BNMTF.data_drug_sensitivity.ccle.load_data import load_ccle 13 | 14 | 15 | # Settings 16 | standardised = False 17 | iterations = 1000 18 | 19 | init_S = 'random' #'exp' # 20 | init_FG = 'kmeans' #'exp' # 21 | 22 | K_range = [4,5,6,7,8,9,10] 23 | L_range = [4,5,6,7,8,9,10] 24 | no_folds = 10 25 | restarts = 1 26 | 27 | quality_metric = 'AIC' 28 | output_file = "./results.txt" 29 | 30 | alpha, beta = 1., 1. 31 | lambdaF = 1./10. 32 | lambdaS = 1./10. 33 | lambdaG = 1./10. 
34 | priors = { 'alpha':alpha, 'beta':beta, 'lambdaF':lambdaF, 'lambdaS':lambdaS, 'lambdaG':lambdaG } 35 | 36 | # Load in the CCLE IC50 dataset 37 | R,M = load_ccle(ic50=True) 38 | 39 | # Run the cross-validation framework 40 | #random.seed(42) 41 | #numpy.random.seed(9000) 42 | nested_crossval = GreedySearchCrossValidation( 43 | classifier=bnmtf_vb_optimised, 44 | R=R, 45 | M=M, 46 | values_K=K_range, 47 | values_L=L_range, 48 | folds=no_folds, 49 | priors=priors, 50 | init_S=init_S, 51 | init_FG=init_FG, 52 | iterations=iterations, 53 | restarts=restarts, 54 | quality_metric=quality_metric, 55 | file_performance=output_file 56 | ) 57 | nested_crossval.run() 58 | -------------------------------------------------------------------------------- /experiments/experiments_ccle/cross_validation/ccle_ec_vb_nmtf/greedysearch_xval_vb.py: -------------------------------------------------------------------------------- 1 | """ 2 | Run the cross validation with greedy search for model selection using VB-NMTF 3 | on the CCLE EC50 dataset. 4 | """ 5 | 6 | import sys, os 7 | project_location = os.path.dirname(__file__)+"/../../../../../" 8 | sys.path.append(project_location) 9 | 10 | from BNMTF.code.models.bnmtf_vb_optimised import bnmtf_vb_optimised 11 | from BNMTF.code.cross_validation.greedy_search_cross_validation import GreedySearchCrossValidation 12 | from BNMTF.data_drug_sensitivity.ccle.load_data import load_ccle 13 | 14 | 15 | # Settings 16 | standardised = False 17 | iterations = 1000 18 | 19 | init_S = 'random' #'exp' # 20 | init_FG = 'kmeans' #'exp' # 21 | 22 | K_range = [1,2,3,4,5,6,7,8,9,10] 23 | L_range = [1,2,3,4,5,6,7,8,9,10] 24 | no_folds = 10 25 | restarts = 1 26 | 27 | quality_metric = 'AIC' 28 | output_file = "./results.txt" 29 | 30 | alpha, beta = 1., 1. 31 | lambdaF = 1./10. 32 | lambdaS = 1./10. 33 | lambdaG = 1./10. 34 | priors = { 'alpha':alpha, 'beta':beta, 'lambdaF':lambdaF, 'lambdaS':lambdaS, 'lambdaG':lambdaG } 35 | 36 | # Load in the CCLE EC50 dataset 37 | R,M = load_ccle(ic50=False) 38 | 39 | # Run the cross-validation framework 40 | #random.seed(42) 41 | #numpy.random.seed(9000) 42 | nested_crossval = GreedySearchCrossValidation( 43 | classifier=bnmtf_vb_optimised, 44 | R=R, 45 | M=M, 46 | values_K=K_range, 47 | values_L=L_range, 48 | folds=no_folds, 49 | priors=priors, 50 | init_S=init_S, 51 | init_FG=init_FG, 52 | iterations=iterations, 53 | restarts=restarts, 54 | quality_metric=quality_metric, 55 | file_performance=output_file 56 | ) 57 | nested_crossval.run() 58 | -------------------------------------------------------------------------------- /experiments/experiments_ccle/cross_validation/ccle_ic_icm_nmtf/greedysearch_xval_icm.py: -------------------------------------------------------------------------------- 1 | """ 2 | Run the cross validation with greedy search for model selection using VB-NMTF 3 | on the CCLE IC50 dataset. 
4 | """ 5 | 6 | import sys, os 7 | project_location = os.path.dirname(__file__)+"/../../../../../" 8 | sys.path.append(project_location) 9 | 10 | from BNMTF.code.models.nmtf_icm import nmtf_icm 11 | from BNMTF.code.cross_validation.greedy_search_cross_validation import GreedySearchCrossValidation 12 | from BNMTF.data_drug_sensitivity.ccle.load_data import load_ccle 13 | 14 | 15 | # Settings 16 | standardised = False 17 | iterations = 1000 18 | 19 | init_S = 'random' #'exp' # 20 | init_FG = 'kmeans' #'exp' # 21 | 22 | K_range = [4,5,6,7,8,9,10] 23 | L_range = [4,5,6,7,8,9,10] 24 | no_folds = 10 25 | restarts = 1 26 | 27 | quality_metric = 'AIC' 28 | output_file = "./results.txt" 29 | 30 | alpha, beta = 1., 1. 31 | lambdaF = 1./10. 32 | lambdaS = 1./10. 33 | lambdaG = 1./10. 34 | priors = { 'alpha':alpha, 'beta':beta, 'lambdaF':lambdaF, 'lambdaS':lambdaS, 'lambdaG':lambdaG } 35 | 36 | minimum_TN = 0.01 37 | 38 | # Load in the CCLE IC50 dataset 39 | R,M = load_ccle(ic50=True) 40 | 41 | # Run the cross-validation framework 42 | #random.seed(1) 43 | #numpy.random.seed(1) 44 | nested_crossval = GreedySearchCrossValidation( 45 | classifier=nmtf_icm, 46 | R=R, 47 | M=M, 48 | values_K=K_range, 49 | values_L=L_range, 50 | folds=no_folds, 51 | priors=priors, 52 | init_S=init_S, 53 | init_FG=init_FG, 54 | iterations=iterations, 55 | restarts=restarts, 56 | quality_metric=quality_metric, 57 | file_performance=output_file 58 | ) 59 | nested_crossval.run(minimum_TN=minimum_TN) 60 | -------------------------------------------------------------------------------- /experiments/experiments_ccle/cross_validation/ccle_ec_icm_nmtf/greedysearch_xval_icm.py: -------------------------------------------------------------------------------- 1 | """ 2 | Run the cross validation with greedy search for model selection using VB-NMTF 3 | on the CCLE EC50 dataset. 4 | """ 5 | 6 | import sys, os 7 | project_location = os.path.dirname(__file__)+"/../../../../../" 8 | sys.path.append(project_location) 9 | 10 | from BNMTF.code.models.nmtf_icm import nmtf_icm 11 | from BNMTF.code.cross_validation.greedy_search_cross_validation import GreedySearchCrossValidation 12 | from BNMTF.data_drug_sensitivity.ccle.load_data import load_ccle 13 | 14 | 15 | # Settings 16 | standardised = False 17 | iterations = 1000 18 | 19 | init_S = 'random' #'exp' # 20 | init_FG = 'kmeans' #'exp' # 21 | 22 | K_range = [1,2,3,4,5,6,7,8,9,10] 23 | L_range = [1,2,3,4,5,6,7,8,9,10] 24 | no_folds = 10 25 | restarts = 1 26 | 27 | quality_metric = 'AIC' 28 | output_file = "./results.txt" 29 | 30 | alpha, beta = 1., 1. 31 | lambdaF = 1./10. 32 | lambdaS = 1./10. 33 | lambdaG = 1./10. 
34 | priors = { 'alpha':alpha, 'beta':beta, 'lambdaF':lambdaF, 'lambdaS':lambdaS, 'lambdaG':lambdaG } 35 | 36 | minimum_TN = 0.01 37 | 38 | # Load in the CCLE EC50 dataset 39 | R,M = load_ccle(ic50=False) 40 | 41 | # Run the cross-validation framework 42 | #random.seed(1) 43 | #numpy.random.seed(1) 44 | nested_crossval = GreedySearchCrossValidation( 45 | classifier=nmtf_icm, 46 | R=R, 47 | M=M, 48 | values_K=K_range, 49 | values_L=L_range, 50 | folds=no_folds, 51 | priors=priors, 52 | init_S=init_S, 53 | init_FG=init_FG, 54 | iterations=iterations, 55 | restarts=restarts, 56 | quality_metric=quality_metric, 57 | file_performance=output_file 58 | ) 59 | nested_crossval.run(minimum_TN=minimum_TN) 60 | -------------------------------------------------------------------------------- /experiments/experiments_ccle/cross_validation/ccle_ic_gibbs_nmtf/greedysearch_xval_gibbs.py: -------------------------------------------------------------------------------- 1 | """ 2 | Run the cross validation with greedy search for model selection using VB-NMTF 3 | on the CCLE IC50 dataset. 4 | """ 5 | 6 | import sys, os 7 | project_location = os.path.dirname(__file__)+"/../../../../../" 8 | sys.path.append(project_location) 9 | 10 | from BNMTF.code.models.bnmtf_gibbs_optimised import bnmtf_gibbs_optimised 11 | from BNMTF.code.cross_validation.greedy_search_cross_validation import GreedySearchCrossValidation 12 | from BNMTF.data_drug_sensitivity.ccle.load_data import load_ccle 13 | 14 | 15 | # Settings 16 | standardised = False 17 | iterations = 1000 18 | burn_in = 900 19 | thinning = 2 20 | 21 | init_S = 'random' #'exp' # 22 | init_FG = 'kmeans' #'exp' # 23 | 24 | K_range = [4,5,6,7,8,9,10] 25 | L_range = [4,5,6,7,8,9,10] 26 | no_folds = 10 27 | restarts = 1 28 | 29 | quality_metric = 'AIC' 30 | output_file = "./results.txt" 31 | 32 | alpha, beta = 1., 1. 33 | lambdaF = 1./10. 34 | lambdaS = 1./10. 35 | lambdaG = 1./10. 36 | priors = { 'alpha':alpha, 'beta':beta, 'lambdaF':lambdaF, 'lambdaS':lambdaS, 'lambdaG':lambdaG } 37 | 38 | # Load in the CCLE IC50 dataset 39 | R,M = load_ccle(ic50=True) 40 | 41 | # Run the cross-validation framework 42 | #random.seed(1) 43 | #numpy.random.seed(1) 44 | nested_crossval = GreedySearchCrossValidation( 45 | classifier=bnmtf_gibbs_optimised, 46 | R=R, 47 | M=M, 48 | values_K=K_range, 49 | values_L=L_range, 50 | folds=no_folds, 51 | priors=priors, 52 | init_S=init_S, 53 | init_FG=init_FG, 54 | iterations=iterations, 55 | restarts=restarts, 56 | quality_metric=quality_metric, 57 | file_performance=output_file 58 | ) 59 | nested_crossval.run(burn_in=burn_in,thinning=thinning) 60 | -------------------------------------------------------------------------------- /experiments/experiments_ccle/cross_validation/ccle_ec_gibbs_nmtf/greedysearch_xval_gibbs.py: -------------------------------------------------------------------------------- 1 | """ 2 | Run the cross validation with greedy search for model selection using VB-NMTF 3 | on the CCLE EC50 dataset. 
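Aside on the GreedySearchCrossValidation scripts in this section: the class itself is not included in this dump. As an illustration only, one common greedy strategy over a (K, L) grid starts from the smallest values and repeatedly moves to the neighbouring point with the best quality metric (e.g. AIC, lower is better), stopping when no neighbour improves. The sketch below is hypothetical and may differ from the repository's actual implementation.

def greedy_search(evaluate_quality, K_range, L_range):
    # evaluate_quality(K, L) returns a quality value such as AIC (lower is better).
    K, L = K_range[0], L_range[0]
    best = evaluate_quality(K, L)
    while True:
        neighbours = [(K + 1, L), (K, L + 1), (K + 1, L + 1)]
        candidates = [(evaluate_quality(Kn, Ln), Kn, Ln)
                      for (Kn, Ln) in neighbours if Kn in K_range and Ln in L_range]
        if not candidates or min(candidates)[0] >= best:
            return K, L, best
        best, K, L = min(candidates)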
4 | """ 5 | 6 | import sys, os 7 | project_location = os.path.dirname(__file__)+"/../../../../../" 8 | sys.path.append(project_location) 9 | 10 | from BNMTF.code.models.bnmtf_gibbs_optimised import bnmtf_gibbs_optimised 11 | from BNMTF.code.cross_validation.greedy_search_cross_validation import GreedySearchCrossValidation 12 | from BNMTF.data_drug_sensitivity.ccle.load_data import load_ccle 13 | 14 | 15 | # Settings 16 | standardised = False 17 | iterations = 1000 18 | burn_in = 900 19 | thinning = 2 20 | 21 | init_S = 'random' #'exp' # 22 | init_FG = 'kmeans' #'exp' # 23 | 24 | K_range = [1,2,3,4,5,6,7,8,9,10] 25 | L_range = [1,2,3,4,5,6,7,8,9,10] 26 | no_folds = 10 27 | restarts = 1 28 | 29 | quality_metric = 'AIC' 30 | output_file = "./results.txt" 31 | 32 | alpha, beta = 1., 1. 33 | lambdaF = 1./10. 34 | lambdaS = 1./10. 35 | lambdaG = 1./10. 36 | priors = { 'alpha':alpha, 'beta':beta, 'lambdaF':lambdaF, 'lambdaS':lambdaS, 'lambdaG':lambdaG } 37 | 38 | # Load in the CCLE EC50 dataset 39 | R,M = load_ccle(ic50=False) 40 | 41 | # Run the cross-validation framework 42 | #random.seed(1) 43 | #numpy.random.seed(1) 44 | nested_crossval = GreedySearchCrossValidation( 45 | classifier=bnmtf_gibbs_optimised, 46 | R=R, 47 | M=M, 48 | values_K=K_range, 49 | values_L=L_range, 50 | folds=no_folds, 51 | priors=priors, 52 | init_S=init_S, 53 | init_FG=init_FG, 54 | iterations=iterations, 55 | restarts=restarts, 56 | quality_metric=quality_metric, 57 | file_performance=output_file 58 | ) 59 | nested_crossval.run(burn_in=burn_in,thinning=thinning) 60 | -------------------------------------------------------------------------------- /tests/code/distributions/test_truncated_normal_vector.py: -------------------------------------------------------------------------------- 1 | """ 2 | Test the class for Truncated Normal draws and expectations in truncated_normal_vector.py. 3 | """ 4 | 5 | import sys, os 6 | project_location = os.path.dirname(__file__)+"/../../../" 7 | sys.path.append(project_location) 8 | 9 | from BNMTF.code.models.distributions.truncated_normal_vector import TN_vector_draw, TN_vector_expectation, TN_vector_variance, TN_vector_mode 10 | from scipy.stats import norm 11 | import numpy 12 | 13 | def test_expectation(): 14 | # One normal case, one exponential approximation 15 | mu = [1.0, -1] 16 | tau = [3.0, 2000] 17 | sigma = [0.5773502691896258,0.022360679774997897] 18 | 19 | lambdav = ( norm.pdf( - mu[0] / sigma[0] ) ) / ( 1 - norm.cdf( - mu[0] / sigma[0] ) ) 20 | expectation = mu[0] + sigma[0] * lambdav 21 | assert numpy.array_equal(TN_vector_expectation(mu,tau), [expectation, 1./2000.]) 22 | 23 | def test_variance(): 24 | # One normal case, one exponential approximation 25 | mu = [1.0, -1] 26 | tau = [3.0, 2000] 27 | sigma = [0.5773502691896258,0.022360679774997897] 28 | 29 | lambdav = ( norm.pdf( - mu[0] / sigma[0] ) ) / ( 1 - norm.cdf( - mu[0] / sigma[0] ) ) 30 | variance = sigma[0]**2 * ( 1 - ( lambdav * ( lambdav + mu[0] / sigma[0] ) ) ) 31 | assert numpy.array_equal(TN_vector_variance(mu,tau), [variance, (1./2000.)**2]) 32 | 33 | # Test a draw - simply verify it is > 0. 
34 | # Also test whether we get inf for a very negative mean and high variance 35 | def test_draw(): 36 | # One normal case, and one when tau=0 - then draws should be inf, and hence return 0.0 37 | mu = [1.0, 0.32] 38 | tau = [3.0, 0.0] 39 | for i in range(0,100): 40 | v1,v2 = TN_vector_draw(mu,tau) 41 | assert v1 >= 0.0 and v2 == 0.0 42 | 43 | # Test the mode 44 | def test_mode(): 45 | # Positive mean 46 | mus = [1.0, -2.0] 47 | assert numpy.array_equal(TN_vector_mode(mus), [1.0, 0.0]) -------------------------------------------------------------------------------- /experiments/experiments_ccle/cross_validation/ccle_ec_np_nmf/fold_9.txt: -------------------------------------------------------------------------------- 1 | Tried parameters {'K': 1}. Average performances: {'R^2': nan, 'MSE': nan, 'Rp': nan}. 2 | All performances: {'R^2': [0.216611015910689, 0.22933311154207237, nan, 0.2578897031878836, 0.331604508102044, 0.2697618006011768, 0.2960951709801749, 0.27877041582910267, 0.2214366029449465, 0.2804182684113584], 'MSE': [8.4986860459330984, 8.5964614922072542, nan, 8.5121677002903731, 7.6477769705814671, 7.9285508271679408, 7.6131400065062351, 8.1400250543716748, 8.1792659908773242, 8.2557830299434443], 'Rp': [0.50867373747678257, 0.51234252231129318, nan, 0.53353692811687503, 0.58600869226509711, 0.54555429137926303, 0.56002232775843508, 0.54098479061009419, 0.51717368577490908, 0.55164102112309277]}. 3 | Tried parameters {'K': 2}. Average performances: {'R^2': nan, 'MSE': nan, 'Rp': nan}. 4 | All performances: {'R^2': [-622.7877969532341, 0.11866718131919385, 0.25720659597986006, nan, 0.1073351374391378, 0.1339953245715475, 0.17407769998419298, nan, 0.11432082256516418, 0.04864542858531151], 'MSE': [6848.0349188302898, 9.8672941726370471, 8.1186952386872093, nan, 10.079746954062026, 10.030552260655949, 8.808698754376044, nan, 9.7855561241850868, 10.733309285508104], 'Rp': [-0.021028385208208673, 0.49379477886605455, 0.54540536423306707, nan, 0.45766094883634423, 0.46705932389244287, 0.50272337567034409, nan, 0.47485890729545754, 0.43622611302790137]}. 5 | Tried parameters {'K': 3}. Average performances: {'R^2': nan, 'MSE': nan, 'Rp': nan}. 6 | All performances: {'R^2': [nan, -0.30128901246075346, nan, nan, -5.60373268052594, 0.0949469532562468, 0.01551745106717084, nan, -0.36250925441276416, -0.4932119464538114], 'MSE': [nan, 13.609745253502085, nan, nan, 76.421353615923834, 9.9988394995214769, 10.414351112018062, nan, 15.231313868648897, 15.712739700150539], 'Rp': [nan, 0.40763353038370881, nan, nan, 0.17483985605473276, 0.47663592771395846, 0.46636156907415749, nan, 0.38745567937050013, 0.36924602012392543]}. 7 | Best performances: {'R^2': nan, 'MSE': nan, 'Rp': nan}. Best parameters: {'K': 1}. 8 | -------------------------------------------------------------------------------- /experiments/experiments_ccle/cross_validation/ccle_ec_np_nmf/fold_5.txt: -------------------------------------------------------------------------------- 1 | Tried parameters {'K': 1}. Average performances: {'R^2': nan, 'MSE': nan, 'Rp': nan}. 
2 | All performances: {'R^2': [0.1951063526332698, 0.316439306419682, 0.23327128941659037, 0.30175896170468575, 0.2142841835316992, 0.29609455118365846, nan, 0.3419277760444793, 0.24574257705700353, nan], 'MSE': [9.3830407990764026, 7.5263625619577228, 8.5618716606822556, 7.8237129224716648, 8.6884840829642567, 7.7946395379214941, nan, 7.2419464425302946, 8.5037513334722501, nan], 'Rp': [0.49316405761232224, 0.57375170746955828, 0.50963314820943684, 0.56292971825455729, 0.50738977808977548, 0.55746303835286581, nan, 0.60180070406302111, 0.51315467505155044, nan]}. 3 | Tried parameters {'K': 2}. Average performances: {'R^2': nan, 'MSE': nan, 'Rp': nan}. 4 | All performances: {'R^2': [-1.171048530181305, 0.17458891333616122, nan, -6.983946877507161, 0.2084722168745612, 0.1354558151522567, -0.6329056207525043, 0.2097682986614563, nan, -0.37967075134692996], 'MSE': [24.699130185346856, 8.7077657693886135, nan, 89.263419623862603, 9.1881137220451858, 9.966194105810974, 17.892775374851659, 8.6585261045687663, nan, 15.710727465272999], 'Rp': [0.29534962587610775, 0.51276511781961609, nan, 0.1285628671805373, 0.52825710742940135, 0.47887037522434611, 0.35118586400298868, 0.51805479369074614, nan, 0.31378365310386303]}. 5 | Tried parameters {'K': 3}. Average performances: {'R^2': nan, 'MSE': nan, 'Rp': nan}. 6 | All performances: {'R^2': [-0.10314277468709032, -6.0921653957558055, -1.6700151618735837, -1.528039236344119, nan, -0.35292735925501484, -0.5718810602754036, -2276.398465328347, -360.54052317583694, nan], 'MSE': [12.864918483970719, 78.576740843638092, 30.663168649581646, 26.793135113840798, nan, 15.206494089831338, 17.675306146077091, 24317.680535005977, 4046.8792049399917, nan], 'Rp': [0.4228702940540261, 0.10975646394271045, 0.18479661662258651, 0.26681684928992894, nan, 0.40313808211268082, 0.33405360206444284, -0.020030370755298336, 0.064598843075654958, nan]}. 7 | Best performances: {'R^2': nan, 'MSE': nan, 'Rp': nan}. Best parameters: {'K': 1}. 8 | -------------------------------------------------------------------------------- /experiments/experiments_ccle/cross_validation/ccle_ec_np_nmf/fold_6.txt: -------------------------------------------------------------------------------- 1 | Tried parameters {'K': 1}. Average performances: {'R^2': nan, 'MSE': nan, 'Rp': nan}. 2 | All performances: {'R^2': [nan, 0.21430513502919502, 0.3468227875245955, 0.2714590712810081, 0.2743058214663774, -0.8018205361110275, 0.28330106292595114, 0.2804508379442078, nan, 0.2684107840979276], 'MSE': [nan, 8.7103107036285259, 7.4736931590160927, 7.9425008980092029, 7.8075668195354346, 20.042343074149425, 7.8943248408454219, 7.8541624076149636, nan, 8.1746314786815333], 'Rp': [nan, 0.5030448555822199, 0.59770635580789122, 0.53869718688815016, 0.54249148126495483, 0.30961441432529646, 0.54671815087531295, 0.53759309539724076, nan, 0.53933630252274423]}. 3 | Tried parameters {'K': 2}. Average performances: {'R^2': nan, 'MSE': nan, 'Rp': nan}. 
4 | All performances: {'R^2': [-10.574586141244735, -1.389365906618926, 0.1284951617868606, -0.6532257613373607, 0.26548647351581556, 0.08535354607001988, nan, 0.19519917191963965, 0.010138463016313604, 0.09993722409041916], 'MSE': [127.09304660766573, 26.00332411273774, 9.1928647934283152, 18.20398460274615, 8.1815853726180183, 10.763542481389488, nan, 8.8600967344552952, 11.085183888564529, 10.108906577294286], 'Rp': [0.083929115710765639, 0.2559405330829273, 0.48476312444207975, 0.33603090410971781, 0.55650541757382033, 0.44797117581445706, nan, 0.51567825096849229, 0.44585464672819869, 0.43696636140740319]}. 5 | Tried parameters {'K': 3}. Average performances: {'R^2': nan, 'MSE': nan, 'Rp': nan}. 6 | All performances: {'R^2': [-0.6553260799449678, nan, -1.0850670786351428, -689.7869815763883, -35501.60512548042, nan, -1.3136255833332835, nan, -1.1379935986609118, -0.5751721428616199], 'MSE': [18.521139937641482, nan, 23.261201666785105, 7472.8920848146545, 386406.65049337299, nan, 24.398293916690342, nan, 24.429166844167778, 17.143793627639749], 'Rp': [0.33110133766758371, nan, 0.29845543955805898, -0.02066184909799116, -0.027047100512401397, nan, 0.32654047243119572, nan, 0.28576413729628591, 0.36784454670170641]}. 7 | Best performances: {'R^2': nan, 'MSE': nan, 'Rp': nan}. Best parameters: {'K': 1}. 8 | -------------------------------------------------------------------------------- /experiments/experiments_gdsc/cross_validation/np_nmtf/np_nmtf_nested_xval.py: -------------------------------------------------------------------------------- 1 | """ 2 | Run the nested cross-validation for the NMTF class, on the Sanger dataset. 3 | """ 4 | 5 | import sys, os 6 | project_location = os.path.dirname(__file__)+"/../../../../../" 7 | sys.path.append(project_location) 8 | 9 | import numpy, itertools, random 10 | from BNMTF.code.models.nmtf_np import NMTF 11 | from BNMTF.code.cross_validation.nested_matrix_cross_validation import MatrixNestedCrossValidation 12 | from BNMTF.data_drug_sensitivity.gdsc.load_data import load_gdsc 13 | 14 | 15 | # Settings 16 | standardised = False 17 | train_config = { 18 | 'iterations' : 2000, 19 | 'init_FG' : 'kmeans', 20 | 'init_S' : 'exponential', 21 | 'expo_prior' : 0.1 22 | } 23 | P = 5 24 | no_folds = 10 25 | output_file = "./results.txt" 26 | files_nested_performances = ["./fold_%s.txt" % fold for fold in range(1,no_folds+1)] 27 | 28 | # Construct the parameter search 29 | parameter_search = [{'K':K,'L':L} for (K,L) in [(6,6), (8,8), (10,10)]] 30 | 31 | # Load in the Sanger dataset 32 | (_,X_min,M,_,_,_,_) = load_gdsc(standardised=standardised) 33 | 34 | # Run the cross-validation framework 35 | #random.seed(42) 36 | #numpy.random.seed(9000) 37 | nested_crossval = MatrixNestedCrossValidation( 38 | method=NMTF, 39 | X=X_min, 40 | M=M, 41 | K=no_folds, 42 | P=5, 43 | parameter_search=parameter_search, 44 | train_config=train_config, 45 | file_performance=output_file, 46 | files_nested_performances=files_nested_performances 47 | ) 48 | nested_crossval.run() 49 | 50 | """ 51 | Average performances: {'R^2': 0.7948758708329315, 'MSE': 2.3988138408823394, 'Rp': 0.89178480273294591}. 52 | All performances: {'R^2': [0.7990403667846077, 0.7974592552426493, 0.7971559801700554, 0.7908843325029544, 0.7898394194643907], 'MSE': [2.3352752661466534, 2.3572503168518866, 2.4163844950465756, 2.4334280833191895, 2.4517310430473933], 'Rp': [0.89405814991652022, 0.89333868839901787, 0.89316370564203762, 0.88952291745977685, 0.88884055224737657]}. 
53 | 54 | Average MSE: 2.3988 +- 0.0449 55 | Averagr R^2: 0.795 +- 0.004 56 | """ -------------------------------------------------------------------------------- /tests/code/distributions/test_truncated_normal.py: -------------------------------------------------------------------------------- 1 | """ 2 | Test the class for Truncated Normal draws and expectations in truncated_normal.py. 3 | """ 4 | 5 | import sys, os 6 | project_location = os.path.dirname(__file__)+"/../../../" 7 | sys.path.append(project_location) 8 | 9 | from BNMTF.code.models.distributions.truncated_normal import TN_draw, TN_expectation, TN_variance, TN_mode 10 | from scipy.stats import norm 11 | import numpy 12 | 13 | def test_expectation(): 14 | mu = 1.0 15 | tau = 3.0 16 | sigma = 0.5773502691896258 17 | 18 | lambdav = ( norm.pdf( - mu / sigma ) ) / ( 1 - norm.cdf( - mu / sigma ) ) 19 | expectation = mu + sigma * lambdav 20 | assert TN_expectation(mu,tau) == expectation 21 | 22 | # Also test that we get variance and exp of an Exp if mu is less than -30*sigma 23 | mu = -1. 24 | tau = 2000. 25 | assert TN_expectation(mu,tau) == 1./2000. 26 | 27 | 28 | def test_variance(): 29 | mu = 1.0 30 | tau = 3.0 31 | sigma = 0.5773502691896258 32 | 33 | lambdav = ( norm.pdf( - mu / sigma ) ) / ( 1 - norm.cdf( - mu / sigma ) ) 34 | variance = sigma**2 * ( 1 - ( lambdav * ( lambdav + mu / sigma ) ) ) 35 | assert TN_variance(mu,tau) == variance 36 | 37 | # Also test that we get variance and exp of an Exp if mu is less than -30*sigma 38 | mu = -1. 39 | tau = 2000. 40 | assert TN_variance(mu,tau) == (1./2000.)**2 41 | 42 | # Test a draw - simply verify it is > 0. 43 | # Also test whether we get inf for a very negative mean and high variance 44 | def test_draw(): 45 | mu = 1.0 46 | tau = 3.0 47 | for i in range(0,100): 48 | assert TN_draw(mu,tau) >= 0.0 49 | 50 | # Test everything is handled when tau = 0 - then draws should be inf, and hence return 0.0 51 | mu = 0.32 52 | tau = 0.0 53 | for i in range(0,100): 54 | assert TN_draw(mu,tau) == 0.0 55 | 56 | # Test the mode 57 | def test_mode(): 58 | # Positive mean 59 | mu = 1.0 60 | assert TN_mode(mu) == mu 61 | 62 | # Negative mean 63 | mu = -2.0 64 | assert TN_mode(mu) == 0. -------------------------------------------------------------------------------- /experiments/experiments_ccle/cross_validation/ccle_ec_np_nmf/fold_2.txt: -------------------------------------------------------------------------------- 1 | Tried parameters {'K': 1}. Average performances: {'R^2': nan, 'MSE': nan, 'Rp': nan}. 2 | All performances: {'R^2': [nan, 0.2338026674512549, 0.24267924746500213, 0.2985746728951062, 0.3439138150615887, 0.2054017340552602, 0.2581434876694477, 0.2810058579538135, 0.18815607426151892, 0.2627100412359329], 'MSE': [nan, 8.0682632161323298, 8.082164151585177, 7.7212090530315711, 7.6052527849646081, 9.0132883629946878, 8.175161024131949, 7.9449355744781682, 8.8800426511770194, 8.7053342310583872], 'Rp': [nan, 0.5203560701358898, 0.52977117817558295, 0.55682352912447286, 0.59177419501963513, 0.50257463819761383, 0.52992755846598638, 0.54362913606526708, 0.49035409310988592, 0.52776401905582515]}. 3 | Tried parameters {'K': 2}. Average performances: {'R^2': nan, 'MSE': nan, 'Rp': nan}. 
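Aside on the truncated-normal tests above (test_truncated_normal.py and test_truncated_normal_vector.py): the expectation and variance being tested are the standard moments of a normal distribution with mean mu and precision tau truncated to [0, inf). A minimal cross-check against scipy.stats.truncnorm, which parameterises the truncation bounds in standardised units; this snippet is illustrative and not one of the repository's files.

import numpy
from scipy.stats import truncnorm

mu, tau = 1.0, 3.0
sigma = 1.0 / numpy.sqrt(tau)
a, b = (0.0 - mu) / sigma, numpy.inf              # truncate to [0, inf)
print(truncnorm.mean(a, b, loc=mu, scale=sigma))  # should agree with TN_expectation(mu, tau)
print(truncnorm.var(a, b, loc=mu, scale=sigma))   # should agree with TN_variance(mu, tau)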
4 | All performances: {'R^2': [0.04419676032579323, nan, 0.13241529554268783, nan, 0.17083211691009592, 0.11015768624051969, -0.8861236370084338, 0.19676246581206613, 0.20490864015981958, 0.17146332368824513], 'MSE': [10.570809464373193, nan, 9.882166538940016, nan, 9.1712738223699404, 10.08957191600056, 22.14389377012699, 8.9157757459420175, 8.9353998702741126, 9.2894707483163845], 'Rp': [0.44046375507402102, nan, 0.46060627143916572, nan, 0.50996876567397387, 0.47963797008188425, 0.31075222674827291, 0.50736608025434837, 0.52106850777596314, 0.48942498851092314]}. 5 | Tried parameters {'K': 3}. Average performances: {'R^2': nan, 'MSE': nan, 'Rp': nan}. 6 | All performances: {'R^2': [-1.288956827542092, nan, -0.05552907508683691, nan, -259885.1400121829, -1.2009091814714075, -2.2520116244746298, -0.19843734625887466, -37907.93493393469, -1.6704726837818593], 'MSE': [27.860769248738279, nan, 10.727334198480079, nan, 2852996.905621931, 24.322556483089905, 35.494435132567318, 13.737167249203825, 426017.1245206889, 30.11831714079592], 'Rp': [0.20484999889655225, nan, 0.46510169681540581, nan, -0.028802496071943077, 0.29814811042808903, 0.19372292261481913, 0.36085875391294031, 0.014855762342711364, 0.21617475250238427]}. 7 | Best performances: {'R^2': nan, 'MSE': nan, 'Rp': nan}. Best parameters: {'K': 1}. 8 | -------------------------------------------------------------------------------- /experiments/experiments_gdsc/cross_validation/vb_nmf/linesearch_xval_vb.py: -------------------------------------------------------------------------------- 1 | """ 2 | Run the cross validation with line search for model selection using VB-NMF on 3 | the Sanger dataset. 4 | """ 5 | 6 | import sys, os 7 | project_location = os.path.dirname(__file__)+"/../../../../../" 8 | sys.path.append(project_location) 9 | 10 | import numpy, random 11 | from BNMTF.code.models.bnmf_vb_optimised import bnmf_vb_optimised 12 | from BNMTF.code.cross_validation.line_search_cross_validation import LineSearchCrossValidation 13 | from BNMTF.data_drug_sensitivity.gdsc.load_data import load_gdsc 14 | 15 | 16 | # Settings 17 | standardised = False 18 | iterations = 1000 19 | init_UV = 'random' 20 | 21 | K_range = [15,20,25,30] 22 | no_folds = 10 23 | restarts = 1 24 | 25 | quality_metric = 'AIC' 26 | output_file = "./results.txt" 27 | 28 | alpha, beta = 1., 1. 29 | lambdaU = 1./10. 30 | lambdaV = 1./10. 
31 | priors = { 'alpha':alpha, 'beta':beta, 'lambdaU':lambdaU, 'lambdaV':lambdaV } 32 | 33 | # Load in the Sanger dataset 34 | (_,X_min,M,_,_,_,_) = load_gdsc(standardised=standardised,sep=',') 35 | 36 | # Run the cross-validation framework 37 | #random.seed(42) 38 | #numpy.random.seed(9000) 39 | nested_crossval = LineSearchCrossValidation( 40 | classifier=bnmf_vb_optimised, 41 | R=X_min, 42 | M=M, 43 | values_K=K_range, 44 | folds=no_folds, 45 | priors=priors, 46 | init_UV=init_UV, 47 | iterations=iterations, 48 | restarts=restarts, 49 | quality_metric=quality_metric, 50 | file_performance=output_file 51 | ) 52 | nested_crossval.run() 53 | 54 | """ 55 | all_MSE = [2.2242309355503416, 2.3108126630384804, 2.4095896447817631, 2.2188694213830114, 2.4185938516134278, 2.1808748510586002, 2.2503432196374651, 2.2305023229025145, 2.3595465204422488, 2.2186318302878667] 56 | all_R2 = [0.8123419361488506, 0.8011409466575017, 0.7943028271877304, 0.8125046212085996, 0.7934881370166628, 0.8111969927756486, 0.8058878338360765, 0.811089129626958, 0.798953276136085, 0.8151865445946502] 57 | 58 | Average MSE: 2.2821995260695718 +- 0.0066998949966021598 59 | Average R^2: 0.80560922451887629 +- 5.8495363723835686e-05 60 | """ -------------------------------------------------------------------------------- /experiments/experiments_ccle/cross_validation/ccle_ec_np_nmf/fold_8.txt: -------------------------------------------------------------------------------- 1 | Tried parameters {'K': 1}. Average performances: {'R^2': nan, 'MSE': nan, 'Rp': nan}. 2 | All performances: {'R^2': [0.2191117879065282, 0.3025208761584226, 0.2335431843981689, 0.23230056670630927, 0.2698234452551742, 0.2237914673446625, 0.2349923420288721, nan, 0.2395934680562396, 0.3714262044867832], 'MSE': [8.5294450822462249, 7.8455410585413157, 8.6458871689201704, 8.6222047230900785, 8.2643080332090388, 8.1643384867735271, 8.220769247665789, nan, 7.7266809380121932, 7.6950694660610504], 'Rp': [0.51081940829159389, 0.56292089638728704, 0.51097352461126666, 0.52396126247043062, 0.53533794193009399, 0.51975939464576337, 0.51057358098857397, nan, 0.53897161493527534, 0.61552779573747873]}. 3 | Tried parameters {'K': 2}. Average performances: {'R^2': nan, 'MSE': nan, 'Rp': nan}. 4 | All performances: {'R^2': [nan, 0.21402336348228312, -0.9954016738729357, -0.05708216181933623, nan, 0.15983725469682797, 0.17734143313805995, 0.11493552555492692, 0.09913208752230518, 0.13745251757599408], 'MSE': [nan, 8.8866787037910804, 21.709065879083756, 12.574803111226991, nan, 9.9914215596752083, 8.0762880740899661, 9.5080242587996295, 10.190956716491094, 10.041978971360008], 'Rp': [nan, 0.51647947427073315, 0.29170858333600236, 0.39777416683641215, nan, 0.49135944242495899, 0.54227453286269667, 0.47669909015187717, 0.4596133072820941, 0.46539961616166731]}. 5 | Tried parameters {'K': 3}. Average performances: {'R^2': nan, 'MSE': nan, 'Rp': nan}. 6 | All performances: {'R^2': [-0.7688841094869072, -1.133058310558594, -2.6890824592103306, -5.683793153559361, -340.9197946230261, -0.32626659476204556, -178300.35916493824, nan, -6.665355573094988, nan], 'MSE': [18.81684489189961, 23.690856566159884, 42.460735167261419, 75.942538259218182, 3904.7656453702798, 15.064314751645929, 1864462.7642969301, nan, 84.415553270096979, nan], 'Rp': [0.29769511272608729, 0.26630387087269397, 0.23469363048225003, 0.12218533675310858, 0.058908368944496886, 0.3621226360166579, -0.030959163112430244, nan, 0.12457829508174811, nan]}. 
7 | Best performances: {'R^2': nan, 'MSE': nan, 'Rp': nan}. Best parameters: {'K': 1}. 8 | -------------------------------------------------------------------------------- /experiments/experiments_gdsc/cross_validation/icm_nmf/linesearch_xval_icm.py: -------------------------------------------------------------------------------- 1 | """ 2 | Run the cross validation with line search for model selection using VB-NMF on 3 | the Sanger dataset. 4 | """ 5 | 6 | import sys, os 7 | project_location = os.path.dirname(__file__)+"/../../../../../" 8 | sys.path.append(project_location) 9 | 10 | import numpy, random 11 | from BNMTF.code.models.nmf_icm import nmf_icm 12 | from BNMTF.code.cross_validation.line_search_cross_validation import LineSearchCrossValidation 13 | from BNMTF.data_drug_sensitivity.gdsc.load_data import load_gdsc 14 | 15 | 16 | # Settings 17 | standardised = False 18 | iterations = 1000 19 | init_UV = 'random' 20 | 21 | K_range = [15,20,25,30] 22 | no_folds = 10 23 | restarts = 1 24 | 25 | quality_metric = 'AIC' 26 | output_file = "./results.txt" 27 | 28 | alpha, beta = 1., 1. 29 | lambdaU = 1./10. 30 | lambdaV = 1./10. 31 | priors = { 'alpha':alpha, 'beta':beta, 'lambdaU':lambdaU, 'lambdaV':lambdaV } 32 | 33 | minimum_TN = 0.1 34 | 35 | # Load in the Sanger dataset 36 | (_,X_min,M,_,_,_,_) = load_gdsc(standardised=standardised) 37 | 38 | # Run the cross-validation framework 39 | #random.seed(42) 40 | #numpy.random.seed(9000) 41 | nested_crossval = LineSearchCrossValidation( 42 | classifier=nmf_icm, 43 | R=X_min, 44 | M=M, 45 | values_K=K_range, 46 | folds=no_folds, 47 | priors=priors, 48 | init_UV=init_UV, 49 | iterations=iterations, 50 | restarts=restarts, 51 | quality_metric=quality_metric, 52 | file_performance=output_file 53 | ) 54 | nested_crossval.run(minimum_TN=minimum_TN) 55 | 56 | """ 57 | all_MSE = [3.5039148405029135, 9.0622730084824674, 3.7009069757338917, 3.3451246835265178, 3.1147595748400358, 3.9037354439533258, 13.991970030783968, 3.1814210224127897, 3.2677197491020404, 12.460551868851933] 58 | all_R2 = [0.7072309782081623, 0.2162669348625822, 0.6853079551313846, 0.7144108917311998, 0.7341480430315861, 0.6671037956836574, -0.17013019643779437, 0.7288988508164431, 0.7201731755424339, -0.07478035943340289] 59 | 60 | Average MSE: 5.953237719818989 +- 16.165927731904752 61 | Average R^2: 0.49286300691362522 +- 0.11663176700952635 62 | """ -------------------------------------------------------------------------------- /data_drug_sensitivity/gdsc/drug_names_sorted_filtered: -------------------------------------------------------------------------------- 1 | 17-AAG 2 | 681640 3 | A-443654 4 | A-770041 5 | ABT-263 6 | ABT-888 7 | AG-014699 8 | AICAR 9 | AKT inhibitor VIII 10 | AMG-706 11 | AP-24534 12 | AS601245 13 | ATRA 14 | AUY922 15 | AZ628 16 | AZD-0530 17 | AZD-2281 18 | AZD6244 19 | AZD6482 20 | AZD7762 21 | AZD8055 22 | Axitinib 23 | BAY 61-3606 24 | BI-2536 25 | BIBW2992 26 | BIRB 0796 27 | BMS-509744 28 | BMS-536924 29 | BMS-708163 30 | BMS-754807 31 | BX-795 32 | Bexarotene 33 | Bicalutamide 34 | Bleomycin 35 | Bortezomib 36 | Bosutinib 37 | Bryostatin 1 38 | CCT007093 39 | CCT018159 40 | CEP-701 41 | CGP-082996 42 | CGP-60474 43 | CHIR-99021 44 | CI-1040 45 | CMK 46 | Camptothecin 47 | Cyclopamine 48 | Cytarabine 49 | DMOG 50 | Dasatinib 51 | Docetaxel 52 | Doxorubicin 53 | EHT 1864 54 | Elesclomol 55 | Embelin 56 | Epothilone B 57 | Erlotinib 58 | Etoposide 59 | FH535 60 | FTI-277 61 | GDC-0449 62 | GDC0941 63 | GNF-2 64 | GSK-1904529A 65 | GSK-650394 66 | 
GSK269962A 67 | GW 441756 68 | GW843682X 69 | Gefitinib 70 | Gemcitabine 71 | IPA-3 72 | Imatinib 73 | JNJ-26854165 74 | JNK Inhibitor VIII 75 | JNK-9L 76 | JW-7-52-1 77 | KIN001-135 78 | KU-55933 79 | LAQ824 80 | LFM-A13 81 | Lapatinib 82 | Lenalidomide 83 | MG-132 84 | MK-2206 85 | MS-275 86 | Methotrexate 87 | Midostaurin 88 | Mitomycin C 89 | NSC-87877 90 | NU-7441 91 | NVP-BEZ235 92 | NVP-TAE684 93 | Nilotinib 94 | Nutlin-3a 95 | OSI-906 96 | OSU-03012 97 | Obatoclax Mesylate 98 | PAC-1 99 | PD-0325901 100 | PD-0332991 101 | PD-173074 102 | PF-02341066 103 | PF-4708671 104 | PF-562271 105 | PHA-665752 106 | PLX4720 107 | Paclitaxel 108 | Parthenolide 109 | Pazopanib 110 | Pyrimethamine 111 | QS11 112 | RDEA119 113 | RO-3306 114 | Rapamycin 115 | Roscovitine 116 | S-Trityl-L-cysteine 117 | SB 216763 118 | SB590885 119 | SL 0101-1 120 | Salubrinal 121 | Shikonin 122 | Sorafenib 123 | Sunitinib 124 | TGX221 125 | TW 37 126 | Temsirolimus 127 | Thapsigargin 128 | Tipifarnib 129 | VX-680 130 | VX-702 131 | Vinblastine 132 | Vinorelbine 133 | Vorinostat 134 | WH-4-023 135 | WZ-1-84 136 | XMD8-85 137 | Z-LLNle-CHO 138 | ZM-447439 139 | -------------------------------------------------------------------------------- /experiments/experiments_gdsc/cross_validation/kbmf/run_nested_cross_val_kbmf.R~: -------------------------------------------------------------------------------- 1 | # Run the nested cross-validation for KBMF 2 | 3 | source("nested_cross_val_kbmf.R") 4 | K <- 10 5 | R_values <- c(5,6,7,8,9,10) 6 | 7 | Px <- 3 8 | Nx <- 622 9 | Pz <- 3 10 | Nz <- 138 11 | 12 | # Load in the drug sensitivity values 13 | folder_drug_sensitivity <- '/home/tab43/Documents/Projects/libraries/BNMTF/drug_sensitivity/data/gdsc/' 14 | name_drug_sensitivity <- 'ic50_excl_empty_filtered_cell_lines_drugs.txt' 15 | Y <- as.matrix(read.table(paste(folder_drug_sensitivity,name_drug_sensitivity,sep=''), 16 | header=TRUE, 17 | sep=',', 18 | colClasses=c(rep("NULL",3), rep("numeric",138)))) 19 | 20 | # Load in the kernels - X = cancer cell lines, Z = drugs 21 | folder_kernels <- '/home/tab43/Documents/Projects/libraries/BNMTF/drug_sensitivity/data/gdsc/kernels/' 22 | 23 | kernel_copy_variation <- as.matrix(read.table(paste(folder_kernels,'copy_variation.txt',sep=''),header=TRUE,sep='\t')) 24 | kernel_gene_expression <- as.matrix(read.table(paste(folder_kernels,'gene_expression.txt',sep=''),header=TRUE,sep='\t')) 25 | kernel_mutation <- as.matrix(read.table(paste(folder_kernels,'mutation.txt',sep=''),header=TRUE,sep='\t')) 26 | 27 | kernel_1d2d <- as.matrix(read.table(paste(folder_kernels,'1d2d_descriptors.txt',sep=''),header=TRUE,sep=',')) 28 | kernel_fingerprints<- as.matrix(read.table(paste(folder_kernels,'PubChem_fingerprints.txt',sep=''),header=TRUE,sep=',')) 29 | kernel_targets <- as.matrix(read.table(paste(folder_kernels,'targets.txt',sep=''),header=TRUE,sep=',')) 30 | 31 | Kx <- array(0, c(Nx, Nx, Px)) 32 | Kx[,, 1] <- kernel_copy_variation 33 | Kx[,, 2] <- kernel_gene_expression 34 | Kx[,, 3] <- kernel_mutation 35 | 36 | Kz <- array(0, c(Nz, Nz, Pz)) 37 | Kz[,, 1] <- kernel_1d2d 38 | Kz[,, 2] <- kernel_fingerprints 39 | Kz[,, 3] <- kernel_targets 40 | 41 | # Run the cross-validation 42 | kbmf_nested_cross_validation(Kx, Kz, Y, R_values, K) 43 | 44 | # R_values <- c(7,8,9) 45 | # MSE: 2.1906, 2.1993, 2.2380, 2.2522, 2.3098 46 | # R^2: 0.8108, 0.8109, 0.8073, 0.8120, 0.8021 47 | # Rp: 0.9005, 0.9005, 0.8986, 0.9011, 0.8958 48 | # Average performances: MSE=2.2380, R^2=0.8086, Rp=0.8993 49 | 
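Note: the MSE, R^2 and Rp values reported in the fold_*.txt logs and by the KBMF cross-validation scripts are all computed over the held-out (test) entries only, mirroring the MSE, R^2 = 1 - SS_res/SS_tot and Pearson-correlation lines in nested_cross_val_kbmf.R further down in this dump. Below is a minimal Python sketch of those three metrics for a partially observed matrix; R is the data matrix, R_pred the model's reconstruction and M_test a binary mask of test entries. The function and variable names are illustrative, not the repository's API.

import numpy

def heldout_performance(R, R_pred, M_test):
    # Evaluate only the entries held out for testing (M_test == 1).
    actual = R[M_test == 1]
    predicted = R_pred[M_test == 1]
    mse = numpy.mean((actual - predicted) ** 2)
    r2 = 1. - numpy.sum((actual - predicted) ** 2) / numpy.sum((actual - actual.mean()) ** 2)
    rp = numpy.corrcoef(actual, predicted)[0, 1]   # Pearson correlation, reported as Rp
    return {'MSE': mse, 'R^2': r2, 'Rp': rp}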
-------------------------------------------------------------------------------- /experiments/experiments_ccle/cross_validation/ccle_ec_np_nmf/fold_10.txt: -------------------------------------------------------------------------------- 1 | Tried parameters {'K': 1}. Average performances: {'R^2': nan, 'MSE': nan, 'Rp': nan}. 2 | All performances: {'R^2': [0.2990448024868735, nan, 0.2840769553627075, 0.26714246444465617, 0.11179458930986019, 0.28414563451824437, 0.2789897405789954, 0.23437852058959108, 0.23197749343682172, 0.19454331249373813], 'MSE': [7.2081285106701838, nan, 8.4540785671560474, 7.9755098743377015, 10.490060311415816, 7.7292357330047716, 8.2834217516744371, 8.1726540674248351, 8.8516732651546768, 8.7154462891311084], 'Rp': [0.56460215202757513, nan, 0.54849125263803944, 0.53843893380351859, 0.45073282179886515, 0.55572028232090409, 0.54613477312734926, 0.54132475362304144, 0.50771080346211717, 0.48526614285520114]}. 3 | Tried parameters {'K': 2}. Average performances: {'R^2': nan, 'MSE': nan, 'Rp': nan}. 4 | All performances: {'R^2': [0.17339316174401498, 0.12382636842374595, 0.09920910311542641, -0.698529354830483, 0.189725601465631, -0.27796723954820046, nan, 0.24436246653104365, 0.1367540667531495, -1.282101408415528], 'MSE': [9.6512734326581207, 10.110047661709734, 10.335585943906789, 18.476940433649386, 8.5770228973499112, 13.86498823666988, nan, 8.5054971891034548, 9.3803252970458857, 24.605894753348949], 'Rp': [0.49969350509944144, 0.46420457593582953, 0.46921142164619528, 0.29817919762006106, 0.51785697799272423, 0.38253978258581672, nan, 0.53981172599605909, 0.49286325986064006, 0.25750045194421955]}. 5 | Tried parameters {'K': 3}. Average performances: {'R^2': nan, 'MSE': nan, 'Rp': nan}. 6 | All performances: {'R^2': [0.04600338411794669, -0.07679370717699197, -6.898094240598942, -0.23883899157556043, -0.5859210306749381, nan, -28.72937659902503, -0.12351150816049028, -1.8911807711311281, nan], 'MSE': [10.926870945064891, 12.439280057723746, 86.053786844642929, 13.804728763778797, 15.933965745136792, nan, 335.52834016553817, 11.334188982594746, 33.640031796664339, nan], 'Rp': [0.48184819709055382, 0.42290700962235894, 0.14784010520281102, 0.37723161731746629, 0.37499801529072868, nan, 0.051046476971676892, 0.4162096954820968, 0.25129992903087628, nan]}. 7 | Best performances: {'R^2': nan, 'MSE': nan, 'Rp': nan}. Best parameters: {'K': 1}. 8 | -------------------------------------------------------------------------------- /experiments/experiments_ccle/cross_validation/ccle_ec_np_nmf/fold_4.txt: -------------------------------------------------------------------------------- 1 | Tried parameters {'K': 1}. Average performances: {'R^2': nan, 'MSE': nan, 'Rp': nan}. 2 | All performances: {'R^2': [0.3239066468360543, 0.26591082328875126, 0.277232407693747, 0.3002988912118617, 0.2230720732092316, 0.3016019016338344, 0.27359969988871646, 0.30245806011734355, nan, 0.2159285164272251], 'MSE': [7.2598155079118172, 8.4367043044885328, 8.1663588046295921, 8.0833222544490368, 9.0500330345001512, 8.0276501013862784, 7.980061121753228, 7.5103071477783869, nan, 8.7415995269791615], 'Rp': [0.58003173592262225, 0.5418780706142905, 0.54467016939444701, 0.5606218293239178, 0.50746679314807175, 0.56032425939131081, 0.54443797762797175, 0.56407691159055506, nan, 0.5016971427584187]}. 3 | Tried parameters {'K': 2}. Average performances: {'R^2': nan, 'MSE': nan, 'Rp': nan}. 
4 | All performances: {'R^2': [-5.091737569044002, 0.14848438000496866, 0.2336683179270983, 0.047599750811030406, 0.2375436580542296, 0.17234147867507055, nan, 0.18011945080160585, 0.20935282545199996, 0.24152107002501078], 'MSE': [69.11062766964352, 9.4084696891166733, 9.1935756756055103, 9.6688824290995576, 8.4776314656262617, 9.6605352430164988, nan, 8.3922350493859295, 9.0872003997111541, 8.7609250002282675], 'Rp': [0.16590956551300862, 0.49774626633210262, 0.53473711841463611, 0.45700815466190642, 0.53792917790183337, 0.50001532989741448, nan, 0.51238501859198027, 0.50713253504549305, 0.53691018555046077]}. 5 | Tried parameters {'K': 3}. Average performances: {'R^2': nan, 'MSE': nan, 'Rp': nan}. 6 | All performances: {'R^2': [-5.2634475331075015, -0.22918117651745296, -0.3166474856182855, nan, 0.0067056182688344235, -293.88343994992823, -27550.438457672837, -0.32233750286340146, -0.198384191064096, nan], 'MSE': [65.498625565971452, 14.231269176284863, 14.934270478137543, nan, 10.841791309191223, 3398.2560242145123, 312819.06982480449, 13.755389034937668, 13.163945172420226, nan], 'Rp': [0.18370774363564404, 0.39777648711936636, 0.3675755735405864, nan, 0.48444087358632931, -0.0040364822562375582, -0.023977524852361095, 0.37843878210640453, 0.38381454211200866, nan]}. 7 | Best performances: {'R^2': nan, 'MSE': nan, 'Rp': nan}. Best parameters: {'K': 1}. 8 | -------------------------------------------------------------------------------- /experiments/experiments_gdsc/cross_validation/kbmf/run_nested_cross_val_kbmf.R: -------------------------------------------------------------------------------- 1 | # Run the nested cross-validation for KBMF 2 | 3 | source("nested_cross_val_kbmf.R") 4 | K <- 10 5 | R_values <- c(6,8,10) 6 | 7 | Px <- 3 8 | Nx <- 622 9 | Pz <- 3 10 | Nz <- 138 11 | 12 | # Load in the drug sensitivity values 13 | folder_drug_sensitivity <- '/Users/thomasbrouwer/Documents/Projects/libraries/BNMTF/data_drug_sensitivity/gdsc/' 14 | name_drug_sensitivity <- 'ic50_excl_empty_filtered_cell_lines_drugs.txt' 15 | Y <- as.matrix(read.table(paste(folder_drug_sensitivity,name_drug_sensitivity,sep=''), 16 | header=TRUE, 17 | sep=',', 18 | colClasses=c(rep("NULL",3), rep("numeric",138)))) 19 | 20 | # Load in the kernels - X = cancer cell lines, Z = drugs 21 | folder_kernels <- '/Users/thomasbrouwer/Documents/Projects/libraries/BNMTF/data_drug_sensitivity/gdsc/kernels/' 22 | 23 | kernel_copy_variation <- as.matrix(read.table(paste(folder_kernels,'copy_variation.txt',sep=''),header=TRUE,sep='\t')) 24 | kernel_gene_expression <- as.matrix(read.table(paste(folder_kernels,'gene_expression.txt',sep=''),header=TRUE,sep='\t')) 25 | kernel_mutation <- as.matrix(read.table(paste(folder_kernels,'mutation.txt',sep=''),header=TRUE,sep='\t')) 26 | 27 | kernel_1d2d <- as.matrix(read.table(paste(folder_kernels,'1d2d_descriptors.txt',sep=''),header=TRUE,sep=',')) 28 | kernel_fingerprints<- as.matrix(read.table(paste(folder_kernels,'PubChem_fingerprints.txt',sep=''),header=TRUE,sep=',')) 29 | kernel_targets <- as.matrix(read.table(paste(folder_kernels,'targets.txt',sep=''),header=TRUE,sep=',')) 30 | 31 | Kx <- array(0, c(Nx, Nx, Px)) 32 | Kx[,, 1] <- kernel_copy_variation 33 | Kx[,, 2] <- kernel_gene_expression 34 | Kx[,, 3] <- kernel_mutation 35 | 36 | Kz <- array(0, c(Nz, Nz, Pz)) 37 | Kz[,, 1] <- kernel_1d2d 38 | Kz[,, 2] <- kernel_fingerprints 39 | Kz[,, 3] <- kernel_targets 40 | 41 | # Run the cross-validation 42 | kbmf_nested_cross_validation(Kx, Kz, Y, R_values, K) 43 | 44 | # R_values <- 
c(7,8,9) 45 | # MSE: 2.1906, 2.1993, 2.2380, 2.2522, 2.3098 46 | # R^2: 0.8108, 0.8109, 0.8073, 0.8120, 0.8021 47 | # Rp: 0.9005, 0.9005, 0.8986, 0.9011, 0.8958 48 | # Average performances: MSE=2.2380, R^2=0.8086, Rp=0.8993 49 | -------------------------------------------------------------------------------- /experiments/experiments_gdsc/cross_validation/kbmf/nested_cross_val_kbmf.R~: -------------------------------------------------------------------------------- 1 | # Nested cross-validation for the KBMF method. 2 | 3 | source("cross_val_kbmf.R") 4 | 5 | kbmf_nested_cross_validation <- function(Kx, Kz, Y, R_values, K) { 6 | # Split the dataset up into K folds 7 | sets = create_train_test_sets(split_dataset(Y, K), Y) 8 | training_sets = sets[[1]] 9 | test_sets = sets[[2]] 10 | 11 | MSEs = list() 12 | R2s = list() 13 | Rps = list() 14 | for (f in seq(training_sets)) { 15 | print(sprintf("FOLD %i. Now running cross-validation to find best R.", f)) 16 | train = training_sets[[f]] 17 | test = test_sets[[f]] 18 | 19 | # Run X-val on each training set 20 | results = kbmf_cross_validation(Kx, Kz, train, R_values, K) 21 | 22 | # Use the best value for R to train and evaluate on the test set 23 | best_R = results[[1]] 24 | state <- kbmf_regression_train(Kx, Kz, train, best_R) 25 | prediction <- kbmf_regression_test(Kx, Kz, state)$Y$mu 26 | 27 | MSE = mean((prediction - test)^2, na.rm=TRUE ) 28 | mean_test = mean( test, na.rm=TRUE ) 29 | R2 = 1 - ( sum( (test - prediction)^2, na.rm=TRUE ) / sum( (test - mean_test)^2, na.rm=TRUE ) ) 30 | mean_pred = mean( prediction, na.rm=TRUE ) 31 | Rp = cor(c(test),c(prediction),use='pairwise.complete.obs',method='pearson') 32 | #Rp = sum( (test - mean_test) * (prediction - mean_pred) , na.rm=TRUE ) / ( sqrt( sum( (test - mean_test)^2 , na.rm=TRUE ) ) * sqrt( sum( (prediction - mean_pred)^2 , na.rm=TRUE ) ) ) 33 | print(sprintf("Performance on fold %i: MSE=%.4f, R^2=%.4f, Rp=%.4f.", f,MSE,R2,Rp)) 34 | 35 | # Store the performance 36 | MSEs = c(MSEs,MSE) 37 | R2s = c(R2s,R2) 38 | Rps = c(Rps,Rp) 39 | } 40 | 41 | # Print all performances 42 | print(sprintf("All performances nested cross-validation: MSE=%.4f, R^2=%.4f, Rp=%.4f.",MSEs,R2s,Rps)) 43 | 44 | # Compute the average performances, and return that. 45 | average_MSE = mean(unlist(MSEs)) 46 | average_R2 = mean(unlist(R2s)) 47 | average_Rp = mean(unlist(Rps)) 48 | print(sprintf("Performances nested cross-validation: MSE=%.4f, R^2=%.4f, Rp=%.4f.",average_MSE,average_R2,average_Rp)) 49 | return(list(average_MSE, average_R2, average_Rp)) 50 | } 51 | 52 | 53 | -------------------------------------------------------------------------------- /experiments/experiments_gdsc/cross_validation/gibbs_nmf/linesearch_xval_gibbs.py: -------------------------------------------------------------------------------- 1 | """ 2 | Run the cross validation with line search for model selection using G-NMF on 3 | the Sanger dataset. 
4 | """ 5 | 6 | import sys, os 7 | project_location = os.path.dirname(__file__)+"/../../../../../" 8 | sys.path.append(project_location) 9 | 10 | import numpy, random 11 | from BNMTF.code.models.bnmf_gibbs_optimised import bnmf_gibbs_optimised 12 | from BNMTF.code.cross_validation.line_search_cross_validation import LineSearchCrossValidation 13 | from BNMTF.data_drug_sensitivity.gdsc.load_data import load_gdsc 14 | 15 | 16 | # Settings 17 | standardised = False 18 | iterations = 1000 19 | burn_in = 900 20 | thinning = 2 21 | init_UV = 'random' 22 | 23 | K_range = [15,20,25,30] 24 | no_folds = 10 25 | restarts = 1 26 | 27 | quality_metric = 'AIC' 28 | output_file = "./results.txt" 29 | 30 | alpha, beta = 1., 1. 31 | lambdaU = 1./10. 32 | lambdaV = 1./10. 33 | priors = { 'alpha':alpha, 'beta':beta, 'lambdaU':lambdaU, 'lambdaV':lambdaV } 34 | 35 | # Load in the Sanger dataset 36 | (_,X_min,M,_,_,_,_) = load_gdsc(standardised=standardised) 37 | 38 | # Run the cross-validation framework 39 | #random.seed(42) 40 | #numpy.random.seed(9000) 41 | nested_crossval = LineSearchCrossValidation( 42 | classifier=bnmf_gibbs_optimised, 43 | R=X_min, 44 | M=M, 45 | values_K=K_range, 46 | folds=no_folds, 47 | priors=priors, 48 | init_UV=init_UV, 49 | iterations=iterations, 50 | restarts=restarts, 51 | quality_metric=quality_metric, 52 | file_performance=output_file 53 | ) 54 | nested_crossval.run(burn_in=burn_in,thinning=thinning) 55 | 56 | """ 57 | all_MSE = [2.0115451703143985, 2.0532542729784833, 2.0454971069846226, 1.994656076757727, 2.0281421630490297, 2.0691704067461281, 2.0708801136454622, 2.1137440615703653, 2.1153688464049725, 2.0478097531374373] 58 | all_R2 = [0.8248485588294542, 0.8219514639515233, 0.8217549958515522, 0.8349672123366683, 0.830543344804296, 0.8229475100079148, 0.8234388009582426, 0.8228191950789238, 0.8195240616800068, 0.8266748390223762, ] 59 | 60 | Average MSE: 2.0550067971588626 +- 0.0013944347250178673 61 | Average R^2: 0.82494699825209561 +- 1.9408941387580883e-05 62 | """ -------------------------------------------------------------------------------- /experiments/experiments_gdsc/cross_validation/kbmf/nested_cross_val_kbmf.R: -------------------------------------------------------------------------------- 1 | # Nested cross-validation for the KBMF method. 2 | 3 | source("cross_val_kbmf.R") 4 | 5 | kbmf_nested_cross_validation <- function(Kx, Kz, Y, R_values, K) { 6 | # Split the dataset up into K folds 7 | sets = create_train_test_sets(split_dataset(Y, K), Y) 8 | training_sets = sets[[1]] 9 | test_sets = sets[[2]] 10 | 11 | MSEs = list() 12 | R2s = list() 13 | Rps = list() 14 | for (f in seq(training_sets)) { 15 | print(sprintf("FOLD %i. 
Now running cross-validation to find best R.", f)) 16 | train = training_sets[[f]] 17 | test = test_sets[[f]] 18 | 19 | # Run X-val on each training set 20 | results = kbmf_cross_validation(Kx, Kz, train, R_values, K) 21 | 22 | # Use the best value for R to train and evaluate on the test set 23 | best_R = results[[1]] 24 | state <- kbmf_regression_train(Kx, Kz, train, best_R) 25 | prediction <- kbmf_regression_test(Kx, Kz, state)$Y$mu 26 | 27 | MSE = mean((prediction - test)^2, na.rm=TRUE ) 28 | mean_test = mean( test, na.rm=TRUE ) 29 | R2 = 1 - ( sum( (test - prediction)^2, na.rm=TRUE ) / sum( (test - mean_test)^2, na.rm=TRUE ) ) 30 | mean_pred = mean( prediction, na.rm=TRUE ) 31 | Rp = cor(c(test),c(prediction),use='pairwise.complete.obs',method='pearson') 32 | #Rp = sum( (test - mean_test) * (prediction - mean_pred) , na.rm=TRUE ) / ( sqrt( sum( (test - mean_test)^2 , na.rm=TRUE ) ) * sqrt( sum( (prediction - mean_pred)^2 , na.rm=TRUE ) ) ) 33 | print(sprintf("Performance on fold %i: MSE=%.4f, R^2=%.4f, Rp=%.4f.", f,MSE,R2,Rp)) 34 | 35 | # Store the performance 36 | MSEs = c(MSEs,MSE) 37 | R2s = c(R2s,R2) 38 | Rps = c(Rps,Rp) 39 | } 40 | 41 | # Print all performances 42 | print(sprintf("All performances nested cross-validation: MSE=%.4f, R^2=%.4f, Rp=%.4f.",unlist(MSEs),unlist(R2s),unlist(Rps))) 43 | 44 | # Compute the average performances, and return that. 45 | average_MSE = mean(unlist(MSEs)) 46 | average_R2 = mean(unlist(R2s)) 47 | average_Rp = mean(unlist(Rps)) 48 | print(sprintf("Performances nested cross-validation: MSE=%.4f, R^2=%.4f, Rp=%.4f.",average_MSE,average_R2,average_Rp)) 49 | return(list(average_MSE, average_R2, average_Rp)) 50 | } 51 | 52 | 53 | -------------------------------------------------------------------------------- /experiments/experiments_gdsc/cross_validation/kbmf/run_cross_val_kbmf.R~: -------------------------------------------------------------------------------- 1 | # Run the cross-validation for KBMF 2 | 3 | source("cross_val_kbmf.R") 4 | K <- 5 5 | R_values <- c(2,4,6,8,10,12,14,16,18,20) 6 | 7 | Px <- 3 8 | Nx <- 622 9 | Pz <- 3 10 | Nz <- 138 11 | 12 | # Load in the drug sensitivity values 13 | folder_drug_sensitivity <- '/home/tab43/Dropbox/Biological databases/Sanger_drug_sensivitity/' 14 | name_drug_sensitivity <- 'ic50_excl_empty_filtered_cell_lines_drugs.txt' 15 | Y <- as.matrix(read.table(paste(folder_drug_sensitivity,name_drug_sensitivity,sep=''), 16 | header=TRUE, 17 | sep=',', 18 | colClasses=c(rep("NULL",3), rep("numeric",138)))) 19 | 20 | # Load in the kernels - X = cancer cell lines, Z = drugs 21 | folder_kernels <- './kernels/' 22 | 23 | kernel_copy_variation <- as.matrix(read.table(paste(folder_kernels,'copy_variation.txt',sep=''),header=TRUE,sep='\t')) 24 | kernel_gene_expression <- as.matrix(read.table(paste(folder_kernels,'gene_expression.txt',sep=''),header=TRUE,sep='\t')) 25 | kernel_mutation <- as.matrix(read.table(paste(folder_kernels,'mutation.txt',sep=''),header=TRUE,sep='\t')) 26 | 27 | kernel_1d2d <- as.matrix(read.table(paste(folder_kernels,'1d2d_descriptors.txt',sep=''),header=TRUE,sep=',')) 28 | kernel_fingerprints<- as.matrix(read.table(paste(folder_kernels,'PubChem_fingerprints.txt',sep=''),header=TRUE,sep=',')) 29 | kernel_targets <- as.matrix(read.table(paste(folder_kernels,'targets.txt',sep=''),header=TRUE,sep=',')) 30 | 31 | Kx <- array(0, c(Nx, Nx, Px)) 32 | Kx[,, 1] <- kernel_copy_variation 33 | Kx[,, 2] <- kernel_gene_expression 34 | Kx[,, 3] <- kernel_mutation 35 | 36 | Kz <- array(0, c(Nz, Nz, Pz)) 37 | Kz[,, 1] 
<- kernel_1d2d 38 | Kz[,, 2] <- kernel_fingerprints 39 | Kz[,, 3] <- kernel_targets 40 | 41 | # Run the cross-validation 42 | kbmf_cross_validation(Kx, Kz, Y, R_values, K) 43 | 44 | # Results (5 folds, 200 iterations): 45 | # R: 2 4 6 8 10 12 14 16 18 20 46 | # MSE: 2.832466 2.448098 2.294287 2.227165 2.243336 2.259782 2.283704 2.309363 2.335845 2.358715 47 | # R^2: 0.7578040 0.7906790 0.8038126 0.8095175 0.8081712 0.8067867 0.8047146 0.8025545 0.8002464 0.7983178 48 | # Rp: 0.8705774 0.8892419 0.8965853 0.8997967 0.8991491 0.8985184 0.8975142 0.8964987 0.8954419 0.8944387 49 | -------------------------------------------------------------------------------- /experiments/experiments_gdsc/cross_validation/vb_nmtf/greedysearch_xval_vb.py: -------------------------------------------------------------------------------- 1 | """ 2 | Run the cross validation with greedy search for model selection using Gibbs-NMTF 3 | on the Sanger dataset. 4 | """ 5 | 6 | import sys, os 7 | project_location = os.path.dirname(__file__)+"/../../../../../" 8 | sys.path.append(project_location) 9 | 10 | import numpy, random 11 | from BNMTF.code.models.bnmtf_vb_optimised import bnmtf_vb_optimised 12 | from BNMTF.code.cross_validation.greedy_search_cross_validation import GreedySearchCrossValidation 13 | from BNMTF.data_drug_sensitivity.gdsc.load_data import load_gdsc 14 | 15 | 16 | # Settings 17 | standardised = False 18 | iterations = 1000 19 | 20 | init_S = 'random' #'exp' # 21 | init_FG = 'kmeans' #'exp' # 22 | 23 | K_range = [5,6,7,8,9,10] 24 | L_range = [5,6,7,8,9,10] 25 | no_folds = 10 26 | restarts = 1 27 | 28 | quality_metric = 'AIC' 29 | output_file = "./results.txt" 30 | 31 | alpha, beta = 1., 1. 32 | lambdaF = 1./10. 33 | lambdaS = 1./10. 34 | lambdaG = 1./10. 35 | priors = { 'alpha':alpha, 'beta':beta, 'lambdaF':lambdaF, 'lambdaS':lambdaS, 'lambdaG':lambdaG } 36 | 37 | # Load in the Sanger dataset 38 | (_,X_min,M,_,_,_,_) = load_gdsc(standardised=standardised) 39 | 40 | # Run the cross-validation framework 41 | #random.seed(42) 42 | #numpy.random.seed(9000) 43 | nested_crossval = GreedySearchCrossValidation( 44 | classifier=bnmtf_vb_optimised, 45 | R=X_min, 46 | M=M, 47 | values_K=K_range, 48 | values_L=L_range, 49 | folds=no_folds, 50 | priors=priors, 51 | init_S=init_S, 52 | init_FG=init_FG, 53 | iterations=iterations, 54 | restarts=restarts, 55 | quality_metric=quality_metric, 56 | file_performance=output_file 57 | ) 58 | nested_crossval.run() 59 | 60 | """ 61 | all_MSE = [2.2811777476249415, 2.1782935772707153, 2.3760214934948851, 2.4070138866182651, 2.1679193763392863, 2.4351661211853344, 2.3531667160686407, 2.4375820084579578, 2.1737221434522502, 2.3957602752026799] 62 | all_R2 = [0.8004514561880776, 0.8095655871226215, 0.7982332012844026, 0.7939011733335062, 0.8135460410954071, 0.7914028391107459, 0.8050979272119902, 0.7964032435159856, 0.8102340265362746, 0.805071751458151] 63 | 64 | Average MSE: 2.3205823345714953 +- 0.011074845252916733 65 | Average R^2: 0.80239072468571615 +- 5.0165577464731684e-05 66 | """ -------------------------------------------------------------------------------- /experiments/experiments_gdsc/cross_validation/icm_nmtf/greedysearch_xval_icm.py: -------------------------------------------------------------------------------- 1 | """ 2 | Run the cross validation with greedy search for model selection using ICM-NMTF 3 | on the Sanger dataset. 
4 | """ 5 | 6 | import sys, os 7 | project_location = os.path.dirname(__file__)+"/../../../../../" 8 | sys.path.append(project_location) 9 | 10 | import numpy, random 11 | from BNMTF.code.models.nmtf_icm import nmtf_icm 12 | from BNMTF.code.cross_validation.greedy_search_cross_validation import GreedySearchCrossValidation 13 | from BNMTF.data_drug_sensitivity.gdsc.load_data import load_gdsc 14 | 15 | 16 | # Settings 17 | standardised = False 18 | iterations = 1000 19 | 20 | init_S = 'random' #'exp' # 21 | init_FG = 'kmeans' #'exp' # 22 | 23 | K_range = [5,6,7,8,9,10] 24 | L_range = [5,6,7,8,9,10] 25 | no_folds = 10 26 | restarts = 1 27 | 28 | quality_metric = 'AIC' 29 | output_file = "./results.txt" 30 | 31 | alpha, beta = 1., 1. 32 | lambdaF = 1./10. 33 | lambdaS = 1./10. 34 | lambdaG = 1./10. 35 | priors = { 'alpha':alpha, 'beta':beta, 'lambdaF':lambdaF, 'lambdaS':lambdaS, 'lambdaG':lambdaG } 36 | 37 | minimum_TN = 0.1 38 | 39 | # Load in the Sanger dataset 40 | (_,X_min,M,_,_,_,_) = load_gdsc(standardised=standardised,sep=',') 41 | 42 | # Run the cross-validation framework 43 | #random.seed(1) 44 | #numpy.random.seed(1) 45 | nested_crossval = GreedySearchCrossValidation( 46 | classifier=nmtf_icm, 47 | R=X_min, 48 | M=M, 49 | values_K=K_range, 50 | values_L=L_range, 51 | folds=no_folds, 52 | priors=priors, 53 | init_S=init_S, 54 | init_FG=init_FG, 55 | iterations=iterations, 56 | restarts=restarts, 57 | quality_metric=quality_metric, 58 | file_performance=output_file 59 | ) 60 | nested_crossval.run(minimum_TN=minimum_TN) 61 | 62 | """ 63 | all_MSE = [2.2020002331612534, 2.2364503149918011, 2.1611831576199534, 2.1569381861635395, 2.1530470452271864, 2.272519698528658, 2.1910498022580613, 2.2302383199950797, 2.1027416628364484, 2.283196008129782] 64 | all_R2 = [0.8068027775294401, 0.8122652321538621, 0.8155286993833876, 0.8151068635575036, 0.8227521825461013, 0.8062086302462692, 0.8136429679161671, 0.8113058601446024, 0.8152542609952846, 0.8080593057170452] 65 | 66 | Average MSE: 2.1989364428911764 +- 0.0029521290510586768 67 | Average R^2: 0.81269267801896627 +- 2.2283761452627026e-05 68 | """ -------------------------------------------------------------------------------- /experiments/experiments_gdsc/cross_validation/kbmf/run_cross_val_kbmf.R: -------------------------------------------------------------------------------- 1 | # Run the cross-validation for KBMF 2 | 3 | source("cross_val_kbmf.R") 4 | K <- 5 5 | R_values <- c(2,4,6,8,10,12,14,16,18,20) 6 | 7 | Px <- 3 8 | Nx <- 622 9 | Pz <- 3 10 | Nz <- 138 11 | 12 | # Load in the drug sensitivity values 13 | folder_drug_sensitivity <- '/Users/thomasbrouwer/Documents/Projects/libraries/BNMTF/data_drug_sensitivity/gdsc/' 14 | name_drug_sensitivity <- 'ic50_excl_empty_filtered_cell_lines_drugs.txt' 15 | Y <- as.matrix(read.table(paste(folder_drug_sensitivity,name_drug_sensitivity,sep=''), 16 | header=TRUE, 17 | sep=',', 18 | colClasses=c(rep("NULL",3), rep("numeric",138)))) 19 | 20 | # Load in the kernels - X = cancer cell lines, Z = drugs 21 | folder_kernels <- '/Users/thomasbrouwer/Documents/Projects/libraries/BNMTF/data_drug_sensitivity/gdsc/kernels/' 22 | 23 | kernel_copy_variation <- as.matrix(read.table(paste(folder_kernels,'copy_variation.txt',sep=''),header=TRUE,sep='\t')) 24 | kernel_gene_expression <- as.matrix(read.table(paste(folder_kernels,'gene_expression.txt',sep=''),header=TRUE,sep='\t')) 25 | kernel_mutation <- as.matrix(read.table(paste(folder_kernels,'mutation.txt',sep=''),header=TRUE,sep='\t')) 26 | 27 | kernel_1d2d <- 
as.matrix(read.table(paste(folder_kernels,'1d2d_descriptors.txt',sep=''),header=TRUE,sep=',')) 28 | kernel_fingerprints<- as.matrix(read.table(paste(folder_kernels,'PubChem_fingerprints.txt',sep=''),header=TRUE,sep=',')) 29 | kernel_targets <- as.matrix(read.table(paste(folder_kernels,'targets.txt',sep=''),header=TRUE,sep=',')) 30 | 31 | Kx <- array(0, c(Nx, Nx, Px)) 32 | Kx[,, 1] <- kernel_copy_variation 33 | Kx[,, 2] <- kernel_gene_expression 34 | Kx[,, 3] <- kernel_mutation 35 | 36 | Kz <- array(0, c(Nz, Nz, Pz)) 37 | Kz[,, 1] <- kernel_1d2d 38 | Kz[,, 2] <- kernel_fingerprints 39 | Kz[,, 3] <- kernel_targets 40 | 41 | # Run the cross-validation 42 | kbmf_cross_validation(Kx, Kz, Y, R_values, K) 43 | 44 | # Results (5 folds, 200 iterations): 45 | # R: 2 4 6 8 10 12 14 16 18 20 46 | # MSE: 2.832466 2.448098 2.294287 2.227165 2.243336 2.259782 2.283704 2.309363 2.335845 2.358715 47 | # R^2: 0.7578040 0.7906790 0.8038126 0.8095175 0.8081712 0.8067867 0.8047146 0.8025545 0.8002464 0.7983178 48 | # Rp: 0.8705774 0.8892419 0.8965853 0.8997967 0.8991491 0.8985184 0.8975142 0.8964987 0.8954419 0.8944387 49 | -------------------------------------------------------------------------------- /experiments/experiments_gdsc/cross_validation/gibbs_nmtf/greedysearch_xval_gibbs.py: -------------------------------------------------------------------------------- 1 | """ 2 | Run the cross validation with greedy search for model selection using VB-NMTF 3 | on the Sanger dataset. 4 | """ 5 | 6 | import sys, os 7 | project_location = os.path.dirname(__file__)+"/../../../../../" 8 | sys.path.append(project_location) 9 | 10 | import numpy, random 11 | from BNMTF.code.models.bnmtf_gibbs_optimised import bnmtf_gibbs_optimised 12 | from BNMTF.code.cross_validation.greedy_search_cross_validation import GreedySearchCrossValidation 13 | from BNMTF.data_drug_sensitivity.gdsc.load_data import load_gdsc 14 | 15 | 16 | # Settings 17 | standardised = False 18 | iterations = 1000 19 | burn_in = 900 20 | thinning = 2 21 | 22 | init_S = 'random' #'exp' # 23 | init_FG = 'kmeans' #'exp' # 24 | 25 | K_range = [5,6,7,8,9,10] 26 | L_range = [5,6,7,8,9,10] 27 | no_folds = 10 28 | restarts = 1 29 | 30 | quality_metric = 'AIC' 31 | output_file = "./results.txt" 32 | 33 | alpha, beta = 1., 1. 34 | lambdaF = 1./10. 35 | lambdaS = 1./10. 36 | lambdaG = 1./10. 
37 | priors = { 'alpha':alpha, 'beta':beta, 'lambdaF':lambdaF, 'lambdaS':lambdaS, 'lambdaG':lambdaG } 38 | 39 | # Load in the Sanger dataset 40 | (_,X_min,M,_,_,_,_) = load_gdsc(standardised=standardised) 41 | 42 | # Run the cross-validation framework 43 | #random.seed(1) 44 | #numpy.random.seed(1) 45 | nested_crossval = GreedySearchCrossValidation( 46 | classifier=bnmtf_gibbs_optimised, 47 | R=X_min, 48 | M=M, 49 | values_K=K_range, 50 | values_L=L_range, 51 | folds=no_folds, 52 | priors=priors, 53 | init_S=init_S, 54 | init_FG=init_FG, 55 | iterations=iterations, 56 | restarts=restarts, 57 | quality_metric=quality_metric, 58 | file_performance=output_file 59 | ) 60 | nested_crossval.run(burn_in=burn_in,thinning=thinning) 61 | 62 | """ 63 | all_MSE = [2.2840197244732074, 2.4010413568146909, 2.3867096829182866, 2.5140729100375911, 2.4161603588039613, 2.5768426948112859, 2.4258351325273564, 2.416620106102529, 2.2286332627076089, 2.3745461326347104] 64 | all_R2 = [0.8033980427153291, 0.798845320492358, 0.8023608504542508, 0.7847220094659351, 0.7846794714863345, 0.7881485488273184, 0.7940181660135461, 0.7954596533423378, 0.8057721746024293, 0.7961801714226922] 65 | 66 | Average MSE: 2.4024481361831223 +- 0.0089074472278596831 67 | Average R^2: 0.79535844088225327 +- 5.1591154270217092e-05 68 | """ -------------------------------------------------------------------------------- /experiments/experiments_gdsc/cross_validation/kbmf/run_kbmf.R~: -------------------------------------------------------------------------------- 1 | source("kbmf_regression_train.R") 2 | source("kbmf_regression_test.R") 3 | 4 | set.seed(1606) 5 | 6 | Px <- 3 7 | Nx <- 622 8 | Pz <- 3 9 | Nz <- 138 10 | 11 | # Load in the drug sensitivity values 12 | folder_drug_sensitivity <- '/home/tab43/Dropbox/Biological databases/Sanger_drug_sensivitity/' 13 | name_drug_sensitivity <- 'ic50_excl_empty_filtered_cell_lines_drugs.txt' 14 | Y <- as.matrix(read.table(paste(folder_drug_sensitivity,name_drug_sensitivity,sep=''), 15 | header=TRUE, 16 | sep=',', 17 | colClasses=c(rep("NULL",3), rep("numeric",138)))) 18 | 19 | print("Loaded data") 20 | 21 | # Load in the kernels - X = cancer cell lines, Z = drugs 22 | folder_kernels <- './kernels/' 23 | 24 | kernel_copy_variation <- as.matrix(read.table(paste(folder_kernels,'copy_variation.txt',sep=''),header=TRUE)) 25 | kernel_gene_expression <- as.matrix(read.table(paste(folder_kernels,'gene_expression.txt',sep=''),header=TRUE)) 26 | kernel_mutation <- as.matrix(read.table(paste(folder_kernels,'mutation.txt',sep=''),header=TRUE)) 27 | 28 | kernel_1d2d <- as.matrix(read.table(paste(folder_kernels,'1d2d_descriptors.txt',sep=''),header=TRUE)) 29 | kernel_fingerprints<- as.matrix(read.table(paste(folder_kernels,'PubChem_fingerprints.txt',sep=''),header=TRUE)) 30 | kernel_targets <- as.matrix(read.table(paste(folder_kernels,'targets.txt',sep=''),header=TRUE)) 31 | 32 | Kx <- array(0, c(Nx, Nx, Px)) 33 | Kx[,, 1] <- kernel_copy_variation 34 | Kx[,, 2] <- kernel_gene_expression 35 | Kx[,, 3] <- kernel_mutation 36 | 37 | Kz <- array(0, c(Nz, Nz, Pz)) 38 | Kz[,, 1] <- kernel_1d2d 39 | Kz[,, 2] <- kernel_fingerprints 40 | Kz[,, 3] <- kernel_targets 41 | 42 | print("Loaded kernels") 43 | 44 | # Train the model, and test the performance on the training data 45 | state <- kbmf_regression_train(Kx, Kz, Y, 10) 46 | prediction <- kbmf_regression_test(Kx, Kz, state) 47 | 48 | print("Trained model") 49 | #print(prediction$Y$mu) 50 | 51 | print(sprintf("MSE = %.4f", mean((prediction$Y$mu - Y)^2, na.rm=TRUE 
))) 52 | # R=5, 200 iterations: "MSE = 2.0170" 53 | # R=5, 1000 iterations: "MSE = 2.0131" 54 | # R=10, 100 iterations: "MSE = 1.5869" 55 | # R=10, 200 iterations: "MSE = 1.5736" 56 | # R=10, 1000 iterations: "MSE = 1.5644" 57 | 58 | print("kernel weights on X") 59 | print(state$ex$mu) 60 | 61 | print("kernel weights on Z") 62 | print(state$ez$mu) 63 | -------------------------------------------------------------------------------- /experiments/experiments_gdsc/time/nmf_np_time.py: -------------------------------------------------------------------------------- 1 | """ 2 | Run NMF NP on the Sanger dataset. 3 | 4 | We can plot the MSE, R2 and Rp as it converges, against time, on the entire dataset. 5 | 6 | We give flat priors (1/10). 7 | """ 8 | 9 | import sys, os 10 | project_location = os.path.dirname(__file__)+"/../../../../" 11 | sys.path.append(project_location) 12 | 13 | from BNMTF.code.models.nmf_np import NMF 14 | from BNMTF.data_drug_sensitivity.gdsc.load_data import load_gdsc 15 | 16 | import numpy, random, scipy, matplotlib.pyplot as plt 17 | 18 | ########## 19 | 20 | standardised = False #standardised Sanger or unstandardised 21 | 22 | repeats = 10 23 | 24 | iterations = 1000 25 | I, J, K = 622,138,25 26 | 27 | init_UV = 'exponential' 28 | expo_prior = 1/10. 29 | 30 | # Load in data 31 | (_,R,M,_,_,_,_) = load_gdsc(standardised=standardised) 32 | 33 | 34 | # Run the VB algorithm, times 35 | times_repeats = [] 36 | performances_repeats = [] 37 | for i in range(0,repeats): 38 | # Set all the seeds 39 | numpy.random.seed(0) 40 | 41 | # Run the classifier 42 | nmf = NMF(R,M,K) 43 | nmf.initialise(init_UV,expo_prior) 44 | nmf.run(iterations) 45 | 46 | # Extract the performances and timestamps across all iterations 47 | times_repeats.append(nmf.all_times) 48 | performances_repeats.append(nmf.all_performances) 49 | 50 | # Check whether seed worked: all performances should be the same 51 | assert all(numpy.array_equal(performances, performances_repeats[0]) for performances in performances_repeats), \ 52 | "Seed went wrong - performances not the same across repeats!" 53 | 54 | # Print out the performances, and the average times 55 | all_times_average = list(numpy.average(times_repeats, axis=0)) 56 | all_performances = performances_repeats[0] 57 | print "np_all_times_average = %s" % all_times_average 58 | print "np_all_performances = %s" % all_performances 59 | 60 | 61 | # Print all time plots, the average, and performance vs iterations 62 | plt.figure() 63 | plt.title("Performance against time") 64 | plt.ylim(0,10) 65 | for times in times_repeats: 66 | plt.plot(times, all_performances['MSE']) 67 | 68 | plt.figure() 69 | plt.title("Performance against average time") 70 | plt.plot(all_times_average, all_performances['MSE']) 71 | plt.ylim(0,10) 72 | 73 | plt.figure() 74 | plt.title("Performance against iteration") 75 | plt.plot(all_performances['MSE']) 76 | plt.ylim(0,10) -------------------------------------------------------------------------------- /experiments/experiments_gdsc/time/nmtf_np_time.py: -------------------------------------------------------------------------------- 1 | """ 2 | Run NMTF VB on the Sanger dataset. 3 | 4 | We can plot the MSE, R2 and Rp as it converges, against time, on the entire dataset. 5 | 6 | We give flat priors (1/10). 
7 | """ 8 | 9 | import sys, os 10 | project_location = os.path.dirname(__file__)+"/../../../../" 11 | sys.path.append(project_location) 12 | 13 | from BNMTF.code.models.nmtf_np import NMTF 14 | from BNMTF.data_drug_sensitivity.gdsc.load_data import load_gdsc 15 | 16 | import numpy, random, scipy, matplotlib.pyplot as plt 17 | 18 | ########## 19 | 20 | standardised = False #standardised Sanger or unstandardised 21 | 22 | repeats = 10 23 | 24 | iterations = 3000 25 | I, J, K, L = 622,138,5,5 26 | 27 | init_FG = 'kmeans' 28 | init_S = 'exponential' 29 | expo_prior = 1/10. 30 | 31 | 32 | # Load in data 33 | (_,R,M,_,_,_,_) = load_gdsc(standardised=standardised) 34 | 35 | 36 | # Run the VB algorithm, times 37 | times_repeats = [] 38 | performances_repeats = [] 39 | for i in range(0,repeats): 40 | # Set all the seeds 41 | numpy.random.seed(3) 42 | 43 | # Run the classifier 44 | nmtf = NMTF(R,M,K,L) 45 | nmtf.initialise(init_S,init_FG,expo_prior) 46 | nmtf.run(iterations) 47 | 48 | # Extract the performances and timestamps across all iterations 49 | times_repeats.append(nmtf.all_times) 50 | performances_repeats.append(nmtf.all_performances) 51 | 52 | # Check whether seed worked: all performances should be the same 53 | assert all(numpy.array_equal(performances, performances_repeats[0]) for performances in performances_repeats), \ 54 | "Seed went wrong - performances not the same across repeats!" 55 | 56 | # Print out the performances, and the average times 57 | all_times_average = list(numpy.average(times_repeats, axis=0)) 58 | all_performances = performances_repeats[0] 59 | print "np_all_times_average = %s" % all_times_average 60 | print "np_all_performances = %s" % all_performances 61 | 62 | 63 | # Print all time plots, the average, and performance vs iterations 64 | plt.figure() 65 | plt.title("Performance against time") 66 | plt.ylim(0,10) 67 | for times in times_repeats: 68 | plt.plot(times, all_performances['MSE']) 69 | 70 | plt.figure() 71 | plt.title("Performance against average time") 72 | plt.plot(all_times_average, all_performances['MSE']) 73 | plt.ylim(0,10) 74 | 75 | plt.figure() 76 | plt.title("Performance against iteration") 77 | plt.plot(all_performances['MSE']) 78 | plt.ylim(0,10) -------------------------------------------------------------------------------- /experiments/experiments_ccle/cross_validation/ccle_ic_np_nmf/fold_7.txt: -------------------------------------------------------------------------------- 1 | Tried parameters {'K': 1}. Average performances: {'R^2': 0.5879277621825882, 'MSE': 4.6483056218383441, 'Rp': 0.76860740296692032}. 2 | All performances: {'R^2': [0.577259234339829, 0.5833463091261437, 0.6149041456125593, 0.5898025177932728, 0.55860672372988, 0.5745341683829451, 0.5957712042436116, 0.5762771829791546, 0.6094814959381496, 0.599294639680336], 'MSE': [4.8035268087934542, 4.8000827101259524, 4.3313238479664218, 4.7140519136330727, 4.8268095706858523, 4.8200029259366355, 4.5121900719775088, 4.7059381118272876, 4.4222629949366468, 4.5468672625006104], 'Rp': [0.76276386620519177, 0.76523783674847645, 0.78485203898267386, 0.76985904705525532, 0.75139840714058859, 0.76119026200470352, 0.77282863943314861, 0.7608104637456734, 0.78188439586002323, 0.77524907249346875]}. 3 | Tried parameters {'K': 2}. Average performances: {'R^2': 0.5440261808488415, 'MSE': 5.140477511723387, 'Rp': 0.75155822300718567}. 
4 | All performances: {'R^2': [0.5622621093705895, 0.604290897715341, 0.5198859737379906, 0.5006860518100077, 0.45748105011968343, 0.5709438055178231, 0.525954316579423, 0.5771046016744266, 0.5881949508556492, 0.5334580511074805], 'MSE': [5.0024933828897531, 4.4244510154082857, 5.392161823770266, 5.4514362432235872, 6.0782833039389415, 4.9329734406986656, 5.207932661419048, 4.9367216222215369, 4.7526104617516323, 5.2257111619121464], 'Rp': [0.75925304721062148, 0.78344135770327183, 0.7357122966846944, 0.72415370791335465, 0.71964145886508413, 0.7656183361953397, 0.7442864253064454, 0.76667822318668466, 0.77435382688019949, 0.74244355012616026]}. 5 | Tried parameters {'K': 3}. Average performances: {'R^2': 0.5300656231834285, 'MSE': 5.2959240320842422, 'Rp': 0.74914101293736834}. 6 | All performances: {'R^2': [0.48646007976574757, 0.485360531552703, 0.5112726098686872, 0.5642481181273951, 0.5359567699385488, 0.5656812545752545, 0.4980766743214996, 0.560265197330502, 0.5305116556880487, 0.5628233406658987], 'MSE': [5.5716738777243275, 5.7739150964057977, 5.5264827337018341, 4.9673575725461454, 5.372463476573575, 5.0291068114559092, 5.6452705558564036, 5.1674838582082074, 4.9759528076530621, 4.9295335307171557], 'Rp': [0.73420104567619704, 0.74164762788583549, 0.73838720892043141, 0.76510029209809638, 0.74581746371787849, 0.76626325120432948, 0.72903149070225692, 0.76141893642684, 0.74695376119956847, 0.76258905154224976]}. 7 | Best performances: {'R^2': 0.5879277621825882, 'MSE': 4.6483056218383441, 'Rp': 0.76860740296692032}. Best parameters: {'K': 1}. 8 | -------------------------------------------------------------------------------- /experiments/experiments_ccle/cross_validation/ccle_ic_np_nmf/fold_6.txt: -------------------------------------------------------------------------------- 1 | Tried parameters {'K': 1}. Average performances: {'R^2': 0.581024004507962, 'MSE': 4.6884246861798404, 'Rp': 0.76442617749065556}. 2 | All performances: {'R^2': [0.582575546356901, 0.5876016712055097, 0.5887548576714253, 0.5566001094300044, 0.6069954425590554, 0.588999311487476, 0.5722034232893978, 0.5798703115188777, 0.5891488534628431, 0.557490518098129], 'MSE': [4.7566793634886153, 4.4781373469991905, 4.5936690701585414, 4.875819027975159, 4.5280645946080611, 4.7608122667043125, 4.6929273120578943, 4.86228000601806, 4.5944799012390396, 4.741377972549528], 'Rp': [0.76652501716451915, 0.76776449798920399, 0.76809514696410419, 0.74978311144862841, 0.78118050873778788, 0.76851936063816195, 0.75799844183635023, 0.76433110655244585, 0.76984517868854241, 0.75021940488681127]}. 3 | Tried parameters {'K': 2}. Average performances: {'R^2': 0.5455547379702661, 'MSE': 5.0891400260923749, 'Rp': 0.75018973680480672}. 4 | All performances: {'R^2': [0.5860501099020681, 0.5613122813367499, 0.5540251431607827, 0.600507677495075, 0.42884297599528753, 0.6018862241089442, 0.5490402640160589, 0.5142874194436797, 0.5940643026343987, 0.4655309816096165], 'MSE': [4.7246803767495109, 5.0783310812505329, 4.8366629006050266, 4.6150649889059183, 6.3228325500782585, 4.5056227939132008, 4.9966028428737275, 5.557253814649898, 4.3824873888425531, 5.8718615230551316], 'Rp': [0.76981234834133305, 0.76468027961905694, 0.75235665143954467, 0.78265528563711118, 0.68652034358133252, 0.77876264511458981, 0.75238070029485049, 0.73119475745325246, 0.77694856764329967, 0.7065857889236965]}. 5 | Tried parameters {'K': 3}. Average performances: {'R^2': 0.47908479493368966, 'MSE': 5.8336648578857604, 'Rp': 0.73002630406494329}. 
6 | All performances: {'R^2': [0.6096483534715216, 0.41090059190455375, 0.5194240040191034, 0.5987704587298991, 0.15525846498948015, 0.40607386515582156, 0.522760860088342, 0.553969411802216, 0.45747041225944274, 0.5565715269165163], 'MSE': [4.4228641636466417, 6.7364572840251213, 5.4165754858835475, 4.4862725389750899, 9.431576978577862, 6.6225308334688791, 5.2419607458298563, 4.873964639647304, 5.9456910506394749, 5.1587548581638289], 'Rp': [0.78810970616774012, 0.70016545354268056, 0.73482885264106956, 0.78524034029260303, 0.6217027199533306, 0.70333594787226461, 0.73956029670424606, 0.75817762696984381, 0.71000059561805995, 0.7591415008875948]}. 7 | Best performances: {'R^2': 0.581024004507962, 'MSE': 4.6884246861798404, 'Rp': 0.76442617749065556}. Best parameters: {'K': 1}. 8 | -------------------------------------------------------------------------------- /experiments/experiments_ccle/cross_validation/ccle_ic_np_nmf/fold_8.txt: -------------------------------------------------------------------------------- 1 | Tried parameters {'K': 1}. Average performances: {'R^2': 0.5853602207302742, 'MSE': 4.6545162989240199, 'Rp': 0.76710564661346348}. 2 | All performances: {'R^2': [0.600337918469428, 0.573331397872287, 0.6043869806051346, 0.6113688193879931, 0.5663643423872363, 0.6141076440975802, 0.5645178031465836, 0.5749526126276556, 0.584291256344347, 0.5599434323644956], 'MSE': [4.5300747716661816, 4.721944612372738, 4.4609305782621034, 4.4114397794491795, 4.7868751148584021, 4.4928473224843843, 4.7769813998723576, 4.7016550060279592, 4.7433288620757086, 4.9190855421711834], 'Rp': [0.77582073601777546, 0.76066465953121076, 0.77907555345923862, 0.78304384896513657, 0.75543980167621316, 0.78385359216690675, 0.75605340198454085, 0.76065196266995017, 0.76570854090011009, 0.75074436876355244]}. 3 | Tried parameters {'K': 2}. Average performances: {'R^2': 0.5639583441151805, 'MSE': 4.8966166052775586, 'Rp': 0.75993488334722958}. 4 | All performances: {'R^2': [0.5078927692074906, 0.5268354707499995, 0.5515753413059283, 0.536354771166293, 0.5604148644567482, 0.5708139906124785, 0.6017185660287829, 0.5848215824043537, 0.576649478901923, 0.6225066063178074], 'MSE': [5.6120999811630989, 5.2373774144973133, 5.0170985987675136, 5.1829283989883468, 4.844804767155761, 4.6912547894029952, 4.4773816901235763, 4.7189041605953497, 4.9198244864808096, 4.2644917656008134], 'Rp': [0.72844311497061687, 0.73848036508083192, 0.75937569520710646, 0.74686038769351348, 0.75760094750509055, 0.76354140405534954, 0.77931433526838589, 0.76973477516011646, 0.76280215632415538, 0.79319565220713006]}. 5 | Tried parameters {'K': 3}. Average performances: {'R^2': 0.5346870063876862, 'MSE': 5.2288809878579112, 'Rp': 0.74978741374870927}. 6 | All performances: {'R^2': [0.3923332662252925, 0.566563611154509, 0.5326006311525158, 0.53822728357004, 0.5305541957057447, 0.5920734268034531, 0.5680791559583926, 0.5574544305599982, 0.5302209959281297, 0.5387630668187867], 'MSE': [6.9526443798030426, 4.7986191239280096, 5.3200578974727391, 5.2482748276698903, 5.2941615982625905, 4.4519440150599667, 5.0607493338430558, 4.825760967355504, 5.2188974145988922, 5.1177003205854152], 'Rp': [0.67845433007918032, 0.76338920448432213, 0.74910387033866987, 0.75555971481198037, 0.75133255961401002, 0.77938248466473758, 0.76633986140239452, 0.75628540081140139, 0.74602327744378205, 0.75200343383661539]}. 7 | Best performances: {'R^2': 0.5853602207302742, 'MSE': 4.6545162989240199, 'Rp': 0.76710564661346348}. Best parameters: {'K': 1}. 
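Note on the nan entries: in the ccle_ec_* fold logs earlier in this dump, the 'Average performances' are nan whenever any single inner fold failed to produce a finite value, which is consistent with taking a plain (non-nan-aware) mean over the ten per-fold values; the ccle_ic_* logs in this directory have no failed folds, so their averages are finite. A small illustration of the difference (illustrative values, not taken from the repository's code):

import numpy

fold_R2 = [0.21, 0.30, numpy.nan, 0.23]   # per-fold R^2 with one failed inner fold
print(numpy.mean(fold_R2))      # nan    -- one failed fold hides the remaining results
print(numpy.nanmean(fold_R2))   # ~0.247 -- average over the finite folds only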
8 | -------------------------------------------------------------------------------- /experiments/experiments_ccle/cross_validation/ccle_ic_np_nmf/fold_1.txt: -------------------------------------------------------------------------------- 1 | Tried parameters {'K': 1}. Average performances: {'R^2': 0.5876766889244068, 'MSE': 4.6372934909068046, 'Rp': 0.76838326647708211}. 2 | All performances: {'R^2': [0.599292247517454, 0.5553816486925056, 0.5895305279203282, 0.5469604232036354, 0.5829484206762812, 0.5898158173589084, 0.615031525104127, 0.5969288867638178, 0.6149639327940273, 0.5859134592129829], 'MSE': [4.5518552138165704, 5.0561588185142643, 4.6595320210221916, 5.0027369105400901, 4.730939590816182, 4.5862513941888494, 4.4342406737876088, 4.5501021058735835, 4.3620302951501797, 4.4390878853585312], 'Rp': [0.77513584855549533, 0.74906084525131111, 0.7702088509627053, 0.74195781351701318, 0.76522345132598213, 0.77054466205577921, 0.78509026099167589, 0.77384652250168839, 0.78465205493495371, 0.76811235467421624]}. 3 | Tried parameters {'K': 2}. Average performances: {'R^2': 0.5684405477699104, 'MSE': 4.8508536204724146, 'Rp': 0.76211232573597165}. 4 | All performances: {'R^2': [0.5354947515555483, 0.6129768996894718, 0.534273094986979, 0.6016838649411256, 0.6034836909700101, 0.5915501645928221, 0.5693970490467506, 0.5436076034843629, 0.5840170850393012, 0.5079212733927316], 'MSE': [4.9971693043173033, 4.3473073736557266, 5.3227836262553225, 4.5238812641532817, 4.4045600213924843, 4.5469276306306794, 4.9234982790841499, 5.1453205427494364, 4.9108052919138308, 5.3862828705719226], 'Rp': [0.73934668523319558, 0.78479342432850219, 0.73859884692321753, 0.77836225419999761, 0.7838541106278657, 0.77080907621239958, 0.76257986766853325, 0.7465182436344806, 0.77677861522742919, 0.73948213330409573]}. 5 | Tried parameters {'K': 3}. Average performances: {'R^2': 0.5258591400909807, 'MSE': 5.3308248627323369, 'Rp': 0.74864257761688435}. 6 | All performances: {'R^2': [0.5403557693156795, 0.5244276099913511, 0.5490958695708181, 0.617616179919101, 0.5489326001942245, 0.47207287092832495, 0.5870963557462132, 0.36612627767646344, 0.5768125479761206, 0.4760553195915104], 'MSE': [4.9853699811180672, 5.1300878508389349, 5.2585904784054343, 4.331580494273588, 5.1645677146319287, 5.8848412211935273, 4.5745775597817717, 7.0901497936401077, 4.7942399464072629, 6.0942435870327394], 'Rp': [0.75058268114278237, 0.74878245554076073, 0.75658718936752911, 0.79743869882474439, 0.758096430987103, 0.72690496636868307, 0.77647866956279687, 0.67909142865690453, 0.76846193283200115, 0.72400132288553809]}. 7 | Best performances: {'R^2': 0.5876766889244068, 'MSE': 4.6372934909068046, 'Rp': 0.76838326647708211}. Best parameters: {'K': 1}. 8 | -------------------------------------------------------------------------------- /experiments/experiments_ccle/cross_validation/ccle_ic_np_nmf/fold_2.txt: -------------------------------------------------------------------------------- 1 | Tried parameters {'K': 1}. Average performances: {'R^2': 0.5809689379908838, 'MSE': 4.6975160815548165, 'Rp': 0.76441365810623052}. 
2 | All performances: {'R^2': [0.596742611584289, 0.5790176887687597, 0.5619720272875866, 0.6254530245678394, 0.5700294095857155, 0.5792499513034616, 0.5722305509085497, 0.5611597775730899, 0.5691534210788772, 0.5946809172506685], 'MSE': [4.6727931877252393, 4.8688764727858471, 4.9064877927464581, 4.2435826902101015, 4.8933021125324787, 4.9789504195628176, 4.589877948538442, 4.6067522837161992, 4.7521996309464081, 4.4623382767841813], 'Rp': [0.77355144277342069, 0.76254918658423654, 0.75282254867779197, 0.79115655427897091, 0.75679242317508444, 0.76416740752416334, 0.76037857730914982, 0.75290079033097201, 0.75660727874410127, 0.77321037166441431]}. 3 | Tried parameters {'K': 2}. Average performances: {'R^2': 0.5532422127174393, 'MSE': 5.0039542470660185, 'Rp': 0.7550932461048897}. 4 | All performances: {'R^2': [0.6240256930361567, 0.614184862372549, 0.5325851476066428, 0.6100724412139076, 0.5717304140258538, 0.48320296375804184, 0.5379400102999448, 0.4663539768949878, 0.5534539043679487, 0.5388727135983606], 'MSE': [4.3779771531769276, 4.5080722286515291, 5.181990565016199, 4.3613597366085104, 4.8644199624620406, 5.7086812364809738, 5.1043290318164889, 5.7950500455737952, 5.0243177232428753, 5.1133447876308491], 'Rp': [0.79443654297444422, 0.78664906869251294, 0.74039612156479562, 0.7872922374261726, 0.76526175129689333, 0.71905976937252869, 0.74224203883745876, 0.71186164890903081, 0.75689841193287422, 0.74683487004218574]}. 5 | Tried parameters {'K': 3}. Average performances: {'R^2': 0.5365183433511134, 'MSE': 5.194918072679946, 'Rp': 0.75031743625327896}. 6 | All performances: {'R^2': [0.6002946366225028, 0.5174440662737703, 0.5534646596815548, 0.5414027067354412, 0.5168045001565171, 0.4950109432188847, 0.4306832339825627, 0.5845608868913246, 0.5518575261714523, 0.5736602737771241], 'MSE': [4.603452367493218, 5.6672272796427912, 4.8056029025431366, 4.9898977417854118, 5.3988828530615418, 5.5696699338592071, 6.178795910405162, 4.8738915328936701, 5.0768532850573971, 4.7849069200579271], 'Rp': [0.78501080202362328, 0.73750623404435067, 0.75874175928522125, 0.74892170869320285, 0.74454414614944597, 0.7319907737039334, 0.69744540590647819, 0.77363198289226642, 0.75684196278601612, 0.76853958704825154]}. 7 | Best performances: {'R^2': 0.5809689379908838, 'MSE': 4.6975160815548165, 'Rp': 0.76441365810623052}. Best parameters: {'K': 1}. 8 | -------------------------------------------------------------------------------- /experiments/experiments_ccle/cross_validation/ccle_ic_np_nmf/fold_3.txt: -------------------------------------------------------------------------------- 1 | Tried parameters {'K': 1}. Average performances: {'R^2': 0.5770301203197727, 'MSE': 4.7894443774157569, 'Rp': 0.76146127673478115}. 2 | All performances: {'R^2': [0.6010545415747162, 0.5450404270636202, 0.5839096007429101, 0.5443570617047796, 0.5596978837901593, 0.6116829470550653, 0.6053307462122484, 0.5732459696735912, 0.5880728061638161, 0.55790921921682], 'MSE': [4.5359102473849031, 5.247801675530031, 4.7689725138311481, 5.1896942576069431, 4.9584102613525696, 4.3670881309616787, 4.3882274847437905, 4.8174369431861832, 4.6539276158916492, 4.9669746436686779], 'Rp': [0.77664433005756872, 0.74152078783154673, 0.76648774123750774, 0.74124638150159183, 0.75093212441962354, 0.78323160090600641, 0.77904124954224607, 0.75953645770092948, 0.767782616775049, 0.74818947737574149]}. 3 | Tried parameters {'K': 2}. Average performances: {'R^2': 0.5474678950032861, 'MSE': 5.1165503955437641, 'Rp': 0.75106904387558104}. 
4 | All performances: {'R^2': [0.5755624560028856, 0.5553739902883607, 0.5272667015874346, 0.560986817670751, 0.5556885404493047, 0.4737348485704289, 0.581356132422387, 0.5594058698934261, 0.5159240618466885, 0.5693795313011931], 'MSE': [4.7727690821124282, 5.1724155625670614, 5.284178043489498, 4.7691745977918059, 5.152245207026807, 5.8011313362430812, 4.9307184795163952, 4.9396089526874825, 5.4040770869420784, 4.9391856070610061], 'Rp': [0.76105984705123142, 0.75426665867871789, 0.73184462952653273, 0.7530663273769086, 0.75319978337988258, 0.72336520829875639, 0.7691918481185186, 0.75835901603272948, 0.74055491120849659, 0.76578220908403594]}. 5 | Tried parameters {'K': 3}. Average performances: {'R^2': 0.5090391366469726, 'MSE': 5.5594872181210135, 'Rp': 0.73651902139470926}. 6 | All performances: {'R^2': [0.5601536602887773, 0.537458568726672, 0.5303385278775352, 0.4248889255895516, 0.5942131169923975, 0.5083841689886551, 0.5593328174271724, 0.41500477941886815, 0.45364145480544704, 0.5069753463546505], 'MSE': [5.0360489781364546, 5.0673356651211501, 5.2873279715122976, 6.4924115601706927, 4.6071093886787926, 5.4791890558354828, 5.0574891961730417, 6.7263527424475429, 6.2487021173508595, 5.5929055057838077], 'Rp': [0.76152947121309589, 0.75156989000280661, 0.74789111911353068, 0.69715009430773589, 0.77993790694290355, 0.73208195041531576, 0.76209632124283844, 0.69076712897229486, 0.70859140512306396, 0.73357492661350565]}. 7 | Best performances: {'R^2': 0.5770301203197727, 'MSE': 4.7894443774157569, 'Rp': 0.76146127673478115}. Best parameters: {'K': 1}. 8 | -------------------------------------------------------------------------------- /experiments/experiments_ccle/cross_validation/ccle_ic_np_nmf/fold_5.txt: -------------------------------------------------------------------------------- 1 | Tried parameters {'K': 1}. Average performances: {'R^2': 0.5846610365587009, 'MSE': 4.6898896282770277, 'Rp': 0.76675663347109335}. 2 | All performances: {'R^2': [0.6073257724660781, 0.5714045658085589, 0.5525842383910535, 0.57797530044815, 0.6104915351562262, 0.590901065637566, 0.5629806650202323, 0.5783922341960113, 0.5972771880159924, 0.5972778004471413], 'MSE': [4.4000308059103057, 4.9032454934484138, 4.9655529357940029, 4.8023500347149914, 4.2600462971004616, 4.8025451126329477, 4.8155044264474371, 4.6918799649394014, 4.6646797949280829, 4.5930614168542263], 'Rp': [0.78037686106595316, 0.75795073268299051, 0.74735552101447378, 0.76318838561230851, 0.78279312127176393, 0.77073273335679782, 0.75364003118147782, 0.7623823415005333, 0.77387815064435628, 0.7752684563802783]}. 3 | Tried parameters {'K': 2}. Average performances: {'R^2': 0.5555205958339418, 'MSE': 5.0196218407795223, 'Rp': 0.75794630568809285}. 4 | All performances: {'R^2': [0.5986705797435277, 0.5725447152940277, 0.5735972486972993, 0.5922980566146359, 0.5492218059312644, 0.5452296787493598, 0.5384455734096758, 0.5574686880364181, 0.48859787592792836, 0.5391317359352801], 'MSE': [4.5458042065927726, 4.9138477241420579, 4.8550373472274941, 4.6883875145322715, 5.1319531335960509, 5.0850469248329579, 5.0858809000621346, 4.8509693243010288, 5.7704378438728758, 5.2688534886355765], 'Rp': [0.77804987318428909, 0.76482059713903239, 0.7641110809787357, 0.77685624910075035, 0.75402910443916393, 0.75434079238845686, 0.74867023318105475, 0.75868640114193797, 0.72885423918418757, 0.75104448614332053]}. 5 | Tried parameters {'K': 3}. Average performances: {'R^2': 0.5381335127604856, 'MSE': 5.2175838077036962, 'Rp': 0.7531017493453116}. 
6 | All performances: {'R^2': [0.27724838010817354, 0.47575253592450895, 0.6042550071267403, 0.5498834333186184, 0.5951677494521638, 0.595980744006377, 0.5454259477630615, 0.5680859626342947, 0.6030756066294463, 0.5664597606414714], 'MSE': [8.2677545365553726, 5.9593342175321968, 4.6134796358397976, 4.7789048746360967, 4.4783051788051225, 4.597679976967501, 5.0892585962016552, 5.0181274756472956, 4.4332132319508197, 4.9397803529011055], 'Rp': [0.64290965975240566, 0.72129000150440115, 0.78650462178537939, 0.75337712545559254, 0.78027392647473393, 0.77799022917147709, 0.75263405199985356, 0.76635875559513777, 0.78498153672608073, 0.76469758498805385]}. 7 | Best performances: {'R^2': 0.5846610365587009, 'MSE': 4.6898896282770277, 'Rp': 0.76675663347109335}. Best parameters: {'K': 1}. 8 | -------------------------------------------------------------------------------- /experiments/experiments_ccle/cross_validation/ccle_ic_np_nmf/fold_10.txt: -------------------------------------------------------------------------------- 1 | Tried parameters {'K': 1}. Average performances: {'R^2': 0.5803406447185868, 'MSE': 4.7078885097005738, 'Rp': 0.76379605584591315}. 2 | All performances: {'R^2': [0.579603443083555, 0.6005804373848687, 0.5519952686049137, 0.5845977486157223, 0.5646501197777706, 0.5805258509974865, 0.5958982988257917, 0.5764829973178376, 0.5874216712186578, 0.5816506113592641], 'MSE': [4.6965861333953818, 4.4835531529403028, 4.9632725890055243, 4.7166917403876587, 4.7387749094708784, 4.7702260185461114, 4.5905286326373718, 4.8892285987723154, 4.4779729799451502, 4.7520503419050479], 'Rp': [0.76451355516282282, 0.77654803769709513, 0.74530340229919556, 0.76608474578272823, 0.753099947551084, 0.76403263562658774, 0.77334660412123057, 0.76156912628924955, 0.76906149678356639, 0.76440100714557113]}. 3 | Tried parameters {'K': 2}. Average performances: {'R^2': 0.5582460522262854, 'MSE': 4.9521749955708767, 'Rp': 0.75878176743809145}. 4 | All performances: {'R^2': [0.6002283975157652, 0.5648988489376408, 0.5288317731450323, 0.5876991583021669, 0.5695053914342509, 0.5158685054621471, 0.6223486275909867, 0.5644654503622191, 0.6037742105908777, 0.4248401589217675], 'MSE': [4.510455524748628, 5.0160855326117773, 5.1889191682661799, 4.6409396205438194, 4.8293261417365647, 5.5031787829183338, 4.2658980519566816, 4.6827566024830869, 4.5589329862067105, 6.3252575442369796], 'Rp': [0.7812018033422401, 0.75861852300274013, 0.74199636471206309, 0.77424424823902938, 0.76264848012088771, 0.74076394622232378, 0.79247705727205875, 0.76083237673246684, 0.78328471533329358, 0.6917501594038109]}. 5 | Tried parameters {'K': 3}. Average performances: {'R^2': 0.4329433334443459, 'MSE': 6.3247875787367374, 'Rp': 0.72416487364093807}. 6 | All performances: {'R^2': [0.5460291329220222, 0.413501228874781, 0.5298650158542253, 0.5964544715775122, 0.3495967165337064, -0.2740151211630466, 0.6028597909457314, 0.49374686648078236, 0.5771150052374752, 0.49428022718026987], 'MSE': [5.3273495174309975, 6.7026409143961478, 5.3409689547010917, 4.5595672390546529, 7.1026232277483592, 13.796015005222822, 4.4527421791849182, 5.5209707273106234, 4.7606615154748217, 5.6843365068429454], 'Rp': [0.75442685031381562, 0.69297090085950808, 0.75254289792828277, 0.78287598675168502, 0.67054064480825293, 0.56363362041910103, 0.78946316479876466, 0.73511977023285835, 0.76958579311771291, 0.73048910717939897]}. 7 | Best performances: {'R^2': 0.5803406447185868, 'MSE': 4.7078885097005738, 'Rp': 0.76379605584591315}. Best parameters: {'K': 1}. 
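A minimal sketch (not from the repository code) of the selection rule these fold files appear to follow: the value of K with the lowest average inner-fold MSE is reported as the best parameter setting. In this fold MSE and R^2 pick the same winner, so the exact criterion cannot be told apart from the numbers alone; the averages below are copied from the lines above.

# Hypothetical helper, not part of the original experiment scripts.
average_mse_per_K = {1: 4.7078885097005738, 2: 4.9521749955708767, 3: 6.3247875787367374}
best_K = min(average_mse_per_K, key=average_mse_per_K.get)
print "Best parameters: {'K': %s}" % best_K   # -> {'K': 1}, matching the line above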
8 | -------------------------------------------------------------------------------- /experiments/experiments_ccle/cross_validation/ccle_ic_np_nmf/fold_4.txt: -------------------------------------------------------------------------------- 1 | Tried parameters {'K': 1}. Average performances: {'R^2': 0.5788156817785745, 'MSE': 4.7440811246210313, 'Rp': 0.7629034411674479}. 2 | All performances: {'R^2': [0.5865946267477808, 0.5682340542093237, 0.6405657551849558, 0.5558914710042061, 0.5670043375843827, 0.5905722975419019, 0.5336624908738525, 0.5881489780756943, 0.5561936314003003, 0.6012891751633459], 'MSE': [4.7310189056595675, 4.9262652924034809, 4.078611428468701, 4.9480842738700366, 4.9632690254352045, 4.5680891178987109, 5.0029879792509586, 4.5395749807895376, 5.1021455886878373, 4.5807646537462716], 'Rp': [0.76853549032506108, 0.75554023786979407, 0.80046278150270622, 0.75016397877247987, 0.75627106093525187, 0.77207529613176351, 0.73395092603827505, 0.76848577730261181, 0.74809329101748456, 0.77545557177905045]}. 3 | Tried parameters {'K': 2}. Average performances: {'R^2': 0.5640548013348997, 'MSE': 4.9090774431829489, 'Rp': 0.76033618931570346}. 4 | All performances: {'R^2': [0.5591867738843999, 0.5769773720679048, 0.6036642901221293, 0.5896394078779017, 0.4993774707670251, 0.5582261071997894, 0.5536381672917026, 0.6050414229982104, 0.5277384826785458, 0.5670585184613889], 'MSE': [4.9763470513526915, 4.7484115882418561, 4.6406823855772474, 4.7211084070290097, 5.6537600926757499, 5.0924254439601633, 4.9908846724672999, 4.4895556266834689, 5.2253164675977288, 4.552282696244272], 'Rp': [0.75478419301680699, 0.76677682930730073, 0.78153261290956566, 0.77392360435843688, 0.72999565811081368, 0.75663837396883904, 0.75489077612930477, 0.78557572354598892, 0.73864767577483503, 0.76059644603514232]}. 5 | Tried parameters {'K': 3}. Average performances: {'R^2': 0.5412661147927961, 'MSE': 5.1657000396164552, 'Rp': 0.75358807477043688}. 6 | All performances: {'R^2': [0.5171065887028614, 0.5319299208824039, 0.5657588725632409, 0.4522967347326965, 0.5339577715492605, 0.630899957475058, 0.5351876557931077, 0.6166468799327876, 0.5075291085998318, 0.5213476576967125], 'MSE': [5.5659857605392782, 5.3358875416476401, 4.8157225417783822, 6.2529764682260636, 5.2360154063686499, 4.1838807752400813, 5.2189353001875771, 4.4247158868862106, 5.3640065405248212, 5.2588741747658529], 'Rp': [0.75071349385273789, 0.74560685633134394, 0.76222151424792739, 0.71282192236323594, 0.75040099003864391, 0.79899703256848276, 0.74768017539212206, 0.79090949019367351, 0.73853311823996315, 0.73799615447623879]}. 7 | Best performances: {'R^2': 0.5788156817785745, 'MSE': 4.7440811246210313, 'Rp': 0.7629034411674479}. Best parameters: {'K': 1}. 8 | -------------------------------------------------------------------------------- /experiments/experiments_ccle/cross_validation/ccle_ic_np_nmf/fold_9.txt: -------------------------------------------------------------------------------- 1 | Tried parameters {'K': 1}. Average performances: {'R^2': 0.5788055478458782, 'MSE': 4.7121312940649487, 'Rp': 0.76286128789951202}. 
2 | All performances: {'R^2': [0.5498192902587136, 0.5762166605027792, 0.6110324022974549, 0.5444997420431797, 0.6231097469134808, 0.6074870756756103, 0.5149837093842093, 0.5639895803067945, 0.6038072954955447, 0.5931099755810159], 'MSE': [4.9442923281895155, 4.6906436044942641, 4.5244795709048873, 5.0131159011803197, 4.2831299847977853, 4.5348334058809101, 5.3621537165526156, 4.7908162635282636, 4.4369543960380353, 4.5408937690828957], 'Rp': [0.74411099958734939, 0.76197886591018704, 0.78232300957836753, 0.74147014284582091, 0.79035990384714816, 0.77999034751416141, 0.72408814244237696, 0.75447138223927834, 0.77824262300777369, 0.77157746202265509]}. 3 | Tried parameters {'K': 2}. Average performances: {'R^2': 0.5224721279471691, 'MSE': 5.3523066375974935, 'Rp': 0.74298171374638422}. 4 | All performances: {'R^2': [0.5656803685025703, 0.5316627325217892, 0.591140304932898, 0.5001650197048881, 0.275513848823845, 0.49630338479452907, 0.5627207692702139, 0.5932122698886195, 0.5358150474013654, 0.5725075336309726], 'MSE': [5.0108657241746206, 5.2898725076299131, 4.6990875884916168, 5.8241596904099895, 8.1960466072870393, 5.3807051959214851, 4.8257087538782848, 4.5235132636359667, 5.1194970392217582, 4.6536100053242579], 'Rp': [0.76081204172907613, 0.74576315161815898, 0.77273246575896393, 0.72250243664135116, 0.64471802089392816, 0.7280547744511483, 0.76084994198593825, 0.77853119799315895, 0.75121281185653632, 0.76464029453558136]}. 5 | Tried parameters {'K': 3}. Average performances: {'R^2': 0.49882383110553574, 'MSE': 5.6014365475089782, 'Rp': 0.7335897681845045}. 6 | All performances: {'R^2': [0.4979285826647162, 0.5078318349514966, 0.5370842929765497, 0.5261297199227486, 0.49478715983864985, 0.5181273024224665, 0.3635429875022478, 0.4866876708250858, 0.4870546421896771, 0.5690641177617197], 'MSE': [5.6022114477648666, 5.6940073275222449, 5.1270029611472649, 5.4075444999672087, 5.5400881267055171, 5.5134804710439438, 6.6435095542392357, 5.6669675134381006, 5.9697375950129841, 4.8498159782484098], 'Rp': [0.72790390670407468, 0.73671029243674768, 0.751870662799628, 0.74791505556489202, 0.7273977453187348, 0.75316697298500024, 0.67908787903285139, 0.72385549058070464, 0.72285980515217696, 0.76512987127023335]}. 7 | Best performances: {'R^2': 0.5788055478458782, 'MSE': 4.7121312940649487, 'Rp': 0.76286128789951202}. Best parameters: {'K': 1}. 8 | -------------------------------------------------------------------------------- /experiments/experiments_toy/grid_search/run_line_search_bnmf_gibbs.py: -------------------------------------------------------------------------------- 1 | """ 2 | Run the line search method for finding the best value for K for BNMF. 3 | We use the parameters for the true priors. 4 | 5 | The BIC tends to give overly simple models, preferring K=1 oftentimes. 6 | The log likelihood and AIC tend to peak at the true K if the correct priors are 7 | given (this has to do with converging to a good local minimum). 8 | 9 | If we give the wrong prior (true/5) we still obtain good convergence (with 10 | true*5 all values get pushed to 0, leading to terrible solutions), and we get 11 | a nice peak for the log likelihood and AIC around the true K. 
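For reference, the standard definitions (not taken from this code): with maximised log
likelihood L and P free parameters, AIC = 2*P - 2*L and BIC = P*ln(n) - 2*L, where n is the
number of observed entries in R. Lower values of both indicate a better trade-off between
fit and complexity; the ln(n) penalty is what makes the BIC prefer smaller K here.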
12 | """ 13 | 14 | import sys, os 15 | project_location = os.path.dirname(__file__)+"/../../../../" 16 | sys.path.append(project_location) 17 | 18 | from BNMTF.data_toy.bnmf.generate_bnmf import generate_dataset, try_generate_M 19 | from BNMTF.code.cross_validation.line_search_bnmf import LineSearch 20 | from BNMTF.code.models.bnmf_gibbs_optimised import bnmf_gibbs_optimised 21 | 22 | import numpy, matplotlib.pyplot as plt 23 | 24 | ########## 25 | 26 | restarts = 10 27 | iterations = 1000 28 | burn_in = 800 29 | thinning = 5 30 | 31 | I, J = 100, 80 32 | true_K = 10 33 | values_K = range(1,20+1) 34 | 35 | fraction_unknown = 0.1 36 | attempts_M = 100 37 | 38 | alpha, beta = 1., 1. #1., 1. 39 | tau = alpha / beta 40 | lambdaU = numpy.ones((I,true_K)) 41 | lambdaV = numpy.ones((J,true_K)) 42 | 43 | classifier = bnmf_gibbs_optimised 44 | initUV = 'random' 45 | 46 | # Generate data 47 | (_,_,_,_,R) = generate_dataset(I,J,true_K,lambdaU,lambdaV,tau) 48 | M = numpy.ones((I,J)) 49 | #M = try_generate_M(I,J,fraction_unknown,attempts_M) 50 | 51 | # Run the line search. The priors lambdaU and lambdaV need to be a single value (recall K is unknown) 52 | priors = { 'alpha':alpha, 'beta':beta, 'lambdaU':lambdaU[0,0]/10, 'lambdaV':lambdaV[0,0]/10 } 53 | line_search = LineSearch(classifier,values_K,R,M,priors,initUV,iterations,restarts) 54 | line_search.search(burn_in,thinning) 55 | 56 | # Plot the performances of all three metrics - but MSE separately 57 | metrics = ['loglikelihood', 'BIC', 'AIC', 'MSE'] 58 | for metric in metrics: 59 | plt.figure() 60 | plt.plot(values_K, line_search.all_values(metric), label=metric) 61 | plt.legend(loc=3) 62 | 63 | # Also print out all values in a dictionary 64 | all_values = {} 65 | for metric in metrics: 66 | all_values[metric] = line_search.all_values(metric) 67 | 68 | print "all_values = %s" % all_values 69 | 70 | ''' 71 | 72 | ''' -------------------------------------------------------------------------------- /experiments/experiments_gdsc/cross_validation/kbmf/run_kbmf.R: -------------------------------------------------------------------------------- 1 | source("kbmf_regression_train.R") 2 | source("kbmf_regression_test.R") 3 | 4 | set.seed(1606) 5 | 6 | Px <- 3 7 | Nx <- 622 8 | Pz <- 3 9 | Nz <- 138 10 | 11 | # Load in the drug sensitivity values 12 | folder_drug_sensitivity <- '/Users/thomasbrouwer/Documents/Projects/libraries/BNMTF/data_drug_sensitivity/gdsc/' 13 | name_drug_sensitivity <- 'ic50_excl_empty_filtered_cell_lines_drugs.txt' 14 | Y <- as.matrix(read.table(paste(folder_drug_sensitivity,name_drug_sensitivity,sep=''), 15 | header=TRUE, 16 | sep=',', 17 | colClasses=c(rep("NULL",3), rep("numeric",138)))) 18 | 19 | print("Loaded data") 20 | 21 | # Load in the kernels - X = cancer cell lines, Z = drugs 22 | folder_kernels <- '/Users/thomasbrouwer/Documents/Projects/libraries/BNMTF/data_drug_sensitivity/gdsc/kernels/' 23 | 24 | kernel_copy_variation <- as.matrix(read.table(paste(folder_kernels,'copy_variation.txt',sep=''),header=TRUE,sep='\t')) 25 | kernel_gene_expression <- as.matrix(read.table(paste(folder_kernels,'gene_expression.txt',sep=''),header=TRUE,sep='\t')) 26 | kernel_mutation <- as.matrix(read.table(paste(folder_kernels,'mutation.txt',sep=''),header=TRUE,sep='\t')) 27 | 28 | kernel_1d2d <- as.matrix(read.table(paste(folder_kernels,'1d2d_descriptors.txt',sep=''),header=TRUE,sep=',')) 29 | kernel_fingerprints<- as.matrix(read.table(paste(folder_kernels,'PubChem_fingerprints.txt',sep=''),header=TRUE,sep=',')) 30 | kernel_targets <- 
as.matrix(read.table(paste(folder_kernels,'targets.txt',sep=''),header=TRUE,sep=',')) 31 | 32 | Kx <- array(0, c(Nx, Nx, Px)) 33 | Kx[,, 1] <- kernel_copy_variation 34 | Kx[,, 2] <- kernel_gene_expression 35 | Kx[,, 3] <- kernel_mutation 36 | 37 | Kz <- array(0, c(Nz, Nz, Pz)) 38 | Kz[,, 1] <- kernel_1d2d 39 | Kz[,, 2] <- kernel_fingerprints 40 | Kz[,, 3] <- kernel_targets 41 | 42 | print("Loaded kernels") 43 | 44 | # Train the model, and test the performance on the training data 45 | state <- kbmf_regression_train(Kx, Kz, Y, 10) 46 | prediction <- kbmf_regression_test(Kx, Kz, state) 47 | 48 | print("Trained model") 49 | #print(prediction$Y$mu) 50 | 51 | print(sprintf("MSE = %.4f", mean((prediction$Y$mu - Y)^2, na.rm=TRUE ))) 52 | # R=5, 200 iterations: "MSE = 2.0170" 53 | # R=5, 1000 iterations: "MSE = 2.0131" 54 | # R=10, 100 iterations: "MSE = 1.5869" 55 | # R=10, 200 iterations: "MSE = 1.5736" 56 | # R=10, 1000 iterations: "MSE = 1.5644" 57 | 58 | print("kernel weights on X") 59 | print(state$ex$mu) 60 | 61 | print("kernel weights on Z") 62 | print(state$ez$mu) 63 | -------------------------------------------------------------------------------- /experiments/experiments_gdsc/cross_validation/np_nmtf/fold_1.txt: -------------------------------------------------------------------------------- 1 | Tried parameters {'K': 6, 'L': 6}. Average performances: {'R^2': 0.7962793111127799, 'MSE': 2.3942128786704253, 'Rp': 0.89255379031404714}. 2 | All performances: {'R^2': [0.7904854226278109, 0.7977480320423695, 0.7967836001359289, 0.7896255269460981, 0.8003652266849697, 0.8048060186925018, 0.7989542277743389, 0.7903809106476973, 0.7980343043693358, 0.7956098412067476], 'MSE': [2.422273364658484, 2.3630302114580588, 2.3672172193024279, 2.4448223591251672, 2.3511364821316394, 2.3274346177947707, 2.4149613416010665, 2.433361714709386, 2.4232179007302661, 2.3946735751929906], 'Rp': [0.88938815315203712, 0.89344672984968354, 0.89279324860415155, 0.88895013720269911, 0.89472480723056569, 0.89722386636305007, 0.89394445210840856, 0.88927457802807974, 0.893449912747178, 0.8923420178546172]}. 3 | Tried parameters {'K': 8, 'L': 8}. Average performances: {'R^2': 0.8040323686653522, 'MSE': 2.3026338827465, 'Rp': 0.89702020965730611}. 4 | All performances: {'R^2': [0.8006527841042816, 0.8103274513761641, 0.8089860752213169, 0.7992510418334184, 0.8033775036229067, 0.808599092203559, 0.8041182552951762, 0.8011531209246552, 0.8039437041995383, 0.7999146578725052], 'MSE': [2.4076049889234357, 2.2510060279166928, 2.2888403594696261, 2.2639094244927089, 2.2626514184189954, 2.263015092042711, 2.2966149451504321, 2.2578180945236928, 2.3526107090099173, 2.3822677675167911], 'Rp': [0.89516742029212826, 0.90032136294324194, 0.8995513518121524, 0.89453020572987596, 0.89650478845426862, 0.89955753916293113, 0.89723956718663933, 0.89566903124067365, 0.89683611491299475, 0.89482471483815451]}. 5 | Tried parameters {'K': 10, 'L': 10}. Average performances: {'R^2': 0.8044644076252941, 'MSE': 2.2972110493106284, 'Rp': 0.89737508381421827}. 
6 | All performances: {'R^2': [0.801553768283229, 0.8168281002065513, 0.8017482560228743, 0.8164645745982658, 0.7974793410175808, 0.8126752109488761, 0.7922738642286442, 0.8121128144351457, 0.7922437016984221, 0.8012644448133527], 'MSE': [2.3401138139820441, 2.2412933449738417, 2.3301598817292413, 2.1277149835149576, 2.4372391411811156, 2.256260196211139, 2.3860111097544703, 2.181558246411071, 2.3543859005330581, 2.3173738748153485], 'Rp': [0.89583190351821995, 0.90406019464110221, 0.89583155180606222, 0.90394382579429122, 0.89361333406802179, 0.90167499115860139, 0.89111621416927489, 0.90140803161731942, 0.89083874744861835, 0.89543204392067077]}. 7 | Best performances: {'R^2': 0.8044644076252941, 'MSE': 2.2972110493106284, 'Rp': 0.89737508381421827}. Best parameters: {'K': 10, 'L': 10}. 8 | -------------------------------------------------------------------------------- /experiments/experiments_gdsc/cross_validation/np_nmtf/fold_10.txt: -------------------------------------------------------------------------------- 1 | Tried parameters {'K': 6, 'L': 6}. Average performances: {'R^2': 0.7962027030089187, 'MSE': 2.3936326519001163, 'Rp': 0.89252895387061104}. 2 | All performances: {'R^2': [0.7959869706381066, 0.7951509497634806, 0.7969493235960402, 0.7917983478265285, 0.7996969448736383, 0.7945761499005599, 0.7896867883958768, 0.8010506447138536, 0.7973366253719005, 0.7997942850092019], 'MSE': [2.4125360112093301, 2.4321089495536086, 2.3785545457598629, 2.4278312020304433, 2.3925625440518412, 2.3547068077976792, 2.3686869906807329, 2.3720104723133737, 2.4085107533355119, 2.3888182422687745], 'Rp': [0.89247689970218513, 0.8918651487165179, 0.89293690822373861, 0.89014332519541384, 0.89434738602375319, 0.89179573129795087, 0.889038490013907, 0.89510910936092192, 0.8931810196232669, 0.89439552054845484]}. 3 | Tried parameters {'K': 8, 'L': 8}. Average performances: {'R^2': 0.8028743159206282, 'MSE': 2.3159767919964684, 'Rp': 0.89639111146093975}. 4 | All performances: {'R^2': [0.8019130426479211, 0.8039798797754616, 0.8017096416266156, 0.8045136679705778, 0.805594218026878, 0.8055794698377987, 0.8046734984279295, 0.8019207232836603, 0.8022474012860819, 0.796611616323357], 'MSE': [2.2756878597227019, 2.3261329010708769, 2.3246602151872202, 2.3359126338758314, 2.3291356177319114, 2.2818278400775198, 2.3200395012492949, 2.3388578608981754, 2.2875386837406597, 2.3399748064104879], 'Rp': [0.8957245125924933, 0.89681567482548652, 0.89566170837658976, 0.89711747083963589, 0.89773778255997994, 0.89793974646105512, 0.89758487382370156, 0.89600275232751192, 0.89629057551020386, 0.89303601729273985]}. 5 | Tried parameters {'K': 10, 'L': 10}. Average performances: {'R^2': 0.8056584869493152, 'MSE': 2.2828101914911216, 'Rp': 0.89804463141550228}. 6 | All performances: {'R^2': [0.7952093373746402, 0.8019811480395942, 0.8096655946682076, 0.7972032570401394, 0.8108057961654705, 0.8077523366359415, 0.7928828981339053, 0.8182132920277003, 0.810007866782938, 0.8128633426246147], 'MSE': [2.4014181521428939, 2.2994028205470158, 2.2961444437199772, 2.3258121356602111, 2.175861105126502, 2.2374753813735562, 2.4287027811997315, 2.1554886929719257, 2.2572489420291766, 2.2505474601402278], 'Rp': [0.8924534481072659, 0.89617879160321468, 0.90016992738919865, 0.8933114452597315, 0.90100948537653491, 0.89956761398233076, 0.89081967738690337, 0.90502044855233454, 0.90018260254792426, 0.90173287394958423]}. 7 | Best performances: {'R^2': 0.8056584869493152, 'MSE': 2.2828101914911216, 'Rp': 0.89804463141550228}. 
Best parameters: {'K': 10, 'L': 10}. 8 | -------------------------------------------------------------------------------- /experiments/experiments_gdsc/cross_validation/np_nmtf/fold_2.txt: -------------------------------------------------------------------------------- 1 | Tried parameters {'K': 6, 'L': 6}. Average performances: {'R^2': 0.7946522802147737, 'MSE': 2.4091382983018734, 'Rp': 0.89169311760322434}. 2 | All performances: {'R^2': [0.8000569040927532, 0.8091322941196964, 0.7957285744250913, 0.7901977707725168, 0.7915534329410909, 0.7945219387414444, 0.7954266072892766, 0.7877669115389623, 0.786394673269638, 0.7957436949572676], 'MSE': [2.3948155427310391, 2.2863440424424439, 2.3948497443038783, 2.4579662634655768, 2.454431927336242, 2.3738582922274305, 2.3919940020141639, 2.4595950303302856, 2.4739895279683979, 2.4035386101992748], 'Rp': [0.8946687142763815, 0.89968079972229509, 0.89230491886791075, 0.88914375470802232, 0.89010508005765621, 0.89175323078082958, 0.8919296638755716, 0.88797231037752344, 0.88720035247823026, 0.89217235088782243]}. 3 | Tried parameters {'K': 8, 'L': 8}. Average performances: {'R^2': 0.8009583033431479, 'MSE': 2.3354299140272849, 'Rp': 0.89527503027427147}. 4 | All performances: {'R^2': [0.8038189537325939, 0.8067094787430149, 0.799421147933612, 0.7940147353250736, 0.7954182713439355, 0.8112072748202167, 0.8033374435947094, 0.8008082361604596, 0.7962787608890061, 0.7985687308888577], 'MSE': [2.2775440014675103, 2.3073840327230726, 2.2251856978503253, 2.3944902356305415, 2.3830037099235617, 2.2085859675496602, 2.3910241568014086, 2.3743954219025736, 2.3728241080673667, 2.4198618083568322], 'Rp': [0.89689384936601291, 0.89861580020976839, 0.89486680983261369, 0.89134664287576593, 0.89208889220503118, 0.90084165616134937, 0.89653281875824209, 0.89498173965876215, 0.89252204709999583, 0.8940600465751728]}. 5 | Tried parameters {'K': 10, 'L': 10}. Average performances: {'R^2': 0.8029663502145356, 'MSE': 2.3120449560692848, 'Rp': 0.89650829367241669}. 6 | All performances: {'R^2': [0.8118443788980988, 0.7982761582334134, 0.8054113508909871, 0.8000401187474862, 0.7909248439189163, 0.8115191023401522, 0.802353454367045, 0.7974777338971236, 0.8046538800343781, 0.8071624808177548], 'MSE': [2.238817749613915, 2.3718311071675506, 2.321085138854412, 2.3069904316570242, 2.4384709383389773, 2.2660479738340973, 2.2465856242139819, 2.4028720226236793, 2.2737899950494542, 2.2539585793397547], 'Rp': [0.90126474846899507, 0.89383068465732296, 0.89782899235744995, 0.8951151917108997, 0.88987891254965101, 0.90116928601449775, 0.89640284958312444, 0.89340672124408593, 0.89718488657153039, 0.89900066356661046]}. 7 | Best performances: {'R^2': 0.8029663502145356, 'MSE': 2.3120449560692848, 'Rp': 0.89650829367241669}. Best parameters: {'K': 10, 'L': 10}. 8 | -------------------------------------------------------------------------------- /experiments/experiments_gdsc/cross_validation/np_nmtf/fold_4.txt: -------------------------------------------------------------------------------- 1 | Tried parameters {'K': 6, 'L': 6}. Average performances: {'R^2': 0.7950874521624522, 'MSE': 2.4099176058721867, 'Rp': 0.89188437812650834}. 
2 | All performances: {'R^2': [0.7934265600418325, 0.794397141121076, 0.7834791018577417, 0.7946554668516492, 0.7912707234891984, 0.7954116360248146, 0.8020378836631872, 0.8002171280111819, 0.797782279209116, 0.7981966013547238], 'MSE': [2.3972913804342868, 2.4462897570763844, 2.545852871423413, 2.4025244229289457, 2.3990153289118039, 2.3657112797051689, 2.3982445616113042, 2.2887828923566937, 2.4565279959010407, 2.3989355683728242], 'Rp': [0.8910131495949245, 0.8914621928743427, 0.88536410169325674, 0.89150482081937421, 0.88979551146025782, 0.89252186873863548, 0.8956392728458733, 0.89473361294657283, 0.89327820230196953, 0.8935310479898767]}. 3 | Tried parameters {'K': 8, 'L': 8}. Average performances: {'R^2': 0.8018644803855853, 'MSE': 2.3306768091956491, 'Rp': 0.89585244288485377}. 4 | All performances: {'R^2': [0.7998618381825771, 0.7984758980125775, 0.7975094166463333, 0.7993220682686236, 0.8067454463416195, 0.8126131749328396, 0.8012243311235306, 0.8004104174758595, 0.8039269011633705, 0.7985553117085221], 'MSE': [2.3616973653495599, 2.3922748088099079, 2.4469261165153782, 2.3685299013018382, 2.2846072647427667, 2.2208802602255195, 2.2528069487309055, 2.3108692766233925, 2.2387157946497718, 2.4294603550074498], 'Rp': [0.89468738306211437, 0.8939699469380411, 0.89333507283527014, 0.89438184405604759, 0.89863487780213458, 0.90158145610463758, 0.89542621542839362, 0.89547815412944198, 0.89724062710633579, 0.89378885138612052]}. 5 | Tried parameters {'K': 10, 'L': 10}. Average performances: {'R^2': 0.8051988981636494, 'MSE': 2.2906800918261339, 'Rp': 0.89775970520821491}. 6 | All performances: {'R^2': [0.8039768394265854, 0.7951560725701752, 0.7986521865506377, 0.8085563548487484, 0.8077833228146585, 0.8144529561676849, 0.7988272888585155, 0.8056176811720999, 0.8042356870845461, 0.8147305921428414], 'MSE': [2.2864434283187531, 2.4051797154234449, 2.2952099931550434, 2.28558345606428, 2.3286454151639648, 2.2131017894237646, 2.3558272688800757, 2.2807439502444762, 2.2758331563451724, 2.1802327452423627], 'Rp': [0.89714797830755455, 0.89204387677125041, 0.894204692685447, 0.89953630450523814, 0.89893001148813634, 0.9028470593014466, 0.89405400165104298, 0.89821175949161602, 0.89729245399739443, 0.90332891388302305]}. 7 | Best performances: {'R^2': 0.8051988981636494, 'MSE': 2.2906800918261339, 'Rp': 0.89775970520821491}. Best parameters: {'K': 10, 'L': 10}. 8 | -------------------------------------------------------------------------------- /experiments/experiments_gdsc/cross_validation/np_nmtf/fold_5.txt: -------------------------------------------------------------------------------- 1 | Tried parameters {'K': 6, 'L': 6}. Average performances: {'R^2': 0.7958402302389869, 'MSE': 2.3945373072588754, 'Rp': 0.89235187386994297}. 2 | All performances: {'R^2': [0.7940207279673263, 0.795837407255736, 0.7993871010255709, 0.7902867310992964, 0.792361322799282, 0.8071115770611526, 0.7881777543312496, 0.7970320689931153, 0.8003778802941676, 0.7938097315629732], 'MSE': [2.4081763119130284, 2.3678890233351226, 2.3789678487694337, 2.4541092846724992, 2.4239173652173802, 2.2585787225459888, 2.4508681036866693, 2.3780773014054111, 2.3869254806122329, 2.4378636304309866], 'Rp': [0.89139752102312586, 0.89266019778280037, 0.89444792022457265, 0.88927831495569287, 0.89034727241906342, 0.89848210168041975, 0.88820812803332394, 0.89278446280918677, 0.89477556881278963, 0.8911372509584542]}. 3 | Tried parameters {'K': 8, 'L': 8}. 
Average performances: {'R^2': 0.8019570441102051, 'MSE': 2.3232871438767111, 'Rp': 0.89584534414652794}. 4 | All performances: {'R^2': [0.8048247888815004, 0.798500936204051, 0.8066825644642783, 0.8018207342898751, 0.8130024542366234, 0.7983025758139942, 0.7992729913073152, 0.8053085345934738, 0.7944551253934088, 0.7973997359175296], 'MSE': [2.3183898000977181, 2.2842494373009536, 2.2495209058448578, 2.2800699453996081, 2.1817190188188023, 2.4009495614278324, 2.3823247222872133, 2.2912030018639888, 2.4160814299723405, 2.4283636157537987], 'Rp': [0.89742028268222795, 0.89395156221256966, 0.89870803685506662, 0.89560520110452202, 0.90205487880530766, 0.89395862285382088, 0.8942347332010061, 0.89767678873256274, 0.89168008020363809, 0.89316325481455783]}. 5 | Tried parameters {'K': 10, 'L': 10}. Average performances: {'R^2': 0.804128574013742, 'MSE': 2.2970066124616162, 'Rp': 0.89724386792145572}. 6 | All performances: {'R^2': [0.8018918940502309, 0.796376676917294, 0.8062295864621227, 0.8082060853516105, 0.8124761605307512, 0.796538157062373, 0.8134384566254184, 0.792980154343343, 0.8111628238974505, 0.8019857448968262], 'MSE': [2.3228671565079866, 2.4038097663148181, 2.3105097578576017, 2.2190218756207134, 2.1603316694782113, 2.3226214136563268, 2.2680059983564402, 2.3996616134702533, 2.230436034835257, 2.3328008385185544], 'Rp': [0.89596193285991099, 0.89294528687928565, 0.89823431276554377, 0.89925281985266425, 0.90203515241436072, 0.89295471952452565, 0.90251673601363913, 0.89130734779662391, 0.90140689845976385, 0.89582347264824025]}. 7 | Best performances: {'R^2': 0.804128574013742, 'MSE': 2.2970066124616162, 'Rp': 0.89724386792145572}. Best parameters: {'K': 10, 'L': 10}. 8 | -------------------------------------------------------------------------------- /experiments/experiments_gdsc/cross_validation/np_nmtf/fold_7.txt: -------------------------------------------------------------------------------- 1 | Tried parameters {'K': 6, 'L': 6}. Average performances: {'R^2': 0.7962373804844697, 'MSE': 2.3853968914479422, 'Rp': 0.89256292745410359}. 2 | All performances: {'R^2': [0.8015608956764622, 0.7876138719134953, 0.7935886607772044, 0.7892693423573884, 0.7987991598231307, 0.7938395728539742, 0.796096706569903, 0.8047219572561222, 0.7983943468917059, 0.7984892907253112], 'MSE': [2.3495799604902268, 2.4571317674208109, 2.4023851550842719, 2.4682236903080623, 2.3387523111291411, 2.4153742583515334, 2.403010174729153, 2.303202124129101, 2.3685595362115213, 2.3477499366256001], 'Rp': [0.89563064955076099, 0.88795531351128754, 0.8910866625222128, 0.88864332874497964, 0.89405702524284802, 0.89136444225606593, 0.89240422521857332, 0.89714012724208325, 0.89363408197011895, 0.8937134182821056]}. 3 | Tried parameters {'K': 8, 'L': 8}. Average performances: {'R^2': 0.801060277630849, 'MSE': 2.3288473103475309, 'Rp': 0.89532909472238364}. 4 | All performances: {'R^2': [0.8038257081318991, 0.7916609129375809, 0.7970632591201944, 0.8137899871206278, 0.803238527407148, 0.7991606776891241, 0.8001396576693462, 0.8014644315127357, 0.8017419185025294, 0.7985176962173043], 'MSE': [2.3377803883604593, 2.4865238573561879, 2.3508379593618489, 2.180610092879713, 2.3671274459237561, 2.3350184560180214, 2.3391118099968953, 2.3184257824042969, 2.3307482815339635, 2.2422890296401667], 'Rp': [0.89692183415336268, 0.89013716032794021, 0.89336956941142875, 0.90236924566559151, 0.89650782879980195, 0.89421733242733037, 0.89470885119314936, 0.89555373955483519, 0.8956627074752348, 0.89384267821515906]}. 
5 | Tried parameters {'K': 10, 'L': 10}. Average performances: {'R^2': 0.8049883317548986, 'MSE': 2.2830135555330942, 'Rp': 0.89766937945265668}. 6 | All performances: {'R^2': [0.8015963912749063, 0.8083878374418325, 0.8044650965318931, 0.8056131931880682, 0.8041182151113851, 0.8076909047647529, 0.7997312196829396, 0.8076027952025622, 0.8100207287101648, 0.8006569356404791], 'MSE': [2.325063405979956, 2.2633171437171256, 2.2128991414886454, 2.3228738184807427, 2.2825584293815857, 2.3202674699303674, 2.298570078595001, 2.2318234984901948, 2.176913347645943, 2.3958492216213787], 'Rp': [0.89576918933267324, 0.89976002518785425, 0.8974568840439372, 0.89792639689406428, 0.89746256458333418, 0.89894720356090274, 0.89491354387162947, 0.89906124420735756, 0.90027894167880318, 0.89511780116601025]}. 7 | Best performances: {'R^2': 0.8049883317548986, 'MSE': 2.2830135555330942, 'Rp': 0.89766937945265668}. Best parameters: {'K': 10, 'L': 10}. 8 | -------------------------------------------------------------------------------- /experiments/experiments_gdsc/cross_validation/np_nmtf/fold_8.txt: -------------------------------------------------------------------------------- 1 | Tried parameters {'K': 6, 'L': 6}. Average performances: {'R^2': 0.7964900224414253, 'MSE': 2.3861335874411442, 'Rp': 0.89266053002129664}. 2 | All performances: {'R^2': [0.794475052052582, 0.804578074407563, 0.7953904648899824, 0.7873804476088627, 0.8038215772603295, 0.7915910843091055, 0.8087018029665941, 0.7955745265118743, 0.7902222163759548, 0.7931649780314047], 'MSE': [2.4001096679360101, 2.2667472151286292, 2.4982148114733134, 2.423070768777603, 2.3450689330003085, 2.4274455551094198, 2.2297924090176267, 2.4038778476551328, 2.4566117654413899, 2.410396900872009], 'Rp': [0.89151606522610005, 0.8974139242351824, 0.89194020988267142, 0.88755902528355424, 0.89671448121692243, 0.88994643628806225, 0.89947889364857059, 0.89221660294436078, 0.88899551995073023, 0.89082414153681144]}. 3 | Tried parameters {'K': 8, 'L': 8}. Average performances: {'R^2': 0.7997221295979017, 'MSE': 2.3471753676047951, 'Rp': 0.89464267116036622}. 4 | All performances: {'R^2': [0.7879088633138132, 0.8057443062004819, 0.7995848586996388, 0.7980329678791787, 0.8009123378226934, 0.8152522376049086, 0.805901867285057, 0.7854884604825368, 0.794763828338703, 0.8036315683520067], 'MSE': [2.4311718989280275, 2.2865164856718647, 2.3057491685551024, 2.3677476950803777, 2.3398133747572234, 2.2163812952808297, 2.2606850213238907, 2.4693772984161018, 2.403906443901008, 2.3904049941335233], 'Rp': [0.88820254833377621, 0.89799051336381031, 0.89471959579938143, 0.89346168254896052, 0.89530942571427796, 0.90305398395656822, 0.89817901705708114, 0.88716150328664389, 0.89172818861625236, 0.89662025292690894]}. 5 | Tried parameters {'K': 10, 'L': 10}. Average performances: {'R^2': 0.8038687646008519, 'MSE': 2.2988967993000236, 'Rp': 0.89706053293886379}. 
6 | All performances: {'R^2': [0.7930382719289255, 0.79626905817854, 0.8099109552715251, 0.803207024194675, 0.8101198957306462, 0.7982475595755186, 0.8194113774034337, 0.8053253595948064, 0.8044065019807765, 0.7987516421496721], 'MSE': [2.3761997204140402, 2.3456636913819295, 2.3148546853832812, 2.3168488669180829, 2.2619395262550315, 2.3549028633139377, 2.1302607551241199, 2.25308533418259, 2.2934829666254726, 2.3417295834017482], 'Rp': [0.89149652076402786, 0.89280160729539848, 0.90038573819372447, 0.89667502415293232, 0.90016756417796462, 0.89385755993691185, 0.90550618152935503, 0.89800573707409492, 0.89738985216351108, 0.89431954410071768]}. 7 | Best performances: {'R^2': 0.8038687646008519, 'MSE': 2.2988967993000236, 'Rp': 0.89706053293886379}. Best parameters: {'K': 10, 'L': 10}. 8 | -------------------------------------------------------------------------------- /experiments/experiments_gdsc/cross_validation/np_nmtf/fold_9.txt: -------------------------------------------------------------------------------- 1 | Tried parameters {'K': 6, 'L': 6}. Average performances: {'R^2': 0.7963963800861193, 'MSE': 2.3982113726281038, 'Rp': 0.89261941324805338}. 2 | All performances: {'R^2': [0.7949888695361219, 0.8010306367630496, 0.7991311619876645, 0.8021350200758728, 0.7941086418523625, 0.7926981173442411, 0.7970076456232205, 0.7908113770604897, 0.7973274070849623, 0.7947249235332094], 'MSE': [2.4102765911482722, 2.380863298237792, 2.326736466464066, 2.3234155345370442, 2.5192119752188731, 2.4419849869237598, 2.2760263917564942, 2.4327715604823101, 2.4539669976714111, 2.4168599238410171], 'Rp': [0.89197025592442114, 0.89513612660321062, 0.8941939241831639, 0.89581558437995579, 0.8913423061525243, 0.89058054692061173, 0.89311627390117698, 0.88944378883149877, 0.89308531157496085, 0.89151001400900864]}. 3 | Tried parameters {'K': 8, 'L': 8}. Average performances: {'R^2': 0.8017290486819405, 'MSE': 2.3340448490642589, 'Rp': 0.8957243254129883}. 4 | All performances: {'R^2': [0.8012439730174359, 0.791160247067829, 0.7910476177400406, 0.8052265053167637, 0.8064183573714383, 0.7975723030110036, 0.8055658779512659, 0.8135958616158998, 0.8126150843212142, 0.7928446594065144], 'MSE': [2.3613559444606, 2.4063457737220504, 2.4063009256228467, 2.3123768640865232, 2.2920760535064297, 2.3099786587112159, 2.3349040594265476, 2.2392407012312754, 2.2315689466126272, 2.4463005632624761], 'Rp': [0.89531774678600895, 0.88980947409916711, 0.89004735023737214, 0.89770032281523282, 0.89829353614686802, 0.89342798544826008, 0.89779645136681052, 0.90205039669937126, 0.9018589305817627, 0.89094105994903061]}. 5 | Tried parameters {'K': 10, 'L': 10}. Average performances: {'R^2': 0.8032228166725975, 'MSE': 2.3174364968101586, 'Rp': 0.89675388823395452}. 6 | All performances: {'R^2': [0.8085749453675722, 0.7975557461175751, 0.7942203071834579, 0.7954438427412809, 0.8147565597668069, 0.8049120478829231, 0.8071341008675909, 0.8093371221337018, 0.8086980499584601, 0.7915954447066054], 'MSE': [2.234975086516183, 2.311453596756829, 2.4544707874414051, 2.4695313604543663, 2.2043290406101459, 2.2277643068806481, 2.3016331792080971, 2.2234639591739742, 2.3490233796334761, 2.3977202714264609], 'Rp': [0.89976177735888874, 0.89369662094192492, 0.8917313674280164, 0.89215001757957668, 0.90284235443305061, 0.89832614544033462, 0.89882408273539538, 0.90018019859040921, 0.89953461531877854, 0.89049170251316945]}. 7 | Best performances: {'R^2': 0.8032228166725975, 'MSE': 2.3174364968101586, 'Rp': 0.89675388823395452}. 
Best parameters: {'K': 10, 'L': 10}. 8 | -------------------------------------------------------------------------------- /experiments/experiments_gdsc/cross_validation/np_nmtf/fold_3.txt: -------------------------------------------------------------------------------- 1 | Tried parameters {'K': 6, 'L': 6}. Average performances: {'R^2': 0.7965397901626805, 'MSE': 2.3881568828950366, 'Rp': 0.89272365518691998}. 2 | All performances: {'R^2': [0.7937045534860062, 0.8123114155441549, 0.7918976004339093, 0.7855558434518486, 0.8004344378378311, 0.7985765932853583, 0.7985608771389182, 0.7972422786584668, 0.7939476696609845, 0.7931666321293271], 'MSE': [2.3641759944672165, 2.1898715021533013, 2.4983227204748935, 2.5346544382552052, 2.3421510098201006, 2.3334526276186573, 2.3682109295188614, 2.3898883103520006, 2.4134640308733806, 2.4473772654167489], 'Rp': [0.89121247262145231, 0.90132710125301474, 0.88997822486264555, 0.88653974153748771, 0.8950024865577576, 0.89385509123989759, 0.89399933466505688, 0.89307511773430603, 0.89139070308957835, 0.89085627830800274]}. 3 | Tried parameters {'K': 8, 'L': 8}. Average performances: {'R^2': 0.8008675620781804, 'MSE': 2.3369687378949195, 'Rp': 0.89524324374406949}. 4 | All performances: {'R^2': [0.8040702518483612, 0.7957541457139967, 0.8043670153189693, 0.7970107984254063, 0.7994914927552363, 0.7977877963977614, 0.7972950888811783, 0.8129681487450874, 0.8030142606189579, 0.7969166220768514], 'MSE': [2.3460168032678639, 2.3899380279689608, 2.2513577539230032, 2.4017201908785273, 2.2529659144084744, 2.3662195184231893, 2.3916965907331447, 2.1850075177708757, 2.358720827150032, 2.4260442344251287], 'Rp': [0.896951825128949, 0.89259869385738488, 0.89713641009809386, 0.89305138153040564, 0.89465924572324718, 0.89366571950265339, 0.89314072548487422, 0.90182016275119536, 0.89637745407477321, 0.89303081928911865]}. 5 | Tried parameters {'K': 10, 'L': 10}. Average performances: {'R^2': 0.8020843324781719, 'MSE': 2.3220336321770811, 'Rp': 0.89604381132040045}. 6 | All performances: {'R^2': [0.7966805567974335, 0.8034904045142354, 0.7990622945019292, 0.8055367169519014, 0.799049000125107, 0.7990725918556073, 0.8045158820824074, 0.8116612597747448, 0.8003302047569263, 0.8014444134214274], 'MSE': [2.366538762856337, 2.3029380258119412, 2.3007728352311276, 2.2849673222532236, 2.3788373894280221, 2.4082518971435269, 2.2629805132764318, 2.2674543428355669, 2.2582432908801553, 2.3893519420544784], 'Rp': [0.89305688274992512, 0.89663656581300721, 0.89441372763367488, 0.89773566622985834, 0.89439502769169721, 0.89457158570956796, 0.89757415641973803, 0.90131898987600501, 0.89495006836338398, 0.89578544271714688]}. 7 | Best performances: {'R^2': 0.8020843324781719, 'MSE': 2.3220336321770811, 'Rp': 0.89604381132040045}. Best parameters: {'K': 10, 'L': 10}. 8 | -------------------------------------------------------------------------------- /experiments/experiments_gdsc/cross_validation/np_nmtf/fold_6.txt: -------------------------------------------------------------------------------- 1 | Tried parameters {'K': 6, 'L': 6}. Average performances: {'R^2': 0.7952803369442216, 'MSE': 2.4026048701453222, 'Rp': 0.89201558726821273}. 
2 | All performances: {'R^2': [0.7977771201750432, 0.8039212527492999, 0.8006453852695992, 0.7842785135291789, 0.8010595676347045, 0.7843197592376037, 0.7991280193326178, 0.7996025576942166, 0.7888416267302656, 0.7932295670896878], 'MSE': [2.3331045323342203, 2.3965694439049181, 2.302505575036764, 2.4618875102975961, 2.3881187786815237, 2.5260202018624969, 2.3557077163595479, 2.3607984762269321, 2.4581769728542722, 2.4431594938949477], 'Rp': [0.89354344382185591, 0.89665266067193905, 0.89502649015246205, 0.88590397751995431, 0.89503694668305078, 0.88617005619008937, 0.89414819552702207, 0.8945155617897379, 0.88842947592867938, 0.89072906439733779]}. 3 | Tried parameters {'K': 8, 'L': 8}. Average performances: {'R^2': 0.8004171700687225, 'MSE': 2.3428801344649193, 'Rp': 0.89501776861235116}. 4 | All performances: {'R^2': [0.7918697033098796, 0.8065369167726792, 0.7964846281051495, 0.7997335942370991, 0.8086342523731586, 0.7998339835651985, 0.8064152232602552, 0.7960017293527646, 0.7991749548866869, 0.7994867148243546], 'MSE': [2.4486314638975251, 2.2576154930906593, 2.364139770278765, 2.3872472895361008, 2.2719223876318009, 2.3920607808680754, 2.2501439624653741, 2.3821015138581001, 2.3558835323844569, 2.3190551506383339], 'Rp': [0.89011112800870651, 0.89850781254851975, 0.89277991691190295, 0.89485025549589203, 0.89949730019392693, 0.89488008514924722, 0.89830075046921265, 0.89267834978320748, 0.89414456291272149, 0.89442752465017505]}. 5 | Tried parameters {'K': 10, 'L': 10}. Average performances: {'R^2': 0.8052883269730977, 'MSE': 2.2853406282207653, 'Rp': 0.89776655739064259}. 6 | All performances: {'R^2': [0.8081524404338646, 0.7937885698989963, 0.8072212312359672, 0.8085978650015806, 0.8038642392447332, 0.8074333955813097, 0.8071426704318253, 0.8053098764967874, 0.8111183562449857, 0.8002546251609262], 'MSE': [2.3111691792446067, 2.3750378134243428, 2.2813537761593516, 2.2375732860074868, 2.3039484171398312, 2.2817379778741946, 2.2886098494117908, 2.2393009911979633, 2.2379920990999973, 2.2966828926480871], 'Rp': [0.89927455899867881, 0.89147066068622771, 0.89891497871229142, 0.89967049370813446, 0.89706734855607184, 0.89884031750182958, 0.89860907091636288, 0.89778327572400185, 0.90071415857169124, 0.8953207105311356]}. 7 | Best performances: {'R^2': 0.8052883269730977, 'MSE': 2.2853406282207653, 'Rp': 0.89776655739064259}. Best parameters: {'K': 10, 'L': 10}. 8 | -------------------------------------------------------------------------------- /experiments/experiments_toy/time/nmtf_np_time.py: -------------------------------------------------------------------------------- 1 | """ 2 | Recover the toy dataset generated by example/generate_toy/bnmf/generate_bnmtf.py 3 | using the non-probabilistic NMTF, and plot the MSE against timestamps. 4 | 5 | We can plot the MSE, R2 and Rp as it converges, on the entire dataset. 6 | 7 | We have I=100, J=80, K=5, L=5, and no test data. 8 | """ 9 | 10 | import sys, os 11 | project_location = os.path.dirname(__file__)+"/../../../../" 12 | sys.path.append(project_location) 13 | 14 | from BNMTF.code.models.nmtf_np import NMTF 15 | 16 | import numpy, random, scipy, matplotlib.pyplot as plt 17 | 18 | ########## 19 | 20 | input_folder = project_location+"BNMTF/data_toy/bnmtf/" 21 | 22 | repeats = 10 23 | 24 | iterations = 10000 25 | I, J, K, L = 100,80,5,5 26 | 27 | init_FG = 'kmeans' 28 | init_S = 'exponential' 29 | expo_prior = 1/10. 
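# Clarifying comment (an assumption about NMTF.initialise in BNMTF/code/models/nmtf_np.py,
# not a statement from the original script): init_FG = 'kmeans' presumably initialises F and G
# by clustering the rows and columns of R, while init_S = 'exponential' presumably draws S from
# an Exponential distribution parameterised by expo_prior = 1/10, giving a fairly flat start.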
30 | 31 | # Load in data 32 | R = numpy.loadtxt(input_folder+"R.txt") 33 | M = numpy.ones((I,J)) 34 | 35 | # Run the VB algorithm, times 36 | times_repeats = [] 37 | performances_repeats = [] 38 | for i in range(0,repeats): 39 | # Set all the seeds 40 | numpy.random.seed(3) 41 | random.seed(4) 42 | scipy.random.seed(5) 43 | 44 | # Run the classifier 45 | nmtf = NMTF(R,M,K,L) 46 | nmtf.initialise(init_S,init_FG,expo_prior) 47 | nmtf.run(iterations) 48 | 49 | # Extract the performances and timestamps across all iterations 50 | times_repeats.append(nmtf.all_times) 51 | performances_repeats.append(nmtf.all_performances) 52 | 53 | # Check whether seed worked: all performances should be the same 54 | assert all([numpy.array_equal(performances, performances_repeats[0]) for performances in performances_repeats]), \ 55 | "Seed went wrong - performances not the same across repeats!" 56 | 57 | # Print out the performances, and the average times 58 | all_times_average = list(numpy.average(times_repeats, axis=0)) 59 | all_performances = performances_repeats[0] 60 | print "np_all_times_average = %s" % all_times_average 61 | print "np_all_performances = %s" % all_performances 62 | 63 | 64 | # Print all time plots, the average, and performance vs iterations 65 | plt.figure() 66 | plt.title("Performance against time") 67 | plt.ylim(0,10) 68 | for times in times_repeats: 69 | plt.plot(times, all_performances['MSE']) 70 | 71 | plt.figure() 72 | plt.title("Performance against average time") 73 | plt.plot(all_times_average, all_performances['MSE']) 74 | plt.ylim(0,10) 75 | 76 | plt.figure() 77 | plt.title("Performance against iteration") 78 | plt.plot(all_performances['MSE']) 79 | plt.ylim(0,10) -------------------------------------------------------------------------------- /experiments/experiments_gdsc/cross_validation/vb_nmf/results.txt: -------------------------------------------------------------------------------- 1 | All model fits for fold 1, metric AIC: [240290.16735229088, 238980.60715952617, 238049.02123880715, 239030.05666991285]. 2 | Best K for fold 1: 25. 3 | Performance: {'R^2': 0.8123419361488506, 'MSE': 2.2242309355503416, 'Rp': 0.90259104726287731}. 4 | 5 | All model fits for fold 2, metric AIC: [240229.80122119767, 239274.82620135765, 238160.91629605644, 237460.97778081335]. 6 | Best K for fold 2: 30. 7 | Performance: {'R^2': 0.8011409466575017, 'MSE': 2.3108126630384804, 'Rp': 0.89686786281647779}. 8 | 9 | All model fits for fold 3, metric AIC: [239983.14391001957, 238318.7796743041, 238267.80746191408, 237854.79107721607]. 10 | Best K for fold 3: 30. 11 | Performance: {'R^2': 0.7943028271877304, 'MSE': 2.4095896447817631, 'Rp': 0.89414010092697216}. 12 | 13 | All model fits for fold 4, metric AIC: [240174.32637766597, 239002.67747375579, 238210.09773381858, 238586.6912395501]. 14 | Best K for fold 4: 25. 15 | Performance: {'R^2': 0.8125046212085996, 'MSE': 2.2188694213830114, 'Rp': 0.90282141562178775}. 16 | 17 | All model fits for fold 5, metric AIC: [240212.19600782395, 238669.50000280215, 237874.41753096576, 237701.38034011592]. 18 | Best K for fold 5: 30. 19 | Performance: {'R^2': 0.7934881370166628, 'MSE': 2.4185938516134278, 'Rp': 0.89292623713793573}. 20 | 21 | All model fits for fold 6, metric AIC: [240173.43494869216, 238558.97209104756, 238422.99853905643, 237979.10762966136]. 22 | Best K for fold 6: 30. 23 | Performance: {'R^2': 0.8111969927756486, 'MSE': 2.1808748510586002, 'Rp': 0.90319019710203263}. 
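A minimal sketch (not from the repository code) of how each fold's 'Best K' relates to the AIC list above it: the K with the smallest AIC is kept. The grid of K values tried is not stated in this file; [15, 20, 25, 30] is an inference from the reported best values of 25 and 30, not a fact from the source.

values_K = [15, 20, 25, 30]   # assumed grid, see note above
fold_1_aic = [240290.16735229088, 238980.60715952617, 238049.02123880715, 239030.05666991285]
best_K = values_K[fold_1_aic.index(min(fold_1_aic))]
print "Best K for fold 1: %s." % best_K   # -> 25, matching the fold 1 entry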
24 | 25 | All model fits for fold 7, metric AIC: [239869.44683602237, 238353.65447191551, 238152.05580152466, 238180.82009855763]. 26 | Best K for fold 7: 25. 27 | Performance: {'R^2': 0.8058878338360765, 'MSE': 2.2503432196374651, 'Rp': 0.89931551399355991}. 28 | 29 | All model fits for fold 8, metric AIC: [240577.89475339543, 238771.80770711903, 237667.67713085565, 238088.97970950397]. 30 | Best K for fold 8: 25. 31 | Performance: {'R^2': 0.811089129626958, 'MSE': 2.2305023229025145, 'Rp': 0.90161087516091587}. 32 | 33 | All model fits for fold 9, metric AIC: [240287.46868564631, 238744.09024294608, 238219.47537998416, 238134.03195392119]. 34 | Best K for fold 9: 30. 35 | Performance: {'R^2': 0.798953276136085, 'MSE': 2.3595465204422488, 'Rp': 0.89633675449065164}. 36 | 37 | All model fits for fold 10, metric AIC: [240451.49121444131, 238581.40721198689, 237968.91066330951, 238284.25023562534]. 38 | Best K for fold 10: 25. 39 | Performance: {'R^2': 0.8151865445946502, 'MSE': 2.2186318302878667, 'Rp': 0.90445283498221596}. 40 | 41 | -------------------------------------------------------------------------------- /experiments/experiments_gdsc/cross_validation/gibbs_nmf/results.txt: -------------------------------------------------------------------------------- 1 | All model fits for fold 1, metric AIC: [243595.44979297026, 241968.19780653264, 241833.11276982535, 242525.46377454145]. 2 | Best K for fold 1: 25. 3 | Performance: {'R^2': 0.8248485588294542, 'MSE': 2.0115451703143985, 'Rp': 0.90908045103379365}. 4 | 5 | All model fits for fold 2, metric AIC: [243025.70626974344, 242168.69699825841, 242026.95270679001, 242134.08099919959]. 6 | Best K for fold 2: 25. 7 | Performance: {'R^2': 0.8219514639515233, 'MSE': 2.0532542729784833, 'Rp': 0.90755752809182066}. 8 | 9 | All model fits for fold 3, metric AIC: [243612.7997870651, 242259.96864944304, 242061.09393043793, 242475.63898623944]. 10 | Best K for fold 3: 25. 11 | Performance: {'R^2': 0.8217549958515522, 'MSE': 2.0454971069846226, 'Rp': 0.90714536611822205}. 12 | 13 | All model fits for fold 4, metric AIC: [243420.81499282017, 241914.29134586823, 241670.16193069995, 242280.64342551032]. 14 | Best K for fold 4: 25. 15 | Performance: {'R^2': 0.8349672123366683, 'MSE': 1.994656076757727, 'Rp': 0.91396299549995652}. 16 | 17 | All model fits for fold 5, metric AIC: [243255.89851395186, 242389.40115483353, 242100.97265390822, 242468.23189654254]. 18 | Best K for fold 5: 25. 19 | Performance: {'R^2': 0.830543344804296, 'MSE': 2.0281421630490297, 'Rp': 0.91179392839675322}. 20 | 21 | All model fits for fold 6, metric AIC: [243313.94572363066, 242350.42676329185, 242011.27639744786, 241870.62415823588]. 22 | Best K for fold 6: 30. 23 | Performance: {'R^2': 0.8229475100079148, 'MSE': 2.0691704067461281, 'Rp': 0.90832416407316152}. 24 | 25 | All model fits for fold 7, metric AIC: [243305.98134092687, 241888.00165122704, 241598.72213610105, 242204.50154172539]. 26 | Best K for fold 7: 25. 27 | Performance: {'R^2': 0.8234388009582426, 'MSE': 2.0708801136454622, 'Rp': 0.90786269315758972}. 28 | 29 | All model fits for fold 8, metric AIC: [243113.28511082294, 242329.20180314814, 241593.33037916449, 242287.90117164943]. 30 | Best K for fold 8: 25. 31 | Performance: {'R^2': 0.8228191950789238, 'MSE': 2.1137440615703653, 'Rp': 0.90778790396895936}. 32 | 33 | All model fits for fold 9, metric AIC: [243404.35296055069, 242122.98676400131, 241606.51841963449, 242404.48081945442]. 34 | Best K for fold 9: 25. 
35 | Performance: {'R^2': 0.8195240616800068, 'MSE': 2.1153688464049725, 'Rp': 0.90600658191475891}. 36 | 37 | All model fits for fold 10, metric AIC: [243643.31508766502, 242065.17131888881, 241698.58228740888, 242122.23747144494]. 38 | Best K for fold 10: 25. 39 | Performance: {'R^2': 0.8266748390223762, 'MSE': 2.0478097531374373, 'Rp': 0.90967154000028361}. 40 | 41 | -------------------------------------------------------------------------------- /experiments/experiments_gdsc/cross_validation/icm_nmf/results.txt: -------------------------------------------------------------------------------- 1 | All model fits for fold 1, metric AIC: [237857.19116199756, 233820.26668651064, 230754.84486460069, 228078.59118779475]. 2 | Best K for fold 1: 30. 3 | Performance: {'R^2': 0.7072309782081623, 'MSE': 3.5039148405029135, 'Rp': 0.85755329346470166}. 4 | 5 | All model fits for fold 2, metric AIC: [237857.19116199756, 233820.26668651064, 230754.84486460069, 228078.59118779475]. 6 | Best K for fold 2: 30. 7 | Performance: {'R^2': 0.2162669348625822, 'MSE': 9.0622730084824674, 'Rp': 0.6982606574842174}. 8 | 9 | All model fits for fold 3, metric AIC: [237857.19116199756, 233820.26668651064, 230754.84486460069, 228078.59118779475]. 10 | Best K for fold 3: 30. 11 | Performance: {'R^2': 0.6853079551313846, 'MSE': 3.7009069757338917, 'Rp': 0.84719052665866601}. 12 | 13 | All model fits for fold 4, metric AIC: [237857.19116199756, 233820.26668651064, 230754.84486460069, 228078.59118779475]. 14 | Best K for fold 4: 30. 15 | Performance: {'R^2': 0.7144108917311998, 'MSE': 3.3451246835265178, 'Rp': 0.86206595196591884}. 16 | 17 | All model fits for fold 5, metric AIC: [237857.19116199756, 233820.26668651064, 230754.84486460069, 228078.59118779475]. 18 | Best K for fold 5: 30. 19 | Performance: {'R^2': 0.7341480430315861, 'MSE': 3.1147595748400358, 'Rp': 0.86644930808200238}. 20 | 21 | All model fits for fold 6, metric AIC: [237857.19116199756, 233820.26668651064, 230754.84486460069, 228078.59118779475]. 22 | Best K for fold 6: 30. 23 | Performance: {'R^2': 0.6671037956836574, 'MSE': 3.9037354439533258, 'Rp': 0.83763274145781152}. 24 | 25 | All model fits for fold 7, metric AIC: [237857.19116199756, 233820.26668651064, 230754.84486460069, 228078.59118779475]. 26 | Best K for fold 7: 30. 27 | Performance: {'R^2': -0.17013019643779437, 'MSE': 13.991970030783968, 'Rp': 0.6226739536838739}. 28 | 29 | All model fits for fold 8, metric AIC: [237857.19116199756, 233820.26668651064, 230754.84486460069, 228078.59118779475]. 30 | Best K for fold 8: 30. 31 | Performance: {'R^2': 0.7288988508164431, 'MSE': 3.1814210224127897, 'Rp': 0.86428985480373288}. 32 | 33 | All model fits for fold 9, metric AIC: [237857.19116199756, 233820.26668651064, 230754.84486460069, 228078.59118779475]. 34 | Best K for fold 9: 30. 35 | Performance: {'R^2': 0.7201731755424339, 'MSE': 3.2677197491020404, 'Rp': 0.86193739061972197}. 36 | 37 | All model fits for fold 10, metric AIC: [237857.19116199756, 233820.26668651064, 230754.84486460069, 228078.59118779475]. 38 | Best K for fold 10: 30. 39 | Performance: {'R^2': -0.07478035943340289, 'MSE': 12.460551868851933, 'Rp': 0.64163764940452106}. 
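A quick summary sketch (not from the repository code) of the ten ICM folds above; the R^2 values are copied verbatim from the per-fold entries, and the negative results for folds 7 and 10 pull the mean down considerably.

import numpy
icm_fold_r2 = [0.7072309782081623, 0.2162669348625822, 0.6853079551313846, 0.7144108917311998,
               0.7341480430315861, 0.6671037956836574, -0.17013019643779437, 0.7288988508164431,
               0.7201731755424339, -0.07478035943340289]
print "mean R^2 across ICM folds = %.4f" % numpy.mean(icm_fold_r2)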
40 | 41 | -------------------------------------------------------------------------------- /experiments/experiments_toy/time/nmf_np_time.py: -------------------------------------------------------------------------------- 1 | """ 2 | Recover the toy dataset generated by example/generate_toy/bnmf/generate_bnmf.py 3 | using the non-probabilistic NMF, and plot the MSE against timestamps. 4 | 5 | We can plot the MSE, R2 and Rp as it converges, on the entire dataset. 6 | 7 | We have I=100, J=80, K=10, and no test data. 8 | We give flatter priors (1/10) than what was used to generate the data (1). 9 | """ 10 | 11 | import sys, os 12 | project_location = os.path.dirname(__file__)+"/../../../../" 13 | sys.path.append(project_location) 14 | 15 | from BNMTF.code.models.nmf_np import NMF 16 | 17 | import numpy, random, scipy, matplotlib.pyplot as plt 18 | 19 | ########## 20 | 21 | input_folder = project_location+"BNMTF/data_toy/bnmf/" 22 | 23 | repeats = 10 24 | 25 | iterations = 2000 26 | I, J, K = 100,80,10 27 | 28 | init_UV = 'exponential' 29 | expo_prior = 1/10. 30 | 31 | # Load in data 32 | R = numpy.loadtxt(input_folder+"R.txt") 33 | M = numpy.ones((I,J)) 34 | 35 | 36 | # Run the VB algorithm, times 37 | times_repeats = [] 38 | performances_repeats = [] 39 | for i in range(0,repeats): 40 | # Set all the seeds 41 | numpy.random.seed(0) 42 | random.seed(0) 43 | scipy.random.seed(0) 44 | 45 | # Run the classifier 46 | nmf = NMF(R,M,K) 47 | nmf.initialise(init_UV,expo_prior) 48 | nmf.run(iterations) 49 | 50 | # Extract the performances and timestamps across all iterations 51 | times_repeats.append(nmf.all_times) 52 | performances_repeats.append(nmf.all_performances) 53 | 54 | # Check whether seed worked: all performances should be the same 55 | assert all([numpy.array_equal(performances, performances_repeats[0]) for performances in performances_repeats]), \ 56 | "Seed went wrong - performances not the same across repeats!" 57 | 58 | # Print out the performances, and the average times 59 | all_times_average = list(numpy.average(times_repeats, axis=0)) 60 | all_performances = performances_repeats[0] 61 | print "np_all_times_average = %s" % all_times_average 62 | print "np_all_performances = %s" % all_performances 63 | 64 | 65 | # Print all time plots, the average, and performance vs iterations 66 | plt.figure() 67 | plt.title("Performance against time") 68 | plt.ylim(0,10) 69 | for times in times_repeats: 70 | plt.plot(times, all_performances['MSE']) 71 | 72 | plt.figure() 73 | plt.title("Performance against average time") 74 | plt.plot(all_times_average, all_performances['MSE']) 75 | plt.ylim(0,10) 76 | 77 | plt.figure() 78 | plt.title("Performance against iteration") 79 | plt.plot(all_performances['MSE']) 80 | plt.ylim(0,10) -------------------------------------------------------------------------------- /experiments/experiments_gdsc/time/nmf_vb_time.py: -------------------------------------------------------------------------------- 1 | """ 2 | Run NMF VB on the Sanger dataset. 3 | 4 | We can plot the MSE, R2 and Rp as it converges, against time, on the entire dataset. 5 | 6 | We give flat priors (1/10). 
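Concretely, judging from the settings below, 'flat priors (1/10)' refers to setting the prior
parameters lambdaU and lambdaV to 1/10 for every entry of U and V.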
7 | """ 8 | 9 | import sys, os 10 | project_location = os.path.dirname(__file__)+"/../../../../" 11 | sys.path.append(project_location) 12 | 13 | from BNMTF.code.models.bnmf_vb_optimised import bnmf_vb_optimised 14 | from BNMTF.data_drug_sensitivity.gdsc.load_data import load_gdsc 15 | 16 | import numpy, random, scipy, matplotlib.pyplot as plt 17 | 18 | ########## 19 | 20 | standardised = False #standardised Sanger or unstandardised 21 | 22 | repeats = 10 23 | 24 | iterations = 500 25 | init_UV = 'random' 26 | I, J, K = 622,138,25 27 | 28 | alpha, beta = 1., 1. #1., 1. 29 | lambdaU = numpy.ones((I,K))/10. 30 | lambdaV = numpy.ones((J,K))/10. 31 | priors = { 'alpha':alpha, 'beta':beta, 'lambdaU':lambdaU, 'lambdaV':lambdaV } 32 | 33 | # Load in data 34 | (_,R,M,_,_,_,_) = load_gdsc(standardised=standardised) 35 | 36 | 37 | # Run the VB algorithm, times 38 | times_repeats = [] 39 | performances_repeats = [] 40 | for i in range(0,repeats): 41 | # Set all the seeds 42 | numpy.random.seed(0) 43 | 44 | # Run the classifier 45 | BNMF = bnmf_vb_optimised(R,M,K,priors) 46 | BNMF.initialise(init_UV) 47 | BNMF.run(iterations) 48 | 49 | # Extract the performances and timestamps across all iterations 50 | times_repeats.append(BNMF.all_times) 51 | performances_repeats.append(BNMF.all_performances) 52 | 53 | # Check whether seed worked: all performances should be the same 54 | assert all(numpy.array_equal(performances, performances_repeats[0]) for performances in performances_repeats), \ 55 | "Seed went wrong - performances not the same across repeats!" 56 | 57 | # Print out the performances, and the average times 58 | vb_all_times_average = list(numpy.average(times_repeats, axis=0)) 59 | vb_all_performances = performances_repeats[0] 60 | print "vb_all_times_average = %s" % vb_all_times_average 61 | print "vb_all_performances = %s" % vb_all_performances 62 | 63 | 64 | # Print all time plots, the average, and performance vs iterations 65 | plt.figure() 66 | plt.title("Performance against time") 67 | plt.ylim(0,10) 68 | for times in times_repeats: 69 | plt.plot(times, vb_all_performances['MSE']) 70 | 71 | plt.figure() 72 | plt.title("Performance against average time") 73 | plt.plot(vb_all_times_average, vb_all_performances['MSE']) 74 | plt.ylim(0,10) 75 | 76 | plt.figure() 77 | plt.title("Performance against iteration") 78 | plt.plot(vb_all_performances['MSE']) 79 | plt.ylim(0,10) -------------------------------------------------------------------------------- /experiments/experiments_gdsc/time/nmf_icm_time.py: -------------------------------------------------------------------------------- 1 | """ 2 | Run NMF ICM on the Sanger dataset. 3 | 4 | We can plot the MSE, R2 and Rp as it converges, against time, on the entire dataset. 5 | 6 | We give flat priors (1/10). 7 | """ 8 | 9 | import sys, os 10 | project_location = os.path.dirname(__file__)+"/../../../../" 11 | sys.path.append(project_location) 12 | 13 | from BNMTF.code.models.nmf_icm import nmf_icm 14 | from BNMTF.data_drug_sensitivity.gdsc.load_data import load_gdsc 15 | 16 | import numpy, random, scipy, matplotlib.pyplot as plt 17 | 18 | ########## 19 | 20 | standardised = False #standardised Sanger or unstandardised 21 | 22 | repeats = 10 23 | 24 | iterations = 1000 25 | init_UV = 'random' 26 | I, J, K = 622,138,25 27 | 28 | minimum_TN = 0.1 29 | 30 | alpha, beta = 1., 1. #1., 1. 31 | lambdaU = numpy.ones((I,K))/10. 32 | lambdaV = numpy.ones((J,K))/10. 
33 | priors = { 'alpha':alpha, 'beta':beta, 'lambdaU':lambdaU, 'lambdaV':lambdaV }
34 | 
35 | # Load in data
36 | (_,R,M,_,_,_,_) = load_gdsc(standardised=standardised)
37 | 
38 | 
39 | # Run the ICM algorithm and time it
40 | times_repeats = []
41 | performances_repeats = []
42 | for i in range(0,repeats):
43 |     # Set the random seed
44 |     numpy.random.seed(0)
45 | 
46 |     # Run the model
47 |     nmf = nmf_icm(R,M,K,priors)
48 |     nmf.initialise(init_UV)
49 |     nmf.run(iterations,minimum_TN=minimum_TN)
50 | 
51 |     # Extract the performances and timestamps across all iterations
52 |     times_repeats.append(nmf.all_times)
53 |     performances_repeats.append(nmf.all_performances)
54 | 
55 | # Check whether the seed worked: all performances should be the same
56 | assert all(numpy.array_equal(performances, performances_repeats[0]) for performances in performances_repeats), \
57 |     "Seed went wrong - performances not the same across repeats!"
58 | 
59 | # Print out the performances, and the average times
60 | icm_all_times_average = list(numpy.average(times_repeats, axis=0))
61 | icm_all_performances = performances_repeats[0]
62 | print "icm_all_times_average = %s" % icm_all_times_average
63 | print "icm_all_performances = %s" % icm_all_performances
64 | 
65 | 
66 | # Plot performance against time (per repeat), against average time, and against iteration
67 | plt.figure()
68 | plt.title("Performance against time")
69 | plt.ylim(0,10)
70 | for times in times_repeats:
71 |     plt.plot(times, icm_all_performances['MSE'])
72 | 
73 | plt.figure()
74 | plt.title("Performance against average time")
75 | plt.plot(icm_all_times_average, icm_all_performances['MSE'])
76 | plt.ylim(0,10)
77 | 
78 | plt.figure()
79 | plt.title("Performance against iteration")
80 | plt.plot(icm_all_performances['MSE'])
81 | plt.ylim(0,10)
--------------------------------------------------------------------------------
/experiments/experiments_gdsc/time/nmf_gibbs_time.py:
--------------------------------------------------------------------------------
1 | """
2 | Run NMF Gibbs on the Sanger dataset.
3 | 
4 | We can plot the MSE, R2 and Rp as it converges, against time, on the entire dataset.
5 | 
6 | We give flat priors (1/10).
7 | """
8 | 
9 | import sys, os
10 | project_location = os.path.dirname(__file__)+"/../../../../"
11 | sys.path.append(project_location)
12 | 
13 | from BNMTF.code.models.bnmf_gibbs_optimised import bnmf_gibbs_optimised
14 | from BNMTF.data_drug_sensitivity.gdsc.load_data import load_gdsc
15 | 
16 | import numpy, random, scipy, matplotlib.pyplot as plt
17 | 
18 | ##########
19 | 
20 | standardised = False # standardised Sanger or unstandardised
21 | 
22 | repeats = 10
23 | 
24 | iterations = 500
25 | init_UV = 'random'
26 | I, J, K = 622,138,25
27 | 
28 | alpha, beta = 1., 1. #1., 1.
29 | lambdaU = numpy.ones((I,K))/10.
30 | lambdaV = numpy.ones((J,K))/10.
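# Note: the dimensions (I, J, K) and the Gamma(1,1) / exponential(1/10) priors match the VB and
# ICM runs above, so differences in the timing plots should reflect the inference method rather
# than the model being fitted.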
31 | priors = { 'alpha':alpha, 'beta':beta, 'lambdaU':lambdaU, 'lambdaV':lambdaV }
32 | 
33 | # Load in data
34 | (_,R,M,_,_,_,_) = load_gdsc(standardised=standardised)
35 | 
36 | 
37 | # Run the Gibbs sampler and time it
38 | times_repeats = []
39 | performances_repeats = []
40 | for i in range(0,repeats):
41 |     # Set the random seed
42 |     numpy.random.seed(0)
43 | 
44 |     # Run the model
45 |     BNMF = bnmf_gibbs_optimised(R,M,K,priors)
46 |     BNMF.initialise(init_UV)
47 |     BNMF.run(iterations)
48 | 
49 |     # Extract the performances and timestamps across all iterations
50 |     times_repeats.append(BNMF.all_times)
51 |     performances_repeats.append(BNMF.all_performances)
52 | 
53 | # Check whether the seed worked: all performances should be the same
54 | assert all(numpy.array_equal(performances, performances_repeats[0]) for performances in performances_repeats), \
55 |     "Seed went wrong - performances not the same across repeats!"
56 | 
57 | # Print out the performances, and the average times
58 | gibbs_all_times_average = list(numpy.average(times_repeats, axis=0))
59 | gibbs_all_performances = performances_repeats[0]
60 | print "gibbs_all_times_average = %s" % gibbs_all_times_average
61 | print "gibbs_all_performances = %s" % gibbs_all_performances
62 | 
63 | # Plot performance against time (per repeat), against average time, and against iteration
64 | plt.figure()
65 | plt.title("Performance against time")
66 | plt.ylim(0,10)
67 | for times in times_repeats:
68 |     plt.plot(times, gibbs_all_performances['MSE'])
69 | 
70 | plt.figure()
71 | plt.title("Performance against average time")
72 | plt.plot(gibbs_all_times_average, gibbs_all_performances['MSE'])
73 | plt.ylim(0,10)
74 | 
75 | plt.figure()
76 | plt.title("Performance against iteration")
77 | plt.plot(gibbs_all_performances['MSE'])
78 | plt.ylim(0,10)
--------------------------------------------------------------------------------
/experiments/experiments_gdsc/time/nmtf_vb_time.py:
--------------------------------------------------------------------------------
1 | """
2 | Run NMTF VB on the Sanger dataset.
3 | 
4 | We can plot the MSE, R2 and Rp as it converges, against time, on the entire dataset.
5 | 
6 | We give flat priors (1/10).
7 | """
8 | 
9 | import sys, os
10 | project_location = os.path.dirname(__file__)+"/../../../../"
11 | sys.path.append(project_location)
12 | 
13 | from BNMTF.code.models.bnmtf_vb_optimised import bnmtf_vb_optimised
14 | from BNMTF.data_drug_sensitivity.gdsc.load_data import load_gdsc
15 | 
16 | import numpy, random, scipy, matplotlib.pyplot as plt
17 | 
18 | ##########
19 | 
20 | standardised = False # standardised Sanger or unstandardised
21 | 
22 | repeats = 10
23 | 
24 | iterations = 1000
25 | init_FG = 'kmeans'
26 | init_S = 'random'
27 | I, J, K, L = 622,138,5,5
28 | 
29 | alpha, beta = 1., 1.
30 | lambdaF = numpy.ones((I,K))/10.
31 | lambdaS = numpy.ones((K,L))/10.
32 | lambdaG = numpy.ones((J,L))/10.
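# Tri-factorisation priors, gathered into a dict below: exponential(1/10) priors on the entries of
# F (I x K), S (K x L) and G (J x L), and a Gamma(1,1) prior on the noise precision. The model
# approximates R (I x J) by F S G^T, so the reconstruction is I x J; a purely illustrative shape
# check (not part of the experiment):
#   numpy.dot(numpy.ones((I,K)), numpy.dot(numpy.ones((K,L)), numpy.ones((J,L)).T)).shape  # (I, J)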
33 | priors = { 'alpha':alpha, 'beta':beta, 'lambdaF':lambdaF, 'lambdaS':lambdaS, 'lambdaG':lambdaG }
34 | 
35 | 
36 | # Load in data
37 | (_,R,M,_,_,_,_) = load_gdsc(standardised=standardised)
38 | 
39 | 
40 | # Run the VB algorithm and time it
41 | times_repeats = []
42 | performances_repeats = []
43 | for i in range(0,repeats):
44 |     # Set the random seed
45 |     numpy.random.seed(3)
46 | 
47 |     # Run the model
48 |     BNMTF = bnmtf_vb_optimised(R,M,K,L,priors)
49 |     BNMTF.initialise(init_S,init_FG)
50 |     BNMTF.run(iterations)
51 | 
52 |     # Extract the performances and timestamps across all iterations
53 |     times_repeats.append(BNMTF.all_times)
54 |     performances_repeats.append(BNMTF.all_performances)
55 | 
56 | # Check whether the seed worked: all performances should be the same
57 | assert all(numpy.array_equal(performances, performances_repeats[0]) for performances in performances_repeats), \
58 |     "Seed went wrong - performances not the same across repeats!"
59 | 
60 | # Print out the performances, and the average times
61 | vb_all_times_average = list(numpy.average(times_repeats, axis=0))
62 | vb_all_performances = performances_repeats[0]
63 | print "vb_all_times_average = %s" % vb_all_times_average
64 | print "vb_all_performances = %s" % vb_all_performances
65 | 
66 | 
67 | # Plot performance against time (per repeat), against average time, and against iteration
68 | plt.figure()
69 | plt.title("Performance against time")
70 | plt.ylim(0,10)
71 | for times in times_repeats:
72 |     plt.plot(times, vb_all_performances['MSE'])
73 | 
74 | plt.figure()
75 | plt.title("Performance against average time")
76 | plt.plot(vb_all_times_average, vb_all_performances['MSE'])
77 | plt.ylim(0,10)
78 | 
79 | plt.figure()
80 | plt.title("Performance against iteration")
81 | plt.plot(vb_all_performances['MSE'])
82 | plt.ylim(0,10)
--------------------------------------------------------------------------------
/experiments/experiments_gdsc/time/nmtf_gibbs_time.py:
--------------------------------------------------------------------------------
1 | """
2 | Run NMTF Gibbs on the Sanger dataset.
3 | 
4 | We can plot the MSE, R2 and Rp as it converges, against time, on the entire dataset.
5 | 
6 | We give flat priors (1/10).
7 | """
8 | 
9 | import sys, os
10 | project_location = os.path.dirname(__file__)+"/../../../../"
11 | sys.path.append(project_location)
12 | 
13 | from BNMTF.code.models.bnmtf_gibbs_optimised import bnmtf_gibbs_optimised
14 | from BNMTF.data_drug_sensitivity.gdsc.load_data import load_gdsc
15 | 
16 | import numpy, random, scipy, matplotlib.pyplot as plt
17 | 
18 | ##########
19 | 
20 | standardised = False # standardised Sanger or unstandardised
21 | 
22 | repeats = 10
23 | 
24 | iterations = 1000
25 | init_FG = 'kmeans'
26 | init_S = 'random'
27 | I, J, K, L = 622,138,5,5
28 | 
29 | alpha, beta = 1., 1.
30 | lambdaF = numpy.ones((I,K))/10.
31 | lambdaS = numpy.ones((K,L))/10.
32 | lambdaG = numpy.ones((J,L))/10.
33 | priors = { 'alpha':alpha, 'beta':beta, 'lambdaF':lambdaF, 'lambdaS':lambdaS, 'lambdaG':lambdaG }
34 | 
35 | # Load in data
36 | (_,R,M,_,_,_,_) = load_gdsc(standardised=standardised)
37 | 
38 | 
39 | # Run the Gibbs sampler and time it
40 | times_repeats = []
41 | performances_repeats = []
42 | for i in range(0,repeats):
43 |     # Set the random seed
44 |     numpy.random.seed(3)
45 | 
46 |     # Run the model
47 |     BNMTF = bnmtf_gibbs_optimised(R,M,K,L,priors)
48 |     BNMTF.initialise(init_S,init_FG)
49 |     BNMTF.run(iterations)
50 | 
51 |     # Extract the performances and timestamps across all iterations
52 |     times_repeats.append(BNMTF.all_times)
53 |     performances_repeats.append(BNMTF.all_performances)
54 | 
55 | # Check whether the seed worked: all performances should be the same
56 | assert all(numpy.array_equal(performances, performances_repeats[0]) for performances in performances_repeats), \
57 |     "Seed went wrong - performances not the same across repeats!"
58 | 
59 | # Print out the performances, and the average times
60 | gibbs_all_times_average = list(numpy.average(times_repeats, axis=0))
61 | gibbs_all_performances = performances_repeats[0]
62 | print "gibbs_all_times_average = %s" % gibbs_all_times_average
63 | print "gibbs_all_performances = %s" % gibbs_all_performances
64 | 
65 | 
66 | # Plot performance against time (per repeat), against average time, and against iteration
67 | plt.figure()
68 | plt.title("Performance against time")
69 | plt.ylim(0,10)
70 | for times in times_repeats:
71 |     plt.plot(times, gibbs_all_performances['MSE'])
72 | 
73 | plt.figure()
74 | plt.title("Performance against average time")
75 | plt.plot(gibbs_all_times_average, gibbs_all_performances['MSE'])
76 | plt.ylim(0,10)
77 | 
78 | plt.figure()
79 | plt.title("Performance against iteration")
80 | plt.plot(gibbs_all_performances['MSE'])
81 | plt.ylim(0,10)
--------------------------------------------------------------------------------