├── cimcb_logo.png
├── cimcb
│   ├── cross_val
│   │   ├── __init__.py
│   │   ├── KFold.py
│   │   └── holdout.py
│   ├── bootstrap
│   │   ├── __init__.py
│   │   ├── BaseBootstrap.py
│   │   ├── Perc.py
│   │   ├── Per.py
│   │   ├── CPer.py
│   │   └── BCA.py
│   ├── __version__.py
│   ├── utils
│   │   ├── dict_mean.py
│   │   ├── dict_std.py
│   │   ├── nested_getattr.py
│   │   ├── dict_median.py
│   │   ├── smooth.py
│   │   ├── dict_median_scores.py
│   │   ├── dict_95ci.py
│   │   ├── dict_perc.py
│   │   ├── YpredCallback.py
│   │   ├── color_scale.py
│   │   ├── wmean.py
│   │   ├── __init__.py
│   │   ├── load_comparisonXL.py
│   │   ├── load_dataXL.py
│   │   ├── binary_evaluation.py
│   │   ├── load_dataCSV.py
│   │   ├── ci95_ellipse.py
│   │   ├── table_check.py
│   │   ├── scale.py
│   │   ├── binary_metrics.py
│   │   ├── multiclass_metrics.py
│   │   ├── univariate_2class.py
│   │   └── knnimpute.py
│   ├── plot
│   │   ├── __init__.py
│   │   ├── boxplot.py
│   │   ├── distribution.py
│   │   ├── pca.py
│   │   ├── permutation_test.py
│   │   ├── roc.py
│   │   ├── scatter.py
│   │   ├── scatterCI.py
│   │   └── scatter_ellipse.py
│   ├── __init__.py
│   └── model
│       ├── __init__.py
│       ├── BaseModel.py
│       ├── NN_LogitLogit_Sklearn.py
│       ├── NN_LinearLinear_Sklearn.py
│       ├── NN_LinearLogit_Sklearn.py
│       ├── NN_SoftmaxSoftmax.py
│       ├── SVM.py
│       ├── RBF_NN.py
│       ├── MBNN_SigmoidSigmoid_1Layer.py
│       ├── MBNN_LinearSigmoid.py
│       ├── MBNN_LinearSigmoid_1Layer.py
│       ├── NN_L1.py
│       ├── NN_L2.py
│       ├── RF.py
│       ├── PCR.py
│       ├── NN_LogitLogit.py
│       ├── NN_LinearLinear.py
│       ├── NN_SigmoidSigmoidSigmoid.py
│       ├── NN_ReluTan.py
│       ├── NN_TanRelu.py
│       ├── NN_TanTan.py
│       ├── NN_ReluRelu.py
│       ├── NN_LinearTan.py
│       ├── NN_LogitRelu.py
│       ├── NN_LogitTan.py
│       ├── NN_ReluLogit.py
│       ├── NN_TanLinear.py
│       ├── NN_TanLogit.py
│       ├── NN_LinearRelu.py
│       ├── NN_ReluLinear.py
│       ├── NN_LogitLinear.py
│       ├── NN_LinearLogit.py
│       ├── PCLR.py
│       ├── MBNN_SigmoidSigmoid.py
│       ├── NN_LinearSigmoid.py
│       ├── NN_SigmoidSigmoid.py
│       ├── PLS_SIMPLS.py
│       └── PLS_NIPALS.py
├── cimcb.recipe
│   └── meta.yaml
├── LICENSE
├── setup.py
├── .gitignore
└── README.md

/cimcb_logo.png:
--------------------------------------------------------------------------------
 https://raw.githubusercontent.com/CIMCB/cimcb/master/cimcb_logo.png
--------------------------------------------------------------------------------
/cimcb/cross_val/__init__.py:
--------------------------------------------------------------------------------
1 | from .KFold import KFold
2 | from .holdout import holdout
3 | 
4 | __all__ = ["KFold", "holdout"]
5 | 
--------------------------------------------------------------------------------
/cimcb/bootstrap/__init__.py:
--------------------------------------------------------------------------------
1 | from .Per import Per
2 | from .Perc import Perc
3 | from .CPer import CPer
4 | from .BCA import BCA
5 | 
6 | __all__ = ["Per", "Perc", "CPer", "BCA"]
7 | 
--------------------------------------------------------------------------------
/cimcb/__version__.py:
--------------------------------------------------------------------------------
1 | from __future__ import division, absolute_import, print_function
2 | 
3 | major = 2
4 | minor = 1
5 | micro = 2
6 | version = "%(major)d.%(minor)d.%(micro)d" % (locals())
7 | 
--------------------------------------------------------------------------------
/cimcb/utils/dict_mean.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | 
3 | 
4 | def dict_mean(dict_list):
5 |     mean_dict = {}
6 |     for key in dict_list[0].keys():
7 |         value = []
8 |         for i in dict_list:
9 |             value.append(i[key])
10 |         mean_dict[key] = np.mean(value)
11 |     return mean_dict
12 | 
--------------------------------------------------------------------------------
/cimcb/utils/dict_std.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | 
3 | 
4 | def dict_std(dict_list):
5 |     std_dict = {}
6 |     for key in dict_list[0].keys():
7 |         value = []
8 |         for i in dict_list:
9 |             value.append(i[key])
10 |         std_dict[key] = np.std(value, ddof=1)
11 |     return std_dict
12 | 
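Example (not part of the package source): a minimal usage sketch for dict_mean and dict_std; the per-fold metric dictionaries below are hypothetical.

    from cimcb.utils import dict_mean, dict_std

    # Hypothetical metric dictionaries, e.g. one per cross-validation fold
    folds = [{"AUC": 0.91, "R²": 0.55},
             {"AUC": 0.87, "R²": 0.49},
             {"AUC": 0.89, "R²": 0.52}]

    print(dict_mean(folds))  # ≈ {'AUC': 0.89, 'R²': 0.52}
    print(dict_std(folds))   # key-wise sample standard deviation (ddof=1)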
--------------------------------------------------------------------------------
/cimcb/utils/nested_getattr.py:
--------------------------------------------------------------------------------
1 | from functools import reduce
2 | 
3 | 
4 | def nested_getattr(model, attributes):
5 |     """getattr for nested attributes."""
6 | 
7 |     def _getattr(model, attributes):
8 |         return getattr(model, attributes)
9 | 
10 |     return reduce(_getattr, [model] + attributes.split("."))
11 | 
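Example (not part of the package source): a short sketch of how nested_getattr resolves a dotted attribute path; the two classes are hypothetical.

    from cimcb.utils import nested_getattr

    class Inner:
        x_scores_ = [0.2, 0.8]

    class Outer:
        model = Inner()

    # Equivalent to Outer().model.x_scores_
    print(nested_getattr(Outer(), "model.x_scores_"))  # [0.2, 0.8]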
--------------------------------------------------------------------------------
/cimcb/plot/__init__.py:
--------------------------------------------------------------------------------
1 | from .boxplot import boxplot
2 | from .distribution import distribution
3 | from .pca import pca
4 | from .permutation_test import permutation_test
5 | from .roc import roc_boot, roc_cv, roc
6 | from .scatter import scatter
7 | from .scatterCI import scatterCI
8 | from .scatter_ellipse import scatter_ellipse
9 | 
10 | __all__ = ["boxplot", "distribution", "pca", "permutation_test", "roc_boot", "roc_cv", "roc", "scatter", "scatterCI", "scatter_ellipse"]
11 | 
--------------------------------------------------------------------------------
/cimcb/utils/dict_median.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | 
3 | 
4 | def dict_median(dict_list):
5 |     median_dict = {}
6 |     for key in dict_list[0].keys():
7 |         value = []
8 |         for i in dict_list:
9 |             value.append(i[key])
10 |         value_arr = np.array(value)
11 |         if np.isnan(value_arr).any() == True:
12 |             median_dict[key] = np.nan
13 |         else:
14 |             median_dict[key] = np.median(value)
15 |     return median_dict
16 | 
--------------------------------------------------------------------------------
/cimcb/__init__.py:
--------------------------------------------------------------------------------
1 | from .__version__ import version as __version__
2 | 
3 | # To ignore TensorFlow Deprecation Warnings
4 | import logging
5 | logging.getLogger("tensorflow").setLevel(logging.ERROR)
6 | 
7 | # Use Theano
8 | import os
9 | os.environ["KERAS_BACKEND"] = "theano"
10 | import keras
11 | import keras.backend
12 | 
13 | from . import bootstrap
14 | from . import cross_val
15 | from . import model
16 | from . import plot
17 | from . import utils
18 | 
19 | __all__ = ["bootstrap", "cross_val", "model", "plot", "utils"]
20 | 
--------------------------------------------------------------------------------
/cimcb/utils/smooth.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | 
3 | 
4 | def smooth(a, WSZ):
5 |     # a: NumPy 1-D array containing the data to be smoothed
6 |     # WSZ: smoothing window size, which must be an odd number,
7 |     # as in the original MATLAB implementation (an even WSZ is reduced by 1)
8 |     if WSZ % 2 == 0:
9 |         WSZ = WSZ - 1
10 |     out0 = np.convolve(a, np.ones(WSZ, dtype=int), 'valid') / WSZ
11 |     r = np.arange(1, WSZ - 1, 2)
12 |     start = np.cumsum(a[:WSZ - 1])[::2] / r
13 |     stop = (np.cumsum(a[:-WSZ:-1])[::2] / r)[::-1]
14 |     return np.concatenate((start, out0, stop))
15 | 
--------------------------------------------------------------------------------
/cimcb/utils/dict_median_scores.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | 
3 | 
4 | def dict_median_scores(dict_list):
5 |     median_dict = {}
6 |     for key in dict_list.keys():
7 |         value = dict_list[key]
8 |         value_arr = np.array(value)
9 |         if np.isnan(value_arr).any() == True:
10 |             median_dict[key] = np.nan
11 |         else:
12 |             # append_low = np.percentile(value_arr, 2.5)
13 |             append_mid = np.median(value_arr, axis=0)
14 |             # append_upp = np.percentile(value_arr, 97.5)
15 |             median_dict[key] = append_mid
16 |     return median_dict
17 | 
--------------------------------------------------------------------------------
/cimcb/utils/dict_95ci.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | 
3 | 
4 | def dict_95ci(dict_list):
5 |     median_dict = {}
6 |     for key in dict_list.keys():
7 |         value = dict_list[key]
8 |         value_arr = np.array(value)
9 |         if np.isnan(value_arr).any() == True:
10 |             median_dict[key] = np.nan
11 |         else:
12 |             append_low = np.percentile(value_arr, 2.5)
13 |             append_mid = np.percentile(value_arr, 50)
14 |             append_upp = np.percentile(value_arr, 97.5)
15 |             median_dict[key] = [append_low, append_upp, append_mid]
16 |     return median_dict
17 | 
--------------------------------------------------------------------------------
/cimcb/utils/dict_perc.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | 
3 | 
4 | def dict_perc(dict_list, ci=95):
5 |     perc_dict = {}
6 |     for key in dict_list[0].keys():
7 |         value = []
8 |         for i in dict_list:
9 |             value.append(i[key])
10 |         value_arr = np.array(value)
11 |         if np.isnan(value_arr).any() == True:
12 |             perc_dict[key] = [np.nan, np.nan]
13 |         else:
14 |             lower_alpha = (100 - ci) / 2
15 |             upper_alpha = 100 - lower_alpha
16 |             lower_ci = np.percentile(value_arr, lower_alpha)
17 |             upper_ci = np.percentile(value_arr, upper_alpha)
18 |             perc_dict[key] = [lower_ci, upper_ci]
19 |     return perc_dict
20 | 
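Example (not part of the package source): a minimal sketch of dict_perc on a list of metric dictionaries from hypothetical bootstrap resamples.

    import numpy as np
    from cimcb.utils import dict_perc

    rng = np.random.RandomState(42)
    boot = [{"AUC": rng.uniform(0.80, 0.95)} for _ in range(100)]

    # Key-wise [2.5th, 97.5th] percentile interval
    print(dict_perc(boot, ci=95))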
--------------------------------------------------------------------------------
/cimcb/utils/YpredCallback.py:
--------------------------------------------------------------------------------
1 | from keras.callbacks import Callback
2 | 
3 | 
4 | class YpredCallback(Callback):
5 |     """Used as a callback for Keras to get Ypred_train, and Ypred_test for each epoch.
6 | 
7 |     Example:
8 |     yc = YpredCallback(model, X_train, X_test)
9 |     model.fit(X, Y, callbacks=[yc])
10 |     """
11 | 
12 |     def __init__(self, model, X_train, X_test=None):
13 |         self.model = model  # Keras model
14 |         self.Y_train = []
15 |         self.Y_test = []
16 |         self.X_train = X_train
17 |         # If X_test is None, use X_train
18 |         if X_test is None:
19 |             self.X_test = X_train
20 |         else:
21 |             self.X_test = X_test
22 | 
23 |     def on_epoch_end(self, epoch, logs=None):
24 |         Y_train_pred = self.model.predict(self.X_train).flatten()
25 |         Y_test_pred = self.model.predict(self.X_test).flatten()
26 |         self.Y_train.append(Y_train_pred)
27 |         self.Y_test.append(Y_test_pred)
28 | 
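Example (not part of the package source): a usage sketch for YpredCallback; model, X_train, X_test and Y_train are assumed to be an already-compiled Keras model and NumPy arrays.

    from cimcb.utils import YpredCallback

    yc = YpredCallback(model, X_train, X_test)
    model.fit(X_train, Y_train, epochs=200, callbacks=[yc])

    # One prediction array per epoch, accumulated by on_epoch_end
    ypred_train_per_epoch = yc.Y_train
    ypred_test_per_epoch = yc.Y_test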
--------------------------------------------------------------------------------
/cimcb.recipe/meta.yaml:
--------------------------------------------------------------------------------
1 | package:
2 |   name: cimcb
3 |   version: 2.1.2
4 | 
5 | source:
6 |   path: ../
7 | 
8 | build:
9 |   noarch: python
10 |   number: 0
11 |   script: "{{ PYTHON }} -m pip install . --no-deps -vv"
12 | 
13 | requirements:
14 |   build:
15 |     - setuptools
16 |     - python >=3.5
17 |     - bokeh >=1.0.0
18 |     - keras >=2.2.4
19 |     - numpy >=1.12
20 |     - pandas
21 |     - scipy
22 |     - scikit-learn
23 |     - statsmodels
24 |     - theano
25 |     - tqdm
26 |     - xlrd
27 |     - joblib
28 |     - nomkl
29 | 
30 |   run:
31 |     - python >=3.5
32 |     - bokeh >=1.0.0
33 |     - keras >=2.2.4
34 |     - numpy >=1.12
35 |     - pandas
36 |     - scipy
37 |     - scikit-learn
38 |     - statsmodels
39 |     - theano
40 |     - tqdm
41 |     - xlrd
42 |     - joblib
43 |     - nomkl
44 | 
45 | about:
46 |   home: https://github.com/CIMCB
47 |   license: MIT
48 |   summary: "A package containing the necessary tools for the statistical analysis of untargeted and targeted metabolomics data."
49 | 
--------------------------------------------------------------------------------
/cimcb/utils/color_scale.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from sklearn import preprocessing
3 | 
4 | 
5 | def color_scale(x, method="tanh", beta=None):
6 | 
7 |     # Initially scale between 0 and 1
8 |     scaler = preprocessing.MinMaxScaler(feature_range=(0.1, 1))
9 |     x_init = scaler.fit_transform(x[:, np.newaxis]).flatten()
10 | 
11 |     # Methods of transformation
12 |     if method == "linear":
13 |         x_tr = x_init
14 |     elif method == "sq":
15 |         x_tr = x_init ** 2
16 |     elif method == "sqrt":
17 |         x_tr = np.sqrt(x_init)
18 |     elif method == "tan":
19 |         x_tr = 1 + np.tan(beta * (1 + x_init))
20 |     elif method == "tanh+1":
21 |         x_tr = 1 + np.tanh(beta * (-1 + x_init))
22 |     elif method == "tanh":
23 |         x_tr_init = np.tanh(beta * (-1 + x_init))
24 |         x_tr = scaler.fit_transform(x_tr_init[:, np.newaxis]).flatten()
25 |     else:
26 |         print("An incorrect method for color_scale was selected, so it was set to 'linear'. Supported methods are 'linear', 'sq', 'sqrt', 'tan', 'tanh', and 'tanh+1'.")
27 |         x_tr = x_init
28 | 
29 |     return x_tr
30 | 
--------------------------------------------------------------------------------
/cimcb/model/__init__.py:
--------------------------------------------------------------------------------
1 | from .NN_LinearSigmoid import NN_LinearSigmoid
2 | from .NN_SigmoidSigmoid import NN_SigmoidSigmoid
3 | from .NN_SoftmaxSoftmax import NN_SoftmaxSoftmax
4 | from .MBNN_LinearSigmoid import MBNN_LinearSigmoid
5 | from .MBNN_SigmoidSigmoid import MBNN_SigmoidSigmoid
6 | from .MBNN_LinearSigmoid_1Layer import MBNN_LinearSigmoid_1Layer
7 | from .MBNN_SigmoidSigmoid_1Layer import MBNN_SigmoidSigmoid_1Layer
8 | from .NN_SigmoidSigmoidSigmoid import NN_SigmoidSigmoidSigmoid
9 | from .NN_LinearLogit_Sklearn import NN_LinearLogit_Sklearn
10 | from .NN_LogitLogit_Sklearn import NN_LogitLogit_Sklearn
11 | from .PCLR import PCLR
12 | from .PCR import PCR
13 | from .PLS_SIMPLS import PLS_SIMPLS
14 | from .PLS_NIPALS import PLS_NIPALS
15 | from .RF import RF
16 | from .SVM import SVM
17 | from .NN_L1 import NN_L1
18 | from .NN_L2 import NN_L2
19 | from .RBF_NN import RBF_NN
20 | 
21 | 
22 | __all__ = ["NN_LinearSigmoid", "NN_SigmoidSigmoid", "NN_SoftmaxSoftmax", "MBNN_LinearSigmoid", "MBNN_SigmoidSigmoid", "MBNN_LinearSigmoid_1Layer", "MBNN_SigmoidSigmoid_1Layer", "NN_SigmoidSigmoidSigmoid", "NN_LinearLogit_Sklearn", "NN_LogitLogit_Sklearn", "PCLR", "PCR", "PLS_SIMPLS", "PLS_NIPALS", "RF", "SVM", "NN_L1", "NN_L2", "RBF_NN"]
23 | 
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) 2019 CIMCB
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup
2 | 
3 | 
4 | def readme():
5 |     with open('README.md', encoding='utf-8') as f:
6 |         return f.read()
7 | 
8 | 
9 | setup(
10 |     name="cimcb",
11 |     version="2.1.2",
12 |     description="A package containing the necessary tools for the statistical analysis of untargeted and targeted metabolomics data.",
13 |     long_description=readme(),
14 |     long_description_content_type='text/markdown',
15 |     license="MIT",
16 |     url="https://github.com/KevinMMendez/cimcb",
17 |     packages=["cimcb", "cimcb.bootstrap", "cimcb.cross_val", "cimcb.model", "cimcb.plot", "cimcb.utils"],
18 |     python_requires=">=3.5",
19 |     install_requires=["bokeh>=1.0.0",
20 |                       "keras>=2.2.4",
21 |                       "numpy>=1.12",
22 |                       "pandas",
23 |                       "scipy",
24 |                       "scikit-learn",
25 |                       "statsmodels",
26 |                       "theano",
27 |                       "tqdm",
28 |                       "xlrd",
29 |                       "joblib"],
30 |     author="Kevin Mendez, David Broadhurst",
31 |     author_email="k.mendez@ecu.edu.au, d.broadhurst@ecu.edu.au",
32 | )
33 | 
--------------------------------------------------------------------------------
/cimcb/utils/wmean.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | 
3 | 
4 | def wmean(x, weights):
5 |     """Returns Weighted Mean. Ignores NaNs and handles infinite weights.
6 | 
7 |     Parameters
8 |     ----------
9 |     x: array-like [n_samples]
10 |         An array-like object that contains the data.
11 | 
12 |     weights: array-like [n_samples]
13 |         An array-like object that contains the corresponding weights.
14 | 
15 |     Returns
16 |     ----------------------------------
17 |     m: number
18 |         The weighted mean.
19 |     """
20 | 
21 |     # Flatten x and weights
22 |     x = x.flatten()
23 |     weights = weights.flatten()
24 | 
25 |     # Find NaNs in x and infinite weights
26 |     nans = np.isnan(x)
27 |     infs = np.isinf(weights)
28 | 
29 |     # If all x are nans, return np.nan
30 |     if nans.all() == True:
31 |         m = np.nan
32 |         return m
33 | 
34 |     # If there are infinite weights, use the corresponding x
35 |     if infs.any() == True:
36 |         m = np.nanmean(x[infs])
37 |         return m
38 | 
39 |     # Set NaNs to zero
40 |     x[nans] = 0
41 |     weights[nans] = 0
42 | 
43 |     # Normalize the weights + calculate Weighted Mean
44 |     weights = weights / np.sum(weights)
45 |     m = np.matmul(weights, x)
46 |     return m
47 | 
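Example (not part of the package source): a short sketch of wmean's NaN handling; the values are arbitrary.

    import numpy as np
    from cimcb.utils import wmean

    x = np.array([1.0, 2.0, np.nan, 4.0])
    w = np.array([1.0, 1.0, 1.0, 2.0])

    # The NaN is dropped and the remaining weights are renormalised:
    # (1*1 + 1*2 + 2*4) / 4 = 2.75
    print(wmean(x, w))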
["binary_metrics", "binary_evaluation", "multiclass_metrics", "ci95_ellipse", "dict_95ci", "dict_mean", "dict_median", "dict_median_scores", "dict_std", "dict_perc", "knnimpute", "load_comparisonXL", "load_dataXL", "load_dataCSV", "scale", "nested_getattr", "table_check", "univariate_2class", "wmean", "YpredCallback", "color_scale", "smooth"] 25 | -------------------------------------------------------------------------------- /cimcb/utils/load_comparisonXL.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | 4 | def load_comparisonXL(method, evaluate="train", dropna=True): 5 | """Load comparison table.""" 6 | if evaluate == "test": 7 | e = "['Test']" 8 | elif evaluate == "in bag": 9 | e = "['In Bag']" 10 | elif evaluate == "out of bag": 11 | e = "['Out of Bag']" 12 | else: 13 | e = "['Train']" 14 | 15 | # Import methods 16 | table = [] 17 | for i in method: 18 | table.append(pd.read_excel(i + ".xlsx")) 19 | 20 | # Concatenate table 21 | df = pd.DataFrame() 22 | for i in range(len(table)): 23 | df = pd.concat([df, table[i].loc[table[i]['evaluate'] == e].T.squeeze()], axis=1, sort=False) 24 | df = df.T.drop(columns="evaluate") 25 | 26 | # Remove [ ] from string 27 | for i in range(len(df)): 28 | for j in range(len(df.T)): 29 | if type(df.iloc[i, j]) is str: 30 | df.iloc[i, j] = df.iloc[i, j][2: -2] 31 | 32 | # Reset index and add methods column 33 | method_name = [] 34 | for i in range(len(method)): 35 | name_i = method[i].rsplit('/', 1)[1] 36 | method_name.append(name_i) 37 | df = df.reset_index() 38 | df = pd.concat([pd.Series(method_name, name="method"), df], axis=1, sort=False) 39 | df = df.drop("index", 1) 40 | #df = df.set_index("method") 41 | 42 | # drop columns with just nans 43 | if dropna is True: 44 | df = df.dropna(axis=1, how='all') 45 | 46 | return df 47 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 | 
6 | # C extensions
7 | *.so
8 | 
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | *.egg-info/
24 | .installed.cfg
25 | *.egg
26 | MANIFEST
27 | 
28 | # PyInstaller
29 | # Usually these files are written by a python script from a template
30 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
31 | *.manifest
32 | *.spec
33 | 
34 | # Installer logs
35 | pip-log.txt
36 | pip-delete-this-directory.txt
37 | 
38 | # Unit test / coverage reports
39 | htmlcov/
40 | .tox/
41 | .coverage
42 | .coverage.*
43 | .cache
44 | nosetests.xml
45 | coverage.xml
46 | *.cover
47 | .hypothesis/
48 | .pytest_cache/
49 | 
50 | # Translations
51 | *.mo
52 | *.pot
53 | 
54 | # Django stuff:
55 | *.log
56 | local_settings.py
57 | db.sqlite3
58 | 
59 | # Flask stuff:
60 | instance/
61 | .webassets-cache
62 | 
63 | # Scrapy stuff:
64 | .scrapy
65 | 
66 | # Sphinx documentation
67 | docs/_build/
68 | 
69 | # PyBuilder
70 | target/
71 | 
72 | # Jupyter Notebook
73 | .ipynb_checkpoints
74 | 
75 | # pyenv
76 | .python-version
77 | 
78 | # celery beat schedule file
79 | celerybeat-schedule
80 | 
81 | # SageMath parsed files
82 | *.sage.py
83 | 
84 | # Environments
85 | .env
86 | .venv
87 | env/
88 | venv/
89 | ENV/
90 | env.bak/
91 | venv.bak/
92 | 
93 | # Spyder project settings
94 | .spyderproject
95 | .spyproject
96 | 
97 | # Rope project settings
98 | .ropeproject
99 | 
100 | # mkdocs documentation
101 | /site
102 | 
103 | # mypy
104 | .mypy_cache/
105 | 
--------------------------------------------------------------------------------
/cimcb/utils/load_dataXL.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import numpy as np
3 | from os import path
4 | from .table_check import table_check
5 | 
6 | 
7 | def load_dataXL(filename, DataSheet, PeakSheet):
8 |     """Loads and validates the DataFile and PeakFile from an excel file.
9 | 
10 | 
11 |     Parameters
12 |     ----------
13 |     filename : string
14 |         The name of the excel file (.xlsx file) e.g. 'projectxxx1.xlsx'. Note, it can include the directory e.g. '/homedir/projectxxx1.xlsx'
15 | 
16 |     DataSheet : string
17 |         The name of the data sheet in the file e.g. 'Data'. Note, the data sheet must contain an 'Idx' and 'SampleID' column.
18 | 
19 |     PeakSheet : string
20 |         The name of the peak sheet in the file e.g. 'Peak'. Note, the peak sheet must contain an 'Idx', 'Name', and 'Label' column.
21 | 
22 |     Returns
23 |     -------
24 |     DataTable: DataFrame
25 |         Data sheet from the excel file.
26 | 
27 |     PeakTable: DataFrame
28 |         Peak sheet from the excel file.
29 | """ 30 | 31 | if path.isfile(filename) is False: 32 | raise ValueError("{} does not exist.".format(filename)) 33 | 34 | if not filename.endswith(".xlsx"): 35 | raise ValueError("{} should be a .xlsx file.".format(filename)) 36 | 37 | # LOAD PEAK DATA 38 | print("Loadings PeakFile: {}".format(PeakSheet)) 39 | PeakTable = pd.read_excel(filename, sheet_name=PeakSheet) 40 | 41 | # LOAD DATA TABLE 42 | print("Loadings DataFile: {}".format(DataSheet)) 43 | DataTable = pd.read_excel(filename, sheet_name=DataSheet) 44 | 45 | # Replace with nans 46 | DataTable = DataTable.replace(-99, np.nan) 47 | DataTable = DataTable.replace(".", np.nan) 48 | DataTable = DataTable.replace(" ", np.nan) 49 | 50 | # Error checks 51 | table_check(DataTable, PeakTable, print_statement=True) 52 | 53 | # Make the Idx column start from 1 54 | DataTable.index = np.arange(1, len(DataTable) + 1) 55 | PeakTable.index = np.arange(1, len(PeakTable) + 1) 56 | 57 | print("TOTAL SAMPLES: {} TOTAL PEAKS: {}".format(len(DataTable), len(PeakTable))) 58 | print("Done!") 59 | return DataTable, PeakTable 60 | -------------------------------------------------------------------------------- /cimcb/utils/binary_evaluation.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sklearn.metrics import confusion_matrix, roc_auc_score, roc_curve, auc 3 | from sklearn import metrics 4 | import scipy 5 | 6 | 7 | def binary_evaluation(y_true, y_pred): 8 | """ Return a dict of binary stats with the following metrics: R2, auc, accuracy, precision, sensitivity, specificity, and F1 score. 9 | 10 | Parameters 11 | ---------- 12 | y_true : array-like, shape = [n_samples] 13 | Binary label for samples (0s and 1s). 14 | 15 | y_pred : array-like, shape = [n_samples] 16 | Predicted y score for samples. 17 | 18 | cut_off : number, (default 0.5) 19 | A value for y_pred greater-than or equal to the cut_off will be treated as 1, otherwise it will be treated as 0 for the confusion matrix. 20 | 21 | parametric : boolean, (default True) 22 | If parametric is True, calculate R2. 23 | 24 | Returns 25 | ------- 26 | stats: dict 27 | dict containing calculated R2, auc, accuracy, precision, sensitivity, specificity, and F1 score. 
28 | """ 29 | 30 | # Convert to array 31 | y_true_arr = np.array(y_true) 32 | y_pred_arr = np.array(y_pred) 33 | 34 | # Error checks 35 | if y_true_arr.ndim != 1: 36 | raise ValueError("y_true should only have 1 dimension.") 37 | if y_pred_arr.ndim != 1: 38 | raise ValueError("y_pred should only have 1 dimension.") 39 | if y_true_arr.shape[0] != y_pred_arr.shape[0]: 40 | raise ValueError("The number of values in y_true should match y_pred.") 41 | if np.array_equal(sorted(set(y_true_arr)), [0, 1]) is False: 42 | raise ValueError("y_true should only contain 0s and 1s") 43 | 44 | # Binary statistics dictionary 45 | stats = {} 46 | 47 | stats["R²"] = 1 - (sum((y_true_arr - y_pred_arr) ** 2) / sum((y_true_arr - np.mean(y_true_arr)) ** 2)) 48 | 49 | fpr, tpr, thresholds = metrics.roc_curve(y_true_arr, y_pred_arr, pos_label=1) 50 | stats["AUC"] = metrics.auc(fpr, tpr) 51 | 52 | try: 53 | stats["ManW P-Value"] = scipy.stats.mannwhitneyu(y_pred_arr[y_true_arr == 0], y_pred_arr[y_true_arr == 1], alternative="two-sided")[1] 54 | except ValueError: 55 | stats["ManW P-Value"] = 1 56 | 57 | return stats 58 | -------------------------------------------------------------------------------- /cimcb/utils/load_dataCSV.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | from os import path 4 | from .table_check import table_check 5 | 6 | 7 | def load_dataCSV(DataSheet, PeakSheet): 8 | """Loads and validates the DataFile and PeakFile from csv files. 9 | 10 | 11 | Parameters 12 | ---------- 13 | DataSheet : string 14 | The name of the csv file (.csv file) that contains the 'Data'. Note, the data sheet must contain an 'Idx' and 'SampleID'column. e.g. 'datasheetxxx1.csv' or '/homedir/datasheetxxx1.csv' 15 | 16 | PeakSheet : string 17 | The name of the csv file (.csv file) that contains the 'Peak'. Note, the peak sheet must contain an 'Idx', 'Name', and 'Label' column. e.g. 'peaksheetxxx1.csv' or 'peaksheetxxx1.csv' 18 | 19 | Returns 20 | ------- 21 | DataTable: DataFrame 22 | Data sheet from the csv file. 23 | 24 | PeakTable: DataFrame 25 | Peak sheet from the csv file. 
26 | """ 27 | 28 | # Check Datasheet exists 29 | if path.isfile(DataSheet) is False: 30 | raise ValueError("{} does not exist.".format(filename)) 31 | 32 | if not DataSheet.endswith(".csv"): 33 | raise ValueError("{} should be a .csv file.".format(filename)) 34 | 35 | # Check PeakSheet exists 36 | if path.isfile(PeakSheet) is False: 37 | raise ValueError("{} does not exist.".format(filename)) 38 | 39 | if not PeakSheet.endswith(".csv"): 40 | raise ValueError("{} should be a .csv file.".format(filename)) 41 | 42 | # LOAD PEAK DATA 43 | print("Loadings PeakFile: {}".format(PeakSheet)) 44 | PeakTable = pd.read_csv(PeakSheet) 45 | 46 | # LOAD DATA TABLE 47 | print("Loadings DataFile: {}".format(DataSheet)) 48 | DataTable = pd.read_csv(DataSheet) 49 | 50 | # Replace with nans 51 | DataTable = DataTable.replace(-99, np.nan) 52 | DataTable = DataTable.replace(".", np.nan) 53 | DataTable = DataTable.replace(" ", np.nan) 54 | 55 | # Error checks 56 | table_check(DataTable, PeakTable, print_statement=True) 57 | 58 | # Make the Idx column start from 1 59 | DataTable.index = np.arange(1, len(DataTable) + 1) 60 | PeakTable.index = np.arange(1, len(PeakTable) + 1) 61 | 62 | print("TOTAL SAMPLES: {} TOTAL PEAKS: {}".format(len(DataTable), len(PeakTable))) 63 | print("Done!") 64 | return DataTable, PeakTable 65 | -------------------------------------------------------------------------------- /cimcb/utils/ci95_ellipse.py: -------------------------------------------------------------------------------- 1 | import math 2 | import numpy as np 3 | from sklearn.decomposition import PCA 4 | 5 | 6 | def ci95_ellipse(data, type="pop"): 7 | """ Construct a 95% confidence ellipse using PCA. 8 | 9 | Parameters 10 | ---------- 11 | data : array-like, shape = [n_samples, 2] 12 | data[:,0] must represent x coordinates 13 | data[:,1] must represent y coordinates 14 | 15 | type : string, optional (default='pop') 16 | It must be 'pop' or 'mean' 17 | 18 | Returns 19 | ------- 20 | ellipse: array-like, shape = [100, 2] 21 | ellipse[:,0] represents x coordinates of ellipse 22 | ellipse[:,1] represents y coordinates of ellipse 23 | 24 | outside: array-like, shape = [n_samples, 1] 25 | returns an 1d array (of 0/1) with length n_samples 26 | 0 : ith sample is outside of ellipse 27 | 1 : ith sample is inside of ellipse 28 | """ 29 | 30 | # Build and fit PCA model 31 | pca = PCA() 32 | pca.fit(data) 33 | coeff = pca.components_ 34 | score = pca.transform(data) 35 | eigvals = pca.explained_variance_ 36 | 37 | # Calculate rotation angle 38 | phi = math.atan2(coeff[0, 1], coeff[0, 0]) 39 | 40 | # This angle is between -pi and pi. 41 | # Let's shift it such that the angle is between 0 and 2pi 42 | if phi < 0: 43 | phi += 2 * math.pi 44 | 45 | # Get the coordinates of the data mean 46 | n = len(data) 47 | m = np.mean(data, axis=0) 48 | x0 = m[0] 49 | y0 = m[1] 50 | 51 | # Get the 95% confidence interval error ellipse 52 | # inverse of the chi-square cumulative distribution for p = 0.05 & 2 d.f. 
53 |     chisquare_val = 5.9915
54 |     if type == "pop":
55 |         a = math.sqrt(chisquare_val * eigvals[0])
56 |         b = math.sqrt(chisquare_val * eigvals[1])
57 |     elif type == "mean":
58 |         a = math.sqrt(chisquare_val * eigvals[0] / n)
59 |         b = math.sqrt(chisquare_val * eigvals[1] / n)
60 |     else:
61 |         raise ValueError("type has to be 'pop' or 'mean'.")
62 | 
63 |     # the ellipse in x and y coordinates
64 |     theta_grid = np.linspace(0, 2 * math.pi, num=100)
65 |     ellipse_x_r = a * np.cos(theta_grid)
66 |     ellipse_y_r = b * np.sin(theta_grid)
67 | 
68 |     # Define a rotation matrix
69 |     R = np.array([[np.cos(phi), np.sin(phi)], [-np.sin(phi), np.cos(phi)]])
70 |     # let's rotate the ellipse to some angle phi
71 |     r_ellipse = np.dot(np.vstack((ellipse_x_r, ellipse_y_r)).T, R)
72 | 
73 |     # Draw the error ellipse
74 |     x = r_ellipse[:, 0] + x0
75 |     y = r_ellipse[:, 1] + y0
76 |     ellipse = np.stack((x, y), axis=1)
77 | 
78 |     outside = []
79 |     for i in range(len(score)):
80 |         metric = (score[i, 0] / a) ** 2 + (score[i, 1] / b) ** 2
81 |         if metric > 1:
82 |             outside.append(1)
83 |         else:
84 |             outside.append(0)
85 | 
86 |     return ellipse, outside
87 | 
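Example (not part of the package source): a minimal sketch of ci95_ellipse on hypothetical 2-D scores (e.g. the first two PCA components).

    import numpy as np
    from cimcb.utils import ci95_ellipse

    rng = np.random.RandomState(0)
    scores = rng.multivariate_normal([0, 0], [[2.0, 0.5], [0.5, 1.0]], size=200)

    ellipse, outside = ci95_ellipse(scores, type="pop")
    # ellipse: 100 (x, y) pairs tracing the 95% ellipse
    # outside[i] == 1 if sample i falls outside the ellipse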
Please change") 52 | 53 | if "Label" not in peak_columns: 54 | raise ValueError("Data Table does not contain the required 'Label' column") 55 | 56 | # Check that Peak Names in PeakTable & DataTable match 57 | peak_list = PeakTable.Name 58 | data_columns = DataTable.columns.values 59 | temp = np.intersect1d(data_columns, peak_list) 60 | 61 | if len(temp) != len(peak_list): 62 | raise ValueError("The Peak Names in Data Table should exactly match the Peak Names in Peak Table. Remember that all Peak Names should be unique.") 63 | 64 | if print_statement is True: 65 | print("Data Table & Peak Table is suitable.") 66 | -------------------------------------------------------------------------------- /cimcb/model/NN_LogitLogit_Sklearn.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from keras.callbacks import Callback 3 | from keras.optimizers import SGD 4 | from sklearn.neural_network import MLPClassifier 5 | from keras.models import Sequential 6 | from keras.layers import Dense 7 | from .BaseModel import BaseModel 8 | from ..utils import YpredCallback 9 | 10 | 11 | class NN_LogitLogit_Sklearn(BaseModel): 12 | """2 Layer linear-linear neural network using Keras""" 13 | 14 | parametric = False 15 | bootlist = None 16 | 17 | def __init__(self, n_nodes=2, epochs=200, learning_rate=0.01, momentum=0.0, decay=0.0, nesterov=False, loss="binary_crossentropy", batch_size=None, verbose=0): 18 | self.n_nodes = n_nodes 19 | self.verbose = verbose 20 | self.n_epochs = epochs 21 | self.k = n_nodes 22 | self.batch_size = batch_size 23 | self.loss = loss 24 | self.learning_rate = learning_rate 25 | self.momentum = momentum 26 | self.decay = decay 27 | self.optimizer = "sgd" 28 | 29 | def train(self, X, Y, epoch_ypred=False, epoch_xtest=None): 30 | """ Fit the neural network model, save additional stats (as attributes) and return Y predicted values. 31 | 32 | Parameters 33 | ---------- 34 | X : array-like, shape = [n_samples, n_features] 35 | Predictor variables, where n_samples is the number of samples and n_features is the number of predictors. 36 | 37 | Y : array-like, shape = [n_samples, 1] 38 | Response variables, where n_samples is the number of samples. 39 | 40 | Returns 41 | ------- 42 | y_pred_train : array-like, shape = [n_samples, 1] 43 | Predicted y score for samples. 44 | """ 45 | 46 | # If batch-size is None: 47 | if self.batch_size is None: 48 | self.batch_size = len(X) 49 | 50 | # Ensure array and error check 51 | X, Y = self.input_check(X, Y) 52 | 53 | self.model = MLPClassifier(hidden_layer_sizes=(self.n_nodes,), activation="logistic", solver=self.optimizer, learning_rate_init=self.learning_rate, momentum=self.momentum, batch_size=self.batch_size, nesterovs_momentum=False, max_iter=self.n_epochs) 54 | 55 | # Fit 56 | self.model.fit(X, Y) 57 | 58 | y_pred_train = self.model.predict(X) 59 | 60 | # Storing X, Y, and Y_pred 61 | self.Y_pred = y_pred_train 62 | self.X = X 63 | self.Y = Y 64 | return y_pred_train 65 | 66 | def test(self, X, Y=None): 67 | """Calculate and return Y predicted value. 68 | 69 | Parameters 70 | ---------- 71 | X : array-like, shape = [n_samples, n_features] 72 | Test variables, where n_samples is the number of samples and n_features is the number of predictors. 73 | 74 | Returns 75 | ------- 76 | y_pred_test : array-like, shape = [n_samples, 1] 77 | Predicted y score for samples. 
78 | """ 79 | y_pred_test = self.model.predict(X) 80 | return y_pred_test 81 | -------------------------------------------------------------------------------- /cimcb/utils/scale.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def scale(x, axis=0, ddof=1, method="auto", mu="default", sigma="default", return_mu_sigma=False): 5 | """Scales x (which can include nans) with method: 'auto', 'pareto', 'vast', or 'level'. 6 | 7 | Parameters 8 | ---------- 9 | x: array-like 10 | An array-like object that contains the data. 11 | 12 | axis: integer or None, (default 0) 13 | The axis along which to operate 14 | 15 | ddof: integer, (default 1) 16 | The degrees of freedom correction. Note, by default ddof=1 unlike scipy.stats.zscore with ddof=0. 17 | 18 | method: string, (default "auto") 19 | Method used to scale x. Accepted methods are 'auto', 'pareto', 'vast' and 'level'. 20 | 21 | mu: number or "default", (default "default") 22 | If mu is provided it is used, however, by default it is calculated. 23 | 24 | sigma: number or "default", (default "default") 25 | If sigma is provided it is used, however, by default it is calculated. 26 | 27 | return_mu_sigma: boolean, (default False) 28 | If return_mu_sigma is True, mu and sigma are returned instead of z. Note, this is useful if mu and sigma want to be stored for future use. 29 | 30 | Returns if return_mu_sigma = False 31 | ---------------------------------- 32 | z: array-like 33 | An array-like object that contains the scaled data. 34 | 35 | Returns if return_mu_sigma = True 36 | --------------------------------- 37 | mu: number 38 | Calculated mu for x given axis and ddof. 39 | 40 | sigma: number 41 | Calculated sigma for x given axis and ddof. 
42 | """ 43 | 44 | x = np.array(x) 45 | 46 | # Simplier if we tranpose X if axis=1 (return x.T after the calculations) 47 | if axis == 1: 48 | x = x.T 49 | 50 | # Expand dimension if array is 1d 51 | if x.ndim == 1: 52 | x = np.expand_dims(x, axis=1) 53 | 54 | # Calculate mu and sigma if set to 'default' (ignoring nans) 55 | if mu is "default": 56 | mu = np.nanmean(x, axis=0) 57 | if sigma is "default": 58 | sigma = np.nanstd(x, axis=0, ddof=ddof) 59 | sigma = np.where(sigma == 0, 1, sigma) # if a value in sigma equals 0 it is converted to 1 60 | 61 | # Error check before scaling 62 | if len(mu) != len(x.T): 63 | raise ValueError("Length of mu array does not match x matrix.") 64 | if len(sigma) != len(x.T): 65 | raise ValueError("Length of sigma array does not match x matrix.") 66 | 67 | # Scale based on selected method 68 | if method is "auto": 69 | z = (x - mu) / sigma 70 | elif method is "pareto": 71 | z = (x - mu) / np.sqrt(sigma) 72 | elif method is "vast": 73 | z = ((x - mu) / sigma) * (mu / sigma) 74 | elif method is "level": 75 | z = (x - mu) / mu 76 | else: 77 | raise ValueError("Method has to be either 'auto', 'pareto', 'vast', or 'level'.") 78 | 79 | # Return x.T if axis = 1 80 | if axis == 1: 81 | z = z.T 82 | 83 | if return_mu_sigma is True: 84 | return z, mu, sigma 85 | else: 86 | return z 87 | -------------------------------------------------------------------------------- /cimcb/model/NN_LinearLinear_Sklearn.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from keras.callbacks import Callback 3 | from keras.optimizers import SGD 4 | from sklearn.neural_network import MLPClassifier 5 | from keras.models import Sequential 6 | from keras.layers import Dense 7 | from .BaseModel import BaseModel 8 | from ..utils import YpredCallback 9 | 10 | 11 | class NN_LinearLinear_Sklearn(BaseModel): 12 | """2 Layer linear-linear neural network using Keras""" 13 | 14 | parametric = False 15 | bootlist = None 16 | 17 | def __init__(self, n_nodes=2, epochs=200, learning_rate=0.01, momentum=0.0, decay=0.0, nesterov=False, loss="binary_crossentropy", batch_size=None, verbose=0): 18 | self.n_nodes = n_nodes 19 | self.verbose = verbose 20 | self.n_epochs = epochs 21 | self.k = n_nodes 22 | self.batch_size = batch_size 23 | self.loss = loss 24 | self.learning_rate = learning_rate 25 | self.momentum = momentum 26 | self.decay = decay 27 | self.optimizer = "sgd" 28 | 29 | def train(self, X, Y, epoch_ypred=False, epoch_xtest=None): 30 | """ Fit the neural network model, save additional stats (as attributes) and return Y predicted values. 31 | 32 | Parameters 33 | ---------- 34 | X : array-like, shape = [n_samples, n_features] 35 | Predictor variables, where n_samples is the number of samples and n_features is the number of predictors. 36 | 37 | Y : array-like, shape = [n_samples, 1] 38 | Response variables, where n_samples is the number of samples. 39 | 40 | Returns 41 | ------- 42 | y_pred_train : array-like, shape = [n_samples, 1] 43 | Predicted y score for samples. 
44 | """ 45 | 46 | # If batch-size is None: 47 | if self.batch_size is None: 48 | self.batch_size = len(X) 49 | 50 | # Ensure array and error check 51 | X, Y = self.input_check(X, Y) 52 | 53 | self.model = MLPClassifier(hidden_layer_sizes=(self.n_nodes,), 54 | activation='identity', 55 | solver=self.optimizer, 56 | learning_rate_init=self.learning_rate, 57 | momentum=self.momentum, 58 | batch_size=self.batch_size, 59 | nesterovs_momentum=False, 60 | max_iter=self.n_epochs) 61 | 62 | # Fit 63 | self.model.fit(X, Y) 64 | 65 | y_pred_train = self.model.predict(X) 66 | 67 | # Storing X, Y, and Y_pred 68 | self.Y_pred = y_pred_train 69 | self.X = X 70 | self.Y = Y 71 | return y_pred_train 72 | 73 | def test(self, X, Y=None): 74 | """Calculate and return Y predicted value. 75 | 76 | Parameters 77 | ---------- 78 | X : array-like, shape = [n_samples, n_features] 79 | Test variables, where n_samples is the number of samples and n_features is the number of predictors. 80 | 81 | Returns 82 | ------- 83 | y_pred_test : array-like, shape = [n_samples, 1] 84 | Predicted y score for samples. 85 | """ 86 | y_pred_test = self.model.predict(X) 87 | return y_pred_test 88 | -------------------------------------------------------------------------------- /cimcb/model/NN_LinearLogit_Sklearn.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from keras.callbacks import Callback 3 | from keras.optimizers import SGD 4 | from sklearn.neural_network import MLPClassifier 5 | from keras.models import Sequential 6 | from keras.layers import Dense 7 | from .BaseModel import BaseModel 8 | from ..utils import YpredCallback 9 | 10 | 11 | class NN_LinearLogit_Sklearn(BaseModel): 12 | """2 Layer linear-linear neural network using Keras""" 13 | 14 | parametric = False 15 | bootlist = None 16 | 17 | def __init__(self, n_nodes=2, epochs2=200, learning_rate=0.01, momentum=0.0, decay=0.0, nesterov=False, loss="binary_crossentropy", batch_size=None, verbose=0): 18 | self.n_nodes = n_nodes 19 | self.verbose = verbose 20 | self.n_epochs = epochs2 21 | self.k = n_nodes 22 | self.batch_size = batch_size 23 | self.loss = loss 24 | self.learning_rate = learning_rate 25 | self.momentum = momentum 26 | self.decay = decay 27 | self.optimizer = "sgd" 28 | 29 | def train(self, X, Y, epoch_ypred=False, epoch_xtest=None): 30 | """ Fit the neural network model, save additional stats (as attributes) and return Y predicted values. 31 | 32 | Parameters 33 | ---------- 34 | X : array-like, shape = [n_samples, n_features] 35 | Predictor variables, where n_samples is the number of samples and n_features is the number of predictors. 36 | 37 | Y : array-like, shape = [n_samples, 1] 38 | Response variables, where n_samples is the number of samples. 39 | 40 | Returns 41 | ------- 42 | y_pred_train : array-like, shape = [n_samples, 1] 43 | Predicted y score for samples. 
44 | """ 45 | 46 | # If batch-size is None: 47 | if self.batch_size is None: 48 | self.batch_size = len(X) 49 | 50 | # Ensure array and error check 51 | X, Y = self.input_check(X, Y) 52 | 53 | self.model = MLPClassifier(hidden_layer_sizes=(self.n_nodes,), 54 | activation='identity', 55 | solver=self.optimizer, 56 | learning_rate_init=self.learning_rate, 57 | momentum=self.momentum, 58 | batch_size=self.batch_size, 59 | nesterovs_momentum=False, 60 | max_iter=self.n_epochs) 61 | 62 | # Fit 63 | self.model.fit(X, Y) 64 | 65 | y_pred_train = self.model.predict(X) 66 | 67 | # Storing X, Y, and Y_pred 68 | self.Y_pred = y_pred_train 69 | self.X = X 70 | self.Y = Y 71 | return y_pred_train 72 | 73 | def test(self, X, Y=None): 74 | """Calculate and return Y predicted value. 75 | 76 | Parameters 77 | ---------- 78 | X : array-like, shape = [n_samples, n_features] 79 | Test variables, where n_samples is the number of samples and n_features is the number of predictors. 80 | 81 | Returns 82 | ------- 83 | y_pred_test : array-like, shape = [n_samples, 1] 84 | Predicted y score for samples. 85 | """ 86 | y_pred_test = self.model.predict(X) 87 | return y_pred_test 88 | -------------------------------------------------------------------------------- /cimcb/bootstrap/Perc.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from .BaseBootstrap import BaseBootstrap 3 | from ..utils import nested_getattr 4 | 5 | 6 | class Perc(BaseBootstrap): 7 | """ Returns bootstrap confidence intervals using the percentile boostrap interval. 8 | 9 | Parameters 10 | ---------- 11 | model : object 12 | This object is assumed to store bootlist attributes in .model (e.g. modelPLS.model.x_scores_). 13 | 14 | X : array-like, shape = [n_samples, n_features] 15 | Predictor variables, where n_samples is the number of samples and n_features is the number of predictors. 16 | 17 | Y : array-like, shape = [n_samples, 1] 18 | Response variables, where n_samples is the number of samples. 19 | 20 | bootlist : array-like, shape = [n_bootlist, 1] 21 | List of attributes to calculate and return bootstrap confidence intervals. 22 | 23 | bootnum : a positive integer, (default 100) 24 | The number of bootstrap samples used in the computation. 25 | 26 | seed: integer or None (default None) 27 | Used to seed the generator for the resample with replacement. 28 | 29 | Returns 30 | ------- 31 | bootci : dict of arrays 32 | Keys correspond to attributes in bootlist. 33 | Each array contains 95% confidence intervals. 34 | To return bootci, initalise then use method run(). 
35 | """ 36 | 37 | def __init__(self, model, X, Y, bootlist, bootnum=100, seed=None): 38 | super().__init__(model=model, X=X, Y=Y, bootlist=bootlist, bootnum=bootnum, seed=seed) 39 | 40 | def calc_stat(self): 41 | """Stores selected attributes (from self.bootlist) for the original model.""" 42 | self.stat = {} 43 | for i in self.bootlist: 44 | self.stat[i] = nested_getattr(self.model, i) 45 | 46 | def calc_bootidx(self): 47 | super().calc_bootidx() 48 | 49 | def calc_bootstat(self): 50 | super().calc_bootstat() 51 | 52 | def calc_bootci(self): 53 | self.bootci = {} 54 | for i in self.bootlist: 55 | self.bootci[i] = self.bootci_method(self.bootstat[i], self.stat[i]) 56 | 57 | def run(self): 58 | self.calc_stat() 59 | self.calc_bootidx() 60 | self.calc_bootstat() 61 | self.calc_bootci() 62 | return self.bootci 63 | 64 | @staticmethod 65 | def bootci_method(bootstat, stat): 66 | """Calculates bootstrap confidence intervals using the percentile bootstrap interval.""" 67 | if bootstat[0].ndim == 1: 68 | boot_ci = [] 69 | # Calculate bootci for each component (peak), and append it to bootci 70 | for i in range(len(bootstat[0])): 71 | bootstat_i = [item[i] for item in bootstat] 72 | lower_ci = np.percentile(bootstat_i, 2.5) 73 | upper_ci = np.percentile(bootstat_i, 97.5) 74 | boot_ci.append([lower_ci, upper_ci]) 75 | boot_ci = np.array(boot_ci) 76 | 77 | # Recursive component (to get ndim = 1, and append) 78 | else: 79 | ncomp = stat.shape[1] 80 | boot_ci = [] 81 | for k in range(ncomp): 82 | bootstat_k = [] 83 | for j in range(len(bootstat)): 84 | bootstat_k.append(bootstat[j][:, k]) 85 | boot_ci_k = Perc.bootci_method(bootstat_k, stat[:, k]) 86 | boot_ci.append(boot_ci_k) 87 | boot_ci = np.array(boot_ci) 88 | return boot_ci 89 | -------------------------------------------------------------------------------- /cimcb/utils/binary_metrics.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sklearn.metrics import confusion_matrix, roc_auc_score 3 | 4 | 5 | def binary_metrics(y_true, y_pred, cut_off=0.5, parametric=True, k=None): 6 | """ Return a dict of binary stats with the following metrics: R2, auc, accuracy, precision, sensitivity, specificity, and F1 score. 7 | 8 | Parameters 9 | ---------- 10 | y_true : array-like, shape = [n_samples] 11 | Binary label for samples (0s and 1s). 12 | 13 | y_pred : array-like, shape = [n_samples] 14 | Predicted y score for samples. 15 | 16 | cut_off : number, (default 0.5) 17 | A value for y_pred greater-than or equal to the cut_off will be treated as 1, otherwise it will be treated as 0 for the confusion matrix. 18 | 19 | parametric : boolean, (default True) 20 | If parametric is True, calculate R2. 21 | 22 | Returns 23 | ------- 24 | stats: dict 25 | dict containing calculated R2, auc, accuracy, precision, sensitivity, specificity, and F1 score. 
26 | """ 27 | 28 | # Convert to array 29 | y_true_arr = np.array(y_true) 30 | y_pred_arr = np.array(y_pred) 31 | 32 | # Error checks 33 | if y_true_arr.ndim != 1: 34 | raise ValueError("y_true should only have 1 dimension.") 35 | if y_pred_arr.ndim != 1: 36 | raise ValueError("y_pred should only have 1 dimension.") 37 | if y_true_arr.shape[0] != y_pred_arr.shape[0]: 38 | raise ValueError("The number of values in y_true should match y_pred.") 39 | if np.array_equal(sorted(set(y_true_arr)), [0, 1]) is False: 40 | raise ValueError("y_true should only contain 0s and 1s") 41 | 42 | # Get confusion matrix 43 | try: 44 | y_pred_round = np.where(y_pred_arr >= cut_off, 1, 0) 45 | except RuntimeWarning: 46 | raise ValueError("Kevin: This warning says there are nans. Something is not right if y predicted are nans.") 47 | tn, fp, fn, tp = confusion_matrix(y_true_arr, y_pred_round).ravel() 48 | 49 | # Binary statistics dictionary 50 | stats = {} 51 | if parametric is True: 52 | stats["R²"] = 1 - (sum((y_true_arr - y_pred_arr) ** 2) / sum((y_true_arr - np.mean(y_true_arr)) ** 2)) 53 | else: 54 | stats["R²"] = np.nan 55 | 56 | try: 57 | stats["AUC"] = roc_auc_score(y_true_arr, y_pred_arr) 58 | except ValueError: 59 | raise ValueError("You need to lower the learning_rate! This is a common issue when using the ‘mean_squared_error’ loss function called exploding gradients. 'At an extreme, the values of weights can become so large as to overflow and result in NaN values' (REF: https://machinelearningmastery.com/exploding-gradients-in-neural-networks/).") 60 | 61 | stats["ACCURACY"] = safe_div((tp + tn), (tp + tn + fp + fn)) 62 | stats["PRECISION"] = safe_div((tp), (tp + fp)) 63 | stats["SENSITIVITY"] = safe_div((tp), (tp + fn)) 64 | stats["SPECIFICITY"] = safe_div((tn), (tn + fp)) 65 | stats["F1-SCORE"] = safe_div((2 * tp), (2 * tp + fp + fn)) 66 | 67 | # Additional: AIC/BIC/SSE 68 | n = len(y_true) 69 | resid = y_true - y_pred 70 | rss = sum(resid ** 2) 71 | if rss == 0: 72 | stats["SSE"] = 0 73 | stats["AIC"] = 0 74 | stats["BIC"] = 0 75 | else: 76 | stats["SSE"] = rss / n 77 | if k is None: 78 | stats["AIC"] = 0 79 | stats["BIC"] = 0 80 | else: 81 | stats["AIC"] = 2 * k - 2 * np.log(rss) 82 | stats["BIC"] = n * np.log(rss / n) + k * np.log(n) 83 | return stats 84 | 85 | 86 | def safe_div(a, b): 87 | """Return np.nan if the demoninator is 0.""" 88 | if b == 0: 89 | return np.nan 90 | return a / b 91 | -------------------------------------------------------------------------------- /cimcb/model/NN_SoftmaxSoftmax.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from keras.callbacks import Callback 3 | from keras.optimizers import SGD, Adam 4 | from keras.models import Sequential 5 | from keras.layers import Dense 6 | from scipy.stats import logistic 7 | from .BaseModel import BaseModel 8 | from ..utils import YpredCallback 9 | 10 | 11 | class NN_SoftmaxSoftmax(BaseModel): 12 | """2 Layer logistic-logistic neural network using Keras""" 13 | 14 | parametric = True 15 | bootlist = None 16 | 17 | def __init__(self, n_neurons=2, epochs=200, learning_rate=0.01, momentum=0.0, decay=0.0, nesterov=False, loss="categorical_crossentropy", batch_size=None, verbose=0): 18 | self.n_neurons = n_neurons 19 | self.verbose = verbose 20 | self.n_epochs = epochs 21 | self.k = n_neurons 22 | self.batch_size = batch_size 23 | self.loss = loss 24 | self.optimizer = SGD(lr=learning_rate, momentum=momentum, decay=decay, nesterov=nesterov) 25 | #self.optimizer = 
--------------------------------------------------------------------------------
/cimcb/model/NN_SoftmaxSoftmax.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from keras.callbacks import Callback
3 | from keras.optimizers import SGD, Adam
4 | from keras.models import Sequential
5 | from keras.layers import Dense
6 | from scipy.stats import logistic
7 | from .BaseModel import BaseModel
8 | from ..utils import YpredCallback
9 | 
10 | 
11 | class NN_SoftmaxSoftmax(BaseModel):
12 |     """2 Layer neural network using Keras (sigmoid hidden layer, softmax output layer)"""
13 | 
14 |     parametric = True
15 |     bootlist = None
16 | 
17 |     def __init__(self, n_neurons=2, epochs=200, learning_rate=0.01, momentum=0.0, decay=0.0, nesterov=False, loss="categorical_crossentropy", batch_size=None, verbose=0):
18 |         self.n_neurons = n_neurons
19 |         self.verbose = verbose
20 |         self.n_epochs = epochs
21 |         self.k = n_neurons
22 |         self.batch_size = batch_size
23 |         self.loss = loss
24 |         self.optimizer = SGD(lr=learning_rate, momentum=momentum, decay=decay, nesterov=nesterov)
25 |         # self.optimizer = Adam(lr=learning_rate, decay=decay)
26 | 
27 |         self.__name__ = 'cimcb.model.NN_SoftmaxSoftmax'
28 |         self.__params__ = {'n_neurons': n_neurons, 'epochs': epochs, 'learning_rate': learning_rate, 'momentum': momentum, 'decay': decay, 'nesterov': nesterov, 'loss': loss, 'batch_size': batch_size, 'verbose': verbose}
29 | 
30 |     def set_params(self, params):
31 |         self.__init__(**params)
32 | 
33 |     def train(self, X, Y, epoch_ypred=False, epoch_xtest=None):
34 |         """ Fit the neural network model, save additional stats (as attributes) and return Y predicted values.
35 | 
36 |         Parameters
37 |         ----------
38 |         X : array-like, shape = [n_samples, n_features]
39 |             Predictor variables, where n_samples is the number of samples and n_features is the number of predictors.
40 | 
41 |         Y : array-like, shape = [n_samples, 1]
42 |             Response variables, where n_samples is the number of samples.
43 | 
44 |         Returns
45 |         -------
46 |         y_pred_train : array-like, shape = [n_samples, 1]
47 |             Predicted y score for samples.
48 |         """
49 | 
50 |         # If batch-size is None:
51 |         if self.batch_size is None:
52 |             self.batch_size = len(X)
53 | 
54 |         self.model = Sequential()
55 |         self.model.add(Dense(self.n_neurons, activation="sigmoid", input_dim=len(X.T)))
56 |         self.model.add(Dense(len(Y[0]), activation="softmax"))
57 |         self.model.compile(optimizer=self.optimizer, loss=self.loss, metrics=["accuracy"])
58 | 
59 |         # If epoch_ypred is True, calculate ypred for each epoch
60 |         if epoch_ypred is True:
61 |             self.epoch = YpredCallback(self.model, X, epoch_xtest)
62 |         else:
63 |             self.epoch = Callback()
64 | 
65 |         # Fit
66 |         self.model.fit(X, Y, epochs=self.n_epochs, batch_size=self.batch_size, verbose=self.verbose, callbacks=[self.epoch])
67 |         y_pred_train = self.model.predict(X)
68 | 
69 |         # Storing X, Y, and Y_pred
70 |         self.Y_pred = y_pred_train
71 |         self.X = X
72 |         self.Y = Y
73 |         return y_pred_train
74 | 
75 |     def test(self, X, Y=None):
76 |         """Calculate and return Y predicted value.
77 | 
78 |         Parameters
79 |         ----------
80 |         X : array-like, shape = [n_samples, n_features]
81 |             Test variables, where n_samples is the number of samples and n_features is the number of predictors.
82 | 
83 |         Returns
84 |         -------
85 |         y_pred_test : array-like, shape = [n_samples, 1]
86 |             Predicted y score for samples.
87 |         """
88 |         y_pred_test = self.model.predict(X)
89 |         return y_pred_test
90 | 
--------------------------------------------------------------------------------
/cimcb/utils/multiclass_metrics.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from sklearn.metrics import multilabel_confusion_matrix, roc_auc_score
3 | 
4 | 
5 | def multiclass_metrics(y_true, y_pred, cut_off=0.5, parametric=True, k=None):
6 |     """ Return a dict of macro-averaged multiclass stats with the following metrics: R², AUC, accuracy, precision, sensitivity, specificity, and F1 score.
7 | 
8 |     Parameters
9 |     ----------
10 |     y_true : array-like, shape = [n_samples]
11 |         Binary label for samples (0s and 1s).
12 | 
13 |     y_pred : array-like, shape = [n_samples]
14 |         Predicted y score for samples.
15 | 
16 |     cut_off : number, (default 0.5)
17 |         A value for y_pred greater-than or equal to the cut_off will be treated as 1, otherwise it will be treated as 0 for the confusion matrix.
18 | 
19 |     parametric : boolean, (default True)
20 |         If parametric is True, calculate R².
21 | 
22 |     Returns
23 |     -------
24 |     stats: dict
25 |         dict containing the calculated macro-averaged R², AUC, accuracy, precision, sensitivity, specificity, and F1 score.
/cimcb/utils/multiclass_metrics.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from sklearn.metrics import multilabel_confusion_matrix, roc_auc_score
3 | 
4 | 
5 | def multiclass_metrics(y_true, y_pred, cut_off=0.5, parametric=True, k=None):
6 |     """ Return a dict of multiclass stats (macro-averaged) with the following metrics: R2, auc, accuracy, precision, sensitivity, specificity, and F1 score.
7 | 
8 |     Parameters
9 |     ----------
10 |     y_true : array-like, shape = [n_samples, n_classes]
11 |         One-hot encoded labels for samples.
12 | 
13 |     y_pred : array-like, shape = [n_samples, n_classes]
14 |         Predicted y score for each class per sample.
15 | 
16 |     cut_off : number, (default 0.5)
17 |         Kept for API consistency with binary_metrics; class assignment here uses the argmax of y_pred rather than a cut-off.
18 | 
19 |     parametric : boolean, (default True)
20 |         If parametric is True, calculate R2.
21 | 
22 |     Returns
23 |     -------
24 |     stats: dict
25 |         dict containing calculated R2, auc, accuracy, precision, sensitivity, specificity, and F1 score.
26 |     """
27 | 
28 |     # Convert to array
29 |     y_true_arr = np.array(y_true)
30 |     y_pred_arr = np.array(y_pred)
31 | 
32 |     # Error checks are not yet implemented here (see binary_metrics for the binary equivalents)
33 | 
34 |     # Get confusion matrix (class assignment by argmax; requires a NaN-free y_pred)
35 |     if np.isnan(y_pred_arr).any():
36 |         raise ValueError("y_pred contains NaN values, so a confusion matrix cannot be calculated. Check the model predictions.")
37 |     y_pred_round = np.zeros(y_pred_arr.shape)
38 |     idx = y_pred_arr.argmax(axis=-1)
39 |     for i in range(len(idx)):
40 |         y_pred_round[i, idx[i]] = 1
41 |     conf = multilabel_confusion_matrix(y_true_arr, y_pred_round).ravel()
42 |     n_groups = len(conf) // 4
43 |     tn = conf[0::4]
44 |     fp = conf[1::4]
45 |     fn = conf[2::4]
46 |     tp = conf[3::4]
47 | 
48 |     # Multi-Class Stats Dictionary (Macro Average)
49 |     stats = {}
50 | 
51 |     # R^2 (macro R^2)
52 |     ones = np.ones(n_groups)
53 |     RSS = sum((y_true_arr - y_pred_arr) ** 2)
54 |     TSS = sum((y_true_arr - np.mean(y_true_arr, axis=0)) ** 2)
55 |     R2 = ones - (RSS / TSS)
56 |     R2macro = sum(R2) / n_groups
57 |     stats["R²"] = R2macro
58 | 
59 |     try:
60 |         stats["AUC"] = roc_auc_score(y_true_arr, y_pred_arr, average='macro')
61 |     except ValueError:
62 |         raise ValueError("AUC could not be calculated. This usually means y_pred contains NaNs or constant values, e.g. from exploding gradients when the learning_rate is too high (REF: https://machinelearningmastery.com/exploding-gradients-in-neural-networks/). Try lowering the learning_rate.")
63 | 
64 |     stats["ACCURACY"] = safe_div(np.sum(safe_div((tp + tn), (tp + tn + fp + fn))), n_groups)
65 |     stats["PRECISION"] = safe_div(np.sum(safe_div((tp), (tp + fp))), n_groups)
66 |     stats["SENSITIVITY"] = safe_div(np.sum(safe_div((tp), (tp + fn))), n_groups)
67 |     stats["SPECIFICITY"] = safe_div(np.sum(safe_div((tn), (tn + fp))), n_groups)
68 |     stats["F1-SCORE"] = safe_div(np.sum(safe_div((2 * tp), (2 * tp + fp + fn))), n_groups)
69 | 
70 |     # SSE/AIC/BIC are not currently calculated for multiclass models
71 |     stats["SSE"] = 0
72 |     stats["AIC"] = 0
73 |     stats["BIC"] = 0
74 |     # Per Group
75 |     # stats["ACCURACYgroup"] = safe_div((tp + tn), (tp + tn + fp + fn))
76 |     # stats["PRECISIONgroup"] = safe_div((tp), (tp + fp))
77 |     # stats["SENSITIVITYgroup"] = safe_div((tp), (tp + fn))
78 |     # stats["SPECIFICITYgroup"] = safe_div((tn), (tn + fp))
79 |     # stats["F1-SCOREgroup"] = safe_div((2 * tp), (2 * tp + fp + fn))
80 |     return stats
81 | 
82 | 
83 | def safe_div(a, b):
84 |     """Return np.nan if the denominator is 0 (works for scalar or array denominators)."""
85 |     if np.any(b == 0):
86 |         return np.nan
87 |     return a / b
88 | 
--------------------------------------------------------------------------------
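A minimal usage sketch for multiclass_metrics (illustrative; this assumes multiclass_metrics is exported from cimcb.utils like the other helpers, and both arguments are [n_samples, n_classes] matrices):

    import numpy as np
    from cimcb.utils import multiclass_metrics

    Y_true = np.array([[1, 0], [1, 0], [0, 1], [0, 1]])                  # one-hot labels
    Y_prob = np.array([[0.8, 0.2], [0.4, 0.6], [0.3, 0.7], [0.1, 0.9]])  # predicted scores
    stats = multiclass_metrics(Y_true, Y_prob)
    print(stats["ACCURACY"], stats["F1-SCORE"])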
/cimcb/bootstrap/Per.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from .BaseBootstrap import BaseBootstrap
3 | from ..utils import nested_getattr
4 | 
5 | 
6 | class Per(BaseBootstrap):
7 |     """ Returns bootstrap confidence intervals using the percentile bootstrap interval.
8 | 
9 |     Parameters
10 |     ----------
11 |     model : object
12 |         This object is assumed to store bootlist attributes in .model (e.g. modelPLS.model.x_scores_).
13 | 
14 |     X : array-like, shape = [n_samples, n_features]
15 |         Predictor variables, where n_samples is the number of samples and n_features is the number of predictors.
16 | 
17 |     Y : array-like, shape = [n_samples, 1]
18 |         Response variables, where n_samples is the number of samples.
19 | 
20 |     bootlist : array-like, shape = [n_bootlist, 1]
21 |         List of attributes to calculate and return bootstrap confidence intervals.
22 | 
23 |     bootnum : a positive integer, (default 100)
24 |         The number of bootstrap samples used in the computation.
25 | 
26 |     seed: integer or None (default None)
27 |         Used to seed the generator for the resample with replacement.
28 | 
29 |     Returns
30 |     -------
31 |     bootci : dict of arrays
32 |         Keys correspond to attributes in bootlist.
33 |         Each array contains 95% confidence intervals.
34 |         To return bootci, initialise then use method run().
35 |     """
36 | 
37 |     def __init__(self, model, bootnum=100, seed=None, n_cores=-1, stratify=True):
38 |         super().__init__(model=model, bootnum=bootnum, seed=seed, n_cores=n_cores, stratify=stratify)
39 |         self.__name__ = "Per"
40 | 
41 |     def calc_stat(self):
42 |         super().calc_stat()
43 | 
44 |     def calc_bootidx(self):
45 |         super().calc_bootidx()
46 | 
47 |     def calc_bootstat(self):
48 |         super().calc_bootstat()
49 | 
50 |     def calc_bootci(self):
51 |         self.bootci = {}
52 |         for i in self.bootlist:
53 |             self.bootci[i] = self.bootci_method(self.bootstat[i], self.stat[i])
54 | 
55 |     def run(self):
56 |         self.calc_stat()
57 |         self.calc_bootidx()
58 |         self.calc_bootstat()
59 |         self.calc_bootci()
60 | 
61 |     @staticmethod
62 |     def bootci_method(bootstat, stat):
63 |         """Calculates bootstrap confidence intervals using the percentile bootstrap interval."""
64 |         # Ensure stat is an ndarray so that .ndim is always available
65 |         stat = np.asarray(stat)
66 |         if stat.ndim == 1:
67 |             boot_ci = []
68 |             # Calculate bootci for each component (peak), and append it to bootci
69 |             for i in range(len(bootstat[0])):
70 |                 bootstat_i = [item[i] for item in bootstat]
71 |                 lower_ci = np.percentile(bootstat_i, 2.5)
72 |                 upper_ci = np.percentile(bootstat_i, 97.5)
73 |                 mid_ci = np.percentile(bootstat_i, 50)
74 |                 boot_ci.append([lower_ci, upper_ci, mid_ci])
75 |             boot_ci = np.array(boot_ci)
76 |         elif stat.ndim == 0:
77 |             lower_ci = np.percentile(bootstat, 2.5)
78 |             upper_ci = np.percentile(bootstat, 97.5)
79 |             mid_ci = np.percentile(bootstat, 50)
80 |             boot_ci = [lower_ci, upper_ci, mid_ci]
81 |             boot_ci = np.array(boot_ci)
82 |         # Recursive component (to get ndim = 1, and append)
83 |         else:
84 |             ncomp = stat.shape[1]
85 |             boot_ci = []
86 |             for k in range(ncomp):
87 |                 bootstat_k = []
88 |                 for j in range(len(bootstat)):
89 |                     bootstat_k.append(bootstat[j][:, k])
90 |                 boot_ci_k = Per.bootci_method(bootstat_k, stat[:, k])
91 |                 boot_ci.append(boot_ci_k)
92 |             boot_ci = np.array(boot_ci)
93 |         return boot_ci
94 | 
--------------------------------------------------------------------------------
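The percentile interval above reduces to three np.percentile calls per statistic; the same idea as a standalone sketch (illustrative, with a made-up bootstrap sample):

    import numpy as np

    rng = np.random.RandomState(42)
    bootstat = rng.normal(loc=0.8, scale=0.05, size=100)  # hypothetical bootstrapped AUCs
    lower_ci = np.percentile(bootstat, 2.5)
    upper_ci = np.percentile(bootstat, 97.5)
    mid_ci = np.percentile(bootstat, 50)
    print([lower_ci, upper_ci, mid_ci])  # 95% confidence interval plus median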
/cimcb/plot/pca.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pandas as pd
3 | from sklearn import decomposition
4 | from bokeh.plotting import output_notebook, show
5 | from bokeh.layouts import gridplot
6 | from .scatter import scatter
7 | from ..utils import ci95_ellipse
8 | 
9 | 
10 | def pca(X, pcx=1, pcy=2, group_label=None, sample_label=None, peak_label=None):
11 |     """Creates a PCA scores and loadings plot using Bokeh.
12 | 
13 |     Required Parameters
14 |     -------------------
15 |     X : array-like, shape = [n_samples, n_features]
16 |         Input data
17 |     """
18 | 
19 |     # Set model
20 |     model = decomposition.PCA()
21 |     model.fit(X)
22 |     scores_ = model.transform(X)
23 |     explained_var_ = model.explained_variance_ratio_ * 100
24 | 
25 |     # Extract scores, explained variance, and loadings for pcx and pcy
26 |     x_score = scores_[:, (pcx - 1)]
27 |     y_score = scores_[:, (pcy - 1)]
28 |     x_expvar = explained_var_[(pcx - 1)]
29 |     y_expvar = explained_var_[(pcy - 1)]
30 |     x_load = model.components_[(pcx - 1), :]
31 |     y_load = model.components_[(pcy - 1), :]
32 | 
33 |     # Colour for fig_score; if group_label is None, treat all samples as one group
34 |     if group_label is None:
35 |         col = ["blue", "green", "red"]
36 |         group_label = [0] * len(X)
37 |     else:
38 |         col = None
39 | 
40 |     # Ensure group_label is an np.array
41 |     group_label = np.array(group_label)
42 | 
43 |     # Scores plot
44 |     fig_score = scatter(x_score, y_score, group=group_label, label=sample_label, size=5, xlabel="PC {} ({:0.1f}%)".format(pcx, x_expvar), ylabel="PC {} ({:0.1f}%)".format(pcy, y_expvar), title="PCA Score Plot (PC{} vs. PC{})".format(pcx, pcy), font_size="15pt", width=490, height=430, hover_xy=False, col_palette=col)
45 | 
46 |     # Loadings plot
47 |     fig_load = scatter(x_load, y_load, size=7, label=peak_label, xlabel="PC {} ({:0.1f}%)".format(pcx, x_expvar), ylabel="PC {} ({:0.1f}%)".format(pcy, y_expvar), title="PCA Loadings Plot (PC{} vs. PC{})".format(pcx, pcy), font_size="15pt", width=490, height=430, hover_xy=False, shape="triangle", legend=False, hline=True, vline=True)
48 | 
49 |     # Score plot extra: 95% confidence ellipse using PCA
50 |     unique_group = np.sort(np.unique(group_label))
51 | 
52 |     # Set colour per group
53 |     list_color = ["red", "blue", "green", "black", "orange", "yellow", "brown", "cyan"]
54 |     while len(list_color) < len(unique_group):  # Loop over list_color if number of groups > len(list_colour)
55 |         list_color += list_color
56 | 
57 |     # Add 95% confidence ellipse for each unique group in a loop
58 |     for i in range(len(unique_group)):
59 |         # Get scores for the corresponding group
60 |         group_i_x = []
61 |         group_i_y = []
62 |         for j in range(len(group_label)):
63 |             if group_label[j] == unique_group[i]:
64 |                 group_i_x.append(x_score[j])
65 |                 group_i_y.append(y_score[j])
66 | 
67 |         # Calculate ci95 ellipse for each group
68 |         data_circ_group = pd.DataFrame({"0": group_i_x, "1": group_i_y})
69 |         m, outside_m = ci95_ellipse(data_circ_group, type="mean")
70 |         p, outside_p = ci95_ellipse(data_circ_group, type="pop")
71 | 
72 |         # Plot ci95 ellipse outer line
73 |         fig_score.line(m[:, 0], m[:, 1], color=list_color[i], line_width=2, alpha=0.8, line_dash="solid")
74 |         fig_score.line(p[:, 0], p[:, 1], color=list_color[i], alpha=0.4)
75 | 
76 |         # Plot ci95 ellipse shade
77 |         fig_score.patch(m[:, 0], m[:, 1], color=list_color[i], alpha=0.07)
78 |         fig_score.patch(p[:, 0], p[:, 1], color=list_color[i], alpha=0.01)
79 | 
80 |     # Output this figure with fig_score and fig_load
81 |     output_notebook()
82 |     fig = gridplot([[fig_score, fig_load]])
83 |     show(fig)
84 | 
--------------------------------------------------------------------------------
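A minimal usage sketch for the pca plot function (illustrative; the data matrix and group labels are hypothetical, and the figure renders in a Jupyter notebook via output_notebook):

    import numpy as np
    from cimcb.plot import pca

    X = np.random.rand(20, 5)                   # hypothetical data matrix
    groups = ["Control"] * 10 + ["Case"] * 10   # one group label per sample
    pca(X, pcx=1, pcy=2, group_label=groups)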
/cimcb/utils/knnimpute.py:
--------------------------------------------------------------------------------
1 | import warnings
2 | import numpy as np
3 | from scipy.spatial.distance import pdist, squareform
4 | from .wmean import wmean
5 | 
6 | 
7 | def knnimpute(x, k=3):
8 |     """kNN missing value imputation using Euclidean distance.
9 | 
10 |     Parameters
11 |     ----------
12 |     x: array-like
13 |         An array-like object that contains the data with NaNs.
14 | 
15 |     k: positive integer excluding 0, (default 3)
16 |         The number of nearest neighbours to use.
17 | 
18 |     Returns
19 |     -------
20 |     z: array-like
21 |         An array-like object corresponding to x with NaNs imputed.
22 |     """
23 | 
24 |     # Transpose x so we treat columns as features, and rows as samples
25 |     x = x.T
26 | 
27 |     # Error check for k value
28 |     if type(k) is not int:
29 |         raise ValueError("k is not an integer")
30 |     if k < 1:
31 |         raise ValueError("k must be greater than zero")
32 |     k_max = x.shape[1] - 1
33 |     if k_max < k:
34 |         raise ValueError("k value is too high. Max k value is {}".format(k_max))
35 | 
36 |     # z is the returned array with NaNs imputed
37 |     z = x.copy()
38 | 
39 |     # Use columns without NaNs for knnimpute
40 |     nan_check = np.isnan(x)
41 |     no_nan = np.where(sum(nan_check.T) == 0, 1, 0)
42 | 
43 |     # Error check that not all columns have NaNs
44 |     x_no_nan = x[no_nan == 1]
45 |     if x_no_nan.size == 0:
46 |         raise ValueError("All columns of the input data contain missing values. Unable to impute missing values.")
47 | 
48 |     # Calculate pairwise distances between columns, and convert to square-form distance matrix
49 |     pair_dist = pdist(x_no_nan.T, metric="euclidean")
50 |     sq_dist = squareform(pair_dist)
51 | 
52 |     # Make diagonals negative and sort
53 |     dist = np.sort(sq_dist - np.eye(sq_dist.shape[0], sq_dist.shape[1])).T
54 |     dist_idx = np.argsort(sq_dist - np.eye(sq_dist.shape[0], sq_dist.shape[1])).T
55 | 
56 |     # Find where neighbours are equal distance
57 |     equal_dist_a = np.diff(dist[1:].T, 1, 1).T == 0
58 |     equal_dist_a = equal_dist_a.astype(int)  # Convert to integer
59 |     equal_dist_b = np.zeros(len(dist))
60 |     equal_dist = np.concatenate((equal_dist_a, [equal_dist_b]))  # Concatenate
61 | 
62 |     # Get rows and cols for missing values
63 |     nan_idx = np.argwhere(nan_check)
64 |     nan_rows = nan_idx[:, 0]
65 |     nan_cols = nan_idx[:, 1]
66 |     # Make sure rows/cols are in a list (note: this happens when there is 1 missing value)
67 |     if type(nan_rows) is not np.ndarray:
68 |         nan_rows = [nan_rows]
69 |         nan_cols = [nan_cols]
70 | 
71 |     # Impute each NaN value
72 |     for i in range(len(nan_rows)):
73 | 
74 |         # Error check for rows with all NaNs
75 |         if np.isnan(x[nan_rows[i], :]).all():
76 |             warnings.warn("Row {} contains all NaNs, so Row {} is imputed with zeros.".format(nan_rows[i], nan_rows[i]), Warning)
77 | 
78 |         # Create a loop from 1 to len(dist_idx) - k
79 |         lastk = len(dist_idx) - k
80 |         loopk = [1]
81 |         while lastk > loopk[-1]:
82 |             loopk.append(loopk[-1] + 1)
83 | 
84 |         # Impute
85 |         for j in loopk:
86 |             L_a = equal_dist[j + k - 2 :, nan_cols[i]]
87 |             L = np.where(L_a == 0)[0][0]  # equal_dist neighbours
88 | 
89 |             x_vals_r = nan_rows[i]
90 |             x_vals_c = dist_idx[j : j + k + L, nan_cols[i]]
91 |             x_vals = x[x_vals_r, x_vals_c]
92 |             weights = 1 / dist[1 : k + L + 1, nan_cols[i]]
93 |             imp_val = wmean(x_vals, weights)  # imputed value
94 |             if imp_val is not np.nan:
95 |                 z[nan_rows[i], nan_cols[i]] = imp_val
96 |                 break
97 | 
98 |     # Transpose z
99 |     z = z.T
100 |     return z
101 | 
--------------------------------------------------------------------------------
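A minimal usage sketch for knnimpute (illustrative; the matrix is hypothetical, and the NaN is replaced by a distance-weighted mean over the k nearest complete samples):

    import numpy as np
    from cimcb.utils import knnimpute

    x = np.array([[1.0, 2.0, np.nan, 4.0],
                  [1.1, 2.1, 3.1, 4.1],
                  [0.9, 1.9, 2.9, 3.9]])
    z = knnimpute(x, k=2)   # returns x with the NaN imputed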
/cimcb/model/SVM.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pandas as pd
3 | from sklearn.svm import SVC
4 | from sklearn.metrics import roc_auc_score
5 | from .BaseModel import BaseModel
6 | from ..utils import binary_metrics, binary_evaluation
7 | 
8 | 
9 | class SVM(BaseModel):
10 |     """Support Vector Machine"""
11 | 
12 |     parametric = True
13 |     bootlist = ["Y_pred", "model.eval_metrics_"]  # list of metrics to bootstrap
14 | 
15 |     def __init__(self, C=1.0, kernel="rbf", degree=3, gamma="auto", tol=0.001, max_iter=-1):
16 |         self.model = SVC(C=C, kernel=kernel, degree=degree, gamma=gamma, probability=True, tol=tol, max_iter=max_iter)
17 |         self.k = None
18 | 
19 |         self.__name__ = 'cimcb.model.SVM'
20 |         self.__params__ = {'C': C, 'kernel': kernel, 'degree': degree, 'gamma': gamma, 'tol': tol, 'max_iter': max_iter}
21 | 
22 |     def set_params(self, params):
23 |         self.__init__(**params)
24 | 
25 |     def train(self, X, Y):
26 |         """ Fit the SVM model, save additional stats (as attributes) and return Y predicted values.
27 | 
28 |         Parameters
29 |         ----------
30 |         X : array-like, shape = [n_samples, n_features]
31 |             Predictor variables, where n_samples is the number of samples and n_features is the number of predictors.
32 | 
33 |         Y : array-like, shape = [n_samples, 1]
34 |             Response variables, where n_samples is the number of samples.
35 | 
36 |         Returns
37 |         -------
38 |         y_pred_train : array-like, shape = [n_samples, 1]
39 |             Predicted y score for samples.
40 |         """
41 | 
42 |         # Ensure array and error check
43 |         X, Y = self.input_check(X, Y)
44 | 
45 |         # Fit the model
46 |         self.model.fit(X, Y)
47 | 
48 |         # predict_proba was designed for multiple groups, so its column order is not guaranteed;
49 |         # pick the column whose scores give the higher AUC as the positive-class prediction
50 |         y_pred = self.model.predict_proba(X)
51 |         pred_0 = roc_auc_score(Y, y_pred[:, 0])
52 |         pred_1 = roc_auc_score(Y, y_pred[:, 1])
53 |         if pred_0 > pred_1:
54 |             self.pred_index = 0
55 |         else:
56 |             self.pred_index = 1
57 | 
58 |         # Calculate and return Y prediction value
59 |         y_pred_train = np.array(self.model.predict_proba(X)[:, self.pred_index])
60 | 
61 |         self.model.y_loadings_ = np.array([0, 0, 0])
62 |         self.model.x_scores_ = np.array([0, 0, 0])
63 |         self.model.pctvar_ = np.array([0, 0, 0])
64 | 
65 |         # Storing X, Y, and Y_pred
66 |         self.X = X
67 |         self.Y = Y
68 |         self.Y_pred = y_pred_train
69 |         self.metrics_key = []
70 |         self.model.eval_metrics_ = []
71 |         bm = binary_evaluation(Y, y_pred_train)
72 |         for key, value in bm.items():
73 |             self.model.eval_metrics_.append(value)
74 |             self.metrics_key.append(key)
75 | 
76 |         self.model.eval_metrics_ = np.array(self.model.eval_metrics_)
77 | 
78 |         self.Y_train = Y
79 |         self.Y_pred_train = y_pred_train
80 | 
81 |         return y_pred_train
82 | 
83 |     def test(self, X, Y=None):
84 |         """Calculate and return Y predicted value.
85 | 
86 |         Parameters
87 |         ----------
88 |         X : array-like, shape = [n_samples, n_features]
89 |             Test variables, where n_samples is the number of samples and n_features is the number of predictors.
90 | 
91 |         Returns
92 |         -------
93 |         y_pred_test : array-like, shape = [n_samples, 1]
94 |             Predicted y score for samples.
95 |         """
96 | 
97 |         # Convert X to a numpy array if it is a DataFrame or Series
98 |         if isinstance(X, (pd.DataFrame, pd.Series)):
99 |             X = np.array(X)
100 | 
101 |         # Calculate and return Y predicted value
102 |         y_pred_test = np.array(self.model.predict_proba(X)[:, self.pred_index])
103 |         if Y is not None:
104 |             self.metrics_key = []
105 |             self.model.eval_metrics_ = []
106 |             bm = binary_evaluation(Y, y_pred_test)
107 |             for key, value in bm.items():
108 |                 self.model.eval_metrics_.append(value)
109 |                 self.metrics_key.append(key)
110 | 
111 |             self.model.eval_metrics_ = np.array(self.model.eval_metrics_)
112 | 
113 |         self.Y_pred = y_pred_test
114 |         return y_pred_test
115 | 
--------------------------------------------------------------------------------
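A minimal usage sketch for the SVM wrapper (illustrative; this assumes SVM is exported from cimcb.model as the directory layout suggests, and X/y are hypothetical arrays with binary 0/1 labels):

    import numpy as np
    from cimcb.model import SVM

    X = np.random.rand(40, 6)            # hypothetical feature matrix
    y = np.array([0] * 20 + [1] * 20)    # binary labels
    model = SVM(C=1.0, kernel="rbf")
    y_prob = model.train(X, y)           # probability-like scores for the positive class
    y_prob_new = model.test(X)           # apply the fitted model to new data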
/cimcb/model/RBF_NN.py:
--------------------------------------------------------------------------------
1 | import numpy
2 | import numpy as np
3 | import math
4 | from sklearn.cluster import KMeans
5 | from .BaseModel import BaseModel
6 | 
7 | 
8 | class RBF_NN(BaseModel):
9 |     """Radial basis function neural network"""
10 | 
11 |     parametric = True
12 |     bootlist = None
13 | 
14 |     def __init__(self, n_clusters=8, max_iter=100):
15 |         self.n_clusters = n_clusters
16 |         self.max_iter = max_iter
17 |         self.k = n_clusters
18 | 
19 |         self.__name__ = 'cimcb.model.RBF_NN'
20 |         self.__params__ = {'n_clusters': n_clusters, 'max_iter': max_iter}
21 | 
22 |     def set_params(self, params):
23 |         self.__init__(**params)
24 | 
25 |     def train(self, X, Y):
26 |         """ Fit the rbf-nn model, save additional stats (as attributes) and return Y predicted values.
27 | 
28 |         Parameters
29 |         ----------
30 |         X : array-like, shape = [n_samples, n_features]
31 |             Predictor variables, where n_samples is the number of samples and n_features is the number of predictors.
32 | 
33 |         Y : array-like, shape = [n_samples, 1]
34 |             Response variables, where n_samples is the number of samples.
35 | 
36 |         Returns
37 |         -------
38 |         y_pred_train : array-like, shape = [n_samples, 1]
39 |             Predicted y score for samples.
40 | """ 41 | 42 | # Ensure array and error check 43 | X, Y = self.input_check(X, Y) 44 | 45 | km = KMeans(n_clusters=self.n_clusters, max_iter=self.max_iter) 46 | km.fit(X) 47 | cent = km.cluster_centers_ 48 | 49 | self.model = KMeans 50 | 51 | # Determine the value of sigma 52 | max = 0 53 | for i in range(self.n_clusters): 54 | for j in range(self.n_clusters): 55 | d = numpy.linalg.norm(cent[i] - cent[j]) 56 | if d > max: 57 | max = d 58 | d = max 59 | sigma = d / math.sqrt(2 * self.n_clusters) 60 | 61 | # Set up G matrix 62 | shape = X.shape 63 | row = shape[0] 64 | column = self.n_clusters 65 | G = numpy.empty((row, column), dtype=float) 66 | for i in range(row): 67 | for j in range(column): 68 | dist = numpy.linalg.norm(X[i] - cent[j]) 69 | G[i][j] = math.exp(-math.pow(dist, 2) / math.pow(2 * sigma, 2)) 70 | 71 | # Find W 72 | GTG = numpy.dot(G.T, G) 73 | GTG_inv = numpy.linalg.inv(GTG) 74 | fac = numpy.dot(GTG_inv, G.T) 75 | W = numpy.dot(fac, Y) 76 | self.cent = cent 77 | self.W = W 78 | self.G = G 79 | self.sigma = sigma 80 | y_pred_train = np.dot(G, W) 81 | self.xcols_num = len(X.T) 82 | cent2 = [] 83 | for i in range(len(self.cent.T)): 84 | something = [] 85 | for j in range(len(self.cent)): 86 | something.append(self.cent[j][i]) 87 | cent2.append(something) 88 | 89 | self.cent2 = np.array(cent2) 90 | 91 | self.vi = np.dot(self.cent2, self.W) 92 | 93 | self.model.x_scores_ = self.G 94 | self.model.y_loadings_ = self.W.reshape(1, len(self.W)) 95 | self.model.pctvar_ = np.ones((1, len(self.model.y_loadings_[0]))) 96 | self.X = X 97 | self.Y = Y 98 | self.Y_pred = y_pred_train 99 | return y_pred_train 100 | 101 | def test(self, X): 102 | """Calculate and return Y predicted value. 103 | 104 | Parameters 105 | ---------- 106 | X : array-like, shape = [n_samples, n_features] 107 | Test variables, where n_samples is the number of samples and n_features is the number of predictors. 108 | 109 | Returns 110 | ------- 111 | y_pred_test : array-like, shape = [n_samples, 1] 112 | Predicted y score for samples. 113 | """ 114 | 115 | # Set up G matrix 116 | shape = X.shape 117 | row = shape[0] 118 | column = self.n_clusters 119 | G = numpy.empty((row, column), dtype=float) 120 | for i in range(row): 121 | for j in range(column): 122 | dist = numpy.linalg.norm(X[i] - self.cent[j]) 123 | G[i][j] = math.exp(-math.pow(dist, 2) / math.pow(2 * self.sigma, 2)) 124 | y_pred_test = numpy.dot(G, self.W) 125 | return y_pred_test 126 | -------------------------------------------------------------------------------- /cimcb/bootstrap/CPer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy 3 | import pandas as pd 4 | from scipy.stats import norm 5 | import math 6 | import multiprocessing 7 | from copy import deepcopy 8 | from bokeh.layouts import widgetbox, gridplot, column, row, layout 9 | from bokeh.plotting import ColumnDataSource, figure, output_notebook, show 10 | from .BaseBootstrap import BaseBootstrap 11 | from itertools import combinations 12 | from ..plot import scatterCI, boxplot, distribution, scatter, scatter_ellipse 13 | from ..utils import nested_getattr, dict_95ci, dict_median_scores 14 | 15 | 16 | class CPer(BaseBootstrap): 17 | """ Returns bootstrap confidence intervals using the bias-corrected boostrap interval. 18 | 19 | Parameters 20 | ---------- 21 | model : object 22 | This object is assumed to store bootlist attributes in .model (e.g. modelPLS.model.x_scores_). 
23 | 24 | X : array-like, shape = [n_samples, n_features] 25 | Predictor variables, where n_samples is the number of samples and n_features is the number of predictors. 26 | 27 | Y : array-like, shape = [n_samples, 1] 28 | Response variables, where n_samples is the number of samples. 29 | 30 | bootlist : array-like, shape = [n_bootlist, 1] 31 | List of attributes to calculate and return bootstrap confidence intervals. 32 | 33 | bootnum : a positive integer, (default 100) 34 | The number of bootstrap samples used in the computation. 35 | 36 | seed: integer or None (default None) 37 | Used to seed the generator for the resample with replacement. 38 | 39 | Returns 40 | ------- 41 | bootci : dict of arrays 42 | Keys correspond to attributes in bootlist. 43 | Each array contains 95% confidence intervals. 44 | To return bootci, initalise then use method run(). 45 | """ 46 | 47 | def __init__(self, model, bootnum=100, seed=None, n_cores=-1, stratify=True): 48 | super().__init__(model=model, bootnum=bootnum, seed=seed, n_cores=n_cores, stratify=stratify) 49 | self.__name__ = "CPer" 50 | 51 | def calc_stat(self): 52 | super().calc_stat() 53 | 54 | def calc_bootidx(self): 55 | super().calc_bootidx() 56 | 57 | def calc_bootstat(self): 58 | super().calc_bootstat() 59 | 60 | def calc_bootci(self): 61 | self.bootci = {} 62 | for i in self.bootlist: 63 | self.bootci[i] = self.bootci_method(self.bootstat[i], self.stat[i]) 64 | 65 | def run(self): 66 | self.calc_stat() 67 | self.calc_bootidx() 68 | self.calc_bootstat() 69 | self.calc_bootci() 70 | 71 | @staticmethod 72 | def bootci_method(bootstat, stat): 73 | """Calculates bootstrap confidence intervals using the bias-corrected bootstrap interval.""" 74 | if stat.ndim == 1: 75 | nboot = len(bootstat) 76 | zalpha = norm.ppf(0.05 / 2) 77 | obs = stat # Observed mean 78 | meansum = np.zeros((1, len(obs))).flatten() 79 | for i in range(len(obs)): 80 | for j in range(len(bootstat)): 81 | if bootstat[j][i] >= obs[i]: 82 | meansum[i] = meansum[i] + 1 83 | prop = meansum / nboot # Proportion of times boot mean > obs mean 84 | z0 = -norm.ppf(prop) 85 | 86 | # new alpha 87 | pct1 = 100 * norm.cdf((2 * z0 + zalpha)) 88 | pct2 = 100 * norm.cdf((2 * z0 - zalpha)) 89 | pct3 = 100 * norm.cdf((2 * z0)) 90 | boot_ci = [] 91 | for i in range(len(pct1)): 92 | bootstat_i = [item[i] for item in bootstat] 93 | append_low = np.percentile(bootstat_i, pct1[i]) 94 | append_mid = np.percentile(bootstat_i, pct3[i]) 95 | append_upp = np.percentile(bootstat_i, pct2[i]) 96 | boot_ci.append([append_low, append_upp, append_mid]) 97 | boot_ci = np.array(boot_ci) 98 | 99 | # Recursive component (to get ndim = 1, and append) 100 | else: 101 | ncomp = stat.shape[1] 102 | boot_ci = [] 103 | for k in range(ncomp): 104 | bootstat_k = [] 105 | for j in range(len(bootstat)): 106 | bootstat_k.append(bootstat[j][:, k]) 107 | boot_ci_k = CPer.bootci_method(bootstat_k, stat[:, k]) 108 | boot_ci.append(boot_ci_k) 109 | boot_ci = np.array(boot_ci) 110 | return boot_ci 111 | -------------------------------------------------------------------------------- /cimcb/model/MBNN_SigmoidSigmoid_1Layer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from keras.callbacks import Callback 3 | from keras.optimizers import SGD 4 | from keras.models import Sequential 5 | from keras.models import Model 6 | from keras.layers import Input, Dense, Concatenate, LSTM, concatenate 7 | from keras.layers import Dense 8 | from .BaseModel import BaseModel 9 | from 
..utils import YpredCallback 10 | 11 | 12 | class MBNN_SigmoidSigmoid_1Layer(BaseModel): 13 | """2 Layer logistic-logistic neural network using Keras""" 14 | 15 | parametric = True 16 | bootlist = None 17 | 18 | def __init__(self, n_neurons_l1=2, n_neurons_l2=1, epochs=200, learning_rate=0.01, momentum=0.0, decay=0.0, nesterov=False, loss="binary_crossentropy", batch_size=None, verbose=0): 19 | self.n_neurons_l1 = n_neurons_l1 20 | self.verbose = verbose 21 | self.n_epochs = epochs 22 | self.batch_size = batch_size 23 | self.loss = loss 24 | self.optimizer = SGD(lr=learning_rate, momentum=momentum, decay=decay, nesterov=nesterov) 25 | 26 | self.__name__ = 'cimcb.model.MBNN_SigmoidSigmoid_1Layer' 27 | self.__params__ = {'n_neurons_l1': n_neurons_l1, 'n_neurons_l2': n_neurons_l2, 'epochs': epochs, 'learning_rate': learning_rate, 'momentum': momentum, 'decay': decay, 'nesterov': nesterov, 'loss': loss, 'batch_size': batch_size, 'verbose': verbose} 28 | 29 | def set_params(self, params): 30 | self.__init__(**params) 31 | 32 | def train(self, X, Y, epoch_ypred=False, epoch_xtest=None): 33 | """ Fit the neural network model, save additional stats (as attributes) and return Y predicted values. 34 | 35 | Parameters 36 | ---------- 37 | X : array-like, shape = [n_samples, n_features] 38 | Predictor variables, where n_samples is the number of samples and n_features is the number of predictors. 39 | 40 | Y : array-like, shape = [n_samples, 1] 41 | Response variables, where n_samples is the number of samples. 42 | 43 | Returns 44 | ------- 45 | y_pred_train : array-like, shape = [n_samples, 1] 46 | Predicted y score for samples. 47 | """ 48 | 49 | # If batch-size is None: 50 | if self.batch_size is None: 51 | self.batch_size = len(X) 52 | 53 | X1 = X[0] 54 | X2 = X[1] 55 | 56 | # Layer for X1 57 | input_X1 = Input(shape=(len(X1.T),)) 58 | layer1_X1 = Dense(self.n_neurons_l1, activation="sigmoid")(input_X1) 59 | layer1_X1 = Model(inputs=input_X1, outputs=layer1_X1) 60 | 61 | # Layer for X2 62 | input_X2 = Input(shape=(len(X2.T),)) 63 | layer1_X2 = Dense(self.n_neurons_l1, activation="sigmoid")(input_X2) 64 | layer1_X2 = Model(inputs=input_X2, outputs=layer1_X2) 65 | 66 | # Concatenate 67 | concat = concatenate([layer1_X1.output, layer1_X2.output]) 68 | #model_concat = Dense(self.n_neurons_l2, activation="sigmoid")(concat) 69 | model_concat = Dense(1, activation="sigmoid")(concat) 70 | 71 | self.model = Model(inputs=[layer1_X1.input, layer1_X2.input], outputs=model_concat) 72 | self.model.compile(optimizer=self.optimizer, loss=self.loss, metrics=["accuracy"]) 73 | 74 | # If epoch_ypred is True, calculate ypred for each epoch 75 | if epoch_ypred is True: 76 | self.epoch = YpredCallback(self.model, X, epoch_xtest) 77 | else: 78 | self.epoch = Callback() 79 | 80 | # Fit 81 | self.model.fit([X1, X2], Y, epochs=self.n_epochs, batch_size=self.batch_size, verbose=self.verbose, callbacks=[self.epoch]) 82 | 83 | # Not sure about the naming scheme (trying to match PLS) 84 | y_pred_train = self.model.predict(X).flatten() 85 | 86 | # Storing X, Y, and Y_pred 87 | self.Y_pred = y_pred_train 88 | self.X = X 89 | self.Y = Y 90 | return y_pred_train 91 | 92 | def test(self, X, Y=None): 93 | """Calculate and return Y predicted value. 94 | 95 | Parameters 96 | ---------- 97 | X : array-like, shape = [n_samples, n_features] 98 | Test variables, where n_samples is the number of samples and n_features is the number of predictors. 
99 | 100 | Returns 101 | ------- 102 | y_pred_test : array-like, shape = [n_samples, 1] 103 | Predicted y score for samples. 104 | """ 105 | 106 | y_pred_test = self.model.predict(X).flatten() 107 | return y_pred_test 108 | -------------------------------------------------------------------------------- /cimcb/model/MBNN_LinearSigmoid.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from keras.callbacks import Callback 3 | from keras.optimizers import SGD 4 | from keras.models import Sequential 5 | from keras.models import Model 6 | from keras.layers import Input, Dense, Concatenate, LSTM, concatenate 7 | from keras.layers import Dense 8 | from .BaseModel import BaseModel 9 | from ..utils import YpredCallback 10 | 11 | 12 | class MBNN_LinearSigmoid(BaseModel): 13 | """2 Layer logistic-logistic neural network using Keras""" 14 | 15 | parametric = True 16 | bootlist = None 17 | 18 | def __init__(self, n_neurons_l1=2, n_neurons_l2=2, epochs=200, learning_rate=0.01, momentum=0.0, decay=0.0, nesterov=False, loss="binary_crossentropy", batch_size=None, verbose=0): 19 | self.n_neurons_l1 = n_neurons_l1 20 | self.n_neurons_l2 = n_neurons_l2 21 | self.verbose = verbose 22 | self.n_epochs = epochs 23 | self.batch_size = batch_size 24 | self.loss = loss 25 | self.optimizer = SGD(lr=learning_rate, momentum=momentum, decay=decay, nesterov=nesterov) 26 | 27 | self.__name__ = 'cimcb.model.MBNN_LinearSigmoid' 28 | self.__params__ = {'n_neurons_l1': n_neurons_l1, 'n_neurons_l2': n_neurons_l2, 'epochs': epochs, 'learning_rate': learning_rate, 'momentum': momentum, 'decay': decay, 'nesterov': nesterov, 'loss': loss, 'batch_size': batch_size, 'verbose': verbose} 29 | 30 | def set_params(self, params): 31 | self.__init__(**params) 32 | 33 | def train(self, X, Y, epoch_ypred=False, epoch_xtest=None): 34 | """ Fit the neural network model, save additional stats (as attributes) and return Y predicted values. 35 | 36 | Parameters 37 | ---------- 38 | X : array-like, shape = [n_samples, n_features] 39 | Predictor variables, where n_samples is the number of samples and n_features is the number of predictors. 40 | 41 | Y : array-like, shape = [n_samples, 1] 42 | Response variables, where n_samples is the number of samples. 43 | 44 | Returns 45 | ------- 46 | y_pred_train : array-like, shape = [n_samples, 1] 47 | Predicted y score for samples. 
48 | """ 49 | 50 | # If batch-size is None: 51 | if self.batch_size is None: 52 | self.batch_size = len(X) 53 | 54 | X1 = X[0] 55 | X2 = X[1] 56 | 57 | # Layer for X1 58 | input_X1 = Input(shape=(len(X1.T),)) 59 | layer1_X1 = Dense(self.n_neurons_l1, activation="linear")(input_X1) 60 | layer1_X1 = Model(inputs=input_X1, outputs=layer1_X1) 61 | 62 | # Layer for X2 63 | input_X2 = Input(shape=(len(X2.T),)) 64 | layer1_X2 = Dense(self.n_neurons_l1, activation="linear")(input_X2) 65 | layer1_X2 = Model(inputs=input_X2, outputs=layer1_X2) 66 | 67 | # Concatenate 68 | concat = concatenate([layer1_X1.output, layer1_X2.output]) 69 | model_concat = Dense(self.n_neurons_l2, activation="sigmoid")(concat) 70 | model_concat = Dense(1, activation="sigmoid")(model_concat) 71 | 72 | self.model = Model(inputs=[layer1_X1.input, layer1_X2.input], outputs=model_concat) 73 | self.model.compile(optimizer=self.optimizer, loss=self.loss, metrics=["accuracy"]) 74 | 75 | # If epoch_ypred is True, calculate ypred for each epoch 76 | if epoch_ypred is True: 77 | self.epoch = YpredCallback(self.model, X, epoch_xtest) 78 | else: 79 | self.epoch = Callback() 80 | 81 | # Fit 82 | self.model.fit([X1, X2], Y, epochs=self.n_epochs, batch_size=self.batch_size, verbose=self.verbose, callbacks=[self.epoch]) 83 | 84 | # Not sure about the naming scheme (trying to match PLS) 85 | y_pred_train = self.model.predict(X).flatten() 86 | 87 | # Storing X, Y, and Y_pred 88 | self.Y_pred = y_pred_train 89 | self.X = X 90 | self.Y = Y 91 | return y_pred_train 92 | 93 | def test(self, X, Y=None): 94 | """Calculate and return Y predicted value. 95 | 96 | Parameters 97 | ---------- 98 | X : array-like, shape = [n_samples, n_features] 99 | Test variables, where n_samples is the number of samples and n_features is the number of predictors. 100 | 101 | Returns 102 | ------- 103 | y_pred_test : array-like, shape = [n_samples, 1] 104 | Predicted y score for samples. 
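        Example
        -------
        A minimal sketch (illustrative; X1_test and X2_test are hypothetical
        arrays holding the two data blocks in the same order used for train):

        >>> y_pred = model.test([X1_test, X2_test])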
105 | """ 106 | 107 | y_pred_test = self.model.predict(X).flatten() 108 | return y_pred_test 109 | -------------------------------------------------------------------------------- /cimcb/model/MBNN_LinearSigmoid_1Layer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from keras.callbacks import Callback 3 | from keras.optimizers import SGD 4 | from keras.models import Sequential 5 | from keras.models import Model 6 | from keras.layers import Input, Dense, Concatenate, LSTM, concatenate 7 | from keras.layers import Dense 8 | from .BaseModel import BaseModel 9 | from ..utils import YpredCallback 10 | 11 | 12 | class MBNN_LinearSigmoid_1Layer(BaseModel): 13 | """2 Layer logistic-logistic neural network using Keras""" 14 | 15 | parametric = True 16 | bootlist = None 17 | 18 | def __init__(self, n_neurons_l1=2, n_neurons_l2=2, epochs=200, learning_rate=0.01, momentum=0.0, decay=0.0, nesterov=False, loss="binary_crossentropy", batch_size=None, verbose=0): 19 | self.n_neurons_l1 = n_neurons_l1 20 | self.n_neurons_l2 = n_neurons_l2 21 | self.verbose = verbose 22 | self.n_epochs = epochs 23 | self.batch_size = batch_size 24 | self.loss = loss 25 | self.optimizer = SGD(lr=learning_rate, momentum=momentum, decay=decay, nesterov=nesterov) 26 | 27 | self.__name__ = 'cimcb.model.MBNN_LinearSigmoid_1Layer' 28 | self.__params__ = {'n_neurons_l1': n_neurons_l1, 'n_neurons_l2': n_neurons_l2, 'epochs': epochs, 'learning_rate': learning_rate, 'momentum': momentum, 'decay': decay, 'nesterov': nesterov, 'loss': loss, 'batch_size': batch_size, 'verbose': verbose} 29 | 30 | def set_params(self, params): 31 | self.__init__(**params) 32 | 33 | def train(self, X, Y, epoch_ypred=False, epoch_xtest=None): 34 | """ Fit the neural network model, save additional stats (as attributes) and return Y predicted values. 35 | 36 | Parameters 37 | ---------- 38 | X : array-like, shape = [n_samples, n_features] 39 | Predictor variables, where n_samples is the number of samples and n_features is the number of predictors. 40 | 41 | Y : array-like, shape = [n_samples, 1] 42 | Response variables, where n_samples is the number of samples. 43 | 44 | Returns 45 | ------- 46 | y_pred_train : array-like, shape = [n_samples, 1] 47 | Predicted y score for samples. 
48 | """ 49 | 50 | # If batch-size is None: 51 | if self.batch_size is None: 52 | self.batch_size = len(X) 53 | 54 | X1 = X[0] 55 | X2 = X[1] 56 | 57 | # Layer for X1 58 | input_X1 = Input(shape=(len(X1.T),)) 59 | layer1_X1 = Dense(self.n_neurons_l1, activation="linear")(input_X1) 60 | layer1_X1 = Model(inputs=input_X1, outputs=layer1_X1) 61 | 62 | # Layer for X2 63 | input_X2 = Input(shape=(len(X2.T),)) 64 | layer1_X2 = Dense(self.n_neurons_l1, activation="linear")(input_X2) 65 | layer1_X2 = Model(inputs=input_X2, outputs=layer1_X2) 66 | 67 | # Concatenate 68 | concat = concatenate([layer1_X1.output, layer1_X2.output]) 69 | #model_concat = Dense(self.n_neurons_l2, activation="sigmoid")(concat) 70 | model_concat = Dense(1, activation="sigmoid")(concat) 71 | 72 | self.model = Model(inputs=[layer1_X1.input, layer1_X2.input], outputs=model_concat) 73 | self.model.compile(optimizer=self.optimizer, loss=self.loss, metrics=["accuracy"]) 74 | 75 | # If epoch_ypred is True, calculate ypred for each epoch 76 | if epoch_ypred is True: 77 | self.epoch = YpredCallback(self.model, X, epoch_xtest) 78 | else: 79 | self.epoch = Callback() 80 | 81 | # Fit 82 | self.model.fit([X1, X2], Y, epochs=self.n_epochs, batch_size=self.batch_size, verbose=self.verbose, callbacks=[self.epoch]) 83 | 84 | # Not sure about the naming scheme (trying to match PLS) 85 | y_pred_train = self.model.predict(X).flatten() 86 | 87 | # Storing X, Y, and Y_pred 88 | self.Y_pred = y_pred_train 89 | self.X = X 90 | self.Y = Y 91 | return y_pred_train 92 | 93 | def test(self, X, Y=None): 94 | """Calculate and return Y predicted value. 95 | 96 | Parameters 97 | ---------- 98 | X : array-like, shape = [n_samples, n_features] 99 | Test variables, where n_samples is the number of samples and n_features is the number of predictors. 100 | 101 | Returns 102 | ------- 103 | y_pred_test : array-like, shape = [n_samples, 1] 104 | Predicted y score for samples. 105 | """ 106 | 107 | y_pred_test = self.model.predict(X).flatten() 108 | return y_pred_test 109 | -------------------------------------------------------------------------------- /cimcb/model/NN_L1.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from keras.callbacks import Callback 3 | from keras.optimizers import SGD 4 | from keras.models import Sequential 5 | from keras.layers import Dense 6 | from keras import regularizers 7 | from .BaseModel import BaseModel 8 | from ..utils import YpredCallback 9 | 10 | 11 | class NN_L1(BaseModel): 12 | """2 Layer linear-logistic neural network using Keras""" 13 | 14 | parametric = False 15 | bootlist = None 16 | 17 | def __init__(self, l_lambda=0.01, n_nodes=2, epochs=200, learning_rate=0.01, momentum=0.0, decay=0.0, nesterov=False, loss="binary_crossentropy", batch_size=None, verbose=0): 18 | self.l_lambda = l_lambda 19 | self.n_nodes = n_nodes 20 | self.verbose = verbose 21 | self.n_epochs = epochs 22 | self.k = n_nodes 23 | self.batch_size = batch_size 24 | self.loss = loss 25 | self.optimizer = SGD(lr=learning_rate, momentum=momentum, decay=decay, nesterov=nesterov) 26 | 27 | def train(self, X, Y, epoch_ypred=False, epoch_xtest=None): 28 | """ Fit the neural network model, save additional stats (as attributes) and return Y predicted values. 29 | 30 | Parameters 31 | ---------- 32 | X : array-like, shape = [n_samples, n_features] 33 | Predictor variables, where n_samples is the number of samples and n_features is the number of predictors. 
34 | 35 | Y : array-like, shape = [n_samples, 1] 36 | Response variables, where n_samples is the number of samples. 37 | 38 | Returns 39 | ------- 40 | y_pred_train : array-like, shape = [n_samples, 1] 41 | Predicted y score for samples. 42 | """ 43 | 44 | # If batch-size is None: 45 | if self.batch_size is None: 46 | self.batch_size = min(200, len(X)) 47 | 48 | self.model = Sequential() 49 | self.model.add(Dense(self.n_nodes, activation="linear", input_dim=len(X.T), kernel_regularizer=regularizers.l1(self.l_lambda))) 50 | self.model.add(Dense(1, activation="sigmoid")) 51 | self.model.compile(optimizer=self.optimizer, loss=self.loss, metrics=["accuracy"]) 52 | 53 | # If epoch_ypred is True, calculate ypred for each epoch 54 | if epoch_ypred is True: 55 | self.epoch = YpredCallback(self.model, X, epoch_xtest) 56 | else: 57 | self.epoch = Callback() 58 | 59 | # Fit 60 | self.model.fit(X, Y, epochs=self.n_epochs, batch_size=self.batch_size, verbose=self.verbose, callbacks=[self.epoch]) 61 | 62 | layer1_weight = self.model.layers[0].get_weights()[0] 63 | layer1_bias = self.model.layers[0].get_weights()[1] 64 | layer2_weight = self.model.layers[1].get_weights()[0] 65 | layer2_bias = self.model.layers[1].get_weights()[1] 66 | 67 | # Not sure about the naming scheme (trying to match PLS) 68 | self.model.x_loadings_ = layer1_weight 69 | self.model.x_scores_ = np.matmul(X, self.model.x_loadings_) + layer1_bias 70 | self.model.y_loadings_ = layer2_weight 71 | self.model.pctvar_ = np.ones((1, len(self.model.y_loadings_[0]))) 72 | self.xcols_num = len(X.T) 73 | self.model.pctvar_ = sum(abs(self.model.x_scores_) ** 2) / sum(sum(abs(X) ** 2)) * 100 74 | y_pred_train = self.model.predict(X).flatten() 75 | 76 | # Storing X, Y, and Y_pred 77 | self.Y_pred = y_pred_train 78 | self.X = X 79 | self.Y = Y 80 | return y_pred_train 81 | 82 | def test(self, X, Y=None): 83 | """Calculate and return Y predicted value. 84 | 85 | Parameters 86 | ---------- 87 | X : array-like, shape = [n_samples, n_features] 88 | Test variables, where n_samples is the number of samples and n_features is the number of predictors. 89 | 90 | Returns 91 | ------- 92 | y_pred_test : array-like, shape = [n_samples, 1] 93 | Predicted y score for samples. 
94 | """ 95 | 96 | layer1_weight = self.model.layers[0].get_weights()[0] 97 | layer1_bias = self.model.layers[0].get_weights()[1] 98 | layer2_weight = self.model.layers[1].get_weights()[0] 99 | layer2_bias = self.model.layers[1].get_weights()[1] 100 | 101 | self.model.y_loadings_ = layer2_weight 102 | self.model.y_loadings_ = self.model.y_loadings_.reshape(1, len(self.model.y_loadings_)) 103 | self.model.x_loadings_ = layer1_weight 104 | self.model.x_scores_ = np.matmul(X, self.model.x_loadings_) + layer1_bias 105 | y_pred_test = self.model.predict(X).flatten() 106 | self.model.pctvar_ = sum(abs(self.model.x_scores_) ** 2) / sum(sum(abs(X) ** 2)) * 100 107 | return y_pred_test 108 | -------------------------------------------------------------------------------- /cimcb/model/NN_L2.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from keras.callbacks import Callback 3 | from keras.optimizers import SGD 4 | from keras.models import Sequential 5 | from keras.layers import Dense 6 | from keras import regularizers 7 | from .BaseModel import BaseModel 8 | from ..utils import YpredCallback 9 | 10 | 11 | class NN_L2(BaseModel): 12 | """2 Layer linear-logistic neural network using Keras""" 13 | 14 | parametric = False 15 | bootlist = None 16 | 17 | def __init__(self, l_lambda=0.01, n_nodes=2, epochs=200, learning_rate=0.01, momentum=0.0, decay=0.0, nesterov=False, loss="binary_crossentropy", batch_size=None, verbose=0): 18 | self.l_lambda = l_lambda 19 | self.n_nodes = n_nodes 20 | self.verbose = verbose 21 | self.n_epochs = epochs 22 | self.k = n_nodes 23 | self.batch_size = batch_size 24 | self.loss = loss 25 | self.optimizer = SGD(lr=learning_rate, momentum=momentum, decay=decay, nesterov=nesterov) 26 | 27 | def train(self, X, Y, epoch_ypred=False, epoch_xtest=None): 28 | """ Fit the neural network model, save additional stats (as attributes) and return Y predicted values. 29 | 30 | Parameters 31 | ---------- 32 | X : array-like, shape = [n_samples, n_features] 33 | Predictor variables, where n_samples is the number of samples and n_features is the number of predictors. 34 | 35 | Y : array-like, shape = [n_samples, 1] 36 | Response variables, where n_samples is the number of samples. 37 | 38 | Returns 39 | ------- 40 | y_pred_train : array-like, shape = [n_samples, 1] 41 | Predicted y score for samples. 
42 | """ 43 | 44 | # If batch-size is None: 45 | if self.batch_size is None: 46 | self.batch_size = min(200, len(X)) 47 | 48 | self.model = Sequential() 49 | self.model.add(Dense(self.n_nodes, activation="linear", input_dim=len(X.T), kernel_regularizer=regularizers.l2(self.l_lambda))) 50 | self.model.add(Dense(1, activation="sigmoid")) 51 | self.model.compile(optimizer=self.optimizer, loss=self.loss, metrics=["accuracy"]) 52 | 53 | # If epoch_ypred is True, calculate ypred for each epoch 54 | if epoch_ypred is True: 55 | self.epoch = YpredCallback(self.model, X, epoch_xtest) 56 | else: 57 | self.epoch = Callback() 58 | 59 | # Fit 60 | self.model.fit(X, Y, epochs=self.n_epochs, batch_size=self.batch_size, verbose=self.verbose, callbacks=[self.epoch]) 61 | 62 | layer1_weight = self.model.layers[0].get_weights()[0] 63 | layer1_bias = self.model.layers[0].get_weights()[1] 64 | layer2_weight = self.model.layers[1].get_weights()[0] 65 | layer2_bias = self.model.layers[1].get_weights()[1] 66 | 67 | # Not sure about the naming scheme (trying to match PLS) 68 | self.model.x_loadings_ = layer1_weight 69 | self.model.x_scores_ = np.matmul(X, self.model.x_loadings_) + layer1_bias 70 | self.model.y_loadings_ = layer2_weight 71 | self.model.pctvar_ = np.ones((1, len(self.model.y_loadings_[0]))) 72 | self.xcols_num = len(X.T) 73 | self.model.pctvar_ = sum(abs(self.model.x_scores_) ** 2) / sum(sum(abs(X) ** 2)) * 100 74 | y_pred_train = self.model.predict(X).flatten() 75 | 76 | # Storing X, Y, and Y_pred 77 | self.Y_pred = y_pred_train 78 | self.X = X 79 | self.Y = Y 80 | return y_pred_train 81 | 82 | def test(self, X, Y=None): 83 | """Calculate and return Y predicted value. 84 | 85 | Parameters 86 | ---------- 87 | X : array-like, shape = [n_samples, n_features] 88 | Test variables, where n_samples is the number of samples and n_features is the number of predictors. 89 | 90 | Returns 91 | ------- 92 | y_pred_test : array-like, shape = [n_samples, 1] 93 | Predicted y score for samples. 
94 | """ 95 | 96 | layer1_weight = self.model.layers[0].get_weights()[0] 97 | layer1_bias = self.model.layers[0].get_weights()[1] 98 | layer2_weight = self.model.layers[1].get_weights()[0] 99 | layer2_bias = self.model.layers[1].get_weights()[1] 100 | 101 | self.model.y_loadings_ = layer2_weight 102 | self.model.y_loadings_ = self.model.y_loadings_.reshape(1, len(self.model.y_loadings_)) 103 | self.model.x_loadings_ = layer1_weight 104 | self.model.x_scores_ = np.matmul(X, self.model.x_loadings_) + layer1_bias 105 | y_pred_test = self.model.predict(X).flatten() 106 | self.model.pctvar_ = sum(abs(self.model.x_scores_) ** 2) / sum(sum(abs(X) ** 2)) * 100 107 | return y_pred_test 108 | -------------------------------------------------------------------------------- /cimcb/model/RF.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | from sklearn.ensemble import RandomForestClassifier 4 | from sklearn.metrics import roc_auc_score 5 | from .BaseModel import BaseModel 6 | from ..utils import binary_metrics, binary_evaluation 7 | 8 | 9 | class RF(BaseModel): 10 | """Random forest""" 11 | 12 | parametric = True 13 | bootlist = None # list of metrics to bootstrap 14 | 15 | bootlist = ["Y_pred", "model.eval_metrics_"] # list of metrics to bootstrap 16 | 17 | def __init__(self, n_estimators=100, max_features="auto", max_depth=None, criterion="gini", min_samples_split=2, min_samples_leaf=1, max_leaf_nodes=None, n_jobs=None): 18 | self.model = RandomForestClassifier(n_estimators=n_estimators, max_features=max_features, max_depth=max_depth, criterion=criterion, min_samples_split=min_samples_split, min_samples_leaf=min_samples_leaf, max_leaf_nodes=max_leaf_nodes, n_jobs=n_jobs) 19 | self.k = n_estimators 20 | 21 | self.__name__ = 'cimcb.model.RF' 22 | self.__params__ = {'n_estimators': n_estimators, 'max_features': max_features, 'max_depth': max_depth, 'criterion': criterion, 'min_samples_split': min_samples_split, 'min_samples_leaf': min_samples_leaf, 'max_leaf_nodes': max_leaf_nodes, 'n_jobs': n_jobs} 23 | 24 | def set_params(self, params): 25 | self.__init__(**params) 26 | 27 | def train(self, X, Y): 28 | """ Fit the RF model, save additional stats (as attributes) and return Y predicted values. 29 | 30 | Parameters 31 | ---------- 32 | X : array-like, shape = [n_samples, n_features] 33 | Predictor variables, where n_samples is the number of samples and n_features is the number of predictors. 34 | 35 | Y : array-like, shape = [n_samples, 1] 36 | Response variables, where n_samples is the number of samples. 37 | 38 | Returns 39 | ------- 40 | y_pred_train : array-like, shape = [n_samples, 1] 41 | Predicted y score for samples. 42 | """ 43 | 44 | # Ensure array and error check 45 | X, Y = self.input_check(X, Y) 46 | 47 | # Fit the model 48 | self.model.fit(X, Y) 49 | 50 | # Predict_proba was designed for multi-groups... 
51 | # This makes it sure that y_pred is correct 52 | y_pred = self.model.predict_proba(X) 53 | pred_0 = roc_auc_score(Y, y_pred[:, 0]) 54 | pred_1 = roc_auc_score(Y, y_pred[:, 1]) 55 | if pred_0 > pred_1: 56 | self.pred_index = 0 57 | else: 58 | self.pred_index = 1 59 | 60 | # Calculate and return Y prediction value 61 | y_pred_train = np.array(self.model.predict_proba(X)[:, self.pred_index]) 62 | 63 | self.model.y_loadings_ = np.array([0, 0, 0]) 64 | self.model.x_scores_ = np.array([0, 0, 0]) 65 | self.model.pctvar_ = np.array([0, 0, 0]) 66 | 67 | # Storing X, Y, and Y_pred 68 | self.X = X 69 | self.Y = Y 70 | self.Y_pred = y_pred_train 71 | 72 | self.metrics_key = [] 73 | self.model.eval_metrics_ = [] 74 | bm = binary_evaluation(Y, y_pred_train) 75 | for key, value in bm.items(): 76 | self.model.eval_metrics_.append(value) 77 | self.metrics_key.append(key) 78 | 79 | self.Y_train = Y 80 | self.Y_pred_train = y_pred_train 81 | 82 | self.model.eval_metrics_ = np.array(self.model.eval_metrics_) 83 | 84 | return y_pred_train 85 | 86 | def test(self, X, Y=None): 87 | """Calculate and return Y predicted value. 88 | 89 | Parameters 90 | ---------- 91 | X : array-like, shape = [n_samples, n_features] 92 | Test variables, where n_samples is the number of samples and n_features is the number of predictors. 93 | 94 | Returns 95 | ------- 96 | y_pred_test : array-like, shape = [n_samples, 1] 97 | Predicted y score for samples. 98 | """ 99 | 100 | # Convert to X to numpy array if a DataFrame 101 | if isinstance(X, pd.DataFrame or pd.Series): 102 | X = np.array(X) 103 | 104 | # Calculate and return Y predicted value 105 | y_pred_test = np.array(self.model.predict_proba(X)[:, self.pred_index]) 106 | 107 | if Y is not None: 108 | self.metrics_key = [] 109 | self.model.eval_metrics_ = [] 110 | bm = binary_evaluation(Y, y_pred_test) 111 | for key, value in bm.items(): 112 | self.model.eval_metrics_.append(value) 113 | self.metrics_key.append(key) 114 | 115 | self.model.eval_metrics_ = np.array(self.model.eval_metrics_) 116 | 117 | self.Y_pred = y_pred_test 118 | return y_pred_test 119 | -------------------------------------------------------------------------------- /cimcb/model/PCR.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | from sklearn.decomposition import PCA 4 | from sklearn.linear_model import LinearRegression 5 | from .BaseModel import BaseModel 6 | from ..utils import binary_metrics, binary_evaluation 7 | 8 | 9 | class PCR(BaseModel): 10 | """ Principal component regression. 11 | 12 | Parameters 13 | ---------- 14 | n_components : int, (default 2) 15 | Number of components to keep. 16 | 17 | Methods 18 | ------- 19 | train : Fit model to data. 20 | 21 | test : Apply model to test data. 22 | 23 | evaluate : Evaluate model. 24 | 25 | calc_bootci : Calculate bootstrap intervals for plot_featureimportance. 26 | 27 | plot_featureimportance : Plot coefficient and Variable Importance in Projection (VIP). 28 | 29 | plot_permutation_test : Perform a permutation test and plot. 
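    Example
    -------
    A minimal sketch (illustrative; X, Y, and X_test are hypothetical arrays
    shaped as described in train and test):

    >>> model = PCR(n_components=2)
    >>> y_pred_train = model.train(X, Y)
    >>> y_pred_test = model.test(X_test)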
30 | """ 31 | 32 | parametric = True 33 | bootlist = ["model.coef_", "Y_pred", "model.eval_metrics_"] # list of metrics to bootstrap 34 | 35 | def __init__(self, n_components=2): 36 | self.model = PCA(n_components=n_components) 37 | self.regrmodel = LinearRegression() 38 | self.k = n_components 39 | 40 | self.__name__ = 'cimcb.model.PCR' 41 | self.__params__ = {'n_components': n_components} 42 | 43 | def set_params(self, params): 44 | self.__init__(**params) 45 | 46 | def train(self, X, Y): 47 | """ Fit the PCR model, save additional stats (as attributes) and return Y predicted values. 48 | 49 | Parameters 50 | ---------- 51 | X : array-like, shape = [n_samples, n_features] 52 | Predictor variables, where n_samples is the number of samples and n_features is the number of predictors. 53 | 54 | Y : array-like, shape = [n_samples, 1] 55 | Response variables, where n_samples is the number of samples. 56 | 57 | Returns 58 | ------- 59 | y_pred_train : array-like, shape = [n_samples, 1] 60 | Predicted y score for samples. 61 | """ 62 | 63 | # Ensure array and error check 64 | X, Y = self.input_check(X, Y) 65 | 66 | # Fit the model 67 | self.model.fit(X) 68 | self.model.x_scores_ = self.model.transform(X) 69 | self.regrmodel.fit(self.model.x_scores_, Y) 70 | 71 | # Save x_loadings, coef, pctvar, x_weights, y_loadings and vip 72 | self.model.x_loadings_ = self.model.components_.T 73 | self.model.coef_ = np.dot(self.model.x_loadings_, self.regrmodel.coef_) 74 | self.model.pctvar_ = self.model.explained_variance_ 75 | self.model.x_weights_ = self.model.components_.T 76 | self.model.y_loadings_ = self.regrmodel.coef_.reshape(1, len(self.regrmodel.coef_)) 77 | 78 | # Calculate and return Y prediction value 79 | y_pred_train = self.regrmodel.predict(self.model.x_scores_).flatten() 80 | 81 | # Storing X, Y, and Y_pred 82 | self.X = X 83 | self.Y = Y 84 | self.Y_pred = y_pred_train 85 | self.metrics_key = [] 86 | self.model.eval_metrics_ = [] 87 | bm = binary_evaluation(Y, y_pred_train) 88 | for key, value in bm.items(): 89 | self.model.eval_metrics_.append(value) 90 | self.metrics_key.append(key) 91 | 92 | self.model.eval_metrics_ = np.array(self.model.eval_metrics_) 93 | self.Y_train = Y 94 | self.Y_pred_train = y_pred_train 95 | 96 | return y_pred_train 97 | 98 | def test(self, X, Y=None): 99 | """Calculate and return Y predicted value. 100 | 101 | Parameters 102 | ---------- 103 | X : array-like, shape = [n_samples, n_features] 104 | Test variables, where n_samples is the number of samples and n_features is the number of predictors. 105 | 106 | Returns 107 | ------- 108 | y_pred_test : array-like, shape = [n_samples, 1] 109 | Predicted y score for samples. 
110 | """ 111 | 112 | # Convert to X to numpy array if a DataFrame 113 | if isinstance(X, pd.DataFrame or pd.Series): 114 | X = np.array(X) 115 | 116 | # Calculate and return Y predicted value 117 | newX = self.model.transform(X) 118 | y_pred_test = self.regrmodel.predict(newX).flatten() 119 | # Calculate and return Y predicted value 120 | if Y is not None: 121 | self.metrics_key = [] 122 | self.model.eval_metrics_ = [] 123 | bm = binary_evaluation(Y, y_pred_test) 124 | for key, value in bm.items(): 125 | self.model.eval_metrics_.append(value) 126 | self.metrics_key.append(key) 127 | 128 | self.model.eval_metrics_ = np.array(self.model.eval_metrics_) 129 | 130 | self.Y_pred = y_pred_test 131 | return y_pred_test 132 | -------------------------------------------------------------------------------- /cimcb/model/NN_LogitLogit.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from keras.callbacks import Callback 3 | from keras.optimizers import SGD 4 | from keras.models import Sequential 5 | from keras.layers import Dense 6 | from .BaseModel import BaseModel 7 | from ..utils import YpredCallback 8 | 9 | 10 | class NN_LogitLogit(BaseModel): 11 | """2 Layer logistic-logistic neural network using Keras""" 12 | 13 | parametric = True 14 | bootlist = None 15 | 16 | def __init__(self, n_neurons=2, epochs=200, learning_rate=0.01, momentum=0.0, decay=0.0, nesterov=False, loss="binary_crossentropy", batch_size=None, verbose=0): 17 | self.n_neurons = n_neurons 18 | self.verbose = verbose 19 | self.n_epochs = epochs 20 | self.k = n_neurons 21 | self.batch_size = batch_size 22 | self.loss = loss 23 | self.optimizer = SGD(lr=learning_rate, momentum=momentum, decay=decay, nesterov=nesterov) 24 | 25 | self.__name__ = 'cimcb.model.NN_LogitLogit' 26 | self.__params__ = {'n_neurons': n_neurons, 'epochs': epochs, 'learning_rate': learning_rate, 'momentum': momentum, 'decay': decay, 'nesterov': nesterov, 'loss': loss, 'batch_size': batch_size, 'verbose': verbose} 27 | 28 | def set_params(self, params): 29 | self.__init__(**params) 30 | 31 | def train(self, X, Y, epoch_ypred=False, epoch_xtest=None): 32 | """ Fit the neural network model, save additional stats (as attributes) and return Y predicted values. 33 | 34 | Parameters 35 | ---------- 36 | X : array-like, shape = [n_samples, n_features] 37 | Predictor variables, where n_samples is the number of samples and n_features is the number of predictors. 38 | 39 | Y : array-like, shape = [n_samples, 1] 40 | Response variables, where n_samples is the number of samples. 41 | 42 | Returns 43 | ------- 44 | y_pred_train : array-like, shape = [n_samples, 1] 45 | Predicted y score for samples. 
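        Example
        -------
        A minimal sketch (illustrative; X and Y are hypothetical arrays, with
        Y holding binary 0/1 labels):

        >>> model = NN_LogitLogit(n_neurons=3, epochs=500, learning_rate=0.01)
        >>> y_pred_train = model.train(X, Y)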
46 | """ 47 | 48 | # If batch-size is None: 49 | if self.batch_size is None: 50 | self.batch_size = len(X) 51 | 52 | self.model = Sequential() 53 | self.model.add(Dense(self.n_neurons, activation="sigmoid", input_dim=len(X.T))) 54 | self.model.add(Dense(1, activation="sigmoid")) 55 | self.model.compile(optimizer=self.optimizer, loss=self.loss, metrics=["accuracy"]) 56 | 57 | # If epoch_ypred is True, calculate ypred for each epoch 58 | if epoch_ypred is True: 59 | self.epoch = YpredCallback(self.model, X, epoch_xtest) 60 | else: 61 | self.epoch = Callback() 62 | 63 | # Fit 64 | self.model.fit(X, Y, epochs=self.n_epochs, batch_size=self.batch_size, verbose=self.verbose, callbacks=[self.epoch]) 65 | 66 | layer1_weight = self.model.layers[0].get_weights()[0] 67 | layer1_bias = self.model.layers[0].get_weights()[1] 68 | layer2_weight = self.model.layers[1].get_weights()[0] 69 | layer2_bias = self.model.layers[1].get_weights()[1] 70 | 71 | # Not sure about the naming scheme (trying to match PLS) 72 | self.model.x_loadings_ = layer1_weight 73 | self.model.x_scores_ = np.matmul(X, self.model.x_loadings_) + layer1_bias 74 | self.model.y_loadings_ = layer2_weight 75 | self.model.pctvar_ = np.ones((1, len(self.model.y_loadings_[0]))) 76 | self.xcols_num = len(X.T) 77 | self.model.pctvar_ = sum(abs(self.model.x_scores_) ** 2) / sum(sum(abs(X) ** 2)) * 100 78 | y_pred_train = self.model.predict(X).flatten() 79 | 80 | # Storing X, Y, and Y_pred 81 | self.Y_pred = y_pred_train 82 | self.X = X 83 | self.Y = Y 84 | return y_pred_train 85 | 86 | def test(self, X, Y=None): 87 | """Calculate and return Y predicted value. 88 | 89 | Parameters 90 | ---------- 91 | X : array-like, shape = [n_samples, n_features] 92 | Test variables, where n_samples is the number of samples and n_features is the number of predictors. 93 | 94 | Returns 95 | ------- 96 | y_pred_test : array-like, shape = [n_samples, 1] 97 | Predicted y score for samples. 
98 | """ 99 | 100 | layer1_weight = self.model.layers[0].get_weights()[0] 101 | layer1_bias = self.model.layers[0].get_weights()[1] 102 | layer2_weight = self.model.layers[1].get_weights()[0] 103 | layer2_bias = self.model.layers[1].get_weights()[1] 104 | 105 | self.model.y_loadings_ = layer2_weight 106 | self.model.y_loadings_ = self.model.y_loadings_.reshape(1, len(self.model.y_loadings_)) 107 | self.model.x_loadings_ = layer1_weight 108 | self.model.x_scores_ = np.matmul(X, self.model.x_loadings_) + layer1_bias 109 | y_pred_test = self.model.predict(X).flatten() 110 | self.model.pctvar_ = sum(abs(self.model.x_scores_) ** 2) / sum(sum(abs(X) ** 2)) * 100 111 | return y_pred_test 112 | -------------------------------------------------------------------------------- /cimcb/model/NN_LinearLinear.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from keras.callbacks import Callback 3 | from keras.optimizers import SGD 4 | from keras.models import Sequential 5 | from keras.layers import Dense 6 | from .BaseModel import BaseModel 7 | from ..utils import YpredCallback 8 | 9 | 10 | class NN_LinearLinear(BaseModel): 11 | """2 Layer linear-linear neural network using Keras""" 12 | 13 | parametric = True 14 | bootlist = None 15 | 16 | def __init__(self, n_neurons=2, epochs=200, learning_rate=0.01, momentum=0.0, decay=0.0, nesterov=False, loss="mean_squared_error", batch_size=None, verbose=0): 17 | self.n_neurons = n_neurons 18 | self.verbose = verbose 19 | self.n_epochs = epochs 20 | self.k = n_neurons 21 | self.batch_size = batch_size 22 | self.loss = loss 23 | self.optimizer = SGD(lr=learning_rate, momentum=momentum, decay=decay, nesterov=nesterov) 24 | 25 | self.__name__ = 'cimcb.model.NN_LinearLinear' 26 | self.__params__ = {'n_neurons': n_neurons, 'epochs': epochs, 'learning_rate': learning_rate, 'momentum': momentum, 'decay': decay, 'nesterov': nesterov, 'loss': loss, 'batch_size': batch_size, 'verbose': verbose} 27 | 28 | def set_params(self, params): 29 | self.__init__(**params) 30 | 31 | def train(self, X, Y, epoch_ypred=False, epoch_xtest=None): 32 | """ Fit the neural network model, save additional stats (as attributes) and return Y predicted values. 33 | 34 | Parameters 35 | ---------- 36 | X : array-like, shape = [n_samples, n_features] 37 | Predictor variables, where n_samples is the number of samples and n_features is the number of predictors. 38 | 39 | Y : array-like, shape = [n_samples, 1] 40 | Response variables, where n_samples is the number of samples. 41 | 42 | Returns 43 | ------- 44 | y_pred_train : array-like, shape = [n_samples, 1] 45 | Predicted y score for samples. 
46 | """ 47 | 48 | # If batch-size is None: 49 | if self.batch_size is None: 50 | self.batch_size = len(X) 51 | 52 | # Ensure array and error check 53 | X, Y = self.input_check(X, Y) 54 | 55 | self.model = Sequential() 56 | self.model.add(Dense(self.n_neurons, activation="linear", input_dim=len(X.T))) 57 | self.model.add(Dense(1, activation="linear")) 58 | self.model.compile(optimizer=self.optimizer, loss=self.loss, metrics=["accuracy"]) 59 | 60 | # If epoch_ypred is True, calculate ypred for each epoch 61 | if epoch_ypred is True: 62 | self.epoch = YpredCallback(self.model, X, epoch_xtest) 63 | else: 64 | self.epoch = Callback() 65 | 66 | # Fit 67 | self.model.fit(X, Y, epochs=self.n_epochs, batch_size=self.batch_size, verbose=self.verbose, callbacks=[self.epoch]) 68 | 69 | layer1_weight = self.model.layers[0].get_weights()[0] 70 | layer1_bias = self.model.layers[0].get_weights()[1] 71 | layer2_weight = self.model.layers[1].get_weights()[0] 72 | layer2_bias = self.model.layers[1].get_weights()[1] 73 | 74 | # Not sure about the naming scheme (trying to match PLS) 75 | self.model.x_loadings_ = layer1_weight 76 | self.model.x_scores_ = np.matmul(X, self.model.x_loadings_) + layer1_bias 77 | self.model.y_loadings_ = layer2_weight 78 | self.model.pctvar_ = np.ones((1, len(self.model.y_loadings_[0]))) 79 | self.xcols_num = len(X.T) 80 | self.model.pctvar_ = sum(abs(self.model.x_scores_) ** 2) / sum(sum(abs(X) ** 2)) * 100 81 | y_pred_train = self.model.predict(X).flatten() 82 | 83 | # Storing X, Y, and Y_pred 84 | self.Y_pred = y_pred_train 85 | self.X = X 86 | self.Y = Y 87 | return y_pred_train 88 | 89 | def test(self, X, Y=None): 90 | """Calculate and return Y predicted value. 91 | 92 | Parameters 93 | ---------- 94 | X : array-like, shape = [n_samples, n_features] 95 | Test variables, where n_samples is the number of samples and n_features is the number of predictors. 96 | 97 | Returns 98 | ------- 99 | y_pred_test : array-like, shape = [n_samples, 1] 100 | Predicted y score for samples. 
101 | """ 102 | 103 | layer1_weight = self.model.layers[0].get_weights()[0] 104 | layer1_bias = self.model.layers[0].get_weights()[1] 105 | layer2_weight = self.model.layers[1].get_weights()[0] 106 | layer2_bias = self.model.layers[1].get_weights()[1] 107 | 108 | self.model.y_loadings_ = layer2_weight 109 | self.model.y_loadings_ = self.model.y_loadings_.reshape(1, len(self.model.y_loadings_)) 110 | self.model.x_loadings_ = layer1_weight 111 | self.model.x_scores_ = np.matmul(X, self.model.x_loadings_) + layer1_bias 112 | y_pred_test = self.model.predict(X).flatten() 113 | self.model.pctvar_ = sum(abs(self.model.x_scores_) ** 2) / sum(sum(abs(X) ** 2)) * 100 114 | return y_pred_test 115 | -------------------------------------------------------------------------------- /cimcb/model/NN_SigmoidSigmoidSigmoid.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from keras.callbacks import Callback 3 | from keras.optimizers import SGD 4 | from keras.models import Sequential 5 | from keras.layers import Dense 6 | from .BaseModel import BaseModel 7 | from ..utils import YpredCallback 8 | 9 | 10 | class NN_SigmoidSigmoidSigmoid(BaseModel): 11 | """2 Layer logistic-logistic neural network using Keras""" 12 | 13 | parametric = True 14 | bootlist = None 15 | 16 | def __init__(self, n_neurons_l1=2, n_neurons_l2=2, epochs=200, learning_rate=0.01, momentum=0.0, decay=0.0, nesterov=False, loss="binary_crossentropy", batch_size=None, verbose=0): 17 | self.n_neurons_l1 = n_neurons_l1 18 | self.n_neurons_l2 = n_neurons_l2 19 | self.verbose = verbose 20 | self.n_epochs = epochs 21 | self.k = n_neurons_l1 22 | self.batch_size = batch_size 23 | self.loss = loss 24 | self.optimizer = SGD(lr=learning_rate, momentum=momentum, decay=decay, nesterov=nesterov) 25 | 26 | self.__name__ = 'cimcb.model.NN_SigmoidSigmoidSigmoid' 27 | self.__params__ = {'n_neurons_l1': n_neurons_l1, 'n_neurons_l2': n_neurons_l2, 'epochs': epochs, 'learning_rate': learning_rate, 'momentum': momentum, 'decay': decay, 'nesterov': nesterov, 'loss': loss, 'batch_size': batch_size, 'verbose': verbose} 28 | 29 | def set_params(self, params): 30 | self.__init__(**params) 31 | 32 | def train(self, X, Y, epoch_ypred=False, epoch_xtest=None): 33 | """ Fit the neural network model, save additional stats (as attributes) and return Y predicted values. 34 | 35 | Parameters 36 | ---------- 37 | X : array-like, shape = [n_samples, n_features] 38 | Predictor variables, where n_samples is the number of samples and n_features is the number of predictors. 39 | 40 | Y : array-like, shape = [n_samples, 1] 41 | Response variables, where n_samples is the number of samples. 42 | 43 | Returns 44 | ------- 45 | y_pred_train : array-like, shape = [n_samples, 1] 46 | Predicted y score for samples. 
47 | """ 48 | 49 | # If batch-size is None: 50 | if self.batch_size is None: 51 | self.batch_size = len(X) 52 | 53 | self.model = Sequential() 54 | self.model.add(Dense(self.n_neurons_l1, activation="sigmoid", input_dim=len(X.T))) 55 | self.model.add(Dense(self.n_neurons_l2, activation="sigmoid", input_dim=self.n_neurons_l1)) 56 | self.model.add(Dense(1, activation="sigmoid")) 57 | self.model.compile(optimizer=self.optimizer, loss=self.loss, metrics=["accuracy"]) 58 | 59 | # If epoch_ypred is True, calculate ypred for each epoch 60 | if epoch_ypred is True: 61 | self.epoch = YpredCallback(self.model, X, epoch_xtest) 62 | else: 63 | self.epoch = Callback() 64 | 65 | # Fit 66 | self.model.fit(X, Y, epochs=self.n_epochs, batch_size=self.batch_size, verbose=self.verbose, callbacks=[self.epoch]) 67 | 68 | layer1_weight = self.model.layers[0].get_weights()[0] 69 | layer1_bias = self.model.layers[0].get_weights()[1] 70 | layer2_weight = self.model.layers[1].get_weights()[0] 71 | layer2_bias = self.model.layers[1].get_weights()[1] 72 | 73 | # Not sure about the naming scheme (trying to match PLS) 74 | # self.model.x_loadings_ = layer1_weight 75 | # self.model.x_scores_ = np.matmul(X, self.model.x_loadings_) + layer1_bias 76 | # self.model.y_loadings_ = layer2_weight 77 | # self.model.pctvar_ = np.ones((1, len(self.model.y_loadings_[0]))) 78 | # self.xcols_num = len(X.T) 79 | # self.model.pctvar_ = sum(abs(self.model.x_scores_) ** 2) / sum(sum(abs(X) ** 2)) * 100 80 | y_pred_train = self.model.predict(X).flatten() 81 | 82 | # Storing X, Y, and Y_pred 83 | self.Y_pred = y_pred_train 84 | self.X = X 85 | self.Y = Y 86 | return y_pred_train 87 | 88 | def test(self, X, Y=None): 89 | """Calculate and return Y predicted value. 90 | 91 | Parameters 92 | ---------- 93 | X : array-like, shape = [n_samples, n_features] 94 | Test variables, where n_samples is the number of samples and n_features is the number of predictors. 95 | 96 | Returns 97 | ------- 98 | y_pred_test : array-like, shape = [n_samples, 1] 99 | Predicted y score for samples. 
100 | """ 101 | 102 | layer1_weight = self.model.layers[0].get_weights()[0] 103 | layer1_bias = self.model.layers[0].get_weights()[1] 104 | layer2_weight = self.model.layers[1].get_weights()[0] 105 | layer2_bias = self.model.layers[1].get_weights()[1] 106 | 107 | # self.model.y_loadings_ = layer2_weight 108 | # self.model.y_loadings_ = self.model.y_loadings_.reshape(1, len(self.model.y_loadings_)) 109 | # self.model.x_loadings_ = layer1_weight 110 | # self.model.x_scores_ = np.matmul(X, self.model.x_loadings_) + layer1_bias 111 | y_pred_test = self.model.predict(X).flatten() 112 | # self.model.pctvar_ = sum(abs(self.model.x_scores_) ** 2) / sum(sum(abs(X) ** 2)) * 100 113 | return y_pred_test 114 | -------------------------------------------------------------------------------- /cimcb/model/NN_ReluTan.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from keras.callbacks import Callback 3 | from keras.optimizers import SGD 4 | from keras.models import Sequential 5 | from keras.layers import Dense 6 | import tensorflow as tf 7 | from keras import backend as K 8 | from .BaseModel import BaseModel 9 | from ..utils import YpredCallback 10 | 11 | 12 | class NN_ReluTan(BaseModel): 13 | """2 Layer linear-logistic neural network using Keras""" 14 | 15 | parametric = True 16 | bootlist = None 17 | 18 | def __init__(self, n_neurons=2, epochs=200, learning_rate=0.01, momentum=0.0, decay=0.0, nesterov=False, loss="binary_crossentropy", batch_size=None, verbose=0): 19 | self.n_neurons = n_neurons 20 | self.verbose = verbose 21 | self.n_epochs = epochs 22 | self.k = n_neurons 23 | self.batch_size = batch_size 24 | self.loss = loss 25 | self.optimizer = SGD(lr=learning_rate, momentum=momentum, decay=decay, nesterov=nesterov) 26 | 27 | self.__name__ = 'cimcb.model.NN_ReluTan' 28 | self.__params__ = {'n_neurons': n_neurons, 'epochs': epochs, 'learning_rate': learning_rate, 'momentum': momentum, 'decay': decay, 'nesterov': nesterov, 'loss': loss, 'batch_size': batch_size, 'verbose': verbose} 29 | 30 | def set_params(self, params): 31 | self.__init__(**params) 32 | 33 | def train(self, X, Y, epoch_ypred=False, epoch_xtest=None): 34 | """ Fit the neural network model, save additional stats (as attributes) and return Y predicted values. 35 | 36 | Parameters 37 | ---------- 38 | X : array-like, shape = [n_samples, n_features] 39 | Predictor variables, where n_samples is the number of samples and n_features is the number of predictors. 40 | 41 | Y : array-like, shape = [n_samples, 1] 42 | Response variables, where n_samples is the number of samples. 43 | 44 | Returns 45 | ------- 46 | y_pred_train : array-like, shape = [n_samples, 1] 47 | Predicted y score for samples. 
48 | """ 49 | 50 | # # If using Keras, set tf to 1 core 51 | # config = K.tf.ConfigProto(intra_op_parallelism_threads=8, inter_op_parallelism_threads=8, allow_soft_placement=True) 52 | # session = tf.Session(config=config) 53 | # K.set_session(session) 54 | 55 | # If batch-size is None: 56 | if self.batch_size is None: 57 | self.batch_size = len(X) 58 | 59 | self.model = Sequential() 60 | self.model.add(Dense(self.n_neurons, activation="relu", input_dim=len(X.T))) 61 | self.model.add(Dense(1, activation="tanh")) 62 | self.model.compile(optimizer=self.optimizer, loss=self.loss, metrics=["accuracy"]) 63 | 64 | # If epoch_ypred is True, calculate ypred for each epoch 65 | if epoch_ypred is True: 66 | self.epoch = YpredCallback(self.model, X, epoch_xtest) 67 | else: 68 | self.epoch = Callback() 69 | 70 | # Fit 71 | self.model.fit(X, Y, epochs=self.n_epochs, batch_size=self.batch_size, verbose=self.verbose, callbacks=[self.epoch]) 72 | 73 | layer1_weight = self.model.layers[0].get_weights()[0] 74 | layer1_bias = self.model.layers[0].get_weights()[1] 75 | layer2_weight = self.model.layers[1].get_weights()[0] 76 | layer2_bias = self.model.layers[1].get_weights()[1] 77 | 78 | # Not sure about the naming scheme (trying to match PLS) 79 | self.model.x_loadings_ = layer1_weight 80 | self.model.x_scores_ = np.matmul(X, self.model.x_loadings_) + layer1_bias 81 | self.model.y_loadings_ = layer2_weight 82 | self.model.pctvar_ = np.ones((1, len(self.model.y_loadings_[0]))) 83 | self.xcols_num = len(X.T) 84 | self.model.pctvar_ = sum(abs(self.model.x_scores_) ** 2) / sum(sum(abs(X) ** 2)) * 100 85 | y_pred_train = self.model.predict(X).flatten() 86 | 87 | # Storing X, Y, and Y_pred 88 | self.Y_pred = y_pred_train 89 | self.X = X 90 | self.Y = Y 91 | return y_pred_train 92 | 93 | def test(self, X, Y=None): 94 | """Calculate and return Y predicted value. 95 | 96 | Parameters 97 | ---------- 98 | X : array-like, shape = [n_samples, n_features] 99 | Test variables, where n_samples is the number of samples and n_features is the number of predictors. 100 | 101 | Returns 102 | ------- 103 | y_pred_test : array-like, shape = [n_samples, 1] 104 | Predicted y score for samples. 
105 | """ 106 | 107 | layer1_weight = self.model.layers[0].get_weights()[0] 108 | layer1_bias = self.model.layers[0].get_weights()[1] 109 | layer2_weight = self.model.layers[1].get_weights()[0] 110 | layer2_bias = self.model.layers[1].get_weights()[1] 111 | 112 | self.model.y_loadings_ = layer2_weight 113 | self.model.y_loadings_ = self.model.y_loadings_.reshape(1, len(self.model.y_loadings_)) 114 | self.model.x_loadings_ = layer1_weight 115 | self.model.x_scores_ = np.matmul(X, self.model.x_loadings_) + layer1_bias 116 | y_pred_test = self.model.predict(X).flatten() 117 | self.model.pctvar_ = sum(abs(self.model.x_scores_) ** 2) / sum(sum(abs(X) ** 2)) * 100 118 | return y_pred_test 119 | -------------------------------------------------------------------------------- /cimcb/model/NN_TanRelu.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from keras.callbacks import Callback 3 | from keras.optimizers import SGD 4 | from keras.models import Sequential 5 | from keras.layers import Dense 6 | import tensorflow as tf 7 | from keras import backend as K 8 | from .BaseModel import BaseModel 9 | from ..utils import YpredCallback 10 | 11 | 12 | class NN_TanRelu(BaseModel): 13 | """2 Layer linear-logistic neural network using Keras""" 14 | 15 | parametric = True 16 | bootlist = None 17 | 18 | def __init__(self, n_neurons=2, epochs=200, learning_rate=0.01, momentum=0.0, decay=0.0, nesterov=False, loss="binary_crossentropy", batch_size=None, verbose=0): 19 | self.n_neurons = n_neurons 20 | self.verbose = verbose 21 | self.n_epochs = epochs 22 | self.k = n_neurons 23 | self.batch_size = batch_size 24 | self.loss = loss 25 | self.optimizer = SGD(lr=learning_rate, momentum=momentum, decay=decay, nesterov=nesterov) 26 | 27 | self.__name__ = 'cimcb.model.NN_TanRelu' 28 | self.__params__ = {'n_neurons': n_neurons, 'epochs': epochs, 'learning_rate': learning_rate, 'momentum': momentum, 'decay': decay, 'nesterov': nesterov, 'loss': loss, 'batch_size': batch_size, 'verbose': verbose} 29 | 30 | def set_params(self, params): 31 | self.__init__(**params) 32 | 33 | def train(self, X, Y, epoch_ypred=False, epoch_xtest=None): 34 | """ Fit the neural network model, save additional stats (as attributes) and return Y predicted values. 35 | 36 | Parameters 37 | ---------- 38 | X : array-like, shape = [n_samples, n_features] 39 | Predictor variables, where n_samples is the number of samples and n_features is the number of predictors. 40 | 41 | Y : array-like, shape = [n_samples, 1] 42 | Response variables, where n_samples is the number of samples. 43 | 44 | Returns 45 | ------- 46 | y_pred_train : array-like, shape = [n_samples, 1] 47 | Predicted y score for samples. 
48 | """ 49 | 50 | # # If using Keras, set tf to 1 core 51 | # config = K.tf.ConfigProto(intra_op_parallelism_threads=8, inter_op_parallelism_threads=8, allow_soft_placement=True) 52 | # session = tf.Session(config=config) 53 | # K.set_session(session) 54 | 55 | # If batch-size is None: 56 | if self.batch_size is None: 57 | self.batch_size = len(X) 58 | 59 | self.model = Sequential() 60 | self.model.add(Dense(self.n_neurons, activation="tanh", input_dim=len(X.T))) 61 | self.model.add(Dense(1, activation="relu")) 62 | self.model.compile(optimizer=self.optimizer, loss=self.loss, metrics=["accuracy"]) 63 | 64 | # If epoch_ypred is True, calculate ypred for each epoch 65 | if epoch_ypred is True: 66 | self.epoch = YpredCallback(self.model, X, epoch_xtest) 67 | else: 68 | self.epoch = Callback() 69 | 70 | # Fit 71 | self.model.fit(X, Y, epochs=self.n_epochs, batch_size=self.batch_size, verbose=self.verbose, callbacks=[self.epoch]) 72 | 73 | layer1_weight = self.model.layers[0].get_weights()[0] 74 | layer1_bias = self.model.layers[0].get_weights()[1] 75 | layer2_weight = self.model.layers[1].get_weights()[0] 76 | layer2_bias = self.model.layers[1].get_weights()[1] 77 | 78 | # Not sure about the naming scheme (trying to match PLS) 79 | self.model.x_loadings_ = layer1_weight 80 | self.model.x_scores_ = np.matmul(X, self.model.x_loadings_) + layer1_bias 81 | self.model.y_loadings_ = layer2_weight 82 | self.model.pctvar_ = np.ones((1, len(self.model.y_loadings_[0]))) 83 | self.xcols_num = len(X.T) 84 | self.model.pctvar_ = sum(abs(self.model.x_scores_) ** 2) / sum(sum(abs(X) ** 2)) * 100 85 | y_pred_train = self.model.predict(X).flatten() 86 | 87 | # Storing X, Y, and Y_pred 88 | self.Y_pred = y_pred_train 89 | self.X = X 90 | self.Y = Y 91 | return y_pred_train 92 | 93 | def test(self, X, Y=None): 94 | """Calculate and return Y predicted value. 95 | 96 | Parameters 97 | ---------- 98 | X : array-like, shape = [n_samples, n_features] 99 | Test variables, where n_samples is the number of samples and n_features is the number of predictors. 100 | 101 | Returns 102 | ------- 103 | y_pred_test : array-like, shape = [n_samples, 1] 104 | Predicted y score for samples. 
105 | """ 106 | 107 | layer1_weight = self.model.layers[0].get_weights()[0] 108 | layer1_bias = self.model.layers[0].get_weights()[1] 109 | layer2_weight = self.model.layers[1].get_weights()[0] 110 | layer2_bias = self.model.layers[1].get_weights()[1] 111 | 112 | self.model.y_loadings_ = layer2_weight 113 | self.model.y_loadings_ = self.model.y_loadings_.reshape(1, len(self.model.y_loadings_)) 114 | self.model.x_loadings_ = layer1_weight 115 | self.model.x_scores_ = np.matmul(X, self.model.x_loadings_) + layer1_bias 116 | y_pred_test = self.model.predict(X).flatten() 117 | self.model.pctvar_ = sum(abs(self.model.x_scores_) ** 2) / sum(sum(abs(X) ** 2)) * 100 118 | return y_pred_test 119 | -------------------------------------------------------------------------------- /cimcb/model/NN_TanTan.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from keras.callbacks import Callback 3 | from keras.optimizers import SGD 4 | from keras.models import Sequential 5 | from keras.layers import Dense 6 | import tensorflow as tf 7 | from keras import backend as K 8 | from .BaseModel import BaseModel 9 | from ..utils import YpredCallback 10 | 11 | 12 | class NN_TanTan(BaseModel): 13 | """2 Layer linear-logistic neural network using Keras""" 14 | 15 | parametric = True 16 | bootlist = None 17 | 18 | def __init__(self, n_neurons=2, epochs=200, learning_rate=0.01, momentum=0.0, decay=0.0, nesterov=False, loss="binary_crossentropy", batch_size=None, verbose=0): 19 | self.n_neurons = n_neurons 20 | self.verbose = verbose 21 | self.n_epochs = epochs 22 | self.k = n_neurons 23 | self.batch_size = batch_size 24 | self.loss = loss 25 | self.optimizer = SGD(lr=learning_rate, momentum=momentum, decay=decay, nesterov=nesterov) 26 | 27 | self.__name__ = 'cimcb.model.NN_TanTan' 28 | self.__params__ = {'n_neurons': n_neurons, 'epochs': epochs, 'learning_rate': learning_rate, 'momentum': momentum, 'decay': decay, 'nesterov': nesterov, 'loss': loss, 'batch_size': batch_size, 'verbose': verbose} 29 | 30 | def set_params(self, params): 31 | self.__init__(**params) 32 | 33 | def train(self, X, Y, epoch_ypred=False, epoch_xtest=None): 34 | """ Fit the neural network model, save additional stats (as attributes) and return Y predicted values. 35 | 36 | Parameters 37 | ---------- 38 | X : array-like, shape = [n_samples, n_features] 39 | Predictor variables, where n_samples is the number of samples and n_features is the number of predictors. 40 | 41 | Y : array-like, shape = [n_samples, 1] 42 | Response variables, where n_samples is the number of samples. 43 | 44 | Returns 45 | ------- 46 | y_pred_train : array-like, shape = [n_samples, 1] 47 | Predicted y score for samples. 
48 | """ 49 | 50 | # # If using Keras, set tf to 1 core 51 | # config = K.tf.ConfigProto(intra_op_parallelism_threads=8, inter_op_parallelism_threads=8, allow_soft_placement=True) 52 | # session = tf.Session(config=config) 53 | # K.set_session(session) 54 | 55 | # If batch-size is None: 56 | if self.batch_size is None: 57 | self.batch_size = len(X) 58 | 59 | self.model = Sequential() 60 | self.model.add(Dense(self.n_neurons, activation="tanh", input_dim=len(X.T))) 61 | self.model.add(Dense(1, activation="tanh")) 62 | self.model.compile(optimizer=self.optimizer, loss=self.loss, metrics=["accuracy"]) 63 | 64 | # If epoch_ypred is True, calculate ypred for each epoch 65 | if epoch_ypred is True: 66 | self.epoch = YpredCallback(self.model, X, epoch_xtest) 67 | else: 68 | self.epoch = Callback() 69 | 70 | # Fit 71 | self.model.fit(X, Y, epochs=self.n_epochs, batch_size=self.batch_size, verbose=self.verbose, callbacks=[self.epoch]) 72 | 73 | layer1_weight = self.model.layers[0].get_weights()[0] 74 | layer1_bias = self.model.layers[0].get_weights()[1] 75 | layer2_weight = self.model.layers[1].get_weights()[0] 76 | layer2_bias = self.model.layers[1].get_weights()[1] 77 | 78 | # Not sure about the naming scheme (trying to match PLS) 79 | self.model.x_loadings_ = layer1_weight 80 | self.model.x_scores_ = np.matmul(X, self.model.x_loadings_) + layer1_bias 81 | self.model.y_loadings_ = layer2_weight 82 | self.model.pctvar_ = np.ones((1, len(self.model.y_loadings_[0]))) 83 | self.xcols_num = len(X.T) 84 | self.model.pctvar_ = sum(abs(self.model.x_scores_) ** 2) / sum(sum(abs(X) ** 2)) * 100 85 | y_pred_train = self.model.predict(X).flatten() 86 | 87 | # Storing X, Y, and Y_pred 88 | self.Y_pred = y_pred_train 89 | self.X = X 90 | self.Y = Y 91 | return y_pred_train 92 | 93 | def test(self, X, Y=None): 94 | """Calculate and return Y predicted value. 95 | 96 | Parameters 97 | ---------- 98 | X : array-like, shape = [n_samples, n_features] 99 | Test variables, where n_samples is the number of samples and n_features is the number of predictors. 100 | 101 | Returns 102 | ------- 103 | y_pred_test : array-like, shape = [n_samples, 1] 104 | Predicted y score for samples. 
105 | """ 106 | 107 | layer1_weight = self.model.layers[0].get_weights()[0] 108 | layer1_bias = self.model.layers[0].get_weights()[1] 109 | layer2_weight = self.model.layers[1].get_weights()[0] 110 | layer2_bias = self.model.layers[1].get_weights()[1] 111 | 112 | self.model.y_loadings_ = layer2_weight 113 | self.model.y_loadings_ = self.model.y_loadings_.reshape(1, len(self.model.y_loadings_)) 114 | self.model.x_loadings_ = layer1_weight 115 | self.model.x_scores_ = np.matmul(X, self.model.x_loadings_) + layer1_bias 116 | y_pred_test = self.model.predict(X).flatten() 117 | self.model.pctvar_ = sum(abs(self.model.x_scores_) ** 2) / sum(sum(abs(X) ** 2)) * 100 118 | return y_pred_test 119 | -------------------------------------------------------------------------------- /cimcb/model/NN_ReluRelu.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from keras.callbacks import Callback 3 | from keras.optimizers import SGD 4 | from keras.models import Sequential 5 | from keras.layers import Dense 6 | import tensorflow as tf 7 | from keras import backend as K 8 | from .BaseModel import BaseModel 9 | from ..utils import YpredCallback 10 | 11 | 12 | class NN_ReluRelu(BaseModel): 13 | """2 Layer linear-logistic neural network using Keras""" 14 | 15 | parametric = True 16 | bootlist = None 17 | 18 | def __init__(self, n_neurons=2, epochs=200, learning_rate=0.01, momentum=0.0, decay=0.0, nesterov=False, loss="binary_crossentropy", batch_size=None, verbose=0): 19 | self.n_neurons = n_neurons 20 | self.verbose = verbose 21 | self.n_epochs = epochs 22 | self.k = n_neurons 23 | self.batch_size = batch_size 24 | self.loss = loss 25 | self.optimizer = SGD(lr=learning_rate, momentum=momentum, decay=decay, nesterov=nesterov) 26 | 27 | self.__name__ = 'cimcb.model.NN_ReluRelu' 28 | self.__params__ = {'n_neurons': n_neurons, 'epochs': epochs, 'learning_rate': learning_rate, 'momentum': momentum, 'decay': decay, 'nesterov': nesterov, 'loss': loss, 'batch_size': batch_size, 'verbose': verbose} 29 | 30 | def set_params(self, params): 31 | self.__init__(**params) 32 | 33 | def train(self, X, Y, epoch_ypred=False, epoch_xtest=None): 34 | """ Fit the neural network model, save additional stats (as attributes) and return Y predicted values. 35 | 36 | Parameters 37 | ---------- 38 | X : array-like, shape = [n_samples, n_features] 39 | Predictor variables, where n_samples is the number of samples and n_features is the number of predictors. 40 | 41 | Y : array-like, shape = [n_samples, 1] 42 | Response variables, where n_samples is the number of samples. 43 | 44 | Returns 45 | ------- 46 | y_pred_train : array-like, shape = [n_samples, 1] 47 | Predicted y score for samples. 
48 | """ 49 | 50 | # # If using Keras, set tf to 1 core 51 | # config = K.tf.ConfigProto(intra_op_parallelism_threads=8, inter_op_parallelism_threads=8, allow_soft_placement=True) 52 | # session = tf.Session(config=config) 53 | # K.set_session(session) 54 | 55 | # If batch-size is None: 56 | if self.batch_size is None: 57 | self.batch_size = len(X) 58 | 59 | self.model = Sequential() 60 | self.model.add(Dense(self.n_neurons, activation="relu", input_dim=len(X.T))) 61 | self.model.add(Dense(1, activation="relu")) 62 | self.model.compile(optimizer=self.optimizer, loss=self.loss, metrics=["accuracy"]) 63 | 64 | # If epoch_ypred is True, calculate ypred for each epoch 65 | if epoch_ypred is True: 66 | self.epoch = YpredCallback(self.model, X, epoch_xtest) 67 | else: 68 | self.epoch = Callback() 69 | 70 | # Fit 71 | self.model.fit(X, Y, epochs=self.n_epochs, batch_size=self.batch_size, verbose=self.verbose, callbacks=[self.epoch]) 72 | 73 | layer1_weight = self.model.layers[0].get_weights()[0] 74 | layer1_bias = self.model.layers[0].get_weights()[1] 75 | layer2_weight = self.model.layers[1].get_weights()[0] 76 | layer2_bias = self.model.layers[1].get_weights()[1] 77 | 78 | # Not sure about the naming scheme (trying to match PLS) 79 | self.model.x_loadings_ = layer1_weight 80 | self.model.x_scores_ = np.matmul(X, self.model.x_loadings_) + layer1_bias 81 | self.model.y_loadings_ = layer2_weight 82 | self.model.pctvar_ = np.ones((1, len(self.model.y_loadings_[0]))) 83 | self.xcols_num = len(X.T) 84 | self.model.pctvar_ = sum(abs(self.model.x_scores_) ** 2) / sum(sum(abs(X) ** 2)) * 100 85 | y_pred_train = self.model.predict(X).flatten() 86 | 87 | # Storing X, Y, and Y_pred 88 | self.Y_pred = y_pred_train 89 | self.X = X 90 | self.Y = Y 91 | return y_pred_train 92 | 93 | def test(self, X, Y=None): 94 | """Calculate and return Y predicted value. 95 | 96 | Parameters 97 | ---------- 98 | X : array-like, shape = [n_samples, n_features] 99 | Test variables, where n_samples is the number of samples and n_features is the number of predictors. 100 | 101 | Returns 102 | ------- 103 | y_pred_test : array-like, shape = [n_samples, 1] 104 | Predicted y score for samples. 
105 | """ 106 | 107 | layer1_weight = self.model.layers[0].get_weights()[0] 108 | layer1_bias = self.model.layers[0].get_weights()[1] 109 | layer2_weight = self.model.layers[1].get_weights()[0] 110 | layer2_bias = self.model.layers[1].get_weights()[1] 111 | 112 | self.model.y_loadings_ = layer2_weight 113 | self.model.y_loadings_ = self.model.y_loadings_.reshape(1, len(self.model.y_loadings_)) 114 | self.model.x_loadings_ = layer1_weight 115 | self.model.x_scores_ = np.matmul(X, self.model.x_loadings_) + layer1_bias 116 | y_pred_test = self.model.predict(X).flatten() 117 | self.model.pctvar_ = sum(abs(self.model.x_scores_) ** 2) / sum(sum(abs(X) ** 2)) * 100 118 | return y_pred_test 119 | -------------------------------------------------------------------------------- /cimcb/model/NN_LinearTan.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from keras.callbacks import Callback 3 | from keras.optimizers import SGD 4 | from keras.models import Sequential 5 | from keras.layers import Dense 6 | import tensorflow as tf 7 | from keras import backend as K 8 | from .BaseModel import BaseModel 9 | from ..utils import YpredCallback 10 | 11 | 12 | class NN_LinearTan(BaseModel): 13 | """2 Layer linear-logistic neural network using Keras""" 14 | 15 | parametric = True 16 | bootlist = None 17 | 18 | def __init__(self, n_neurons=2, epochs=200, learning_rate=0.01, momentum=0.0, decay=0.0, nesterov=False, loss="binary_crossentropy", batch_size=None, verbose=0): 19 | self.n_neurons = n_neurons 20 | self.verbose = verbose 21 | self.n_epochs = epochs 22 | self.k = n_neurons 23 | self.batch_size = batch_size 24 | self.loss = loss 25 | self.optimizer = SGD(lr=learning_rate, momentum=momentum, decay=decay, nesterov=nesterov) 26 | 27 | self.__name__ = 'cimcb.model.NN_LinearTan' 28 | self.__params__ = {'n_neurons': n_neurons, 'epochs': epochs, 'learning_rate': learning_rate, 'momentum': momentum, 'decay': decay, 'nesterov': nesterov, 'loss': loss, 'batch_size': batch_size, 'verbose': verbose} 29 | 30 | def set_params(self, params): 31 | self.__init__(**params) 32 | 33 | def train(self, X, Y, epoch_ypred=False, epoch_xtest=None): 34 | """ Fit the neural network model, save additional stats (as attributes) and return Y predicted values. 35 | 36 | Parameters 37 | ---------- 38 | X : array-like, shape = [n_samples, n_features] 39 | Predictor variables, where n_samples is the number of samples and n_features is the number of predictors. 40 | 41 | Y : array-like, shape = [n_samples, 1] 42 | Response variables, where n_samples is the number of samples. 43 | 44 | Returns 45 | ------- 46 | y_pred_train : array-like, shape = [n_samples, 1] 47 | Predicted y score for samples. 
48 | """ 49 | 50 | # # If using Keras, set tf to 1 core 51 | # config = K.tf.ConfigProto(intra_op_parallelism_threads=8, inter_op_parallelism_threads=8, allow_soft_placement=True) 52 | # session = tf.Session(config=config) 53 | # K.set_session(session) 54 | 55 | # If batch-size is None: 56 | if self.batch_size is None: 57 | self.batch_size = len(X) 58 | 59 | self.model = Sequential() 60 | self.model.add(Dense(self.n_neurons, activation="linear", input_dim=len(X.T))) 61 | self.model.add(Dense(1, activation="tanh")) 62 | self.model.compile(optimizer=self.optimizer, loss=self.loss, metrics=["accuracy"]) 63 | 64 | # If epoch_ypred is True, calculate ypred for each epoch 65 | if epoch_ypred is True: 66 | self.epoch = YpredCallback(self.model, X, epoch_xtest) 67 | else: 68 | self.epoch = Callback() 69 | 70 | # Fit 71 | self.model.fit(X, Y, epochs=self.n_epochs, batch_size=self.batch_size, verbose=self.verbose, callbacks=[self.epoch]) 72 | 73 | layer1_weight = self.model.layers[0].get_weights()[0] 74 | layer1_bias = self.model.layers[0].get_weights()[1] 75 | layer2_weight = self.model.layers[1].get_weights()[0] 76 | layer2_bias = self.model.layers[1].get_weights()[1] 77 | 78 | # Not sure about the naming scheme (trying to match PLS) 79 | self.model.x_loadings_ = layer1_weight 80 | self.model.x_scores_ = np.matmul(X, self.model.x_loadings_) + layer1_bias 81 | self.model.y_loadings_ = layer2_weight 82 | self.model.pctvar_ = np.ones((1, len(self.model.y_loadings_[0]))) 83 | self.xcols_num = len(X.T) 84 | self.model.pctvar_ = sum(abs(self.model.x_scores_) ** 2) / sum(sum(abs(X) ** 2)) * 100 85 | y_pred_train = self.model.predict(X).flatten() 86 | 87 | # Storing X, Y, and Y_pred 88 | self.Y_pred = y_pred_train 89 | self.X = X 90 | self.Y = Y 91 | return y_pred_train 92 | 93 | def test(self, X, Y=None): 94 | """Calculate and return Y predicted value. 95 | 96 | Parameters 97 | ---------- 98 | X : array-like, shape = [n_samples, n_features] 99 | Test variables, where n_samples is the number of samples and n_features is the number of predictors. 100 | 101 | Returns 102 | ------- 103 | y_pred_test : array-like, shape = [n_samples, 1] 104 | Predicted y score for samples. 
105 | """ 106 | 107 | layer1_weight = self.model.layers[0].get_weights()[0] 108 | layer1_bias = self.model.layers[0].get_weights()[1] 109 | layer2_weight = self.model.layers[1].get_weights()[0] 110 | layer2_bias = self.model.layers[1].get_weights()[1] 111 | 112 | self.model.y_loadings_ = layer2_weight 113 | self.model.y_loadings_ = self.model.y_loadings_.reshape(1, len(self.model.y_loadings_)) 114 | self.model.x_loadings_ = layer1_weight 115 | self.model.x_scores_ = np.matmul(X, self.model.x_loadings_) + layer1_bias 116 | y_pred_test = self.model.predict(X).flatten() 117 | self.model.pctvar_ = sum(abs(self.model.x_scores_) ** 2) / sum(sum(abs(X) ** 2)) * 100 118 | return y_pred_test 119 | -------------------------------------------------------------------------------- /cimcb/model/NN_LogitRelu.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from keras.callbacks import Callback 3 | from keras.optimizers import SGD 4 | from keras.models import Sequential 5 | from keras.layers import Dense 6 | import tensorflow as tf 7 | from keras import backend as K 8 | from .BaseModel import BaseModel 9 | from ..utils import YpredCallback 10 | 11 | 12 | class NN_LogitRelu(BaseModel): 13 | """2 Layer linear-logistic neural network using Keras""" 14 | 15 | parametric = True 16 | bootlist = None 17 | 18 | def __init__(self, n_neurons=2, epochs=200, learning_rate=0.01, momentum=0.0, decay=0.0, nesterov=False, loss="binary_crossentropy", batch_size=None, verbose=0): 19 | self.n_neurons = n_neurons 20 | self.verbose = verbose 21 | self.n_epochs = epochs 22 | self.k = n_neurons 23 | self.batch_size = batch_size 24 | self.loss = loss 25 | self.optimizer = SGD(lr=learning_rate, momentum=momentum, decay=decay, nesterov=nesterov) 26 | 27 | self.__name__ = 'cimcb.model.NN_LogitRelu' 28 | self.__params__ = {'n_neurons': n_neurons, 'epochs': epochs, 'learning_rate': learning_rate, 'momentum': momentum, 'decay': decay, 'nesterov': nesterov, 'loss': loss, 'batch_size': batch_size, 'verbose': verbose} 29 | 30 | def set_params(self, params): 31 | self.__init__(**params) 32 | 33 | def train(self, X, Y, epoch_ypred=False, epoch_xtest=None): 34 | """ Fit the neural network model, save additional stats (as attributes) and return Y predicted values. 35 | 36 | Parameters 37 | ---------- 38 | X : array-like, shape = [n_samples, n_features] 39 | Predictor variables, where n_samples is the number of samples and n_features is the number of predictors. 40 | 41 | Y : array-like, shape = [n_samples, 1] 42 | Response variables, where n_samples is the number of samples. 43 | 44 | Returns 45 | ------- 46 | y_pred_train : array-like, shape = [n_samples, 1] 47 | Predicted y score for samples. 
48 | """ 49 | 50 | # # If using Keras, set tf to 1 core 51 | # config = K.tf.ConfigProto(intra_op_parallelism_threads=8, inter_op_parallelism_threads=8, allow_soft_placement=True) 52 | # session = tf.Session(config=config) 53 | # K.set_session(session) 54 | 55 | # If batch-size is None: 56 | if self.batch_size is None: 57 | self.batch_size = len(X) 58 | 59 | self.model = Sequential() 60 | self.model.add(Dense(self.n_neurons, activation="linear", input_dim=len(X.T))) 61 | self.model.add(Dense(1, activation="relu")) 62 | self.model.compile(optimizer=self.optimizer, loss=self.loss, metrics=["accuracy"]) 63 | 64 | # If epoch_ypred is True, calculate ypred for each epoch 65 | if epoch_ypred is True: 66 | self.epoch = YpredCallback(self.model, X, epoch_xtest) 67 | else: 68 | self.epoch = Callback() 69 | 70 | # Fit 71 | self.model.fit(X, Y, epochs=self.n_epochs, batch_size=self.batch_size, verbose=self.verbose, callbacks=[self.epoch]) 72 | 73 | layer1_weight = self.model.layers[0].get_weights()[0] 74 | layer1_bias = self.model.layers[0].get_weights()[1] 75 | layer2_weight = self.model.layers[1].get_weights()[0] 76 | layer2_bias = self.model.layers[1].get_weights()[1] 77 | 78 | # Not sure about the naming scheme (trying to match PLS) 79 | self.model.x_loadings_ = layer1_weight 80 | self.model.x_scores_ = np.matmul(X, self.model.x_loadings_) + layer1_bias 81 | self.model.y_loadings_ = layer2_weight 82 | self.model.pctvar_ = np.ones((1, len(self.model.y_loadings_[0]))) 83 | self.xcols_num = len(X.T) 84 | self.model.pctvar_ = sum(abs(self.model.x_scores_) ** 2) / sum(sum(abs(X) ** 2)) * 100 85 | y_pred_train = self.model.predict(X).flatten() 86 | 87 | # Storing X, Y, and Y_pred 88 | self.Y_pred = y_pred_train 89 | self.X = X 90 | self.Y = Y 91 | return y_pred_train 92 | 93 | def test(self, X, Y=None): 94 | """Calculate and return Y predicted value. 95 | 96 | Parameters 97 | ---------- 98 | X : array-like, shape = [n_samples, n_features] 99 | Test variables, where n_samples is the number of samples and n_features is the number of predictors. 100 | 101 | Returns 102 | ------- 103 | y_pred_test : array-like, shape = [n_samples, 1] 104 | Predicted y score for samples. 
105 | """ 106 | 107 | layer1_weight = self.model.layers[0].get_weights()[0] 108 | layer1_bias = self.model.layers[0].get_weights()[1] 109 | layer2_weight = self.model.layers[1].get_weights()[0] 110 | layer2_bias = self.model.layers[1].get_weights()[1] 111 | 112 | self.model.y_loadings_ = layer2_weight 113 | self.model.y_loadings_ = self.model.y_loadings_.reshape(1, len(self.model.y_loadings_)) 114 | self.model.x_loadings_ = layer1_weight 115 | self.model.x_scores_ = np.matmul(X, self.model.x_loadings_) + layer1_bias 116 | y_pred_test = self.model.predict(X).flatten() 117 | self.model.pctvar_ = sum(abs(self.model.x_scores_) ** 2) / sum(sum(abs(X) ** 2)) * 100 118 | return y_pred_test 119 | -------------------------------------------------------------------------------- /cimcb/model/NN_LogitTan.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from keras.callbacks import Callback 3 | from keras.optimizers import SGD 4 | from keras.models import Sequential 5 | from keras.layers import Dense 6 | import tensorflow as tf 7 | from keras import backend as K 8 | from .BaseModel import BaseModel 9 | from ..utils import YpredCallback 10 | 11 | 12 | class NN_LogitTan(BaseModel): 13 | """2 Layer linear-logistic neural network using Keras""" 14 | 15 | parametric = True 16 | bootlist = None 17 | 18 | def __init__(self, n_neurons=2, epochs=200, learning_rate=0.01, momentum=0.0, decay=0.0, nesterov=False, loss="binary_crossentropy", batch_size=None, verbose=0): 19 | self.n_neurons = n_neurons 20 | self.verbose = verbose 21 | self.n_epochs = epochs 22 | self.k = n_neurons 23 | self.batch_size = batch_size 24 | self.loss = loss 25 | self.optimizer = SGD(lr=learning_rate, momentum=momentum, decay=decay, nesterov=nesterov) 26 | 27 | self.__name__ = 'cimcb.model.NN_LogitTan' 28 | self.__params__ = {'n_neurons': n_neurons, 'epochs': epochs, 'learning_rate': learning_rate, 'momentum': momentum, 'decay': decay, 'nesterov': nesterov, 'loss': loss, 'batch_size': batch_size, 'verbose': verbose} 29 | 30 | def set_params(self, params): 31 | self.__init__(**params) 32 | 33 | def train(self, X, Y, epoch_ypred=False, epoch_xtest=None): 34 | """ Fit the neural network model, save additional stats (as attributes) and return Y predicted values. 35 | 36 | Parameters 37 | ---------- 38 | X : array-like, shape = [n_samples, n_features] 39 | Predictor variables, where n_samples is the number of samples and n_features is the number of predictors. 40 | 41 | Y : array-like, shape = [n_samples, 1] 42 | Response variables, where n_samples is the number of samples. 43 | 44 | Returns 45 | ------- 46 | y_pred_train : array-like, shape = [n_samples, 1] 47 | Predicted y score for samples. 
48 | """ 49 | 50 | # # If using Keras, set tf to 1 core 51 | # config = K.tf.ConfigProto(intra_op_parallelism_threads=8, inter_op_parallelism_threads=8, allow_soft_placement=True) 52 | # session = tf.Session(config=config) 53 | # K.set_session(session) 54 | 55 | # If batch-size is None: 56 | if self.batch_size is None: 57 | self.batch_size = len(X) 58 | 59 | self.model = Sequential() 60 | self.model.add(Dense(self.n_neurons, activation="sigmoid", input_dim=len(X.T))) 61 | self.model.add(Dense(1, activation="tanh")) 62 | self.model.compile(optimizer=self.optimizer, loss=self.loss, metrics=["accuracy"]) 63 | 64 | # If epoch_ypred is True, calculate ypred for each epoch 65 | if epoch_ypred is True: 66 | self.epoch = YpredCallback(self.model, X, epoch_xtest) 67 | else: 68 | self.epoch = Callback() 69 | 70 | # Fit 71 | self.model.fit(X, Y, epochs=self.n_epochs, batch_size=self.batch_size, verbose=self.verbose, callbacks=[self.epoch]) 72 | 73 | layer1_weight = self.model.layers[0].get_weights()[0] 74 | layer1_bias = self.model.layers[0].get_weights()[1] 75 | layer2_weight = self.model.layers[1].get_weights()[0] 76 | layer2_bias = self.model.layers[1].get_weights()[1] 77 | 78 | # Not sure about the naming scheme (trying to match PLS) 79 | self.model.x_loadings_ = layer1_weight 80 | self.model.x_scores_ = np.matmul(X, self.model.x_loadings_) + layer1_bias 81 | self.model.y_loadings_ = layer2_weight 82 | self.model.pctvar_ = np.ones((1, len(self.model.y_loadings_[0]))) 83 | self.xcols_num = len(X.T) 84 | self.model.pctvar_ = sum(abs(self.model.x_scores_) ** 2) / sum(sum(abs(X) ** 2)) * 100 85 | y_pred_train = self.model.predict(X).flatten() 86 | 87 | # Storing X, Y, and Y_pred 88 | self.Y_pred = y_pred_train 89 | self.X = X 90 | self.Y = Y 91 | return y_pred_train 92 | 93 | def test(self, X, Y=None): 94 | """Calculate and return Y predicted value. 95 | 96 | Parameters 97 | ---------- 98 | X : array-like, shape = [n_samples, n_features] 99 | Test variables, where n_samples is the number of samples and n_features is the number of predictors. 100 | 101 | Returns 102 | ------- 103 | y_pred_test : array-like, shape = [n_samples, 1] 104 | Predicted y score for samples. 
105 | """ 106 | 107 | layer1_weight = self.model.layers[0].get_weights()[0] 108 | layer1_bias = self.model.layers[0].get_weights()[1] 109 | layer2_weight = self.model.layers[1].get_weights()[0] 110 | layer2_bias = self.model.layers[1].get_weights()[1] 111 | 112 | self.model.y_loadings_ = layer2_weight 113 | self.model.y_loadings_ = self.model.y_loadings_.reshape(1, len(self.model.y_loadings_)) 114 | self.model.x_loadings_ = layer1_weight 115 | self.model.x_scores_ = np.matmul(X, self.model.x_loadings_) + layer1_bias 116 | y_pred_test = self.model.predict(X).flatten() 117 | self.model.pctvar_ = sum(abs(self.model.x_scores_) ** 2) / sum(sum(abs(X) ** 2)) * 100 118 | return y_pred_test 119 | -------------------------------------------------------------------------------- /cimcb/model/NN_ReluLogit.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from keras.callbacks import Callback 3 | from keras.optimizers import SGD 4 | from keras.models import Sequential 5 | from keras.layers import Dense 6 | import tensorflow as tf 7 | from keras import backend as K 8 | from .BaseModel import BaseModel 9 | from ..utils import YpredCallback 10 | 11 | 12 | class NN_ReluLogit(BaseModel): 13 | """2 Layer linear-logistic neural network using Keras""" 14 | 15 | parametric = True 16 | bootlist = None 17 | 18 | def __init__(self, n_neurons=2, epochs=200, learning_rate=0.01, momentum=0.0, decay=0.0, nesterov=False, loss="binary_crossentropy", batch_size=None, verbose=0): 19 | self.n_neurons = n_neurons 20 | self.verbose = verbose 21 | self.n_epochs = epochs 22 | self.k = n_neurons 23 | self.batch_size = batch_size 24 | self.loss = loss 25 | self.optimizer = SGD(lr=learning_rate, momentum=momentum, decay=decay, nesterov=nesterov) 26 | 27 | self.__name__ = 'cimcb.model.NN_ReluLogit' 28 | self.__params__ = {'n_neurons': n_neurons, 'epochs': epochs, 'learning_rate': learning_rate, 'momentum': momentum, 'decay': decay, 'nesterov': nesterov, 'loss': loss, 'batch_size': batch_size, 'verbose': verbose} 29 | 30 | def set_params(self, params): 31 | self.__init__(**params) 32 | 33 | def train(self, X, Y, epoch_ypred=False, epoch_xtest=None): 34 | """ Fit the neural network model, save additional stats (as attributes) and return Y predicted values. 35 | 36 | Parameters 37 | ---------- 38 | X : array-like, shape = [n_samples, n_features] 39 | Predictor variables, where n_samples is the number of samples and n_features is the number of predictors. 40 | 41 | Y : array-like, shape = [n_samples, 1] 42 | Response variables, where n_samples is the number of samples. 43 | 44 | Returns 45 | ------- 46 | y_pred_train : array-like, shape = [n_samples, 1] 47 | Predicted y score for samples. 
48 | """ 49 | 50 | # # If using Keras, set tf to 1 core 51 | # config = K.tf.ConfigProto(intra_op_parallelism_threads=8, inter_op_parallelism_threads=8, allow_soft_placement=True) 52 | # session = tf.Session(config=config) 53 | # K.set_session(session) 54 | 55 | # If batch-size is None: 56 | if self.batch_size is None: 57 | self.batch_size = len(X) 58 | 59 | self.model = Sequential() 60 | self.model.add(Dense(self.n_neurons, activation="relu", input_dim=len(X.T))) 61 | self.model.add(Dense(1, activation="sigmoid")) 62 | self.model.compile(optimizer=self.optimizer, loss=self.loss, metrics=["accuracy"]) 63 | 64 | # If epoch_ypred is True, calculate ypred for each epoch 65 | if epoch_ypred is True: 66 | self.epoch = YpredCallback(self.model, X, epoch_xtest) 67 | else: 68 | self.epoch = Callback() 69 | 70 | # Fit 71 | self.model.fit(X, Y, epochs=self.n_epochs, batch_size=self.batch_size, verbose=self.verbose, callbacks=[self.epoch]) 72 | 73 | layer1_weight = self.model.layers[0].get_weights()[0] 74 | layer1_bias = self.model.layers[0].get_weights()[1] 75 | layer2_weight = self.model.layers[1].get_weights()[0] 76 | layer2_bias = self.model.layers[1].get_weights()[1] 77 | 78 | # Not sure about the naming scheme (trying to match PLS) 79 | self.model.x_loadings_ = layer1_weight 80 | self.model.x_scores_ = np.matmul(X, self.model.x_loadings_) + layer1_bias 81 | self.model.y_loadings_ = layer2_weight 82 | self.model.pctvar_ = np.ones((1, len(self.model.y_loadings_[0]))) 83 | self.xcols_num = len(X.T) 84 | self.model.pctvar_ = sum(abs(self.model.x_scores_) ** 2) / sum(sum(abs(X) ** 2)) * 100 85 | y_pred_train = self.model.predict(X).flatten() 86 | 87 | # Storing X, Y, and Y_pred 88 | self.Y_pred = y_pred_train 89 | self.X = X 90 | self.Y = Y 91 | return y_pred_train 92 | 93 | def test(self, X, Y=None): 94 | """Calculate and return Y predicted value. 95 | 96 | Parameters 97 | ---------- 98 | X : array-like, shape = [n_samples, n_features] 99 | Test variables, where n_samples is the number of samples and n_features is the number of predictors. 100 | 101 | Returns 102 | ------- 103 | y_pred_test : array-like, shape = [n_samples, 1] 104 | Predicted y score for samples. 
105 | """ 106 | 107 | layer1_weight = self.model.layers[0].get_weights()[0] 108 | layer1_bias = self.model.layers[0].get_weights()[1] 109 | layer2_weight = self.model.layers[1].get_weights()[0] 110 | layer2_bias = self.model.layers[1].get_weights()[1] 111 | 112 | self.model.y_loadings_ = layer2_weight 113 | self.model.y_loadings_ = self.model.y_loadings_.reshape(1, len(self.model.y_loadings_)) 114 | self.model.x_loadings_ = layer1_weight 115 | self.model.x_scores_ = np.matmul(X, self.model.x_loadings_) + layer1_bias 116 | y_pred_test = self.model.predict(X).flatten() 117 | self.model.pctvar_ = sum(abs(self.model.x_scores_) ** 2) / sum(sum(abs(X) ** 2)) * 100 118 | return y_pred_test 119 | -------------------------------------------------------------------------------- /cimcb/model/NN_TanLinear.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from keras.callbacks import Callback 3 | from keras.optimizers import SGD 4 | from keras.models import Sequential 5 | from keras.layers import Dense 6 | import tensorflow as tf 7 | from keras import backend as K 8 | from .BaseModel import BaseModel 9 | from ..utils import YpredCallback 10 | 11 | 12 | class NN_TanLinear(BaseModel): 13 | """2 Layer linear-logistic neural network using Keras""" 14 | 15 | parametric = True 16 | bootlist = None 17 | 18 | def __init__(self, n_neurons=2, epochs=200, learning_rate=0.01, momentum=0.0, decay=0.0, nesterov=False, loss="binary_crossentropy", batch_size=None, verbose=0): 19 | self.n_neurons = n_neurons 20 | self.verbose = verbose 21 | self.n_epochs = epochs 22 | self.k = n_neurons 23 | self.batch_size = batch_size 24 | self.loss = loss 25 | self.optimizer = SGD(lr=learning_rate, momentum=momentum, decay=decay, nesterov=nesterov) 26 | 27 | self.__name__ = 'cimcb.model.NN_TanLinear' 28 | self.__params__ = {'n_neurons': n_neurons, 'epochs': epochs, 'learning_rate': learning_rate, 'momentum': momentum, 'decay': decay, 'nesterov': nesterov, 'loss': loss, 'batch_size': batch_size, 'verbose': verbose} 29 | 30 | def set_params(self, params): 31 | self.__init__(**params) 32 | 33 | def train(self, X, Y, epoch_ypred=False, epoch_xtest=None): 34 | """ Fit the neural network model, save additional stats (as attributes) and return Y predicted values. 35 | 36 | Parameters 37 | ---------- 38 | X : array-like, shape = [n_samples, n_features] 39 | Predictor variables, where n_samples is the number of samples and n_features is the number of predictors. 40 | 41 | Y : array-like, shape = [n_samples, 1] 42 | Response variables, where n_samples is the number of samples. 43 | 44 | Returns 45 | ------- 46 | y_pred_train : array-like, shape = [n_samples, 1] 47 | Predicted y score for samples. 
48 | """ 49 | 50 | # # If using Keras, set tf to 1 core 51 | # config = K.tf.ConfigProto(intra_op_parallelism_threads=8, inter_op_parallelism_threads=8, allow_soft_placement=True) 52 | # session = tf.Session(config=config) 53 | # K.set_session(session) 54 | 55 | # If batch-size is None: 56 | if self.batch_size is None: 57 | self.batch_size = len(X) 58 | 59 | self.model = Sequential() 60 | self.model.add(Dense(self.n_neurons, activation="tanh", input_dim=len(X.T))) 61 | self.model.add(Dense(1, activation="linear")) 62 | self.model.compile(optimizer=self.optimizer, loss=self.loss, metrics=["accuracy"]) 63 | 64 | # If epoch_ypred is True, calculate ypred for each epoch 65 | if epoch_ypred is True: 66 | self.epoch = YpredCallback(self.model, X, epoch_xtest) 67 | else: 68 | self.epoch = Callback() 69 | 70 | # Fit 71 | self.model.fit(X, Y, epochs=self.n_epochs, batch_size=self.batch_size, verbose=self.verbose, callbacks=[self.epoch]) 72 | 73 | layer1_weight = self.model.layers[0].get_weights()[0] 74 | layer1_bias = self.model.layers[0].get_weights()[1] 75 | layer2_weight = self.model.layers[1].get_weights()[0] 76 | layer2_bias = self.model.layers[1].get_weights()[1] 77 | 78 | # Not sure about the naming scheme (trying to match PLS) 79 | self.model.x_loadings_ = layer1_weight 80 | self.model.x_scores_ = np.matmul(X, self.model.x_loadings_) + layer1_bias 81 | self.model.y_loadings_ = layer2_weight 82 | self.model.pctvar_ = np.ones((1, len(self.model.y_loadings_[0]))) 83 | self.xcols_num = len(X.T) 84 | self.model.pctvar_ = sum(abs(self.model.x_scores_) ** 2) / sum(sum(abs(X) ** 2)) * 100 85 | y_pred_train = self.model.predict(X).flatten() 86 | 87 | # Storing X, Y, and Y_pred 88 | self.Y_pred = y_pred_train 89 | self.X = X 90 | self.Y = Y 91 | return y_pred_train 92 | 93 | def test(self, X, Y=None): 94 | """Calculate and return Y predicted value. 95 | 96 | Parameters 97 | ---------- 98 | X : array-like, shape = [n_samples, n_features] 99 | Test variables, where n_samples is the number of samples and n_features is the number of predictors. 100 | 101 | Returns 102 | ------- 103 | y_pred_test : array-like, shape = [n_samples, 1] 104 | Predicted y score for samples. 
105 | """ 106 | 107 | layer1_weight = self.model.layers[0].get_weights()[0] 108 | layer1_bias = self.model.layers[0].get_weights()[1] 109 | layer2_weight = self.model.layers[1].get_weights()[0] 110 | layer2_bias = self.model.layers[1].get_weights()[1] 111 | 112 | self.model.y_loadings_ = layer2_weight 113 | self.model.y_loadings_ = self.model.y_loadings_.reshape(1, len(self.model.y_loadings_)) 114 | self.model.x_loadings_ = layer1_weight 115 | self.model.x_scores_ = np.matmul(X, self.model.x_loadings_) + layer1_bias 116 | y_pred_test = self.model.predict(X).flatten() 117 | self.model.pctvar_ = sum(abs(self.model.x_scores_) ** 2) / sum(sum(abs(X) ** 2)) * 100 118 | return y_pred_test 119 | -------------------------------------------------------------------------------- /cimcb/model/NN_TanLogit.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from keras.callbacks import Callback 3 | from keras.optimizers import SGD 4 | from keras.models import Sequential 5 | from keras.layers import Dense 6 | import tensorflow as tf 7 | from keras import backend as K 8 | from .BaseModel import BaseModel 9 | from ..utils import YpredCallback 10 | 11 | 12 | class NN_TanLogit(BaseModel): 13 | """2 Layer linear-logistic neural network using Keras""" 14 | 15 | parametric = True 16 | bootlist = None 17 | 18 | def __init__(self, n_neurons=2, epochs=200, learning_rate=0.01, momentum=0.0, decay=0.0, nesterov=False, loss="binary_crossentropy", batch_size=None, verbose=0): 19 | self.n_neurons = n_neurons 20 | self.verbose = verbose 21 | self.n_epochs = epochs 22 | self.k = n_neurons 23 | self.batch_size = batch_size 24 | self.loss = loss 25 | self.optimizer = SGD(lr=learning_rate, momentum=momentum, decay=decay, nesterov=nesterov) 26 | 27 | self.__name__ = 'cimcb.model.NN_TanLogit' 28 | self.__params__ = {'n_neurons': n_neurons, 'epochs': epochs, 'learning_rate': learning_rate, 'momentum': momentum, 'decay': decay, 'nesterov': nesterov, 'loss': loss, 'batch_size': batch_size, 'verbose': verbose} 29 | 30 | def set_params(self, params): 31 | self.__init__(**params) 32 | 33 | def train(self, X, Y, epoch_ypred=False, epoch_xtest=None): 34 | """ Fit the neural network model, save additional stats (as attributes) and return Y predicted values. 35 | 36 | Parameters 37 | ---------- 38 | X : array-like, shape = [n_samples, n_features] 39 | Predictor variables, where n_samples is the number of samples and n_features is the number of predictors. 40 | 41 | Y : array-like, shape = [n_samples, 1] 42 | Response variables, where n_samples is the number of samples. 43 | 44 | Returns 45 | ------- 46 | y_pred_train : array-like, shape = [n_samples, 1] 47 | Predicted y score for samples. 
48 | """ 49 | 50 | # # If using Keras, set tf to 1 core 51 | # config = K.tf.ConfigProto(intra_op_parallelism_threads=8, inter_op_parallelism_threads=8, allow_soft_placement=True) 52 | # session = tf.Session(config=config) 53 | # K.set_session(session) 54 | 55 | # If batch-size is None: 56 | if self.batch_size is None: 57 | self.batch_size = len(X) 58 | 59 | self.model = Sequential() 60 | self.model.add(Dense(self.n_neurons, activation="tanh", input_dim=len(X.T))) 61 | self.model.add(Dense(1, activation="sigmoid")) 62 | self.model.compile(optimizer=self.optimizer, loss=self.loss, metrics=["accuracy"]) 63 | 64 | # If epoch_ypred is True, calculate ypred for each epoch 65 | if epoch_ypred is True: 66 | self.epoch = YpredCallback(self.model, X, epoch_xtest) 67 | else: 68 | self.epoch = Callback() 69 | 70 | # Fit 71 | self.model.fit(X, Y, epochs=self.n_epochs, batch_size=self.batch_size, verbose=self.verbose, callbacks=[self.epoch]) 72 | 73 | layer1_weight = self.model.layers[0].get_weights()[0] 74 | layer1_bias = self.model.layers[0].get_weights()[1] 75 | layer2_weight = self.model.layers[1].get_weights()[0] 76 | layer2_bias = self.model.layers[1].get_weights()[1] 77 | 78 | # Not sure about the naming scheme (trying to match PLS) 79 | self.model.x_loadings_ = layer1_weight 80 | self.model.x_scores_ = np.matmul(X, self.model.x_loadings_) + layer1_bias 81 | self.model.y_loadings_ = layer2_weight 82 | self.model.pctvar_ = np.ones((1, len(self.model.y_loadings_[0]))) 83 | self.xcols_num = len(X.T) 84 | self.model.pctvar_ = sum(abs(self.model.x_scores_) ** 2) / sum(sum(abs(X) ** 2)) * 100 85 | y_pred_train = self.model.predict(X).flatten() 86 | 87 | # Storing X, Y, and Y_pred 88 | self.Y_pred = y_pred_train 89 | self.X = X 90 | self.Y = Y 91 | return y_pred_train 92 | 93 | def test(self, X, Y=None): 94 | """Calculate and return Y predicted value. 95 | 96 | Parameters 97 | ---------- 98 | X : array-like, shape = [n_samples, n_features] 99 | Test variables, where n_samples is the number of samples and n_features is the number of predictors. 100 | 101 | Returns 102 | ------- 103 | y_pred_test : array-like, shape = [n_samples, 1] 104 | Predicted y score for samples. 
105 | """ 106 | 107 | layer1_weight = self.model.layers[0].get_weights()[0] 108 | layer1_bias = self.model.layers[0].get_weights()[1] 109 | layer2_weight = self.model.layers[1].get_weights()[0] 110 | layer2_bias = self.model.layers[1].get_weights()[1] 111 | 112 | self.model.y_loadings_ = layer2_weight 113 | self.model.y_loadings_ = self.model.y_loadings_.reshape(1, len(self.model.y_loadings_)) 114 | self.model.x_loadings_ = layer1_weight 115 | self.model.x_scores_ = np.matmul(X, self.model.x_loadings_) + layer1_bias 116 | y_pred_test = self.model.predict(X).flatten() 117 | self.model.pctvar_ = sum(abs(self.model.x_scores_) ** 2) / sum(sum(abs(X) ** 2)) * 100 118 | return y_pred_test 119 | -------------------------------------------------------------------------------- /cimcb/model/NN_LinearRelu.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from keras.callbacks import Callback 3 | from keras.optimizers import SGD 4 | from keras.models import Sequential 5 | from keras.layers import Dense 6 | import tensorflow as tf 7 | from keras import backend as K 8 | from .BaseModel import BaseModel 9 | from ..utils import YpredCallback 10 | 11 | 12 | class NN_LinearRelu(BaseModel): 13 | """2 Layer linear-logistic neural network using Keras""" 14 | 15 | parametric = True 16 | bootlist = None 17 | 18 | def __init__(self, n_neurons=2, epochs=200, learning_rate=0.01, momentum=0.0, decay=0.0, nesterov=False, loss="binary_crossentropy", batch_size=None, verbose=0): 19 | self.n_neurons = n_neurons 20 | self.verbose = verbose 21 | self.n_epochs = epochs 22 | self.k = n_neurons 23 | self.batch_size = batch_size 24 | self.loss = loss 25 | self.optimizer = SGD(lr=learning_rate, momentum=momentum, decay=decay, nesterov=nesterov) 26 | 27 | self.__name__ = 'cimcb.model.NN_LinearRelu' 28 | self.__params__ = {'n_neurons': n_neurons, 'epochs': epochs, 'learning_rate': learning_rate, 'momentum': momentum, 'decay': decay, 'nesterov': nesterov, 'loss': loss, 'batch_size': batch_size, 'verbose': verbose} 29 | 30 | def set_params(self, params): 31 | self.__init__(**params) 32 | 33 | def train(self, X, Y, epoch_ypred=False, epoch_xtest=None): 34 | """ Fit the neural network model, save additional stats (as attributes) and return Y predicted values. 35 | 36 | Parameters 37 | ---------- 38 | X : array-like, shape = [n_samples, n_features] 39 | Predictor variables, where n_samples is the number of samples and n_features is the number of predictors. 40 | 41 | Y : array-like, shape = [n_samples, 1] 42 | Response variables, where n_samples is the number of samples. 43 | 44 | Returns 45 | ------- 46 | y_pred_train : array-like, shape = [n_samples, 1] 47 | Predicted y score for samples. 
48 | """ 49 | 50 | # # If using Keras, set tf to 1 core 51 | # config = K.tf.ConfigProto(intra_op_parallelism_threads=8, inter_op_parallelism_threads=8, allow_soft_placement=True) 52 | # session = tf.Session(config=config) 53 | # K.set_session(session) 54 | 55 | # If batch-size is None: 56 | if self.batch_size is None: 57 | self.batch_size = len(X) 58 | 59 | self.model = Sequential() 60 | self.model.add(Dense(self.n_neurons, activation="linear", input_dim=len(X.T))) 61 | self.model.add(Dense(1, activation="relu")) 62 | self.model.compile(optimizer=self.optimizer, loss=self.loss, metrics=["accuracy"]) 63 | 64 | # If epoch_ypred is True, calculate ypred for each epoch 65 | if epoch_ypred is True: 66 | self.epoch = YpredCallback(self.model, X, epoch_xtest) 67 | else: 68 | self.epoch = Callback() 69 | 70 | # Fit 71 | self.model.fit(X, Y, epochs=self.n_epochs, batch_size=self.batch_size, verbose=self.verbose, callbacks=[self.epoch]) 72 | 73 | layer1_weight = self.model.layers[0].get_weights()[0] 74 | layer1_bias = self.model.layers[0].get_weights()[1] 75 | layer2_weight = self.model.layers[1].get_weights()[0] 76 | layer2_bias = self.model.layers[1].get_weights()[1] 77 | 78 | # Not sure about the naming scheme (trying to match PLS) 79 | self.model.x_loadings_ = layer1_weight 80 | self.model.x_scores_ = np.matmul(X, self.model.x_loadings_) + layer1_bias 81 | self.model.y_loadings_ = layer2_weight 82 | self.model.pctvar_ = np.ones((1, len(self.model.y_loadings_[0]))) 83 | self.xcols_num = len(X.T) 84 | self.model.pctvar_ = sum(abs(self.model.x_scores_) ** 2) / sum(sum(abs(X) ** 2)) * 100 85 | y_pred_train = self.model.predict(X).flatten() 86 | 87 | # Storing X, Y, and Y_pred 88 | self.Y_pred = y_pred_train 89 | self.X = X 90 | self.Y = Y 91 | return y_pred_train 92 | 93 | def test(self, X, Y=None): 94 | """Calculate and return Y predicted value. 95 | 96 | Parameters 97 | ---------- 98 | X : array-like, shape = [n_samples, n_features] 99 | Test variables, where n_samples is the number of samples and n_features is the number of predictors. 100 | 101 | Returns 102 | ------- 103 | y_pred_test : array-like, shape = [n_samples, 1] 104 | Predicted y score for samples. 
105 | """ 106 | 107 | layer1_weight = self.model.layers[0].get_weights()[0] 108 | layer1_bias = self.model.layers[0].get_weights()[1] 109 | layer2_weight = self.model.layers[1].get_weights()[0] 110 | layer2_bias = self.model.layers[1].get_weights()[1] 111 | 112 | self.model.y_loadings_ = layer2_weight 113 | self.model.y_loadings_ = self.model.y_loadings_.reshape(1, len(self.model.y_loadings_)) 114 | self.model.x_loadings_ = layer1_weight 115 | self.model.x_scores_ = np.matmul(X, self.model.x_loadings_) + layer1_bias 116 | y_pred_test = self.model.predict(X).flatten() 117 | self.model.pctvar_ = sum(abs(self.model.x_scores_) ** 2) / sum(sum(abs(X) ** 2)) * 100 118 | return y_pred_test 119 | -------------------------------------------------------------------------------- /cimcb/model/NN_ReluLinear.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from keras.callbacks import Callback 3 | from keras.optimizers import SGD 4 | from keras.models import Sequential 5 | from keras.layers import Dense 6 | import tensorflow as tf 7 | from keras import backend as K 8 | from .BaseModel import BaseModel 9 | from ..utils import YpredCallback 10 | 11 | 12 | class NN_ReluLinear(BaseModel): 13 | """2 Layer linear-logistic neural network using Keras""" 14 | 15 | parametric = True 16 | bootlist = None 17 | 18 | def __init__(self, n_neurons=2, epochs=200, learning_rate=0.01, momentum=0.0, decay=0.0, nesterov=False, loss="binary_crossentropy", batch_size=None, verbose=0): 19 | self.n_neurons = n_neurons 20 | self.verbose = verbose 21 | self.n_epochs = epochs 22 | self.k = n_neurons 23 | self.batch_size = batch_size 24 | self.loss = loss 25 | self.optimizer = SGD(lr=learning_rate, momentum=momentum, decay=decay, nesterov=nesterov) 26 | 27 | self.__name__ = 'cimcb.model.NN_ReluLinear' 28 | self.__params__ = {'n_neurons': n_neurons, 'epochs': epochs, 'learning_rate': learning_rate, 'momentum': momentum, 'decay': decay, 'nesterov': nesterov, 'loss': loss, 'batch_size': batch_size, 'verbose': verbose} 29 | 30 | def set_params(self, params): 31 | self.__init__(**params) 32 | 33 | def train(self, X, Y, epoch_ypred=False, epoch_xtest=None): 34 | """ Fit the neural network model, save additional stats (as attributes) and return Y predicted values. 35 | 36 | Parameters 37 | ---------- 38 | X : array-like, shape = [n_samples, n_features] 39 | Predictor variables, where n_samples is the number of samples and n_features is the number of predictors. 40 | 41 | Y : array-like, shape = [n_samples, 1] 42 | Response variables, where n_samples is the number of samples. 43 | 44 | Returns 45 | ------- 46 | y_pred_train : array-like, shape = [n_samples, 1] 47 | Predicted y score for samples. 
48 | """ 49 | 50 | # # If using Keras, set tf to 1 core 51 | # config = K.tf.ConfigProto(intra_op_parallelism_threads=8, inter_op_parallelism_threads=8, allow_soft_placement=True) 52 | # session = tf.Session(config=config) 53 | # K.set_session(session) 54 | 55 | # If batch-size is None: 56 | if self.batch_size is None: 57 | self.batch_size = len(X) 58 | 59 | self.model = Sequential() 60 | self.model.add(Dense(self.n_neurons, activation="relu", input_dim=len(X.T))) 61 | self.model.add(Dense(1, activation="linear")) 62 | self.model.compile(optimizer=self.optimizer, loss=self.loss, metrics=["accuracy"]) 63 | 64 | # If epoch_ypred is True, calculate ypred for each epoch 65 | if epoch_ypred is True: 66 | self.epoch = YpredCallback(self.model, X, epoch_xtest) 67 | else: 68 | self.epoch = Callback() 69 | 70 | # Fit 71 | self.model.fit(X, Y, epochs=self.n_epochs, batch_size=self.batch_size, verbose=self.verbose, callbacks=[self.epoch]) 72 | 73 | layer1_weight = self.model.layers[0].get_weights()[0] 74 | layer1_bias = self.model.layers[0].get_weights()[1] 75 | layer2_weight = self.model.layers[1].get_weights()[0] 76 | layer2_bias = self.model.layers[1].get_weights()[1] 77 | 78 | # Not sure about the naming scheme (trying to match PLS) 79 | self.model.x_loadings_ = layer1_weight 80 | self.model.x_scores_ = np.matmul(X, self.model.x_loadings_) + layer1_bias 81 | self.model.y_loadings_ = layer2_weight 82 | self.model.pctvar_ = np.ones((1, len(self.model.y_loadings_[0]))) 83 | self.xcols_num = len(X.T) 84 | self.model.pctvar_ = sum(abs(self.model.x_scores_) ** 2) / sum(sum(abs(X) ** 2)) * 100 85 | y_pred_train = self.model.predict(X).flatten() 86 | 87 | # Storing X, Y, and Y_pred 88 | self.Y_pred = y_pred_train 89 | self.X = X 90 | self.Y = Y 91 | return y_pred_train 92 | 93 | def test(self, X, Y=None): 94 | """Calculate and return Y predicted value. 95 | 96 | Parameters 97 | ---------- 98 | X : array-like, shape = [n_samples, n_features] 99 | Test variables, where n_samples is the number of samples and n_features is the number of predictors. 100 | 101 | Returns 102 | ------- 103 | y_pred_test : array-like, shape = [n_samples, 1] 104 | Predicted y score for samples. 
105 | """ 106 | 107 | layer1_weight = self.model.layers[0].get_weights()[0] 108 | layer1_bias = self.model.layers[0].get_weights()[1] 109 | layer2_weight = self.model.layers[1].get_weights()[0] 110 | layer2_bias = self.model.layers[1].get_weights()[1] 111 | 112 | self.model.y_loadings_ = layer2_weight 113 | self.model.y_loadings_ = self.model.y_loadings_.reshape(1, len(self.model.y_loadings_)) 114 | self.model.x_loadings_ = layer1_weight 115 | self.model.x_scores_ = np.matmul(X, self.model.x_loadings_) + layer1_bias 116 | y_pred_test = self.model.predict(X).flatten() 117 | self.model.pctvar_ = sum(abs(self.model.x_scores_) ** 2) / sum(sum(abs(X) ** 2)) * 100 118 | return y_pred_test 119 | -------------------------------------------------------------------------------- /cimcb/model/NN_LogitLinear.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from keras.callbacks import Callback 3 | from keras.optimizers import SGD 4 | from keras.models import Sequential 5 | from keras.layers import Dense 6 | import tensorflow as tf 7 | from keras import backend as K 8 | from .BaseModel import BaseModel 9 | from ..utils import YpredCallback 10 | 11 | 12 | class NN_LogitLinear(BaseModel): 13 | """2 Layer linear-logistic neural network using Keras""" 14 | 15 | parametric = True 16 | bootlist = None 17 | 18 | def __init__(self, n_neurons=2, epochs=200, learning_rate=0.01, momentum=0.0, decay=0.0, nesterov=False, loss="binary_crossentropy", batch_size=None, verbose=0): 19 | self.n_neurons = n_neurons 20 | self.verbose = verbose 21 | self.n_epochs = epochs 22 | self.k = n_neurons 23 | self.batch_size = batch_size 24 | self.loss = loss 25 | self.optimizer = SGD(lr=learning_rate, momentum=momentum, decay=decay, nesterov=nesterov) 26 | 27 | self.__name__ = 'cimcb.model.NN_LogitLinear' 28 | self.__params__ = {'n_neurons': n_neurons, 'epochs': epochs, 'learning_rate': learning_rate, 'momentum': momentum, 'decay': decay, 'nesterov': nesterov, 'loss': loss, 'batch_size': batch_size, 'verbose': verbose} 29 | 30 | def set_params(self, params): 31 | self.__init__(**params) 32 | 33 | def train(self, X, Y, epoch_ypred=False, epoch_xtest=None): 34 | """ Fit the neural network model, save additional stats (as attributes) and return Y predicted values. 35 | 36 | Parameters 37 | ---------- 38 | X : array-like, shape = [n_samples, n_features] 39 | Predictor variables, where n_samples is the number of samples and n_features is the number of predictors. 40 | 41 | Y : array-like, shape = [n_samples, 1] 42 | Response variables, where n_samples is the number of samples. 43 | 44 | Returns 45 | ------- 46 | y_pred_train : array-like, shape = [n_samples, 1] 47 | Predicted y score for samples. 
48 | """ 49 | 50 | # # If using Keras, set tf to 1 core 51 | # config = K.tf.ConfigProto(intra_op_parallelism_threads=8, inter_op_parallelism_threads=8, allow_soft_placement=True) 52 | # session = tf.Session(config=config) 53 | # K.set_session(session) 54 | 55 | # If batch-size is None: 56 | if self.batch_size is None: 57 | self.batch_size = len(X) 58 | 59 | self.model = Sequential() 60 | self.model.add(Dense(self.n_neurons, activation="sigmoid", input_dim=len(X.T))) 61 | self.model.add(Dense(1, activation="linear")) 62 | self.model.compile(optimizer=self.optimizer, loss=self.loss, metrics=["accuracy"]) 63 | 64 | # If epoch_ypred is True, calculate ypred for each epoch 65 | if epoch_ypred is True: 66 | self.epoch = YpredCallback(self.model, X, epoch_xtest) 67 | else: 68 | self.epoch = Callback() 69 | 70 | # Fit 71 | self.model.fit(X, Y, epochs=self.n_epochs, batch_size=self.batch_size, verbose=self.verbose, callbacks=[self.epoch]) 72 | 73 | layer1_weight = self.model.layers[0].get_weights()[0] 74 | layer1_bias = self.model.layers[0].get_weights()[1] 75 | layer2_weight = self.model.layers[1].get_weights()[0] 76 | layer2_bias = self.model.layers[1].get_weights()[1] 77 | 78 | # Not sure about the naming scheme (trying to match PLS) 79 | self.model.x_loadings_ = layer1_weight 80 | self.model.x_scores_ = np.matmul(X, self.model.x_loadings_) + layer1_bias 81 | self.model.y_loadings_ = layer2_weight 82 | self.model.pctvar_ = np.ones((1, len(self.model.y_loadings_[0]))) 83 | self.xcols_num = len(X.T) 84 | self.model.pctvar_ = sum(abs(self.model.x_scores_) ** 2) / sum(sum(abs(X) ** 2)) * 100 85 | y_pred_train = self.model.predict(X).flatten() 86 | 87 | # Storing X, Y, and Y_pred 88 | self.Y_pred = y_pred_train 89 | self.X = X 90 | self.Y = Y 91 | return y_pred_train 92 | 93 | def test(self, X, Y=None): 94 | """Calculate and return Y predicted value. 95 | 96 | Parameters 97 | ---------- 98 | X : array-like, shape = [n_samples, n_features] 99 | Test variables, where n_samples is the number of samples and n_features is the number of predictors. 100 | 101 | Returns 102 | ------- 103 | y_pred_test : array-like, shape = [n_samples, 1] 104 | Predicted y score for samples. 
105 | """ 106 | 107 | layer1_weight = self.model.layers[0].get_weights()[0] 108 | layer1_bias = self.model.layers[0].get_weights()[1] 109 | layer2_weight = self.model.layers[1].get_weights()[0] 110 | layer2_bias = self.model.layers[1].get_weights()[1] 111 | 112 | self.model.y_loadings_ = layer2_weight 113 | self.model.y_loadings_ = self.model.y_loadings_.reshape(1, len(self.model.y_loadings_)) 114 | self.model.x_loadings_ = layer1_weight 115 | self.model.x_scores_ = np.matmul(X, self.model.x_loadings_) + layer1_bias 116 | y_pred_test = self.model.predict(X).flatten() 117 | self.model.pctvar_ = sum(abs(self.model.x_scores_) ** 2) / sum(sum(abs(X) ** 2)) * 100 118 | return y_pred_test 119 | -------------------------------------------------------------------------------- /cimcb/model/NN_LinearLogit.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from keras.callbacks import Callback 3 | from keras.optimizers import SGD 4 | from keras.models import Sequential 5 | from keras.layers import Dense 6 | import tensorflow as tf 7 | from keras import backend as K 8 | from .BaseModel import BaseModel 9 | from ..utils import YpredCallback 10 | 11 | 12 | class NN_LinearLogit(BaseModel): 13 | """2 Layer linear-logistic neural network using Keras""" 14 | 15 | parametric = True 16 | bootlist = None 17 | 18 | def __init__(self, n_neurons=2, epochs=200, learning_rate=0.01, momentum=0.0, decay=0.0, nesterov=False, loss="binary_crossentropy", batch_size=None, verbose=0): 19 | self.n_neurons = n_neurons 20 | self.verbose = verbose 21 | self.n_epochs = epochs 22 | self.k = n_neurons 23 | self.batch_size = batch_size 24 | self.loss = loss 25 | self.decay = decay 26 | self.nesterov = nesterov 27 | self.momentum = momentum 28 | self.learning_rate = learning_rate 29 | self.optimizer = SGD(lr=learning_rate, momentum=momentum, decay=decay, nesterov=nesterov) 30 | 31 | self.__name__ = 'cimcb.model.NN_LinearLogit' 32 | self.__params__ = {'n_neurons': n_neurons, 'epochs': epochs, 'learning_rate': learning_rate, 'momentum': momentum, 'decay': decay, 'nesterov': nesterov, 'loss': loss, 'batch_size': batch_size, 'verbose': verbose} 33 | 34 | def set_params(self, params): 35 | self.__init__(**params) 36 | 37 | def train(self, X, Y, epoch_ypred=False, epoch_xtest=None): 38 | """ Fit the neural network model, save additional stats (as attributes) and return Y predicted values. 39 | 40 | Parameters 41 | ---------- 42 | X : array-like, shape = [n_samples, n_features] 43 | Predictor variables, where n_samples is the number of samples and n_features is the number of predictors. 44 | 45 | Y : array-like, shape = [n_samples, 1] 46 | Response variables, where n_samples is the number of samples. 47 | 48 | Returns 49 | ------- 50 | y_pred_train : array-like, shape = [n_samples, 1] 51 | Predicted y score for samples. 
52 | """ 53 | 54 | # # If using Keras, set tf to 1 core 55 | # config = K.tf.ConfigProto(intra_op_parallelism_threads=8, inter_op_parallelism_threads=8, allow_soft_placement=True) 56 | # session = tf.Session(config=config) 57 | # K.set_session(session) 58 | 59 | # If batch-size is None: 60 | if self.batch_size is None: 61 | self.batch_size = len(X) 62 | 63 | self.model = Sequential() 64 | self.model.add(Dense(self.n_neurons, activation="linear", input_dim=len(X.T))) 65 | self.model.add(Dense(1, activation="sigmoid")) 66 | self.model.compile(optimizer=self.optimizer, loss=self.loss, metrics=["accuracy"]) 67 | 68 | # If epoch_ypred is True, calculate ypred for each epoch 69 | if epoch_ypred is True: 70 | self.epoch = YpredCallback(self.model, X, epoch_xtest) 71 | else: 72 | self.epoch = Callback() 73 | 74 | # Fit 75 | self.model.fit(X, Y, epochs=self.n_epochs, batch_size=self.batch_size, verbose=self.verbose, callbacks=[self.epoch]) 76 | 77 | layer1_weight = self.model.layers[0].get_weights()[0] 78 | layer1_bias = self.model.layers[0].get_weights()[1] 79 | layer2_weight = self.model.layers[1].get_weights()[0] 80 | layer2_bias = self.model.layers[1].get_weights()[1] 81 | 82 | # Not sure about the naming scheme (trying to match PLS) 83 | self.model.x_loadings_ = layer1_weight 84 | self.model.x_scores_ = np.matmul(X, self.model.x_loadings_) + layer1_bias 85 | self.model.y_loadings_ = layer2_weight 86 | self.model.pctvar_ = np.ones((1, len(self.model.y_loadings_[0]))) 87 | self.xcols_num = len(X.T) 88 | self.model.pctvar_ = sum(abs(self.model.x_scores_) ** 2) / sum(sum(abs(X) ** 2)) * 100 89 | y_pred_train = self.model.predict(X).flatten() 90 | 91 | # Storing X, Y, and Y_pred 92 | self.Y_pred = y_pred_train 93 | self.X = X 94 | self.Y = Y 95 | return y_pred_train 96 | 97 | def test(self, X, Y=None): 98 | """Calculate and return Y predicted value. 99 | 100 | Parameters 101 | ---------- 102 | X : array-like, shape = [n_samples, n_features] 103 | Test variables, where n_samples is the number of samples and n_features is the number of predictors. 104 | 105 | Returns 106 | ------- 107 | y_pred_test : array-like, shape = [n_samples, 1] 108 | Predicted y score for samples. 109 | """ 110 | 111 | layer1_weight = self.model.layers[0].get_weights()[0] 112 | layer1_bias = self.model.layers[0].get_weights()[1] 113 | layer2_weight = self.model.layers[1].get_weights()[0] 114 | layer2_bias = self.model.layers[1].get_weights()[1] 115 | 116 | self.model.y_loadings_ = layer2_weight 117 | self.model.y_loadings_ = self.model.y_loadings_.reshape(1, len(self.model.y_loadings_)) 118 | self.model.x_loadings_ = layer1_weight 119 | self.model.x_scores_ = np.matmul(X, self.model.x_loadings_) + layer1_bias 120 | y_pred_test = self.model.predict(X).flatten() 121 | self.model.pctvar_ = sum(abs(self.model.x_scores_) ** 2) / sum(sum(abs(X) ** 2)) * 100 122 | return y_pred_test 123 | -------------------------------------------------------------------------------- /cimcb/model/PCLR.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | from sklearn.decomposition import PCA 4 | from sklearn.linear_model import LogisticRegression 5 | from .BaseModel import BaseModel 6 | from sklearn.metrics import roc_auc_score 7 | from ..utils import binary_metrics, binary_evaluation 8 | 9 | 10 | class PCLR(BaseModel): 11 | """ Principal component logistic regression. 
12 | 13 | Parameters 14 | ---------- 15 | n_components : int, (default 2) 16 | Number of components to keep. 17 | 18 | Methods 19 | ------- 20 | train : Fit model to data. 21 | 22 | test : Apply model to test data. 23 | 24 | evaluate : Evaluate model. 25 | 26 | calc_bootci : Calculate bootstrap intervals for plot_featureimportance. 27 | 28 | plot_featureimportance : Plot coefficient and Variable Importance in Projection (VIP). 29 | 30 | plot_permutation_test : Perform a permutation test and plot. 31 | """ 32 | 33 | parametric = True 34 | bootlist = ["model.coef_", "Y_pred", "model.eval_metrics_"] # list of metrics to bootstrap 35 | 36 | def __init__(self, n_components=2): 37 | self.model = PCA(n_components=n_components) 38 | self.regrmodel = LogisticRegression(solver="liblinear") 39 | self.k = n_components 40 | 41 | self.__name__ = 'cimcb.model.PCLR' 42 | self.__params__ = {'n_components': n_components} 43 | 44 | def set_params(self, params): 45 | self.__init__(**params) 46 | 47 | def train(self, X, Y): 48 | """ Fit the PCLR model, save additional stats (as attributes) and return Y predicted values. 49 | 50 | Parameters 51 | ---------- 52 | X : array-like, shape = [n_samples, n_features] 53 | Predictor variables, where n_samples is the number of samples and n_features is the number of predictors. 54 | 55 | Y : array-like, shape = [n_samples, 1] 56 | Response variables, where n_samples is the number of samples. 57 | 58 | Returns 59 | ------- 60 | y_pred_train : array-like, shape = [n_samples, 1] 61 | Predicted y score for samples. 62 | """ 63 | 64 | # Ensure array and error check 65 | X, Y = self.input_check(X, Y) 66 | 67 | # Fit the model 68 | self.model.fit(X) 69 | self.model.x_scores_ = self.model.transform(X) 70 | self.regrmodel.fit(self.model.x_scores_, Y) 71 | 72 | # Save x_loadings, coef, pctvar, x_weights, y_loadings and vip 73 | self.model.x_loadings_ = self.model.components_.T 74 | self.model.coef_ = np.dot(self.model.x_loadings_, self.regrmodel.coef_.flatten()) 75 | self.model.pctvar_ = self.model.explained_variance_ 76 | self.model.x_weights_ = self.model.components_.T 77 | self.model.y_loadings_ = self.regrmodel.coef_.reshape(1, len(self.regrmodel.coef_.flatten())) 78 | 79 | # Calculate the Y predicted value via predict_proba. 80 | # predict_proba is designed for multi-class problems and returns one 81 | # probability column per class, so identify the column that best 82 | # discriminates the two classes (higher AUC) and use it as y_pred. 83 | y_pred = self.regrmodel.predict_proba(self.model.x_scores_) 84 | pred_0 = roc_auc_score(Y, y_pred[:, 0]) 85 | pred_1 = roc_auc_score(Y, y_pred[:, 1]) 86 | if pred_0 > pred_1: 87 | self.pred_index = 0 88 | else: 89 | self.pred_index = 1 90 | 91 | # Use the selected positive-class column as the training prediction 92 | y_pred_train = np.array(self.regrmodel.predict_proba(self.model.x_scores_)[:, self.pred_index]) 93 | 94 | self.Y_train = Y 95 | self.Y_pred_train = y_pred_train 96 | self.Y_pred = y_pred_train 97 | self.X = X 98 | self.Y = Y 99 | self.metrics_key = [] 100 | self.model.eval_metrics_ = [] 101 | bm = binary_evaluation(Y, y_pred_train) 102 | for key, value in bm.items(): 103 | self.model.eval_metrics_.append(value) 104 | self.metrics_key.append(key) 105 | 106 | self.model.eval_metrics_ = np.array(self.model.eval_metrics_) 107 | 108 | return y_pred_train 109 | 110 | def test(self, X, Y=None): 111 | """Calculate and return Y predicted value.
112 | 113 | Parameters 114 | ---------- 115 | X : array-like, shape = [n_samples, n_features] 116 | Test variables, where n_samples is the number of samples and n_features is the number of predictors. 117 | 118 | Returns 119 | ------- 120 | y_pred_test : array-like, shape = [n_samples, 1] 121 | Predicted y score for samples. 122 | """ 123 | 124 | # Convert X to a numpy array if it is a DataFrame or Series 125 | if isinstance(X, (pd.DataFrame, pd.Series)): 126 | X = np.array(X) 127 | 128 | # Project X onto the principal components 129 | newX = self.model.transform(X) 130 | # Calculate the Y predicted value using the positive-class 131 | # probability column identified during training 132 | y_pred_test = np.array(self.regrmodel.predict_proba(newX)[:, self.pred_index]) 133 | 134 | # If Y is provided, calculate the evaluation metrics 135 | if Y is not None: 136 | self.metrics_key = [] 137 | self.model.eval_metrics_ = [] 138 | bm = binary_evaluation(Y, y_pred_test) 139 | for key, value in bm.items(): 140 | self.model.eval_metrics_.append(value) 141 | self.metrics_key.append(key) 142 | 143 | self.model.eval_metrics_ = np.array(self.model.eval_metrics_) 144 | 145 | self.Y_pred = y_pred_test 146 | return y_pred_test 147 | -------------------------------------------------------------------------------- /cimcb/model/MBNN_SigmoidSigmoid.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from keras.callbacks import Callback 3 | from keras.optimizers import SGD 4 | from keras.models import Sequential 5 | from keras.models import Model 6 | from keras.layers import Input, Dense, Concatenate, LSTM, concatenate 7 | from keras.layers import Dense 8 | from .BaseModel import BaseModel 9 | from ..utils import YpredCallback, binary_evaluation 10 | 11 | 12 | class MBNN_SigmoidSigmoid(BaseModel): 13 | """Multi-block sigmoid-sigmoid neural network using Keras""" 14 | 15 | parametric = True 16 | bootlist = ["Y_pred", "model.eval_metrics_"] # list of metrics to bootstrap 17 | 18 | def __init__(self, n_neurons_l1=2, n_neurons_l2=2, epochs=200, learning_rate=0.01, momentum=0.0, decay=0.0, nesterov=False, loss="binary_crossentropy", batch_size=None, verbose=0): 19 | self.n_neurons_l1 = n_neurons_l1 20 | self.n_neurons_l2 = n_neurons_l2 21 | self.verbose = verbose 22 | self.n_epochs = epochs 23 | self.batch_size = batch_size 24 | self.loss = loss 25 | self.optimizer = SGD(lr=learning_rate, momentum=momentum, decay=decay, nesterov=nesterov) 26 | 27 | self.__name__ = 'cimcb.model.MBNN_SigmoidSigmoid' 28 | self.__params__ = {'n_neurons_l1': n_neurons_l1, 'n_neurons_l2': n_neurons_l2, 'epochs': epochs, 'learning_rate': learning_rate, 'momentum': momentum, 'decay': decay, 'nesterov': nesterov, 'loss': loss, 'batch_size': batch_size, 'verbose': verbose} 29 | 30 | def set_params(self, params): 31 | self.__init__(**params) 32 | 33 | def train(self, X, Y, epoch_ypred=False, epoch_xtest=None): 34 | """ Fit the neural network model, save additional stats (as attributes) and return Y predicted values. 35 | 36 | Parameters 37 | ---------- 38 | X : array-like, shape = [n_samples, n_features] 39 | Predictor variables, where n_samples is the number of samples and n_features is the number of predictors. 40 | 41 | Y : array-like, shape = [n_samples, 1] 42 | Response variables, where n_samples is the number of samples. 43 | 44 | Returns 45 | ------- 46 | y_pred_train : array-like, shape = [n_samples, 1] 47 | Predicted y score for samples.
48 | """ 49 | 50 | # If batch-size is None: 51 | if self.batch_size is None: 52 | batch_size = len(X) 53 | else: 54 | batch_size = self.batch_size 55 | 56 | #X = np.array(X) 57 | X1 = X[0] 58 | X2 = X[1] 59 | 60 | # Layers in loop 61 | layer1 = [] 62 | for i in X: 63 | input_i = Input(shape=(len(i.T),)) 64 | layer1_i = Dense(self.n_neurons_l1, activation="sigmoid")(input_i) 65 | layer1_i = Model(inputs=input_i, outputs=layer1_i) 66 | layer1.append(layer1_i) 67 | 68 | # Concatenate 69 | concat = concatenate([i.output for i in layer1]) 70 | model_concat = Dense(self.n_neurons_l2, activation="sigmoid")(concat) 71 | model_concat = Dense(1, activation="sigmoid")(model_concat) 72 | 73 | self.model = Model(inputs=[i.input for i in layer1], outputs=model_concat) 74 | self.model.compile(optimizer=self.optimizer, loss=self.loss, metrics=["accuracy"]) 75 | 76 | self.metrics_key = [] 77 | self.model.eval_metrics_ = [] 78 | 79 | self.model.pfi_acc_ = np.zeros((1, len(Y))) 80 | self.model.pfi_r2q2_ = np.zeros((1, len(Y))) 81 | self.model.pfi_auc_ = np.zeros((1, len(Y))) 82 | self.model.vip_ = np.zeros((1, len(Y))) 83 | self.model.coef_ = np.zeros((1, len(Y))) 84 | 85 | self.model.y_loadings_ = np.array([0, 0, 0]) 86 | self.model.x_scores_ = np.array([0, 0, 0]) 87 | self.model.x_loadings_ = np.array([0, 0, 0]) 88 | self.model.pctvar_ = np.array([0, 0, 0]) 89 | 90 | # Fit 91 | self.model.fit(X, Y, epochs=self.n_epochs, batch_size=batch_size, verbose=self.verbose) 92 | 93 | # Not sure about the naming scheme (trying to match PLS) 94 | y_pred_train = self.model.predict(X).flatten() 95 | 96 | 97 | self.model.eval_metrics_ = [] 98 | bm = binary_evaluation(Y, y_pred_train) 99 | for key, value in bm.items(): 100 | self.model.eval_metrics_.append(value) 101 | self.metrics_key.append(key) 102 | self.model.eval_metrics_ = np.array(self.model.eval_metrics_) 103 | 104 | # Storing X, Y, and Y_pred 105 | self.Y_train = Y 106 | self.Y_pred_train = y_pred_train 107 | self.Y_pred = y_pred_train 108 | self.X = X 109 | self.Y = Y 110 | return y_pred_train 111 | 112 | def test(self, X, Y=None): 113 | """Calculate and return Y predicted value. 114 | 115 | Parameters 116 | ---------- 117 | X : array-like, shape = [n_samples, n_features] 118 | Test variables, where n_samples is the number of samples and n_features is the number of predictors. 119 | 120 | Returns 121 | ------- 122 | y_pred_test : array-like, shape = [n_samples, 1] 123 | Predicted y score for samples. 124 | """ 125 | 126 | y_pred_test = self.model.predict(X).flatten() 127 | 128 | # Calculate and return Y predicted value 129 | if Y is not None: 130 | self.metrics_key = [] 131 | self.model.eval_metrics_ = [] 132 | bm = binary_evaluation(Y, y_pred_test) 133 | for key, value in bm.items(): 134 | self.model.eval_metrics_.append(value) 135 | self.metrics_key.append(key) 136 | 137 | self.model.eval_metrics_ = np.array(self.model.eval_metrics_) 138 | 139 | self.Y_pred = y_pred_test 140 | 141 | return y_pred_test 142 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | drawing 2 | 3 | # cimcb 4 | A package containing the necessary tools for the statistical analysis of untargeted and targeted metabolomics data. 
5 | 6 | ## Installation 7 | 8 | ### Dependencies 9 | cimcb requires: 10 | - Python (>=3.5) 11 | - Bokeh (>=1.0.0) 12 | - Keras 13 | - NumPy (>=1.12) 14 | - Pandas 15 | - SciPy 16 | - scikit-learn 17 | - Statsmodels 18 | - TensorFlow 19 | - tqdm 20 | 21 | ### User installation 22 | The recommended way to install cimcb and its dependencies is using ``conda``: 23 | ```console 24 | conda install -c cimcb cimcb 25 | ``` 26 | or ``pip``: 27 | ```console 28 | pip install cimcb 29 | ``` 30 | Alternatively, to install directly from GitHub: 31 | ```console 32 | pip install https://github.com/cimcb/cimcb/archive/master.zip 33 | ``` 34 | 35 | ### API 36 | For further detail on usage, refer to the docstrings. 37 | 38 | #### cimcb.model 39 | - [PLS_SIMPLS](https://github.com/KevinMMendez/cimcb/blob/master/cimcb/model/PLS_SIMPLS.py#L6-L23): Partial least-squares regression using the SIMPLS algorithm. 40 | - [PLS_NIPALS](https://github.com/KevinMMendez/cimcb/blob/master/cimcb/model/PLS_SIMPLS.py#L7-L24): Partial least-squares regression using the NIPALS algorithm. 41 | - [PCR](https://github.com/KevinMMendez/cimcb/blob/master/cimcb/model/PCR.py#L8-L25): Principal component regression. 42 | - [PCLR](https://github.com/KevinMMendez/cimcb/blob/master/cimcb/model/PCLR.py#L8-L25): Principal component logistic regression. 43 | - [RF](https://github.com/KevinMMendez/cimcb/blob/master/cimcb/model/RF.py#L8-L43): Random forest. 44 | - [SVM](https://github.com/KevinMMendez/cimcb/blob/master/cimcb/model/SVM.py#L8-L31): Support vector machine. 45 | - [NN_LinearLinear](https://github.com/KevinMMendez/cimcb/blob/master/cimcb/model/NN_LinearLinear.py#L10-L42): 2 Layer linear-linear neural network. 46 | - [NN_LinearSigmoid](https://github.com/KevinMMendez/cimcb/blob/master/cimcb/model/NN_LinearSigmoid.py#L10-L42): 2 Layer linear-sigmoid neural network. 47 | - [NN_SigmoidSigmoid](https://github.com/KevinMMendez/cimcb/blob/master/cimcb/model/NN_LogitLogit.py#L10-L42): 2 Layer sigmoid-sigmoid neural network. 48 | 49 | #### cimcb.plot 50 | - [boxplot](https://github.com/KevinMMendez/cimcb/blob/master/cimcb/plot/boxplot.py#L8-L18): Creates a boxplot using Bokeh. 51 | - [distribution](https://github.com/KevinMMendez/cimcb/blob/master/cimcb/plot/distribution.py#L6-L16): Creates a distribution plot using Bokeh. 52 | - [pca](https://github.com/KevinMMendez/cimcb/blob/master/cimcb/plot/pca.py#L10-L17): Creates a PCA scores and loadings plot using Bokeh. 53 | - [permutation_test](https://github.com/KevinMMendez/cimcb/blob/master/cimcb/plot/permutation_test.py#L13-L27): Creates permutation test plots using Bokeh. 54 | - [roc_plot](https://github.com/KevinMMendez/cimcb/blob/master/cimcb/plot/roc.py#L20-L33): Creates a ROC plot using Bokeh. 55 | - [scatter](https://github.com/KevinMMendez/cimcb/blob/master/cimcb/plot/scatter.py#L6-L16): Creates a scatterplot using Bokeh. 56 | - [scatterCI](https://github.com/KevinMMendez/cimcb/blob/master/cimcb/plot/scatterCI.py#L7-L14): Creates a scatterCI plot using Bokeh. 57 | 58 | #### cimcb.cross_val 59 | - [kfold](https://github.com/KevinMMendez/cimcb/blob/master/cimcb/cross_val/kfold.py#L14-L42): Exhaustive search over param_dict, calculating binary metrics using k-fold cross-validation (see the usage sketch below). 60 | - [holdout](https://github.com/KevinMMendez/cimcb/blob/master/cimcb/cross_val/holdout.py#L11-L36): Exhaustive search over param_dict, calculating binary metrics using a hold-out set.
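A hypothetical sketch of the cross-validation workflow described above. The `param_dict` argument is documented in the docstrings; the `folds` argument and the `run`/`plot` methods are assumptions based on typical cimcb usage and may differ between versions:

```python
import numpy as np
import cimcb as cb

# Toy data (illustrative only): 40 samples, 10 features, binary outcome
X = np.random.rand(40, 10)
Y = np.array([0, 1] * 20)

# Exhaustive search over n_components with 5-fold cross-validation
cv = cb.cross_val.kfold(model=cb.model.PCLR,
                        X=X,
                        Y=Y,
                        param_dict={'n_components': [1, 2, 3, 4]},
                        folds=5)  # 'folds' is an assumed argument name
cv.run()   # fit and evaluate every parameter combination
cv.plot()  # visualise the cross-validated binary metrics
```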
61 | 62 | #### cimcb.bootstrap 63 | - [Perc](https://github.com/KevinMMendez/cimcb/blob/master/cimcb/bootstrap/Perc.py#L6-L35): Returns bootstrap confidence intervals using the percentile bootstrap interval. 64 | - [BC](https://github.com/KevinMMendez/cimcb/blob/master/cimcb/bootstrap/BC.py#L7-L36): Returns bootstrap confidence intervals using the bias-corrected bootstrap interval. 65 | - [BCA](https://github.com/KevinMMendez/cimcb/blob/master/cimcb/bootstrap/BCA.py#L9-L37): Returns bootstrap confidence intervals using the bias-corrected and accelerated bootstrap interval. 66 | 67 | #### cimcb.utils 68 | - [binary_metrics](https://github.com/KevinMMendez/cimcb/blob/master/cimcb/utils/binary_metrics.py#L5-L26): Return a dict of binary stats with the following metrics: R2, auc, accuracy, precision, sensitivity, specificity, and F1 score. 69 | - [ci95_ellipse](https://github.com/KevinMMendez/cimcb/blob/master/cimcb/utils/ci95_ellipse.py#L6-L28): Construct a 95% confidence ellipse using PCA. 70 | - [dict_mean](https://github.com/KevinMMendez/cimcb/blob/master/cimcb/utils/dict_mean.py#L4-L5): Calculate the mean for all keys in a dictionary. 71 | - [dict_median](https://github.com/KevinMMendez/cimcb/blob/master/cimcb/utils/dict_median.py#L4-L5): Calculate the median for all keys in a dictionary. 72 | - [dict_perc](https://github.com/KevinMMendez/cimcb/blob/master/cimcb/utils/dict_perc.py#L4-L5): Calculate confidence intervals (percentile) for all keys in a dictionary. 73 | - [dict_std](https://github.com/KevinMMendez/cimcb/blob/master/cimcb/utils/dict_std.py#L4-L5): Calculate the standard deviation for all keys in a dictionary. 74 | - [knnimpute](https://github.com/KevinMMendez/cimcb/blob/master/cimcb/utils/knnimpute.py#L7-L22): kNN missing value imputation using Euclidean distance. 75 | - [load_dataCSV](https://github.com/KevinMMendez/cimcb/blob/master/cimcb/utils/load_dataCSV.py#L7-L25): Loads and validates the DataFile and PeakFile from CSV files. 76 | - [load_dataXL](https://github.com/KevinMMendez/cimcb/blob/master/cimcb/utils/load_dataXL.py#L7-L29): Loads and validates the DataFile and PeakFile from an Excel file. 77 | - [nested_getattr](https://github.com/KevinMMendez/cimcb/blob/master/cimcb/utils/nested_getattr.py#L4-L5): getattr for nested attributes. 78 | - [scale](https://github.com/KevinMMendez/cimcb/blob/master/cimcb/utils/scale.py#L4-L42): Scales x (which can include NaNs) with method: 'auto', 'pareto', 'vast', or 'level'. 79 | - [table_check](https://github.com/KevinMMendez/cimcb/blob/master/cimcb/utils/table_check.py#L4-L17): Error checking for DataTable and PeakTable (used in load_dataXL). 80 | - [univariate_2class](https://github.com/KevinMMendez/cimcb/blob/master/cimcb/utils/univariate_2class.py#L8-L35): Creates a table of univariate statistics (2 class). 81 | - [wmean](https://github.com/KevinMMendez/cimcb/blob/master/cimcb/utils/wmean.py#L4-L19): Returns the weighted mean. Ignores NaNs and handles infinite weights. 82 | 83 | ### License 84 | cimcb is licensed under the MIT license. 85 | 86 | ### Authors 87 | - [Kevin Mendez](https://github.com/kevinmmendez) 88 | - [David Broadhurst](https://scholar.google.ca/citations?user=M3_zZwUAAAAJ&hl=en) 89 | 90 | ### Correspondence 91 | Professor David Broadhurst, Director of the Centre for Integrative Metabolomics & Computational Biology at Edith Cowan University. 92 | E-mail: d.broadhurst@ecu.edu.au 93 | --------------------------------------------------------------------------------