├── cimcb_logo.png
├── cimcb
│   ├── cross_val
│   │   ├── __init__.py
│   │   ├── KFold.py
│   │   └── holdout.py
│   ├── bootstrap
│   │   ├── __init__.py
│   │   ├── BaseBootstrap.py
│   │   ├── Perc.py
│   │   ├── Per.py
│   │   ├── CPer.py
│   │   └── BCA.py
│   ├── __version__.py
│   ├── utils
│   │   ├── dict_mean.py
│   │   ├── dict_std.py
│   │   ├── nested_getattr.py
│   │   ├── dict_median.py
│   │   ├── smooth.py
│   │   ├── dict_median_scores.py
│   │   ├── dict_95ci.py
│   │   ├── dict_perc.py
│   │   ├── YpredCallback.py
│   │   ├── color_scale.py
│   │   ├── wmean.py
│   │   ├── __init__.py
│   │   ├── load_comparisonXL.py
│   │   ├── load_dataXL.py
│   │   ├── binary_evaluation.py
│   │   ├── load_dataCSV.py
│   │   ├── ci95_ellipse.py
│   │   ├── table_check.py
│   │   ├── scale.py
│   │   ├── binary_metrics.py
│   │   ├── multiclass_metrics.py
│   │   ├── univariate_2class.py
│   │   └── knnimpute.py
│   ├── plot
│   │   ├── __init__.py
│   │   ├── boxplot.py
│   │   ├── distribution.py
│   │   ├── pca.py
│   │   ├── permutation_test.py
│   │   ├── roc.py
│   │   ├── scatter.py
│   │   ├── scatterCI.py
│   │   └── scatter_ellipse.py
│   ├── __init__.py
│   └── model
│       ├── __init__.py
│       ├── BaseModel.py
│       ├── NN_LogitLogit_Sklearn.py
│       ├── NN_LinearLinear_Sklearn.py
│       ├── NN_LinearLogit_Sklearn.py
│       ├── NN_SoftmaxSoftmax.py
│       ├── SVM.py
│       ├── RBF_NN.py
│       ├── MBNN_SigmoidSigmoid_1Layer.py
│       ├── MBNN_LinearSigmoid.py
│       ├── MBNN_LinearSigmoid_1Layer.py
│       ├── NN_L1.py
│       ├── NN_L2.py
│       ├── RF.py
│       ├── PCR.py
│       ├── NN_LogitLogit.py
│       ├── NN_LinearLinear.py
│       ├── NN_SigmoidSigmoidSigmoid.py
│       ├── NN_ReluTan.py
│       ├── NN_TanRelu.py
│       ├── NN_TanTan.py
│       ├── NN_ReluRelu.py
│       ├── NN_LinearTan.py
│       ├── NN_LogitRelu.py
│       ├── NN_LogitTan.py
│       ├── NN_ReluLogit.py
│       ├── NN_TanLinear.py
│       ├── NN_TanLogit.py
│       ├── NN_LinearRelu.py
│       ├── NN_ReluLinear.py
│       ├── NN_LogitLinear.py
│       ├── NN_LinearLogit.py
│       ├── PCLR.py
│       ├── MBNN_SigmoidSigmoid.py
│       ├── NN_LinearSigmoid.py
│       ├── NN_SigmoidSigmoid.py
│       ├── PLS_SIMPLS.py
│       └── PLS_NIPALS.py
├── cimcb.recipe
│   └── meta.yaml
├── LICENSE
├── setup.py
├── .gitignore
└── README.md

/cimcb_logo.png:
--------------------------------------------------------------------------------
 https://raw.githubusercontent.com/CIMCB/cimcb/master/cimcb_logo.png
--------------------------------------------------------------------------------
/cimcb/cross_val/__init__.py:
--------------------------------------------------------------------------------
1 | from .KFold import KFold
2 | from .holdout import holdout
3 | 
4 | __all__ = ["KFold", "holdout"]
5 | 
--------------------------------------------------------------------------------
/cimcb/bootstrap/__init__.py:
--------------------------------------------------------------------------------
1 | from .Per import Per
2 | from .Perc import Perc
3 | from .CPer import CPer
4 | from .BCA import BCA
5 | 
6 | __all__ = ["Per", "Perc", "CPer", "BCA"]
7 | 
--------------------------------------------------------------------------------
/cimcb/__version__.py:
--------------------------------------------------------------------------------
1 | from __future__ import division, absolute_import, print_function
2 | 
3 | major = 2
4 | minor = 1
5 | micro = 2
6 | version = "%(major)d.%(minor)d.%(micro)d" % (locals())
7 | 
--------------------------------------------------------------------------------
/cimcb/utils/dict_mean.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | 
3 | 
4 | def dict_mean(dict_list):
5 |     mean_dict = {}
6 |     for key in dict_list[0].keys():
7 |         value = []
8 |         for i in dict_list:
9 |             value.append(i[key])
10 |         mean_dict[key] = np.mean(value)
11 |     return mean_dict
12 | 
--------------------------------------------------------------------------------
/cimcb/utils/dict_std.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | 
3 | 
4 | def dict_std(dict_list):
5 |     std_dict = {}
6 |     for key in dict_list[0].keys():
7 |         value = []
8 |         for i in dict_list:
9 |             value.append(i[key])
10 |         std_dict[key] = np.std(value, ddof=1)
11 |     return std_dict
12 | 
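Example (not part of the package source): a minimal usage sketch for dict_mean and dict_std; the per-fold metric dictionaries below are hypothetical.

    from cimcb.utils import dict_mean, dict_std

    # Hypothetical metric dictionaries, e.g. one per cross-validation fold
    folds = [{"AUC": 0.91, "R²": 0.55},
             {"AUC": 0.87, "R²": 0.49},
             {"AUC": 0.89, "R²": 0.52}]

    print(dict_mean(folds))  # ≈ {'AUC': 0.89, 'R²': 0.52}
    print(dict_std(folds))   # key-wise sample standard deviation (ddof=1)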
--------------------------------------------------------------------------------
/cimcb/utils/nested_getattr.py:
--------------------------------------------------------------------------------
1 | from functools import reduce
2 | 
3 | 
4 | def nested_getattr(model, attributes):
5 |     """getattr for nested attributes."""
6 | 
7 |     def _getattr(model, attributes):
8 |         return getattr(model, attributes)
9 | 
10 |     return reduce(_getattr, [model] + attributes.split("."))
11 | 
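Example (not part of the package source): a short sketch of how nested_getattr resolves a dotted attribute path; the two classes are hypothetical.

    from cimcb.utils import nested_getattr

    class Inner:
        x_scores_ = [0.2, 0.8]

    class Outer:
        model = Inner()

    # Equivalent to Outer().model.x_scores_
    print(nested_getattr(Outer(), "model.x_scores_"))  # [0.2, 0.8]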
--------------------------------------------------------------------------------
/cimcb/plot/__init__.py:
--------------------------------------------------------------------------------
1 | from .boxplot import boxplot
2 | from .distribution import distribution
3 | from .pca import pca
4 | from .permutation_test import permutation_test
5 | from .roc import roc_boot, roc_cv, roc
6 | from .scatter import scatter
7 | from .scatterCI import scatterCI
8 | from .scatter_ellipse import scatter_ellipse
9 | 
10 | __all__ = ["boxplot", "distribution", "pca", "permutation_test", "roc_boot", "roc_cv", "roc", "scatter", "scatterCI", "scatter_ellipse"]
11 | 
--------------------------------------------------------------------------------
/cimcb/utils/dict_median.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | 
3 | 
4 | def dict_median(dict_list):
5 |     median_dict = {}
6 |     for key in dict_list[0].keys():
7 |         value = []
8 |         for i in dict_list:
9 |             value.append(i[key])
10 |         value_arr = np.array(value)
11 |         if np.isnan(value_arr).any() == True:
12 |             median_dict[key] = np.nan
13 |         else:
14 |             median_dict[key] = np.median(value)
15 |     return median_dict
16 | 
--------------------------------------------------------------------------------
/cimcb/__init__.py:
--------------------------------------------------------------------------------
1 | from .__version__ import version as __version__
2 | 
3 | # To ignore TensorFlow Deprecation Warnings
4 | import logging
5 | logging.getLogger("tensorflow").setLevel(logging.ERROR)
6 | 
7 | # Use Theano
8 | import os
9 | os.environ["KERAS_BACKEND"] = "theano"
10 | import keras
11 | import keras.backend
12 | 
13 | from . import bootstrap
14 | from . import cross_val
15 | from . import model
16 | from . import plot
17 | from . import utils
18 | 
19 | __all__ = ["bootstrap", "cross_val", "model", "plot", "utils"]
20 | 
--------------------------------------------------------------------------------
/cimcb/utils/smooth.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | 
3 | 
4 | def smooth(a, WSZ):
5 |     # a: NumPy 1-D array containing the data to be smoothed
6 |     # WSZ: smoothing window size, which must be an odd number,
7 |     # as in the original MATLAB implementation (an even WSZ is reduced by 1)
8 |     if WSZ % 2 == 0:
9 |         WSZ = WSZ - 1
10 |     out0 = np.convolve(a, np.ones(WSZ, dtype=int), 'valid') / WSZ
11 |     r = np.arange(1, WSZ - 1, 2)
12 |     start = np.cumsum(a[:WSZ - 1])[::2] / r
13 |     stop = (np.cumsum(a[:-WSZ:-1])[::2] / r)[::-1]
14 |     return np.concatenate((start, out0, stop))
15 | 
--------------------------------------------------------------------------------
/cimcb/utils/dict_median_scores.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | 
3 | 
4 | def dict_median_scores(dict_list):
5 |     median_dict = {}
6 |     for key in dict_list.keys():
7 |         value = dict_list[key]
8 |         value_arr = np.array(value)
9 |         if np.isnan(value_arr).any() == True:
10 |             median_dict[key] = np.nan
11 |         else:
12 |             # append_low = np.percentile(value_arr, 2.5)
13 |             append_mid = np.median(value_arr, axis=0)
14 |             # append_upp = np.percentile(value_arr, 97.5)
15 |             median_dict[key] = append_mid
16 |     return median_dict
17 | 
--------------------------------------------------------------------------------
/cimcb/utils/dict_95ci.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | 
3 | 
4 | def dict_95ci(dict_list):
5 |     median_dict = {}
6 |     for key in dict_list.keys():
7 |         value = dict_list[key]
8 |         value_arr = np.array(value)
9 |         if np.isnan(value_arr).any() == True:
10 |             median_dict[key] = np.nan
11 |         else:
12 |             append_low = np.percentile(value_arr, 2.5)
13 |             append_mid = np.percentile(value_arr, 50)
14 |             append_upp = np.percentile(value_arr, 97.5)
15 |             median_dict[key] = [append_low, append_upp, append_mid]
16 |     return median_dict
17 | 
--------------------------------------------------------------------------------
/cimcb/utils/dict_perc.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | 
3 | 
4 | def dict_perc(dict_list, ci=95):
5 |     perc_dict = {}
6 |     for key in dict_list[0].keys():
7 |         value = []
8 |         for i in dict_list:
9 |             value.append(i[key])
10 |         value_arr = np.array(value)
11 |         if np.isnan(value_arr).any() == True:
12 |             perc_dict[key] = [np.nan, np.nan]
13 |         else:
14 |             lower_alpha = (100 - ci) / 2
15 |             upper_alpha = 100 - lower_alpha
16 |             lower_ci = np.percentile(value_arr, lower_alpha)
17 |             upper_ci = np.percentile(value_arr, upper_alpha)
18 |             perc_dict[key] = [lower_ci, upper_ci]
19 |     return perc_dict
20 | 
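Example (not part of the package source): a minimal sketch of dict_perc on a list of metric dictionaries from hypothetical bootstrap resamples.

    import numpy as np
    from cimcb.utils import dict_perc

    rng = np.random.RandomState(42)
    boot = [{"AUC": rng.uniform(0.80, 0.95)} for _ in range(100)]

    # Key-wise [2.5th, 97.5th] percentile interval
    print(dict_perc(boot, ci=95))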
--------------------------------------------------------------------------------
/cimcb/utils/YpredCallback.py:
--------------------------------------------------------------------------------
1 | from keras.callbacks import Callback
2 | 
3 | 
4 | class YpredCallback(Callback):
5 |     """Used as a callback for Keras to get Ypred_train, and Ypred_test for each epoch.
6 | 
7 |     Example:
8 |     yc = YpredCallback(model, X_train, X_test)
9 |     model.fit(X, Y, callbacks=[yc])
10 |     """
11 | 
12 |     def __init__(self, model, X_train, X_test=None):
13 |         self.model = model  # Keras model
14 |         self.Y_train = []
15 |         self.Y_test = []
16 |         self.X_train = X_train
17 |         # If X_test is None, use X_train
18 |         if X_test is None:
19 |             self.X_test = X_train
20 |         else:
21 |             self.X_test = X_test
22 | 
23 |     def on_epoch_end(self, epoch, logs=None):
24 |         Y_train_pred = self.model.predict(self.X_train).flatten()
25 |         Y_test_pred = self.model.predict(self.X_test).flatten()
26 |         self.Y_train.append(Y_train_pred)
27 |         self.Y_test.append(Y_test_pred)
28 | 
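Example (not part of the package source): a usage sketch for YpredCallback; model, X_train, X_test and Y_train are assumed to be an already-compiled Keras model and NumPy arrays.

    from cimcb.utils import YpredCallback

    yc = YpredCallback(model, X_train, X_test)
    model.fit(X_train, Y_train, epochs=200, callbacks=[yc])

    # One prediction array per epoch, accumulated by on_epoch_end
    ypred_train_per_epoch = yc.Y_train
    ypred_test_per_epoch = yc.Y_test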
--------------------------------------------------------------------------------
/cimcb.recipe/meta.yaml:
--------------------------------------------------------------------------------
1 | package:
2 |   name: cimcb
3 |   version: 2.1.2
4 | 
5 | source:
6 |   path: ../
7 | 
8 | build:
9 |   noarch: python
10 |   number: 0
11 |   script: "{{ PYTHON }} -m pip install . --no-deps -vv"
12 | 
13 | requirements:
14 |   build:
15 |     - setuptools
16 |     - python >=3.5
17 |     - bokeh >=1.0.0
18 |     - keras >=2.2.4
19 |     - numpy >=1.12
20 |     - pandas
21 |     - scipy
22 |     - scikit-learn
23 |     - statsmodels
24 |     - theano
25 |     - tqdm
26 |     - xlrd
27 |     - joblib
28 |     - nomkl
29 | 
30 |   run:
31 |     - python >=3.5
32 |     - bokeh >=1.0.0
33 |     - keras >=2.2.4
34 |     - numpy >=1.12
35 |     - pandas
36 |     - scipy
37 |     - scikit-learn
38 |     - statsmodels
39 |     - theano
40 |     - tqdm
41 |     - xlrd
42 |     - joblib
43 |     - nomkl
44 | 
45 | about:
46 |   home: https://github.com/CIMCB
47 |   license: MIT
48 |   summary: "A package containing the necessary tools for the statistical analysis of untargeted and targeted metabolomics data."
49 | 
--------------------------------------------------------------------------------
/cimcb/utils/color_scale.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from sklearn import preprocessing
3 | 
4 | 
5 | def color_scale(x, method="tanh", beta=None):
6 | 
7 |     # Initially scale between 0 and 1
8 |     scaler = preprocessing.MinMaxScaler(feature_range=(0.1, 1))
9 |     x_init = scaler.fit_transform(x[:, np.newaxis]).flatten()
10 | 
11 |     # Methods of transformation
12 |     if method == "linear":
13 |         x_tr = x_init
14 |     elif method == "sq":
15 |         x_tr = x_init ** 2
16 |     elif method == "sqrt":
17 |         x_tr = np.sqrt(x_init)
18 |     elif method == "tan":
19 |         x_tr = 1 + np.tan(beta * (1 + x_init))
20 |     elif method == "tanh+1":
21 |         x_tr = 1 + np.tanh(beta * (-1 + x_init))
22 |     elif method == "tanh":
23 |         x_tr_init = np.tanh(beta * (-1 + x_init))
24 |         x_tr = scaler.fit_transform(x_tr_init[:, np.newaxis]).flatten()
25 |     else:
26 |         print("An incorrect method for color_scale was selected, so it was set to 'linear'. Supported methods are 'linear', 'sq', 'sqrt', 'tan', 'tanh', and 'tanh+1'.")
27 |         x_tr = x_init
28 | 
29 |     return x_tr
30 | 
--------------------------------------------------------------------------------
/cimcb/model/__init__.py:
--------------------------------------------------------------------------------
1 | from .NN_LinearSigmoid import NN_LinearSigmoid
2 | from .NN_SigmoidSigmoid import NN_SigmoidSigmoid
3 | from .NN_SoftmaxSoftmax import NN_SoftmaxSoftmax
4 | from .MBNN_LinearSigmoid import MBNN_LinearSigmoid
5 | from .MBNN_SigmoidSigmoid import MBNN_SigmoidSigmoid
6 | from .MBNN_LinearSigmoid_1Layer import MBNN_LinearSigmoid_1Layer
7 | from .MBNN_SigmoidSigmoid_1Layer import MBNN_SigmoidSigmoid_1Layer
8 | from .NN_SigmoidSigmoidSigmoid import NN_SigmoidSigmoidSigmoid
9 | from .NN_LinearLogit_Sklearn import NN_LinearLogit_Sklearn
10 | from .NN_LogitLogit_Sklearn import NN_LogitLogit_Sklearn
11 | from .PCLR import PCLR
12 | from .PCR import PCR
13 | from .PLS_SIMPLS import PLS_SIMPLS
14 | from .PLS_NIPALS import PLS_NIPALS
15 | from .RF import RF
16 | from .SVM import SVM
17 | from .NN_L1 import NN_L1
18 | from .NN_L2 import NN_L2
19 | from .RBF_NN import RBF_NN
20 | 
21 | 
22 | __all__ = ["NN_LinearSigmoid", "NN_SigmoidSigmoid", "NN_SoftmaxSoftmax", "MBNN_LinearSigmoid", "MBNN_SigmoidSigmoid", "MBNN_LinearSigmoid_1Layer", "MBNN_SigmoidSigmoid_1Layer", "NN_SigmoidSigmoidSigmoid", "NN_LinearLogit_Sklearn", "NN_LogitLogit_Sklearn", "PCLR", "PCR", "PLS_SIMPLS", "PLS_NIPALS", "RF", "SVM", "NN_L1", "NN_L2", "RBF_NN"]
23 | 
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) 2019 CIMCB
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup
2 | 
3 | 
4 | def readme():
5 |     with open('README.md', encoding='utf-8') as f:
6 |         return f.read()
7 | 
8 | 
9 | setup(
10 |     name="cimcb",
11 |     version="2.1.2",
12 |     description="A package containing the necessary tools for the statistical analysis of untargeted and targeted metabolomics data.",
13 |     long_description=readme(),
14 |     long_description_content_type='text/markdown',
15 |     license="MIT",
16 |     url="https://github.com/KevinMMendez/cimcb",
17 |     packages=["cimcb", "cimcb.bootstrap", "cimcb.cross_val", "cimcb.model", "cimcb.plot", "cimcb.utils"],
18 |     python_requires=">=3.5",
19 |     install_requires=["bokeh>=1.0.0",
20 |                       "keras>=2.2.4",
21 |                       "numpy>=1.12",
22 |                       "pandas",
23 |                       "scipy",
24 |                       "scikit-learn",
25 |                       "statsmodels",
26 |                       "theano",
27 |                       "tqdm",
28 |                       "xlrd",
29 |                       "joblib"],
30 |     author="Kevin Mendez, David Broadhurst",
31 |     author_email="k.mendez@ecu.edu.au, d.broadhurst@ecu.edu.au",
32 | )
33 | 
--------------------------------------------------------------------------------
/cimcb/utils/wmean.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | 
3 | 
4 | def wmean(x, weights):
5 |     """Returns Weighted Mean. Ignores NaNs and handles infinite weights.
6 | 
7 |     Parameters
8 |     ----------
9 |     x: array-like [n_samples]
10 |         An array-like object that contains the data.
11 | 
12 |     weights: array-like [n_samples]
13 |         An array-like object that contains the corresponding weights.
14 | 
15 |     Returns
16 |     ----------------------------------
17 |     m: number
18 |         The weighted mean.
19 |     """
20 | 
21 |     # Flatten x and weights
22 |     x = x.flatten()
23 |     weights = weights.flatten()
24 | 
25 |     # Find NaNs in x and infinite weights
26 |     nans = np.isnan(x)
27 |     infs = np.isinf(weights)
28 | 
29 |     # If all x are nans, return np.nan
30 |     if nans.all() == True:
31 |         m = np.nan
32 |         return m
33 | 
34 |     # If there are infinite weights, use the corresponding x
35 |     if infs.any() == True:
36 |         m = np.nanmean(x[infs])
37 |         return m
38 | 
39 |     # Set NaNs to zero
40 |     x[nans] = 0
41 |     weights[nans] = 0
42 | 
43 |     # Normalize the weights + calculate Weighted Mean
44 |     weights = weights / np.sum(weights)
45 |     m = np.matmul(weights, x)
46 |     return m
47 | 
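Example (not part of the package source): a short sketch of wmean's NaN handling; the values are arbitrary.

    import numpy as np
    from cimcb.utils import wmean

    x = np.array([1.0, 2.0, np.nan, 4.0])
    w = np.array([1.0, 1.0, 1.0, 2.0])

    # The NaN is dropped and the remaining weights are renormalised:
    # (1*1 + 1*2 + 2*4) / 4 = 2.75
    print(wmean(x, w))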
["binary_metrics", "binary_evaluation", "multiclass_metrics", "ci95_ellipse", "dict_95ci", "dict_mean", "dict_median", "dict_median_scores", "dict_std", "dict_perc", "knnimpute", "load_comparisonXL", "load_dataXL", "load_dataCSV", "scale", "nested_getattr", "table_check", "univariate_2class", "wmean", "YpredCallback", "color_scale", "smooth"] 25 | -------------------------------------------------------------------------------- /cimcb/utils/load_comparisonXL.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | 4 | def load_comparisonXL(method, evaluate="train", dropna=True): 5 | """Load comparison table.""" 6 | if evaluate == "test": 7 | e = "['Test']" 8 | elif evaluate == "in bag": 9 | e = "['In Bag']" 10 | elif evaluate == "out of bag": 11 | e = "['Out of Bag']" 12 | else: 13 | e = "['Train']" 14 | 15 | # Import methods 16 | table = [] 17 | for i in method: 18 | table.append(pd.read_excel(i + ".xlsx")) 19 | 20 | # Concatenate table 21 | df = pd.DataFrame() 22 | for i in range(len(table)): 23 | df = pd.concat([df, table[i].loc[table[i]['evaluate'] == e].T.squeeze()], axis=1, sort=False) 24 | df = df.T.drop(columns="evaluate") 25 | 26 | # Remove [ ] from string 27 | for i in range(len(df)): 28 | for j in range(len(df.T)): 29 | if type(df.iloc[i, j]) is str: 30 | df.iloc[i, j] = df.iloc[i, j][2: -2] 31 | 32 | # Reset index and add methods column 33 | method_name = [] 34 | for i in range(len(method)): 35 | name_i = method[i].rsplit('/', 1)[1] 36 | method_name.append(name_i) 37 | df = df.reset_index() 38 | df = pd.concat([pd.Series(method_name, name="method"), df], axis=1, sort=False) 39 | df = df.drop("index", 1) 40 | #df = df.set_index("method") 41 | 42 | # drop columns with just nans 43 | if dropna is True: 44 | df = df.dropna(axis=1, how='all') 45 | 46 | return df 47 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 | 
6 | # C extensions
7 | *.so
8 | 
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | *.egg-info/
24 | .installed.cfg
25 | *.egg
26 | MANIFEST
27 | 
28 | # PyInstaller
29 | # Usually these files are written by a python script from a template
30 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
31 | *.manifest
32 | *.spec
33 | 
34 | # Installer logs
35 | pip-log.txt
36 | pip-delete-this-directory.txt
37 | 
38 | # Unit test / coverage reports
39 | htmlcov/
40 | .tox/
41 | .coverage
42 | .coverage.*
43 | .cache
44 | nosetests.xml
45 | coverage.xml
46 | *.cover
47 | .hypothesis/
48 | .pytest_cache/
49 | 
50 | # Translations
51 | *.mo
52 | *.pot
53 | 
54 | # Django stuff:
55 | *.log
56 | local_settings.py
57 | db.sqlite3
58 | 
59 | # Flask stuff:
60 | instance/
61 | .webassets-cache
62 | 
63 | # Scrapy stuff:
64 | .scrapy
65 | 
66 | # Sphinx documentation
67 | docs/_build/
68 | 
69 | # PyBuilder
70 | target/
71 | 
72 | # Jupyter Notebook
73 | .ipynb_checkpoints
74 | 
75 | # pyenv
76 | .python-version
77 | 
78 | # celery beat schedule file
79 | celerybeat-schedule
80 | 
81 | # SageMath parsed files
82 | *.sage.py
83 | 
84 | # Environments
85 | .env
86 | .venv
87 | env/
88 | venv/
89 | ENV/
90 | env.bak/
91 | venv.bak/
92 | 
93 | # Spyder project settings
94 | .spyderproject
95 | .spyproject
96 | 
97 | # Rope project settings
98 | .ropeproject
99 | 
100 | # mkdocs documentation
101 | /site
102 | 
103 | # mypy
104 | .mypy_cache/
105 | 
--------------------------------------------------------------------------------
/cimcb/utils/load_dataXL.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import numpy as np
3 | from os import path
4 | from .table_check import table_check
5 | 
6 | 
7 | def load_dataXL(filename, DataSheet, PeakSheet):
8 |     """Loads and validates the DataFile and PeakFile from an excel file.
9 | 
10 | 
11 |     Parameters
12 |     ----------
13 |     filename : string
14 |         The name of the excel file (.xlsx file) e.g. 'projectxxx1.xlsx'. Note, it can include the directory e.g. '/homedir/projectxxx1.xlsx'
15 | 
16 |     DataSheet : string
17 |         The name of the data sheet in the file e.g. 'Data'. Note, the data sheet must contain an 'Idx' and 'SampleID' column.
18 | 
19 |     PeakSheet : string
20 |         The name of the peak sheet in the file e.g. 'Peak'. Note, the peak sheet must contain an 'Idx', 'Name', and 'Label' column.
21 | 
22 |     Returns
23 |     -------
24 |     DataTable: DataFrame
25 |         Data sheet from the excel file.
26 | 
27 |     PeakTable: DataFrame
28 |         Peak sheet from the excel file.
29 | """ 30 | 31 | if path.isfile(filename) is False: 32 | raise ValueError("{} does not exist.".format(filename)) 33 | 34 | if not filename.endswith(".xlsx"): 35 | raise ValueError("{} should be a .xlsx file.".format(filename)) 36 | 37 | # LOAD PEAK DATA 38 | print("Loadings PeakFile: {}".format(PeakSheet)) 39 | PeakTable = pd.read_excel(filename, sheet_name=PeakSheet) 40 | 41 | # LOAD DATA TABLE 42 | print("Loadings DataFile: {}".format(DataSheet)) 43 | DataTable = pd.read_excel(filename, sheet_name=DataSheet) 44 | 45 | # Replace with nans 46 | DataTable = DataTable.replace(-99, np.nan) 47 | DataTable = DataTable.replace(".", np.nan) 48 | DataTable = DataTable.replace(" ", np.nan) 49 | 50 | # Error checks 51 | table_check(DataTable, PeakTable, print_statement=True) 52 | 53 | # Make the Idx column start from 1 54 | DataTable.index = np.arange(1, len(DataTable) + 1) 55 | PeakTable.index = np.arange(1, len(PeakTable) + 1) 56 | 57 | print("TOTAL SAMPLES: {} TOTAL PEAKS: {}".format(len(DataTable), len(PeakTable))) 58 | print("Done!") 59 | return DataTable, PeakTable 60 | -------------------------------------------------------------------------------- /cimcb/utils/binary_evaluation.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sklearn.metrics import confusion_matrix, roc_auc_score, roc_curve, auc 3 | from sklearn import metrics 4 | import scipy 5 | 6 | 7 | def binary_evaluation(y_true, y_pred): 8 | """ Return a dict of binary stats with the following metrics: R2, auc, accuracy, precision, sensitivity, specificity, and F1 score. 9 | 10 | Parameters 11 | ---------- 12 | y_true : array-like, shape = [n_samples] 13 | Binary label for samples (0s and 1s). 14 | 15 | y_pred : array-like, shape = [n_samples] 16 | Predicted y score for samples. 17 | 18 | cut_off : number, (default 0.5) 19 | A value for y_pred greater-than or equal to the cut_off will be treated as 1, otherwise it will be treated as 0 for the confusion matrix. 20 | 21 | parametric : boolean, (default True) 22 | If parametric is True, calculate R2. 23 | 24 | Returns 25 | ------- 26 | stats: dict 27 | dict containing calculated R2, auc, accuracy, precision, sensitivity, specificity, and F1 score. 
28 | """ 29 | 30 | # Convert to array 31 | y_true_arr = np.array(y_true) 32 | y_pred_arr = np.array(y_pred) 33 | 34 | # Error checks 35 | if y_true_arr.ndim != 1: 36 | raise ValueError("y_true should only have 1 dimension.") 37 | if y_pred_arr.ndim != 1: 38 | raise ValueError("y_pred should only have 1 dimension.") 39 | if y_true_arr.shape[0] != y_pred_arr.shape[0]: 40 | raise ValueError("The number of values in y_true should match y_pred.") 41 | if np.array_equal(sorted(set(y_true_arr)), [0, 1]) is False: 42 | raise ValueError("y_true should only contain 0s and 1s") 43 | 44 | # Binary statistics dictionary 45 | stats = {} 46 | 47 | stats["R²"] = 1 - (sum((y_true_arr - y_pred_arr) ** 2) / sum((y_true_arr - np.mean(y_true_arr)) ** 2)) 48 | 49 | fpr, tpr, thresholds = metrics.roc_curve(y_true_arr, y_pred_arr, pos_label=1) 50 | stats["AUC"] = metrics.auc(fpr, tpr) 51 | 52 | try: 53 | stats["ManW P-Value"] = scipy.stats.mannwhitneyu(y_pred_arr[y_true_arr == 0], y_pred_arr[y_true_arr == 1], alternative="two-sided")[1] 54 | except ValueError: 55 | stats["ManW P-Value"] = 1 56 | 57 | return stats 58 | -------------------------------------------------------------------------------- /cimcb/utils/load_dataCSV.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | from os import path 4 | from .table_check import table_check 5 | 6 | 7 | def load_dataCSV(DataSheet, PeakSheet): 8 | """Loads and validates the DataFile and PeakFile from csv files. 9 | 10 | 11 | Parameters 12 | ---------- 13 | DataSheet : string 14 | The name of the csv file (.csv file) that contains the 'Data'. Note, the data sheet must contain an 'Idx' and 'SampleID'column. e.g. 'datasheetxxx1.csv' or '/homedir/datasheetxxx1.csv' 15 | 16 | PeakSheet : string 17 | The name of the csv file (.csv file) that contains the 'Peak'. Note, the peak sheet must contain an 'Idx', 'Name', and 'Label' column. e.g. 'peaksheetxxx1.csv' or 'peaksheetxxx1.csv' 18 | 19 | Returns 20 | ------- 21 | DataTable: DataFrame 22 | Data sheet from the csv file. 23 | 24 | PeakTable: DataFrame 25 | Peak sheet from the csv file. 
26 | """ 27 | 28 | # Check Datasheet exists 29 | if path.isfile(DataSheet) is False: 30 | raise ValueError("{} does not exist.".format(filename)) 31 | 32 | if not DataSheet.endswith(".csv"): 33 | raise ValueError("{} should be a .csv file.".format(filename)) 34 | 35 | # Check PeakSheet exists 36 | if path.isfile(PeakSheet) is False: 37 | raise ValueError("{} does not exist.".format(filename)) 38 | 39 | if not PeakSheet.endswith(".csv"): 40 | raise ValueError("{} should be a .csv file.".format(filename)) 41 | 42 | # LOAD PEAK DATA 43 | print("Loadings PeakFile: {}".format(PeakSheet)) 44 | PeakTable = pd.read_csv(PeakSheet) 45 | 46 | # LOAD DATA TABLE 47 | print("Loadings DataFile: {}".format(DataSheet)) 48 | DataTable = pd.read_csv(DataSheet) 49 | 50 | # Replace with nans 51 | DataTable = DataTable.replace(-99, np.nan) 52 | DataTable = DataTable.replace(".", np.nan) 53 | DataTable = DataTable.replace(" ", np.nan) 54 | 55 | # Error checks 56 | table_check(DataTable, PeakTable, print_statement=True) 57 | 58 | # Make the Idx column start from 1 59 | DataTable.index = np.arange(1, len(DataTable) + 1) 60 | PeakTable.index = np.arange(1, len(PeakTable) + 1) 61 | 62 | print("TOTAL SAMPLES: {} TOTAL PEAKS: {}".format(len(DataTable), len(PeakTable))) 63 | print("Done!") 64 | return DataTable, PeakTable 65 | -------------------------------------------------------------------------------- /cimcb/utils/ci95_ellipse.py: -------------------------------------------------------------------------------- 1 | import math 2 | import numpy as np 3 | from sklearn.decomposition import PCA 4 | 5 | 6 | def ci95_ellipse(data, type="pop"): 7 | """ Construct a 95% confidence ellipse using PCA. 8 | 9 | Parameters 10 | ---------- 11 | data : array-like, shape = [n_samples, 2] 12 | data[:,0] must represent x coordinates 13 | data[:,1] must represent y coordinates 14 | 15 | type : string, optional (default='pop') 16 | It must be 'pop' or 'mean' 17 | 18 | Returns 19 | ------- 20 | ellipse: array-like, shape = [100, 2] 21 | ellipse[:,0] represents x coordinates of ellipse 22 | ellipse[:,1] represents y coordinates of ellipse 23 | 24 | outside: array-like, shape = [n_samples, 1] 25 | returns an 1d array (of 0/1) with length n_samples 26 | 0 : ith sample is outside of ellipse 27 | 1 : ith sample is inside of ellipse 28 | """ 29 | 30 | # Build and fit PCA model 31 | pca = PCA() 32 | pca.fit(data) 33 | coeff = pca.components_ 34 | score = pca.transform(data) 35 | eigvals = pca.explained_variance_ 36 | 37 | # Calculate rotation angle 38 | phi = math.atan2(coeff[0, 1], coeff[0, 0]) 39 | 40 | # This angle is between -pi and pi. 41 | # Let's shift it such that the angle is between 0 and 2pi 42 | if phi < 0: 43 | phi += 2 * math.pi 44 | 45 | # Get the coordinates of the data mean 46 | n = len(data) 47 | m = np.mean(data, axis=0) 48 | x0 = m[0] 49 | y0 = m[1] 50 | 51 | # Get the 95% confidence interval error ellipse 52 | # inverse of the chi-square cumulative distribution for p = 0.05 & 2 d.f. 
53 |     chisquare_val = 5.9915
54 |     if type == "pop":
55 |         a = math.sqrt(chisquare_val * eigvals[0])
56 |         b = math.sqrt(chisquare_val * eigvals[1])
57 |     elif type == "mean":
58 |         a = math.sqrt(chisquare_val * eigvals[0] / n)
59 |         b = math.sqrt(chisquare_val * eigvals[1] / n)
60 |     else:
61 |         raise ValueError("type has to be 'pop' or 'mean'.")
62 | 
63 |     # the ellipse in x and y coordinates
64 |     theta_grid = np.linspace(0, 2 * math.pi, num=100)
65 |     ellipse_x_r = a * np.cos(theta_grid)
66 |     ellipse_y_r = b * np.sin(theta_grid)
67 | 
68 |     # Define a rotation matrix
69 |     R = np.array([[np.cos(phi), np.sin(phi)], [-np.sin(phi), np.cos(phi)]])
70 |     # let's rotate the ellipse to some angle phi
71 |     r_ellipse = np.dot(np.vstack((ellipse_x_r, ellipse_y_r)).T, R)
72 | 
73 |     # Draw the error ellipse
74 |     x = r_ellipse[:, 0] + x0
75 |     y = r_ellipse[:, 1] + y0
76 |     ellipse = np.stack((x, y), axis=1)
77 | 
78 |     outside = []
79 |     for i in range(len(score)):
80 |         metric = (score[i, 0] / a) ** 2 + (score[i, 1] / b) ** 2
81 |         if metric > 1:
82 |             outside.append(1)
83 |         else:
84 |             outside.append(0)
85 | 
86 |     return ellipse, outside
87 | 
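Example (not part of the package source): a minimal sketch of ci95_ellipse on hypothetical 2-D scores (e.g. the first two PCA components).

    import numpy as np
    from cimcb.utils import ci95_ellipse

    rng = np.random.RandomState(0)
    scores = rng.multivariate_normal([0, 0], [[2.0, 0.5], [0.5, 1.0]], size=200)

    ellipse, outside = ci95_ellipse(scores, type="pop")
    # ellipse: 100 (x, y) pairs tracing the 95% ellipse
    # outside[i] == 1 if sample i falls outside the ellipse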
Please change") 52 | 53 | if "Label" not in peak_columns: 54 | raise ValueError("Data Table does not contain the required 'Label' column") 55 | 56 | # Check that Peak Names in PeakTable & DataTable match 57 | peak_list = PeakTable.Name 58 | data_columns = DataTable.columns.values 59 | temp = np.intersect1d(data_columns, peak_list) 60 | 61 | if len(temp) != len(peak_list): 62 | raise ValueError("The Peak Names in Data Table should exactly match the Peak Names in Peak Table. Remember that all Peak Names should be unique.") 63 | 64 | if print_statement is True: 65 | print("Data Table & Peak Table is suitable.") 66 | -------------------------------------------------------------------------------- /cimcb/model/NN_LogitLogit_Sklearn.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from keras.callbacks import Callback 3 | from keras.optimizers import SGD 4 | from sklearn.neural_network import MLPClassifier 5 | from keras.models import Sequential 6 | from keras.layers import Dense 7 | from .BaseModel import BaseModel 8 | from ..utils import YpredCallback 9 | 10 | 11 | class NN_LogitLogit_Sklearn(BaseModel): 12 | """2 Layer linear-linear neural network using Keras""" 13 | 14 | parametric = False 15 | bootlist = None 16 | 17 | def __init__(self, n_nodes=2, epochs=200, learning_rate=0.01, momentum=0.0, decay=0.0, nesterov=False, loss="binary_crossentropy", batch_size=None, verbose=0): 18 | self.n_nodes = n_nodes 19 | self.verbose = verbose 20 | self.n_epochs = epochs 21 | self.k = n_nodes 22 | self.batch_size = batch_size 23 | self.loss = loss 24 | self.learning_rate = learning_rate 25 | self.momentum = momentum 26 | self.decay = decay 27 | self.optimizer = "sgd" 28 | 29 | def train(self, X, Y, epoch_ypred=False, epoch_xtest=None): 30 | """ Fit the neural network model, save additional stats (as attributes) and return Y predicted values. 31 | 32 | Parameters 33 | ---------- 34 | X : array-like, shape = [n_samples, n_features] 35 | Predictor variables, where n_samples is the number of samples and n_features is the number of predictors. 36 | 37 | Y : array-like, shape = [n_samples, 1] 38 | Response variables, where n_samples is the number of samples. 39 | 40 | Returns 41 | ------- 42 | y_pred_train : array-like, shape = [n_samples, 1] 43 | Predicted y score for samples. 44 | """ 45 | 46 | # If batch-size is None: 47 | if self.batch_size is None: 48 | self.batch_size = len(X) 49 | 50 | # Ensure array and error check 51 | X, Y = self.input_check(X, Y) 52 | 53 | self.model = MLPClassifier(hidden_layer_sizes=(self.n_nodes,), activation="logistic", solver=self.optimizer, learning_rate_init=self.learning_rate, momentum=self.momentum, batch_size=self.batch_size, nesterovs_momentum=False, max_iter=self.n_epochs) 54 | 55 | # Fit 56 | self.model.fit(X, Y) 57 | 58 | y_pred_train = self.model.predict(X) 59 | 60 | # Storing X, Y, and Y_pred 61 | self.Y_pred = y_pred_train 62 | self.X = X 63 | self.Y = Y 64 | return y_pred_train 65 | 66 | def test(self, X, Y=None): 67 | """Calculate and return Y predicted value. 68 | 69 | Parameters 70 | ---------- 71 | X : array-like, shape = [n_samples, n_features] 72 | Test variables, where n_samples is the number of samples and n_features is the number of predictors. 73 | 74 | Returns 75 | ------- 76 | y_pred_test : array-like, shape = [n_samples, 1] 77 | Predicted y score for samples. 
78 | """ 79 | y_pred_test = self.model.predict(X) 80 | return y_pred_test 81 | -------------------------------------------------------------------------------- /cimcb/utils/scale.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | 4 | def scale(x, axis=0, ddof=1, method="auto", mu="default", sigma="default", return_mu_sigma=False): 5 | """Scales x (which can include nans) with method: 'auto', 'pareto', 'vast', or 'level'. 6 | 7 | Parameters 8 | ---------- 9 | x: array-like 10 | An array-like object that contains the data. 11 | 12 | axis: integer or None, (default 0) 13 | The axis along which to operate 14 | 15 | ddof: integer, (default 1) 16 | The degrees of freedom correction. Note, by default ddof=1 unlike scipy.stats.zscore with ddof=0. 17 | 18 | method: string, (default "auto") 19 | Method used to scale x. Accepted methods are 'auto', 'pareto', 'vast' and 'level'. 20 | 21 | mu: number or "default", (default "default") 22 | If mu is provided it is used, however, by default it is calculated. 23 | 24 | sigma: number or "default", (default "default") 25 | If sigma is provided it is used, however, by default it is calculated. 26 | 27 | return_mu_sigma: boolean, (default False) 28 | If return_mu_sigma is True, mu and sigma are returned instead of z. Note, this is useful if mu and sigma want to be stored for future use. 29 | 30 | Returns if return_mu_sigma = False 31 | ---------------------------------- 32 | z: array-like 33 | An array-like object that contains the scaled data. 34 | 35 | Returns if return_mu_sigma = True 36 | --------------------------------- 37 | mu: number 38 | Calculated mu for x given axis and ddof. 39 | 40 | sigma: number 41 | Calculated sigma for x given axis and ddof. 
42 | """ 43 | 44 | x = np.array(x) 45 | 46 | # Simplier if we tranpose X if axis=1 (return x.T after the calculations) 47 | if axis == 1: 48 | x = x.T 49 | 50 | # Expand dimension if array is 1d 51 | if x.ndim == 1: 52 | x = np.expand_dims(x, axis=1) 53 | 54 | # Calculate mu and sigma if set to 'default' (ignoring nans) 55 | if mu is "default": 56 | mu = np.nanmean(x, axis=0) 57 | if sigma is "default": 58 | sigma = np.nanstd(x, axis=0, ddof=ddof) 59 | sigma = np.where(sigma == 0, 1, sigma) # if a value in sigma equals 0 it is converted to 1 60 | 61 | # Error check before scaling 62 | if len(mu) != len(x.T): 63 | raise ValueError("Length of mu array does not match x matrix.") 64 | if len(sigma) != len(x.T): 65 | raise ValueError("Length of sigma array does not match x matrix.") 66 | 67 | # Scale based on selected method 68 | if method is "auto": 69 | z = (x - mu) / sigma 70 | elif method is "pareto": 71 | z = (x - mu) / np.sqrt(sigma) 72 | elif method is "vast": 73 | z = ((x - mu) / sigma) * (mu / sigma) 74 | elif method is "level": 75 | z = (x - mu) / mu 76 | else: 77 | raise ValueError("Method has to be either 'auto', 'pareto', 'vast', or 'level'.") 78 | 79 | # Return x.T if axis = 1 80 | if axis == 1: 81 | z = z.T 82 | 83 | if return_mu_sigma is True: 84 | return z, mu, sigma 85 | else: 86 | return z 87 | -------------------------------------------------------------------------------- /cimcb/model/NN_LinearLinear_Sklearn.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from keras.callbacks import Callback 3 | from keras.optimizers import SGD 4 | from sklearn.neural_network import MLPClassifier 5 | from keras.models import Sequential 6 | from keras.layers import Dense 7 | from .BaseModel import BaseModel 8 | from ..utils import YpredCallback 9 | 10 | 11 | class NN_LinearLinear_Sklearn(BaseModel): 12 | """2 Layer linear-linear neural network using Keras""" 13 | 14 | parametric = False 15 | bootlist = None 16 | 17 | def __init__(self, n_nodes=2, epochs=200, learning_rate=0.01, momentum=0.0, decay=0.0, nesterov=False, loss="binary_crossentropy", batch_size=None, verbose=0): 18 | self.n_nodes = n_nodes 19 | self.verbose = verbose 20 | self.n_epochs = epochs 21 | self.k = n_nodes 22 | self.batch_size = batch_size 23 | self.loss = loss 24 | self.learning_rate = learning_rate 25 | self.momentum = momentum 26 | self.decay = decay 27 | self.optimizer = "sgd" 28 | 29 | def train(self, X, Y, epoch_ypred=False, epoch_xtest=None): 30 | """ Fit the neural network model, save additional stats (as attributes) and return Y predicted values. 31 | 32 | Parameters 33 | ---------- 34 | X : array-like, shape = [n_samples, n_features] 35 | Predictor variables, where n_samples is the number of samples and n_features is the number of predictors. 36 | 37 | Y : array-like, shape = [n_samples, 1] 38 | Response variables, where n_samples is the number of samples. 39 | 40 | Returns 41 | ------- 42 | y_pred_train : array-like, shape = [n_samples, 1] 43 | Predicted y score for samples. 
44 | """ 45 | 46 | # If batch-size is None: 47 | if self.batch_size is None: 48 | self.batch_size = len(X) 49 | 50 | # Ensure array and error check 51 | X, Y = self.input_check(X, Y) 52 | 53 | self.model = MLPClassifier(hidden_layer_sizes=(self.n_nodes,), 54 | activation='identity', 55 | solver=self.optimizer, 56 | learning_rate_init=self.learning_rate, 57 | momentum=self.momentum, 58 | batch_size=self.batch_size, 59 | nesterovs_momentum=False, 60 | max_iter=self.n_epochs) 61 | 62 | # Fit 63 | self.model.fit(X, Y) 64 | 65 | y_pred_train = self.model.predict(X) 66 | 67 | # Storing X, Y, and Y_pred 68 | self.Y_pred = y_pred_train 69 | self.X = X 70 | self.Y = Y 71 | return y_pred_train 72 | 73 | def test(self, X, Y=None): 74 | """Calculate and return Y predicted value. 75 | 76 | Parameters 77 | ---------- 78 | X : array-like, shape = [n_samples, n_features] 79 | Test variables, where n_samples is the number of samples and n_features is the number of predictors. 80 | 81 | Returns 82 | ------- 83 | y_pred_test : array-like, shape = [n_samples, 1] 84 | Predicted y score for samples. 85 | """ 86 | y_pred_test = self.model.predict(X) 87 | return y_pred_test 88 | -------------------------------------------------------------------------------- /cimcb/model/NN_LinearLogit_Sklearn.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from keras.callbacks import Callback 3 | from keras.optimizers import SGD 4 | from sklearn.neural_network import MLPClassifier 5 | from keras.models import Sequential 6 | from keras.layers import Dense 7 | from .BaseModel import BaseModel 8 | from ..utils import YpredCallback 9 | 10 | 11 | class NN_LinearLogit_Sklearn(BaseModel): 12 | """2 Layer linear-linear neural network using Keras""" 13 | 14 | parametric = False 15 | bootlist = None 16 | 17 | def __init__(self, n_nodes=2, epochs2=200, learning_rate=0.01, momentum=0.0, decay=0.0, nesterov=False, loss="binary_crossentropy", batch_size=None, verbose=0): 18 | self.n_nodes = n_nodes 19 | self.verbose = verbose 20 | self.n_epochs = epochs2 21 | self.k = n_nodes 22 | self.batch_size = batch_size 23 | self.loss = loss 24 | self.learning_rate = learning_rate 25 | self.momentum = momentum 26 | self.decay = decay 27 | self.optimizer = "sgd" 28 | 29 | def train(self, X, Y, epoch_ypred=False, epoch_xtest=None): 30 | """ Fit the neural network model, save additional stats (as attributes) and return Y predicted values. 31 | 32 | Parameters 33 | ---------- 34 | X : array-like, shape = [n_samples, n_features] 35 | Predictor variables, where n_samples is the number of samples and n_features is the number of predictors. 36 | 37 | Y : array-like, shape = [n_samples, 1] 38 | Response variables, where n_samples is the number of samples. 39 | 40 | Returns 41 | ------- 42 | y_pred_train : array-like, shape = [n_samples, 1] 43 | Predicted y score for samples. 
44 | """ 45 | 46 | # If batch-size is None: 47 | if self.batch_size is None: 48 | self.batch_size = len(X) 49 | 50 | # Ensure array and error check 51 | X, Y = self.input_check(X, Y) 52 | 53 | self.model = MLPClassifier(hidden_layer_sizes=(self.n_nodes,), 54 | activation='identity', 55 | solver=self.optimizer, 56 | learning_rate_init=self.learning_rate, 57 | momentum=self.momentum, 58 | batch_size=self.batch_size, 59 | nesterovs_momentum=False, 60 | max_iter=self.n_epochs) 61 | 62 | # Fit 63 | self.model.fit(X, Y) 64 | 65 | y_pred_train = self.model.predict(X) 66 | 67 | # Storing X, Y, and Y_pred 68 | self.Y_pred = y_pred_train 69 | self.X = X 70 | self.Y = Y 71 | return y_pred_train 72 | 73 | def test(self, X, Y=None): 74 | """Calculate and return Y predicted value. 75 | 76 | Parameters 77 | ---------- 78 | X : array-like, shape = [n_samples, n_features] 79 | Test variables, where n_samples is the number of samples and n_features is the number of predictors. 80 | 81 | Returns 82 | ------- 83 | y_pred_test : array-like, shape = [n_samples, 1] 84 | Predicted y score for samples. 85 | """ 86 | y_pred_test = self.model.predict(X) 87 | return y_pred_test 88 | -------------------------------------------------------------------------------- /cimcb/bootstrap/Perc.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from .BaseBootstrap import BaseBootstrap 3 | from ..utils import nested_getattr 4 | 5 | 6 | class Perc(BaseBootstrap): 7 | """ Returns bootstrap confidence intervals using the percentile boostrap interval. 8 | 9 | Parameters 10 | ---------- 11 | model : object 12 | This object is assumed to store bootlist attributes in .model (e.g. modelPLS.model.x_scores_). 13 | 14 | X : array-like, shape = [n_samples, n_features] 15 | Predictor variables, where n_samples is the number of samples and n_features is the number of predictors. 16 | 17 | Y : array-like, shape = [n_samples, 1] 18 | Response variables, where n_samples is the number of samples. 19 | 20 | bootlist : array-like, shape = [n_bootlist, 1] 21 | List of attributes to calculate and return bootstrap confidence intervals. 22 | 23 | bootnum : a positive integer, (default 100) 24 | The number of bootstrap samples used in the computation. 25 | 26 | seed: integer or None (default None) 27 | Used to seed the generator for the resample with replacement. 28 | 29 | Returns 30 | ------- 31 | bootci : dict of arrays 32 | Keys correspond to attributes in bootlist. 33 | Each array contains 95% confidence intervals. 34 | To return bootci, initalise then use method run(). 
35 | """ 36 | 37 | def __init__(self, model, X, Y, bootlist, bootnum=100, seed=None): 38 | super().__init__(model=model, X=X, Y=Y, bootlist=bootlist, bootnum=bootnum, seed=seed) 39 | 40 | def calc_stat(self): 41 | """Stores selected attributes (from self.bootlist) for the original model.""" 42 | self.stat = {} 43 | for i in self.bootlist: 44 | self.stat[i] = nested_getattr(self.model, i) 45 | 46 | def calc_bootidx(self): 47 | super().calc_bootidx() 48 | 49 | def calc_bootstat(self): 50 | super().calc_bootstat() 51 | 52 | def calc_bootci(self): 53 | self.bootci = {} 54 | for i in self.bootlist: 55 | self.bootci[i] = self.bootci_method(self.bootstat[i], self.stat[i]) 56 | 57 | def run(self): 58 | self.calc_stat() 59 | self.calc_bootidx() 60 | self.calc_bootstat() 61 | self.calc_bootci() 62 | return self.bootci 63 | 64 | @staticmethod 65 | def bootci_method(bootstat, stat): 66 | """Calculates bootstrap confidence intervals using the percentile bootstrap interval.""" 67 | if bootstat[0].ndim == 1: 68 | boot_ci = [] 69 | # Calculate bootci for each component (peak), and append it to bootci 70 | for i in range(len(bootstat[0])): 71 | bootstat_i = [item[i] for item in bootstat] 72 | lower_ci = np.percentile(bootstat_i, 2.5) 73 | upper_ci = np.percentile(bootstat_i, 97.5) 74 | boot_ci.append([lower_ci, upper_ci]) 75 | boot_ci = np.array(boot_ci) 76 | 77 | # Recursive component (to get ndim = 1, and append) 78 | else: 79 | ncomp = stat.shape[1] 80 | boot_ci = [] 81 | for k in range(ncomp): 82 | bootstat_k = [] 83 | for j in range(len(bootstat)): 84 | bootstat_k.append(bootstat[j][:, k]) 85 | boot_ci_k = Perc.bootci_method(bootstat_k, stat[:, k]) 86 | boot_ci.append(boot_ci_k) 87 | boot_ci = np.array(boot_ci) 88 | return boot_ci 89 | -------------------------------------------------------------------------------- /cimcb/utils/binary_metrics.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from sklearn.metrics import confusion_matrix, roc_auc_score 3 | 4 | 5 | def binary_metrics(y_true, y_pred, cut_off=0.5, parametric=True, k=None): 6 | """ Return a dict of binary stats with the following metrics: R2, auc, accuracy, precision, sensitivity, specificity, and F1 score. 7 | 8 | Parameters 9 | ---------- 10 | y_true : array-like, shape = [n_samples] 11 | Binary label for samples (0s and 1s). 12 | 13 | y_pred : array-like, shape = [n_samples] 14 | Predicted y score for samples. 15 | 16 | cut_off : number, (default 0.5) 17 | A value for y_pred greater-than or equal to the cut_off will be treated as 1, otherwise it will be treated as 0 for the confusion matrix. 18 | 19 | parametric : boolean, (default True) 20 | If parametric is True, calculate R2. 21 | 22 | Returns 23 | ------- 24 | stats: dict 25 | dict containing calculated R2, auc, accuracy, precision, sensitivity, specificity, and F1 score. 
26 | """ 27 | 28 | # Convert to array 29 | y_true_arr = np.array(y_true) 30 | y_pred_arr = np.array(y_pred) 31 | 32 | # Error checks 33 | if y_true_arr.ndim != 1: 34 | raise ValueError("y_true should only have 1 dimension.") 35 | if y_pred_arr.ndim != 1: 36 | raise ValueError("y_pred should only have 1 dimension.") 37 | if y_true_arr.shape[0] != y_pred_arr.shape[0]: 38 | raise ValueError("The number of values in y_true should match y_pred.") 39 | if np.array_equal(sorted(set(y_true_arr)), [0, 1]) is False: 40 | raise ValueError("y_true should only contain 0s and 1s") 41 | 42 | # Get confusion matrix 43 | try: 44 | y_pred_round = np.where(y_pred_arr >= cut_off, 1, 0) 45 | except RuntimeWarning: 46 | raise ValueError("Kevin: This warning says there are nans. Something is not right if y predicted are nans.") 47 | tn, fp, fn, tp = confusion_matrix(y_true_arr, y_pred_round).ravel() 48 | 49 | # Binary statistics dictionary 50 | stats = {} 51 | if parametric is True: 52 | stats["R²"] = 1 - (sum((y_true_arr - y_pred_arr) ** 2) / sum((y_true_arr - np.mean(y_true_arr)) ** 2)) 53 | else: 54 | stats["R²"] = np.nan 55 | 56 | try: 57 | stats["AUC"] = roc_auc_score(y_true_arr, y_pred_arr) 58 | except ValueError: 59 | raise ValueError("You need to lower the learning_rate! This is a common issue when using the ‘mean_squared_error’ loss function called exploding gradients. 'At an extreme, the values of weights can become so large as to overflow and result in NaN values' (REF: https://machinelearningmastery.com/exploding-gradients-in-neural-networks/).") 60 | 61 | stats["ACCURACY"] = safe_div((tp + tn), (tp + tn + fp + fn)) 62 | stats["PRECISION"] = safe_div((tp), (tp + fp)) 63 | stats["SENSITIVITY"] = safe_div((tp), (tp + fn)) 64 | stats["SPECIFICITY"] = safe_div((tn), (tn + fp)) 65 | stats["F1-SCORE"] = safe_div((2 * tp), (2 * tp + fp + fn)) 66 | 67 | # Additional: AIC/BIC/SSE 68 | n = len(y_true) 69 | resid = y_true - y_pred 70 | rss = sum(resid ** 2) 71 | if rss == 0: 72 | stats["SSE"] = 0 73 | stats["AIC"] = 0 74 | stats["BIC"] = 0 75 | else: 76 | stats["SSE"] = rss / n 77 | if k is None: 78 | stats["AIC"] = 0 79 | stats["BIC"] = 0 80 | else: 81 | stats["AIC"] = 2 * k - 2 * np.log(rss) 82 | stats["BIC"] = n * np.log(rss / n) + k * np.log(n) 83 | return stats 84 | 85 | 86 | def safe_div(a, b): 87 | """Return np.nan if the demoninator is 0.""" 88 | if b == 0: 89 | return np.nan 90 | return a / b 91 | -------------------------------------------------------------------------------- /cimcb/model/NN_SoftmaxSoftmax.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from keras.callbacks import Callback 3 | from keras.optimizers import SGD, Adam 4 | from keras.models import Sequential 5 | from keras.layers import Dense 6 | from scipy.stats import logistic 7 | from .BaseModel import BaseModel 8 | from ..utils import YpredCallback 9 | 10 | 11 | class NN_SoftmaxSoftmax(BaseModel): 12 | """2 Layer logistic-logistic neural network using Keras""" 13 | 14 | parametric = True 15 | bootlist = None 16 | 17 | def __init__(self, n_neurons=2, epochs=200, learning_rate=0.01, momentum=0.0, decay=0.0, nesterov=False, loss="categorical_crossentropy", batch_size=None, verbose=0): 18 | self.n_neurons = n_neurons 19 | self.verbose = verbose 20 | self.n_epochs = epochs 21 | self.k = n_neurons 22 | self.batch_size = batch_size 23 | self.loss = loss 24 | self.optimizer = SGD(lr=learning_rate, momentum=momentum, decay=decay, nesterov=nesterov) 25 | #self.optimizer = 
--------------------------------------------------------------------------------
/cimcb/model/NN_SoftmaxSoftmax.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from keras.callbacks import Callback
3 | from keras.optimizers import SGD, Adam
4 | from keras.models import Sequential
5 | from keras.layers import Dense
6 | from scipy.stats import logistic
7 | from .BaseModel import BaseModel
8 | from ..utils import YpredCallback
9 | 
10 | 
11 | class NN_SoftmaxSoftmax(BaseModel):
12 |     """2 Layer neural network using Keras (sigmoid hidden layer, softmax output layer)"""
13 | 
14 |     parametric = True
15 |     bootlist = None
16 | 
17 |     def __init__(self, n_neurons=2, epochs=200, learning_rate=0.01, momentum=0.0, decay=0.0, nesterov=False, loss="categorical_crossentropy", batch_size=None, verbose=0):
18 |         self.n_neurons = n_neurons
19 |         self.verbose = verbose
20 |         self.n_epochs = epochs
21 |         self.k = n_neurons
22 |         self.batch_size = batch_size
23 |         self.loss = loss
24 |         self.optimizer = SGD(lr=learning_rate, momentum=momentum, decay=decay, nesterov=nesterov)
25 |         # self.optimizer = Adam(lr=learning_rate, decay=decay)
26 | 
27 |         self.__name__ = 'cimcb.model.NN_SoftmaxSoftmax'
28 |         self.__params__ = {'n_neurons': n_neurons, 'epochs': epochs, 'learning_rate': learning_rate, 'momentum': momentum, 'decay': decay, 'nesterov': nesterov, 'loss': loss, 'batch_size': batch_size, 'verbose': verbose}
29 | 
30 |     def set_params(self, params):
31 |         self.__init__(**params)
32 | 
33 |     def train(self, X, Y, epoch_ypred=False, epoch_xtest=None):
34 |         """ Fit the neural network model, save additional stats (as attributes) and return Y predicted values.
35 | 
36 |         Parameters
37 |         ----------
38 |         X : array-like, shape = [n_samples, n_features]
39 |             Predictor variables, where n_samples is the number of samples and n_features is the number of predictors.
40 | 
41 |         Y : array-like, shape = [n_samples, 1]
42 |             Response variables, where n_samples is the number of samples.
43 | 
44 |         Returns
45 |         -------
46 |         y_pred_train : array-like, shape = [n_samples, 1]
47 |             Predicted y score for samples.
48 |         """
49 | 
50 |         # If batch-size is None:
51 |         if self.batch_size is None:
52 |             self.batch_size = len(X)
53 | 
54 |         self.model = Sequential()
55 |         self.model.add(Dense(self.n_neurons, activation="sigmoid", input_dim=len(X.T)))
56 |         self.model.add(Dense(len(Y[0]), activation="softmax"))
57 |         self.model.compile(optimizer=self.optimizer, loss=self.loss, metrics=["accuracy"])
58 | 
59 |         # If epoch_ypred is True, calculate ypred for each epoch
60 |         if epoch_ypred is True:
61 |             self.epoch = YpredCallback(self.model, X, epoch_xtest)
62 |         else:
63 |             self.epoch = Callback()
64 | 
65 |         # Fit
66 |         self.model.fit(X, Y, epochs=self.n_epochs, batch_size=self.batch_size, verbose=self.verbose, callbacks=[self.epoch])
67 |         y_pred_train = self.model.predict(X)
68 | 
69 |         # Storing X, Y, and Y_pred
70 |         self.Y_pred = y_pred_train
71 |         self.X = X
72 |         self.Y = Y
73 |         return y_pred_train
74 | 
75 |     def test(self, X, Y=None):
76 |         """Calculate and return Y predicted value.
77 | 
78 |         Parameters
79 |         ----------
80 |         X : array-like, shape = [n_samples, n_features]
81 |             Test variables, where n_samples is the number of samples and n_features is the number of predictors.
82 | 
83 |         Returns
84 |         -------
85 |         y_pred_test : array-like, shape = [n_samples, 1]
86 |             Predicted y score for samples.
87 |         """
88 |         y_pred_test = self.model.predict(X)
89 |         return y_pred_test
90 | 
--------------------------------------------------------------------------------
/cimcb/utils/multiclass_metrics.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from sklearn.metrics import multilabel_confusion_matrix, roc_auc_score
3 | 
4 | 
5 | def multiclass_metrics(y_true, y_pred, cut_off=0.5, parametric=True, k=None):
6 |     """ Return a dict of macro-averaged multiclass stats with the following metrics: R², AUC, accuracy, precision, sensitivity, specificity, and F1 score.
7 | 
8 |     Parameters
9 |     ----------
10 |     y_true : array-like, shape = [n_samples]
11 |         Binary label for samples (0s and 1s).
12 | 
13 |     y_pred : array-like, shape = [n_samples]
14 |         Predicted y score for samples.
15 | 
16 |     cut_off : number, (default 0.5)
17 |         A value for y_pred greater-than or equal to the cut_off will be treated as 1, otherwise it will be treated as 0 for the confusion matrix.
18 | 
19 |     parametric : boolean, (default True)
20 |         If parametric is True, calculate R².
21 | 
22 |     Returns
23 |     -------
24 |     stats: dict
25 |         dict containing the calculated macro-averaged R², AUC, accuracy, precision, sensitivity, specificity, and F1 score.
/cimcb/utils/multiclass_metrics.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from sklearn.metrics import multilabel_confusion_matrix, roc_auc_score
3 | 
4 | 
5 | def multiclass_metrics(y_true, y_pred, cut_off=0.5, parametric=True, k=None):
6 |     """ Return a dict of multiclass stats (macro-averaged) with the following metrics: R2, auc, accuracy, precision, sensitivity, specificity, and F1 score.
7 | 
8 |     Parameters
9 |     ----------
10 |     y_true : array-like, shape = [n_samples, n_classes]
11 |         One-hot encoded labels for samples.
12 | 
13 |     y_pred : array-like, shape = [n_samples, n_classes]
14 |         Predicted y score for each class per sample.
15 | 
16 |     cut_off : number, (default 0.5)
17 |         Kept for API consistency with binary_metrics; class assignment here uses the argmax of y_pred rather than a cut-off.
18 | 
19 |     parametric : boolean, (default True)
20 |         If parametric is True, calculate R2.
21 | 
22 |     Returns
23 |     -------
24 |     stats: dict
25 |         dict containing calculated R2, auc, accuracy, precision, sensitivity, specificity, and F1 score.
26 |     """
27 | 
28 |     # Convert to array
29 |     y_true_arr = np.array(y_true)
30 |     y_pred_arr = np.array(y_pred)
31 | 
32 |     # Error checks are not yet implemented here (see binary_metrics for the binary equivalents)
33 | 
34 |     # Get confusion matrix (class assignment by argmax; requires a NaN-free y_pred)
35 |     if np.isnan(y_pred_arr).any():
36 |         raise ValueError("y_pred contains NaN values, so a confusion matrix cannot be calculated. Check the model predictions.")
37 |     y_pred_round = np.zeros(y_pred_arr.shape)
38 |     idx = y_pred_arr.argmax(axis=-1)
39 |     for i in range(len(idx)):
40 |         y_pred_round[i, idx[i]] = 1
41 |     conf = multilabel_confusion_matrix(y_true_arr, y_pred_round).ravel()
42 |     n_groups = len(conf) // 4
43 |     tn = conf[0::4]
44 |     fp = conf[1::4]
45 |     fn = conf[2::4]
46 |     tp = conf[3::4]
47 | 
48 |     # Multi-Class Stats Dictionary (Macro Average)
49 |     stats = {}
50 | 
51 |     # R^2 (macro R^2)
52 |     ones = np.ones(n_groups)
53 |     RSS = sum((y_true_arr - y_pred_arr) ** 2)
54 |     TSS = sum((y_true_arr - np.mean(y_true_arr, axis=0)) ** 2)
55 |     R2 = ones - (RSS / TSS)
56 |     R2macro = sum(R2) / n_groups
57 |     stats["R²"] = R2macro
58 | 
59 |     try:
60 |         stats["AUC"] = roc_auc_score(y_true_arr, y_pred_arr, average='macro')
61 |     except ValueError:
62 |         raise ValueError("AUC could not be calculated. This usually means y_pred contains NaNs or constant values, e.g. from exploding gradients when the learning_rate is too high (REF: https://machinelearningmastery.com/exploding-gradients-in-neural-networks/). Try lowering the learning_rate.")
63 | 
64 |     stats["ACCURACY"] = safe_div(np.sum(safe_div((tp + tn), (tp + tn + fp + fn))), n_groups)
65 |     stats["PRECISION"] = safe_div(np.sum(safe_div((tp), (tp + fp))), n_groups)
66 |     stats["SENSITIVITY"] = safe_div(np.sum(safe_div((tp), (tp + fn))), n_groups)
67 |     stats["SPECIFICITY"] = safe_div(np.sum(safe_div((tn), (tn + fp))), n_groups)
68 |     stats["F1-SCORE"] = safe_div(np.sum(safe_div((2 * tp), (2 * tp + fp + fn))), n_groups)
69 | 
70 |     # SSE/AIC/BIC are not currently calculated for multiclass models
71 |     stats["SSE"] = 0
72 |     stats["AIC"] = 0
73 |     stats["BIC"] = 0
74 |     # Per Group
75 |     # stats["ACCURACYgroup"] = safe_div((tp + tn), (tp + tn + fp + fn))
76 |     # stats["PRECISIONgroup"] = safe_div((tp), (tp + fp))
77 |     # stats["SENSITIVITYgroup"] = safe_div((tp), (tp + fn))
78 |     # stats["SPECIFICITYgroup"] = safe_div((tn), (tn + fp))
79 |     # stats["F1-SCOREgroup"] = safe_div((2 * tp), (2 * tp + fp + fn))
80 |     return stats
81 | 
82 | 
83 | def safe_div(a, b):
84 |     """Return np.nan if the denominator is 0 (works for scalar or array denominators)."""
85 |     if np.any(b == 0):
86 |         return np.nan
87 |     return a / b
88 | 
--------------------------------------------------------------------------------
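A minimal usage sketch for multiclass_metrics (illustrative; this assumes multiclass_metrics is exported from cimcb.utils like the other helpers, and both arguments are [n_samples, n_classes] matrices):

    import numpy as np
    from cimcb.utils import multiclass_metrics

    Y_true = np.array([[1, 0], [1, 0], [0, 1], [0, 1]])                  # one-hot labels
    Y_prob = np.array([[0.8, 0.2], [0.4, 0.6], [0.3, 0.7], [0.1, 0.9]])  # predicted scores
    stats = multiclass_metrics(Y_true, Y_prob)
    print(stats["ACCURACY"], stats["F1-SCORE"])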
/cimcb/bootstrap/Per.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | from .BaseBootstrap import BaseBootstrap
3 | from ..utils import nested_getattr
4 | 
5 | 
6 | class Per(BaseBootstrap):
7 |     """ Returns bootstrap confidence intervals using the percentile bootstrap interval.
8 | 
9 |     Parameters
10 |     ----------
11 |     model : object
12 |         This object is assumed to store bootlist attributes in .model (e.g. modelPLS.model.x_scores_).
13 | 
14 |     X : array-like, shape = [n_samples, n_features]
15 |         Predictor variables, where n_samples is the number of samples and n_features is the number of predictors.
16 | 
17 |     Y : array-like, shape = [n_samples, 1]
18 |         Response variables, where n_samples is the number of samples.
19 | 
20 |     bootlist : array-like, shape = [n_bootlist, 1]
21 |         List of attributes to calculate and return bootstrap confidence intervals.
22 | 
23 |     bootnum : a positive integer, (default 100)
24 |         The number of bootstrap samples used in the computation.
25 | 
26 |     seed: integer or None (default None)
27 |         Used to seed the generator for the resample with replacement.
28 | 
29 |     Returns
30 |     -------
31 |     bootci : dict of arrays
32 |         Keys correspond to attributes in bootlist.
33 |         Each array contains 95% confidence intervals.
34 |         To return bootci, initialise then use method run().
35 |     """
36 | 
37 |     def __init__(self, model, bootnum=100, seed=None, n_cores=-1, stratify=True):
38 |         super().__init__(model=model, bootnum=bootnum, seed=seed, n_cores=n_cores, stratify=stratify)
39 |         self.__name__ = "Per"
40 | 
41 |     def calc_stat(self):
42 |         super().calc_stat()
43 | 
44 |     def calc_bootidx(self):
45 |         super().calc_bootidx()
46 | 
47 |     def calc_bootstat(self):
48 |         super().calc_bootstat()
49 | 
50 |     def calc_bootci(self):
51 |         self.bootci = {}
52 |         for i in self.bootlist:
53 |             self.bootci[i] = self.bootci_method(self.bootstat[i], self.stat[i])
54 | 
55 |     def run(self):
56 |         self.calc_stat()
57 |         self.calc_bootidx()
58 |         self.calc_bootstat()
59 |         self.calc_bootci()
60 | 
61 |     @staticmethod
62 |     def bootci_method(bootstat, stat):
63 |         """Calculates bootstrap confidence intervals using the percentile bootstrap interval."""
64 |         # Ensure stat is an ndarray so that .ndim is always available
65 |         stat = np.asarray(stat)
66 |         if stat.ndim == 1:
67 |             boot_ci = []
68 |             # Calculate bootci for each component (peak), and append it to bootci
69 |             for i in range(len(bootstat[0])):
70 |                 bootstat_i = [item[i] for item in bootstat]
71 |                 lower_ci = np.percentile(bootstat_i, 2.5)
72 |                 upper_ci = np.percentile(bootstat_i, 97.5)
73 |                 mid_ci = np.percentile(bootstat_i, 50)
74 |                 boot_ci.append([lower_ci, upper_ci, mid_ci])
75 |             boot_ci = np.array(boot_ci)
76 |         elif stat.ndim == 0:
77 |             lower_ci = np.percentile(bootstat, 2.5)
78 |             upper_ci = np.percentile(bootstat, 97.5)
79 |             mid_ci = np.percentile(bootstat, 50)
80 |             boot_ci = [lower_ci, upper_ci, mid_ci]
81 |             boot_ci = np.array(boot_ci)
82 |         # Recursive component (to get ndim = 1, and append)
83 |         else:
84 |             ncomp = stat.shape[1]
85 |             boot_ci = []
86 |             for k in range(ncomp):
87 |                 bootstat_k = []
88 |                 for j in range(len(bootstat)):
89 |                     bootstat_k.append(bootstat[j][:, k])
90 |                 boot_ci_k = Per.bootci_method(bootstat_k, stat[:, k])
91 |                 boot_ci.append(boot_ci_k)
92 |             boot_ci = np.array(boot_ci)
93 |         return boot_ci
94 | 
--------------------------------------------------------------------------------
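The percentile interval above reduces to three np.percentile calls per statistic; the same idea as a standalone sketch (illustrative, with a made-up bootstrap sample):

    import numpy as np

    rng = np.random.RandomState(42)
    bootstat = rng.normal(loc=0.8, scale=0.05, size=100)  # hypothetical bootstrapped AUCs
    lower_ci = np.percentile(bootstat, 2.5)
    upper_ci = np.percentile(bootstat, 97.5)
    mid_ci = np.percentile(bootstat, 50)
    print([lower_ci, upper_ci, mid_ci])  # 95% confidence interval plus median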
/cimcb/plot/pca.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pandas as pd
3 | from sklearn import decomposition
4 | from bokeh.plotting import output_notebook, show
5 | from bokeh.layouts import gridplot
6 | from .scatter import scatter
7 | from ..utils import ci95_ellipse
8 | 
9 | 
10 | def pca(X, pcx=1, pcy=2, group_label=None, sample_label=None, peak_label=None):
11 |     """Creates a PCA scores and loadings plot using Bokeh.
12 | 
13 |     Required Parameters
14 |     -------------------
15 |     X : array-like, shape = [n_samples, n_features]
16 |         Input data
17 |     """
18 | 
19 |     # Set model
20 |     model = decomposition.PCA()
21 |     model.fit(X)
22 |     scores_ = model.transform(X)
23 |     explained_var_ = model.explained_variance_ratio_ * 100
24 | 
25 |     # Extract scores, explained variance, and loadings for pcx and pcy
26 |     x_score = scores_[:, (pcx - 1)]
27 |     y_score = scores_[:, (pcy - 1)]
28 |     x_expvar = explained_var_[(pcx - 1)]
29 |     y_expvar = explained_var_[(pcy - 1)]
30 |     x_load = model.components_[(pcx - 1), :]
31 |     y_load = model.components_[(pcy - 1), :]
32 | 
33 |     # Colour for fig_score; if group_label is None, treat all samples as one group
34 |     if group_label is None:
35 |         col = ["blue", "green", "red"]
36 |         group_label = [0] * len(X)
37 |     else:
38 |         col = None
39 | 
40 |     # Ensure group_label is an np.array
41 |     group_label = np.array(group_label)
42 | 
43 |     # Scores plot
44 |     fig_score = scatter(x_score, y_score, group=group_label, label=sample_label, size=5, xlabel="PC {} ({:0.1f}%)".format(pcx, x_expvar), ylabel="PC {} ({:0.1f}%)".format(pcy, y_expvar), title="PCA Score Plot (PC{} vs. PC{})".format(pcx, pcy), font_size="15pt", width=490, height=430, hover_xy=False, col_palette=col)
45 | 
46 |     # Loadings plot
47 |     fig_load = scatter(x_load, y_load, size=7, label=peak_label, xlabel="PC {} ({:0.1f}%)".format(pcx, x_expvar), ylabel="PC {} ({:0.1f}%)".format(pcy, y_expvar), title="PCA Loadings Plot (PC{} vs. PC{})".format(pcx, pcy), font_size="15pt", width=490, height=430, hover_xy=False, shape="triangle", legend=False, hline=True, vline=True)
48 | 
49 |     # Score plot extra: 95% confidence ellipse using PCA
50 |     unique_group = np.sort(np.unique(group_label))
51 | 
52 |     # Set colour per group
53 |     list_color = ["red", "blue", "green", "black", "orange", "yellow", "brown", "cyan"]
54 |     while len(list_color) < len(unique_group):  # Loop over list_color if number of groups > len(list_colour)
55 |         list_color += list_color
56 | 
57 |     # Add 95% confidence ellipse for each unique group in a loop
58 |     for i in range(len(unique_group)):
59 |         # Get scores for the corresponding group
60 |         group_i_x = []
61 |         group_i_y = []
62 |         for j in range(len(group_label)):
63 |             if group_label[j] == unique_group[i]:
64 |                 group_i_x.append(x_score[j])
65 |                 group_i_y.append(y_score[j])
66 | 
67 |         # Calculate ci95 ellipse for each group
68 |         data_circ_group = pd.DataFrame({"0": group_i_x, "1": group_i_y})
69 |         m, outside_m = ci95_ellipse(data_circ_group, type="mean")
70 |         p, outside_p = ci95_ellipse(data_circ_group, type="pop")
71 | 
72 |         # Plot ci95 ellipse outer line
73 |         fig_score.line(m[:, 0], m[:, 1], color=list_color[i], line_width=2, alpha=0.8, line_dash="solid")
74 |         fig_score.line(p[:, 0], p[:, 1], color=list_color[i], alpha=0.4)
75 | 
76 |         # Plot ci95 ellipse shade
77 |         fig_score.patch(m[:, 0], m[:, 1], color=list_color[i], alpha=0.07)
78 |         fig_score.patch(p[:, 0], p[:, 1], color=list_color[i], alpha=0.01)
79 | 
80 |     # Output this figure with fig_score and fig_load
81 |     output_notebook()
82 |     fig = gridplot([[fig_score, fig_load]])
83 |     show(fig)
84 | 
--------------------------------------------------------------------------------
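A minimal usage sketch for the pca plot function (illustrative; the data matrix and group labels are hypothetical, and the figure renders in a Jupyter notebook via output_notebook):

    import numpy as np
    from cimcb.plot import pca

    X = np.random.rand(20, 5)                   # hypothetical data matrix
    groups = ["Control"] * 10 + ["Case"] * 10   # one group label per sample
    pca(X, pcx=1, pcy=2, group_label=groups)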
/cimcb/utils/knnimpute.py:
--------------------------------------------------------------------------------
1 | import warnings
2 | import numpy as np
3 | from scipy.spatial.distance import pdist, squareform
4 | from .wmean import wmean
5 | 
6 | 
7 | def knnimpute(x, k=3):
8 |     """kNN missing value imputation using Euclidean distance.
9 | 
10 |     Parameters
11 |     ----------
12 |     x: array-like
13 |         An array-like object that contains the data with NaNs.
14 | 
15 |     k: positive integer excluding 0, (default 3)
16 |         The number of nearest neighbours to use.
17 | 
18 |     Returns
19 |     -------
20 |     z: array-like
21 |         An array-like object corresponding to x with NaNs imputed.
22 |     """
23 | 
24 |     # Transpose x so we treat columns as features, and rows as samples
25 |     x = x.T
26 | 
27 |     # Error check for k value
28 |     if type(k) is not int:
29 |         raise ValueError("k is not an integer")
30 |     if k < 1:
31 |         raise ValueError("k must be greater than zero")
32 |     k_max = x.shape[1] - 1
33 |     if k_max < k:
34 |         raise ValueError("k value is too high. Max k value is {}".format(k_max))
35 | 
36 |     # z is the returned array with NaNs imputed
37 |     z = x.copy()
38 | 
39 |     # Use columns without NaNs for knnimpute
40 |     nan_check = np.isnan(x)
41 |     no_nan = np.where(sum(nan_check.T) == 0, 1, 0)
42 | 
43 |     # Error check that not all columns have NaNs
44 |     x_no_nan = x[no_nan == 1]
45 |     if x_no_nan.size == 0:
46 |         raise ValueError("All columns of the input data contain missing values. Unable to impute missing values.")
47 | 
48 |     # Calculate pairwise distances between columns, and convert to square-form distance matrix
49 |     pair_dist = pdist(x_no_nan.T, metric="euclidean")
50 |     sq_dist = squareform(pair_dist)
51 | 
52 |     # Make diagonals negative and sort
53 |     dist = np.sort(sq_dist - np.eye(sq_dist.shape[0], sq_dist.shape[1])).T
54 |     dist_idx = np.argsort(sq_dist - np.eye(sq_dist.shape[0], sq_dist.shape[1])).T
55 | 
56 |     # Find where neighbours are equal distance
57 |     equal_dist_a = np.diff(dist[1:].T, 1, 1).T == 0
58 |     equal_dist_a = equal_dist_a.astype(int)  # Convert to integer
59 |     equal_dist_b = np.zeros(len(dist))
60 |     equal_dist = np.concatenate((equal_dist_a, [equal_dist_b]))  # Concatenate
61 | 
62 |     # Get rows and cols for missing values
63 |     nan_idx = np.argwhere(nan_check)
64 |     nan_rows = nan_idx[:, 0]
65 |     nan_cols = nan_idx[:, 1]
66 |     # Make sure rows/cols are in a list (note: this happens when there is 1 missing value)
67 |     if type(nan_rows) is not np.ndarray:
68 |         nan_rows = [nan_rows]
69 |         nan_cols = [nan_cols]
70 | 
71 |     # Impute each NaN value
72 |     for i in range(len(nan_rows)):
73 | 
74 |         # Error check for rows with all NaNs
75 |         if np.isnan(x[nan_rows[i], :]).all():
76 |             warnings.warn("Row {} contains all NaNs, so Row {} is imputed with zeros.".format(nan_rows[i], nan_rows[i]), Warning)
77 | 
78 |         # Create a loop from 1 to len(dist_idx) - k
79 |         lastk = len(dist_idx) - k
80 |         loopk = [1]
81 |         while lastk > loopk[-1]:
82 |             loopk.append(loopk[-1] + 1)
83 | 
84 |         # Impute
85 |         for j in loopk:
86 |             L_a = equal_dist[j + k - 2 :, nan_cols[i]]
87 |             L = np.where(L_a == 0)[0][0]  # equal_dist neighbours
88 | 
89 |             x_vals_r = nan_rows[i]
90 |             x_vals_c = dist_idx[j : j + k + L, nan_cols[i]]
91 |             x_vals = x[x_vals_r, x_vals_c]
92 |             weights = 1 / dist[1 : k + L + 1, nan_cols[i]]
93 |             imp_val = wmean(x_vals, weights)  # imputed value
94 |             if imp_val is not np.nan:
95 |                 z[nan_rows[i], nan_cols[i]] = imp_val
96 |                 break
97 | 
98 |     # Transpose z
99 |     z = z.T
100 |     return z
101 | 
--------------------------------------------------------------------------------
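A minimal usage sketch for knnimpute (illustrative; the matrix is hypothetical, and the NaN is replaced by a distance-weighted mean over the k nearest complete samples):

    import numpy as np
    from cimcb.utils import knnimpute

    x = np.array([[1.0, 2.0, np.nan, 4.0],
                  [1.1, 2.1, 3.1, 4.1],
                  [0.9, 1.9, 2.9, 3.9]])
    z = knnimpute(x, k=2)   # returns x with the NaN imputed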
/cimcb/model/SVM.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import pandas as pd
3 | from sklearn.svm import SVC
4 | from sklearn.metrics import roc_auc_score
5 | from .BaseModel import BaseModel
6 | from ..utils import binary_metrics, binary_evaluation
7 | 
8 | 
9 | class SVM(BaseModel):
10 |     """Support Vector Machine"""
11 | 
12 |     parametric = True
13 |     bootlist = ["Y_pred", "model.eval_metrics_"]  # list of metrics to bootstrap
14 | 
15 |     def __init__(self, C=1.0, kernel="rbf", degree=3, gamma="auto", tol=0.001, max_iter=-1):
16 |         self.model = SVC(C=C, kernel=kernel, degree=degree, gamma=gamma, probability=True, tol=tol, max_iter=max_iter)
17 |         self.k = None
18 | 
19 |         self.__name__ = 'cimcb.model.SVM'
20 |         self.__params__ = {'C': C, 'kernel': kernel, 'degree': degree, 'gamma': gamma, 'tol': tol, 'max_iter': max_iter}
21 | 
22 |     def set_params(self, params):
23 |         self.__init__(**params)
24 | 
25 |     def train(self, X, Y):
26 |         """ Fit the SVM model, save additional stats (as attributes) and return Y predicted values.
27 | 
28 |         Parameters
29 |         ----------
30 |         X : array-like, shape = [n_samples, n_features]
31 |             Predictor variables, where n_samples is the number of samples and n_features is the number of predictors.
32 | 
33 |         Y : array-like, shape = [n_samples, 1]
34 |             Response variables, where n_samples is the number of samples.
35 | 
36 |         Returns
37 |         -------
38 |         y_pred_train : array-like, shape = [n_samples, 1]
39 |             Predicted y score for samples.
40 |         """
41 | 
42 |         # Ensure array and error check
43 |         X, Y = self.input_check(X, Y)
44 | 
45 |         # Fit the model
46 |         self.model.fit(X, Y)
47 | 
48 |         # predict_proba was designed for multiple groups, so its column order is not guaranteed;
49 |         # pick the column whose scores give the higher AUC as the positive-class prediction
50 |         y_pred = self.model.predict_proba(X)
51 |         pred_0 = roc_auc_score(Y, y_pred[:, 0])
52 |         pred_1 = roc_auc_score(Y, y_pred[:, 1])
53 |         if pred_0 > pred_1:
54 |             self.pred_index = 0
55 |         else:
56 |             self.pred_index = 1
57 | 
58 |         # Calculate and return Y prediction value
59 |         y_pred_train = np.array(self.model.predict_proba(X)[:, self.pred_index])
60 | 
61 |         self.model.y_loadings_ = np.array([0, 0, 0])
62 |         self.model.x_scores_ = np.array([0, 0, 0])
63 |         self.model.pctvar_ = np.array([0, 0, 0])
64 | 
65 |         # Storing X, Y, and Y_pred
66 |         self.X = X
67 |         self.Y = Y
68 |         self.Y_pred = y_pred_train
69 |         self.metrics_key = []
70 |         self.model.eval_metrics_ = []
71 |         bm = binary_evaluation(Y, y_pred_train)
72 |         for key, value in bm.items():
73 |             self.model.eval_metrics_.append(value)
74 |             self.metrics_key.append(key)
75 | 
76 |         self.model.eval_metrics_ = np.array(self.model.eval_metrics_)
77 | 
78 |         self.Y_train = Y
79 |         self.Y_pred_train = y_pred_train
80 | 
81 |         return y_pred_train
82 | 
83 |     def test(self, X, Y=None):
84 |         """Calculate and return Y predicted value.
85 | 
86 |         Parameters
87 |         ----------
88 |         X : array-like, shape = [n_samples, n_features]
89 |             Test variables, where n_samples is the number of samples and n_features is the number of predictors.
90 | 
91 |         Returns
92 |         -------
93 |         y_pred_test : array-like, shape = [n_samples, 1]
94 |             Predicted y score for samples.
95 |         """
96 | 
97 |         # Convert X to a numpy array if it is a DataFrame or Series
98 |         if isinstance(X, (pd.DataFrame, pd.Series)):
99 |             X = np.array(X)
100 | 
101 |         # Calculate and return Y predicted value
102 |         y_pred_test = np.array(self.model.predict_proba(X)[:, self.pred_index])
103 |         if Y is not None:
104 |             self.metrics_key = []
105 |             self.model.eval_metrics_ = []
106 |             bm = binary_evaluation(Y, y_pred_test)
107 |             for key, value in bm.items():
108 |                 self.model.eval_metrics_.append(value)
109 |                 self.metrics_key.append(key)
110 | 
111 |             self.model.eval_metrics_ = np.array(self.model.eval_metrics_)
112 | 
113 |         self.Y_pred = y_pred_test
114 |         return y_pred_test
115 | 
--------------------------------------------------------------------------------
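A minimal usage sketch for the SVM wrapper (illustrative; this assumes SVM is exported from cimcb.model as the directory layout suggests, and X/y are hypothetical arrays with binary 0/1 labels):

    import numpy as np
    from cimcb.model import SVM

    X = np.random.rand(40, 6)            # hypothetical feature matrix
    y = np.array([0] * 20 + [1] * 20)    # binary labels
    model = SVM(C=1.0, kernel="rbf")
    y_prob = model.train(X, y)           # probability-like scores for the positive class
    y_prob_new = model.test(X)           # apply the fitted model to new data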
/cimcb/model/RBF_NN.py:
--------------------------------------------------------------------------------
1 | import numpy
2 | import numpy as np
3 | import math
4 | from sklearn.cluster import KMeans
5 | from .BaseModel import BaseModel
6 | 
7 | 
8 | class RBF_NN(BaseModel):
9 |     """Radial basis function neural network"""
10 | 
11 |     parametric = True
12 |     bootlist = None
13 | 
14 |     def __init__(self, n_clusters=8, max_iter=100):
15 |         self.n_clusters = n_clusters
16 |         self.max_iter = max_iter
17 |         self.k = n_clusters
18 | 
19 |         self.__name__ = 'cimcb.model.RBF_NN'
20 |         self.__params__ = {'n_clusters': n_clusters, 'max_iter': max_iter}
21 | 
22 |     def set_params(self, params):
23 |         self.__init__(**params)
24 | 
25 |     def train(self, X, Y):
26 |         """ Fit the rbf-nn model, save additional stats (as attributes) and return Y predicted values.
27 | 
28 |         Parameters
29 |         ----------
30 |         X : array-like, shape = [n_samples, n_features]
31 |             Predictor variables, where n_samples is the number of samples and n_features is the number of predictors.
32 | 
33 |         Y : array-like, shape = [n_samples, 1]
34 |             Response variables, where n_samples is the number of samples.
35 | 
36 |         Returns
37 |         -------
38 |         y_pred_train : array-like, shape = [n_samples, 1]
39 |             Predicted y score for samples.
40 | """ 41 | 42 | # Ensure array and error check 43 | X, Y = self.input_check(X, Y) 44 | 45 | km = KMeans(n_clusters=self.n_clusters, max_iter=self.max_iter) 46 | km.fit(X) 47 | cent = km.cluster_centers_ 48 | 49 | self.model = KMeans 50 | 51 | # Determine the value of sigma 52 | max = 0 53 | for i in range(self.n_clusters): 54 | for j in range(self.n_clusters): 55 | d = numpy.linalg.norm(cent[i] - cent[j]) 56 | if d > max: 57 | max = d 58 | d = max 59 | sigma = d / math.sqrt(2 * self.n_clusters) 60 | 61 | # Set up G matrix 62 | shape = X.shape 63 | row = shape[0] 64 | column = self.n_clusters 65 | G = numpy.empty((row, column), dtype=float) 66 | for i in range(row): 67 | for j in range(column): 68 | dist = numpy.linalg.norm(X[i] - cent[j]) 69 | G[i][j] = math.exp(-math.pow(dist, 2) / math.pow(2 * sigma, 2)) 70 | 71 | # Find W 72 | GTG = numpy.dot(G.T, G) 73 | GTG_inv = numpy.linalg.inv(GTG) 74 | fac = numpy.dot(GTG_inv, G.T) 75 | W = numpy.dot(fac, Y) 76 | self.cent = cent 77 | self.W = W 78 | self.G = G 79 | self.sigma = sigma 80 | y_pred_train = np.dot(G, W) 81 | self.xcols_num = len(X.T) 82 | cent2 = [] 83 | for i in range(len(self.cent.T)): 84 | something = [] 85 | for j in range(len(self.cent)): 86 | something.append(self.cent[j][i]) 87 | cent2.append(something) 88 | 89 | self.cent2 = np.array(cent2) 90 | 91 | self.vi = np.dot(self.cent2, self.W) 92 | 93 | self.model.x_scores_ = self.G 94 | self.model.y_loadings_ = self.W.reshape(1, len(self.W)) 95 | self.model.pctvar_ = np.ones((1, len(self.model.y_loadings_[0]))) 96 | self.X = X 97 | self.Y = Y 98 | self.Y_pred = y_pred_train 99 | return y_pred_train 100 | 101 | def test(self, X): 102 | """Calculate and return Y predicted value. 103 | 104 | Parameters 105 | ---------- 106 | X : array-like, shape = [n_samples, n_features] 107 | Test variables, where n_samples is the number of samples and n_features is the number of predictors. 108 | 109 | Returns 110 | ------- 111 | y_pred_test : array-like, shape = [n_samples, 1] 112 | Predicted y score for samples. 113 | """ 114 | 115 | # Set up G matrix 116 | shape = X.shape 117 | row = shape[0] 118 | column = self.n_clusters 119 | G = numpy.empty((row, column), dtype=float) 120 | for i in range(row): 121 | for j in range(column): 122 | dist = numpy.linalg.norm(X[i] - self.cent[j]) 123 | G[i][j] = math.exp(-math.pow(dist, 2) / math.pow(2 * self.sigma, 2)) 124 | y_pred_test = numpy.dot(G, self.W) 125 | return y_pred_test 126 | -------------------------------------------------------------------------------- /cimcb/bootstrap/CPer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy 3 | import pandas as pd 4 | from scipy.stats import norm 5 | import math 6 | import multiprocessing 7 | from copy import deepcopy 8 | from bokeh.layouts import widgetbox, gridplot, column, row, layout 9 | from bokeh.plotting import ColumnDataSource, figure, output_notebook, show 10 | from .BaseBootstrap import BaseBootstrap 11 | from itertools import combinations 12 | from ..plot import scatterCI, boxplot, distribution, scatter, scatter_ellipse 13 | from ..utils import nested_getattr, dict_95ci, dict_median_scores 14 | 15 | 16 | class CPer(BaseBootstrap): 17 | """ Returns bootstrap confidence intervals using the bias-corrected boostrap interval. 18 | 19 | Parameters 20 | ---------- 21 | model : object 22 | This object is assumed to store bootlist attributes in .model (e.g. modelPLS.model.x_scores_). 
23 | 24 | X : array-like, shape = [n_samples, n_features] 25 | Predictor variables, where n_samples is the number of samples and n_features is the number of predictors. 26 | 27 | Y : array-like, shape = [n_samples, 1] 28 | Response variables, where n_samples is the number of samples. 29 | 30 | bootlist : array-like, shape = [n_bootlist, 1] 31 | List of attributes to calculate and return bootstrap confidence intervals. 32 | 33 | bootnum : a positive integer, (default 100) 34 | The number of bootstrap samples used in the computation. 35 | 36 | seed: integer or None (default None) 37 | Used to seed the generator for the resample with replacement. 38 | 39 | Returns 40 | ------- 41 | bootci : dict of arrays 42 | Keys correspond to attributes in bootlist. 43 | Each array contains 95% confidence intervals. 44 | To return bootci, initalise then use method run(). 45 | """ 46 | 47 | def __init__(self, model, bootnum=100, seed=None, n_cores=-1, stratify=True): 48 | super().__init__(model=model, bootnum=bootnum, seed=seed, n_cores=n_cores, stratify=stratify) 49 | self.__name__ = "CPer" 50 | 51 | def calc_stat(self): 52 | super().calc_stat() 53 | 54 | def calc_bootidx(self): 55 | super().calc_bootidx() 56 | 57 | def calc_bootstat(self): 58 | super().calc_bootstat() 59 | 60 | def calc_bootci(self): 61 | self.bootci = {} 62 | for i in self.bootlist: 63 | self.bootci[i] = self.bootci_method(self.bootstat[i], self.stat[i]) 64 | 65 | def run(self): 66 | self.calc_stat() 67 | self.calc_bootidx() 68 | self.calc_bootstat() 69 | self.calc_bootci() 70 | 71 | @staticmethod 72 | def bootci_method(bootstat, stat): 73 | """Calculates bootstrap confidence intervals using the bias-corrected bootstrap interval.""" 74 | if stat.ndim == 1: 75 | nboot = len(bootstat) 76 | zalpha = norm.ppf(0.05 / 2) 77 | obs = stat # Observed mean 78 | meansum = np.zeros((1, len(obs))).flatten() 79 | for i in range(len(obs)): 80 | for j in range(len(bootstat)): 81 | if bootstat[j][i] >= obs[i]: 82 | meansum[i] = meansum[i] + 1 83 | prop = meansum / nboot # Proportion of times boot mean > obs mean 84 | z0 = -norm.ppf(prop) 85 | 86 | # new alpha 87 | pct1 = 100 * norm.cdf((2 * z0 + zalpha)) 88 | pct2 = 100 * norm.cdf((2 * z0 - zalpha)) 89 | pct3 = 100 * norm.cdf((2 * z0)) 90 | boot_ci = [] 91 | for i in range(len(pct1)): 92 | bootstat_i = [item[i] for item in bootstat] 93 | append_low = np.percentile(bootstat_i, pct1[i]) 94 | append_mid = np.percentile(bootstat_i, pct3[i]) 95 | append_upp = np.percentile(bootstat_i, pct2[i]) 96 | boot_ci.append([append_low, append_upp, append_mid]) 97 | boot_ci = np.array(boot_ci) 98 | 99 | # Recursive component (to get ndim = 1, and append) 100 | else: 101 | ncomp = stat.shape[1] 102 | boot_ci = [] 103 | for k in range(ncomp): 104 | bootstat_k = [] 105 | for j in range(len(bootstat)): 106 | bootstat_k.append(bootstat[j][:, k]) 107 | boot_ci_k = CPer.bootci_method(bootstat_k, stat[:, k]) 108 | boot_ci.append(boot_ci_k) 109 | boot_ci = np.array(boot_ci) 110 | return boot_ci 111 | -------------------------------------------------------------------------------- /cimcb/model/MBNN_SigmoidSigmoid_1Layer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from keras.callbacks import Callback 3 | from keras.optimizers import SGD 4 | from keras.models import Sequential 5 | from keras.models import Model 6 | from keras.layers import Input, Dense, Concatenate, LSTM, concatenate 7 | from keras.layers import Dense 8 | from .BaseModel import BaseModel 9 | from 
..utils import YpredCallback 10 | 11 | 12 | class MBNN_SigmoidSigmoid_1Layer(BaseModel): 13 | """2 Layer logistic-logistic neural network using Keras""" 14 | 15 | parametric = True 16 | bootlist = None 17 | 18 | def __init__(self, n_neurons_l1=2, n_neurons_l2=1, epochs=200, learning_rate=0.01, momentum=0.0, decay=0.0, nesterov=False, loss="binary_crossentropy", batch_size=None, verbose=0): 19 | self.n_neurons_l1 = n_neurons_l1 20 | self.verbose = verbose 21 | self.n_epochs = epochs 22 | self.batch_size = batch_size 23 | self.loss = loss 24 | self.optimizer = SGD(lr=learning_rate, momentum=momentum, decay=decay, nesterov=nesterov) 25 | 26 | self.__name__ = 'cimcb.model.MBNN_SigmoidSigmoid_1Layer' 27 | self.__params__ = {'n_neurons_l1': n_neurons_l1, 'n_neurons_l2': n_neurons_l2, 'epochs': epochs, 'learning_rate': learning_rate, 'momentum': momentum, 'decay': decay, 'nesterov': nesterov, 'loss': loss, 'batch_size': batch_size, 'verbose': verbose} 28 | 29 | def set_params(self, params): 30 | self.__init__(**params) 31 | 32 | def train(self, X, Y, epoch_ypred=False, epoch_xtest=None): 33 | """ Fit the neural network model, save additional stats (as attributes) and return Y predicted values. 34 | 35 | Parameters 36 | ---------- 37 | X : array-like, shape = [n_samples, n_features] 38 | Predictor variables, where n_samples is the number of samples and n_features is the number of predictors. 39 | 40 | Y : array-like, shape = [n_samples, 1] 41 | Response variables, where n_samples is the number of samples. 42 | 43 | Returns 44 | ------- 45 | y_pred_train : array-like, shape = [n_samples, 1] 46 | Predicted y score for samples. 47 | """ 48 | 49 | # If batch-size is None: 50 | if self.batch_size is None: 51 | self.batch_size = len(X) 52 | 53 | X1 = X[0] 54 | X2 = X[1] 55 | 56 | # Layer for X1 57 | input_X1 = Input(shape=(len(X1.T),)) 58 | layer1_X1 = Dense(self.n_neurons_l1, activation="sigmoid")(input_X1) 59 | layer1_X1 = Model(inputs=input_X1, outputs=layer1_X1) 60 | 61 | # Layer for X2 62 | input_X2 = Input(shape=(len(X2.T),)) 63 | layer1_X2 = Dense(self.n_neurons_l1, activation="sigmoid")(input_X2) 64 | layer1_X2 = Model(inputs=input_X2, outputs=layer1_X2) 65 | 66 | # Concatenate 67 | concat = concatenate([layer1_X1.output, layer1_X2.output]) 68 | #model_concat = Dense(self.n_neurons_l2, activation="sigmoid")(concat) 69 | model_concat = Dense(1, activation="sigmoid")(concat) 70 | 71 | self.model = Model(inputs=[layer1_X1.input, layer1_X2.input], outputs=model_concat) 72 | self.model.compile(optimizer=self.optimizer, loss=self.loss, metrics=["accuracy"]) 73 | 74 | # If epoch_ypred is True, calculate ypred for each epoch 75 | if epoch_ypred is True: 76 | self.epoch = YpredCallback(self.model, X, epoch_xtest) 77 | else: 78 | self.epoch = Callback() 79 | 80 | # Fit 81 | self.model.fit([X1, X2], Y, epochs=self.n_epochs, batch_size=self.batch_size, verbose=self.verbose, callbacks=[self.epoch]) 82 | 83 | # Not sure about the naming scheme (trying to match PLS) 84 | y_pred_train = self.model.predict(X).flatten() 85 | 86 | # Storing X, Y, and Y_pred 87 | self.Y_pred = y_pred_train 88 | self.X = X 89 | self.Y = Y 90 | return y_pred_train 91 | 92 | def test(self, X, Y=None): 93 | """Calculate and return Y predicted value. 94 | 95 | Parameters 96 | ---------- 97 | X : array-like, shape = [n_samples, n_features] 98 | Test variables, where n_samples is the number of samples and n_features is the number of predictors. 
99 | 100 | Returns 101 | ------- 102 | y_pred_test : array-like, shape = [n_samples, 1] 103 | Predicted y score for samples. 104 | """ 105 | 106 | y_pred_test = self.model.predict(X).flatten() 107 | return y_pred_test 108 | -------------------------------------------------------------------------------- /cimcb/model/MBNN_LinearSigmoid.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from keras.callbacks import Callback 3 | from keras.optimizers import SGD 4 | from keras.models import Sequential 5 | from keras.models import Model 6 | from keras.layers import Input, Dense, Concatenate, LSTM, concatenate 7 | from keras.layers import Dense 8 | from .BaseModel import BaseModel 9 | from ..utils import YpredCallback 10 | 11 | 12 | class MBNN_LinearSigmoid(BaseModel): 13 | """2 Layer logistic-logistic neural network using Keras""" 14 | 15 | parametric = True 16 | bootlist = None 17 | 18 | def __init__(self, n_neurons_l1=2, n_neurons_l2=2, epochs=200, learning_rate=0.01, momentum=0.0, decay=0.0, nesterov=False, loss="binary_crossentropy", batch_size=None, verbose=0): 19 | self.n_neurons_l1 = n_neurons_l1 20 | self.n_neurons_l2 = n_neurons_l2 21 | self.verbose = verbose 22 | self.n_epochs = epochs 23 | self.batch_size = batch_size 24 | self.loss = loss 25 | self.optimizer = SGD(lr=learning_rate, momentum=momentum, decay=decay, nesterov=nesterov) 26 | 27 | self.__name__ = 'cimcb.model.MBNN_LinearSigmoid' 28 | self.__params__ = {'n_neurons_l1': n_neurons_l1, 'n_neurons_l2': n_neurons_l2, 'epochs': epochs, 'learning_rate': learning_rate, 'momentum': momentum, 'decay': decay, 'nesterov': nesterov, 'loss': loss, 'batch_size': batch_size, 'verbose': verbose} 29 | 30 | def set_params(self, params): 31 | self.__init__(**params) 32 | 33 | def train(self, X, Y, epoch_ypred=False, epoch_xtest=None): 34 | """ Fit the neural network model, save additional stats (as attributes) and return Y predicted values. 35 | 36 | Parameters 37 | ---------- 38 | X : array-like, shape = [n_samples, n_features] 39 | Predictor variables, where n_samples is the number of samples and n_features is the number of predictors. 40 | 41 | Y : array-like, shape = [n_samples, 1] 42 | Response variables, where n_samples is the number of samples. 43 | 44 | Returns 45 | ------- 46 | y_pred_train : array-like, shape = [n_samples, 1] 47 | Predicted y score for samples. 
48 | """ 49 | 50 | # If batch-size is None: 51 | if self.batch_size is None: 52 | self.batch_size = len(X) 53 | 54 | X1 = X[0] 55 | X2 = X[1] 56 | 57 | # Layer for X1 58 | input_X1 = Input(shape=(len(X1.T),)) 59 | layer1_X1 = Dense(self.n_neurons_l1, activation="linear")(input_X1) 60 | layer1_X1 = Model(inputs=input_X1, outputs=layer1_X1) 61 | 62 | # Layer for X2 63 | input_X2 = Input(shape=(len(X2.T),)) 64 | layer1_X2 = Dense(self.n_neurons_l1, activation="linear")(input_X2) 65 | layer1_X2 = Model(inputs=input_X2, outputs=layer1_X2) 66 | 67 | # Concatenate 68 | concat = concatenate([layer1_X1.output, layer1_X2.output]) 69 | model_concat = Dense(self.n_neurons_l2, activation="sigmoid")(concat) 70 | model_concat = Dense(1, activation="sigmoid")(model_concat) 71 | 72 | self.model = Model(inputs=[layer1_X1.input, layer1_X2.input], outputs=model_concat) 73 | self.model.compile(optimizer=self.optimizer, loss=self.loss, metrics=["accuracy"]) 74 | 75 | # If epoch_ypred is True, calculate ypred for each epoch 76 | if epoch_ypred is True: 77 | self.epoch = YpredCallback(self.model, X, epoch_xtest) 78 | else: 79 | self.epoch = Callback() 80 | 81 | # Fit 82 | self.model.fit([X1, X2], Y, epochs=self.n_epochs, batch_size=self.batch_size, verbose=self.verbose, callbacks=[self.epoch]) 83 | 84 | # Not sure about the naming scheme (trying to match PLS) 85 | y_pred_train = self.model.predict(X).flatten() 86 | 87 | # Storing X, Y, and Y_pred 88 | self.Y_pred = y_pred_train 89 | self.X = X 90 | self.Y = Y 91 | return y_pred_train 92 | 93 | def test(self, X, Y=None): 94 | """Calculate and return Y predicted value. 95 | 96 | Parameters 97 | ---------- 98 | X : array-like, shape = [n_samples, n_features] 99 | Test variables, where n_samples is the number of samples and n_features is the number of predictors. 100 | 101 | Returns 102 | ------- 103 | y_pred_test : array-like, shape = [n_samples, 1] 104 | Predicted y score for samples. 
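        Example
        -------
        A minimal sketch (illustrative; X1_test and X2_test are hypothetical
        arrays holding the two data blocks in the same order used for train):

        >>> y_pred = model.test([X1_test, X2_test])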
105 | """ 106 | 107 | y_pred_test = self.model.predict(X).flatten() 108 | return y_pred_test 109 | -------------------------------------------------------------------------------- /cimcb/model/MBNN_LinearSigmoid_1Layer.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from keras.callbacks import Callback 3 | from keras.optimizers import SGD 4 | from keras.models import Sequential 5 | from keras.models import Model 6 | from keras.layers import Input, Dense, Concatenate, LSTM, concatenate 7 | from keras.layers import Dense 8 | from .BaseModel import BaseModel 9 | from ..utils import YpredCallback 10 | 11 | 12 | class MBNN_LinearSigmoid_1Layer(BaseModel): 13 | """2 Layer logistic-logistic neural network using Keras""" 14 | 15 | parametric = True 16 | bootlist = None 17 | 18 | def __init__(self, n_neurons_l1=2, n_neurons_l2=2, epochs=200, learning_rate=0.01, momentum=0.0, decay=0.0, nesterov=False, loss="binary_crossentropy", batch_size=None, verbose=0): 19 | self.n_neurons_l1 = n_neurons_l1 20 | self.n_neurons_l2 = n_neurons_l2 21 | self.verbose = verbose 22 | self.n_epochs = epochs 23 | self.batch_size = batch_size 24 | self.loss = loss 25 | self.optimizer = SGD(lr=learning_rate, momentum=momentum, decay=decay, nesterov=nesterov) 26 | 27 | self.__name__ = 'cimcb.model.MBNN_LinearSigmoid_1Layer' 28 | self.__params__ = {'n_neurons_l1': n_neurons_l1, 'n_neurons_l2': n_neurons_l2, 'epochs': epochs, 'learning_rate': learning_rate, 'momentum': momentum, 'decay': decay, 'nesterov': nesterov, 'loss': loss, 'batch_size': batch_size, 'verbose': verbose} 29 | 30 | def set_params(self, params): 31 | self.__init__(**params) 32 | 33 | def train(self, X, Y, epoch_ypred=False, epoch_xtest=None): 34 | """ Fit the neural network model, save additional stats (as attributes) and return Y predicted values. 35 | 36 | Parameters 37 | ---------- 38 | X : array-like, shape = [n_samples, n_features] 39 | Predictor variables, where n_samples is the number of samples and n_features is the number of predictors. 40 | 41 | Y : array-like, shape = [n_samples, 1] 42 | Response variables, where n_samples is the number of samples. 43 | 44 | Returns 45 | ------- 46 | y_pred_train : array-like, shape = [n_samples, 1] 47 | Predicted y score for samples. 
48 | """ 49 | 50 | # If batch-size is None: 51 | if self.batch_size is None: 52 | self.batch_size = len(X) 53 | 54 | X1 = X[0] 55 | X2 = X[1] 56 | 57 | # Layer for X1 58 | input_X1 = Input(shape=(len(X1.T),)) 59 | layer1_X1 = Dense(self.n_neurons_l1, activation="linear")(input_X1) 60 | layer1_X1 = Model(inputs=input_X1, outputs=layer1_X1) 61 | 62 | # Layer for X2 63 | input_X2 = Input(shape=(len(X2.T),)) 64 | layer1_X2 = Dense(self.n_neurons_l1, activation="linear")(input_X2) 65 | layer1_X2 = Model(inputs=input_X2, outputs=layer1_X2) 66 | 67 | # Concatenate 68 | concat = concatenate([layer1_X1.output, layer1_X2.output]) 69 | #model_concat = Dense(self.n_neurons_l2, activation="sigmoid")(concat) 70 | model_concat = Dense(1, activation="sigmoid")(concat) 71 | 72 | self.model = Model(inputs=[layer1_X1.input, layer1_X2.input], outputs=model_concat) 73 | self.model.compile(optimizer=self.optimizer, loss=self.loss, metrics=["accuracy"]) 74 | 75 | # If epoch_ypred is True, calculate ypred for each epoch 76 | if epoch_ypred is True: 77 | self.epoch = YpredCallback(self.model, X, epoch_xtest) 78 | else: 79 | self.epoch = Callback() 80 | 81 | # Fit 82 | self.model.fit([X1, X2], Y, epochs=self.n_epochs, batch_size=self.batch_size, verbose=self.verbose, callbacks=[self.epoch]) 83 | 84 | # Not sure about the naming scheme (trying to match PLS) 85 | y_pred_train = self.model.predict(X).flatten() 86 | 87 | # Storing X, Y, and Y_pred 88 | self.Y_pred = y_pred_train 89 | self.X = X 90 | self.Y = Y 91 | return y_pred_train 92 | 93 | def test(self, X, Y=None): 94 | """Calculate and return Y predicted value. 95 | 96 | Parameters 97 | ---------- 98 | X : array-like, shape = [n_samples, n_features] 99 | Test variables, where n_samples is the number of samples and n_features is the number of predictors. 100 | 101 | Returns 102 | ------- 103 | y_pred_test : array-like, shape = [n_samples, 1] 104 | Predicted y score for samples. 105 | """ 106 | 107 | y_pred_test = self.model.predict(X).flatten() 108 | return y_pred_test 109 | -------------------------------------------------------------------------------- /cimcb/model/NN_L1.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from keras.callbacks import Callback 3 | from keras.optimizers import SGD 4 | from keras.models import Sequential 5 | from keras.layers import Dense 6 | from keras import regularizers 7 | from .BaseModel import BaseModel 8 | from ..utils import YpredCallback 9 | 10 | 11 | class NN_L1(BaseModel): 12 | """2 Layer linear-logistic neural network using Keras""" 13 | 14 | parametric = False 15 | bootlist = None 16 | 17 | def __init__(self, l_lambda=0.01, n_nodes=2, epochs=200, learning_rate=0.01, momentum=0.0, decay=0.0, nesterov=False, loss="binary_crossentropy", batch_size=None, verbose=0): 18 | self.l_lambda = l_lambda 19 | self.n_nodes = n_nodes 20 | self.verbose = verbose 21 | self.n_epochs = epochs 22 | self.k = n_nodes 23 | self.batch_size = batch_size 24 | self.loss = loss 25 | self.optimizer = SGD(lr=learning_rate, momentum=momentum, decay=decay, nesterov=nesterov) 26 | 27 | def train(self, X, Y, epoch_ypred=False, epoch_xtest=None): 28 | """ Fit the neural network model, save additional stats (as attributes) and return Y predicted values. 29 | 30 | Parameters 31 | ---------- 32 | X : array-like, shape = [n_samples, n_features] 33 | Predictor variables, where n_samples is the number of samples and n_features is the number of predictors. 
34 | 35 | Y : array-like, shape = [n_samples, 1] 36 | Response variables, where n_samples is the number of samples. 37 | 38 | Returns 39 | ------- 40 | y_pred_train : array-like, shape = [n_samples, 1] 41 | Predicted y score for samples. 42 | """ 43 | 44 | # If batch-size is None: 45 | if self.batch_size is None: 46 | self.batch_size = min(200, len(X)) 47 | 48 | self.model = Sequential() 49 | self.model.add(Dense(self.n_nodes, activation="linear", input_dim=len(X.T), kernel_regularizer=regularizers.l1(self.l_lambda))) 50 | self.model.add(Dense(1, activation="sigmoid")) 51 | self.model.compile(optimizer=self.optimizer, loss=self.loss, metrics=["accuracy"]) 52 | 53 | # If epoch_ypred is True, calculate ypred for each epoch 54 | if epoch_ypred is True: 55 | self.epoch = YpredCallback(self.model, X, epoch_xtest) 56 | else: 57 | self.epoch = Callback() 58 | 59 | # Fit 60 | self.model.fit(X, Y, epochs=self.n_epochs, batch_size=self.batch_size, verbose=self.verbose, callbacks=[self.epoch]) 61 | 62 | layer1_weight = self.model.layers[0].get_weights()[0] 63 | layer1_bias = self.model.layers[0].get_weights()[1] 64 | layer2_weight = self.model.layers[1].get_weights()[0] 65 | layer2_bias = self.model.layers[1].get_weights()[1] 66 | 67 | # Not sure about the naming scheme (trying to match PLS) 68 | self.model.x_loadings_ = layer1_weight 69 | self.model.x_scores_ = np.matmul(X, self.model.x_loadings_) + layer1_bias 70 | self.model.y_loadings_ = layer2_weight 71 | self.model.pctvar_ = np.ones((1, len(self.model.y_loadings_[0]))) 72 | self.xcols_num = len(X.T) 73 | self.model.pctvar_ = sum(abs(self.model.x_scores_) ** 2) / sum(sum(abs(X) ** 2)) * 100 74 | y_pred_train = self.model.predict(X).flatten() 75 | 76 | # Storing X, Y, and Y_pred 77 | self.Y_pred = y_pred_train 78 | self.X = X 79 | self.Y = Y 80 | return y_pred_train 81 | 82 | def test(self, X, Y=None): 83 | """Calculate and return Y predicted value. 84 | 85 | Parameters 86 | ---------- 87 | X : array-like, shape = [n_samples, n_features] 88 | Test variables, where n_samples is the number of samples and n_features is the number of predictors. 89 | 90 | Returns 91 | ------- 92 | y_pred_test : array-like, shape = [n_samples, 1] 93 | Predicted y score for samples. 
94 | """ 95 | 96 | layer1_weight = self.model.layers[0].get_weights()[0] 97 | layer1_bias = self.model.layers[0].get_weights()[1] 98 | layer2_weight = self.model.layers[1].get_weights()[0] 99 | layer2_bias = self.model.layers[1].get_weights()[1] 100 | 101 | self.model.y_loadings_ = layer2_weight 102 | self.model.y_loadings_ = self.model.y_loadings_.reshape(1, len(self.model.y_loadings_)) 103 | self.model.x_loadings_ = layer1_weight 104 | self.model.x_scores_ = np.matmul(X, self.model.x_loadings_) + layer1_bias 105 | y_pred_test = self.model.predict(X).flatten() 106 | self.model.pctvar_ = sum(abs(self.model.x_scores_) ** 2) / sum(sum(abs(X) ** 2)) * 100 107 | return y_pred_test 108 | -------------------------------------------------------------------------------- /cimcb/model/NN_L2.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from keras.callbacks import Callback 3 | from keras.optimizers import SGD 4 | from keras.models import Sequential 5 | from keras.layers import Dense 6 | from keras import regularizers 7 | from .BaseModel import BaseModel 8 | from ..utils import YpredCallback 9 | 10 | 11 | class NN_L2(BaseModel): 12 | """2 Layer linear-logistic neural network using Keras""" 13 | 14 | parametric = False 15 | bootlist = None 16 | 17 | def __init__(self, l_lambda=0.01, n_nodes=2, epochs=200, learning_rate=0.01, momentum=0.0, decay=0.0, nesterov=False, loss="binary_crossentropy", batch_size=None, verbose=0): 18 | self.l_lambda = l_lambda 19 | self.n_nodes = n_nodes 20 | self.verbose = verbose 21 | self.n_epochs = epochs 22 | self.k = n_nodes 23 | self.batch_size = batch_size 24 | self.loss = loss 25 | self.optimizer = SGD(lr=learning_rate, momentum=momentum, decay=decay, nesterov=nesterov) 26 | 27 | def train(self, X, Y, epoch_ypred=False, epoch_xtest=None): 28 | """ Fit the neural network model, save additional stats (as attributes) and return Y predicted values. 29 | 30 | Parameters 31 | ---------- 32 | X : array-like, shape = [n_samples, n_features] 33 | Predictor variables, where n_samples is the number of samples and n_features is the number of predictors. 34 | 35 | Y : array-like, shape = [n_samples, 1] 36 | Response variables, where n_samples is the number of samples. 37 | 38 | Returns 39 | ------- 40 | y_pred_train : array-like, shape = [n_samples, 1] 41 | Predicted y score for samples. 
42 | """ 43 | 44 | # If batch-size is None: 45 | if self.batch_size is None: 46 | self.batch_size = min(200, len(X)) 47 | 48 | self.model = Sequential() 49 | self.model.add(Dense(self.n_nodes, activation="linear", input_dim=len(X.T), kernel_regularizer=regularizers.l2(self.l_lambda))) 50 | self.model.add(Dense(1, activation="sigmoid")) 51 | self.model.compile(optimizer=self.optimizer, loss=self.loss, metrics=["accuracy"]) 52 | 53 | # If epoch_ypred is True, calculate ypred for each epoch 54 | if epoch_ypred is True: 55 | self.epoch = YpredCallback(self.model, X, epoch_xtest) 56 | else: 57 | self.epoch = Callback() 58 | 59 | # Fit 60 | self.model.fit(X, Y, epochs=self.n_epochs, batch_size=self.batch_size, verbose=self.verbose, callbacks=[self.epoch]) 61 | 62 | layer1_weight = self.model.layers[0].get_weights()[0] 63 | layer1_bias = self.model.layers[0].get_weights()[1] 64 | layer2_weight = self.model.layers[1].get_weights()[0] 65 | layer2_bias = self.model.layers[1].get_weights()[1] 66 | 67 | # Not sure about the naming scheme (trying to match PLS) 68 | self.model.x_loadings_ = layer1_weight 69 | self.model.x_scores_ = np.matmul(X, self.model.x_loadings_) + layer1_bias 70 | self.model.y_loadings_ = layer2_weight 71 | self.model.pctvar_ = np.ones((1, len(self.model.y_loadings_[0]))) 72 | self.xcols_num = len(X.T) 73 | self.model.pctvar_ = sum(abs(self.model.x_scores_) ** 2) / sum(sum(abs(X) ** 2)) * 100 74 | y_pred_train = self.model.predict(X).flatten() 75 | 76 | # Storing X, Y, and Y_pred 77 | self.Y_pred = y_pred_train 78 | self.X = X 79 | self.Y = Y 80 | return y_pred_train 81 | 82 | def test(self, X, Y=None): 83 | """Calculate and return Y predicted value. 84 | 85 | Parameters 86 | ---------- 87 | X : array-like, shape = [n_samples, n_features] 88 | Test variables, where n_samples is the number of samples and n_features is the number of predictors. 89 | 90 | Returns 91 | ------- 92 | y_pred_test : array-like, shape = [n_samples, 1] 93 | Predicted y score for samples. 
94 | """ 95 | 96 | layer1_weight = self.model.layers[0].get_weights()[0] 97 | layer1_bias = self.model.layers[0].get_weights()[1] 98 | layer2_weight = self.model.layers[1].get_weights()[0] 99 | layer2_bias = self.model.layers[1].get_weights()[1] 100 | 101 | self.model.y_loadings_ = layer2_weight 102 | self.model.y_loadings_ = self.model.y_loadings_.reshape(1, len(self.model.y_loadings_)) 103 | self.model.x_loadings_ = layer1_weight 104 | self.model.x_scores_ = np.matmul(X, self.model.x_loadings_) + layer1_bias 105 | y_pred_test = self.model.predict(X).flatten() 106 | self.model.pctvar_ = sum(abs(self.model.x_scores_) ** 2) / sum(sum(abs(X) ** 2)) * 100 107 | return y_pred_test 108 | -------------------------------------------------------------------------------- /cimcb/model/RF.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | from sklearn.ensemble import RandomForestClassifier 4 | from sklearn.metrics import roc_auc_score 5 | from .BaseModel import BaseModel 6 | from ..utils import binary_metrics, binary_evaluation 7 | 8 | 9 | class RF(BaseModel): 10 | """Random forest""" 11 | 12 | parametric = True 13 | bootlist = None # list of metrics to bootstrap 14 | 15 | bootlist = ["Y_pred", "model.eval_metrics_"] # list of metrics to bootstrap 16 | 17 | def __init__(self, n_estimators=100, max_features="auto", max_depth=None, criterion="gini", min_samples_split=2, min_samples_leaf=1, max_leaf_nodes=None, n_jobs=None): 18 | self.model = RandomForestClassifier(n_estimators=n_estimators, max_features=max_features, max_depth=max_depth, criterion=criterion, min_samples_split=min_samples_split, min_samples_leaf=min_samples_leaf, max_leaf_nodes=max_leaf_nodes, n_jobs=n_jobs) 19 | self.k = n_estimators 20 | 21 | self.__name__ = 'cimcb.model.RF' 22 | self.__params__ = {'n_estimators': n_estimators, 'max_features': max_features, 'max_depth': max_depth, 'criterion': criterion, 'min_samples_split': min_samples_split, 'min_samples_leaf': min_samples_leaf, 'max_leaf_nodes': max_leaf_nodes, 'n_jobs': n_jobs} 23 | 24 | def set_params(self, params): 25 | self.__init__(**params) 26 | 27 | def train(self, X, Y): 28 | """ Fit the RF model, save additional stats (as attributes) and return Y predicted values. 29 | 30 | Parameters 31 | ---------- 32 | X : array-like, shape = [n_samples, n_features] 33 | Predictor variables, where n_samples is the number of samples and n_features is the number of predictors. 34 | 35 | Y : array-like, shape = [n_samples, 1] 36 | Response variables, where n_samples is the number of samples. 37 | 38 | Returns 39 | ------- 40 | y_pred_train : array-like, shape = [n_samples, 1] 41 | Predicted y score for samples. 42 | """ 43 | 44 | # Ensure array and error check 45 | X, Y = self.input_check(X, Y) 46 | 47 | # Fit the model 48 | self.model.fit(X, Y) 49 | 50 | # Predict_proba was designed for multi-groups... 
51 | # This makes it sure that y_pred is correct 52 | y_pred = self.model.predict_proba(X) 53 | pred_0 = roc_auc_score(Y, y_pred[:, 0]) 54 | pred_1 = roc_auc_score(Y, y_pred[:, 1]) 55 | if pred_0 > pred_1: 56 | self.pred_index = 0 57 | else: 58 | self.pred_index = 1 59 | 60 | # Calculate and return Y prediction value 61 | y_pred_train = np.array(self.model.predict_proba(X)[:, self.pred_index]) 62 | 63 | self.model.y_loadings_ = np.array([0, 0, 0]) 64 | self.model.x_scores_ = np.array([0, 0, 0]) 65 | self.model.pctvar_ = np.array([0, 0, 0]) 66 | 67 | # Storing X, Y, and Y_pred 68 | self.X = X 69 | self.Y = Y 70 | self.Y_pred = y_pred_train 71 | 72 | self.metrics_key = [] 73 | self.model.eval_metrics_ = [] 74 | bm = binary_evaluation(Y, y_pred_train) 75 | for key, value in bm.items(): 76 | self.model.eval_metrics_.append(value) 77 | self.metrics_key.append(key) 78 | 79 | self.Y_train = Y 80 | self.Y_pred_train = y_pred_train 81 | 82 | self.model.eval_metrics_ = np.array(self.model.eval_metrics_) 83 | 84 | return y_pred_train 85 | 86 | def test(self, X, Y=None): 87 | """Calculate and return Y predicted value. 88 | 89 | Parameters 90 | ---------- 91 | X : array-like, shape = [n_samples, n_features] 92 | Test variables, where n_samples is the number of samples and n_features is the number of predictors. 93 | 94 | Returns 95 | ------- 96 | y_pred_test : array-like, shape = [n_samples, 1] 97 | Predicted y score for samples. 98 | """ 99 | 100 | # Convert to X to numpy array if a DataFrame 101 | if isinstance(X, pd.DataFrame or pd.Series): 102 | X = np.array(X) 103 | 104 | # Calculate and return Y predicted value 105 | y_pred_test = np.array(self.model.predict_proba(X)[:, self.pred_index]) 106 | 107 | if Y is not None: 108 | self.metrics_key = [] 109 | self.model.eval_metrics_ = [] 110 | bm = binary_evaluation(Y, y_pred_test) 111 | for key, value in bm.items(): 112 | self.model.eval_metrics_.append(value) 113 | self.metrics_key.append(key) 114 | 115 | self.model.eval_metrics_ = np.array(self.model.eval_metrics_) 116 | 117 | self.Y_pred = y_pred_test 118 | return y_pred_test 119 | -------------------------------------------------------------------------------- /cimcb/model/PCR.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | from sklearn.decomposition import PCA 4 | from sklearn.linear_model import LinearRegression 5 | from .BaseModel import BaseModel 6 | from ..utils import binary_metrics, binary_evaluation 7 | 8 | 9 | class PCR(BaseModel): 10 | """ Principal component regression. 11 | 12 | Parameters 13 | ---------- 14 | n_components : int, (default 2) 15 | Number of components to keep. 16 | 17 | Methods 18 | ------- 19 | train : Fit model to data. 20 | 21 | test : Apply model to test data. 22 | 23 | evaluate : Evaluate model. 24 | 25 | calc_bootci : Calculate bootstrap intervals for plot_featureimportance. 26 | 27 | plot_featureimportance : Plot coefficient and Variable Importance in Projection (VIP). 28 | 29 | plot_permutation_test : Perform a permutation test and plot. 
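    Example
    -------
    A minimal sketch (illustrative; X, Y, and X_test are hypothetical arrays
    shaped as described in train and test):

    >>> model = PCR(n_components=2)
    >>> y_pred_train = model.train(X, Y)
    >>> y_pred_test = model.test(X_test)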
30 | """ 31 | 32 | parametric = True 33 | bootlist = ["model.coef_", "Y_pred", "model.eval_metrics_"] # list of metrics to bootstrap 34 | 35 | def __init__(self, n_components=2): 36 | self.model = PCA(n_components=n_components) 37 | self.regrmodel = LinearRegression() 38 | self.k = n_components 39 | 40 | self.__name__ = 'cimcb.model.PCR' 41 | self.__params__ = {'n_components': n_components} 42 | 43 | def set_params(self, params): 44 | self.__init__(**params) 45 | 46 | def train(self, X, Y): 47 | """ Fit the PCR model, save additional stats (as attributes) and return Y predicted values. 48 | 49 | Parameters 50 | ---------- 51 | X : array-like, shape = [n_samples, n_features] 52 | Predictor variables, where n_samples is the number of samples and n_features is the number of predictors. 53 | 54 | Y : array-like, shape = [n_samples, 1] 55 | Response variables, where n_samples is the number of samples. 56 | 57 | Returns 58 | ------- 59 | y_pred_train : array-like, shape = [n_samples, 1] 60 | Predicted y score for samples. 61 | """ 62 | 63 | # Ensure array and error check 64 | X, Y = self.input_check(X, Y) 65 | 66 | # Fit the model 67 | self.model.fit(X) 68 | self.model.x_scores_ = self.model.transform(X) 69 | self.regrmodel.fit(self.model.x_scores_, Y) 70 | 71 | # Save x_loadings, coef, pctvar, x_weights, y_loadings and vip 72 | self.model.x_loadings_ = self.model.components_.T 73 | self.model.coef_ = np.dot(self.model.x_loadings_, self.regrmodel.coef_) 74 | self.model.pctvar_ = self.model.explained_variance_ 75 | self.model.x_weights_ = self.model.components_.T 76 | self.model.y_loadings_ = self.regrmodel.coef_.reshape(1, len(self.regrmodel.coef_)) 77 | 78 | # Calculate and return Y prediction value 79 | y_pred_train = self.regrmodel.predict(self.model.x_scores_).flatten() 80 | 81 | # Storing X, Y, and Y_pred 82 | self.X = X 83 | self.Y = Y 84 | self.Y_pred = y_pred_train 85 | self.metrics_key = [] 86 | self.model.eval_metrics_ = [] 87 | bm = binary_evaluation(Y, y_pred_train) 88 | for key, value in bm.items(): 89 | self.model.eval_metrics_.append(value) 90 | self.metrics_key.append(key) 91 | 92 | self.model.eval_metrics_ = np.array(self.model.eval_metrics_) 93 | self.Y_train = Y 94 | self.Y_pred_train = y_pred_train 95 | 96 | return y_pred_train 97 | 98 | def test(self, X, Y=None): 99 | """Calculate and return Y predicted value. 100 | 101 | Parameters 102 | ---------- 103 | X : array-like, shape = [n_samples, n_features] 104 | Test variables, where n_samples is the number of samples and n_features is the number of predictors. 105 | 106 | Returns 107 | ------- 108 | y_pred_test : array-like, shape = [n_samples, 1] 109 | Predicted y score for samples. 
110 | """ 111 | 112 | # Convert to X to numpy array if a DataFrame 113 | if isinstance(X, pd.DataFrame or pd.Series): 114 | X = np.array(X) 115 | 116 | # Calculate and return Y predicted value 117 | newX = self.model.transform(X) 118 | y_pred_test = self.regrmodel.predict(newX).flatten() 119 | # Calculate and return Y predicted value 120 | if Y is not None: 121 | self.metrics_key = [] 122 | self.model.eval_metrics_ = [] 123 | bm = binary_evaluation(Y, y_pred_test) 124 | for key, value in bm.items(): 125 | self.model.eval_metrics_.append(value) 126 | self.metrics_key.append(key) 127 | 128 | self.model.eval_metrics_ = np.array(self.model.eval_metrics_) 129 | 130 | self.Y_pred = y_pred_test 131 | return y_pred_test 132 | -------------------------------------------------------------------------------- /cimcb/model/NN_LogitLogit.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from keras.callbacks import Callback 3 | from keras.optimizers import SGD 4 | from keras.models import Sequential 5 | from keras.layers import Dense 6 | from .BaseModel import BaseModel 7 | from ..utils import YpredCallback 8 | 9 | 10 | class NN_LogitLogit(BaseModel): 11 | """2 Layer logistic-logistic neural network using Keras""" 12 | 13 | parametric = True 14 | bootlist = None 15 | 16 | def __init__(self, n_neurons=2, epochs=200, learning_rate=0.01, momentum=0.0, decay=0.0, nesterov=False, loss="binary_crossentropy", batch_size=None, verbose=0): 17 | self.n_neurons = n_neurons 18 | self.verbose = verbose 19 | self.n_epochs = epochs 20 | self.k = n_neurons 21 | self.batch_size = batch_size 22 | self.loss = loss 23 | self.optimizer = SGD(lr=learning_rate, momentum=momentum, decay=decay, nesterov=nesterov) 24 | 25 | self.__name__ = 'cimcb.model.NN_LogitLogit' 26 | self.__params__ = {'n_neurons': n_neurons, 'epochs': epochs, 'learning_rate': learning_rate, 'momentum': momentum, 'decay': decay, 'nesterov': nesterov, 'loss': loss, 'batch_size': batch_size, 'verbose': verbose} 27 | 28 | def set_params(self, params): 29 | self.__init__(**params) 30 | 31 | def train(self, X, Y, epoch_ypred=False, epoch_xtest=None): 32 | """ Fit the neural network model, save additional stats (as attributes) and return Y predicted values. 33 | 34 | Parameters 35 | ---------- 36 | X : array-like, shape = [n_samples, n_features] 37 | Predictor variables, where n_samples is the number of samples and n_features is the number of predictors. 38 | 39 | Y : array-like, shape = [n_samples, 1] 40 | Response variables, where n_samples is the number of samples. 41 | 42 | Returns 43 | ------- 44 | y_pred_train : array-like, shape = [n_samples, 1] 45 | Predicted y score for samples. 
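        Example
        -------
        A minimal sketch (illustrative; X and Y are hypothetical arrays, with
        Y holding binary 0/1 labels):

        >>> model = NN_LogitLogit(n_neurons=3, epochs=500, learning_rate=0.01)
        >>> y_pred_train = model.train(X, Y)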
46 | """ 47 | 48 | # If batch-size is None: 49 | if self.batch_size is None: 50 | self.batch_size = len(X) 51 | 52 | self.model = Sequential() 53 | self.model.add(Dense(self.n_neurons, activation="sigmoid", input_dim=len(X.T))) 54 | self.model.add(Dense(1, activation="sigmoid")) 55 | self.model.compile(optimizer=self.optimizer, loss=self.loss, metrics=["accuracy"]) 56 | 57 | # If epoch_ypred is True, calculate ypred for each epoch 58 | if epoch_ypred is True: 59 | self.epoch = YpredCallback(self.model, X, epoch_xtest) 60 | else: 61 | self.epoch = Callback() 62 | 63 | # Fit 64 | self.model.fit(X, Y, epochs=self.n_epochs, batch_size=self.batch_size, verbose=self.verbose, callbacks=[self.epoch]) 65 | 66 | layer1_weight = self.model.layers[0].get_weights()[0] 67 | layer1_bias = self.model.layers[0].get_weights()[1] 68 | layer2_weight = self.model.layers[1].get_weights()[0] 69 | layer2_bias = self.model.layers[1].get_weights()[1] 70 | 71 | # Not sure about the naming scheme (trying to match PLS) 72 | self.model.x_loadings_ = layer1_weight 73 | self.model.x_scores_ = np.matmul(X, self.model.x_loadings_) + layer1_bias 74 | self.model.y_loadings_ = layer2_weight 75 | self.model.pctvar_ = np.ones((1, len(self.model.y_loadings_[0]))) 76 | self.xcols_num = len(X.T) 77 | self.model.pctvar_ = sum(abs(self.model.x_scores_) ** 2) / sum(sum(abs(X) ** 2)) * 100 78 | y_pred_train = self.model.predict(X).flatten() 79 | 80 | # Storing X, Y, and Y_pred 81 | self.Y_pred = y_pred_train 82 | self.X = X 83 | self.Y = Y 84 | return y_pred_train 85 | 86 | def test(self, X, Y=None): 87 | """Calculate and return Y predicted value. 88 | 89 | Parameters 90 | ---------- 91 | X : array-like, shape = [n_samples, n_features] 92 | Test variables, where n_samples is the number of samples and n_features is the number of predictors. 93 | 94 | Returns 95 | ------- 96 | y_pred_test : array-like, shape = [n_samples, 1] 97 | Predicted y score for samples. 
98 | """ 99 | 100 | layer1_weight = self.model.layers[0].get_weights()[0] 101 | layer1_bias = self.model.layers[0].get_weights()[1] 102 | layer2_weight = self.model.layers[1].get_weights()[0] 103 | layer2_bias = self.model.layers[1].get_weights()[1] 104 | 105 | self.model.y_loadings_ = layer2_weight 106 | self.model.y_loadings_ = self.model.y_loadings_.reshape(1, len(self.model.y_loadings_)) 107 | self.model.x_loadings_ = layer1_weight 108 | self.model.x_scores_ = np.matmul(X, self.model.x_loadings_) + layer1_bias 109 | y_pred_test = self.model.predict(X).flatten() 110 | self.model.pctvar_ = sum(abs(self.model.x_scores_) ** 2) / sum(sum(abs(X) ** 2)) * 100 111 | return y_pred_test 112 | -------------------------------------------------------------------------------- /cimcb/model/NN_LinearLinear.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from keras.callbacks import Callback 3 | from keras.optimizers import SGD 4 | from keras.models import Sequential 5 | from keras.layers import Dense 6 | from .BaseModel import BaseModel 7 | from ..utils import YpredCallback 8 | 9 | 10 | class NN_LinearLinear(BaseModel): 11 | """2 Layer linear-linear neural network using Keras""" 12 | 13 | parametric = True 14 | bootlist = None 15 | 16 | def __init__(self, n_neurons=2, epochs=200, learning_rate=0.01, momentum=0.0, decay=0.0, nesterov=False, loss="mean_squared_error", batch_size=None, verbose=0): 17 | self.n_neurons = n_neurons 18 | self.verbose = verbose 19 | self.n_epochs = epochs 20 | self.k = n_neurons 21 | self.batch_size = batch_size 22 | self.loss = loss 23 | self.optimizer = SGD(lr=learning_rate, momentum=momentum, decay=decay, nesterov=nesterov) 24 | 25 | self.__name__ = 'cimcb.model.NN_LinearLinear' 26 | self.__params__ = {'n_neurons': n_neurons, 'epochs': epochs, 'learning_rate': learning_rate, 'momentum': momentum, 'decay': decay, 'nesterov': nesterov, 'loss': loss, 'batch_size': batch_size, 'verbose': verbose} 27 | 28 | def set_params(self, params): 29 | self.__init__(**params) 30 | 31 | def train(self, X, Y, epoch_ypred=False, epoch_xtest=None): 32 | """ Fit the neural network model, save additional stats (as attributes) and return Y predicted values. 33 | 34 | Parameters 35 | ---------- 36 | X : array-like, shape = [n_samples, n_features] 37 | Predictor variables, where n_samples is the number of samples and n_features is the number of predictors. 38 | 39 | Y : array-like, shape = [n_samples, 1] 40 | Response variables, where n_samples is the number of samples. 41 | 42 | Returns 43 | ------- 44 | y_pred_train : array-like, shape = [n_samples, 1] 45 | Predicted y score for samples. 
46 | """ 47 | 48 | # If batch-size is None: 49 | if self.batch_size is None: 50 | self.batch_size = len(X) 51 | 52 | # Ensure array and error check 53 | X, Y = self.input_check(X, Y) 54 | 55 | self.model = Sequential() 56 | self.model.add(Dense(self.n_neurons, activation="linear", input_dim=len(X.T))) 57 | self.model.add(Dense(1, activation="linear")) 58 | self.model.compile(optimizer=self.optimizer, loss=self.loss, metrics=["accuracy"]) 59 | 60 | # If epoch_ypred is True, calculate ypred for each epoch 61 | if epoch_ypred is True: 62 | self.epoch = YpredCallback(self.model, X, epoch_xtest) 63 | else: 64 | self.epoch = Callback() 65 | 66 | # Fit 67 | self.model.fit(X, Y, epochs=self.n_epochs, batch_size=self.batch_size, verbose=self.verbose, callbacks=[self.epoch]) 68 | 69 | layer1_weight = self.model.layers[0].get_weights()[0] 70 | layer1_bias = self.model.layers[0].get_weights()[1] 71 | layer2_weight = self.model.layers[1].get_weights()[0] 72 | layer2_bias = self.model.layers[1].get_weights()[1] 73 | 74 | # Not sure about the naming scheme (trying to match PLS) 75 | self.model.x_loadings_ = layer1_weight 76 | self.model.x_scores_ = np.matmul(X, self.model.x_loadings_) + layer1_bias 77 | self.model.y_loadings_ = layer2_weight 78 | self.model.pctvar_ = np.ones((1, len(self.model.y_loadings_[0]))) 79 | self.xcols_num = len(X.T) 80 | self.model.pctvar_ = sum(abs(self.model.x_scores_) ** 2) / sum(sum(abs(X) ** 2)) * 100 81 | y_pred_train = self.model.predict(X).flatten() 82 | 83 | # Storing X, Y, and Y_pred 84 | self.Y_pred = y_pred_train 85 | self.X = X 86 | self.Y = Y 87 | return y_pred_train 88 | 89 | def test(self, X, Y=None): 90 | """Calculate and return Y predicted value. 91 | 92 | Parameters 93 | ---------- 94 | X : array-like, shape = [n_samples, n_features] 95 | Test variables, where n_samples is the number of samples and n_features is the number of predictors. 96 | 97 | Returns 98 | ------- 99 | y_pred_test : array-like, shape = [n_samples, 1] 100 | Predicted y score for samples. 
101 | """ 102 | 103 | layer1_weight = self.model.layers[0].get_weights()[0] 104 | layer1_bias = self.model.layers[0].get_weights()[1] 105 | layer2_weight = self.model.layers[1].get_weights()[0] 106 | layer2_bias = self.model.layers[1].get_weights()[1] 107 | 108 | self.model.y_loadings_ = layer2_weight 109 | self.model.y_loadings_ = self.model.y_loadings_.reshape(1, len(self.model.y_loadings_)) 110 | self.model.x_loadings_ = layer1_weight 111 | self.model.x_scores_ = np.matmul(X, self.model.x_loadings_) + layer1_bias 112 | y_pred_test = self.model.predict(X).flatten() 113 | self.model.pctvar_ = sum(abs(self.model.x_scores_) ** 2) / sum(sum(abs(X) ** 2)) * 100 114 | return y_pred_test 115 | -------------------------------------------------------------------------------- /cimcb/model/NN_SigmoidSigmoidSigmoid.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from keras.callbacks import Callback 3 | from keras.optimizers import SGD 4 | from keras.models import Sequential 5 | from keras.layers import Dense 6 | from .BaseModel import BaseModel 7 | from ..utils import YpredCallback 8 | 9 | 10 | class NN_SigmoidSigmoidSigmoid(BaseModel): 11 | """2 Layer logistic-logistic neural network using Keras""" 12 | 13 | parametric = True 14 | bootlist = None 15 | 16 | def __init__(self, n_neurons_l1=2, n_neurons_l2=2, epochs=200, learning_rate=0.01, momentum=0.0, decay=0.0, nesterov=False, loss="binary_crossentropy", batch_size=None, verbose=0): 17 | self.n_neurons_l1 = n_neurons_l1 18 | self.n_neurons_l2 = n_neurons_l2 19 | self.verbose = verbose 20 | self.n_epochs = epochs 21 | self.k = n_neurons_l1 22 | self.batch_size = batch_size 23 | self.loss = loss 24 | self.optimizer = SGD(lr=learning_rate, momentum=momentum, decay=decay, nesterov=nesterov) 25 | 26 | self.__name__ = 'cimcb.model.NN_SigmoidSigmoidSigmoid' 27 | self.__params__ = {'n_neurons_l1': n_neurons_l1, 'n_neurons_l2': n_neurons_l2, 'epochs': epochs, 'learning_rate': learning_rate, 'momentum': momentum, 'decay': decay, 'nesterov': nesterov, 'loss': loss, 'batch_size': batch_size, 'verbose': verbose} 28 | 29 | def set_params(self, params): 30 | self.__init__(**params) 31 | 32 | def train(self, X, Y, epoch_ypred=False, epoch_xtest=None): 33 | """ Fit the neural network model, save additional stats (as attributes) and return Y predicted values. 34 | 35 | Parameters 36 | ---------- 37 | X : array-like, shape = [n_samples, n_features] 38 | Predictor variables, where n_samples is the number of samples and n_features is the number of predictors. 39 | 40 | Y : array-like, shape = [n_samples, 1] 41 | Response variables, where n_samples is the number of samples. 42 | 43 | Returns 44 | ------- 45 | y_pred_train : array-like, shape = [n_samples, 1] 46 | Predicted y score for samples. 
47 | """ 48 | 49 | # If batch-size is None: 50 | if self.batch_size is None: 51 | self.batch_size = len(X) 52 | 53 | self.model = Sequential() 54 | self.model.add(Dense(self.n_neurons_l1, activation="sigmoid", input_dim=len(X.T))) 55 | self.model.add(Dense(self.n_neurons_l2, activation="sigmoid", input_dim=self.n_neurons_l1)) 56 | self.model.add(Dense(1, activation="sigmoid")) 57 | self.model.compile(optimizer=self.optimizer, loss=self.loss, metrics=["accuracy"]) 58 | 59 | # If epoch_ypred is True, calculate ypred for each epoch 60 | if epoch_ypred is True: 61 | self.epoch = YpredCallback(self.model, X, epoch_xtest) 62 | else: 63 | self.epoch = Callback() 64 | 65 | # Fit 66 | self.model.fit(X, Y, epochs=self.n_epochs, batch_size=self.batch_size, verbose=self.verbose, callbacks=[self.epoch]) 67 | 68 | layer1_weight = self.model.layers[0].get_weights()[0] 69 | layer1_bias = self.model.layers[0].get_weights()[1] 70 | layer2_weight = self.model.layers[1].get_weights()[0] 71 | layer2_bias = self.model.layers[1].get_weights()[1] 72 | 73 | # Not sure about the naming scheme (trying to match PLS) 74 | # self.model.x_loadings_ = layer1_weight 75 | # self.model.x_scores_ = np.matmul(X, self.model.x_loadings_) + layer1_bias 76 | # self.model.y_loadings_ = layer2_weight 77 | # self.model.pctvar_ = np.ones((1, len(self.model.y_loadings_[0]))) 78 | # self.xcols_num = len(X.T) 79 | # self.model.pctvar_ = sum(abs(self.model.x_scores_) ** 2) / sum(sum(abs(X) ** 2)) * 100 80 | y_pred_train = self.model.predict(X).flatten() 81 | 82 | # Storing X, Y, and Y_pred 83 | self.Y_pred = y_pred_train 84 | self.X = X 85 | self.Y = Y 86 | return y_pred_train 87 | 88 | def test(self, X, Y=None): 89 | """Calculate and return Y predicted value. 90 | 91 | Parameters 92 | ---------- 93 | X : array-like, shape = [n_samples, n_features] 94 | Test variables, where n_samples is the number of samples and n_features is the number of predictors. 95 | 96 | Returns 97 | ------- 98 | y_pred_test : array-like, shape = [n_samples, 1] 99 | Predicted y score for samples. 
100 | """ 101 | 102 | layer1_weight = self.model.layers[0].get_weights()[0] 103 | layer1_bias = self.model.layers[0].get_weights()[1] 104 | layer2_weight = self.model.layers[1].get_weights()[0] 105 | layer2_bias = self.model.layers[1].get_weights()[1] 106 | 107 | # self.model.y_loadings_ = layer2_weight 108 | # self.model.y_loadings_ = self.model.y_loadings_.reshape(1, len(self.model.y_loadings_)) 109 | # self.model.x_loadings_ = layer1_weight 110 | # self.model.x_scores_ = np.matmul(X, self.model.x_loadings_) + layer1_bias 111 | y_pred_test = self.model.predict(X).flatten() 112 | # self.model.pctvar_ = sum(abs(self.model.x_scores_) ** 2) / sum(sum(abs(X) ** 2)) * 100 113 | return y_pred_test 114 | -------------------------------------------------------------------------------- /cimcb/model/NN_ReluTan.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from keras.callbacks import Callback 3 | from keras.optimizers import SGD 4 | from keras.models import Sequential 5 | from keras.layers import Dense 6 | import tensorflow as tf 7 | from keras import backend as K 8 | from .BaseModel import BaseModel 9 | from ..utils import YpredCallback 10 | 11 | 12 | class NN_ReluTan(BaseModel): 13 | """2 Layer linear-logistic neural network using Keras""" 14 | 15 | parametric = True 16 | bootlist = None 17 | 18 | def __init__(self, n_neurons=2, epochs=200, learning_rate=0.01, momentum=0.0, decay=0.0, nesterov=False, loss="binary_crossentropy", batch_size=None, verbose=0): 19 | self.n_neurons = n_neurons 20 | self.verbose = verbose 21 | self.n_epochs = epochs 22 | self.k = n_neurons 23 | self.batch_size = batch_size 24 | self.loss = loss 25 | self.optimizer = SGD(lr=learning_rate, momentum=momentum, decay=decay, nesterov=nesterov) 26 | 27 | self.__name__ = 'cimcb.model.NN_ReluTan' 28 | self.__params__ = {'n_neurons': n_neurons, 'epochs': epochs, 'learning_rate': learning_rate, 'momentum': momentum, 'decay': decay, 'nesterov': nesterov, 'loss': loss, 'batch_size': batch_size, 'verbose': verbose} 29 | 30 | def set_params(self, params): 31 | self.__init__(**params) 32 | 33 | def train(self, X, Y, epoch_ypred=False, epoch_xtest=None): 34 | """ Fit the neural network model, save additional stats (as attributes) and return Y predicted values. 35 | 36 | Parameters 37 | ---------- 38 | X : array-like, shape = [n_samples, n_features] 39 | Predictor variables, where n_samples is the number of samples and n_features is the number of predictors. 40 | 41 | Y : array-like, shape = [n_samples, 1] 42 | Response variables, where n_samples is the number of samples. 43 | 44 | Returns 45 | ------- 46 | y_pred_train : array-like, shape = [n_samples, 1] 47 | Predicted y score for samples. 
48 | """ 49 | 50 | # # If using Keras, set tf to 1 core 51 | # config = K.tf.ConfigProto(intra_op_parallelism_threads=8, inter_op_parallelism_threads=8, allow_soft_placement=True) 52 | # session = tf.Session(config=config) 53 | # K.set_session(session) 54 | 55 | # If batch-size is None: 56 | if self.batch_size is None: 57 | self.batch_size = len(X) 58 | 59 | self.model = Sequential() 60 | self.model.add(Dense(self.n_neurons, activation="relu", input_dim=len(X.T))) 61 | self.model.add(Dense(1, activation="tanh")) 62 | self.model.compile(optimizer=self.optimizer, loss=self.loss, metrics=["accuracy"]) 63 | 64 | # If epoch_ypred is True, calculate ypred for each epoch 65 | if epoch_ypred is True: 66 | self.epoch = YpredCallback(self.model, X, epoch_xtest) 67 | else: 68 | self.epoch = Callback() 69 | 70 | # Fit 71 | self.model.fit(X, Y, epochs=self.n_epochs, batch_size=self.batch_size, verbose=self.verbose, callbacks=[self.epoch]) 72 | 73 | layer1_weight = self.model.layers[0].get_weights()[0] 74 | layer1_bias = self.model.layers[0].get_weights()[1] 75 | layer2_weight = self.model.layers[1].get_weights()[0] 76 | layer2_bias = self.model.layers[1].get_weights()[1] 77 | 78 | # Not sure about the naming scheme (trying to match PLS) 79 | self.model.x_loadings_ = layer1_weight 80 | self.model.x_scores_ = np.matmul(X, self.model.x_loadings_) + layer1_bias 81 | self.model.y_loadings_ = layer2_weight 82 | self.model.pctvar_ = np.ones((1, len(self.model.y_loadings_[0]))) 83 | self.xcols_num = len(X.T) 84 | self.model.pctvar_ = sum(abs(self.model.x_scores_) ** 2) / sum(sum(abs(X) ** 2)) * 100 85 | y_pred_train = self.model.predict(X).flatten() 86 | 87 | # Storing X, Y, and Y_pred 88 | self.Y_pred = y_pred_train 89 | self.X = X 90 | self.Y = Y 91 | return y_pred_train 92 | 93 | def test(self, X, Y=None): 94 | """Calculate and return Y predicted value. 95 | 96 | Parameters 97 | ---------- 98 | X : array-like, shape = [n_samples, n_features] 99 | Test variables, where n_samples is the number of samples and n_features is the number of predictors. 100 | 101 | Returns 102 | ------- 103 | y_pred_test : array-like, shape = [n_samples, 1] 104 | Predicted y score for samples. 
105 | """ 106 | 107 | layer1_weight = self.model.layers[0].get_weights()[0] 108 | layer1_bias = self.model.layers[0].get_weights()[1] 109 | layer2_weight = self.model.layers[1].get_weights()[0] 110 | layer2_bias = self.model.layers[1].get_weights()[1] 111 | 112 | self.model.y_loadings_ = layer2_weight 113 | self.model.y_loadings_ = self.model.y_loadings_.reshape(1, len(self.model.y_loadings_)) 114 | self.model.x_loadings_ = layer1_weight 115 | self.model.x_scores_ = np.matmul(X, self.model.x_loadings_) + layer1_bias 116 | y_pred_test = self.model.predict(X).flatten() 117 | self.model.pctvar_ = sum(abs(self.model.x_scores_) ** 2) / sum(sum(abs(X) ** 2)) * 100 118 | return y_pred_test 119 | -------------------------------------------------------------------------------- /cimcb/model/NN_TanRelu.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from keras.callbacks import Callback 3 | from keras.optimizers import SGD 4 | from keras.models import Sequential 5 | from keras.layers import Dense 6 | import tensorflow as tf 7 | from keras import backend as K 8 | from .BaseModel import BaseModel 9 | from ..utils import YpredCallback 10 | 11 | 12 | class NN_TanRelu(BaseModel): 13 | """2 Layer linear-logistic neural network using Keras""" 14 | 15 | parametric = True 16 | bootlist = None 17 | 18 | def __init__(self, n_neurons=2, epochs=200, learning_rate=0.01, momentum=0.0, decay=0.0, nesterov=False, loss="binary_crossentropy", batch_size=None, verbose=0): 19 | self.n_neurons = n_neurons 20 | self.verbose = verbose 21 | self.n_epochs = epochs 22 | self.k = n_neurons 23 | self.batch_size = batch_size 24 | self.loss = loss 25 | self.optimizer = SGD(lr=learning_rate, momentum=momentum, decay=decay, nesterov=nesterov) 26 | 27 | self.__name__ = 'cimcb.model.NN_TanRelu' 28 | self.__params__ = {'n_neurons': n_neurons, 'epochs': epochs, 'learning_rate': learning_rate, 'momentum': momentum, 'decay': decay, 'nesterov': nesterov, 'loss': loss, 'batch_size': batch_size, 'verbose': verbose} 29 | 30 | def set_params(self, params): 31 | self.__init__(**params) 32 | 33 | def train(self, X, Y, epoch_ypred=False, epoch_xtest=None): 34 | """ Fit the neural network model, save additional stats (as attributes) and return Y predicted values. 35 | 36 | Parameters 37 | ---------- 38 | X : array-like, shape = [n_samples, n_features] 39 | Predictor variables, where n_samples is the number of samples and n_features is the number of predictors. 40 | 41 | Y : array-like, shape = [n_samples, 1] 42 | Response variables, where n_samples is the number of samples. 43 | 44 | Returns 45 | ------- 46 | y_pred_train : array-like, shape = [n_samples, 1] 47 | Predicted y score for samples. 
48 | """ 49 | 50 | # # If using Keras, set tf to 1 core 51 | # config = K.tf.ConfigProto(intra_op_parallelism_threads=8, inter_op_parallelism_threads=8, allow_soft_placement=True) 52 | # session = tf.Session(config=config) 53 | # K.set_session(session) 54 | 55 | # If batch-size is None: 56 | if self.batch_size is None: 57 | self.batch_size = len(X) 58 | 59 | self.model = Sequential() 60 | self.model.add(Dense(self.n_neurons, activation="tanh", input_dim=len(X.T))) 61 | self.model.add(Dense(1, activation="relu")) 62 | self.model.compile(optimizer=self.optimizer, loss=self.loss, metrics=["accuracy"]) 63 | 64 | # If epoch_ypred is True, calculate ypred for each epoch 65 | if epoch_ypred is True: 66 | self.epoch = YpredCallback(self.model, X, epoch_xtest) 67 | else: 68 | self.epoch = Callback() 69 | 70 | # Fit 71 | self.model.fit(X, Y, epochs=self.n_epochs, batch_size=self.batch_size, verbose=self.verbose, callbacks=[self.epoch]) 72 | 73 | layer1_weight = self.model.layers[0].get_weights()[0] 74 | layer1_bias = self.model.layers[0].get_weights()[1] 75 | layer2_weight = self.model.layers[1].get_weights()[0] 76 | layer2_bias = self.model.layers[1].get_weights()[1] 77 | 78 | # Not sure about the naming scheme (trying to match PLS) 79 | self.model.x_loadings_ = layer1_weight 80 | self.model.x_scores_ = np.matmul(X, self.model.x_loadings_) + layer1_bias 81 | self.model.y_loadings_ = layer2_weight 82 | self.model.pctvar_ = np.ones((1, len(self.model.y_loadings_[0]))) 83 | self.xcols_num = len(X.T) 84 | self.model.pctvar_ = sum(abs(self.model.x_scores_) ** 2) / sum(sum(abs(X) ** 2)) * 100 85 | y_pred_train = self.model.predict(X).flatten() 86 | 87 | # Storing X, Y, and Y_pred 88 | self.Y_pred = y_pred_train 89 | self.X = X 90 | self.Y = Y 91 | return y_pred_train 92 | 93 | def test(self, X, Y=None): 94 | """Calculate and return Y predicted value. 95 | 96 | Parameters 97 | ---------- 98 | X : array-like, shape = [n_samples, n_features] 99 | Test variables, where n_samples is the number of samples and n_features is the number of predictors. 100 | 101 | Returns 102 | ------- 103 | y_pred_test : array-like, shape = [n_samples, 1] 104 | Predicted y score for samples. 
105 | """ 106 | 107 | layer1_weight = self.model.layers[0].get_weights()[0] 108 | layer1_bias = self.model.layers[0].get_weights()[1] 109 | layer2_weight = self.model.layers[1].get_weights()[0] 110 | layer2_bias = self.model.layers[1].get_weights()[1] 111 | 112 | self.model.y_loadings_ = layer2_weight 113 | self.model.y_loadings_ = self.model.y_loadings_.reshape(1, len(self.model.y_loadings_)) 114 | self.model.x_loadings_ = layer1_weight 115 | self.model.x_scores_ = np.matmul(X, self.model.x_loadings_) + layer1_bias 116 | y_pred_test = self.model.predict(X).flatten() 117 | self.model.pctvar_ = sum(abs(self.model.x_scores_) ** 2) / sum(sum(abs(X) ** 2)) * 100 118 | return y_pred_test 119 | -------------------------------------------------------------------------------- /cimcb/model/NN_TanTan.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from keras.callbacks import Callback 3 | from keras.optimizers import SGD 4 | from keras.models import Sequential 5 | from keras.layers import Dense 6 | import tensorflow as tf 7 | from keras import backend as K 8 | from .BaseModel import BaseModel 9 | from ..utils import YpredCallback 10 | 11 | 12 | class NN_TanTan(BaseModel): 13 | """2 Layer linear-logistic neural network using Keras""" 14 | 15 | parametric = True 16 | bootlist = None 17 | 18 | def __init__(self, n_neurons=2, epochs=200, learning_rate=0.01, momentum=0.0, decay=0.0, nesterov=False, loss="binary_crossentropy", batch_size=None, verbose=0): 19 | self.n_neurons = n_neurons 20 | self.verbose = verbose 21 | self.n_epochs = epochs 22 | self.k = n_neurons 23 | self.batch_size = batch_size 24 | self.loss = loss 25 | self.optimizer = SGD(lr=learning_rate, momentum=momentum, decay=decay, nesterov=nesterov) 26 | 27 | self.__name__ = 'cimcb.model.NN_TanTan' 28 | self.__params__ = {'n_neurons': n_neurons, 'epochs': epochs, 'learning_rate': learning_rate, 'momentum': momentum, 'decay': decay, 'nesterov': nesterov, 'loss': loss, 'batch_size': batch_size, 'verbose': verbose} 29 | 30 | def set_params(self, params): 31 | self.__init__(**params) 32 | 33 | def train(self, X, Y, epoch_ypred=False, epoch_xtest=None): 34 | """ Fit the neural network model, save additional stats (as attributes) and return Y predicted values. 35 | 36 | Parameters 37 | ---------- 38 | X : array-like, shape = [n_samples, n_features] 39 | Predictor variables, where n_samples is the number of samples and n_features is the number of predictors. 40 | 41 | Y : array-like, shape = [n_samples, 1] 42 | Response variables, where n_samples is the number of samples. 43 | 44 | Returns 45 | ------- 46 | y_pred_train : array-like, shape = [n_samples, 1] 47 | Predicted y score for samples. 
48 | """ 49 | 50 | # # If using Keras, set tf to 1 core 51 | # config = K.tf.ConfigProto(intra_op_parallelism_threads=8, inter_op_parallelism_threads=8, allow_soft_placement=True) 52 | # session = tf.Session(config=config) 53 | # K.set_session(session) 54 | 55 | # If batch-size is None: 56 | if self.batch_size is None: 57 | self.batch_size = len(X) 58 | 59 | self.model = Sequential() 60 | self.model.add(Dense(self.n_neurons, activation="tanh", input_dim=len(X.T))) 61 | self.model.add(Dense(1, activation="tanh")) 62 | self.model.compile(optimizer=self.optimizer, loss=self.loss, metrics=["accuracy"]) 63 | 64 | # If epoch_ypred is True, calculate ypred for each epoch 65 | if epoch_ypred is True: 66 | self.epoch = YpredCallback(self.model, X, epoch_xtest) 67 | else: 68 | self.epoch = Callback() 69 | 70 | # Fit 71 | self.model.fit(X, Y, epochs=self.n_epochs, batch_size=self.batch_size, verbose=self.verbose, callbacks=[self.epoch]) 72 | 73 | layer1_weight = self.model.layers[0].get_weights()[0] 74 | layer1_bias = self.model.layers[0].get_weights()[1] 75 | layer2_weight = self.model.layers[1].get_weights()[0] 76 | layer2_bias = self.model.layers[1].get_weights()[1] 77 | 78 | # Not sure about the naming scheme (trying to match PLS) 79 | self.model.x_loadings_ = layer1_weight 80 | self.model.x_scores_ = np.matmul(X, self.model.x_loadings_) + layer1_bias 81 | self.model.y_loadings_ = layer2_weight 82 | self.model.pctvar_ = np.ones((1, len(self.model.y_loadings_[0]))) 83 | self.xcols_num = len(X.T) 84 | self.model.pctvar_ = sum(abs(self.model.x_scores_) ** 2) / sum(sum(abs(X) ** 2)) * 100 85 | y_pred_train = self.model.predict(X).flatten() 86 | 87 | # Storing X, Y, and Y_pred 88 | self.Y_pred = y_pred_train 89 | self.X = X 90 | self.Y = Y 91 | return y_pred_train 92 | 93 | def test(self, X, Y=None): 94 | """Calculate and return Y predicted value. 95 | 96 | Parameters 97 | ---------- 98 | X : array-like, shape = [n_samples, n_features] 99 | Test variables, where n_samples is the number of samples and n_features is the number of predictors. 100 | 101 | Returns 102 | ------- 103 | y_pred_test : array-like, shape = [n_samples, 1] 104 | Predicted y score for samples. 
105 | """ 106 | 107 | layer1_weight = self.model.layers[0].get_weights()[0] 108 | layer1_bias = self.model.layers[0].get_weights()[1] 109 | layer2_weight = self.model.layers[1].get_weights()[0] 110 | layer2_bias = self.model.layers[1].get_weights()[1] 111 | 112 | self.model.y_loadings_ = layer2_weight 113 | self.model.y_loadings_ = self.model.y_loadings_.reshape(1, len(self.model.y_loadings_)) 114 | self.model.x_loadings_ = layer1_weight 115 | self.model.x_scores_ = np.matmul(X, self.model.x_loadings_) + layer1_bias 116 | y_pred_test = self.model.predict(X).flatten() 117 | self.model.pctvar_ = sum(abs(self.model.x_scores_) ** 2) / sum(sum(abs(X) ** 2)) * 100 118 | return y_pred_test 119 | -------------------------------------------------------------------------------- /cimcb/model/NN_ReluRelu.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from keras.callbacks import Callback 3 | from keras.optimizers import SGD 4 | from keras.models import Sequential 5 | from keras.layers import Dense 6 | import tensorflow as tf 7 | from keras import backend as K 8 | from .BaseModel import BaseModel 9 | from ..utils import YpredCallback 10 | 11 | 12 | class NN_ReluRelu(BaseModel): 13 | """2 Layer linear-logistic neural network using Keras""" 14 | 15 | parametric = True 16 | bootlist = None 17 | 18 | def __init__(self, n_neurons=2, epochs=200, learning_rate=0.01, momentum=0.0, decay=0.0, nesterov=False, loss="binary_crossentropy", batch_size=None, verbose=0): 19 | self.n_neurons = n_neurons 20 | self.verbose = verbose 21 | self.n_epochs = epochs 22 | self.k = n_neurons 23 | self.batch_size = batch_size 24 | self.loss = loss 25 | self.optimizer = SGD(lr=learning_rate, momentum=momentum, decay=decay, nesterov=nesterov) 26 | 27 | self.__name__ = 'cimcb.model.NN_ReluRelu' 28 | self.__params__ = {'n_neurons': n_neurons, 'epochs': epochs, 'learning_rate': learning_rate, 'momentum': momentum, 'decay': decay, 'nesterov': nesterov, 'loss': loss, 'batch_size': batch_size, 'verbose': verbose} 29 | 30 | def set_params(self, params): 31 | self.__init__(**params) 32 | 33 | def train(self, X, Y, epoch_ypred=False, epoch_xtest=None): 34 | """ Fit the neural network model, save additional stats (as attributes) and return Y predicted values. 35 | 36 | Parameters 37 | ---------- 38 | X : array-like, shape = [n_samples, n_features] 39 | Predictor variables, where n_samples is the number of samples and n_features is the number of predictors. 40 | 41 | Y : array-like, shape = [n_samples, 1] 42 | Response variables, where n_samples is the number of samples. 43 | 44 | Returns 45 | ------- 46 | y_pred_train : array-like, shape = [n_samples, 1] 47 | Predicted y score for samples. 
48 | """ 49 | 50 | # # If using Keras, set tf to 1 core 51 | # config = K.tf.ConfigProto(intra_op_parallelism_threads=8, inter_op_parallelism_threads=8, allow_soft_placement=True) 52 | # session = tf.Session(config=config) 53 | # K.set_session(session) 54 | 55 | # If batch-size is None: 56 | if self.batch_size is None: 57 | self.batch_size = len(X) 58 | 59 | self.model = Sequential() 60 | self.model.add(Dense(self.n_neurons, activation="relu", input_dim=len(X.T))) 61 | self.model.add(Dense(1, activation="relu")) 62 | self.model.compile(optimizer=self.optimizer, loss=self.loss, metrics=["accuracy"]) 63 | 64 | # If epoch_ypred is True, calculate ypred for each epoch 65 | if epoch_ypred is True: 66 | self.epoch = YpredCallback(self.model, X, epoch_xtest) 67 | else: 68 | self.epoch = Callback() 69 | 70 | # Fit 71 | self.model.fit(X, Y, epochs=self.n_epochs, batch_size=self.batch_size, verbose=self.verbose, callbacks=[self.epoch]) 72 | 73 | layer1_weight = self.model.layers[0].get_weights()[0] 74 | layer1_bias = self.model.layers[0].get_weights()[1] 75 | layer2_weight = self.model.layers[1].get_weights()[0] 76 | layer2_bias = self.model.layers[1].get_weights()[1] 77 | 78 | # Not sure about the naming scheme (trying to match PLS) 79 | self.model.x_loadings_ = layer1_weight 80 | self.model.x_scores_ = np.matmul(X, self.model.x_loadings_) + layer1_bias 81 | self.model.y_loadings_ = layer2_weight 82 | self.model.pctvar_ = np.ones((1, len(self.model.y_loadings_[0]))) 83 | self.xcols_num = len(X.T) 84 | self.model.pctvar_ = sum(abs(self.model.x_scores_) ** 2) / sum(sum(abs(X) ** 2)) * 100 85 | y_pred_train = self.model.predict(X).flatten() 86 | 87 | # Storing X, Y, and Y_pred 88 | self.Y_pred = y_pred_train 89 | self.X = X 90 | self.Y = Y 91 | return y_pred_train 92 | 93 | def test(self, X, Y=None): 94 | """Calculate and return Y predicted value. 95 | 96 | Parameters 97 | ---------- 98 | X : array-like, shape = [n_samples, n_features] 99 | Test variables, where n_samples is the number of samples and n_features is the number of predictors. 100 | 101 | Returns 102 | ------- 103 | y_pred_test : array-like, shape = [n_samples, 1] 104 | Predicted y score for samples. 
105 | """ 106 | 107 | layer1_weight = self.model.layers[0].get_weights()[0] 108 | layer1_bias = self.model.layers[0].get_weights()[1] 109 | layer2_weight = self.model.layers[1].get_weights()[0] 110 | layer2_bias = self.model.layers[1].get_weights()[1] 111 | 112 | self.model.y_loadings_ = layer2_weight 113 | self.model.y_loadings_ = self.model.y_loadings_.reshape(1, len(self.model.y_loadings_)) 114 | self.model.x_loadings_ = layer1_weight 115 | self.model.x_scores_ = np.matmul(X, self.model.x_loadings_) + layer1_bias 116 | y_pred_test = self.model.predict(X).flatten() 117 | self.model.pctvar_ = sum(abs(self.model.x_scores_) ** 2) / sum(sum(abs(X) ** 2)) * 100 118 | return y_pred_test 119 | -------------------------------------------------------------------------------- /cimcb/model/NN_LinearTan.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from keras.callbacks import Callback 3 | from keras.optimizers import SGD 4 | from keras.models import Sequential 5 | from keras.layers import Dense 6 | import tensorflow as tf 7 | from keras import backend as K 8 | from .BaseModel import BaseModel 9 | from ..utils import YpredCallback 10 | 11 | 12 | class NN_LinearTan(BaseModel): 13 | """2 Layer linear-logistic neural network using Keras""" 14 | 15 | parametric = True 16 | bootlist = None 17 | 18 | def __init__(self, n_neurons=2, epochs=200, learning_rate=0.01, momentum=0.0, decay=0.0, nesterov=False, loss="binary_crossentropy", batch_size=None, verbose=0): 19 | self.n_neurons = n_neurons 20 | self.verbose = verbose 21 | self.n_epochs = epochs 22 | self.k = n_neurons 23 | self.batch_size = batch_size 24 | self.loss = loss 25 | self.optimizer = SGD(lr=learning_rate, momentum=momentum, decay=decay, nesterov=nesterov) 26 | 27 | self.__name__ = 'cimcb.model.NN_LinearTan' 28 | self.__params__ = {'n_neurons': n_neurons, 'epochs': epochs, 'learning_rate': learning_rate, 'momentum': momentum, 'decay': decay, 'nesterov': nesterov, 'loss': loss, 'batch_size': batch_size, 'verbose': verbose} 29 | 30 | def set_params(self, params): 31 | self.__init__(**params) 32 | 33 | def train(self, X, Y, epoch_ypred=False, epoch_xtest=None): 34 | """ Fit the neural network model, save additional stats (as attributes) and return Y predicted values. 35 | 36 | Parameters 37 | ---------- 38 | X : array-like, shape = [n_samples, n_features] 39 | Predictor variables, where n_samples is the number of samples and n_features is the number of predictors. 40 | 41 | Y : array-like, shape = [n_samples, 1] 42 | Response variables, where n_samples is the number of samples. 43 | 44 | Returns 45 | ------- 46 | y_pred_train : array-like, shape = [n_samples, 1] 47 | Predicted y score for samples. 
48 | """ 49 | 50 | # # If using Keras, set tf to 1 core 51 | # config = K.tf.ConfigProto(intra_op_parallelism_threads=8, inter_op_parallelism_threads=8, allow_soft_placement=True) 52 | # session = tf.Session(config=config) 53 | # K.set_session(session) 54 | 55 | # If batch-size is None: 56 | if self.batch_size is None: 57 | self.batch_size = len(X) 58 | 59 | self.model = Sequential() 60 | self.model.add(Dense(self.n_neurons, activation="linear", input_dim=len(X.T))) 61 | self.model.add(Dense(1, activation="tanh")) 62 | self.model.compile(optimizer=self.optimizer, loss=self.loss, metrics=["accuracy"]) 63 | 64 | # If epoch_ypred is True, calculate ypred for each epoch 65 | if epoch_ypred is True: 66 | self.epoch = YpredCallback(self.model, X, epoch_xtest) 67 | else: 68 | self.epoch = Callback() 69 | 70 | # Fit 71 | self.model.fit(X, Y, epochs=self.n_epochs, batch_size=self.batch_size, verbose=self.verbose, callbacks=[self.epoch]) 72 | 73 | layer1_weight = self.model.layers[0].get_weights()[0] 74 | layer1_bias = self.model.layers[0].get_weights()[1] 75 | layer2_weight = self.model.layers[1].get_weights()[0] 76 | layer2_bias = self.model.layers[1].get_weights()[1] 77 | 78 | # Not sure about the naming scheme (trying to match PLS) 79 | self.model.x_loadings_ = layer1_weight 80 | self.model.x_scores_ = np.matmul(X, self.model.x_loadings_) + layer1_bias 81 | self.model.y_loadings_ = layer2_weight 82 | self.model.pctvar_ = np.ones((1, len(self.model.y_loadings_[0]))) 83 | self.xcols_num = len(X.T) 84 | self.model.pctvar_ = sum(abs(self.model.x_scores_) ** 2) / sum(sum(abs(X) ** 2)) * 100 85 | y_pred_train = self.model.predict(X).flatten() 86 | 87 | # Storing X, Y, and Y_pred 88 | self.Y_pred = y_pred_train 89 | self.X = X 90 | self.Y = Y 91 | return y_pred_train 92 | 93 | def test(self, X, Y=None): 94 | """Calculate and return Y predicted value. 95 | 96 | Parameters 97 | ---------- 98 | X : array-like, shape = [n_samples, n_features] 99 | Test variables, where n_samples is the number of samples and n_features is the number of predictors. 100 | 101 | Returns 102 | ------- 103 | y_pred_test : array-like, shape = [n_samples, 1] 104 | Predicted y score for samples. 
105 | """ 106 | 107 | layer1_weight = self.model.layers[0].get_weights()[0] 108 | layer1_bias = self.model.layers[0].get_weights()[1] 109 | layer2_weight = self.model.layers[1].get_weights()[0] 110 | layer2_bias = self.model.layers[1].get_weights()[1] 111 | 112 | self.model.y_loadings_ = layer2_weight 113 | self.model.y_loadings_ = self.model.y_loadings_.reshape(1, len(self.model.y_loadings_)) 114 | self.model.x_loadings_ = layer1_weight 115 | self.model.x_scores_ = np.matmul(X, self.model.x_loadings_) + layer1_bias 116 | y_pred_test = self.model.predict(X).flatten() 117 | self.model.pctvar_ = sum(abs(self.model.x_scores_) ** 2) / sum(sum(abs(X) ** 2)) * 100 118 | return y_pred_test 119 | -------------------------------------------------------------------------------- /cimcb/model/NN_LogitRelu.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from keras.callbacks import Callback 3 | from keras.optimizers import SGD 4 | from keras.models import Sequential 5 | from keras.layers import Dense 6 | import tensorflow as tf 7 | from keras import backend as K 8 | from .BaseModel import BaseModel 9 | from ..utils import YpredCallback 10 | 11 | 12 | class NN_LogitRelu(BaseModel): 13 | """2 Layer linear-logistic neural network using Keras""" 14 | 15 | parametric = True 16 | bootlist = None 17 | 18 | def __init__(self, n_neurons=2, epochs=200, learning_rate=0.01, momentum=0.0, decay=0.0, nesterov=False, loss="binary_crossentropy", batch_size=None, verbose=0): 19 | self.n_neurons = n_neurons 20 | self.verbose = verbose 21 | self.n_epochs = epochs 22 | self.k = n_neurons 23 | self.batch_size = batch_size 24 | self.loss = loss 25 | self.optimizer = SGD(lr=learning_rate, momentum=momentum, decay=decay, nesterov=nesterov) 26 | 27 | self.__name__ = 'cimcb.model.NN_LogitRelu' 28 | self.__params__ = {'n_neurons': n_neurons, 'epochs': epochs, 'learning_rate': learning_rate, 'momentum': momentum, 'decay': decay, 'nesterov': nesterov, 'loss': loss, 'batch_size': batch_size, 'verbose': verbose} 29 | 30 | def set_params(self, params): 31 | self.__init__(**params) 32 | 33 | def train(self, X, Y, epoch_ypred=False, epoch_xtest=None): 34 | """ Fit the neural network model, save additional stats (as attributes) and return Y predicted values. 35 | 36 | Parameters 37 | ---------- 38 | X : array-like, shape = [n_samples, n_features] 39 | Predictor variables, where n_samples is the number of samples and n_features is the number of predictors. 40 | 41 | Y : array-like, shape = [n_samples, 1] 42 | Response variables, where n_samples is the number of samples. 43 | 44 | Returns 45 | ------- 46 | y_pred_train : array-like, shape = [n_samples, 1] 47 | Predicted y score for samples. 
48 | """ 49 | 50 | # # If using Keras, set tf to 1 core 51 | # config = K.tf.ConfigProto(intra_op_parallelism_threads=8, inter_op_parallelism_threads=8, allow_soft_placement=True) 52 | # session = tf.Session(config=config) 53 | # K.set_session(session) 54 | 55 | # If batch-size is None: 56 | if self.batch_size is None: 57 | self.batch_size = len(X) 58 | 59 | self.model = Sequential() 60 | self.model.add(Dense(self.n_neurons, activation="linear", input_dim=len(X.T))) 61 | self.model.add(Dense(1, activation="relu")) 62 | self.model.compile(optimizer=self.optimizer, loss=self.loss, metrics=["accuracy"]) 63 | 64 | # If epoch_ypred is True, calculate ypred for each epoch 65 | if epoch_ypred is True: 66 | self.epoch = YpredCallback(self.model, X, epoch_xtest) 67 | else: 68 | self.epoch = Callback() 69 | 70 | # Fit 71 | self.model.fit(X, Y, epochs=self.n_epochs, batch_size=self.batch_size, verbose=self.verbose, callbacks=[self.epoch]) 72 | 73 | layer1_weight = self.model.layers[0].get_weights()[0] 74 | layer1_bias = self.model.layers[0].get_weights()[1] 75 | layer2_weight = self.model.layers[1].get_weights()[0] 76 | layer2_bias = self.model.layers[1].get_weights()[1] 77 | 78 | # Not sure about the naming scheme (trying to match PLS) 79 | self.model.x_loadings_ = layer1_weight 80 | self.model.x_scores_ = np.matmul(X, self.model.x_loadings_) + layer1_bias 81 | self.model.y_loadings_ = layer2_weight 82 | self.model.pctvar_ = np.ones((1, len(self.model.y_loadings_[0]))) 83 | self.xcols_num = len(X.T) 84 | self.model.pctvar_ = sum(abs(self.model.x_scores_) ** 2) / sum(sum(abs(X) ** 2)) * 100 85 | y_pred_train = self.model.predict(X).flatten() 86 | 87 | # Storing X, Y, and Y_pred 88 | self.Y_pred = y_pred_train 89 | self.X = X 90 | self.Y = Y 91 | return y_pred_train 92 | 93 | def test(self, X, Y=None): 94 | """Calculate and return Y predicted value. 95 | 96 | Parameters 97 | ---------- 98 | X : array-like, shape = [n_samples, n_features] 99 | Test variables, where n_samples is the number of samples and n_features is the number of predictors. 100 | 101 | Returns 102 | ------- 103 | y_pred_test : array-like, shape = [n_samples, 1] 104 | Predicted y score for samples. 
105 | """ 106 | 107 | layer1_weight = self.model.layers[0].get_weights()[0] 108 | layer1_bias = self.model.layers[0].get_weights()[1] 109 | layer2_weight = self.model.layers[1].get_weights()[0] 110 | layer2_bias = self.model.layers[1].get_weights()[1] 111 | 112 | self.model.y_loadings_ = layer2_weight 113 | self.model.y_loadings_ = self.model.y_loadings_.reshape(1, len(self.model.y_loadings_)) 114 | self.model.x_loadings_ = layer1_weight 115 | self.model.x_scores_ = np.matmul(X, self.model.x_loadings_) + layer1_bias 116 | y_pred_test = self.model.predict(X).flatten() 117 | self.model.pctvar_ = sum(abs(self.model.x_scores_) ** 2) / sum(sum(abs(X) ** 2)) * 100 118 | return y_pred_test 119 | -------------------------------------------------------------------------------- /cimcb/model/NN_LogitTan.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from keras.callbacks import Callback 3 | from keras.optimizers import SGD 4 | from keras.models import Sequential 5 | from keras.layers import Dense 6 | import tensorflow as tf 7 | from keras import backend as K 8 | from .BaseModel import BaseModel 9 | from ..utils import YpredCallback 10 | 11 | 12 | class NN_LogitTan(BaseModel): 13 | """2 Layer linear-logistic neural network using Keras""" 14 | 15 | parametric = True 16 | bootlist = None 17 | 18 | def __init__(self, n_neurons=2, epochs=200, learning_rate=0.01, momentum=0.0, decay=0.0, nesterov=False, loss="binary_crossentropy", batch_size=None, verbose=0): 19 | self.n_neurons = n_neurons 20 | self.verbose = verbose 21 | self.n_epochs = epochs 22 | self.k = n_neurons 23 | self.batch_size = batch_size 24 | self.loss = loss 25 | self.optimizer = SGD(lr=learning_rate, momentum=momentum, decay=decay, nesterov=nesterov) 26 | 27 | self.__name__ = 'cimcb.model.NN_LogitTan' 28 | self.__params__ = {'n_neurons': n_neurons, 'epochs': epochs, 'learning_rate': learning_rate, 'momentum': momentum, 'decay': decay, 'nesterov': nesterov, 'loss': loss, 'batch_size': batch_size, 'verbose': verbose} 29 | 30 | def set_params(self, params): 31 | self.__init__(**params) 32 | 33 | def train(self, X, Y, epoch_ypred=False, epoch_xtest=None): 34 | """ Fit the neural network model, save additional stats (as attributes) and return Y predicted values. 35 | 36 | Parameters 37 | ---------- 38 | X : array-like, shape = [n_samples, n_features] 39 | Predictor variables, where n_samples is the number of samples and n_features is the number of predictors. 40 | 41 | Y : array-like, shape = [n_samples, 1] 42 | Response variables, where n_samples is the number of samples. 43 | 44 | Returns 45 | ------- 46 | y_pred_train : array-like, shape = [n_samples, 1] 47 | Predicted y score for samples. 
48 | """ 49 | 50 | # # If using Keras, set tf to 1 core 51 | # config = K.tf.ConfigProto(intra_op_parallelism_threads=8, inter_op_parallelism_threads=8, allow_soft_placement=True) 52 | # session = tf.Session(config=config) 53 | # K.set_session(session) 54 | 55 | # If batch-size is None: 56 | if self.batch_size is None: 57 | self.batch_size = len(X) 58 | 59 | self.model = Sequential() 60 | self.model.add(Dense(self.n_neurons, activation="sigmoid", input_dim=len(X.T))) 61 | self.model.add(Dense(1, activation="tanh")) 62 | self.model.compile(optimizer=self.optimizer, loss=self.loss, metrics=["accuracy"]) 63 | 64 | # If epoch_ypred is True, calculate ypred for each epoch 65 | if epoch_ypred is True: 66 | self.epoch = YpredCallback(self.model, X, epoch_xtest) 67 | else: 68 | self.epoch = Callback() 69 | 70 | # Fit 71 | self.model.fit(X, Y, epochs=self.n_epochs, batch_size=self.batch_size, verbose=self.verbose, callbacks=[self.epoch]) 72 | 73 | layer1_weight = self.model.layers[0].get_weights()[0] 74 | layer1_bias = self.model.layers[0].get_weights()[1] 75 | layer2_weight = self.model.layers[1].get_weights()[0] 76 | layer2_bias = self.model.layers[1].get_weights()[1] 77 | 78 | # Not sure about the naming scheme (trying to match PLS) 79 | self.model.x_loadings_ = layer1_weight 80 | self.model.x_scores_ = np.matmul(X, self.model.x_loadings_) + layer1_bias 81 | self.model.y_loadings_ = layer2_weight 82 | self.model.pctvar_ = np.ones((1, len(self.model.y_loadings_[0]))) 83 | self.xcols_num = len(X.T) 84 | self.model.pctvar_ = sum(abs(self.model.x_scores_) ** 2) / sum(sum(abs(X) ** 2)) * 100 85 | y_pred_train = self.model.predict(X).flatten() 86 | 87 | # Storing X, Y, and Y_pred 88 | self.Y_pred = y_pred_train 89 | self.X = X 90 | self.Y = Y 91 | return y_pred_train 92 | 93 | def test(self, X, Y=None): 94 | """Calculate and return Y predicted value. 95 | 96 | Parameters 97 | ---------- 98 | X : array-like, shape = [n_samples, n_features] 99 | Test variables, where n_samples is the number of samples and n_features is the number of predictors. 100 | 101 | Returns 102 | ------- 103 | y_pred_test : array-like, shape = [n_samples, 1] 104 | Predicted y score for samples. 
105 | """ 106 | 107 | layer1_weight = self.model.layers[0].get_weights()[0] 108 | layer1_bias = self.model.layers[0].get_weights()[1] 109 | layer2_weight = self.model.layers[1].get_weights()[0] 110 | layer2_bias = self.model.layers[1].get_weights()[1] 111 | 112 | self.model.y_loadings_ = layer2_weight 113 | self.model.y_loadings_ = self.model.y_loadings_.reshape(1, len(self.model.y_loadings_)) 114 | self.model.x_loadings_ = layer1_weight 115 | self.model.x_scores_ = np.matmul(X, self.model.x_loadings_) + layer1_bias 116 | y_pred_test = self.model.predict(X).flatten() 117 | self.model.pctvar_ = sum(abs(self.model.x_scores_) ** 2) / sum(sum(abs(X) ** 2)) * 100 118 | return y_pred_test 119 | -------------------------------------------------------------------------------- /cimcb/model/NN_ReluLogit.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from keras.callbacks import Callback 3 | from keras.optimizers import SGD 4 | from keras.models import Sequential 5 | from keras.layers import Dense 6 | import tensorflow as tf 7 | from keras import backend as K 8 | from .BaseModel import BaseModel 9 | from ..utils import YpredCallback 10 | 11 | 12 | class NN_ReluLogit(BaseModel): 13 | """2 Layer linear-logistic neural network using Keras""" 14 | 15 | parametric = True 16 | bootlist = None 17 | 18 | def __init__(self, n_neurons=2, epochs=200, learning_rate=0.01, momentum=0.0, decay=0.0, nesterov=False, loss="binary_crossentropy", batch_size=None, verbose=0): 19 | self.n_neurons = n_neurons 20 | self.verbose = verbose 21 | self.n_epochs = epochs 22 | self.k = n_neurons 23 | self.batch_size = batch_size 24 | self.loss = loss 25 | self.optimizer = SGD(lr=learning_rate, momentum=momentum, decay=decay, nesterov=nesterov) 26 | 27 | self.__name__ = 'cimcb.model.NN_ReluLogit' 28 | self.__params__ = {'n_neurons': n_neurons, 'epochs': epochs, 'learning_rate': learning_rate, 'momentum': momentum, 'decay': decay, 'nesterov': nesterov, 'loss': loss, 'batch_size': batch_size, 'verbose': verbose} 29 | 30 | def set_params(self, params): 31 | self.__init__(**params) 32 | 33 | def train(self, X, Y, epoch_ypred=False, epoch_xtest=None): 34 | """ Fit the neural network model, save additional stats (as attributes) and return Y predicted values. 35 | 36 | Parameters 37 | ---------- 38 | X : array-like, shape = [n_samples, n_features] 39 | Predictor variables, where n_samples is the number of samples and n_features is the number of predictors. 40 | 41 | Y : array-like, shape = [n_samples, 1] 42 | Response variables, where n_samples is the number of samples. 43 | 44 | Returns 45 | ------- 46 | y_pred_train : array-like, shape = [n_samples, 1] 47 | Predicted y score for samples. 
48 | """ 49 | 50 | # # If using Keras, set tf to 1 core 51 | # config = K.tf.ConfigProto(intra_op_parallelism_threads=8, inter_op_parallelism_threads=8, allow_soft_placement=True) 52 | # session = tf.Session(config=config) 53 | # K.set_session(session) 54 | 55 | # If batch-size is None: 56 | if self.batch_size is None: 57 | self.batch_size = len(X) 58 | 59 | self.model = Sequential() 60 | self.model.add(Dense(self.n_neurons, activation="relu", input_dim=len(X.T))) 61 | self.model.add(Dense(1, activation="sigmoid")) 62 | self.model.compile(optimizer=self.optimizer, loss=self.loss, metrics=["accuracy"]) 63 | 64 | # If epoch_ypred is True, calculate ypred for each epoch 65 | if epoch_ypred is True: 66 | self.epoch = YpredCallback(self.model, X, epoch_xtest) 67 | else: 68 | self.epoch = Callback() 69 | 70 | # Fit 71 | self.model.fit(X, Y, epochs=self.n_epochs, batch_size=self.batch_size, verbose=self.verbose, callbacks=[self.epoch]) 72 | 73 | layer1_weight = self.model.layers[0].get_weights()[0] 74 | layer1_bias = self.model.layers[0].get_weights()[1] 75 | layer2_weight = self.model.layers[1].get_weights()[0] 76 | layer2_bias = self.model.layers[1].get_weights()[1] 77 | 78 | # Not sure about the naming scheme (trying to match PLS) 79 | self.model.x_loadings_ = layer1_weight 80 | self.model.x_scores_ = np.matmul(X, self.model.x_loadings_) + layer1_bias 81 | self.model.y_loadings_ = layer2_weight 82 | self.model.pctvar_ = np.ones((1, len(self.model.y_loadings_[0]))) 83 | self.xcols_num = len(X.T) 84 | self.model.pctvar_ = sum(abs(self.model.x_scores_) ** 2) / sum(sum(abs(X) ** 2)) * 100 85 | y_pred_train = self.model.predict(X).flatten() 86 | 87 | # Storing X, Y, and Y_pred 88 | self.Y_pred = y_pred_train 89 | self.X = X 90 | self.Y = Y 91 | return y_pred_train 92 | 93 | def test(self, X, Y=None): 94 | """Calculate and return Y predicted value. 95 | 96 | Parameters 97 | ---------- 98 | X : array-like, shape = [n_samples, n_features] 99 | Test variables, where n_samples is the number of samples and n_features is the number of predictors. 100 | 101 | Returns 102 | ------- 103 | y_pred_test : array-like, shape = [n_samples, 1] 104 | Predicted y score for samples. 
105 | """ 106 | 107 | layer1_weight = self.model.layers[0].get_weights()[0] 108 | layer1_bias = self.model.layers[0].get_weights()[1] 109 | layer2_weight = self.model.layers[1].get_weights()[0] 110 | layer2_bias = self.model.layers[1].get_weights()[1] 111 | 112 | self.model.y_loadings_ = layer2_weight 113 | self.model.y_loadings_ = self.model.y_loadings_.reshape(1, len(self.model.y_loadings_)) 114 | self.model.x_loadings_ = layer1_weight 115 | self.model.x_scores_ = np.matmul(X, self.model.x_loadings_) + layer1_bias 116 | y_pred_test = self.model.predict(X).flatten() 117 | self.model.pctvar_ = sum(abs(self.model.x_scores_) ** 2) / sum(sum(abs(X) ** 2)) * 100 118 | return y_pred_test 119 | -------------------------------------------------------------------------------- /cimcb/model/NN_TanLinear.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from keras.callbacks import Callback 3 | from keras.optimizers import SGD 4 | from keras.models import Sequential 5 | from keras.layers import Dense 6 | import tensorflow as tf 7 | from keras import backend as K 8 | from .BaseModel import BaseModel 9 | from ..utils import YpredCallback 10 | 11 | 12 | class NN_TanLinear(BaseModel): 13 | """2 Layer linear-logistic neural network using Keras""" 14 | 15 | parametric = True 16 | bootlist = None 17 | 18 | def __init__(self, n_neurons=2, epochs=200, learning_rate=0.01, momentum=0.0, decay=0.0, nesterov=False, loss="binary_crossentropy", batch_size=None, verbose=0): 19 | self.n_neurons = n_neurons 20 | self.verbose = verbose 21 | self.n_epochs = epochs 22 | self.k = n_neurons 23 | self.batch_size = batch_size 24 | self.loss = loss 25 | self.optimizer = SGD(lr=learning_rate, momentum=momentum, decay=decay, nesterov=nesterov) 26 | 27 | self.__name__ = 'cimcb.model.NN_TanLinear' 28 | self.__params__ = {'n_neurons': n_neurons, 'epochs': epochs, 'learning_rate': learning_rate, 'momentum': momentum, 'decay': decay, 'nesterov': nesterov, 'loss': loss, 'batch_size': batch_size, 'verbose': verbose} 29 | 30 | def set_params(self, params): 31 | self.__init__(**params) 32 | 33 | def train(self, X, Y, epoch_ypred=False, epoch_xtest=None): 34 | """ Fit the neural network model, save additional stats (as attributes) and return Y predicted values. 35 | 36 | Parameters 37 | ---------- 38 | X : array-like, shape = [n_samples, n_features] 39 | Predictor variables, where n_samples is the number of samples and n_features is the number of predictors. 40 | 41 | Y : array-like, shape = [n_samples, 1] 42 | Response variables, where n_samples is the number of samples. 43 | 44 | Returns 45 | ------- 46 | y_pred_train : array-like, shape = [n_samples, 1] 47 | Predicted y score for samples. 
48 | """ 49 | 50 | # # If using Keras, set tf to 1 core 51 | # config = K.tf.ConfigProto(intra_op_parallelism_threads=8, inter_op_parallelism_threads=8, allow_soft_placement=True) 52 | # session = tf.Session(config=config) 53 | # K.set_session(session) 54 | 55 | # If batch-size is None: 56 | if self.batch_size is None: 57 | self.batch_size = len(X) 58 | 59 | self.model = Sequential() 60 | self.model.add(Dense(self.n_neurons, activation="tanh", input_dim=len(X.T))) 61 | self.model.add(Dense(1, activation="linear")) 62 | self.model.compile(optimizer=self.optimizer, loss=self.loss, metrics=["accuracy"]) 63 | 64 | # If epoch_ypred is True, calculate ypred for each epoch 65 | if epoch_ypred is True: 66 | self.epoch = YpredCallback(self.model, X, epoch_xtest) 67 | else: 68 | self.epoch = Callback() 69 | 70 | # Fit 71 | self.model.fit(X, Y, epochs=self.n_epochs, batch_size=self.batch_size, verbose=self.verbose, callbacks=[self.epoch]) 72 | 73 | layer1_weight = self.model.layers[0].get_weights()[0] 74 | layer1_bias = self.model.layers[0].get_weights()[1] 75 | layer2_weight = self.model.layers[1].get_weights()[0] 76 | layer2_bias = self.model.layers[1].get_weights()[1] 77 | 78 | # Not sure about the naming scheme (trying to match PLS) 79 | self.model.x_loadings_ = layer1_weight 80 | self.model.x_scores_ = np.matmul(X, self.model.x_loadings_) + layer1_bias 81 | self.model.y_loadings_ = layer2_weight 82 | self.model.pctvar_ = np.ones((1, len(self.model.y_loadings_[0]))) 83 | self.xcols_num = len(X.T) 84 | self.model.pctvar_ = sum(abs(self.model.x_scores_) ** 2) / sum(sum(abs(X) ** 2)) * 100 85 | y_pred_train = self.model.predict(X).flatten() 86 | 87 | # Storing X, Y, and Y_pred 88 | self.Y_pred = y_pred_train 89 | self.X = X 90 | self.Y = Y 91 | return y_pred_train 92 | 93 | def test(self, X, Y=None): 94 | """Calculate and return Y predicted value. 95 | 96 | Parameters 97 | ---------- 98 | X : array-like, shape = [n_samples, n_features] 99 | Test variables, where n_samples is the number of samples and n_features is the number of predictors. 100 | 101 | Returns 102 | ------- 103 | y_pred_test : array-like, shape = [n_samples, 1] 104 | Predicted y score for samples. 
105 | """ 106 | 107 | layer1_weight = self.model.layers[0].get_weights()[0] 108 | layer1_bias = self.model.layers[0].get_weights()[1] 109 | layer2_weight = self.model.layers[1].get_weights()[0] 110 | layer2_bias = self.model.layers[1].get_weights()[1] 111 | 112 | self.model.y_loadings_ = layer2_weight 113 | self.model.y_loadings_ = self.model.y_loadings_.reshape(1, len(self.model.y_loadings_)) 114 | self.model.x_loadings_ = layer1_weight 115 | self.model.x_scores_ = np.matmul(X, self.model.x_loadings_) + layer1_bias 116 | y_pred_test = self.model.predict(X).flatten() 117 | self.model.pctvar_ = sum(abs(self.model.x_scores_) ** 2) / sum(sum(abs(X) ** 2)) * 100 118 | return y_pred_test 119 | -------------------------------------------------------------------------------- /cimcb/model/NN_TanLogit.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from keras.callbacks import Callback 3 | from keras.optimizers import SGD 4 | from keras.models import Sequential 5 | from keras.layers import Dense 6 | import tensorflow as tf 7 | from keras import backend as K 8 | from .BaseModel import BaseModel 9 | from ..utils import YpredCallback 10 | 11 | 12 | class NN_TanLogit(BaseModel): 13 | """2 Layer linear-logistic neural network using Keras""" 14 | 15 | parametric = True 16 | bootlist = None 17 | 18 | def __init__(self, n_neurons=2, epochs=200, learning_rate=0.01, momentum=0.0, decay=0.0, nesterov=False, loss="binary_crossentropy", batch_size=None, verbose=0): 19 | self.n_neurons = n_neurons 20 | self.verbose = verbose 21 | self.n_epochs = epochs 22 | self.k = n_neurons 23 | self.batch_size = batch_size 24 | self.loss = loss 25 | self.optimizer = SGD(lr=learning_rate, momentum=momentum, decay=decay, nesterov=nesterov) 26 | 27 | self.__name__ = 'cimcb.model.NN_TanLogit' 28 | self.__params__ = {'n_neurons': n_neurons, 'epochs': epochs, 'learning_rate': learning_rate, 'momentum': momentum, 'decay': decay, 'nesterov': nesterov, 'loss': loss, 'batch_size': batch_size, 'verbose': verbose} 29 | 30 | def set_params(self, params): 31 | self.__init__(**params) 32 | 33 | def train(self, X, Y, epoch_ypred=False, epoch_xtest=None): 34 | """ Fit the neural network model, save additional stats (as attributes) and return Y predicted values. 35 | 36 | Parameters 37 | ---------- 38 | X : array-like, shape = [n_samples, n_features] 39 | Predictor variables, where n_samples is the number of samples and n_features is the number of predictors. 40 | 41 | Y : array-like, shape = [n_samples, 1] 42 | Response variables, where n_samples is the number of samples. 43 | 44 | Returns 45 | ------- 46 | y_pred_train : array-like, shape = [n_samples, 1] 47 | Predicted y score for samples. 
48 | """ 49 | 50 | # # If using Keras, set tf to 1 core 51 | # config = K.tf.ConfigProto(intra_op_parallelism_threads=8, inter_op_parallelism_threads=8, allow_soft_placement=True) 52 | # session = tf.Session(config=config) 53 | # K.set_session(session) 54 | 55 | # If batch-size is None: 56 | if self.batch_size is None: 57 | self.batch_size = len(X) 58 | 59 | self.model = Sequential() 60 | self.model.add(Dense(self.n_neurons, activation="tanh", input_dim=len(X.T))) 61 | self.model.add(Dense(1, activation="sigmoid")) 62 | self.model.compile(optimizer=self.optimizer, loss=self.loss, metrics=["accuracy"]) 63 | 64 | # If epoch_ypred is True, calculate ypred for each epoch 65 | if epoch_ypred is True: 66 | self.epoch = YpredCallback(self.model, X, epoch_xtest) 67 | else: 68 | self.epoch = Callback() 69 | 70 | # Fit 71 | self.model.fit(X, Y, epochs=self.n_epochs, batch_size=self.batch_size, verbose=self.verbose, callbacks=[self.epoch]) 72 | 73 | layer1_weight = self.model.layers[0].get_weights()[0] 74 | layer1_bias = self.model.layers[0].get_weights()[1] 75 | layer2_weight = self.model.layers[1].get_weights()[0] 76 | layer2_bias = self.model.layers[1].get_weights()[1] 77 | 78 | # Not sure about the naming scheme (trying to match PLS) 79 | self.model.x_loadings_ = layer1_weight 80 | self.model.x_scores_ = np.matmul(X, self.model.x_loadings_) + layer1_bias 81 | self.model.y_loadings_ = layer2_weight 82 | self.model.pctvar_ = np.ones((1, len(self.model.y_loadings_[0]))) 83 | self.xcols_num = len(X.T) 84 | self.model.pctvar_ = sum(abs(self.model.x_scores_) ** 2) / sum(sum(abs(X) ** 2)) * 100 85 | y_pred_train = self.model.predict(X).flatten() 86 | 87 | # Storing X, Y, and Y_pred 88 | self.Y_pred = y_pred_train 89 | self.X = X 90 | self.Y = Y 91 | return y_pred_train 92 | 93 | def test(self, X, Y=None): 94 | """Calculate and return Y predicted value. 95 | 96 | Parameters 97 | ---------- 98 | X : array-like, shape = [n_samples, n_features] 99 | Test variables, where n_samples is the number of samples and n_features is the number of predictors. 100 | 101 | Returns 102 | ------- 103 | y_pred_test : array-like, shape = [n_samples, 1] 104 | Predicted y score for samples. 
105 | """ 106 | 107 | layer1_weight = self.model.layers[0].get_weights()[0] 108 | layer1_bias = self.model.layers[0].get_weights()[1] 109 | layer2_weight = self.model.layers[1].get_weights()[0] 110 | layer2_bias = self.model.layers[1].get_weights()[1] 111 | 112 | self.model.y_loadings_ = layer2_weight 113 | self.model.y_loadings_ = self.model.y_loadings_.reshape(1, len(self.model.y_loadings_)) 114 | self.model.x_loadings_ = layer1_weight 115 | self.model.x_scores_ = np.matmul(X, self.model.x_loadings_) + layer1_bias 116 | y_pred_test = self.model.predict(X).flatten() 117 | self.model.pctvar_ = sum(abs(self.model.x_scores_) ** 2) / sum(sum(abs(X) ** 2)) * 100 118 | return y_pred_test 119 | -------------------------------------------------------------------------------- /cimcb/model/NN_LinearRelu.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from keras.callbacks import Callback 3 | from keras.optimizers import SGD 4 | from keras.models import Sequential 5 | from keras.layers import Dense 6 | import tensorflow as tf 7 | from keras import backend as K 8 | from .BaseModel import BaseModel 9 | from ..utils import YpredCallback 10 | 11 | 12 | class NN_LinearRelu(BaseModel): 13 | """2 Layer linear-logistic neural network using Keras""" 14 | 15 | parametric = True 16 | bootlist = None 17 | 18 | def __init__(self, n_neurons=2, epochs=200, learning_rate=0.01, momentum=0.0, decay=0.0, nesterov=False, loss="binary_crossentropy", batch_size=None, verbose=0): 19 | self.n_neurons = n_neurons 20 | self.verbose = verbose 21 | self.n_epochs = epochs 22 | self.k = n_neurons 23 | self.batch_size = batch_size 24 | self.loss = loss 25 | self.optimizer = SGD(lr=learning_rate, momentum=momentum, decay=decay, nesterov=nesterov) 26 | 27 | self.__name__ = 'cimcb.model.NN_LinearRelu' 28 | self.__params__ = {'n_neurons': n_neurons, 'epochs': epochs, 'learning_rate': learning_rate, 'momentum': momentum, 'decay': decay, 'nesterov': nesterov, 'loss': loss, 'batch_size': batch_size, 'verbose': verbose} 29 | 30 | def set_params(self, params): 31 | self.__init__(**params) 32 | 33 | def train(self, X, Y, epoch_ypred=False, epoch_xtest=None): 34 | """ Fit the neural network model, save additional stats (as attributes) and return Y predicted values. 35 | 36 | Parameters 37 | ---------- 38 | X : array-like, shape = [n_samples, n_features] 39 | Predictor variables, where n_samples is the number of samples and n_features is the number of predictors. 40 | 41 | Y : array-like, shape = [n_samples, 1] 42 | Response variables, where n_samples is the number of samples. 43 | 44 | Returns 45 | ------- 46 | y_pred_train : array-like, shape = [n_samples, 1] 47 | Predicted y score for samples. 
48 | """ 49 | 50 | # # If using Keras, set tf to 1 core 51 | # config = K.tf.ConfigProto(intra_op_parallelism_threads=8, inter_op_parallelism_threads=8, allow_soft_placement=True) 52 | # session = tf.Session(config=config) 53 | # K.set_session(session) 54 | 55 | # If batch-size is None: 56 | if self.batch_size is None: 57 | self.batch_size = len(X) 58 | 59 | self.model = Sequential() 60 | self.model.add(Dense(self.n_neurons, activation="linear", input_dim=len(X.T))) 61 | self.model.add(Dense(1, activation="relu")) 62 | self.model.compile(optimizer=self.optimizer, loss=self.loss, metrics=["accuracy"]) 63 | 64 | # If epoch_ypred is True, calculate ypred for each epoch 65 | if epoch_ypred is True: 66 | self.epoch = YpredCallback(self.model, X, epoch_xtest) 67 | else: 68 | self.epoch = Callback() 69 | 70 | # Fit 71 | self.model.fit(X, Y, epochs=self.n_epochs, batch_size=self.batch_size, verbose=self.verbose, callbacks=[self.epoch]) 72 | 73 | layer1_weight = self.model.layers[0].get_weights()[0] 74 | layer1_bias = self.model.layers[0].get_weights()[1] 75 | layer2_weight = self.model.layers[1].get_weights()[0] 76 | layer2_bias = self.model.layers[1].get_weights()[1] 77 | 78 | # Not sure about the naming scheme (trying to match PLS) 79 | self.model.x_loadings_ = layer1_weight 80 | self.model.x_scores_ = np.matmul(X, self.model.x_loadings_) + layer1_bias 81 | self.model.y_loadings_ = layer2_weight 82 | self.model.pctvar_ = np.ones((1, len(self.model.y_loadings_[0]))) 83 | self.xcols_num = len(X.T) 84 | self.model.pctvar_ = sum(abs(self.model.x_scores_) ** 2) / sum(sum(abs(X) ** 2)) * 100 85 | y_pred_train = self.model.predict(X).flatten() 86 | 87 | # Storing X, Y, and Y_pred 88 | self.Y_pred = y_pred_train 89 | self.X = X 90 | self.Y = Y 91 | return y_pred_train 92 | 93 | def test(self, X, Y=None): 94 | """Calculate and return Y predicted value. 95 | 96 | Parameters 97 | ---------- 98 | X : array-like, shape = [n_samples, n_features] 99 | Test variables, where n_samples is the number of samples and n_features is the number of predictors. 100 | 101 | Returns 102 | ------- 103 | y_pred_test : array-like, shape = [n_samples, 1] 104 | Predicted y score for samples. 
105 | """ 106 | 107 | layer1_weight = self.model.layers[0].get_weights()[0] 108 | layer1_bias = self.model.layers[0].get_weights()[1] 109 | layer2_weight = self.model.layers[1].get_weights()[0] 110 | layer2_bias = self.model.layers[1].get_weights()[1] 111 | 112 | self.model.y_loadings_ = layer2_weight 113 | self.model.y_loadings_ = self.model.y_loadings_.reshape(1, len(self.model.y_loadings_)) 114 | self.model.x_loadings_ = layer1_weight 115 | self.model.x_scores_ = np.matmul(X, self.model.x_loadings_) + layer1_bias 116 | y_pred_test = self.model.predict(X).flatten() 117 | self.model.pctvar_ = sum(abs(self.model.x_scores_) ** 2) / sum(sum(abs(X) ** 2)) * 100 118 | return y_pred_test 119 | -------------------------------------------------------------------------------- /cimcb/model/NN_ReluLinear.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from keras.callbacks import Callback 3 | from keras.optimizers import SGD 4 | from keras.models import Sequential 5 | from keras.layers import Dense 6 | import tensorflow as tf 7 | from keras import backend as K 8 | from .BaseModel import BaseModel 9 | from ..utils import YpredCallback 10 | 11 | 12 | class NN_ReluLinear(BaseModel): 13 | """2 Layer linear-logistic neural network using Keras""" 14 | 15 | parametric = True 16 | bootlist = None 17 | 18 | def __init__(self, n_neurons=2, epochs=200, learning_rate=0.01, momentum=0.0, decay=0.0, nesterov=False, loss="binary_crossentropy", batch_size=None, verbose=0): 19 | self.n_neurons = n_neurons 20 | self.verbose = verbose 21 | self.n_epochs = epochs 22 | self.k = n_neurons 23 | self.batch_size = batch_size 24 | self.loss = loss 25 | self.optimizer = SGD(lr=learning_rate, momentum=momentum, decay=decay, nesterov=nesterov) 26 | 27 | self.__name__ = 'cimcb.model.NN_ReluLinear' 28 | self.__params__ = {'n_neurons': n_neurons, 'epochs': epochs, 'learning_rate': learning_rate, 'momentum': momentum, 'decay': decay, 'nesterov': nesterov, 'loss': loss, 'batch_size': batch_size, 'verbose': verbose} 29 | 30 | def set_params(self, params): 31 | self.__init__(**params) 32 | 33 | def train(self, X, Y, epoch_ypred=False, epoch_xtest=None): 34 | """ Fit the neural network model, save additional stats (as attributes) and return Y predicted values. 35 | 36 | Parameters 37 | ---------- 38 | X : array-like, shape = [n_samples, n_features] 39 | Predictor variables, where n_samples is the number of samples and n_features is the number of predictors. 40 | 41 | Y : array-like, shape = [n_samples, 1] 42 | Response variables, where n_samples is the number of samples. 43 | 44 | Returns 45 | ------- 46 | y_pred_train : array-like, shape = [n_samples, 1] 47 | Predicted y score for samples. 
48 | """ 49 | 50 | # # If using Keras, set tf to 1 core 51 | # config = K.tf.ConfigProto(intra_op_parallelism_threads=8, inter_op_parallelism_threads=8, allow_soft_placement=True) 52 | # session = tf.Session(config=config) 53 | # K.set_session(session) 54 | 55 | # If batch-size is None: 56 | if self.batch_size is None: 57 | self.batch_size = len(X) 58 | 59 | self.model = Sequential() 60 | self.model.add(Dense(self.n_neurons, activation="relu", input_dim=len(X.T))) 61 | self.model.add(Dense(1, activation="linear")) 62 | self.model.compile(optimizer=self.optimizer, loss=self.loss, metrics=["accuracy"]) 63 | 64 | # If epoch_ypred is True, calculate ypred for each epoch 65 | if epoch_ypred is True: 66 | self.epoch = YpredCallback(self.model, X, epoch_xtest) 67 | else: 68 | self.epoch = Callback() 69 | 70 | # Fit 71 | self.model.fit(X, Y, epochs=self.n_epochs, batch_size=self.batch_size, verbose=self.verbose, callbacks=[self.epoch]) 72 | 73 | layer1_weight = self.model.layers[0].get_weights()[0] 74 | layer1_bias = self.model.layers[0].get_weights()[1] 75 | layer2_weight = self.model.layers[1].get_weights()[0] 76 | layer2_bias = self.model.layers[1].get_weights()[1] 77 | 78 | # Not sure about the naming scheme (trying to match PLS) 79 | self.model.x_loadings_ = layer1_weight 80 | self.model.x_scores_ = np.matmul(X, self.model.x_loadings_) + layer1_bias 81 | self.model.y_loadings_ = layer2_weight 82 | self.model.pctvar_ = np.ones((1, len(self.model.y_loadings_[0]))) 83 | self.xcols_num = len(X.T) 84 | self.model.pctvar_ = sum(abs(self.model.x_scores_) ** 2) / sum(sum(abs(X) ** 2)) * 100 85 | y_pred_train = self.model.predict(X).flatten() 86 | 87 | # Storing X, Y, and Y_pred 88 | self.Y_pred = y_pred_train 89 | self.X = X 90 | self.Y = Y 91 | return y_pred_train 92 | 93 | def test(self, X, Y=None): 94 | """Calculate and return Y predicted value. 95 | 96 | Parameters 97 | ---------- 98 | X : array-like, shape = [n_samples, n_features] 99 | Test variables, where n_samples is the number of samples and n_features is the number of predictors. 100 | 101 | Returns 102 | ------- 103 | y_pred_test : array-like, shape = [n_samples, 1] 104 | Predicted y score for samples. 
105 | """ 106 | 107 | layer1_weight = self.model.layers[0].get_weights()[0] 108 | layer1_bias = self.model.layers[0].get_weights()[1] 109 | layer2_weight = self.model.layers[1].get_weights()[0] 110 | layer2_bias = self.model.layers[1].get_weights()[1] 111 | 112 | self.model.y_loadings_ = layer2_weight 113 | self.model.y_loadings_ = self.model.y_loadings_.reshape(1, len(self.model.y_loadings_)) 114 | self.model.x_loadings_ = layer1_weight 115 | self.model.x_scores_ = np.matmul(X, self.model.x_loadings_) + layer1_bias 116 | y_pred_test = self.model.predict(X).flatten() 117 | self.model.pctvar_ = sum(abs(self.model.x_scores_) ** 2) / sum(sum(abs(X) ** 2)) * 100 118 | return y_pred_test 119 | -------------------------------------------------------------------------------- /cimcb/model/NN_LogitLinear.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from keras.callbacks import Callback 3 | from keras.optimizers import SGD 4 | from keras.models import Sequential 5 | from keras.layers import Dense 6 | import tensorflow as tf 7 | from keras import backend as K 8 | from .BaseModel import BaseModel 9 | from ..utils import YpredCallback 10 | 11 | 12 | class NN_LogitLinear(BaseModel): 13 | """2 Layer linear-logistic neural network using Keras""" 14 | 15 | parametric = True 16 | bootlist = None 17 | 18 | def __init__(self, n_neurons=2, epochs=200, learning_rate=0.01, momentum=0.0, decay=0.0, nesterov=False, loss="binary_crossentropy", batch_size=None, verbose=0): 19 | self.n_neurons = n_neurons 20 | self.verbose = verbose 21 | self.n_epochs = epochs 22 | self.k = n_neurons 23 | self.batch_size = batch_size 24 | self.loss = loss 25 | self.optimizer = SGD(lr=learning_rate, momentum=momentum, decay=decay, nesterov=nesterov) 26 | 27 | self.__name__ = 'cimcb.model.NN_LogitLinear' 28 | self.__params__ = {'n_neurons': n_neurons, 'epochs': epochs, 'learning_rate': learning_rate, 'momentum': momentum, 'decay': decay, 'nesterov': nesterov, 'loss': loss, 'batch_size': batch_size, 'verbose': verbose} 29 | 30 | def set_params(self, params): 31 | self.__init__(**params) 32 | 33 | def train(self, X, Y, epoch_ypred=False, epoch_xtest=None): 34 | """ Fit the neural network model, save additional stats (as attributes) and return Y predicted values. 35 | 36 | Parameters 37 | ---------- 38 | X : array-like, shape = [n_samples, n_features] 39 | Predictor variables, where n_samples is the number of samples and n_features is the number of predictors. 40 | 41 | Y : array-like, shape = [n_samples, 1] 42 | Response variables, where n_samples is the number of samples. 43 | 44 | Returns 45 | ------- 46 | y_pred_train : array-like, shape = [n_samples, 1] 47 | Predicted y score for samples. 
48 | """ 49 | 50 | # # If using Keras, set tf to 1 core 51 | # config = K.tf.ConfigProto(intra_op_parallelism_threads=8, inter_op_parallelism_threads=8, allow_soft_placement=True) 52 | # session = tf.Session(config=config) 53 | # K.set_session(session) 54 | 55 | # If batch-size is None: 56 | if self.batch_size is None: 57 | self.batch_size = len(X) 58 | 59 | self.model = Sequential() 60 | self.model.add(Dense(self.n_neurons, activation="sigmoid", input_dim=len(X.T))) 61 | self.model.add(Dense(1, activation="linear")) 62 | self.model.compile(optimizer=self.optimizer, loss=self.loss, metrics=["accuracy"]) 63 | 64 | # If epoch_ypred is True, calculate ypred for each epoch 65 | if epoch_ypred is True: 66 | self.epoch = YpredCallback(self.model, X, epoch_xtest) 67 | else: 68 | self.epoch = Callback() 69 | 70 | # Fit 71 | self.model.fit(X, Y, epochs=self.n_epochs, batch_size=self.batch_size, verbose=self.verbose, callbacks=[self.epoch]) 72 | 73 | layer1_weight = self.model.layers[0].get_weights()[0] 74 | layer1_bias = self.model.layers[0].get_weights()[1] 75 | layer2_weight = self.model.layers[1].get_weights()[0] 76 | layer2_bias = self.model.layers[1].get_weights()[1] 77 | 78 | # Not sure about the naming scheme (trying to match PLS) 79 | self.model.x_loadings_ = layer1_weight 80 | self.model.x_scores_ = np.matmul(X, self.model.x_loadings_) + layer1_bias 81 | self.model.y_loadings_ = layer2_weight 82 | self.model.pctvar_ = np.ones((1, len(self.model.y_loadings_[0]))) 83 | self.xcols_num = len(X.T) 84 | self.model.pctvar_ = sum(abs(self.model.x_scores_) ** 2) / sum(sum(abs(X) ** 2)) * 100 85 | y_pred_train = self.model.predict(X).flatten() 86 | 87 | # Storing X, Y, and Y_pred 88 | self.Y_pred = y_pred_train 89 | self.X = X 90 | self.Y = Y 91 | return y_pred_train 92 | 93 | def test(self, X, Y=None): 94 | """Calculate and return Y predicted value. 95 | 96 | Parameters 97 | ---------- 98 | X : array-like, shape = [n_samples, n_features] 99 | Test variables, where n_samples is the number of samples and n_features is the number of predictors. 100 | 101 | Returns 102 | ------- 103 | y_pred_test : array-like, shape = [n_samples, 1] 104 | Predicted y score for samples. 
105 | """ 106 | 107 | layer1_weight = self.model.layers[0].get_weights()[0] 108 | layer1_bias = self.model.layers[0].get_weights()[1] 109 | layer2_weight = self.model.layers[1].get_weights()[0] 110 | layer2_bias = self.model.layers[1].get_weights()[1] 111 | 112 | self.model.y_loadings_ = layer2_weight 113 | self.model.y_loadings_ = self.model.y_loadings_.reshape(1, len(self.model.y_loadings_)) 114 | self.model.x_loadings_ = layer1_weight 115 | self.model.x_scores_ = np.matmul(X, self.model.x_loadings_) + layer1_bias 116 | y_pred_test = self.model.predict(X).flatten() 117 | self.model.pctvar_ = sum(abs(self.model.x_scores_) ** 2) / sum(sum(abs(X) ** 2)) * 100 118 | return y_pred_test 119 | -------------------------------------------------------------------------------- /cimcb/model/NN_LinearLogit.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from keras.callbacks import Callback 3 | from keras.optimizers import SGD 4 | from keras.models import Sequential 5 | from keras.layers import Dense 6 | import tensorflow as tf 7 | from keras import backend as K 8 | from .BaseModel import BaseModel 9 | from ..utils import YpredCallback 10 | 11 | 12 | class NN_LinearLogit(BaseModel): 13 | """2 Layer linear-logistic neural network using Keras""" 14 | 15 | parametric = True 16 | bootlist = None 17 | 18 | def __init__(self, n_neurons=2, epochs=200, learning_rate=0.01, momentum=0.0, decay=0.0, nesterov=False, loss="binary_crossentropy", batch_size=None, verbose=0): 19 | self.n_neurons = n_neurons 20 | self.verbose = verbose 21 | self.n_epochs = epochs 22 | self.k = n_neurons 23 | self.batch_size = batch_size 24 | self.loss = loss 25 | self.decay = decay 26 | self.nesterov = nesterov 27 | self.momentum = momentum 28 | self.learning_rate = learning_rate 29 | self.optimizer = SGD(lr=learning_rate, momentum=momentum, decay=decay, nesterov=nesterov) 30 | 31 | self.__name__ = 'cimcb.model.NN_LinearLogit' 32 | self.__params__ = {'n_neurons': n_neurons, 'epochs': epochs, 'learning_rate': learning_rate, 'momentum': momentum, 'decay': decay, 'nesterov': nesterov, 'loss': loss, 'batch_size': batch_size, 'verbose': verbose} 33 | 34 | def set_params(self, params): 35 | self.__init__(**params) 36 | 37 | def train(self, X, Y, epoch_ypred=False, epoch_xtest=None): 38 | """ Fit the neural network model, save additional stats (as attributes) and return Y predicted values. 39 | 40 | Parameters 41 | ---------- 42 | X : array-like, shape = [n_samples, n_features] 43 | Predictor variables, where n_samples is the number of samples and n_features is the number of predictors. 44 | 45 | Y : array-like, shape = [n_samples, 1] 46 | Response variables, where n_samples is the number of samples. 47 | 48 | Returns 49 | ------- 50 | y_pred_train : array-like, shape = [n_samples, 1] 51 | Predicted y score for samples. 
52 | """ 53 | 54 | # # If using Keras, set tf to 1 core 55 | # config = K.tf.ConfigProto(intra_op_parallelism_threads=8, inter_op_parallelism_threads=8, allow_soft_placement=True) 56 | # session = tf.Session(config=config) 57 | # K.set_session(session) 58 | 59 | # If batch-size is None: 60 | if self.batch_size is None: 61 | self.batch_size = len(X) 62 | 63 | self.model = Sequential() 64 | self.model.add(Dense(self.n_neurons, activation="linear", input_dim=len(X.T))) 65 | self.model.add(Dense(1, activation="sigmoid")) 66 | self.model.compile(optimizer=self.optimizer, loss=self.loss, metrics=["accuracy"]) 67 | 68 | # If epoch_ypred is True, calculate ypred for each epoch 69 | if epoch_ypred is True: 70 | self.epoch = YpredCallback(self.model, X, epoch_xtest) 71 | else: 72 | self.epoch = Callback() 73 | 74 | # Fit 75 | self.model.fit(X, Y, epochs=self.n_epochs, batch_size=self.batch_size, verbose=self.verbose, callbacks=[self.epoch]) 76 | 77 | layer1_weight = self.model.layers[0].get_weights()[0] 78 | layer1_bias = self.model.layers[0].get_weights()[1] 79 | layer2_weight = self.model.layers[1].get_weights()[0] 80 | layer2_bias = self.model.layers[1].get_weights()[1] 81 | 82 | # Not sure about the naming scheme (trying to match PLS) 83 | self.model.x_loadings_ = layer1_weight 84 | self.model.x_scores_ = np.matmul(X, self.model.x_loadings_) + layer1_bias 85 | self.model.y_loadings_ = layer2_weight 86 | self.model.pctvar_ = np.ones((1, len(self.model.y_loadings_[0]))) 87 | self.xcols_num = len(X.T) 88 | self.model.pctvar_ = sum(abs(self.model.x_scores_) ** 2) / sum(sum(abs(X) ** 2)) * 100 89 | y_pred_train = self.model.predict(X).flatten() 90 | 91 | # Storing X, Y, and Y_pred 92 | self.Y_pred = y_pred_train 93 | self.X = X 94 | self.Y = Y 95 | return y_pred_train 96 | 97 | def test(self, X, Y=None): 98 | """Calculate and return Y predicted value. 99 | 100 | Parameters 101 | ---------- 102 | X : array-like, shape = [n_samples, n_features] 103 | Test variables, where n_samples is the number of samples and n_features is the number of predictors. 104 | 105 | Returns 106 | ------- 107 | y_pred_test : array-like, shape = [n_samples, 1] 108 | Predicted y score for samples. 109 | """ 110 | 111 | layer1_weight = self.model.layers[0].get_weights()[0] 112 | layer1_bias = self.model.layers[0].get_weights()[1] 113 | layer2_weight = self.model.layers[1].get_weights()[0] 114 | layer2_bias = self.model.layers[1].get_weights()[1] 115 | 116 | self.model.y_loadings_ = layer2_weight 117 | self.model.y_loadings_ = self.model.y_loadings_.reshape(1, len(self.model.y_loadings_)) 118 | self.model.x_loadings_ = layer1_weight 119 | self.model.x_scores_ = np.matmul(X, self.model.x_loadings_) + layer1_bias 120 | y_pred_test = self.model.predict(X).flatten() 121 | self.model.pctvar_ = sum(abs(self.model.x_scores_) ** 2) / sum(sum(abs(X) ** 2)) * 100 122 | return y_pred_test 123 | -------------------------------------------------------------------------------- /cimcb/model/PCLR.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | from sklearn.decomposition import PCA 4 | from sklearn.linear_model import LogisticRegression 5 | from .BaseModel import BaseModel 6 | from sklearn.metrics import roc_auc_score 7 | from ..utils import binary_metrics, binary_evaluation 8 | 9 | 10 | class PCLR(BaseModel): 11 | """ Principal component logistic regression. 
12 | 13 | Parameters 14 | ---------- 15 | n_components : int, (default 2) 16 | Number of components to keep. 17 | 18 | Methods 19 | ------- 20 | train : Fit model to data. 21 | 22 | test : Apply model to test data. 23 | 24 | evaluate : Evaluate model. 25 | 26 | calc_bootci : Calculate bootstrap intervals for plot_featureimportance. 27 | 28 | plot_featureimportance : Plot coefficient and Variable Importance in Projection (VIP). 29 | 30 | plot_permutation_test : Perform a permutation test and plot. 31 | """ 32 | 33 | parametric = True 34 | bootlist = ["model.coef_", "Y_pred", "model.eval_metrics_"] # list of metrics to bootstrap 35 | 36 | def __init__(self, n_components=2): 37 | self.model = PCA(n_components=n_components) 38 | self.regrmodel = LogisticRegression(solver="liblinear") 39 | self.k = n_components 40 | 41 | self.__name__ = 'cimcb.model.PCLR' 42 | self.__params__ = {'n_components': n_components} 43 | 44 | def set_params(self, params): 45 | self.__init__(**params) 46 | 47 | def train(self, X, Y): 48 | """ Fit the PCLR model, save additional stats (as attributes) and return Y predicted values. 49 | 50 | Parameters 51 | ---------- 52 | X : array-like, shape = [n_samples, n_features] 53 | Predictor variables, where n_samples is the number of samples and n_features is the number of predictors. 54 | 55 | Y : array-like, shape = [n_samples, 1] 56 | Response variables, where n_samples is the number of samples. 57 | 58 | Returns 59 | ------- 60 | y_pred_train : array-like, shape = [n_samples, 1] 61 | Predicted y score for samples. 62 | """ 63 | 64 | # Ensure array and error check 65 | X, Y = self.input_check(X, Y) 66 | 67 | # Fit the model 68 | self.model.fit(X) 69 | self.model.x_scores_ = self.model.transform(X) 70 | self.regrmodel.fit(self.model.x_scores_, Y) 71 | 72 | # Save x_loadings, coef, pctvar, x_weights, y_loadings and vip 73 | self.model.x_loadings_ = self.model.components_.T 74 | self.model.coef_ = np.dot(self.model.x_loadings_, self.regrmodel.coef_.flatten()) 75 | self.model.pctvar_ = self.model.explained_variance_ 76 | self.model.x_weights_ = self.model.components_.T 77 | self.model.y_loadings_ = self.regrmodel.coef_.reshape(1, len(self.regrmodel.coef_.flatten())) 78 | 79 | # Calculate the Y predicted value via predict_proba. 80 | # predict_proba is designed for multi-class problems and returns one 81 | # probability column per class, so identify the column that best 82 | # discriminates the two classes (higher AUC) and use it as y_pred. 83 | y_pred = self.regrmodel.predict_proba(self.model.x_scores_) 84 | pred_0 = roc_auc_score(Y, y_pred[:, 0]) 85 | pred_1 = roc_auc_score(Y, y_pred[:, 1]) 86 | if pred_0 > pred_1: 87 | self.pred_index = 0 88 | else: 89 | self.pred_index = 1 90 | 91 | # Use the selected positive-class column as the training prediction 92 | y_pred_train = np.array(self.regrmodel.predict_proba(self.model.x_scores_)[:, self.pred_index]) 93 | 94 | self.Y_train = Y 95 | self.Y_pred_train = y_pred_train 96 | self.Y_pred = y_pred_train 97 | self.X = X 98 | self.Y = Y 99 | self.metrics_key = [] 100 | self.model.eval_metrics_ = [] 101 | bm = binary_evaluation(Y, y_pred_train) 102 | for key, value in bm.items(): 103 | self.model.eval_metrics_.append(value) 104 | self.metrics_key.append(key) 105 | 106 | self.model.eval_metrics_ = np.array(self.model.eval_metrics_) 107 | 108 | return y_pred_train 109 | 110 | def test(self, X, Y=None): 111 | """Calculate and return Y predicted value.
112 | 113 | Parameters 114 | ---------- 115 | X : array-like, shape = [n_samples, n_features] 116 | Test variables, where n_samples is the number of samples and n_features is the number of predictors. 117 | 118 | Returns 119 | ------- 120 | y_pred_test : array-like, shape = [n_samples, 1] 121 | Predicted y score for samples. 122 | """ 123 | 124 | # Convert X to a numpy array if it is a DataFrame or Series 125 | if isinstance(X, (pd.DataFrame, pd.Series)): 126 | X = np.array(X) 127 | 128 | # Project X onto the principal components 129 | newX = self.model.transform(X) 130 | # Calculate the Y predicted value using the positive-class 131 | # probability column identified during training 132 | y_pred_test = np.array(self.regrmodel.predict_proba(newX)[:, self.pred_index]) 133 | 134 | # If Y is provided, calculate the evaluation metrics 135 | if Y is not None: 136 | self.metrics_key = [] 137 | self.model.eval_metrics_ = [] 138 | bm = binary_evaluation(Y, y_pred_test) 139 | for key, value in bm.items(): 140 | self.model.eval_metrics_.append(value) 141 | self.metrics_key.append(key) 142 | 143 | self.model.eval_metrics_ = np.array(self.model.eval_metrics_) 144 | 145 | self.Y_pred = y_pred_test 146 | return y_pred_test 147 | -------------------------------------------------------------------------------- /cimcb/model/MBNN_SigmoidSigmoid.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from keras.callbacks import Callback 3 | from keras.optimizers import SGD 4 | from keras.models import Sequential 5 | from keras.models import Model 6 | from keras.layers import Input, Dense, Concatenate, LSTM, concatenate 7 | from keras.layers import Dense 8 | from .BaseModel import BaseModel 9 | from ..utils import YpredCallback, binary_evaluation 10 | 11 | 12 | class MBNN_SigmoidSigmoid(BaseModel): 13 | """Multi-block sigmoid-sigmoid neural network using Keras""" 14 | 15 | parametric = True 16 | bootlist = ["Y_pred", "model.eval_metrics_"] # list of metrics to bootstrap 17 | 18 | def __init__(self, n_neurons_l1=2, n_neurons_l2=2, epochs=200, learning_rate=0.01, momentum=0.0, decay=0.0, nesterov=False, loss="binary_crossentropy", batch_size=None, verbose=0): 19 | self.n_neurons_l1 = n_neurons_l1 20 | self.n_neurons_l2 = n_neurons_l2 21 | self.verbose = verbose 22 | self.n_epochs = epochs 23 | self.batch_size = batch_size 24 | self.loss = loss 25 | self.optimizer = SGD(lr=learning_rate, momentum=momentum, decay=decay, nesterov=nesterov) 26 | 27 | self.__name__ = 'cimcb.model.MBNN_SigmoidSigmoid' 28 | self.__params__ = {'n_neurons_l1': n_neurons_l1, 'n_neurons_l2': n_neurons_l2, 'epochs': epochs, 'learning_rate': learning_rate, 'momentum': momentum, 'decay': decay, 'nesterov': nesterov, 'loss': loss, 'batch_size': batch_size, 'verbose': verbose} 29 | 30 | def set_params(self, params): 31 | self.__init__(**params) 32 | 33 | def train(self, X, Y, epoch_ypred=False, epoch_xtest=None): 34 | """ Fit the neural network model, save additional stats (as attributes) and return Y predicted values. 35 | 36 | Parameters 37 | ---------- 38 | X : array-like, shape = [n_samples, n_features] 39 | Predictor variables, where n_samples is the number of samples and n_features is the number of predictors. 40 | 41 | Y : array-like, shape = [n_samples, 1] 42 | Response variables, where n_samples is the number of samples. 43 | 44 | Returns 45 | ------- 46 | y_pred_train : array-like, shape = [n_samples, 1] 47 | Predicted y score for samples.
48 | """ 49 | 50 | # If batch-size is None: 51 | if self.batch_size is None: 52 | batch_size = len(X) 53 | else: 54 | batch_size = self.batch_size 55 | 56 | #X = np.array(X) 57 | X1 = X[0] 58 | X2 = X[1] 59 | 60 | # Layers in loop 61 | layer1 = [] 62 | for i in X: 63 | input_i = Input(shape=(len(i.T),)) 64 | layer1_i = Dense(self.n_neurons_l1, activation="sigmoid")(input_i) 65 | layer1_i = Model(inputs=input_i, outputs=layer1_i) 66 | layer1.append(layer1_i) 67 | 68 | # Concatenate 69 | concat = concatenate([i.output for i in layer1]) 70 | model_concat = Dense(self.n_neurons_l2, activation="sigmoid")(concat) 71 | model_concat = Dense(1, activation="sigmoid")(model_concat) 72 | 73 | self.model = Model(inputs=[i.input for i in layer1], outputs=model_concat) 74 | self.model.compile(optimizer=self.optimizer, loss=self.loss, metrics=["accuracy"]) 75 | 76 | self.metrics_key = [] 77 | self.model.eval_metrics_ = [] 78 | 79 | self.model.pfi_acc_ = np.zeros((1, len(Y))) 80 | self.model.pfi_r2q2_ = np.zeros((1, len(Y))) 81 | self.model.pfi_auc_ = np.zeros((1, len(Y))) 82 | self.model.vip_ = np.zeros((1, len(Y))) 83 | self.model.coef_ = np.zeros((1, len(Y))) 84 | 85 | self.model.y_loadings_ = np.array([0, 0, 0]) 86 | self.model.x_scores_ = np.array([0, 0, 0]) 87 | self.model.x_loadings_ = np.array([0, 0, 0]) 88 | self.model.pctvar_ = np.array([0, 0, 0]) 89 | 90 | # Fit 91 | self.model.fit(X, Y, epochs=self.n_epochs, batch_size=batch_size, verbose=self.verbose) 92 | 93 | # Not sure about the naming scheme (trying to match PLS) 94 | y_pred_train = self.model.predict(X).flatten() 95 | 96 | 97 | self.model.eval_metrics_ = [] 98 | bm = binary_evaluation(Y, y_pred_train) 99 | for key, value in bm.items(): 100 | self.model.eval_metrics_.append(value) 101 | self.metrics_key.append(key) 102 | self.model.eval_metrics_ = np.array(self.model.eval_metrics_) 103 | 104 | # Storing X, Y, and Y_pred 105 | self.Y_train = Y 106 | self.Y_pred_train = y_pred_train 107 | self.Y_pred = y_pred_train 108 | self.X = X 109 | self.Y = Y 110 | return y_pred_train 111 | 112 | def test(self, X, Y=None): 113 | """Calculate and return Y predicted value. 114 | 115 | Parameters 116 | ---------- 117 | X : array-like, shape = [n_samples, n_features] 118 | Test variables, where n_samples is the number of samples and n_features is the number of predictors. 119 | 120 | Returns 121 | ------- 122 | y_pred_test : array-like, shape = [n_samples, 1] 123 | Predicted y score for samples. 124 | """ 125 | 126 | y_pred_test = self.model.predict(X).flatten() 127 | 128 | # Calculate and return Y predicted value 129 | if Y is not None: 130 | self.metrics_key = [] 131 | self.model.eval_metrics_ = [] 132 | bm = binary_evaluation(Y, y_pred_test) 133 | for key, value in bm.items(): 134 | self.model.eval_metrics_.append(value) 135 | self.metrics_key.append(key) 136 | 137 | self.model.eval_metrics_ = np.array(self.model.eval_metrics_) 138 | 139 | self.Y_pred = y_pred_test 140 | 141 | return y_pred_test 142 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | drawing 2 | 3 | # cimcb 4 | A package containing the necessary tools for the statistical analysis of untargeted and targeted metabolomics data. 
5 | 6 | ## Installation 7 | 8 | ### Dependencies 9 | cimcb requires: 10 | - Python (>=3.5) 11 | - Bokeh (>=1.0.0) 12 | - Keras 13 | - NumPy (>=1.12) 14 | - Pandas 15 | - SciPy 16 | - scikit-learn 17 | - Statsmodels 18 | - TensorFlow 19 | - tqdm 20 | 21 | ### User installation 22 | The recommended way to install cimcb and its dependencies is using ``conda``: 23 | ```console 24 | conda install -c cimcb cimcb 25 | ``` 26 | or ``pip``: 27 | ```console 28 | pip install cimcb 29 | ``` 30 | Alternatively, to install directly from GitHub: 31 | ```console 32 | pip install https://github.com/cimcb/cimcb/archive/master.zip 33 | ``` 34 | 35 | ### API 36 | For further detail on usage, refer to the docstrings. 37 | 38 | #### cimcb.model 39 | - [PLS_SIMPLS](https://github.com/KevinMMendez/cimcb/blob/master/cimcb/model/PLS_SIMPLS.py#L6-L23): Partial least-squares regression using the SIMPLS algorithm. 40 | - [PLS_NIPALS](https://github.com/KevinMMendez/cimcb/blob/master/cimcb/model/PLS_SIMPLS.py#L7-L24): Partial least-squares regression using the NIPALS algorithm. 41 | - [PCR](https://github.com/KevinMMendez/cimcb/blob/master/cimcb/model/PCR.py#L8-L25): Principal component regression. 42 | - [PCLR](https://github.com/KevinMMendez/cimcb/blob/master/cimcb/model/PCLR.py#L8-L25): Principal component logistic regression. 43 | - [RF](https://github.com/KevinMMendez/cimcb/blob/master/cimcb/model/RF.py#L8-L43): Random forest. 44 | - [SVM](https://github.com/KevinMMendez/cimcb/blob/master/cimcb/model/SVM.py#L8-L31): Support vector machine. 45 | - [NN_LinearLinear](https://github.com/KevinMMendez/cimcb/blob/master/cimcb/model/NN_LinearLinear.py#L10-L42): 2 Layer linear-linear neural network. 46 | - [NN_LinearSigmoid](https://github.com/KevinMMendez/cimcb/blob/master/cimcb/model/NN_LinearSigmoid.py#L10-L42): 2 Layer linear-sigmoid neural network. 47 | - [NN_SigmoidSigmoid](https://github.com/KevinMMendez/cimcb/blob/master/cimcb/model/NN_LogitLogit.py#L10-L42): 2 Layer sigmoid-sigmoid neural network. 48 | 49 | #### cimcb.plot 50 | - [boxplot](https://github.com/KevinMMendez/cimcb/blob/master/cimcb/plot/boxplot.py#L8-L18): Creates a boxplot using Bokeh. 51 | - [distribution](https://github.com/KevinMMendez/cimcb/blob/master/cimcb/plot/distribution.py#L6-L16): Creates a distribution plot using Bokeh. 52 | - [pca](https://github.com/KevinMMendez/cimcb/blob/master/cimcb/plot/pca.py#L10-L17): Creates a PCA scores and loadings plot using Bokeh. 53 | - [permutation_test](https://github.com/KevinMMendez/cimcb/blob/master/cimcb/plot/permutation_test.py#L13-L27): Creates permutation test plots using Bokeh. 54 | - [roc_plot](https://github.com/KevinMMendez/cimcb/blob/master/cimcb/plot/roc.py#L20-L33): Creates a ROC plot using Bokeh. 55 | - [scatter](https://github.com/KevinMMendez/cimcb/blob/master/cimcb/plot/scatter.py#L6-L16): Creates a scatterplot using Bokeh. 56 | - [scatterCI](https://github.com/KevinMMendez/cimcb/blob/master/cimcb/plot/scatterCI.py#L7-L14): Creates a scatterCI plot using Bokeh. 57 | 58 | #### cimcb.cross_val 59 | - [kfold](https://github.com/KevinMMendez/cimcb/blob/master/cimcb/cross_val/kfold.py#L14-L42): Exhaustive search over param_dict, calculating binary metrics using k-fold cross-validation (see the usage sketch below). 60 | - [holdout](https://github.com/KevinMMendez/cimcb/blob/master/cimcb/cross_val/holdout.py#L11-L36): Exhaustive search over param_dict, calculating binary metrics using a hold-out set.
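A hypothetical sketch of the cross-validation workflow described above. The `param_dict` argument is documented in the docstrings; the `folds` argument and the `run`/`plot` methods are assumptions based on typical cimcb usage and may differ between versions:

```python
import numpy as np
import cimcb as cb

# Toy data (illustrative only): 40 samples, 10 features, binary outcome
X = np.random.rand(40, 10)
Y = np.array([0, 1] * 20)

# Exhaustive search over n_components with 5-fold cross-validation
cv = cb.cross_val.kfold(model=cb.model.PCLR,
                        X=X,
                        Y=Y,
                        param_dict={'n_components': [1, 2, 3, 4]},
                        folds=5)  # 'folds' is an assumed argument name
cv.run()   # fit and evaluate every parameter combination
cv.plot()  # visualise the cross-validated binary metrics
```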
61 | 62 | #### cimcb.bootstrap 63 | - [Perc](https://github.com/KevinMMendez/cimcb/blob/master/cimcb/bootstrap/Perc.py#L6-L35): Returns bootstrap confidence intervals using the percentile bootstrap interval. 64 | - [BC](https://github.com/KevinMMendez/cimcb/blob/master/cimcb/bootstrap/BC.py#L7-L36): Returns bootstrap confidence intervals using the bias-corrected bootstrap interval. 65 | - [BCA](https://github.com/KevinMMendez/cimcb/blob/master/cimcb/bootstrap/BCA.py#L9-L37): Returns bootstrap confidence intervals using the bias-corrected and accelerated bootstrap interval. 66 | 67 | #### cimcb.utils 68 | - [binary_metrics](https://github.com/KevinMMendez/cimcb/blob/master/cimcb/utils/binary_metrics.py#L5-L26): Return a dict of binary stats with the following metrics: R2, auc, accuracy, precision, sensitivity, specificity, and F1 score. 69 | - [ci95_ellipse](https://github.com/KevinMMendez/cimcb/blob/master/cimcb/utils/ci95_ellipse.py#L6-L28): Construct a 95% confidence ellipse using PCA. 70 | - [dict_mean](https://github.com/KevinMMendez/cimcb/blob/master/cimcb/utils/dict_mean.py#L4-L5): Calculate the mean for all keys in a dictionary. 71 | - [dict_median](https://github.com/KevinMMendez/cimcb/blob/master/cimcb/utils/dict_median.py#L4-L5): Calculate the median for all keys in a dictionary. 72 | - [dict_perc](https://github.com/KevinMMendez/cimcb/blob/master/cimcb/utils/dict_perc.py#L4-L5): Calculate confidence intervals (percentile) for all keys in a dictionary. 73 | - [dict_std](https://github.com/KevinMMendez/cimcb/blob/master/cimcb/utils/dict_std.py#L4-L5): Calculate the standard deviation for all keys in a dictionary. 74 | - [knnimpute](https://github.com/KevinMMendez/cimcb/blob/master/cimcb/utils/knnimpute.py#L7-L22): kNN missing value imputation using Euclidean distance. 75 | - [load_dataCSV](https://github.com/KevinMMendez/cimcb/blob/master/cimcb/utils/load_dataCSV.py#L7-L25): Loads and validates the DataFile and PeakFile from CSV files. 76 | - [load_dataXL](https://github.com/KevinMMendez/cimcb/blob/master/cimcb/utils/load_dataXL.py#L7-L29): Loads and validates the DataFile and PeakFile from an Excel file. 77 | - [nested_getattr](https://github.com/KevinMMendez/cimcb/blob/master/cimcb/utils/nested_getattr.py#L4-L5): getattr for nested attributes. 78 | - [scale](https://github.com/KevinMMendez/cimcb/blob/master/cimcb/utils/scale.py#L4-L42): Scales x (which can include NaNs) with method: 'auto', 'pareto', 'vast', or 'level'. 79 | - [table_check](https://github.com/KevinMMendez/cimcb/blob/master/cimcb/utils/table_check.py#L4-L17): Error checking for DataTable and PeakTable (used in load_dataXL). 80 | - [univariate_2class](https://github.com/KevinMMendez/cimcb/blob/master/cimcb/utils/univariate_2class.py#L8-L35): Creates a table of univariate statistics (2 class). 81 | - [wmean](https://github.com/KevinMMendez/cimcb/blob/master/cimcb/utils/wmean.py#L4-L19): Returns the weighted mean. Ignores NaNs and handles infinite weights. 82 | 83 | ### License 84 | cimcb is licensed under the MIT license. 85 | 86 | ### Authors 87 | - [Kevin Mendez](https://github.com/kevinmmendez) 88 | - [David Broadhurst](https://scholar.google.ca/citations?user=M3_zZwUAAAAJ&hl=en) 89 | 90 | ### Correspondence 91 | Professor David Broadhurst, Director of the Centre for Integrative Metabolomics & Computational Biology at Edith Cowan University. 92 | E-mail: d.broadhurst@ecu.edu.au 93 | --------------------------------------------------------------------------------