├── cw ├── base.pyc ├── multiclass_confidence_weighted_var_diag.pyc ├── multiclass_soft_confidence_weighted_1_diag.pyc ├── multiclass_soft_confidence_weighted_2_diag.pyc ├── base.py ├── create_results_graph.py ├── evaluate_sparse_data.py ├── evaluate_small_data.py ├── confidence_weighted_var.py ├── soft_confidence_weighted_2.py ├── soft_confidence_weighted_1.py ├── multiclass_confidence_weighted_var_diag.py ├── multiclass_soft_confidence_weighted_1_diag.py └── multiclass_soft_confidence_weighted_2_diag.py ├── evaluation └── uci_small_dataset │ ├── acc_car.png │ ├── acc_glass.png │ ├── acc_iris.png │ ├── acc_liver.png │ ├── acc_pima.png │ ├── acc_usps.png │ ├── acc_yeast.png │ ├── acc_abalone.png │ ├── acc_creadit.png │ ├── acc_gisette.png │ ├── acc_isolet.png │ ├── acc_haberman.png │ ├── acc_ionosphere.png │ ├── acc_mammographic.png │ ├── acc_transfusion.png │ ├── cum_elapsed_car.png │ ├── cum_elapsed_iris.png │ ├── cum_elapsed_pima.png │ ├── cum_elapsed_usps.png │ ├── acc_breast_cancer.png │ ├── cum_elapsed_abalone.png │ ├── cum_elapsed_creadit.png │ ├── cum_elapsed_gisette.png │ ├── cum_elapsed_glass.png │ ├── cum_elapsed_isolet.png │ ├── cum_elapsed_liver.png │ ├── cum_elapsed_yeast.png │ ├── cum_elapsed_haberman.png │ ├── acc_magicGamaTelescope.png │ ├── cum_elapsed_ionosphere.png │ ├── cum_elapsed_mammographic.png │ ├── cum_elapsed_transfusion.png │ ├── cum_elapsed_breast_cancer.png │ └── cum_elapsed_magicGamaTelescope.png ├── LICENSE ├── README.md ├── arow ├── arow2.py └── arow2_diag.py ├── pa ├── passive_aggressive_1.py └── passive_aggressive_2.py └── lr └── multiclass_logistic_regression.py /cw/base.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kzky/python-online-machine-learning-library/HEAD/cw/base.pyc -------------------------------------------------------------------------------- /evaluation/uci_small_dataset/acc_car.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kzky/python-online-machine-learning-library/HEAD/evaluation/uci_small_dataset/acc_car.png -------------------------------------------------------------------------------- /evaluation/uci_small_dataset/acc_glass.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kzky/python-online-machine-learning-library/HEAD/evaluation/uci_small_dataset/acc_glass.png -------------------------------------------------------------------------------- /evaluation/uci_small_dataset/acc_iris.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kzky/python-online-machine-learning-library/HEAD/evaluation/uci_small_dataset/acc_iris.png -------------------------------------------------------------------------------- /evaluation/uci_small_dataset/acc_liver.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kzky/python-online-machine-learning-library/HEAD/evaluation/uci_small_dataset/acc_liver.png -------------------------------------------------------------------------------- /evaluation/uci_small_dataset/acc_pima.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kzky/python-online-machine-learning-library/HEAD/evaluation/uci_small_dataset/acc_pima.png -------------------------------------------------------------------------------- 
/evaluation/uci_small_dataset/acc_usps.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kzky/python-online-machine-learning-library/HEAD/evaluation/uci_small_dataset/acc_usps.png -------------------------------------------------------------------------------- /evaluation/uci_small_dataset/acc_yeast.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kzky/python-online-machine-learning-library/HEAD/evaluation/uci_small_dataset/acc_yeast.png -------------------------------------------------------------------------------- /evaluation/uci_small_dataset/acc_abalone.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kzky/python-online-machine-learning-library/HEAD/evaluation/uci_small_dataset/acc_abalone.png -------------------------------------------------------------------------------- /evaluation/uci_small_dataset/acc_creadit.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kzky/python-online-machine-learning-library/HEAD/evaluation/uci_small_dataset/acc_creadit.png -------------------------------------------------------------------------------- /evaluation/uci_small_dataset/acc_gisette.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kzky/python-online-machine-learning-library/HEAD/evaluation/uci_small_dataset/acc_gisette.png -------------------------------------------------------------------------------- /evaluation/uci_small_dataset/acc_isolet.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kzky/python-online-machine-learning-library/HEAD/evaluation/uci_small_dataset/acc_isolet.png -------------------------------------------------------------------------------- /cw/multiclass_confidence_weighted_var_diag.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kzky/python-online-machine-learning-library/HEAD/cw/multiclass_confidence_weighted_var_diag.pyc -------------------------------------------------------------------------------- /evaluation/uci_small_dataset/acc_haberman.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kzky/python-online-machine-learning-library/HEAD/evaluation/uci_small_dataset/acc_haberman.png -------------------------------------------------------------------------------- /evaluation/uci_small_dataset/acc_ionosphere.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kzky/python-online-machine-learning-library/HEAD/evaluation/uci_small_dataset/acc_ionosphere.png -------------------------------------------------------------------------------- /cw/multiclass_soft_confidence_weighted_1_diag.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kzky/python-online-machine-learning-library/HEAD/cw/multiclass_soft_confidence_weighted_1_diag.pyc -------------------------------------------------------------------------------- /cw/multiclass_soft_confidence_weighted_2_diag.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/kzky/python-online-machine-learning-library/HEAD/cw/multiclass_soft_confidence_weighted_2_diag.pyc -------------------------------------------------------------------------------- /evaluation/uci_small_dataset/acc_mammographic.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kzky/python-online-machine-learning-library/HEAD/evaluation/uci_small_dataset/acc_mammographic.png -------------------------------------------------------------------------------- /evaluation/uci_small_dataset/acc_transfusion.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kzky/python-online-machine-learning-library/HEAD/evaluation/uci_small_dataset/acc_transfusion.png -------------------------------------------------------------------------------- /evaluation/uci_small_dataset/cum_elapsed_car.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kzky/python-online-machine-learning-library/HEAD/evaluation/uci_small_dataset/cum_elapsed_car.png -------------------------------------------------------------------------------- /evaluation/uci_small_dataset/cum_elapsed_iris.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kzky/python-online-machine-learning-library/HEAD/evaluation/uci_small_dataset/cum_elapsed_iris.png -------------------------------------------------------------------------------- /evaluation/uci_small_dataset/cum_elapsed_pima.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kzky/python-online-machine-learning-library/HEAD/evaluation/uci_small_dataset/cum_elapsed_pima.png -------------------------------------------------------------------------------- /evaluation/uci_small_dataset/cum_elapsed_usps.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kzky/python-online-machine-learning-library/HEAD/evaluation/uci_small_dataset/cum_elapsed_usps.png -------------------------------------------------------------------------------- /evaluation/uci_small_dataset/acc_breast_cancer.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kzky/python-online-machine-learning-library/HEAD/evaluation/uci_small_dataset/acc_breast_cancer.png -------------------------------------------------------------------------------- /evaluation/uci_small_dataset/cum_elapsed_abalone.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kzky/python-online-machine-learning-library/HEAD/evaluation/uci_small_dataset/cum_elapsed_abalone.png -------------------------------------------------------------------------------- /evaluation/uci_small_dataset/cum_elapsed_creadit.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kzky/python-online-machine-learning-library/HEAD/evaluation/uci_small_dataset/cum_elapsed_creadit.png -------------------------------------------------------------------------------- /evaluation/uci_small_dataset/cum_elapsed_gisette.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/kzky/python-online-machine-learning-library/HEAD/evaluation/uci_small_dataset/cum_elapsed_gisette.png -------------------------------------------------------------------------------- /evaluation/uci_small_dataset/cum_elapsed_glass.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kzky/python-online-machine-learning-library/HEAD/evaluation/uci_small_dataset/cum_elapsed_glass.png -------------------------------------------------------------------------------- /evaluation/uci_small_dataset/cum_elapsed_isolet.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kzky/python-online-machine-learning-library/HEAD/evaluation/uci_small_dataset/cum_elapsed_isolet.png -------------------------------------------------------------------------------- /evaluation/uci_small_dataset/cum_elapsed_liver.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kzky/python-online-machine-learning-library/HEAD/evaluation/uci_small_dataset/cum_elapsed_liver.png -------------------------------------------------------------------------------- /evaluation/uci_small_dataset/cum_elapsed_yeast.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kzky/python-online-machine-learning-library/HEAD/evaluation/uci_small_dataset/cum_elapsed_yeast.png -------------------------------------------------------------------------------- /evaluation/uci_small_dataset/cum_elapsed_haberman.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kzky/python-online-machine-learning-library/HEAD/evaluation/uci_small_dataset/cum_elapsed_haberman.png -------------------------------------------------------------------------------- /evaluation/uci_small_dataset/acc_magicGamaTelescope.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kzky/python-online-machine-learning-library/HEAD/evaluation/uci_small_dataset/acc_magicGamaTelescope.png -------------------------------------------------------------------------------- /evaluation/uci_small_dataset/cum_elapsed_ionosphere.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kzky/python-online-machine-learning-library/HEAD/evaluation/uci_small_dataset/cum_elapsed_ionosphere.png -------------------------------------------------------------------------------- /evaluation/uci_small_dataset/cum_elapsed_mammographic.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kzky/python-online-machine-learning-library/HEAD/evaluation/uci_small_dataset/cum_elapsed_mammographic.png -------------------------------------------------------------------------------- /evaluation/uci_small_dataset/cum_elapsed_transfusion.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/kzky/python-online-machine-learning-library/HEAD/evaluation/uci_small_dataset/cum_elapsed_transfusion.png -------------------------------------------------------------------------------- /evaluation/uci_small_dataset/cum_elapsed_breast_cancer.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/kzky/python-online-machine-learning-library/HEAD/evaluation/uci_small_dataset/cum_elapsed_breast_cancer.png
--------------------------------------------------------------------------------
/evaluation/uci_small_dataset/cum_elapsed_magicGamaTelescope.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/kzky/python-online-machine-learning-library/HEAD/evaluation/uci_small_dataset/cum_elapsed_magicGamaTelescope.png
--------------------------------------------------------------------------------
/cw/base.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | 
3 | import numpy as np
4 | 
5 | from scipy import sparse
6 | from scipy.sparse import csr_matrix
7 | from collections import defaultdict
8 | 
9 | 
10 | class ConfidenceWeightedModel(object):
11 |     """
12 |     """
13 | 
14 |     def __init__(self, epochs=10):
15 |         """
16 |         """
17 |         self.epochs = epochs
18 |         self.data = defaultdict()
19 |         self.model = defaultdict()
20 |         self.cache = defaultdict()
21 |         self.model["warm_start"] = False
22 |         self.data["one"] = csr_matrix(([1], ([0], [0])))
23 | 
24 |         pass
25 | 
26 |     def _add_bias_for_dense_sample(self, sample):
27 |         return np.hstack((sample, 1))
28 | 
29 |     def _add_bias_for_sparse_sample(self, sample):
30 |         """
31 | 
32 |         Arguments:
33 |         - `sample`:
34 |         """
35 |         x = sparse.hstack([sample, self.data["one"]])
36 |         #return x.tocsr()
37 |         return x
38 | 
39 |     def inverse_1d_sparse_matrix(self, X):
40 |         """
41 |         Destructive method: inverts the data of X in place.
42 | 
43 |         Arguments:
44 |         - `X`:
45 |         """
46 |         X.data = 1 / X.data
47 |         return X
48 | 
49 | 
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright (c) 2014, kzky
2 | All rights reserved.
3 | 
4 | Redistribution and use in source and binary forms, with or without
5 | modification, are permitted provided that the following conditions are met:
6 | 
7 | * Redistributions of source code must retain the above copyright notice, this
8 |   list of conditions and the following disclaimer.
9 | 
10 | * Redistributions in binary form must reproduce the above copyright notice,
11 |   this list of conditions and the following disclaimer in the documentation
12 |   and/or other materials provided with the distribution.
13 | 
14 | * Neither the name of the {organization} nor the names of its
15 |   contributors may be used to endorse or promote products derived from
16 |   this software without specific prior written permission.
17 | 
18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
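A minimal usage sketch for the bias helpers in cw/base.py above; illustrative only, and it assumes being run from inside the cw/ directory so that `base` is importable:

```python
import numpy as np
from scipy.sparse import csr_matrix

from base import ConfidenceWeightedModel  # assumed import path (run from cw/)

m = ConfidenceWeightedModel(epochs=1)

# dense sample: a constant bias feature 1 is appended at the end
print m._add_bias_for_dense_sample(np.array([0.5, -1.2]))
# -> [ 0.5 -1.2  1. ]

# sparse sample: the cached 1x1 matrix self.data["one"] is hstack-ed on
x = csr_matrix(np.array([[0.5, 0.0, -1.2]]))
print m._add_bias_for_sparse_sample(x).toarray()
# -> [[ 0.5  0.  -1.2  1. ]]
```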
--------------------------------------------------------------------------------
/cw/create_results_graph.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | 
3 | import pylab as pl
4 | import json
5 | import numpy as np
6 | 
7 | # vars
8 | results_filepath = "./evaluate_small_data_results.json"
9 | #image_dirpath = "/home/k_yoshiyama/images/uci_small_dataset"
10 | image_dirpath = "/home/kzk/images/uci_small_dataset"
11 | 
12 | # save as image
13 | results = json.load(open(results_filepath))
14 | data_names = results.keys()
15 | data_names.sort()
16 | for data_name in data_names:
17 |     results_per_data = results[data_name]
18 |     model_names = results_per_data.keys()
19 |     model_names.sort()
20 | 
21 |     # acc
22 |     fig = pl.figure()
23 |     for model_name in model_names:
24 |         pl.plot(results[data_name][model_name]["acc"], label=model_name)
25 |         pass
26 |     pl.legend(loc="lower right")
27 |     pl.xlabel("epochs")
28 |     pl.ylabel("%")
29 |     pl.title("Accuracy for %s" % data_name)
30 |     pl.savefig("%s/acc_%s.png" % (image_dirpath, data_name), dpi=200)
31 |     pl.clf()
32 | 
33 |     # elapsed time
34 |     for model_name in model_names:
35 |         if not model_name == "LinearSVC":
36 |             pl.plot(np.cumsum(results[data_name][model_name]["elapsed"]), label=model_name)
37 |         else:
38 |             pl.plot(results[data_name][model_name]["elapsed"], label=model_name)
39 |         pass
40 | 
41 |     pl.legend(loc="lower right")
42 |     pl.xlabel("epochs")
43 |     pl.ylabel("sec")
44 |     pl.title("Cumulative Elapsed Time for %s" % data_name)
45 |     pl.savefig("%s/cum_elapsed_%s.png" % (image_dirpath, data_name), dpi=200)
46 |     pl.clf()
47 | 
48 |     pass
49 | pass
50 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # python-online-machine-learning-library
2 | 
3 | ## Description
4 | 
5 | This is a Python online machine learning library (POMLL) for some well-known online learning methods.
6 | The following classifiers are implemented.
7 | 
8 | * Passive Aggressive Algorithm
9 |     * L1-hinge loss
10 |     * L2-hinge loss
11 | * Confidence Weighted
12 |     * variance version
13 | * Multi-Class Multi-Label Confidence Weighted
14 |     * single constraint and diagonal matrix version
15 | * Adaptive Regularization of Weights
16 |     * dense matrix
17 |     * diagonal matrix
18 | * Soft Confidence Weighted
19 |     * L1-hinge loss
20 |     * L2-hinge loss
21 | * Multi-Class Soft Confidence Weighted
22 |     * L1-hinge loss, single constraint, and diagonal matrix
23 |     * L2-hinge loss, single constraint, and diagonal matrix
24 | * Logistic Regression (should not be used)
25 |     * solved with stochastic gradient descent
26 | 
27 | ## Dependency
28 | * numpy
29 | * scipy
30 | * scikit-learn
31 | 
32 | ## Data Interface
33 | Two interfaces are exposed: the learn interface and the predict interface.
34 | 
35 | * The learn interface takes two arguments, X and y: X is a 2d numpy array or a scipy sparse matrix, and y is an array-like object of labels, as in the scikit-learn fit interface.
36 | * The predict interface takes one argument, which is a 1d numpy array or a 1-by-n scipy sparse matrix.
37 | 
38 | All you have to do is instantiate a model, call the learn method with samples and labels, and call predict on one sample, similar to the scikit-learn interface.
39 | 
40 | ```python:main.py
41 | # learn
42 | model = MCWVarDiag(eta=0.9, epochs=1)
43 | model.learn(X, y)  # X is the samples and y is the corresponding labels.
44 | 
45 | # predict
46 | model.predict(x)  # x is one sample.
47 | ```
48 | 
49 | ## Data Format
50 | This is a sample format for data stored in your storage.
51 | Any data format is acceptable as long as you can feed the data into the learn/predict interface.
52 | 
53 | * Dense Labeled Samples
54 |     * label,d1,d2,...,dn
55 |     * example for binary
56 | 
57 |             1,0.1,0.2, ...,0.9
58 |             2,0.1,0.2, ...,0.9
59 |             ...
60 |             1,0.1,0.2, ...,0.9
61 |             2,0.1,0.2, ...,0.9
62 | 
63 |     * example for multi-class
64 | 
65 |             1,0.1,0.2, ...,0.9
66 |             2,0.1,0.2, ...,0.9
67 |             ...
68 |             3,0.1,0.2, ...,0.9
69 |             2,0.1,0.2, ...,0.9
70 | 
71 | * Sparse Labeled Samples
72 | 
73 |         label feature-index:feature-val feature-index:feature-val ...
74 |         label feature-index:feature-val feature-index:feature-val ...
75 |         ...
76 |         label feature-index:feature-val feature-index:feature-val ...
77 | 
78 | ## How to use in detail
79 | Refer to the main function of each module.
80 | 
81 | ## Note
82 | * All labeled samples to be used for learning are stored in memory.
83 | * All algorithms except for Multi-Class Multi-Label CW and SCW are for binary classification only.
84 | * A bias term of 1 is appended to each sample internally, so you do not have to add it to the dataset.
85 | 
86 | ## References
87 | * http://webee.technion.ac.il/people/koby/publications/arow_nips09.pdf
88 | * http://www.aclweb.org/anthology/D/D09/D09-1052.pdf
89 | * https://alliance.seas.upenn.edu/~nlp/publications/pdf/dredze2008f.pdf
90 | * http://webee.technion.ac.il/people/koby/publications/paper_nips08_std.pdf
91 | * http://icml.cc/2012/papers/86.pdf
92 | 
93 | ## Future Work
94 | * Support learning without keeping all labeled samples in memory.
95 | * Evaluation compared to batch learning (e.g., liblinear)
96 | 
--------------------------------------------------------------------------------
/cw/evaluate_sparse_data.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | 
3 | import numpy as np
4 | import logging as logger
5 | import time
6 | import json
7 | import copy
8 | 
9 | 
10 | from sklearn.metrics import confusion_matrix
11 | from multiclass_confidence_weighted_var_diag import MCWVarDiag
12 | from multiclass_soft_confidence_weighted_1_diag import MSCWIDiag
13 | from multiclass_soft_confidence_weighted_2_diag import MSCWIIDiag
14 | from sklearn.datasets import load_svmlight_file
15 | from sklearn.svm import LinearSVC
16 | 
17 | # file path
18 | filepath = "./evaluate_sparse_data_results.json"
19 | 
20 | # data configuration
21 | data_config = {
22 |     "rcv1": {
23 |         "train": "/home/k_yoshiyama/datasets/rcv1/rcv1_train.multiclass.dat",
24 |         "test": "/home/k_yoshiyama/datasets/rcv1/rcv1_test.multiclass.dat",
25 |     },
26 | 
27 |     "sector": {
28 |         "train": "/home/k_yoshiyama/datasets/sector/sector.scale.dat",
29 |         "test": "/home/k_yoshiyama/datasets/sector/sector.t.scale.dat",
30 |     },
31 | }
32 | 
33 | # results
34 | results = {
35 |     "rcv1": {
36 |     },
37 |     "sector": {
38 |     },
39 | }
40 | 
41 | # model config
42 | models = [MCWVarDiag, MSCWIDiag, MSCWIIDiag]
43 | model_class_name_map = {
44 |     MCWVarDiag: "MCWVarDiag",
45 |     MSCWIDiag: "MSCWIDiag",
46 |     MSCWIIDiag: "MSCWIIDiag",
47 | }
48 | 
49 | # results
50 | result_per_data = {
51 |     "MCWVarDiag": {
52 |         "acc": [],  # per epoch
53 |         "elapsed": [],  # per epoch
54 |     },
55 | 
56 |     "MSCWIDiag": {
57 |         "acc": [],
58 |         "elapsed": [],
59 |     },
60 | 
61 |     "MSCWIIDiag": {
62 |         "acc": [],
63 |         "elapsed": [],
64 |     },
65 | 
66 |     "LinearSVC": {
67 |         "acc": [],
68 |         "elapsed": [],
69 |     },
70 | }
71 | 
72 | # results for each data
73 | for data in results:
74 |     results[data] = copy.deepcopy(result_per_data)
75 |     pass
76 | 
77 | # run experiment
78 | epochs = xrange(1, 6)
79 | for data in data_config:
80 |     print "data %s is processing..." % data
81 | 
82 |     # train/test
83 |     (X_train, y_train) = load_svmlight_file(data_config[data]["train"])
84 |     (X_test, y_test) = load_svmlight_file(data_config[data]["test"])
85 | 
86 |     # evaluate
87 |     for model in models:  # foreach __main__.class
88 |         # init
89 |         print "model is %s" % str(model)
90 |         model_ = model(epochs=1)
91 |         print "model is %s." % model_class_name_map[model]
92 | 
93 |         # epoch
94 |         for epoch in epochs:
95 |             print "the number of epochs is %d" % epoch
96 |             # warm start
97 |             if not epoch == 1:
98 |                 mu = model_.model["mu"]
99 |                 S = model_.model["S"]
100 |                 model_.init_params(mu, S)
101 |                 pass
102 | 
103 |             # learn
104 |             st = time.time()
105 |             model_.epochs = 1
106 |             model_.learn(X_train, y_train)
107 |             et = time.time()
108 | 
109 |             # elapsed time
110 |             results[data][model_class_name_map[model]]["elapsed"].append(et - st)
111 | 
112 |             # predict
113 |             y_pred = []
114 |             for x in X_test:
115 |                 y_pred.append(model_.predict(x))
116 |                 pass
117 |             cm = confusion_matrix(y_test, y_pred)
118 | 
119 |             # accuracy
120 |             results[data][model_class_name_map[model]]["acc"].append(np.sum(cm.diagonal()) * 100.0 / np.sum(cm))
121 | 
122 |             pass
123 |         pass
124 | 
125 |     # Linear SVC
126 |     print "model is LinearSVC."
127 |     model_ = LinearSVC()
128 |     st = time.time()
129 |     model_.fit(X_train, y_train)
130 |     et = time.time()
131 |     y_pred = model_.predict(X_test)
132 |     cm = confusion_matrix(y_test, y_pred)
133 |     for epoch in epochs:  # add the same results to all epochs
134 |         results[data]["LinearSVC"]["acc"].append(np.sum(cm.diagonal()) * 100.0 / np.sum(cm))
135 |         results[data]["LinearSVC"]["elapsed"].append(et - st)
136 |         pass
137 |     pass
138 | 
139 | with open(filepath, "w") as fpout:
140 |     json.dump(results, fpout)
141 |     pass
142 | 
143 | 
--------------------------------------------------------------------------------
/arow/arow2.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import logging as logger
3 | import time
4 | from collections import defaultdict
5 | from sklearn.metrics import confusion_matrix
6 | 
7 | 
8 | class AROW2(object):
9 |     """
10 |     Adaptive Regularization of Weight Vector algorithm with squared hinge loss.
11 | 
12 |     References:
13 |     - http://webee.technion.ac.il/people/koby/publications/arow_nips09.pdf
14 | 
15 |     This model is only applied to binary classification.
16 |     """
17 | 
18 |     def __init__(self, C=1, epochs=10):
19 |         """
20 |         model initialization.
21 |         """
22 |         logger.basicConfig(level=logger.DEBUG)
23 |         logger.info("init starts")
24 | 
25 |         self.epochs = epochs
26 |         self.model = defaultdict()
27 |         self.cache = defaultdict()
28 |         self._init_model(C)
29 | 
30 |         logger.info("init finished")
31 |         pass
32 | 
33 |     def _init_model(self, C):
34 |         """
35 |         Initialize model.
36 |         """
37 |         logger.info("init model starts")
38 |         self.model["C"] = C  # aggressive parameter
39 |         logger.info("init model finished")
40 |         pass
41 | 
42 |     def _learn(self, ):
43 |         """
44 |         Learn internally.
45 |         """
46 |         pass
47 | 
48 |     def _update(self, label, sample, margin):
49 |         """
50 |         Update model parameter internally.
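        The update rule is as follows (the full-matrix counterpart of the
        rule documented in arow2_diag.py),
            beta = x^T S x + C,
            mu = mu + y (1 - m) S x / beta, and
            S = S - S x x^T S / beta.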
51 | 
52 |         Arguments:
53 |         - `label`: label = {1, -1}
54 |         - `sample`: sample, or feature vector
55 |         """
56 | 
57 |         # add bias
58 |         sample = self._add_bias(sample)
59 | 
60 |         # beta
61 |         beta = sample.dot(self.model["S"]).dot(sample) + self.model["C"]
62 | 
63 |         # mu
64 |         Sx = self.model["S"].dot(sample)
65 |         mu = self.model["mu"] + label * (1 - margin) * Sx / beta
66 |         self.model["mu"] = mu
67 | 
68 |         # S
69 |         outer_dot_sample = np.outer(sample, sample)
70 |         SoS = self.model["S"].dot(outer_dot_sample).dot(self.model["S"])
71 |         S = self.model["S"] - SoS / beta
72 |         self.model["S"] = S
73 |         pass
74 | 
75 |     def _predict_value(self, sample):
76 |         """
77 |         predict value of \mu^T * x
78 | 
79 |         Arguments:
80 |         - `sample`:
81 |         """
82 | 
83 |         return self.model["mu"].dot(self._add_bias(sample))
84 | 
85 |     def _add_bias(self, sample):
86 |         return np.hstack((sample, 1))
87 | 
88 |     def learn(self, X, y):
89 |         """
90 |         Learn.
91 |         """
92 |         logger.info("learn starts")
93 |         self.model["n_samples"] = X.shape[0]
94 |         self.model["f_dims"] = X.shape[1]
95 | 
96 |         # model parameter mean
97 |         self.model["mu"] = np.zeros(self.model["f_dims"] + 1)
98 | 
99 |         # model parameter covariance
100 |         self.model["S"] = np.identity(self.model["f_dims"] + 1)
101 | 
102 |         # learn
103 |         st = time.time()
104 |         for i in xrange(0, self.epochs):
105 |             logger.info("iter: %d" % i)
106 |             for j in xrange(0, self.model["n_samples"]):
107 |                 sample = X[j, :]
108 |                 label = y[j]
109 |                 pred_val = self._predict_value(sample)
110 |                 margin = label * pred_val
111 |                 if margin < 1:
112 |                     self._update(label, sample, margin)
113 | 
114 |         logger.info("learn finished")
115 |         et = time.time()
116 |         logger.info("learning time: %f[s]" % (et - st))
117 |         pass
118 | 
119 |     def predict(self, sample):
120 |         """
121 |         predict {1, -1} based on \mu^T * x
122 | 
123 |         Arguments:
124 |         - `sample`:
125 |         """
126 |         pred_val = self._predict_value(sample)
127 |         self.cache["pred_val"] = pred_val
128 |         if pred_val >= 0:
129 |             return 1
130 |         else:
131 |             return -1
132 |         pass
133 | 
134 |     def update(self, label, sample):
135 |         """
136 |         update model.
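        Assumes predict() was called on `sample` immediately before, since
        the prediction value cached there is reused to compute the margin.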
137 |         Arguments:
138 |         - `label`: label = {1, -1}
139 |         - `sample`: sample, or feature vector
140 |         """
141 |         margin = label * self.cache["pred_val"]
142 |         if margin < 1:
143 |             self._update(label, sample, margin)
144 |             pass
145 |         pass
146 | 
147 | def main():
148 |     """
149 |     Example of how to use
150 |     """
151 | 
152 |     # data load
153 |     fname = "/home/kzk/datasets/uci_csv/liver.csv"
154 |     #fname = "/home/kzk/datasets/uci_csv/ad.csv"
155 |     #fname = "/home/kzk/datasets/uci_origin/adult.csv"
156 |     #fname = "/home/kzk/datasets/uci_csv/adult.csv"
157 |     print "dataset is", fname
158 |     data = np.loadtxt(fname, delimiter=" ")
159 |     X = data[:, 1:]
160 |     y = data[:, 0]
161 | 
162 |     # learn
163 |     model = AROW2(C=1, epochs=3)
164 |     model.learn(X, y)
165 | 
166 |     # predict
167 |     y_pred = np.ndarray(X.shape[0])
168 |     for i in xrange(0, X.shape[0]):
169 |         sample = data[i, 1:]
170 |         y_pred[i] = model.predict(sample)
171 | 
172 |     # show result
173 |     cm = confusion_matrix(y, y_pred)
174 |     print cm
175 |     print "accuracy: %d [%%]" % (np.sum(cm.diagonal()) * 100.0 / np.sum(cm))
176 | 
177 | if __name__ == '__main__':
178 |     main()
179 | 
--------------------------------------------------------------------------------
/pa/passive_aggressive_1.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import scipy as sp
3 | import logging as logger
4 | import time
5 | import pylab as pl
6 | from collections import defaultdict
7 | from sklearn.metrics import confusion_matrix
8 | 
9 | class PassiveAggressiveI(object):
10 |     """
11 |     Passive Aggressive-I algorithm: standard hinge loss PA.
12 | 
13 |     References:
14 |     - http://jmlr.org/papers/volume7/crammer06a/crammer06a.pdf
15 | 
16 |     This model is only applied to binary classification.
17 |     """
18 | 
19 |     def __init__(self, fname, delimiter = " ", C = 1, n_scan = 10):
20 |         """
21 |         model initialization.
22 |         """
23 |         logger.basicConfig(level=logger.DEBUG)
24 |         logger.info("init starts")
25 | 
26 |         self.n_scan = n_scan
27 |         self.data = defaultdict()
28 |         self.model = defaultdict()
29 |         self.cache = defaultdict()
30 |         self._load(fname, delimiter)
31 |         self._init_model(C)
32 | 
33 |         logger.info("init finished")
34 | 
35 |     def _load(self, fname, delimiter = " "):
36 |         """
37 |         Load data set specified with filename.
38 | 
39 |         data format must be as follows (space-separated file as default),
40 | 
41 |         l_1 x_11 x_12 x_13 ... x_1m
42 |         l_2 x_21 x_22 ... x_2m
43 |         ...
44 |         l_n x_n1 x_n2 ... x_nm
45 | 
46 |         l_i can be any two distinct values; they are converted to {1, -1} internally.
47 | 
48 |         Arguments:
49 |         - `fname`: file name.
50 |         - `delimiter`: delimiter of a file.
51 |         """
52 |         logger.info("load data starts")
53 | 
54 |         # load data
55 |         self.data["data"] = np.loadtxt(fname, delimiter = delimiter)
56 |         self.data["n_sample"] = self.data["data"].shape[0]
57 |         self.data["f_dim"] = self.data["data"].shape[1] - 1
58 | 
59 |         # binalize
60 |         self._binalize(self.data["data"])
61 | 
62 |         # normalize
63 |         self.normalize(self.data["data"][:, 1:])
64 | 
65 |         logger.info("load data finished")
66 | 
67 |     def _binalize(self, data):
68 |         """
69 |         Binalize label of data.
70 | 
71 |         Arguments:
72 |         - `data`: dataset.
73 |         """
74 |         logger.info("init starts")
75 | 
76 |         # binary check
77 |         labels = data[:, 0]
78 |         classes = np.unique(labels)
79 |         if classes.size != 2:
80 |             print "label must be a binary value."
81 |             exit(1)
82 | 
83 |         # convert binary labels to {1, -1}
84 |         for i in xrange(labels.size):
85 |             if labels[i] == classes[0]:
86 |                 labels[i] = 1
87 |             else:
88 |                 labels[i] = -1
89 | 
90 |         # set classes
91 |         self.data["classes"] = classes
92 |         logger.info("init finished")
93 | 
94 |     def normalize(self, samples):
95 |         """
96 |         normalize sample, such that sqrt(x^2) = 1
97 | 
98 |         Arguments:
99 |         - `samples`: dataset without labels.
100 |         """
101 |         logger.info("normalize starts")
102 |         for i in xrange(0, self.data["n_sample"]):
103 |             samples[i, :] = self._normalize(samples[i, :])
104 | 
105 |         logger.info("normalize finished")
106 | 
107 |     def _normalize(self, sample):
108 |         norm = np.sqrt(sample.dot(sample))
109 |         sample = sample / norm
110 |         return sample
111 | 
112 |     def _init_model(self, C):
113 |         """
114 |         Initialize model.
115 |         """
116 |         logger.info("init model starts")
117 |         self.model["w"] = np.zeros(self.data["f_dim"] + 1)  # model parameter
118 |         self.model["C"] = C  # aggressive parameter
119 |         logger.info("init model finished")
120 | 
121 |     def _learn(self, ):
122 |         """
123 |         Learn internally.
124 |         """
125 | 
126 |     def _update(self, label, sample, margin):
127 |         """
128 |         Update model parameter internally.
129 | 
130 |         - `label`: label = {1, -1}
131 |         - `sample`: sample, or feature vector
132 |         """
133 |         # add bias
134 |         sample = self._add_bias(sample)
135 | 
136 |         norm = sample.dot(sample)
137 |         min_ = min(1.0 / self.model["C"], (1 - margin) / norm)
138 |         w = self.model["w"] + label * min_ * sample
139 |         self.model["w"] = w
140 | 
141 |     def _predict_value(self, sample):
142 |         """
143 |         predict value of \w^T * x
144 | 
145 |         Arguments:
146 |         - `sample`:
147 |         """
148 |         return self.model["w"].dot(self._add_bias(sample))
149 | 
150 |     def _add_bias(self, sample):
151 |         return np.hstack((sample, 1))
152 | 
153 |     def learn(self, ):
154 |         """
155 |         Learn.
156 |         """
157 |         logger.info("learn starts")
158 |         data = self.data["data"]
159 | 
160 |         # learn
161 |         for i in xrange(0, self.n_scan):
162 |             print "iter: ", i
163 |             for j in xrange(0, self.data["n_sample"]):
164 |                 sample = data[j, 1:]
165 |                 label = data[j, 0]
166 |                 pred_val = self._predict_value(sample)
167 |                 margin = label * pred_val
168 |                 if margin < 1:
169 |                     self._update(label, sample, margin)
170 | 
171 |         logger.info("learn finished")
172 | 
173 |     def predict(self, sample):
174 |         """
175 |         predict {1, -1} based on \w^T * x
176 | 
177 |         Arguments:
178 |         - `sample`:
179 |         """
180 |         pred_val = self._predict_value(sample)
181 |         self.cache["pred_val"] = pred_val
182 |         if pred_val >= 0:
183 |             return 1
184 |         else:
185 |             return -1
186 | 
187 |     def update(self, label, sample):
188 |         """
189 |         update model.
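        The margin is computed from the prediction value cached by the
        preceding predict() call; when it is below 1, the weights move by
        w = w + y * min(1/C, (1 - m)/||x||_2^2) * x, as in _update above.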
190 |         Arguments:
191 |         - `label`: label = {1, -1}
192 |         - `sample`: sample, or feature vector
193 |         """
194 |         margin = label * self.cache["pred_val"]
195 |         if margin < 1:
196 |             self._update(label, sample, margin)
197 | 
198 |     @classmethod
199 |     def examplify(cls, fname, delimiter = " ", C = 1, n_scan = 100):
200 |         """
201 |         Example of how to use
202 |         """
203 | 
204 |         # learn
205 |         st = time.time()
206 |         model = PassiveAggressiveI(fname, delimiter, C, n_scan)
207 |         model.learn()
208 |         et = time.time()
209 |         print "learning time: %f [s]" % (et - st)
210 | 
211 |         # predict (after learning)
212 |         data = np.loadtxt(fname, delimiter = " ")
213 |         model._binalize(data)
214 |         n_sample = data.shape[0]
215 |         y_label = data[:, 0]
216 |         y_pred = np.ndarray(n_sample)
217 |         for i in xrange(0, n_sample):
218 |             sample = data[i, 1:]
219 |             y_pred[i] = model.predict(sample)
220 | 
221 |         # show result
222 |         cm = confusion_matrix(y_label, y_pred)
223 |         print cm
224 |         print "accuracy: %d [%%]" % (np.sum(cm.diagonal()) * 100.0 / np.sum(cm))
225 | 
226 | if __name__ == '__main__':
227 |     fname = "/home/kzk/datasets/uci_csv/liver.csv"
228 |     #fname = "/home/kzk/datasets/uci_csv/ad.csv"
229 |     print "dataset is", fname
230 |     PassiveAggressiveI.examplify(fname, delimiter = " ", C = 1, n_scan = 100)
231 | 
--------------------------------------------------------------------------------
/pa/passive_aggressive_2.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import scipy as sp
3 | import logging as logger
4 | import time
5 | import pylab as pl
6 | from collections import defaultdict
7 | from sklearn.metrics import confusion_matrix
8 | 
9 | class PassiveAggressiveII(object):
10 |     """
11 |     Passive Aggressive-II algorithm: squared hinge loss PA.
12 | 
13 |     References:
14 |     - http://jmlr.org/papers/volume7/crammer06a/crammer06a.pdf
15 | 
16 |     This model is only applied to binary classification.
17 |     """
18 | 
19 |     def __init__(self, fname, delimiter = " ", C = 1, n_scan = 10):
20 |         """
21 |         model initialization.
22 |         """
23 |         logger.basicConfig(level=logger.DEBUG)
24 |         logger.info("init starts")
25 | 
26 |         self.n_scan = n_scan
27 |         self.data = defaultdict()
28 |         self.model = defaultdict()
29 |         self.cache = defaultdict()
30 |         self._load(fname, delimiter)
31 |         self._init_model(C)
32 | 
33 |         logger.info("init finished")
34 | 
35 |     def _load(self, fname, delimiter = " "):
36 |         """
37 |         Load data set specified with filename.
38 | 
39 |         data format must be as follows (space-separated file as default),
40 | 
41 |         l_1 x_11 x_12 x_13 ... x_1m
42 |         l_2 x_21 x_22 ... x_2m
43 |         ...
44 |         l_n x_n1 x_n2 ... x_nm
45 | 
46 |         l_i can be any two distinct values; they are converted to {1, -1} internally.
47 | 
48 |         Arguments:
49 |         - `fname`: file name.
50 |         - `delimiter`: delimiter of a file.
51 |         """
52 |         logger.info("load data starts")
53 | 
54 |         # load data
55 |         self.data["data"] = np.loadtxt(fname, delimiter = delimiter)
56 |         self.data["n_sample"] = self.data["data"].shape[0]
57 |         self.data["f_dim"] = self.data["data"].shape[1] - 1
58 | 
59 |         # binalize
60 |         self._binalize(self.data["data"])
61 | 
62 |         # normalize
63 |         self.normalize(self.data["data"][:, 1:])
64 | 
65 |         logger.info("load data finished")
66 | 
67 |     def _binalize(self, data):
68 |         """
69 |         Binalize label of data.
70 | 
71 |         Arguments:
72 |         - `data`: dataset.
73 |         """
74 |         logger.info("init starts")
75 | 
76 |         # binary check
77 |         labels = data[:, 0]
78 |         classes = np.unique(labels)
79 |         if classes.size != 2:
80 |             print "label must be a binary value."
81 |             exit(1)
82 | 
83 |         # convert binary labels to {1, -1}
84 |         for i in xrange(labels.size):
85 |             if labels[i] == classes[0]:
86 |                 labels[i] = 1
87 |             else:
88 |                 labels[i] = -1
89 | 
90 |         # set classes
91 |         self.data["classes"] = classes
92 |         logger.info("init finished")
93 | 
94 |     def normalize(self, samples):
95 |         """
96 |         normalize sample, such that sqrt(x^2) = 1
97 | 
98 |         Arguments:
99 |         - `samples`: dataset without labels.
100 |         """
101 |         logger.info("normalize starts")
102 |         for i in xrange(0, self.data["n_sample"]):
103 |             samples[i, :] = self._normalize(samples[i, :])
104 | 
105 |         logger.info("normalize finished")
106 | 
107 |     def _normalize(self, sample):
108 |         norm = np.sqrt(sample.dot(sample))
109 |         sample = sample / norm
110 |         return sample
111 | 
112 |     def _init_model(self, C):
113 |         """
114 |         Initialize model.
115 |         """
116 |         logger.info("init model starts")
117 |         self.model["w"] = np.zeros(self.data["f_dim"] + 1)  # model parameter
118 |         self.model["C"] = C  # aggressive parameter
119 |         logger.info("init model finished")
120 | 
121 |     def _learn(self, ):
122 |         """
123 |         Learn internally.
124 |         """
125 | 
126 |     def _update(self, label, sample, margin):
127 |         """
128 |         Update model parameter internally.
129 |         update rule is as follows,
130 |             w = w + y (1 - m)/(||x||_2^2 + C) * x
131 |         Arguments:
132 |         - `label`: label = {1, -1}
133 |         - `sample`: sample, or feature vector
134 |         """
135 |         # add bias
136 |         sample = self._add_bias(sample)
137 | 
138 |         norm = sample.dot(sample)
139 |         w = self.model["w"] + label * (1 - margin) / (norm + self.model["C"]) * sample
140 |         self.model["w"] = w
141 | 
142 |     def _predict_value(self, sample):
143 |         """
144 |         predict value of \w^T * x
145 | 
146 |         Arguments:
147 |         - `sample`:
148 |         """
149 |         return self.model["w"].dot(self._add_bias(sample))
150 | 
151 |     def _add_bias(self, sample):
152 |         return np.hstack((sample, 1))
153 | 
154 |     def learn(self, ):
155 |         """
156 |         Learn.
157 |         """
158 |         logger.info("learn starts")
159 |         data = self.data["data"]
160 | 
161 |         # learn
162 |         for i in xrange(0, self.n_scan):
163 |             for j in xrange(0, self.data["n_sample"]):
164 |                 sample = data[j, 1:]
165 |                 label = data[j, 0]
166 |                 pred_val = self._predict_value(sample)
167 |                 margin = label * pred_val
168 |                 if margin < 1:
169 |                     self._update(label, sample, margin)
170 | 
171 |         logger.info("learn finished")
172 | 
173 |     def predict(self, sample):
174 |         """
175 |         predict {1, -1} based on \w^T * x
176 | 
177 |         Arguments:
178 |         - `sample`:
179 |         """
180 |         pred_val = self._predict_value(sample)
181 |         self.cache["pred_val"] = pred_val
182 |         if pred_val >= 0:
183 |             return 1
184 |         else:
185 |             return -1
186 | 
187 |     def update(self, label, sample):
188 |         """
189 |         update model.
190 |         Arguments:
191 |         - `label`: label = {1, -1}
192 |         - `sample`: sample, or feature vector
193 |         """
194 |         margin = label * self.cache["pred_val"]
195 |         if margin < 1:
196 |             self._update(label, sample, margin)
197 | 
198 |     @classmethod
199 |     def examplify(cls, fname, delimiter = " ", C = 1, n_scan = 3):
200 |         """
201 |         Example of how to use
202 |         """
203 | 
204 |         # learn
205 |         st = time.time()
206 |         model = PassiveAggressiveII(fname, delimiter, C, n_scan)
207 |         model.learn()
208 |         et = time.time()
209 |         print "learning time: %f[s]" % (et - st)
210 | 
211 |         # predict (after learning)
212 |         data = np.loadtxt(fname, delimiter = " ")
213 |         model._binalize(data)
214 |         n_sample = data.shape[0]
215 |         y_label = data[:, 0]
216 |         y_pred = np.ndarray(n_sample)
217 |         for i in xrange(0, n_sample):
218 |             sample = data[i, 1:]
219 |             y_pred[i] = model.predict(sample)
220 | 
221 |         # show result
222 |         cm = confusion_matrix(y_label, y_pred)
223 |         print cm
224 |         print "accuracy: %d [%%]" % (np.sum(cm.diagonal()) * 100.0 / np.sum(cm))
225 | 
226 | if __name__ == '__main__':
227 |     fname = "/home/kzk/datasets/uci_csv/liver.csv"
228 |     #fname = "/home/kzk/datasets/uci_csv/ad.csv"
229 |     print "dataset is", fname
230 |     PassiveAggressiveII.examplify(fname, delimiter = " ", C = 1, n_scan = 100)
231 | 
--------------------------------------------------------------------------------
/cw/evaluate_small_data.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 | 
3 | import numpy as np
4 | import time
5 | import json
6 | import copy
7 | 
8 | from sklearn.metrics import confusion_matrix
9 | from multiclass_confidence_weighted_var_diag import MCWVarDiag
10 | from multiclass_soft_confidence_weighted_1_diag import MSCWIDiag
11 | from multiclass_soft_confidence_weighted_2_diag import MSCWIIDiag
12 | from sklearn.svm import LinearSVC
13 | 
14 | # file path
15 | filepath = "./evaluate_small_data_results.json"
16 | 
17 | # data configuration
18 | data_config = {
19 |     "abalone": {
20 |         "train": "/home/k_yoshiyama/datasets/uci_csv_train/abalone.csv",
21 |         "test": "/home/k_yoshiyama/datasets/uci_csv_test/abalone.csv",
22 |     },
23 |     "transfusion": {
24 |         "train": "/home/k_yoshiyama/datasets/uci_csv_train/transfusion.csv",
25 |         "test": "/home/k_yoshiyama/datasets/uci_csv_test/transfusion.csv",
26 |     },
27 |     "gisette": {
28 |         "train": "/home/k_yoshiyama/datasets/uci_csv_train/gisette.csv",
29 |         "test": "/home/k_yoshiyama/datasets/uci_csv_test/gisette.csv",
30 |     },
31 |     "iris": {
32 |         "train": "/home/k_yoshiyama/datasets/uci_csv_train/iris.csv",
33 |         "test": "/home/k_yoshiyama/datasets/uci_csv_test/iris.csv",
34 |     },
35 |     "glass": {
36 |         "train": "/home/k_yoshiyama/datasets/uci_csv_train/glass.csv",
37 |         "test": "/home/k_yoshiyama/datasets/uci_csv_test/glass.csv",
38 |     },
39 |     "breast_cancer": {
40 |         "train": "/home/k_yoshiyama/datasets/uci_csv_train/breast_cancer.csv",
41 |         "test": "/home/k_yoshiyama/datasets/uci_csv_test/breast_cancer.csv",
42 |     },
43 |     "car": {
44 |         "train": "/home/k_yoshiyama/datasets/uci_csv_train/car.csv",
45 |         "test": "/home/k_yoshiyama/datasets/uci_csv_test/car.csv",
46 |     },
47 |     "creadit": {
48 |         "train": "/home/k_yoshiyama/datasets/uci_csv_train/credit.csv",
49 |         "test": "/home/k_yoshiyama/datasets/uci_csv_test/credit.csv",
50 |     },
51 |     "usps": {
52 |         "train": "/home/k_yoshiyama/datasets/uci_csv_train/usps.csv",
53 |         "test": "/home/k_yoshiyama/datasets/uci_csv_test/usps.csv",
54 |     },
55 |     "liver": {
56 |         "train": "/home/k_yoshiyama/datasets/uci_csv_train/liver.csv",
"/home/k_yoshiyama/datasets/uci_csv_train/liver.csv", 57 | "test": "/home/k_yoshiyama/datasets/uci_csv_test/liver.csv", 58 | }, 59 | "haberman": { 60 | "train": "/home/k_yoshiyama/datasets/uci_csv_train/haberman.csv", 61 | "test": "/home/k_yoshiyama/datasets/uci_csv_test/haberman.csv", 62 | }, 63 | "pima": { 64 | "train": "/home/k_yoshiyama/datasets/uci_csv_train/pima.csv", 65 | "test": "/home/k_yoshiyama/datasets/uci_csv_test/pima.csv", 66 | }, 67 | "ionosphere": { 68 | "train": "/home/k_yoshiyama/datasets/uci_csv_train/ionosphere.csv", 69 | "test": "/home/k_yoshiyama/datasets/uci_csv_test/ionosphere.csv", 70 | }, 71 | "isolet": { 72 | "train": "/home/k_yoshiyama/datasets/uci_csv_train/isolet.csv", 73 | "test": "/home/k_yoshiyama/datasets/uci_csv_test/isolet.csv", 74 | }, 75 | "magicGamaTelescope": { 76 | "train": "/home/k_yoshiyama/datasets/uci_csv_train/magicGamaTelescope.csv", 77 | "test": "/home/k_yoshiyama/datasets/uci_csv_test/magicGamaTelescope.csv", 78 | }, 79 | "mammographic": { 80 | "train": "/home/k_yoshiyama/datasets/uci_csv_train/mammographic.csv", 81 | "test": "/home/k_yoshiyama/datasets/uci_csv_test/mammographic.csv", 82 | }, 83 | "yeast": { 84 | "train": "/home/k_yoshiyama/datasets/uci_csv_train/yeast.csv", 85 | "test": "/home/k_yoshiyama/datasets/uci_csv_test/yeast.csv", 86 | }, 87 | } 88 | 89 | # results 90 | results = { 91 | "abalone": { 92 | }, 93 | "transfusion": { 94 | }, 95 | "gisette": { 96 | }, 97 | "iris": { 98 | }, 99 | "glass": { 100 | }, 101 | "breast_cancer": { 102 | }, 103 | "car": { 104 | }, 105 | "creadit": { 106 | }, 107 | "usps": { 108 | }, 109 | "liver": { 110 | }, 111 | "haberman": { 112 | }, 113 | "pima": { 114 | }, 115 | "ionosphere": { 116 | }, 117 | "isolet": { 118 | }, 119 | "magicGamaTelescope": { 120 | }, 121 | "mammographic": { 122 | }, 123 | "yeast": { 124 | }, 125 | } 126 | 127 | # model config 128 | models = [MCWVarDiag, MSCWIDiag, MSCWIIDiag] 129 | model_class_name_map = { 130 | MCWVarDiag: "MCWVarDiag", 131 | MSCWIDiag: "MSCWIDiag", 132 | MSCWIIDiag: "MSCWIIDiag", 133 | } 134 | 135 | # results 136 | result_per_data = { 137 | "MCWVarDiag": { 138 | "acc": [], # per epoch 139 | "elapsed": [], # per epoch 140 | }, 141 | 142 | "MSCWIDiag": { 143 | "acc": [], 144 | "elapsed": [], 145 | }, 146 | 147 | "MSCWIIDiag": { 148 | "acc": [], 149 | "elapsed": [], 150 | }, 151 | 152 | "LinearSVC": { 153 | "acc": [], 154 | "elapsed": [], 155 | }, 156 | } 157 | 158 | # results for each data 159 | for data in results: 160 | results[data] = copy.deepcopy(result_per_data) 161 | pass 162 | 163 | # run experiment 164 | epochs = xrange(1, 51) 165 | for data in data_config: 166 | print "data %s is processing..." % data 167 | 168 | # train/test 169 | data_train = np.loadtxt(data_config[data]["train"], delimiter=" ") 170 | X_train = data_train[:, 1:] 171 | y_train = data_train[:, 0] 172 | 173 | data_test = np.loadtxt(data_config[data]["test"], delimiter=" ") 174 | X_test = data_test[:, 1:] 175 | y_test = data_test[:, 0] 176 | 177 | # evaluate 178 | for model in models: # foreach __main__.class 179 | # init 180 | print "model is %s" % str(model) 181 | model_ = model(epochs=1) 182 | print "model is %s." 
183 | 
184 |         # epoch
185 |         for epoch in epochs:
186 |             print "the number of epochs is %d" % epoch
187 |             # warm start
188 |             if not epoch == 1:
189 |                 mu = model_.model["mu"]
190 |                 S = model_.model["S"]
191 |                 model_.init_params(mu, S)
192 |                 pass
193 | 
194 |             # learn
195 |             st = time.time()
196 |             model_.epochs = 1
197 |             model_.learn(X_train, y_train)
198 |             et = time.time()
199 | 
200 |             # elapsed time
201 |             results[data][model_class_name_map[model]]["elapsed"].append(et - st)
202 | 
203 |             # predict
204 |             y_pred = []
205 |             for x in X_test:
206 |                 y_pred.append(model_.predict(x))
207 |                 pass
208 | 
209 |             cm = confusion_matrix(y_test, y_pred)
210 | 
211 |             # accuracy
212 |             results[data][model_class_name_map[model]]["acc"].append(np.sum(cm.diagonal()) * 100.0 / np.sum(cm))
213 | 
214 |             pass
215 |         pass
216 | 
217 |     # Linear SVC
218 |     print "model is LinearSVC."
219 |     model_ = LinearSVC()
220 |     st = time.time()
221 |     model_.fit(X_train, y_train)
222 |     et = time.time()
223 |     y_pred = model_.predict(X_test)
224 |     cm = confusion_matrix(y_test, y_pred)
225 |     acc = np.sum(cm.diagonal()) * 100.0 / np.sum(cm)
226 |     elapsed_time = et - st
227 |     for epoch in epochs:  # add the same results to all epochs
228 |         results[data]["LinearSVC"]["acc"].append(acc)
229 |         results[data]["LinearSVC"]["elapsed"].append(elapsed_time)
230 |         pass
231 | 
232 | 
233 | with open(filepath, "w") as fpout:
234 |     json.dump(results, fpout)
235 |     pass
236 | 
237 | 
--------------------------------------------------------------------------------
/arow/arow2_diag.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import scipy as sp
3 | import logging as logger
4 | import time
5 | import pylab as pl
6 | from collections import defaultdict
7 | from sklearn.metrics import confusion_matrix
8 | 
9 | class Arow2Diag(object):
10 |     """
11 |     Adaptive Regularization of Weight Vector algorithm with squared hinge loss,
12 |     omitting non-diagonal elements.
13 | 
14 |     Reference:
15 |     - http://webee.technion.ac.il/people/koby/publications/arow_nips09.pdf
16 | 
17 |     This model is only applied to binary classification.
18 |     """
19 | 
20 |     def __init__(self, fname, delimiter = " ", C = 1, n_scan = 10):
21 |         """
22 |         model initialization.
23 |         """
24 |         logger.basicConfig(level=logger.DEBUG)
25 |         logger.info("init starts")
26 | 
27 |         self.n_scan = n_scan
28 |         self.data = defaultdict()
29 |         self.model = defaultdict()
30 |         self.cache = defaultdict()
31 |         self._load(fname, delimiter)
32 |         self._init_model(C)
33 | 
34 |         logger.info("init finished")
35 | 
36 |     def _load(self, fname, delimiter = " "):
37 |         """
38 |         Load data set specified with filename.
39 | 
40 |         data format must be as follows (space-separated file as default),
41 | 
42 |         l_1 x_11 x_12 x_13 ... x_1m
43 |         l_2 x_21 x_22 ... x_2m
44 |         ...
45 |         l_n x_n1 x_n2 ... x_nm
46 | 
47 |         l_i can be any two distinct values; they are converted to {1, -1} internally.
48 | 
49 |         Arguments:
50 |         - `fname`: file name.
51 |         - `delimiter`: delimiter of a file.
52 |         """
53 |         logger.info("load data starts")
54 | 
55 |         # load data
56 |         st = time.time()
57 |         self.data["data"] = np.loadtxt(fname, delimiter = delimiter)
58 |         et = time.time()
59 |         logger.info("loading data time: %f[s]", (et - st))
60 |         self.data["n_sample"] = self.data["data"].shape[0]
61 |         self.data["f_dim"] = self.data["data"].shape[1] - 1
62 | 
63 | 
64 |         # binalize
65 |         self._binalize(self.data["data"])
66 | 
67 |         # normalize
68 |         self.normalize(self.data["data"][:, 1:])
69 | 
70 |         logger.info("load data finished")
71 | 
72 |     def _binalize(self, data):
73 |         """
74 |         Binalize label of data.
75 | 
76 |         Arguments:
77 |         - `data`: dataset.
78 |         """
79 |         logger.info("init starts")
80 | 
81 |         # binary check
82 |         labels = data[:, 0]
83 |         classes = np.unique(labels)
84 |         if classes.size != 2:
85 |             print "label must be a binary value."
86 |             exit(1)
87 | 
88 |         # convert binary labels to {1, -1}
89 |         for i in xrange(labels.size):
90 |             if labels[i] == classes[0]:
91 |                 labels[i] = 1
92 |             else:
93 |                 labels[i] = -1
94 | 
95 |         # set classes
96 |         self.data["classes"] = classes
97 |         logger.info("init finished")
98 | 
99 |     def normalize(self, samples):
100 |         """
101 |         normalize sample, such that sqrt(x^2) = 1
102 | 
103 |         Arguments:
104 |         - `samples`: dataset without labels.
105 |         """
106 |         logger.info("normalize starts")
107 |         for i in xrange(0, self.data["n_sample"]):
108 |             samples[i, :] = self._normalize(samples[i, :])
109 | 
110 |         logger.info("normalize finished")
111 | 
112 |     def _normalize(self, sample):
113 |         norm = np.sqrt(sample.dot(sample))
114 |         sample = sample / norm
115 |         return sample
116 | 
117 |     def _init_model(self, C):
118 |         """
119 |         Initialize model.
120 |         """
121 |         logger.info("init model starts")
122 |         self.model["mu"] = np.zeros(self.data["f_dim"] + 1)  # model parameter mean
123 |         self.model["S"] = np.ones(self.data["f_dim"] + 1)  # model parameter covariance
124 |         self.model["C"] = C  # aggressive parameter
125 |         logger.info("init model finished")
126 | 
127 |     def _learn(self, ):
128 |         """
129 |         Learn internally.
130 |         """
131 | 
132 |     def _update(self, label, sample, margin):
133 |         """
134 |         Update model parameter internally.
135 |         update rule is as follows,
136 |             beta = x^T S x + C,
137 |             mu = mu + y (1 - m) S x / beta, and
138 |             S = S - S x x^T S / beta.
139 | 
140 |         Arguments:
141 |         - `label`: label = {1, -1}
142 |         - `sample`: sample, or feature vector
143 |         """
144 | 
145 |         # add bias
146 |         sample = self._add_bias(sample)
147 | 
148 |         # beta
149 |         beta = np.sum(sample * self.model["S"] * sample) + self.model["C"]
150 | 
151 |         # mu
152 |         norm = sample.dot(sample)
153 |         mu = self.model["mu"] + label * (1 - margin) * self.model["S"] * sample / beta
154 |         self.model["mu"] = mu
155 | 
156 |         # S
157 |         S = self.model["S"] - (self.model["S"] * sample * sample * self.model["S"]) / beta
158 |         self.model["S"] = S
159 | 
160 |     def _predict_value(self, sample):
161 |         """
162 |         predict value of \mu^T * x
163 | 
164 |         Arguments:
165 |         - `sample`:
166 |         """
167 |         return self.model["mu"].dot(self._add_bias(sample))
168 | 
169 |     def _add_bias(self, sample):
170 |         return np.hstack((sample, 1))
171 | 
172 |     def learn(self, ):
173 |         """
174 |         Learn.
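        Scans the dataset n_scan times; with the diagonal covariance each
        update costs O(d) per sample, instead of the O(d^2) of the
        full-matrix AROW2.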
175 |         """
176 |         logger.info("learn starts")
177 |         data = self.data["data"]
178 | 
179 |         # learn
180 |         st = time.time()
181 |         for i in xrange(0, self.n_scan):
182 |             print "iter:", i
183 |             for j in xrange(0, self.data["n_sample"]):
184 |                 sample = data[j, 1:]
185 |                 label = data[j, 0]
186 |                 pred_val = self._predict_value(sample)
187 |                 margin = label * pred_val
188 |                 if margin < 1:
189 |                     self._update(label, sample, margin)
190 | 
191 |         logger.info("learn finished")
192 |         et = time.time()
193 |         logger.info("learning time: %f[s]" % (et - st))
194 | 
195 |     def predict(self, sample):
196 |         """
197 |         predict {1, -1} based on \mu^T * x
198 | 
199 |         Arguments:
200 |         - `sample`:
201 |         """
202 |         pred_val = self._predict_value(sample)
203 |         self.cache["pred_val"] = pred_val
204 |         if pred_val >= 0:
205 |             return 1
206 |         else:
207 |             return -1
208 | 
209 |     def update(self, label, sample):
210 |         """
211 |         update model.
212 |         Arguments:
213 |         - `label`: label = {1, -1}
214 |         - `sample`: sample, or feature vector
215 |         """
216 |         margin = label * self.cache["pred_val"]
217 |         if margin < 1:
218 |             self._update(label, sample, margin)
219 | 
220 |     @classmethod
221 |     def examplify(cls, fname, delimiter = " ", C = 1, n_scan = 1):
222 |         """
223 |         Example of how to use
224 |         """
225 | 
226 |         # learn
227 |         model = Arow2Diag(fname = fname, delimiter = delimiter, C = C, n_scan = n_scan)
228 |         model.learn()
229 | 
230 |         # predict (after learning)
231 |         data = np.loadtxt(fname, delimiter = " ")
232 |         model._binalize(data)
233 |         model.normalize(data[:, 1:])
234 |         n_sample = data.shape[0]
235 |         y_label = data[:, 0]
236 |         y_pred = np.ndarray(n_sample)
237 |         for i in xrange(0, n_sample):
238 |             sample = data[i, 1:]
239 |             y_pred[i] = model.predict(sample)
240 | 
241 |         # show result
242 |         cm = confusion_matrix(y_label, y_pred)
243 |         print cm
244 |         print "accuracy: %d [%%]" % (np.sum(cm.diagonal()) * 100.0 / np.sum(cm))
245 | 
246 | if __name__ == '__main__':
247 |     fname = "/home/kzk/datasets/uci_csv/liver.csv"
248 |     #fname = "/home/kzk/datasets/uci_csv/ad.csv"
249 |     #fname = "/home/kzk/datasets/uci_csv/adult.csv"
250 |     print "dataset is", fname
251 |     Arow2Diag.examplify(fname, delimiter = " ", C = 1, n_scan = 1)
--------------------------------------------------------------------------------
/lr/multiclass_logistic_regression.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import scipy as sp
3 | import logging as logger
4 | import time
5 | import pylab as pl
6 | from collections import defaultdict
7 | from sklearn.metrics import confusion_matrix
8 | 
9 | ###################
10 | ## should be refactored ##
11 | ###################
12 | 
13 | class MultiClassLogisticRegression(object):
14 |     """
15 |     Multi-Class Logistic Regression solved with stochastic gradient descent.
16 |     Applicable only to a linear model.
17 | 
18 |     Should be refactored.
19 |     """
20 |     def __init__(self, epsilon = 0.05, n_scan = 500):
21 |         """
22 |         Initializer.
23 |         Arguments:
24 |         - `epsilon`: step size for stochastic gradient.
25 |         - `n_scan`: n_scan
26 |         """
27 |         logger.basicConfig(level=logger.DEBUG)
28 |         logger.info("init starts")
29 |         self.data = None
30 |         self.model = defaultdict()
31 |         self.model["epsilon"] = epsilon
32 |         self.model["n_scan"] = n_scan
33 |         self.epsilon = epsilon
34 |         logger.info("init finished")
35 | 
36 |     def _load(self, fname, delimiter=" "):
37 |         """
38 |         Load data with file name.
39 | data format must be as follows (space-separated file as default),
40 | 
41 | l_1 x_11 x_12 x_13 ... x_1m
42 | l_2 x_21 x_22 ... x_2m
43 | ...
44 | l_n x_n1 x_n2 ... x_nm
45 | 
46 | Arguments:
47 | - `fname`: File name to be loaded.
48 | """
49 | logger.info("load data starts")
50 | 
51 | # load data
52 | self.data = np.loadtxt(fname, delimiter = delimiter)
53 | 
54 | logger.info("load data finished")
55 | 
56 | def _init_model(self):
57 | """
58 | Initialize model.
59 | """
60 | logger.info("init model starts")
61 | 
62 | # class label/num of classes
63 | classes = np.unique(self.data[:, 0])
64 | self.model["classes"] = classes
65 | self.model["n_classes"] = classes.size
66 | 
67 | # theta's dimension/number of samples
68 | self.model["n_samples"] = self.data.shape[0]
69 | self.model["f_dims"] = self.data.shape[1] - 1
70 | 
71 | # map[label, vector]/probability
72 | self.model["theta"] = defaultdict()
73 | self.model["probs"] = defaultdict()
74 | for i in self.model["classes"]:
75 | self.model["theta"][i] = np.random.rand(self.model["f_dims"]) - 0.5
76 | self.model["probs"][i] = 0.0
77 | 
78 | # model information
79 | logger.info("####### Model Info ###########")
80 | logger.info("feature dimensions: %d" % self.model["f_dims"])
81 | logger.info("number of samples: %d" % self.model["n_samples"])
82 | logger.info("number of classes: %d" % self.model["n_classes"])
83 | logger.info("classes: %s" % self.model["classes"])
84 | logger.info("thetas: %s" % self.model["theta"])
85 | logger.info("probability: %s" % self.model["probs"])
86 | logger.info("##########################")
87 | 
88 | logger.info("init model finished")
89 | 
90 | def learn(self, fname):
91 | """
92 | Learn from the dataset specified by fname, scanning it n_scan times.
93 | 
94 | Arguments:
95 | - `fname`: dataset.
96 | """
97 | # load dataset
98 | self._load(fname)
99 | 
100 | # init model info
101 | self._init_model()
102 | 
103 | # learn
104 | logger.info("learning starts")
105 | for c in xrange(0, self.model["n_scan"]):
106 | for i in xrange(0, self.model["n_samples"]):
107 | self._learn(self.data[i, :])
108 | 
109 | logger.info("learning finished")
110 | 
111 | def _learn(self, l_sample):
112 | """
113 | Run one stochastic gradient step for a single labeled sample.
114 | """
115 | # predict
116 | probs = self._predict_probs(l_sample[1:])
117 | 
118 | # update
119 | self._update(l_sample, probs)
120 | 
121 | def _update(self, l_sample, probs):
122 | """
123 | Update model internally.
124 | 
125 | Update rule (gradient ascent on the log-likelihood) is as follows,
126 | theta_c = theta_c + epsilon * delta_c(J(theta)) for all c, where
127 | delta_c(J) = - p(c|x) * x + x (if c = y)
128 | delta_c(J) = - p(c|x) * x (if c != y), with p(c|x) = exp(theta_c^T x) / sum_c' exp(theta_c'^T x).
129 | """
130 | # self.epsilon = self.epsilon/2 # heuristic step-size decay (disabled)
131 | for cls in self.model["classes"]:
132 | self.model["theta"][cls] = self.model["theta"][cls] - self.epsilon * probs[cls] * l_sample[1:]
133 | 
134 | self.model["theta"][l_sample[0]] = self.model["theta"][l_sample[0]] + self.epsilon * l_sample[1:]
135 | 
136 | def _predict_probs(self, sample):
137 | """
138 | Compute class probabilities of a single sample internally (softmax).
139 | Arguments:
140 | - `sample`: 1-dimensional ndarray.
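
Probabilities are computed with the max-subtraction trick for
numerical stability,

p(c|x) = exp(z_c - max_c' z_c') / sum_c' exp(z_c' - max_c'' z_c''), z_c = theta_c^T x,

which equals the plain softmax but avoids overflow in np.exp for
large inner products.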
141 | """ 142 | # inner prods 143 | inner_prods = defaultdict() 144 | for cls in self.model["classes"]: 145 | inner_prods[cls] = self.model["theta"][cls].dot(sample) 146 | max_ = max(inner_prods.values()) 147 | 148 | # probs 149 | probs = defaultdict() 150 | sum_ = 0 151 | for cls in self.model["classes"]: 152 | probs[cls] = np.exp(inner_prods[cls] - max_) 153 | sum_ += probs[cls] 154 | 155 | # normalize probs 156 | for cls in probs.keys(): 157 | probs[cls] = probs[cls]/sum_ 158 | 159 | return probs 160 | 161 | def predict_probs(self, fname): 162 | """ 163 | predict probability with leaned model 164 | 165 | Arguments: 166 | - `fname`: 167 | """ 168 | logger.info("predict starts") 169 | data = np.loadtxt(fname) 170 | n_samples = data.shape[0] 171 | outputs = defaultdict() 172 | for i in xrange(0, n_samples): 173 | outputs[i] = self._predict_probs(data[i, 1:]) 174 | 175 | logger.info("predict finished") 176 | 177 | return outputs 178 | 179 | def predict(self, fname): 180 | """ 181 | predict label with leaned model 182 | Arguments: 183 | - `fname`: 184 | """ 185 | outputs_ = self.predict_probs(fname) 186 | outputs = np.ndarray(len(outputs_)) 187 | 188 | for i in xrange(0, len(outputs)): 189 | outputs[i] = max(outputs_[i], key=outputs_[i].get) 190 | 191 | return outputs 192 | 193 | def predict_probs_then_update(self, sample): 194 | 195 | # predict 196 | probs = self._predict_probs(sample) 197 | 198 | # update 199 | self._update(sample, probs) 200 | 201 | return probs 202 | 203 | @classmethod 204 | def examplify(cls, fname): 205 | """ 206 | example of how to use 207 | """ 208 | logger.info("examplify starts") 209 | 210 | # model 211 | model = MultiClassLogisticRegression(epsilon=0.01, n_scan = 100) 212 | 213 | # learn 214 | st = time.time() 215 | model.learn(fname) 216 | et = time.time() 217 | print "learning time: %d [s]" % ((et - st)/1000) 218 | 219 | # predict 220 | y_pred = model.predict(fname) 221 | 222 | # confusion matrix 223 | y_label = np.loadtxt(fname, delimiter=" ")[:, 0] 224 | 225 | cm = confusion_matrix(y_label, y_pred) 226 | #pl.matshow(cm) 227 | #pl.title('Confusion matrix') 228 | #pl.colorbar() 229 | #pl.ylabel('True label') 230 | #pl.xlabel('Predicted label') 231 | #pl.show() 232 | 233 | print cm 234 | print "accurary: %d [%%]" % (np.sum(cm.diagonal()) * 100.0/np.sum(cm)) 235 | logger.info("examplify finished") 236 | 237 | if __name__ == '__main__': 238 | # TODO 239 | # stopping criterion 240 | # epsilon/n_scan treatment 241 | #fname = "/home/kzk/datasets/uci_csv/glass.csv" 242 | fname = "/home/kzk/datasets/uci_csv/liver.csv" 243 | MultiClassLogisticRegression.examplify(fname) 244 | 245 | 246 | 247 | -------------------------------------------------------------------------------- /cw/confidence_weighted_var.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy as sp 3 | import logging as logger 4 | import time 5 | import pylab as pl 6 | from collections import defaultdict 7 | from sklearn.metrics import confusion_matrix 8 | from scipy.stats import norm 9 | 10 | class CWVar(object): 11 | """ 12 | Full matrix version of Confidence-Weighted algorithm. 13 | (somtimes referred to as CW-Var) 14 | 15 | References: 16 | - https://alliance.seas.upenn.edu/~nlp/publications/pdf/dredze2008f.pdf 17 | - http://webee.technion.ac.il/people/koby/publications/paper_nips08_std.pdf 18 | 19 | This model is only applied to binary classification. 
20 | """ 21 | 22 | def __init__(self, fname, delimiter = " ", eta = 0.9, a = 1, n_scan = 10): 23 | """ 24 | model initialization. 25 | """ 26 | logger.basicConfig(level=logger.DEBUG) 27 | logger.info("init starts") 28 | 29 | self.n_scan = n_scan 30 | self.data = defaultdict() 31 | self.model = defaultdict() 32 | self.cache = defaultdict() 33 | self._load(fname, delimiter) 34 | self._init_model(eta, a) 35 | 36 | logger.info("init finished") 37 | 38 | def _load(self, fname, delimiter = " "): 39 | """ 40 | Load data set specified with filename. 41 | 42 | data format must be as follows (space-separated file as default), 43 | 44 | l_1 x_11 x_12 x_13 ... x_1m 45 | l_2 x_21 x_22 ... x_2m 46 | ... 47 | l_n x_n1 x_n2 ... x_nm 48 | 49 | l_i must be {1, -1} because of binary classifier. 50 | 51 | Arguments: 52 | - `fname`: file name. 53 | - `delimiter`: delimiter of a file. 54 | """ 55 | logger.info("load data starts") 56 | 57 | # load data 58 | st = time.time() 59 | self.data["data"] = np.loadtxt(fname, delimiter = delimiter) 60 | et = time.time() 61 | logger.info("loading data time: %f[s]", (et - st)) 62 | self.data["n_sample"] = self.data["data"].shape[0] 63 | self.data["f_dim"] = self.data["data"].shape[1] - 1 64 | 65 | # binalize 66 | self._binalize(self.data["data"]) 67 | 68 | # normlize 69 | self.normalize(self.data["data"][:, 1:]) 70 | 71 | logger.info("load data finished") 72 | 73 | def _binalize(self, data): 74 | """ 75 | Binalize label of data. 76 | 77 | Arguments: 78 | - `data`: dataset. 79 | """ 80 | logger.info("init starts") 81 | 82 | # binary check 83 | labels = data[:, 0] 84 | classes = np.unique(labels) 85 | if classes.size != 2: 86 | print "label must be a binary value." 87 | exit(1) 88 | 89 | # convert binary lables to {1, -1} 90 | for i in xrange(labels.size): 91 | if labels[i] == classes[0]: 92 | labels[i] = 1 93 | else: 94 | labels[i] = -1 95 | 96 | # set classes 97 | self.data["classes"] = classes 98 | logger.info("init finished") 99 | 100 | def normalize(self, samples): 101 | """ 102 | nomalize sample, such that sqrt(x^2) = 1 103 | 104 | Arguments: 105 | - `samples`: dataset without labels. 106 | """ 107 | logger.info("normalize starts") 108 | for i in xrange(0, self.data["n_sample"]): 109 | samples[i, :] = self._normalize(samples[i, :]) 110 | 111 | logger.info("normalize finished") 112 | 113 | def _normalize(self, sample): 114 | norm = np.sqrt(sample.dot(sample)) 115 | sample = sample/norm 116 | return sample 117 | 118 | def _init_model(self, eta, a): 119 | """ 120 | Initialize model. 121 | """ 122 | logger.info("init model starts") 123 | self.model["mu"] = np.zeros(self.data["f_dim"] + 1) # model parameter mean 124 | self.model["a"] = a 125 | self.model["S"] = a * np.identity(self.data["f_dim"] + 1)#model parameter covariance 126 | self.model["eta"] = eta # confidence parameter 127 | self.model["phi"] = norm.ppf(norm.cdf(eta)) # inverse of cdf(eta) 128 | logger.info("init model finished") 129 | 130 | def _learn(self, ): 131 | """ 132 | Learn internally. 133 | """ 134 | 135 | def _update(self, label, sample, m, v): 136 | """ 137 | Update model parameter internally. 
138 | 
139 | Arguments:
140 | - `label`: label = {1, -1}
141 | - `sample`: sample, or feature vector
142 | """
143 | 
144 | # add bias
145 | sample = self._add_bias(sample)
146 | 
147 | # alpha
148 | phi = self.model["phi"]
149 | a = (1 + 2 * phi * m)
150 | b = (m - phi * v)
151 | alpha = (-a + np.sqrt(np.power(a, 2) - 8 * phi * b))/(4 * phi * v)
152 | 
153 | # mu
154 | mu = self.model["mu"] + alpha * label * self.model["S"].dot(sample)
155 | self.model["mu"] = mu
156 | 
157 | # S
158 | c = 2 * alpha * phi
159 | coef = c/(1 + c * v)
160 | S = self.model["S"] - self.model["S"].dot(np.outer(sample, sample)).dot(self.model["S"]) * coef
161 | self.model["S"] = S
162 | 
163 | def _predict_value(self, sample):
164 | """
165 | predict value of \mu^T * x
166 | 
167 | Arguments:
168 | - `sample`:
169 | """
170 | return self.model["mu"].dot(self._add_bias(sample))
171 | 
172 | def _add_bias(self, sample):
173 | return np.hstack((sample, 1))
174 | 
175 | def learn(self):
176 | """
177 | Learn.
178 | """
179 | logger.info("learn starts")
180 | data = self.data["data"]
181 | 
182 | # learn
183 | st = time.time()
184 | for e in xrange(0, self.n_scan):
185 | print "iter:", e
186 | for i in xrange(0, self.data["n_sample"]):
187 | sample = data[i, 1:]
188 | label = data[i, 0]
189 | pred_val = self._predict_value(sample)
190 | m = label * pred_val
191 | biased_sample = self._add_bias(sample)
192 | v = biased_sample.dot(self.model["S"]).dot(biased_sample)
193 | if m < self.model["phi"] * v: # CW-Var update condition
194 | self._update(label, sample, m, v)
195 | 
196 | logger.info("learn finished")
197 | et = time.time()
198 | logger.info("learning time: %f[s]" % (et - st))
199 | 
200 | def predict(self, sample):
201 | """
202 | predict {1, -1} based on \mu^T * x
203 | 
204 | Arguments:
205 | - `sample`:
206 | """
207 | pred_val = self._predict_value(sample)
208 | self.cache["pred_val"] = pred_val
209 | if pred_val >= 0:
210 | return 1
211 | else:
212 | return -1
213 | 
214 | 
215 | def update(self, label, sample):
216 | """
217 | update model.
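Performs a single online step using the prediction value cached by
the preceding predict() call, so predict(sample) must be invoked on
the same sample first.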
218 | Arguments:
219 | - `label`: label = {1, -1}
220 | - `sample`: sample, or feature vector
221 | """
222 | 
223 | m = label * self.cache["pred_val"]
224 | biased_sample = self._add_bias(sample)
225 | v = biased_sample.dot(self.model["S"]).dot(biased_sample)
226 | if m < self.model["phi"] * v:
227 | self._update(label, sample, m, v)
228 | 
229 | @classmethod
230 | def examplify(cls, fname, delimiter = " ", eta = 0.1, a = 1, n_scan = 1):
231 | """
232 | Example of how to use
233 | """
234 | 
235 | # learn
236 | model = CWVar(fname=fname, delimiter=delimiter, eta=eta, a=a, n_scan=n_scan)
237 | model.learn()
238 | 
239 | # predict (after learning)
240 | data = np.loadtxt(fname, delimiter = delimiter)
241 | model._binalize(data)
242 | model.normalize(data[:, 1:])
243 | n_sample = data.shape[0]
244 | y_label = data[:, 0]
245 | y_pred = np.ndarray(n_sample)
246 | for i in xrange(0, n_sample):
247 | sample = data[i, 1:]
248 | y_pred[i] = model.predict(sample)
249 | 
250 | # show result
251 | cm = confusion_matrix(y_label, y_pred)
252 | print cm
253 | print "accuracy: %d [%%]" % (np.sum(cm.diagonal()) * 100.0/np.sum(cm))
254 | 
255 | if __name__ == '__main__':
256 | fname = "/home/kzk/datasets/uci_csv/liver.csv"
257 | #fname = "/home/kzk/datasets/uci_csv/ad.csv"
258 | #fname = "/home/kzk/datasets/uci_csv/adult.csv"
259 | #fname = "/home/kzk/datasets/uci_csv/iris2.csv"
260 | print "dataset is", fname
261 | CWVar.examplify(fname, delimiter = " ", eta = 0.9, a = 1, n_scan = 10)
262 | 
--------------------------------------------------------------------------------
/cw/soft_confidence_weighted_2.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import scipy as sp
3 | import logging as logger
4 | import time
5 | import pylab as pl
6 | from collections import defaultdict
7 | from sklearn.metrics import confusion_matrix
8 | from scipy.stats import norm
9 | 
10 | class SCWII(object):
11 | """
12 | Full matrix version of Soft Confidence-Weighted algorithm with L2-hinge loss.
13 | 
14 | References:
15 | - http://icml.cc/2012/papers/86.pdf
16 | - https://alliance.seas.upenn.edu/~nlp/publications/pdf/dredze2008f.pdf
17 | - http://webee.technion.ac.il/people/koby/publications/paper_nips08_std.pdf
18 | 
19 | Note:
20 | - This model is only applied to binary classification.
21 | """
22 | 
23 | def __init__(self, fname, delimiter = " ", eta = 0.9, C = 1, n_scan = 10):
24 | """
25 | model initialization.
26 | """
27 | logger.basicConfig(level=logger.DEBUG)
28 | logger.info("init starts")
29 | 
30 | self.n_scan = n_scan
31 | self.data = defaultdict()
32 | self.model = defaultdict()
33 | self.cache = defaultdict()
34 | self._load(fname, delimiter)
35 | self._init_model(eta, C)
36 | 
37 | logger.info("init finished")
38 | 
39 | def _load(self, fname, delimiter = " "):
40 | """
41 | Load data set specified with filename.
42 | 
43 | data format must be as follows (space-separated file as default),
44 | 
45 | l_1 x_11 x_12 x_13 ... x_1m
46 | l_2 x_21 x_22 ... x_2m
47 | ...
48 | l_n x_n1 x_n2 ... x_nm
49 | 
50 | l_i must be in {1, -1} because this is a binary classifier.
51 | 
52 | Arguments:
53 | - `fname`: file name.
54 | - `delimiter`: delimiter of a file.
55 | """ 56 | logger.info("load data starts") 57 | 58 | # load data 59 | st = time.time() 60 | self.data["data"] = np.loadtxt(fname, delimiter = delimiter) 61 | et = time.time() 62 | logger.info("loading data time: %f[s]", (et - st)) 63 | self.data["n_sample"] = self.data["data"].shape[0] 64 | self.data["f_dim"] = self.data["data"].shape[1] - 1 65 | 66 | # binalize 67 | self._binalize(self.data["data"]) 68 | 69 | # normlize 70 | self.normalize(self.data["data"][:, 1:]) 71 | 72 | logger.info("load data finished") 73 | 74 | def _binalize(self, data): 75 | """ 76 | Binalize label of data. 77 | 78 | Arguments: 79 | - `data`: dataset. 80 | """ 81 | logger.info("init starts") 82 | 83 | # binary check 84 | labels = data[:, 0] 85 | classes = np.unique(labels) 86 | if classes.size != 2: 87 | print "label must be a binary value." 88 | exit(1) 89 | 90 | # convert binary lables to {1, -1} 91 | for i in xrange(labels.size): 92 | if labels[i] == classes[0]: 93 | labels[i] = 1 94 | else: 95 | labels[i] = -1 96 | 97 | # set classes 98 | self.data["classes"] = classes 99 | logger.info("init finished") 100 | 101 | def normalize(self, samples): 102 | """ 103 | nomalize sample, such that sqrt(x^2) = 1 104 | 105 | Arguments: 106 | - `samples`: dataset without labels. 107 | """ 108 | logger.info("normalize starts") 109 | for i in xrange(0, self.data["n_sample"]): 110 | samples[i, :] = self._normalize(samples[i, :]) 111 | 112 | logger.info("normalize finished") 113 | 114 | def _normalize(self, sample): 115 | norm = np.sqrt(sample.dot(sample)) 116 | sample = sample/norm 117 | return sample 118 | 119 | def _init_model(self, eta, C): 120 | """ 121 | Initialize model. 122 | """ 123 | logger.info("init model starts") 124 | self.model["mu"] = np.zeros(self.data["f_dim"] + 1) # model parameter mean 125 | self.model["C"] = C 126 | self.model["S"] = np.identity(self.data["f_dim"] + 1)#model parameter covariance 127 | self.model["eta"] = eta # confidence parameter 128 | self.model["phi"] = norm.ppf(norm.cdf(eta)) # inverse of cdf(eta) 129 | self.model["phi_2"] = self.model["phi"] ** 2 130 | self.model["phi_4"] = self.model["phi_2"] ** 2 131 | self.model["psi"] = 1 + self.model["phi_2"] / 2 132 | self.model["zeta"] = 1 + self.model["phi_2"] 133 | logger.info("init model finished") 134 | 135 | def _learn(self, ): 136 | """ 137 | Learn internally. 138 | """ 139 | 140 | def _update(self, label, sample, m, v): 141 | """ 142 | Update model parameter internally. 
143 | 
144 | Arguments:
145 | - `label`: label = {1, -1}
146 | - `sample`: sample, or feature vector
147 | """
148 | 
149 | # add bias
150 | sample = self._add_bias(sample)
151 | 
152 | # alpha
153 | C = self.model["C"]
154 | n = v + 1.0/(2*C) # 1.0 avoids integer division for integer C
155 | phi = self.model["phi"]
156 | phi_2 = self.model["phi_2"]
157 | gamma = phi * np.sqrt((phi*m*v)**2 + 4*n*v*(n + v*phi_2))
158 | alpha = (-(2*m*n + phi_2*m*v) + gamma)/(2*(n**2 + n*v*phi_2))
159 | alpha = max(0, alpha)
160 | 
161 | # mu
162 | S = self.model["S"]
163 | mu = self.model["mu"] + alpha * label * S.dot(sample)
164 | self.model["mu"] = mu
165 | 
166 | # beta
167 | alpha_v_phi = alpha*v*phi
168 | u = ((-alpha_v_phi + np.sqrt(alpha_v_phi**2 + 4*v)) ** 2) / 4
169 | beta = alpha * phi / (np.sqrt(u) + alpha_v_phi)
170 | 
171 | # S
172 | S = S - beta * S.dot(np.outer(sample, sample)).dot(S)
173 | self.model["S"] = S
174 | 
175 | def _predict_value(self, sample):
176 | """
177 | predict value of \mu^T * x
178 | 
179 | Arguments:
180 | - `sample`:
181 | """
182 | return self.model["mu"].dot(self._add_bias(sample))
183 | 
184 | def _add_bias(self, sample):
185 | return np.hstack((sample, 1))
186 | 
187 | def learn(self):
188 | """
189 | Learn.
190 | """
191 | logger.info("learn starts")
192 | data = self.data["data"]
193 | 
194 | # learn
195 | st = time.time()
196 | for e in xrange(0, self.n_scan):
197 | print "iter:", e
198 | for i in xrange(0, self.data["n_sample"]):
199 | sample = data[i, 1:]
200 | label = data[i, 0]
201 | pred_val = self._predict_value(sample)
202 | m = label * pred_val
203 | biased_sample = self._add_bias(sample)
204 | v = biased_sample.dot(self.model["S"]).dot(biased_sample)
205 | if m < self.model["phi"] * np.sqrt(v):
206 | self._update(label, sample, m, v)
207 | 
208 | logger.info("learn finished")
209 | et = time.time()
210 | logger.info("learning time: %f[s]" % (et - st))
211 | 
212 | def predict(self, sample):
213 | """
214 | predict {1, -1} based on \mu^T * x
215 | 
216 | Arguments:
217 | - `sample`:
218 | """
219 | pred_val = self._predict_value(sample)
220 | self.cache["pred_val"] = pred_val
221 | if pred_val >= 0:
222 | return 1
223 | else:
224 | return -1
225 | 
226 | ## TODO
227 | def update(self, label, sample):
228 | """
229 | update model.
230 | Arguments: 231 | - `sample`: sample, or feature vector 232 | - `pred_val`: predicted value i.e., mu^T * sample 233 | """ 234 | 235 | margin = label * self.model["pred_val"] 236 | if margin < 1: 237 | _update(label, sample, margin) 238 | 239 | @classmethod 240 | def examplify(cls, fname, delimiter = " ", eta = 0.1, C = 1, n_scan = 1): 241 | """ 242 | Example of how to use 243 | """ 244 | 245 | # learn 246 | model = SCWII(fname = fname, delimiter = delimiter, eta = eta, C = C, n_scan = n_scan) 247 | model.learn() 248 | 249 | # predict (after learning) 250 | data = np.loadtxt(fname, delimiter = " ") 251 | model._binalize(data) 252 | model.normalize(data[:, 1:]) 253 | n_sample = data.shape[0] 254 | y_label = data[:, 0] 255 | y_pred = np.ndarray(n_sample) 256 | for i in xrange(0, n_sample): 257 | sample = data[i, 1:] 258 | y_pred[i] = model.predict(sample) 259 | 260 | # show result 261 | cm = confusion_matrix(y_label, y_pred) 262 | print cm 263 | print "accurary: %d [%%]" % (np.sum(cm.diagonal()) * 100.0/np.sum(cm)) 264 | 265 | if __name__ == '__main__': 266 | #fname = "/home/kzk/datasets/uci_csv/liver.csv" 267 | #fname = "/home/kzk/datasets/uci_csv/ad.csv" 268 | #fname = "/home/kzk/datasets/uci_csv/adult.csv" 269 | fname = "/home/kzk/datasets/uci_csv/iris2.csv" 270 | print "dataset is", fname 271 | SCWII.examplify(fname, delimiter = " ", eta = 0.9, C = 1, n_scan = 10) 272 | -------------------------------------------------------------------------------- /cw/soft_confidence_weighted_1.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import scipy as sp 3 | import logging as logger 4 | import time 5 | import pylab as pl 6 | from collections import defaultdict 7 | from sklearn.metrics import confusion_matrix 8 | from scipy.stats import norm 9 | 10 | class SCWI(object): 11 | """ 12 | Full matrix version of Soft Confidence-Weighted algorithm with L1-hinge loss. 13 | 14 | References: 15 | - http://icml.cc/2012/papers/86.pdf 16 | - https://alliance.seas.upenn.edu/~nlp/publications/pdf/dredze2008f.pdf 17 | - http://webee.technion.ac.il/people/koby/publications/paper_nips08_std.pdf 18 | 19 | Note: 20 | - This model is only applied to binary classification. 21 | 22 | """ 23 | 24 | def __init__(self, fname, delimiter = " ", eta = 0.9, C = 1, n_scan = 10): 25 | """ 26 | model initialization. 27 | """ 28 | logger.basicConfig(level=logger.DEBUG) 29 | logger.info("init starts") 30 | 31 | self.n_scan = n_scan 32 | self.data = defaultdict() 33 | self.model = defaultdict() 34 | self.cache = defaultdict() 35 | self._load(fname, delimiter) 36 | self._init_model(eta, C) 37 | 38 | logger.info("init finished") 39 | 40 | def _load(self, fname, delimiter = " "): 41 | """ 42 | Load data set specified with filename. 43 | 44 | data format must be as follows (space-separated file as default), 45 | 46 | l_1 x_11 x_12 x_13 ... x_1m 47 | l_2 x_21 x_22 ... x_2m 48 | ... 49 | l_n x_n1 x_n2 ... x_nm 50 | 51 | l_i must be {1, -1} because of binary classifier. 52 | 53 | Arguments: 54 | - `fname`: file name. 55 | - `delimiter`: delimiter of a file. 
56 | """ 57 | logger.info("load data starts") 58 | 59 | # load data 60 | st = time.time() 61 | self.data["data"] = np.loadtxt(fname, delimiter = delimiter) 62 | et = time.time() 63 | logger.info("loading data time: %f[s]", (et - st)) 64 | self.data["n_sample"] = self.data["data"].shape[0] 65 | self.data["f_dim"] = self.data["data"].shape[1] - 1 66 | 67 | # binalize 68 | self._binalize(self.data["data"]) 69 | 70 | # normlize 71 | self.normalize(self.data["data"][:, 1:]) 72 | 73 | logger.info("load data finished") 74 | 75 | def _binalize(self, data): 76 | """ 77 | Binalize label of data. 78 | 79 | Arguments: 80 | - `data`: dataset. 81 | """ 82 | logger.info("init starts") 83 | 84 | # binary check 85 | labels = data[:, 0] 86 | classes = np.unique(labels) 87 | if classes.size != 2: 88 | print "label must be a binary value." 89 | exit(1) 90 | 91 | # convert binary lables to {1, -1} 92 | for i in xrange(labels.size): 93 | if labels[i] == classes[0]: 94 | labels[i] = 1 95 | else: 96 | labels[i] = -1 97 | 98 | # set classes 99 | self.data["classes"] = classes 100 | logger.info("init finished") 101 | 102 | def normalize(self, samples): 103 | """ 104 | nomalize sample, such that sqrt(x^2) = 1 105 | 106 | Arguments: 107 | - `samples`: dataset without labels. 108 | """ 109 | logger.info("normalize starts") 110 | for i in xrange(0, self.data["n_sample"]): 111 | samples[i, :] = self._normalize(samples[i, :]) 112 | 113 | logger.info("normalize finished") 114 | 115 | def _normalize(self, sample): 116 | norm = np.sqrt(sample.dot(sample)) 117 | sample = sample/norm 118 | return sample 119 | 120 | def _init_model(self, eta, C): 121 | """ 122 | Initialize model. 123 | """ 124 | logger.info("init model starts") 125 | self.model["mu"] = np.zeros(self.data["f_dim"] + 1) # model parameter mean 126 | self.model["C"] = C 127 | self.model["S"] = np.identity(self.data["f_dim"] + 1)#model parameter covariance 128 | self.model["eta"] = eta # confidence parameter 129 | self.model["phi"] = norm.ppf(norm.cdf(eta)) # inverse of cdf(eta) 130 | self.model["phi_2"] = self.model["phi"] ** 2 131 | self.model["phi_4"] = self.model["phi_2"] ** 2 132 | self.model["psi"] = 1 + self.model["phi_2"] / 2 133 | self.model["zeta"] = 1 + self.model["phi_2"] 134 | logger.info("init model finished") 135 | 136 | def _learn(self, ): 137 | """ 138 | Learn internally. 139 | """ 140 | 141 | def _update(self, label, sample, m, v): 142 | """ 143 | Update model parameter internally. 
144 | 145 | Arguments: 146 | - `label`: label = {1, -1} 147 | - `sample`: sample, or feature vector 148 | """ 149 | 150 | # add bias 151 | sample = self._add_bias(sample) 152 | 153 | # alpha 154 | phi = self.model["phi"] 155 | phi_2 = self.model["phi_2"] 156 | phi_4 = self.model["phi_4"] 157 | zeta = self.model["zeta"] 158 | psi = self.model["psi"] 159 | C = self.model["C"] 160 | alpha = max(0, (-m*psi + np.sqrt(m**2 * phi_4/4 + v*phi_2*zeta) ) / (v*zeta)) 161 | alpha = min(C, alpha) 162 | 163 | # mu 164 | S = self.model["S"] 165 | mu = self.model["mu"] + alpha * label * S.dot(sample) 166 | self.model["mu"] = mu 167 | 168 | # beta 169 | alpha_v_phi = alpha*v*phi 170 | u = ((-alpha_v_phi + np.sqrt(alpha_v_phi**2 + 4*v)) ** 2) / 4 171 | beta = alpha * phi / (np.sqrt(u) + alpha_v_phi) 172 | 173 | # S 174 | S = S - beta * S.dot(np.outer(sample, sample)).dot(S) 175 | self.model["S"] = S 176 | 177 | def _predict_value(self, sample): 178 | """ 179 | predict value of \mu^T * x 180 | 181 | Arguments: 182 | - `sample`: 183 | """ 184 | return self.model["mu"].dot(self._add_bias(sample)) 185 | 186 | def _add_bias(self, sample): 187 | return np.hstack((sample, 1)) 188 | 189 | def learn(self, ): 190 | """ 191 | Learn. 192 | """ 193 | logger.info("learn starts") 194 | data = self.data["data"] 195 | 196 | # learn 197 | st = time.time() 198 | for i in xrange(0, self.n_scan): 199 | print "iter:", i 200 | for i in xrange(0, self.data["n_sample"]): 201 | sample = data[i, 1:] 202 | label = data[i, 0] 203 | pred_val = self._predict_value(sample) 204 | m = label * pred_val 205 | biased_sample = self._add_bias(sample) 206 | v = biased_sample.dot(self.model["S"]).dot(biased_sample) 207 | if m < self.model["phi"] * np.sqrt(v): 208 | self._update(label, sample, m, v) 209 | 210 | logger.info("learn finished") 211 | et = time.time() 212 | logger.info("learning time: %f[s]" % (et - st)) 213 | 214 | def predict(self, sample): 215 | """ 216 | predict {1, -1} base on \mu^T * x 217 | 218 | Arguments: 219 | - `sample`: 220 | """ 221 | pred_val = self._predict_value(sample) 222 | self.cache["pred_val"] = pred_val 223 | if pred_val >=0: 224 | return 1 225 | else: 226 | return -1 227 | 228 | ## TODO 229 | def update(self, label, sample): 230 | """ 231 | update model. 
232 | Arguments:
233 | - `label`: label = {1, -1}
234 | - `sample`: sample, or feature vector
235 | """
236 | 
237 | m = label * self.cache["pred_val"]
238 | biased_sample = self._add_bias(sample)
239 | v = biased_sample.dot(self.model["S"]).dot(biased_sample)
240 | if m < self.model["phi"] * np.sqrt(v):
241 | self._update(label, sample, m, v)
242 | 
243 | @classmethod
244 | def examplify(cls, fname, delimiter = " ", eta = 0.1, C = 1, n_scan = 1):
245 | """
246 | Example of how to use
247 | """
248 | 
249 | # learn
250 | model = SCWI(fname = fname, delimiter = delimiter, eta = eta, C = C, n_scan = n_scan)
251 | model.learn()
252 | 
253 | # predict (after learning)
254 | data = np.loadtxt(fname, delimiter = delimiter)
255 | model._binalize(data)
256 | model.normalize(data[:, 1:])
257 | n_sample = data.shape[0]
258 | y_label = data[:, 0]
259 | y_pred = np.ndarray(n_sample)
260 | for i in xrange(0, n_sample):
261 | sample = data[i, 1:]
262 | y_pred[i] = model.predict(sample)
263 | 
264 | # show result
265 | cm = confusion_matrix(y_label, y_pred)
266 | print cm
267 | print "accuracy: %d [%%]" % (np.sum(cm.diagonal()) * 100.0/np.sum(cm))
268 | 
269 | if __name__ == '__main__':
270 | fname = "/home/kzk/datasets/uci_csv/liver.csv"
271 | #fname = "/home/kzk/datasets/uci_csv/ad.csv"
272 | #fname = "/home/kzk/datasets/uci_csv/adult.csv"
273 | #fname = "/home/kzk/datasets/uci_csv/iris2.csv"
274 | print "dataset is", fname
275 | SCWI.examplify(fname, delimiter = " ", eta = 0.9, C = 1, n_scan = 10)
276 | 
--------------------------------------------------------------------------------
/cw/multiclass_confidence_weighted_var_diag.py:
--------------------------------------------------------------------------------
1 | import numpy as np
2 | import logging as logger
3 | import time
4 | from collections import defaultdict
5 | from sklearn.metrics import confusion_matrix
6 | from sklearn.datasets import load_svmlight_file
7 | from base import ConfidenceWeightedModel
8 | from scipy.stats import norm
9 | from scipy import sparse
10 | from scipy.sparse import csr_matrix
11 | 
12 | 
13 | class MCWVarDiag(ConfidenceWeightedModel):
14 | """
15 | Diagonal-covariance version of the Confidence-Weighted algorithm;
16 | off-diagonal elements of the covariance matrix are ignored.
17 | 
18 | References:
19 | - http://www.aclweb.org/anthology/D/D09/D09-1052.pdf
20 | - https://alliance.seas.upenn.edu/~nlp/publications/pdf/dredze2008f.pdf
21 | 
22 | Feature function F(x, y) is chosen as the cartesian product of x and y,
23 | where x is the feature vector and y is a 1-of-K vector.
24 | 
25 | This model is applied to multiclass-multilabel classification, solved with
26 | the single constraint update in http://www.aclweb.org/anthology/D/D09/D09-1052.pdf.
27 | """
28 | 
29 | def __init__(self, eta=0.9, epochs=10):
30 | """
31 | model initialization.
32 | """
33 | logger.basicConfig(level=logger.DEBUG)
34 | logger.info("init starts")
35 | 
36 | super(MCWVarDiag, self).__init__(epochs)
37 | self._init_model(eta)
38 | 
39 | logger.info("init finished")
40 | 
41 | def _init_model(self, eta):
42 | """
43 | Initialize model.
44 | """
45 | logger.info("init model starts")
46 | self.model["mu"] = defaultdict() # model parameter mean
47 | self.model["S"] = defaultdict() # model parameter covariance (diagonal)
48 | self.model["eta"] = eta # confidence parameter
49 | self.model["phi"] = norm.ppf(eta) # phi = PPF(eta), the inverse CDF of eta
50 | logger.info("init model finished")
51 | pass
52 | 
53 | def init_params(self, mu, S):
54 | """
55 | This method is used for warm start.
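A minimal warm-start sketch (the arrays below are placeholders, and
both batches are assumed to contain the same set of classes):

model_a = MCWVarDiag(eta=0.9, epochs=1)
model_a.learn(X_batch1, y_batch1)

model_b = MCWVarDiag(eta=0.9, epochs=1)
model_b.init_params(model_a.model["mu"], model_a.model["S"])
model_b.learn(X_batch2, y_batch2)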
56 | Arguments: 57 | - `mu`: model parameter mean 58 | - `S`: model parameter covariance 59 | """ 60 | self.model["warm_start"] = True 61 | self.model["mu"] = mu 62 | self.model["S"] = S 63 | 64 | pass 65 | 66 | def _learn(self, ): 67 | """ 68 | Learn internally. 69 | """ 70 | 71 | def _update_for_dense_sample(self, sample, y, r): 72 | """ 73 | Update model parameter internally. 74 | update rule is as follows, 75 | mu = mu + alpha * y * Sx 76 | S = (S^{-1} + 2 * alpha * phi * diag(g_{y, r}^2))^{-1} 77 | g_{y, r} = F(x, y) - F(x, r) 78 | 79 | Note: diagonal elements are only considered. 80 | 81 | Arguments: 82 | - `sample`: sample, or feature vector 83 | - `y`: true label 84 | - `r`: predicted label (!=y) with high rank value 85 | """ 86 | 87 | # components 88 | phi = self.model["phi"] 89 | sample = self._add_bias_for_dense_sample(sample) 90 | g_y = sample 91 | g_r = -sample 92 | m = self.model["mu"][y].dot(g_y) + self.model["mu"][r].dot(g_r) 93 | first_term = (g_y * self.model["S"][y]).dot(g_y) 94 | second_term = (g_r * self.model["S"][r]).dot(g_r) 95 | v = first_term + second_term 96 | a = 1 + 2 * phi * m 97 | 98 | # gamma/alpha 99 | gamma = (-a + np.sqrt(a * a - 8 * phi * (m - phi * v))) / (4 * phi * v) 100 | alpha = max(0, gamma) 101 | 102 | # mu 103 | mu_y = self.model["mu"][y] + alpha * self.model["S"][y] * g_y 104 | mu_r = self.model["mu"][r] + alpha * self.model["S"][r] * g_r 105 | self.model["mu"][y] = mu_y 106 | self.model["mu"][r] = mu_r 107 | 108 | # S 109 | S_y = 1 / (1 / self.model["S"][y] + 2 * alpha * phi * g_y * g_y) 110 | S_r = 1 / (1 / self.model["S"][r] + 2 * alpha * phi * g_r * g_r) 111 | self.model["S"][y] = S_y 112 | self.model["S"][r] = S_r 113 | 114 | def _update_for_sparse_sample(self, sample, y, r): 115 | """ 116 | Update model parameter internally. 117 | update rule is as follows, 118 | mu = mu + alpha * y * Sx 119 | S = (S^{-1} + 2 * alpha * phi * diag(g_{y, r}^2))^{-1} 120 | g_{y, r} = F(x, y) - F(x, r) 121 | 122 | Note: diagonal elements are only considered. 123 | 124 | Arguments: 125 | - `sample`: sample, or feature vector 126 | - `y`: true label 127 | - `r`: predicted label (!=y) with high rank value 128 | """ 129 | 130 | # components 131 | phi = self.model["phi"] 132 | sample = self._add_bias_for_sparse_sample(sample) 133 | g_y = sample 134 | g_r = -sample 135 | gg = sample.multiply(sample) 136 | m = (self.model["mu"][y].multiply(g_y)).sum() + (self.model["mu"][r].multiply(g_r)).sum() 137 | 138 | first_term = (self.model["S"][y].multiply(gg)).sum() 139 | second_term = (self.model["S"][r].multiply(gg)).sum() 140 | v = first_term + second_term 141 | a = 1 + 2 * phi * m 142 | 143 | # gamma/alpha 144 | gamma = (-a + np.sqrt(a * a - 8 * phi * (m - phi * v))) / (4 * phi * v) 145 | alpha = max(0, gamma) 146 | 147 | # mu 148 | Sy_gy = self.model["S"][y].multiply(g_y) 149 | mu_y = self.model["mu"][y] + Sy_gy.multiply(alpha) 150 | self.model["mu"][y] = mu_y 151 | 152 | Sr_gr = self.model["S"][r].multiply(g_r) 153 | mu_r = self.model["mu"][r] + Sr_gr.multiply(alpha) 154 | self.model["mu"][r] = mu_r 155 | 156 | # S 157 | S_y_inv = self.inverse_1d_sparse_matrix(self.model["S"][y]) 158 | S_y = self.inverse_1d_sparse_matrix(S_y_inv + gg.multiply(2 * alpha * phi)) 159 | 160 | S_r_inv = self.inverse_1d_sparse_matrix(self.model["S"][r]) 161 | S_r = self.inverse_1d_sparse_matrix(S_r_inv + gg.multiply(2 * alpha * phi)) 162 | 163 | self.model["S"][y] = S_y 164 | self.model["S"][r] = S_r 165 | 166 | pass 167 | 168 | def learn(self, X, y): 169 | """ 170 | Learn. 
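Dispatches on the input type: scipy.sparse inputs go through the
csr-based code path and dense ndarrays through the NumPy path (see
scipy.sparse.issparse below). Each round predicts the highest-ranked
class r and applies the single-constraint update only when r differs
from the true label y.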
171 | """ 172 | self.data["sparse"] = sparse.issparse(X) 173 | if self.data["sparse"]: 174 | self._learn_for_sparse_samples(X, y) 175 | else: 176 | self._learn_for_dense_samples(X, y) 177 | pass 178 | 179 | def _learn_for_dense_samples(self, X, y): 180 | """ 181 | Learn for dense samples. 182 | """ 183 | self.data["n_samples"] = X.shape[0] 184 | self.data["f_dims"] = X.shape[1] 185 | self.data["classes"] = np.unique(y) 186 | 187 | logger.info("learn starts") 188 | if not self.model["warm_start"]: 189 | for k in self.data["classes"]: 190 | self.model["mu"][k] = np.zeros(self.data["f_dims"] + 1) 191 | self.model["S"][k] = np.ones(self.data["f_dims"] + 1) # only for diagonal 192 | pass 193 | 194 | # learn 195 | st = time.time() 196 | for e in xrange(0, self.epochs): 197 | logger.debug("iter: %d" % e) 198 | for i in xrange(0, self.data["n_samples"]): 199 | sample = X[i, :] 200 | label = y[i] 201 | pred_vals = self._predict_values_for_dense_sample(sample) 202 | high_rank_class = pred_vals[0][0] 203 | if high_rank_class != label: # highest rank class 204 | self._update_for_dense_sample(sample, label, high_rank_class) 205 | 206 | logger.info("learn finished") 207 | et = time.time() 208 | logger.info("learning time: %f[s]" % (et - st)) 209 | 210 | def _learn_for_sparse_samples(self, X, y): 211 | """ 212 | Learn for sparse samples 213 | """ 214 | self.data["n_samples"] = X.shape[0] 215 | self.data["f_dims"] = X.shape[1] 216 | self.data["classes"] = np.unique(y) 217 | 218 | logger.info("learn starts") 219 | if not self.model["warm_start"]: 220 | for k in self.data["classes"]: 221 | self.model["mu"][k] = csr_matrix(np.zeros(self.data["f_dims"] + 1)) 222 | self.model["S"][k] = csr_matrix(np.ones(self.data["f_dims"] + 1)) # only for diagonal 223 | pass 224 | 225 | # learn 226 | st = time.time() 227 | for e in xrange(0, self.epochs): 228 | logger.debug("iter: %d" % e) 229 | for i in xrange(0, self.data["n_samples"]): 230 | if i % 1000 == 0: 231 | logger.debug("#samples = %d" % i) 232 | pass 233 | sample = X[i, :] 234 | label = y[i] 235 | pred_vals = self._predict_values_for_sparse_sample(sample) 236 | high_rank_class = pred_vals[0][0] 237 | if high_rank_class != label: # highest rank class 238 | self._update_for_sparse_sample(sample, label, high_rank_class) 239 | 240 | logger.info("learn finished") 241 | et = time.time() 242 | logger.info("learning time: %f[s]" % (et - st)) 243 | 244 | def _predict_values_for_dense_sample(self, sample): 245 | """ 246 | predict value of \mu^T * x 247 | 248 | Arguments: 249 | - `sample`: 250 | """ 251 | 252 | values = defaultdict() 253 | sample = self._add_bias_for_dense_sample(sample) 254 | for k in self.data["classes"]: 255 | values[k] = self.model["mu"][k].dot(sample) 256 | 257 | # return as list of tuple (class, ranking) in descending order 258 | return [(k, v) for k, v in sorted(values.items(), 259 | key=lambda x:x[1], reverse=True)] 260 | 261 | def _predict_values_for_sparse_sample(self, sample): 262 | """ 263 | predict value of \mu^T * x 264 | 265 | Arguments: 266 | - `sample`: 267 | """ 268 | 269 | values = defaultdict() 270 | sample = self._add_bias_for_sparse_sample(sample) 271 | for k in self.data["classes"]: 272 | values[k] = (self.model["mu"][k].multiply(sample)).sum() 273 | 274 | # return as list of tuple (class, ranking) in descending order 275 | return [(k, v) for k, v in sorted(values.items(), 276 | key=lambda x:x[1], reverse=True)] 277 | 278 | def predict(self, sample): 279 | """ 280 | 281 | Arguments: 282 | - `sample`: 283 | """ 284 | if 
self.data["sparse"]: 285 | return self._predict_for_sparse_sample(sample) 286 | else: 287 | return self._predict_for_dense_sample(sample) 288 | 289 | def _predict_for_dense_sample(self, sample): 290 | """ 291 | predict class base on argmax_{z} w^T F(x, z) 292 | 293 | Arguments: 294 | - `sample`: 295 | """ 296 | pred_vals = self._predict_values_for_dense_sample(sample) 297 | self.cache["pred_vals"] = pred_vals 298 | return pred_vals[0][0] 299 | 300 | def _predict_for_sparse_sample(self, sample): 301 | """ 302 | predict class base on argmax_{z} w^T F(x, z) 303 | 304 | Arguments: 305 | - `sample`: 306 | """ 307 | pred_vals = self._predict_values_for_sparse_sample(sample) 308 | self.cache["pred_vals"] = pred_vals 309 | return pred_vals[0][0] 310 | 311 | ## TODO 312 | def update(self, label, sample): 313 | """ 314 | update model. 315 | Arguments: 316 | - `label`: label 317 | - `sample`: sample, or feature vector 318 | """ 319 | pass 320 | pass 321 | 322 | 323 | def main(): 324 | """ 325 | Example of how to use 326 | """ 327 | # data load 328 | #fname = "/home/kzk/datasets/uci_csv/iris.csv" 329 | fname = "/home/kzk/datasets/uci_csv/glass.csv" 330 | #fname = "/home/kzk/datasets/uci_csv/breast_cancer.csv" 331 | #fname = "/home/kzk/datasets/uci_csv/car.csv" 332 | #fname = "/home/kzk/datasets/uci_csv/credit.csv" 333 | #fname = "/home/kzk/datasets/uci_csv/usps.csv" 334 | #fname = "/home/kzk/datasets/uci_csv/liver.csv" 335 | #fname = "/home/kzk/datasets/uci_csv/haberman.csv" 336 | #fname = "/home/kzk/datasets/uci_csv/pima.csv" 337 | #fname = "/home/kzk/datasets/uci_csv/parkinsons.csv" 338 | #fname = "/home/kzk/datasets/uci_csv/ionosphere.csv" 339 | #fname = "/home/kzk/datasets/uci_csv/isolet.csv" 340 | #fname = "/home/kzk/datasets/uci_csv/magicGamaTelescope.csv" 341 | #fname = "/home/kzk/datasets/uci_csv/mammographic.csv" 342 | #fname = "/home/kzk/datasets/uci_csv/yeast.csv" 343 | #fname = "/home/kzk/datasets/news20/news20.dat" 344 | fname = "/home/k_yoshiyama/datasets/news20/news20.dat" 345 | print "dataset is", fname 346 | 347 | #data = np.loadtxt(fname, delimiter=" ") 348 | #X = data[:, 1:] 349 | #y = data[:, 0] 350 | 351 | (X, y) = load_svmlight_file(fname) 352 | n_samples = X.shape[0] 353 | y_pred = np.ndarray(n_samples) 354 | #X = X.toarray() 355 | 356 | # learn 357 | model = MCWVarDiag(eta=0.9, epochs=1) 358 | model.learn(X, y) 359 | 360 | # predict 361 | st = time.time() 362 | for i in xrange(0, n_samples): 363 | if i % 1000 == 0: 364 | print "#samples = %d" % i 365 | pass 366 | sample = X[i, :] 367 | y_pred[i] = model.predict(sample) 368 | et = time.time() 369 | print "prediction time: %f[s]" % (et - st) 370 | print "prediction time/sample: %f[s]" % ((et - st) / n_samples) 371 | 372 | # show result 373 | cm = confusion_matrix(y, y_pred) 374 | #print cm 375 | print "accurary: %d [%%]" % (np.sum(cm.diagonal()) * 100.0 / np.sum(cm)) 376 | 377 | if __name__ == '__main__': 378 | main() 379 | -------------------------------------------------------------------------------- /cw/multiclass_soft_confidence_weighted_1_diag.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import logging as logger 3 | import time 4 | from base import ConfidenceWeightedModel 5 | from collections import defaultdict 6 | from sklearn.metrics import confusion_matrix 7 | from sklearn.datasets import load_svmlight_file 8 | from scipy.stats import norm 9 | from scipy.sparse import csr_matrix 10 | from scipy import sparse 11 | 12 | 13 | class 
MSCWIDiag(ConfidenceWeightedModel):
14 | """
15 | Diagonal-covariance version of the Soft Confidence-Weighted I algorithm;
16 | off-diagonal elements of the covariance matrix are ignored.
17 | 
18 | References:
19 | - http://www.aclweb.org/anthology/D/D09/D09-1052.pdf
20 | - http://icml.cc/2012/papers/86.pdf
21 | 
22 | This model is applied to multiclass-multilabel classification, solved with
23 | the single constraint update in http://www.aclweb.org/anthology/D/D09/D09-1052.pdf.
24 | """
25 | 
26 | def __init__(self, C=1, eta=0.9, epochs=10):
27 | """
28 | model initialization.
29 | """
30 | super(MSCWIDiag, self).__init__(epochs)
31 | 
32 | logger.basicConfig(level=logger.DEBUG)
33 | logger.info("init starts")
34 | 
35 | self._init_model(C, eta)
36 | 
37 | logger.info("init finished")
38 | 
39 | def _init_model(self, C, eta):
40 | """
41 | Initialize model.
42 | """
43 | logger.info("init model starts")
44 | self.model["mu"] = defaultdict() # model parameter mean
45 | self.model["S"] = defaultdict() # model parameter covariance (diagonal)
46 | self.model["C"] = C # PA parameter
47 | self.model["eta"] = eta # confidence parameter
48 | self.model["phi"] = norm.ppf(eta) # phi = PPF(eta), the inverse CDF of eta
49 | self.model["phi_2"] = np.power(self.model["phi"], 2)
50 | self.model["psi"] = 1 + self.model["phi_2"] / 2
51 | self.model["zeta"] = 1 + self.model["phi_2"]
52 | logger.info("init model finished")
53 | pass
54 | 
55 | def init_params(self, mu, S):
56 | """
57 | This method is used for warm start.
58 | Arguments:
59 | - `mu`: model parameter mean
60 | - `S`: model parameter covariance
61 | """
62 | self.model["warm_start"] = True
63 | self.model["mu"] = mu
64 | self.model["S"] = S
65 | 
66 | pass
67 | 
68 | def _update_for_dense_samples(self, sample, y, r):
69 | """
70 | Update model parameter internally.
71 | update rule is as follows (per class k in {y, r}),
72 | mu_k = mu_k + alpha * S_k * g_k, with g_y = [x; 1] and g_r = -[x; 1],
73 | S_k = S_k - beta * S_k^2 * g_k^2 (diagonal elements only),
74 | g_{y, r} = F(x, y) - F(x, r)
75 | 
76 | Note: diagonal elements are only considered.
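
With g_y = [x; 1] and g_r = -[x; 1], the scalar components reduce to

m = mu_y . g_y + mu_r . g_r
v = (g_y * S_y) . g_y + (g_r * S_r) . g_r

and alpha/beta are the SCW-I quantities computed from (m, v).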
77 | 78 | Arguments: 79 | - `sample`: sample, or feature vector 80 | - `y`: true label 81 | - `r`: predicted label (!=y) with high rank value 82 | """ 83 | 84 | # components 85 | phi = self.model["phi"] 86 | phi_2 = self.model["phi_2"] 87 | psi = self.model["psi"] 88 | zeta = self.model["zeta"] 89 | sample = self._add_bias_for_dense_sample(sample) 90 | g_y = sample 91 | g_r = -sample 92 | m = self.model["mu"][y].dot(g_y) + self.model["mu"][r].dot(g_r) 93 | first_term = (g_y * self.model["S"][y]).dot(g_y) 94 | second_term = (g_r * self.model["S"][r]).dot(g_r) 95 | v = first_term + second_term 96 | v_zeta = v * zeta 97 | 98 | # alpha 99 | first_term = -m * psi 100 | second_term = np.sqrt(np.power(m, 2) * 101 | np.power(phi_2, 2) / 4 + phi_2 * v_zeta) 102 | alpha = (first_term + second_term) / (v_zeta) 103 | alpha = min(self.model["C"], max(0, alpha)) 104 | 105 | # mu 106 | mu_y = self.model["mu"][y] + alpha * self.model["S"][y] * g_y 107 | mu_r = self.model["mu"][r] + alpha * self.model["S"][r] * g_r 108 | self.model["mu"][y] = mu_y 109 | self.model["mu"][r] = mu_r 110 | 111 | # beta 112 | alpha_2 = alpha * alpha 113 | v_2 = v * v 114 | u = -alpha * v * phi + np.sqrt(alpha_2 * v_2 * phi_2 + 4 * v) 115 | u = u * u / 4 116 | beta = (alpha * phi) / (np.sqrt(u) + v * alpha * phi) 117 | 118 | # S (only diagonal) 119 | d = beta * self.model["S"][y] * self.model["S"][y] * g_y * g_y 120 | S_y = self.model["S"][y] - d 121 | d = beta * self.model["S"][r] * self.model["S"][r] * g_r * g_r 122 | S_r = self.model["S"][r] - d 123 | self.model["S"][y] = S_y 124 | self.model["S"][r] = S_r 125 | 126 | def _update_for_sparse_sample(self, sample, y, r): 127 | """ 128 | Update model parameter internally. 129 | update rule is as follows, 130 | mu = mu + alpha * y * Sx 131 | S = (S^{-1} + 2 * alpha * phi * diag(g_{y, r}^2))^{-1} 132 | g_{y, r} = F(x, y) - F(x, r) 133 | 134 | Note: diagonal elements are only considered. 
135 | 136 | Arguments: 137 | - `sample`: sample, or feature vector 138 | - `y`: true label 139 | - `r`: predicted label (!=y) with high rank value 140 | """ 141 | 142 | # components 143 | phi = self.model["phi"] 144 | phi_2 = self.model["phi_2"] 145 | psi = self.model["psi"] 146 | zeta = self.model["zeta"] 147 | 148 | sample = self._add_bias_for_sparse_sample(sample) 149 | g_y = sample 150 | g_r = -sample 151 | gg = sample.multiply(sample) 152 | m = (self.model["mu"][y].multiply(g_y)).sum() + (self.model["mu"][r].multiply(g_r)).sum() 153 | first_term = (self.model["S"][y].multiply(gg)).sum() 154 | second_term = (self.model["S"][r].multiply(gg)).sum() 155 | v = first_term + second_term 156 | v_zeta = v * zeta 157 | 158 | # alpha 159 | first_term = -m * psi 160 | second_term = np.sqrt(np.power(m, 2) * 161 | np.power(phi_2, 2) / 4 + phi_2 * v_zeta) 162 | alpha = (first_term + second_term) / (v_zeta) 163 | alpha = min(self.model["C"], max(0, alpha)) 164 | 165 | # mu 166 | mu_y = self.model["mu"][y] + self.model["S"][y].multiply(g_y).multiply(alpha) 167 | mu_r = self.model["mu"][r] + self.model["S"][r].multiply(g_r).multiply(alpha) 168 | self.model["mu"][y] = mu_y 169 | self.model["mu"][r] = mu_r 170 | 171 | # beta 172 | alpha_2 = alpha * alpha 173 | v_2 = v * v 174 | u = -alpha * v * phi + np.sqrt(alpha_2 * v_2 * phi_2 + 4 * v) 175 | u = u * u / 4 176 | beta = (alpha * phi) / (np.sqrt(u) + v * alpha * phi) 177 | 178 | # S (only diagonal) 179 | gg_beta = gg.multiply(beta) 180 | d = self.model["S"][y].multiply(self.model["S"][y]).multiply(gg_beta) 181 | S_y = self.model["S"][y] - d 182 | d = self.model["S"][r].multiply(self.model["S"][r]).multiply(gg_beta) 183 | S_r = self.model["S"][r] - d 184 | self.model["S"][y] = S_y 185 | self.model["S"][r] = S_r 186 | 187 | def _predict_values_for_dense_sample(self, sample): 188 | """ 189 | predict value of \mu^T * x 190 | 191 | Arguments: 192 | - `sample`: 193 | """ 194 | 195 | values = defaultdict() 196 | sample = self._add_bias_for_dense_sample(sample) 197 | for k in self.data["classes"]: 198 | values[k] = self.model["mu"][k].dot(sample) 199 | 200 | # return as list of tuple (class, ranking) in descending order 201 | return [(k, v) for k, v in sorted(values.items(), 202 | key=lambda x:x[1], reverse=True)] 203 | 204 | def _predict_values_for_sparse_sample(self, sample): 205 | """ 206 | predict value of \mu^T * x 207 | 208 | Arguments: 209 | - `sample`: 210 | """ 211 | 212 | values = defaultdict() 213 | sample = self._add_bias_for_sparse_sample(sample) 214 | for k in self.data["classes"]: 215 | values[k] = (self.model["mu"][k].multiply(sample)).sum() 216 | 217 | # return as list of tuple (class, ranking) in descending order 218 | return [(k, v) for k, v in sorted(values.items(), 219 | key=lambda x:x[1], reverse=True)] 220 | 221 | def learn(self, X, y): 222 | """ 223 | Learn. 
224 | """ 225 | self.data["sparse"] = sparse.issparse(X) 226 | if self.data["sparse"]: 227 | self._learn_for_sparse_samples(X, y) 228 | else: 229 | self._learn_for_dense_samples(X, y) 230 | pass 231 | 232 | def _learn_for_dense_samples(self, X, y): 233 | """ 234 | 235 | """ 236 | logger.info("learn starts") 237 | self.data["n_samples"] = X.shape[0] 238 | self.data["f_dims"] = X.shape[1] 239 | self.data["classes"] = np.unique(y) 240 | 241 | if not self.model["warm_start"]: 242 | for k in self.data["classes"]: 243 | self.model["mu"][k] = np.zeros(self.data["f_dims"] + 1) 244 | self.model["S"][k] = np.ones(self.data["f_dims"] + 1) # only for diagonal 245 | pass 246 | 247 | # learn 248 | st = time.time() 249 | for e in xrange(0, self.epochs): 250 | logger.debug("iter: %d" % e) 251 | for i in xrange(0, self.data["n_samples"]): 252 | sample = X[i, :] 253 | label = y[i] 254 | pred_vals = self._predict_values_for_dense_sample(sample) 255 | high_rank_class = pred_vals[0][0] 256 | if high_rank_class != label: # highest rank class 257 | self._update_for_dense_samples(sample, label, high_rank_class) 258 | 259 | logger.info("learn finished") 260 | et = time.time() 261 | logger.info("learning time: %f[s]" % (et - st)) 262 | 263 | def _learn_for_sparse_samples(self, X, y): 264 | """ 265 | Learn for sparse samples 266 | """ 267 | logger.info("learn starts") 268 | self.data["n_samples"] = X.shape[0] 269 | self.data["f_dims"] = X.shape[1] 270 | self.data["classes"] = np.unique(y) 271 | 272 | if not self.model["warm_start"]: 273 | for k in self.data["classes"]: 274 | self.model["mu"][k] = csr_matrix(np.zeros(self.data["f_dims"] + 1)) 275 | self.model["S"][k] = csr_matrix(np.ones(self.data["f_dims"] + 1)) # only for diagonal 276 | pass 277 | 278 | # learn 279 | st = time.time() 280 | for e in xrange(0, self.epochs): 281 | logger.debug("iter: %d" % e) 282 | for i in xrange(0, self.data["n_samples"]): 283 | if i % 1000 == 0: 284 | logger.debug("#samples = %d" % i) 285 | pass 286 | sample = X[i, :] 287 | label = y[i] 288 | pred_vals = self._predict_values_for_sparse_sample(sample) 289 | high_rank_class = pred_vals[0][0] 290 | if high_rank_class != label: # highest rank class 291 | self._update_for_sparse_sample(sample, label, high_rank_class) 292 | 293 | logger.info("learn finished") 294 | et = time.time() 295 | logger.info("learning time: %f[s]" % (et - st)) 296 | 297 | def predict(self, sample): 298 | """ 299 | 300 | Arguments: 301 | - `sample`: 302 | """ 303 | if self.data["sparse"]: 304 | return self._predict_for_sparse_sample(sample) 305 | else: 306 | return self._predict_for_dense_sample(sample) 307 | 308 | def _predict_for_dense_sample(self, sample): 309 | """ 310 | predict class base on argmax_{z} w^T F(x, z) 311 | 312 | Arguments: 313 | - `sample`: 314 | """ 315 | pred_vals = self._predict_values_for_dense_sample(sample) 316 | self.cache["pred_vals"] = pred_vals 317 | return pred_vals[0][0] 318 | 319 | def _predict_for_sparse_sample(self, sample): 320 | """ 321 | predict class base on argmax_{z} w^T F(x, z) 322 | 323 | Arguments: 324 | - `sample`: 325 | """ 326 | pred_vals = self._predict_values_for_sparse_sample(sample) 327 | self.cache["pred_vals"] = pred_vals 328 | return pred_vals[0][0] 329 | 330 | 331 | ## TODO 332 | def update(self, label, sample): 333 | """ 334 | update model. 
335 | Arguments: 336 | - `label`: label 337 | - `sample`: sample, or feature vector 338 | """ 339 | 340 | 341 | def main(): 342 | """ 343 | Example of how to use 344 | """ 345 | # data load 346 | #fname = "/home/kzk/datasets/uci_csv/iris.csv" 347 | fname = "/home/kzk/datasets/uci_csv/glass.csv" 348 | #fname = "/home/kzk/datasets/uci_csv/breast_cancer.csv" 349 | #fname = "/home/kzk/datasets/uci_csv/car.csv" 350 | #fname = "/home/kzk/datasets/uci_csv/credit.csv" 351 | #fname = "/home/kzk/datasets/uci_csv/usps.csv" 352 | #fname = "/home/kzk/datasets/uci_csv/liver.csv" 353 | #fname = "/home/kzk/datasets/uci_csv/haberman.csv" 354 | #fname = "/home/kzk/datasets/uci_csv/pima.csv" 355 | #fname = "/home/kzk/datasets/uci_csv/parkinsons.csv" 356 | #fname = "/home/kzk/datasets/uci_csv/ionosphere.csv" 357 | #fname = "/home/kzk/datasets/uci_csv/isolet.csv" 358 | #fname = "/home/kzk/datasets/uci_csv/magicGamaTelescope.csv" 359 | #fname = "/home/kzk/datasets/uci_csv/mammographic.csv" 360 | #fname = "/home/kzk/datasets/uci_csv/yeast.csv" 361 | fname = "/home/k_yoshiyama/datasets/news20/news20.dat" 362 | print "dataset is", fname 363 | 364 | #data = np.loadtxt(fname, delimiter=" ") 365 | #X = data[:, 1:] 366 | #y = data[:, 0] 367 | 368 | (X, y) = load_svmlight_file(fname) 369 | n_samples = X.shape[0] 370 | y_pred = np.ndarray(n_samples) 371 | #X = X.toarray() 372 | 373 | n_samples = X.shape[0] 374 | y_pred = np.ndarray(n_samples) 375 | 376 | # learn 377 | model = MSCWIDiag(C=1, eta=0.9, epochs=1) 378 | model.learn(X, y) 379 | 380 | # predict 381 | st = time.time() 382 | for i in xrange(0, n_samples): 383 | if i % 1000 == 0: 384 | print "#samples = %d" % i 385 | pass 386 | sample = X[i, :] 387 | y_pred[i] = model.predict(sample) 388 | et = time.time() 389 | print "prediction time: %f[s]" % (et - st) 390 | print "prediction time/sample: %f[s]" % ((et - st) / n_samples) 391 | 392 | # show result 393 | cm = confusion_matrix(y, y_pred) 394 | #print cm 395 | print "accurary: %d [%%]" % (np.sum(cm.diagonal()) * 100.0 / np.sum(cm)) 396 | 397 | if __name__ == "__main__": 398 | main() 399 | 400 | -------------------------------------------------------------------------------- /cw/multiclass_soft_confidence_weighted_2_diag.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import logging as logger 3 | import time 4 | from base import ConfidenceWeightedModel 5 | from collections import defaultdict 6 | from sklearn.metrics import confusion_matrix 7 | from sklearn.datasets import load_svmlight_file 8 | from scipy.stats import norm 9 | from scipy.sparse import csr_matrix 10 | from scipy import sparse 11 | 12 | 13 | class MSCWIIDiag(ConfidenceWeightedModel): 14 | """ 15 | Diagonal elements of matrix version of Soft Confidence-Weighted II algorithm; 16 | non-diagonal elements in covariance matrix are ignored. 17 | 18 | References: 19 | - http://www.aclweb.org/anthology/D/D09/D09-1052.pdf 20 | - http://icml.cc/2012/papers/86.pdf 21 | 22 | Feature function F(x, y) is chosen as cartesian product of x and y. 23 | x is feature vector and y is 1-of-K vector. 24 | 25 | This model is applied to multiclass-multilabel classification, solved with 26 | single constraint update in http://www.aclweb.org/anthology/D/D09/D09-1052.pdf. 27 | """ 28 | 29 | def __init__(self, C=1, eta=0.9, epochs=10): 30 | """ 31 | model initialization. 
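Arguments (as used by _init_model below):
- `C`: PA-style regularization parameter.
- `eta`: confidence parameter, assumed to be in (0.5, 1) so that phi > 0.
- `epochs`: number of passes over the training data.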
32 | """ 33 | super(MSCWIIDiag, self).__init__(epochs) 34 | 35 | logger.basicConfig(level=logger.DEBUG) 36 | logger.info("init starts") 37 | 38 | self._init_model(C, eta) 39 | 40 | logger.info("init finished") 41 | 42 | def _init_model(self, C, eta): 43 | """ 44 | Initialize model. 45 | """ 46 | logger.info("init model starts") 47 | self.model["mu"] = defaultdict() # model parameter mean 48 | self.model["S"] = defaultdict() # model parameter covariance 49 | self.model["C"] = C # PA parameter 50 | self.model["eta"] = eta # confidence parameter 51 | self.model["phi"] = norm.ppf(norm.cdf(eta)) # inverse of cdf(eta) 52 | self.model["phi_2"] = np.power(self.model["phi"], 2) 53 | self.model["psi"] = 1 + self.model["phi_2"] / 2 54 | self.model["zeta"] = 1 + self.model["phi_2"] 55 | logger.info("init model finished") 56 | 57 | pass 58 | 59 | def init_params(self, mu, S): 60 | """ 61 | This method is used for warm start. 62 | Arguments: 63 | - `mu`: model parameter mean 64 | - `S`: model parameter covariance 65 | """ 66 | self.model["warm_start"] = True 67 | self.model["mu"] = mu 68 | self.model["S"] = S 69 | 70 | pass 71 | 72 | def _learn(self, ): 73 | """ 74 | Learn internally. 75 | """ 76 | 77 | def _update_for_dense_sample(self, sample, y, r): 78 | """ 79 | Update model parameter internally. 80 | update rule is as follows, 81 | mu = mu + alpha * y * Sx 82 | S = (S^{-1} + 2 * alpha * phi * diag(g_{y, r}^2))^{-1} 83 | g_{y, r} = F(x, y) - F(x, r) 84 | 85 | Note: diagonal elements are only considered. 86 | 87 | Arguments: 88 | - `sample`: sample, or feature vector 89 | - `y`: true label 90 | - `r`: predicted label (!=y) with high rank value 91 | """ 92 | 93 | # components 94 | phi = self.model["phi"] 95 | phi_2 = self.model["phi_2"] 96 | 97 | sample = self._add_bias_for_dense_sample(sample) 98 | g_y = sample 99 | g_r = -sample 100 | m = self.model["mu"][y].dot(g_y) + self.model["mu"][r].dot(g_r) 101 | first_term = (g_y * self.model["S"][y]).dot(g_y) 102 | second_term = (g_r * self.model["S"][r]).dot(g_r) 103 | v = first_term + second_term 104 | 105 | n = v + 1 / (2 * self.model["C"]) 106 | n_2 = np.power(n, 2) 107 | m_2 = np.power(m, 2) 108 | v_2 = np.power(v, 2) 109 | gamma = phi * (np.sqrt(phi_2 * m_2 * v_2 + 110 | 4 * n * v * (n + v * phi_2))) 111 | 112 | # alpha 113 | numerator = -(2 * m * n + phi_2 * m * v) + gamma 114 | denominator = 2 * (n_2 + n * v * phi_2) 115 | alpha = max(0, numerator / denominator) 116 | 117 | # mu 118 | mu_y = self.model["mu"][y] + alpha * self.model["S"][y] * g_y 119 | mu_r = self.model["mu"][r] + alpha * self.model["S"][r] * g_r 120 | self.model["mu"][y] = mu_y 121 | self.model["mu"][r] = mu_r 122 | 123 | # beta 124 | alpha_2 = alpha * alpha 125 | v_2 = v * v 126 | u = -alpha * v * phi + np.sqrt(alpha_2 * v_2 * phi_2 + 4 * v) 127 | u = u * u / 4 128 | beta = (alpha * phi) / (np.sqrt(u) + v * alpha * phi) 129 | 130 | # S (only diagonal) 131 | d = beta * self.model["S"][y] * self.model["S"][y] * g_y * g_y 132 | S_y = self.model["S"][y] - d 133 | d = beta * self.model["S"][r] * self.model["S"][r] * g_r * g_r 134 | S_r = self.model["S"][r] - d 135 | self.model["S"][y] = S_y 136 | self.model["S"][r] = S_r 137 | 138 | def _update_for_sparse_sample(self, sample, y, r): 139 | """ 140 | Update model parameter internally. 141 | update rule is as follows, 142 | mu = mu + alpha * y * Sx 143 | S = (S^{-1} + 2 * alpha * phi * diag(g_{y, r}^2))^{-1} 144 | g_{y, r} = F(x, y) - F(x, r) 145 | 146 | Note: diagonal elements are only considered. 
    def _update_for_sparse_sample(self, sample, y, r):
        """
        Update the model parameters internally.
        The update rule is:
            mu = mu + alpha * S g_{y, r}
            S = (S^{-1} + 2 * alpha * phi * diag(g_{y, r}^2))^{-1}
            g_{y, r} = F(x, y) - F(x, r)

        Note: only diagonal elements are considered.

        Arguments:
        - `sample`: sample, or feature vector
        - `y`: true label
        - `r`: predicted label (!= y) with the highest rank value
        """
        # components
        phi = self.model["phi"]
        phi_2 = self.model["phi_2"]

        sample = self._add_bias_for_sparse_sample(sample)
        g_y = sample
        g_r = -sample
        gg = sample.multiply(sample)
        # margin m and confidence v
        m = (self.model["mu"][y].multiply(g_y)).sum() + (self.model["mu"][r].multiply(g_r)).sum()
        first_term = (self.model["S"][y].multiply(gg)).sum()
        second_term = (self.model["S"][r].multiply(gg)).sum()
        v = first_term + second_term

        n = v + 1.0 / (2 * self.model["C"])  # 1.0 avoids integer division for integer C
        n_2 = np.power(n, 2)
        m_2 = np.power(m, 2)
        v_2 = np.power(v, 2)
        gamma = phi * (np.sqrt(phi_2 * m_2 * v_2 +
                               4 * n * v * (n + v * phi_2)))

        # alpha
        numerator = -(2 * m * n + phi_2 * m * v) + gamma
        denominator = 2 * (n_2 + n * v * phi_2)
        alpha = max(0, numerator / denominator)

        # mu
        mu_y = self.model["mu"][y] + self.model["S"][y].multiply(g_y).multiply(alpha)
        mu_r = self.model["mu"][r] + self.model["S"][r].multiply(g_r).multiply(alpha)
        self.model["mu"][y] = mu_y
        self.model["mu"][r] = mu_r

        # beta
        alpha_2 = alpha * alpha
        v_2 = v * v
        u = -alpha * v * phi + np.sqrt(alpha_2 * v_2 * phi_2 + 4 * v)
        u = u * u / 4
        beta = (alpha * phi) / (np.sqrt(u) + v * alpha * phi)

        # S (diagonal only)
        gg_beta = gg.multiply(beta)
        d = self.model["S"][y].multiply(self.model["S"][y]).multiply(gg_beta)
        S_y = self.model["S"][y] - d
        d = self.model["S"][r].multiply(self.model["S"][r]).multiply(gg_beta)
        S_r = self.model["S"][r] - d
        self.model["S"][y] = S_y
        self.model["S"][r] = S_r

    def _predict_values_for_dense_sample(self, sample):
        """
        Predict the value of \mu^T x for each class.

        Arguments:
        - `sample`: sample, or feature vector
        """
        values = defaultdict()
        sample = self._add_bias_for_dense_sample(sample)
        for k in self.data["classes"]:
            values[k] = self.model["mu"][k].dot(sample)

        # return a list of (class, value) tuples in descending order of value
        return [(k, v) for k, v in sorted(values.items(),
                                          key=lambda x: x[1], reverse=True)]

    def _predict_values_for_sparse_sample(self, sample):
        """
        Predict the value of \mu^T x for each class.

        Arguments:
        - `sample`: sample, or feature vector
        """
        values = defaultdict()
        sample = self._add_bias_for_sparse_sample(sample)
        for k in self.data["classes"]:
            values[k] = (self.model["mu"][k].multiply(sample)).sum()

        # return a list of (class, value) tuples in descending order of value
        return [(k, v) for k, v in sorted(values.items(),
                                          key=lambda x: x[1], reverse=True)]

    def learn(self, X, y):
        """
        Learn from all samples, dispatching on the input type.
        """
        self.data["sparse"] = sparse.issparse(X)
        if self.data["sparse"]:
            self._learn_for_sparse_samples(X, y)
        else:
            self._learn_for_dense_samples(X, y)
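    # Usage sketch (hypothetical data and shapes, for illustration only):
    #
    #     X = csr_matrix(np.random.rand(100, 20))    # 100 samples, 20 features
    #     y = np.random.randint(0, 3, 100)           # 3 classes
    #     model = MSCWIIDiag(C=1, eta=0.9, epochs=1)
    #     model.learn(X, y)                          # sparse path is chosen here
    #     model.predict(X[0, :])                     # returns the top-ranked class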
249 | """ 250 | logger.info("learn starts") 251 | self.data["n_samples"] = X.shape[0] 252 | self.data["f_dims"] = X.shape[1] 253 | self.data["classes"] = np.unique(y) 254 | 255 | if not self.model["warm_start"]: 256 | for k in self.data["classes"]: 257 | self.model["mu"][k] = np.zeros(self.data["f_dims"] + 1) 258 | self.model["S"][k] = np.ones(self.data["f_dims"] + 1) # only for diagonal 259 | pass 260 | 261 | # learn 262 | st = time.time() 263 | for e in xrange(0, self.epochs): 264 | logger.debug("iter: %d" % e) 265 | for i in xrange(0, self.data["n_samples"]): 266 | sample = X[i, :] 267 | label = y[i] 268 | pred_vals = self._predict_values_for_dense_sample(sample) 269 | high_rank_class = pred_vals[0][0] 270 | if high_rank_class != label: # highest rank class 271 | self._update_for_dense_sample(sample, label, high_rank_class) 272 | 273 | logger.info("learn finished") 274 | et = time.time() 275 | logger.info("learning time: %f[s]" % (et - st)) 276 | 277 | def _learn_for_sparse_samples(self, X, y): 278 | """ 279 | Learn. 280 | """ 281 | logger.info("learn starts") 282 | self.data["n_samples"] = X.shape[0] 283 | self.data["f_dims"] = X.shape[1] 284 | self.data["classes"] = np.unique(y) 285 | 286 | if not self.model["warm_start"]: 287 | for k in self.data["classes"]: 288 | self.model["mu"][k] = csr_matrix(np.zeros(self.data["f_dims"] + 1)) 289 | self.model["S"][k] = csr_matrix(np.ones(self.data["f_dims"] + 1)) # only for diagonal 290 | pass 291 | 292 | # learn 293 | st = time.time() 294 | for e in xrange(0, self.epochs): 295 | logger.debug("iter: %d" % e) 296 | for i in xrange(0, self.data["n_samples"]): 297 | if i % 1000 == 0: 298 | logger.debug("#samples = %d" % i) 299 | pass 300 | 301 | sample = X[i, :] 302 | label = y[i] 303 | pred_vals = self._predict_values_for_sparse_sample(sample) 304 | high_rank_class = pred_vals[0][0] 305 | if high_rank_class != label: # highest rank class 306 | self._update_for_sparse_sample(sample, label, high_rank_class) 307 | 308 | logger.info("learn finished") 309 | et = time.time() 310 | logger.info("learning time: %f[s]" % (et - st)) 311 | 312 | def predict(self, sample): 313 | """ 314 | 315 | Arguments: 316 | - `sample`: 317 | """ 318 | if self.data["sparse"]: 319 | return self._predict_for_sparse_sample(sample) 320 | else: 321 | return self._predict_for_dense_sample(sample) 322 | 323 | def _predict_for_dense_sample(self, sample): 324 | """ 325 | predict class base on argmax_{z} w^T F(x, z) 326 | 327 | Arguments: 328 | - `sample`: 329 | """ 330 | pred_vals = self._predict_values_for_dense_sample(sample) 331 | self.cache["pred_vals"] = pred_vals 332 | return pred_vals[0][0] 333 | 334 | def _predict_for_sparse_sample(self, sample): 335 | """ 336 | predict class base on argmax_{z} w^T F(x, z) 337 | 338 | Arguments: 339 | - `sample`: 340 | """ 341 | pred_vals = self._predict_values_for_sparse_sample(sample) 342 | self.cache["pred_vals"] = pred_vals 343 | return pred_vals[0][0] 344 | 345 | ## TODO 346 | def update(self, label, sample): 347 | """ 348 | update model. 


def main():
    """
    Example of how to use.
    """
    # data load
    #fname = "/home/kzk/datasets/uci_csv/iris.csv"
    #fname = "/home/kzk/datasets/uci_csv/glass.csv"
    #fname = "/home/kzk/datasets/uci_csv/breast_cancer.csv"
    #fname = "/home/kzk/datasets/uci_csv/car.csv"
    #fname = "/home/kzk/datasets/uci_csv/credit.csv"
    #fname = "/home/kzk/datasets/uci_csv/usps.csv"
    #fname = "/home/kzk/datasets/uci_csv/liver.csv"
    #fname = "/home/kzk/datasets/uci_csv/haberman.csv"
    #fname = "/home/kzk/datasets/uci_csv/pima.csv"
    #fname = "/home/kzk/datasets/uci_csv/parkinsons.csv"
    #fname = "/home/kzk/datasets/uci_csv/ionosphere.csv"
    #fname = "/home/kzk/datasets/uci_csv/isolet.csv"
    #fname = "/home/kzk/datasets/uci_csv/magicGamaTelescope.csv"
    #fname = "/home/kzk/datasets/uci_csv/mammographic.csv"
    #fname = "/home/kzk/datasets/uci_csv/yeast.csv"
    fname = "/home/k_yoshiyama/datasets/news20/news20.dat"
    print "dataset is", fname

    # for dense csv datasets
    #data = np.loadtxt(fname, delimiter=" ")
    #X = data[:, 1:]
    #y = data[:, 0]

    # for svmlight-format datasets
    (X, y) = load_svmlight_file(fname)
    n_samples = X.shape[0]
    y_pred = np.ndarray(n_samples)
    #X = X.toarray()

    # learn
    model = MSCWIIDiag(C=1, eta=0.9, epochs=1)
    model.learn(X, y)

    # predict
    st = time.time()
    for i in xrange(0, n_samples):
        if i % 1000 == 0:
            print "#samples = %d" % i
        sample = X[i, :]
        y_pred[i] = model.predict(sample)
    et = time.time()
    print "prediction time: %f[s]" % (et - st)
    print "prediction time/sample: %f[s]" % ((et - st) / n_samples)

    # show result
    cm = confusion_matrix(y, y_pred)
    #print cm
    print "accuracy: %.2f [%%]" % (np.sum(cm.diagonal()) * 100.0 / np.sum(cm))

if __name__ == '__main__':
    main()

--------------------------------------------------------------------------------