├── CMakeLists.txt ├── Eigen └── down.txt ├── README.md ├── index.html ├── install.pdf ├── python ├── KOL.exe ├── a9a_test ├── a9a_train ├── cross.py ├── german ├── learn.py └── rand.py ├── sample_dataset ├── a9a_test └── a9a_train └── src ├── Params.cpp ├── Params.h ├── common ├── ezOptionParser.hpp ├── init_param.h ├── md5.h └── util.h ├── data ├── Cacher.cpp ├── DataPoint.h ├── DataReader.h ├── DataSet.h ├── DataSetHelper.h ├── MNISTConvert.cpp ├── MNISTReader.h ├── basic_io.cpp ├── basic_io.h ├── comp.h ├── data_analysis.cpp ├── gzip_io.cpp ├── gzip_io.h ├── io_handler.h ├── io_interface.h ├── libsvm_binary.h ├── libsvmread.h ├── makefile ├── parser.h ├── s_array.h ├── thread_primitive.h ├── zlib_io.cpp └── zlib_io.h ├── kernel ├── kernel_RBP.h ├── kernel_bogd.h ├── kernel_bpas.h ├── kernel_fogd.h ├── kernel_forgetron.h ├── kernel_nogd.h ├── kernel_optim.h ├── kernel_pa.h ├── kernel_perceptron.h ├── kernel_projectron.h ├── kernel_projectronpp.h └── kernel_sgd.h ├── loss ├── HingeLoss.h ├── LogisticLoss.h ├── LossFunction.h ├── SquareLoss.h └── SquaredHingeLoss.h ├── main.cpp └── makefile /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 2.6) 2 | 3 | project(KOL) 4 | 5 | set (EXECUTABLE_OUTPUT_PATH ${PROJECT_BINARY_DIR}/bin) 6 | #IF(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT) 7 | set (CMAKE_INSTALL_PREFIX ${CMAKE_SOURCE_DIR}) 8 | #ENDIF(CMAKE_INSTALL_PREFIX_INITIALIZED_TO_DEFAULT) 9 | 10 | #check if Eigen exists 11 | find_path(EIGEN_PATH NAMES Eigen PATHS ${EIGEN_DIR} REQUIRED) 12 | if (NOT EIGEN_PATH) 13 | message(FATAL_ERROR "Eigen is not found, please specify by: -DEIGEN_DIR=") 14 | endif (NOT EIGEN_PATH) 15 | 16 | 17 | FILE(GLOB data_files src/data/*.h) 18 | FILE(GLOB loss_files src/loss/*.h) 19 | FILE(GLOB opti_files src/kernel/*.h) 20 | FILE(GLOB comm_files src/common/*.h) 21 | 22 | set (data_files ${data_files} 23 | src/data/basic_io.cpp 24 | ) 25 | 26 | include_directories( 
27 | ${EIGEN_PATH} 28 | ) 29 | 30 | #set (data_files ${datafiles} 31 | # src/data/basic_io.cpp 32 | # src/data/zlib_io.cpp 33 | # src/data/gzip_io.cpp 34 | # ) 35 | source_group("data" FILES ${data_files}) 36 | source_group("loss" FILES ${loss_files}) 37 | source_group("kernel" FILES ${opti_files}) 38 | source_group("common" FILES ${comm_files}) 39 | 40 | set (SRC_LIST 41 | ${data_files} ${loss_files} ${opti_files} ${comm_files} 42 | ) 43 | 44 | IF(CMAKE_COMPILER_IS_GNUCXX) 45 | SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-write-strings -O2 -s") 46 | ENDIF(CMAKE_COMPILER_IS_GNUCXX) 47 | 48 | add_executable(KOL src/Params.cpp src/Params.h 49 | src/main.cpp ${SRC_LIST}) 50 | IF(UNIX) 51 | target_link_libraries(KOL pthread) 52 | ENDIF(UNIX) 53 | 54 | add_executable(Cacher src/data/Cacher.cpp ${data_files}) 55 | IF(UNIX) 56 | target_link_libraries(Cacher pthread) 57 | ENDIF(UNIX) 58 | 59 | install(TARGETS KOL Cacher 60 | DESTINATION .) 61 | -------------------------------------------------------------------------------- /Eigen/down.txt: -------------------------------------------------------------------------------- 1 | eigen.tuxfamily.org -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | LIBOKL-- A Library for Online Kernel Learning Algorithms 2 | 3 | Authors: 4 | Lu Jing, Wu Yue, Steven Hoi 5 | 6 | Contact: chhoi@ntu.edu.sg, jing.lu.2014@phdis.smu.edu.sg 7 | 8 | LIBOKL is a package for solving large scale online kernel learning tasks. The current version is in C++ and has a total of 10 different online single kernel learning algorithm for binary classification, which are all widely used in online kernel learning research. We also provide other packages for multi-class classification and regression and multiple kernel learning. See the link at the end. 9 | 10 | The algorithms in this package includes: 11 | 12 | 1. 
Perceptron: The kernelized Perceptron without budget maintenance. http://cseweb.ucsd.edu/~yfreund/papers/LargeMarginsUsingPerceptron.pdf 13 | 14 | 2. Online Gradient Descent (OGD): The kernelized online gradient descent algorithm without budget maintenance. 15 | http://eprints.pascal-network.org/archive/00002055/01/KivSmoWil04.pdf 16 | 17 | 3. Random Budget Perceptron (RBP): Budgeted perceptron algorithm with random support vector removal strategy. 18 | http://air.unimi.it/bitstream/2434/26350/1/J29.pdf 19 | 20 | 4. Forgetron: Forgetron algorithm that maintains the budget size by discarding the oldest support vectors. 21 | http://papers.nips.cc/paper/2806-the-forgetron-a-kernel-based-perceptron-on-a-fixed-budget.pdf 22 | 23 | 5. Projectron: The Projectron algorithm using budget projection strategy. 24 | http://eprints.pascal-network.org/archive/00004472/01/355.pdf 25 | 26 | 6. Projectron++: The aggressive version of Projectron algorithm that updates with both margin error and mistake case. 27 | http://eprints.pascal-network.org/archive/00004472/01/355.pdf 28 | 29 | 7. BPAs: The budget passive-aggressive algorithm with simple support removal strategy. 30 | http://machinelearning.wustl.edu/mlpapers/paper_files/AISTATS2010_WangV10.pdf 31 | 32 | 8. BOGD: The budget online gradient descent algorithm by SV removal strategy 33 | http://arxiv.org/ftp/arxiv/papers/1206/1206.4633.pdf 34 | 35 | 9. FOGD: The Fourier Online Gradient Descent algorithm using functional approximation method. 36 | http://jingonline.weebly.com/uploads/5/3/7/3/53733905/lu15a.pdf 37 | 38 | 10. NOGD: The Nystrom Online Gradient Descent algorithm using functional approximation method.[pdf] 39 | http://jingonline.weebly.com/uploads/5/3/7/3/53733905/lu15a.pdf 40 | 41 | The last two were proposed by our group and published on Journal of Machine Learning Research. 
If you need to use this code package, please cite our paper as: 42 | ________________________________________ 43 | 44 | Lu J, Hoi S C H, Wang J, et al. Large scale online kernel learning[J]. Journal of Machine Learning Research, 2016, 17(47): 1. 45 | 46 | or bib: 47 | ________________________________________ 48 | @article{lu2016large, 49 | title={Large scale online kernel learning}, 50 | author={Lu, Jing and Hoi, Steven CH and Wang, Jialei and Zhao, Peilin and Liu, Zhi-Yong}, 51 | journal={Journal of Machine Learning Research}, 52 | volume={17}, 53 | number={47}, 54 | pages={1}, 55 | year={2016}, 56 | publisher={Journal of Machine Learning Research/Microtome Publishing} 57 | } 58 | _________________________________________ 59 | 60 | To get started, please refer to the file install.pdf, which provide a detailed step-by-step guide on the installation of this package. Before it, an Eigen package is needed (http://eigen.tuxfamily.org/index.php?title=Main_Page). After building, we get an executable file KOL and use it in command line. 61 | _______________________________________ 62 | 63 | Prepare for the input data 64 | 65 | We use the LIBSVM dataset formate, which is an effcient sparse data representation as input. Each instance in the dataset is represented by a row of numbers ended by "\n". For example: 66 | 67 | +1 5:1 16:1 20:1 37:1 40:1 63:1 68:1 73:1 74:1 76:1 82:1 93:1 68 | 69 | -1 2:1 6:1 18:1 19:1 39:1 40:1 52:1 61:1 71:1 72:1 74:1 76:1 80:1 95:1 70 | 71 | In the above dataset, there are 2 instances stored in two rows. Each row begins with the class label of this instance. In binary classification the label appears in two forms: {+1, -1}. Note that some dataset files might be labeled with {0, 1}, which is not allowed by our toolbox. They have to be preprocessed and transformed to the {-1,+1} formate. Following the label, the feature values appears in form feature_index:feature_value. This is a sparse feature representation. 
If one certain feature index does not appear, it indicates that its value is zero. 72 | 73 | Our toolbox is well designed to follow the standard online learning setting and load the dataset sequentially. So there is no memory limitation at all for large scale datasets. Users are not required to input the feature dimension of the dataset before training, since the algorithm will automatically adjust to the increase of feature dimension. 74 | 75 | _________________________________ 76 | 77 | Command Line 78 | 79 | After compiling the code of the toolbox and getting the executable file "KOL", we can use command line mode to run the algorithms: 80 | 81 | >>KOL -i training_dataset [-t testing_dataset] -opt algorithm_name [parameter setting] 82 | 83 | KOL is the name of the executable file we got from compiling the code. -i training_dataset is a necessary input indicating the training dataset name. -opt algorithm_name is another necessary input indicating the selected algorithm for learning. -t testing_dataset is an optional input indicating the testing dataset name. If not indicated, the algorithm will only conduct the training process and output the online training accuracy and time cost. Parameter setting is also optional and varies among different algorithms. If not indicated, the algorithm will use default setting. 84 | 85 | ______________________________________ 86 | 87 | A quick example: 88 | 89 | We may download the a9a datasets and perform the online kernel learning using the perceptron algorithm. 
We try the following command line: 90 | 91 | >>KOL -i a9a_train -t a9a_test -opt kernel-perceptron 92 | 93 | The output is as follows: 94 | 95 | Algorithm: kernel_perceptron 96 | 97 | 0 10000 20000 30000 98 | 99 | #Training Instances:32561 100 | 101 | Learn acuracy: 78.851997% 102 | 103 | #SV:6887 104 | 105 | Learning time: 10.218000 s 106 | 107 | Test acuracy: 70.738899 % 108 | 109 | Test time: 9.766000 s 110 | 111 | The second line indicates the number of processed training samples until now, which can give an intuitive impression of the processing speed. This is a necessary output in the case when the training time is extremely long. The output includes the training accuracy, training time cost (including loading time), the number of support vectors, test accuracy and test time (including loading time). 112 | 113 | __________________________________________________________ 114 | 115 | Parameter Setting: 116 | 117 | Each algorithm has its own set of parameters. We will give detailed explanations about the usage of each algorithm. 118 | 119 | parameter command line default value 120 | 121 | the gaussian width parameter for gaussian kernel exp(-\gamma||x-y||_2^2) -gamma gamma=0.01 122 | 123 | budget size for all budget algorithms, the max number of support vectors -B B=100 124 | 125 | the learning rate for gradient descent based algorithms -eta eta= 0.5 126 | 127 | the regularizer parameter for bogd -lambda lambda=0.01 128 | 129 | 130 | For parameters specific to some algorithms, we will introduce with the following examples: 131 | 132 | 1. Perceptron: 133 | 134 | >>KOL -i a9a_train -t a9a_test -opt kernel-perceptron -gamma 0.1 135 | 136 | 2. OGD: 137 | 138 | >>KOL -i a9a_train -t a9a_test -opt kernel-ogd -eta 0.1 -gamma 0.01 139 | 140 | 3. RBP 141 | 142 | >>KOL -i a9a_train -t a9a_test -opt kernel-rbp -B 300 143 | 144 | 4. Kernel-forgetron 145 | 146 | >>KOL -i a9a_train -t a9a_test -opt kernel-forgetron -B 300 -gamma 0.01 147 | 148 | 5. 
Kernel-projectron 149 | 150 | >>KOL -i a9a_train -t a9a_test -opt kernel-projectron -B 300 151 | 152 | 6. Kernel-projectronpp 153 | 154 | >>KOL -i a9a_train -t a9a_test -opt kernel-projectronpp -B 300 -gamma 0.01 155 | 156 | 7. Kernel-bpas 157 | 158 | >>KOL -i a9a_train -t a9a_test -opt kernel-bpas -B 300 -cbpas 1 -gamma 0.01 159 | 160 | Note that the parameter cbpas is the weight paramter C, which controls the step size. default value is 1. 161 | 162 | 8: BOGD 163 | 164 | >>KOL -i a9a_train -opt kernel-bogd -B 300 -lambda 0.1 -eta 0.1 -gamma 0.01 165 | 166 | 9: FOGD 167 | 168 | >>KOL -i a9a_train -opt kernel-fogd -D 400 -eta 0.001 -gamma 0.001 169 | 170 | Note that the parameter D is the number of fourier components for the FOGD algorithm. default value is 400 171 | 172 | 10: NOGD 173 | 174 | >>KOL -i a9a_train -opt kernel-nogd -knogd 30 -eta 0.1 -eta1 0.3 -gamma 0.01 -B 300 175 | 176 | Note that the parameter -knogd is the matrix rank for SVD. default value 20. The eta is the kernel step size and eta1 is the linear step size, both with 0.5 default value. 
177 | ____________________________________________________ 178 | 179 | Related links: 180 | 181 | Steven Hoi's home page: http://stevenhoi.org/ 182 | 183 | LU Jing's home page: http://jingonline.weebly.com/ 184 | 185 | LIBOL: http://libol.stevenhoi.org/ 186 | 187 | LIBSOL: http://libsol.stevenhoi.org/ 188 | 189 | Eigen: http://eigen.tuxfamily.org/index.php?title=Main_Page 190 | 191 | LIBSVM: https://www.csie.ntu.edu.tw/~cjlin/libsvm/ 192 | 193 | Journal of Machine Learning Reseaerch: http://jmlr.org/papers/v17/14-148.html 194 | 195 | 196 | Our Matlab codes for all experiments in the research paper:https://github.com/jingcoco/Online-Kernel-Learning 197 | 198 | Our follow-up research in online multiple kernel learning: https://github.com/jingcoco/Online-Multiple-Kernel-Learning 199 | 200 | 201 | A follow-up work to our proposed algorithm in NIPS: https://papers.nips.cc/paper/6560-dual-space-gradient-descent-for-online-learning.pdf 202 | 203 | 204 | 205 | 206 | -------------------------------------------------------------------------------- /install.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LIBOL/KOL/e8627b81635dde6007af4715ec9dc84b76e98152/install.pdf -------------------------------------------------------------------------------- /python/KOL.exe: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/LIBOL/KOL/e8627b81635dde6007af4715ec9dc84b76e98152/python/KOL.exe -------------------------------------------------------------------------------- /python/cross.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | import platform 4 | 5 | 6 | train_file ='german'####training file name 7 | eta=['1','0.1','0.01','0.001','0.0001']##### 8 | gamma=['1','0.1','0.01','0.001','0.0001']##### 9 | 10 | 11 | 12 | fold_num=5 13 | count_cmd = 'wc -l %s' %train_file 14 | count_handler = 
os.popen(count_cmd) 15 | line_num = int(count_handler.read().split()[0]) 16 | count_handler.close() 17 | 18 | split_line_num = int(line_num / fold_num) 19 | 20 | split_list = [] 21 | for k in range(0,fold_num): 22 | file_name = train_file + '_cva' + chr(ord('a') + k) 23 | os.system('rm -f %s' %file_name) 24 | split_list.append(file_name) 25 | 26 | split_cmd = 'split -l {0} {1} {2}_cv'\ 27 | .format(split_line_num,train_file, train_file) 28 | 29 | os.system(split_cmd) 30 | 31 | f0 = open(split_list[0]) 32 | raw0=f0.read() 33 | f0.close() 34 | f1 = open(split_list[1]) 35 | raw1=f1.read() 36 | f1.close() 37 | f2 = open(split_list[2]) 38 | raw2=f2.read() 39 | f2.close() 40 | f3 = open(split_list[3]) 41 | raw3=f3.read() 42 | f3.close() 43 | f4 = open(split_list[4]) 44 | raw4=f4.read() 45 | f4.close() 46 | train_list=['train0','train1','train2','train3','train4'] 47 | output_file = open(train_list[0], 'w') 48 | output_file.write(raw1+raw2+raw3+raw4) 49 | output_file.close() 50 | 51 | output_file = open(train_list[1], 'w') 52 | output_file.write(raw0+raw2+raw3+raw4) 53 | output_file.close() 54 | 55 | output_file = open(train_list[2], 'w') 56 | output_file.write(raw0+raw1+raw3+raw4) 57 | output_file.close() 58 | 59 | output_file = open(train_list[3], 'w') 60 | output_file.write(raw0+raw1+raw2+raw4) 61 | output_file.close() 62 | 63 | output_file = open(train_list[4], 'w') 64 | output_file.write(raw0+raw1+raw2+raw3) 65 | output_file.close() 66 | 67 | del raw0 68 | del raw1 69 | del raw2 70 | del raw3 71 | del raw4 72 | 73 | 74 | for k in range(len(gamma)): 75 | for j in range(len(eta)): 76 | for i in range(0,5): 77 | command="KOL -i "+train_list[i]+" -opt kernel-ogd -t "+split_list[i]+" -eta "+eta[j]+' -gamma '+gamma[k]+' >>result.txt'#### 78 | os.system(command) 79 | 80 | f0 = open("result.txt") 81 | raw=f0.read() 82 | 83 | sum=0 84 | for i in range(0,5): 85 | indexleft=raw.find('Test acuracy:') 86 | indexright=indexleft+20 87 | sss=raw[indexleft+13:indexright] 88 | 
raw=raw[indexright+10:] 89 | accu=float(sss) 90 | sum=sum+accu 91 | 92 | accurate=sum/5 93 | print(gamma[k]+'\t'+eta[j]+"\t"+str(accurate)) 94 | f0.close() 95 | os.remove("result.txt") 96 | 97 | for i in range(0,5): 98 | os.remove(train_list[i]) 99 | os.remove(split_list[i]) 100 | -------------------------------------------------------------------------------- /python/learn.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | 4 | data='a9a' 5 | train=data+'_train' 6 | test=data+'_test' 7 | train_set=[train,'train0','train1','train2','train3','train4','train5','train6','train7','train8','train9','train10'] 8 | for i in range(0,10): 9 | cmd='rand.py '+ train_set[i]+' '+ train_set[i+1] 10 | os.system(cmd) 11 | cmd='KOL.exe -i '+ train_set[i]+" -t "+test+' -opt kernel-ogd >>"reslut_project.txt' 12 | os.system(cmd) 13 | 14 | 15 | f0 = open("reslut_project.txt") 16 | raw=f0.read() 17 | sum=0 18 | for i in range(0,10): 19 | indexleft=raw.find('Test acuracy:') 20 | indexright=indexleft+20 21 | sss=raw[indexleft+13:indexright] 22 | raw=raw[indexright+10:] 23 | accu=float(sss) 24 | sum=sum+accu 25 | print(accu) 26 | 27 | accurate=sum/10 28 | print("\n") 29 | print("test accuracy") 30 | print(accurate) 31 | print("\n") 32 | 33 | 34 | f0 = open("reslut_project.txt") 35 | raw=f0.read() 36 | sum=0 37 | for i in range(0,10): 38 | indexleft=raw.find('Learning time: ') 39 | indexright=indexleft+22 40 | sss=raw[indexleft+15:indexright] 41 | raw=raw[indexright+10:] 42 | accu=float(sss) 43 | sum=sum+accu 44 | 45 | accurate=sum/10 46 | print("\n") 47 | print("learning time:") 48 | print(accurate) 49 | print("\n") 50 | 51 | f0 = open("reslut_project.txt") 52 | raw=f0.read() 53 | sum=0 54 | for i in range(0,10): 55 | indexleft=raw.find('Test time: ') 56 | indexright=indexleft+18 57 | sss=raw[indexleft+11:indexright] 58 | raw=raw[indexright+10:] 59 | accu=float(sss) 60 | sum=sum+accu 61 | 62 | accurate=sum/10 63 | 
print("\n") 64 | print("Test time") 65 | print(accurate) 66 | print("\n") 67 | f0.close() 68 | #os.remove('reslut1.txt') 69 | 70 | 71 | -------------------------------------------------------------------------------- /python/rand.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """rand a dataset""" 3 | 4 | import sys 5 | import random 6 | def Usage(): 7 | print 'rand.py inputfile outputfile' 8 | 9 | if len(sys.argv) != 3: 10 | Usage() 11 | sys.exit() 12 | input_file = sys.argv[1] 13 | output_file = sys.argv[2] 14 | file_handler = open(input_file,'r') 15 | content = file_handler.readlines() 16 | #print content[-1] 17 | if content[-1][-1] != '\n': 18 | #print content[-1][-1] 19 | content[-1]+='\n' 20 | file_handler.close() 21 | 22 | random.shuffle(content) 23 | 24 | file_handler = open(output_file, 'w') 25 | file_handler.writelines(content) 26 | file_handler.close() 27 | -------------------------------------------------------------------------------- /src/Params.cpp: -------------------------------------------------------------------------------- 1 | /************************************************************************* 2 | > File Name: Params.cpp 3 | > Copyright (C) 2013 Yue Wu 4 | > Created Time: Thu 26 Sep 2013 05:49:18 PM SGT 5 | > Functions: Class for Parsing parameters 6 | ************************************************************************/ 7 | #include "Params.h" 8 | #include "common/util.h" 9 | #include "common/init_param.h" 10 | 11 | #include 12 | #include 13 | 14 | using namespace std; 15 | using namespace ez; 16 | 17 | namespace SOL { 18 | Params::Params() { 19 | this->vfloat = new ezOptionValidator("f"); 20 | this->vint = new ezOptionValidator("u4"); 21 | this->vbool = new ezOptionValidator("t","in","true,false",false); 22 | 23 | this->Init(); 24 | } 25 | 26 | Params::~Params(){ 27 | } 28 | void Params::Init(){ 29 | //initialize params 30 | opt.overview = "Sparse Online Learning 
Library"; 31 | opt.syntax = "SOL [options] -i train_file" ; 32 | opt.example = "SOL -i train_file -opt SGD"; 33 | 34 | opt.add("",0,0,',',"help message","-h","--help"); 35 | 36 | this->add_option("",0,1,"train file","-i", &this->fileName); 37 | this->add_option("",0,1,"test file name","-t",&this->test_fileName); 38 | this->add_option("",0,1,"cached train file name","-c",&this->cache_fileName); 39 | this->add_option("",0,1,"cached test file name","-tc",&this->test_cache_fileName); 40 | 41 | this->add_option(init_data_type,0,1,"data type format","-dt",&this->str_data_type); 42 | this->add_option(init_buf_size,0,1,"number of chunks for buffering","-bs",&this->buf_size); 43 | 44 | this->add_option(init_loss_type,0,1,"loss function type:\nHinge, Logit, Square, SquareHinge","-loss",&this->str_loss); 45 | 46 | this->add_option(init_opti_method,0,1, 47 | "optimization method:\nSGD, STG, RDA, RDA_E, FOBOS, Ada-RDA, Ada-FOBOS, AROW, SAROW, CW-RDA, SCW-RDA","-opt", &this->str_opt); 48 | this->add_option(init_is_learn_best_param,0,0,"learn best parameter", 49 | "-lbp", &this->is_learn_best_param); 50 | this->add_option(init_eta,0,1,"learning rate", "-eta",&this->eta); 51 | this->add_option(gamma_int,0,1,"sigma_kernel", "-gamma",&this->gamma); 52 | this->add_option(Budget_ini,0,1,"Budget", "-B",&this->Budget_set); 53 | this->add_option(D_fogd,0,1,"D_fogd", "-D",&this->D_set); 54 | this->add_option(init_power_t,0,1,"power t of decaying learning rate","-power_t",&this->power_t); 55 | this->add_option(init_initial_t,0,1,"initial iteration number","-t0",&this->initial_t); 56 | this->add_option(init_lambda,0,1,"l1 regularization","-lambda", &this->lambda); 57 | this->add_option(1,0,1,"number of passes","-passes", &this->passNum); 58 | this->add_option(k_nogd_ini,0,1,"k_nogd","-knogd", &this->k_nogd); 59 | this->add_option(init_eta,0,1,"eta1: linear step size for NOGD","-eta1", &this->eta1); 60 | this->add_option(10000,0,1,"c for pa","-cpa",&this->C); 61 | this->add_option(C_bpas_ini,0,1,"c for 
bpas","-cbpas",&this->C_bpas); 62 | 63 | } 64 | 65 | void Params::add_option(float default_val, bool is_required, int expectArgs, 66 | const char* descr, const char* flag, float *storage){ 67 | *storage = default_val; 68 | this->opt.add("",is_required,expectArgs,0,descr,flag,this->vfloat); 69 | this->flag2storage_float[flag] = storage; 70 | } 71 | 72 | 73 | void Params::add_option(int default_val, bool is_required, int expectArgs, 74 | const char* descr, const char* flag, int *storage){ 75 | *storage = default_val; 76 | this->opt.add("",is_required,expectArgs,0,descr,flag,this->vint); 77 | this->flag2storage_int[flag] = storage; 78 | } 79 | void Params::add_option(bool default_val, bool is_required, int expectArgs, 80 | const char* descr, const char* flag, bool *storage){ 81 | *storage = default_val; 82 | this->opt.add("",is_required,expectArgs,0,descr,flag, this->vbool); 83 | this->flag2storage_bool[flag] = storage; 84 | } 85 | 86 | void Params::add_option(const char* default_val, bool is_required, int expectArgs, 87 | const char* descr, const char* flag, string *storage){ 88 | *storage = default_val; 89 | this->opt.add("",is_required,expectArgs,0,descr,flag); 90 | this->flag2storage_str[flag] = storage; 91 | } 92 | 93 | bool Params::Parse(int argc, const char** args) { 94 | if (opt.isSet("-h")){ 95 | this->Help(); 96 | return false; 97 | } 98 | opt.parse(argc, args); 99 | vector badOptions; 100 | if (!opt.gotRequired(badOptions)){ 101 | for (size_t i = 0; i < badOptions.size(); i++) 102 | cerr<<"ERROR: Missing required option "<Help(); 104 | return false; 105 | } 106 | if (!opt.gotExpected(badOptions)){ 107 | for (size_t i = 0; i < badOptions.size(); i++) 108 | cerr<<"ERROR: Got unexpected number of arguments for option "<Help(); 110 | return false; 111 | } 112 | for (map_float_iter iter = this->flag2storage_float.begin(); 113 | iter != this->flag2storage_float.end(); iter++){ 114 | if (opt.isSet(iter->first.c_str())) 115 | 
opt.get(iter->first.c_str())->getFloat(*(iter->second)); 116 | } 117 | 118 | for (map_int_iter iter = this->flag2storage_int.begin(); 119 | iter != this->flag2storage_int.end(); iter++){ 120 | if (opt.isSet(iter->first.c_str())) 121 | opt.get(iter->first.c_str())->getInt(*(iter->second)); 122 | } 123 | for (map_bool_iter iter = this->flag2storage_bool.begin(); 124 | iter != this->flag2storage_bool.end(); iter++){ 125 | if (opt.isSet(iter->first.c_str())) 126 | if (opt.get(iter->first.c_str())->expectArgs == 0) 127 | *(iter->second) = true; 128 | else{ 129 | string out; 130 | opt.get(iter->first.c_str())->getString(out); 131 | ToLowerCase(out); 132 | if (out == "true") 133 | *(iter->second) = true; 134 | else 135 | *(iter->second) = false; 136 | } 137 | } 138 | for (map_str_iter iter = this->flag2storage_str.begin(); 139 | iter != this->flag2storage_str.end(); iter++){ 140 | if (opt.isSet(iter->first.c_str())) 141 | opt.get(iter->first.c_str())->getString(*(iter->second)); 142 | } 143 | 144 | if (this->cache_fileName.size() == 0 && this->fileName.length() == 0){ 145 | cerr<<"you must specify the training data"< File Name: Params.h 3 | > Copyright (C) 2013 Yue Wu 4 | > Created Time: Thu 26 Sep 2013 05:51:05 PM SGT 5 | > Functions: Class for Parsing parameters 6 | ************************************************************************/ 7 | 8 | #ifndef HEADER_PARSER_PARAM 9 | #define HEADER_PARSER_PARAM 10 | 11 | #include "common/ezOptionParser.hpp" 12 | 13 | #include "data/parser.h" 14 | 15 | #include 16 | #include 17 | 18 | 19 | using std::string; 20 | using std::map; 21 | 22 | //using namespace ez; 23 | 24 | namespace SOL 25 | { 26 | class Params 27 | { 28 | private: 29 | ez::ezOptionParser opt; 30 | ez::ezOptionValidator* vfloat; 31 | ez::ezOptionValidator* vint; 32 | ez::ezOptionValidator* vbool; 33 | 34 | map flag2storage_float; 35 | map flag2storage_int; 36 | map flag2storage_bool; 37 | map flag2storage_str; 38 | 39 | typedef map::iterator map_float_iter; 40 | 
typedef map::iterator map_int_iter; 41 | typedef map::iterator map_bool_iter; 42 | typedef map::iterator map_str_iter; 43 | 44 | public: 45 | //input data 46 | string fileName; //source file name 47 | string cache_fileName; //cached file name 48 | string test_fileName; //test file name 49 | string test_cache_fileName; //cached test file name 50 | 51 | //dataset type 52 | string str_data_type; 53 | //loss function type 54 | string str_loss; 55 | //optimization method 56 | string str_opt; 57 | 58 | int passNum; 59 | int D_set; 60 | bool ave; 61 | 62 | //optimzation parameters 63 | float eta; //learning rate 64 | float eta1; 65 | float gamma; 66 | float lambda; //for l1 regularization 67 | int K; //for STG method 68 | int Budget_set; 69 | float gamma_rou; //for RDA 70 | int k_nogd; 71 | float delta; //for Ada- 72 | float r; //for AROW 73 | float phi; //for SCW 74 | float C; 75 | int buf_size; //number of chunks in dataset 76 | int start_ave; 77 | float C_bpas; 78 | 79 | int initial_t; 80 | float power_t; 81 | bool is_learn_best_param; //whether learn best parameter 82 | 83 | bool is_normalize; 84 | 85 | float beta_spa; 86 | float alpha_spa; 87 | int weight_sum; 88 | float delt_max; 89 | public: 90 | Params(); 91 | ~Params(); 92 | 93 | bool Parse(int argc, const char** args); 94 | void Help(); 95 | 96 | private: 97 | void Init(); 98 | 99 | void add_option(float default_val, bool is_required, int expectArgs, 100 | const char* descr, const char* flag, float *storage); 101 | void add_option(int default_val, bool is_required, int expectArgs, 102 | const char* descr, const char* flag, int *storage); 103 | void add_option(bool default_val, bool is_required, int expectArgs, 104 | const char* descr, const char* flag, bool *storage); 105 | void add_option(const char* default_val, bool is_required, int expectArgs, 106 | const char* descr, const char* flag, string *storage); 107 | }; 108 | } 109 | #endif 110 | 
-------------------------------------------------------------------------------- /src/common/init_param.h: -------------------------------------------------------------------------------- 1 | /************************************************************************* 2 | > File Name: init_param.h 3 | > Copyright (C) 2013 Yue Wu 4 | > Created Time: 2013/9/28 15:12:27 5 | > Functions: init parameters 6 | ************************************************************************/ 7 | 8 | #ifndef HEADER_INIT_PARAM 9 | #define HEADER_INIT_PARAM 10 | 11 | #include 12 | #include 13 | namespace SOL { 14 | #define IndexType uint32_t 15 | 16 | //compress cache 17 | #define BASIC_IO 0 18 | #define GZIP_IO 1 19 | #define ZLIB_IO 2 20 | // 21 | /////////////////////Optimizer Initalization parameters////////////////// 22 | // 23 | //learning rate for sgd 24 | static const float init_eta = 0.5; 25 | //budget size 26 | static const int Budget_ini=100; 27 | static const float lambda_ini=0.01f;/// 28 | static const float C_bpas_ini=1; 29 | static const int D_fogd=4*Budget_ini; 30 | static const float gamma_int=0.01; 31 | static const float ini_eta_fogd=5e-4f; 32 | static const int k_nogd_ini= (int)(0.2*Budget_ini); 33 | // static const int x_ini_dimension=24;///////////////for fogd 34 | 35 | /////////////////////Optimizer Initalization parameters////////////////// 36 | // 37 | //whether to learn the best parameter 38 | static const bool init_is_learn_best_param = false; 39 | //learning rate 40 | 41 | static const float init_eta_max = 128.f; 42 | static const float init_eta_min = 1.f; 43 | static const float init_eta_step = 2.f; 44 | //pow decaying learing rate 45 | static const float init_power_t = 0.5; 46 | //initial t 47 | static const int init_initial_t = 1; 48 | //l1 regularization 49 | static const float init_lambda = 0.001; 50 | //sparse soft threshold when counting zero-weights 51 | static const float init_sparse_soft_thresh = (float)(1e-5); 52 | //truncate gradients every K 
steps 53 | static const int init_k = 10; 54 | //gammarou in enchanced RDA 55 | static const float init_gammarou = 25; 56 | //delta in adaptive algorithms 57 | static const float init_delta = 10; 58 | static const float init_delta_max = 16.f; 59 | static const float init_delta_min = 0.125f; 60 | static const float init_delta_step = 2.f; 61 | //r in AROW 62 | static const float init_r = 1; 63 | static const float init_r_max = 16.f; 64 | static const float init_r_min = 0.125f; 65 | static const float init_r_step = 2.f; 66 | 67 | //skip value in SVM2SGD 68 | static const int init_skip = 16; 69 | //intial value of norminv in Confidence weighted algorithms 70 | static const float init_phi = 1.f; 71 | //is normalize the data 72 | static const bool init_normalize = false; 73 | 74 | static const char* init_loss_type = "Hinge"; 75 | static const char* init_data_type = "LibSVM"; 76 | static const char* init_opti_method = "SGD"; 77 | 78 | //trying the optimal parameters 79 | 80 | 81 | 82 | ////////////////////Data Set Reader Parameters/////////////////////////// 83 | static const size_t init_chunk_size = 256; 84 | static const size_t init_buf_size = 2; 85 | 86 | //////////////////////Zlib Parameters///////////////////////////// 87 | static const int zlib_deflate_level = -1; // use default deflate level 88 | static const size_t zlib_buf_size = 16348; //default buffer size of zlib 89 | } 90 | #endif 91 | -------------------------------------------------------------------------------- /src/common/md5.h: -------------------------------------------------------------------------------- 1 | /* MD5 2 | converted to C++ class by Frank Thilo (thilo@unix-ag.org) 3 | for bzflag (http://www.bzflag.org) 4 | 5 | based on: 6 | 7 | md5.h and md5.c 8 | reference implementation of RFC 1321 9 | 10 | Copyright (C) 1991-2, RSA Data Security, Inc. Created 1991. All 11 | rights reserved. 
/* MD5
 converted to C++ class by Frank Thilo (thilo@unix-ag.org)
 for bzflag (http://www.bzflag.org)

 based on:

 md5.h and md5.c
 reference implementation of RFC 1321

 Copyright (C) 1991-2, RSA Data Security, Inc. Created 1991. All
 rights reserved.

 License to copy and use this software is granted provided that it
 is identified as the "RSA Data Security, Inc. MD5 Message-Digest
 Algorithm" in all material mentioning or referencing this software
 or this function.

 License is also granted to make and use derivative works provided
 that such works are identified as "derived from the RSA Data
 Security, Inc. MD5 Message-Digest Algorithm" in all material
 mentioning or referencing the derived work.

 RSA Data Security, Inc. makes no representations concerning either
 the merchantability of this software or the suitability of this
 software for any particular purpose. It is provided "as is"
 without express or implied warranty of any kind.

 These notices must be retained in any copies of any part of this
 documentation and/or software.

*/

#ifndef BZF_MD5_H
#define BZF_MD5_H

// NOTE(review): the original #include targets were stripped during extraction;
// these four cover every name used below (memcpy/memset, snprintf, string, ostream).
#include <cstring>
#include <cstdio>
#include <string>
#include <iostream>

// Constants for MD5Transform routine: per-round left-rotation amounts (RFC 1321).
#define S11 7
#define S12 12
#define S13 17
#define S14 22
#define S21 5
#define S22 9
#define S23 14
#define S24 20
#define S31 4
#define S32 11
#define S33 16
#define S34 23
#define S41 6
#define S42 10
#define S43 15
#define S44 21

// a small class for calculating MD5 hashes of strings or byte arrays
// it is not meant to be fast or secure
//
// usage: 1) feed it blocks of uchars with update()
//        2) finalize()
//        3) get hexdigest() string
//        or
//        MD5(std::string).hexdigest()
//
// assumes that char is 8 bit and int is 32 bit
class MD5
{
public:
    typedef unsigned int size_type; // must be 32bit

    MD5() { init(); }

    // compute the digest of a whole string in one shot
    MD5(const std::string& text)
    {
        init();
        update(text.c_str(), text.length());
        finalize();
    }

    // absorb `length` bytes of input; may be called repeatedly before finalize()
    void update(const unsigned char input[], size_type length)
    {
        // compute number of bytes mod 64
        size_type index = count[0] / 8 % blocksize;

        // Update number of bits (64-bit counter split across count[0]/count[1])
        if ((count[0] += (length << 3)) < (length << 3))
            count[1]++;
        count[1] += (length >> 29);

        // number of bytes we need to fill in buffer
        size_type firstpart = 64 - index;

        size_type i;

        // transform as many times as possible.
        if (length >= firstpart)
        {
            // fill buffer first, transform
            memcpy(&buffer[index], input, firstpart);
            transform(buffer);

            // transform chunks of blocksize (64 bytes)
            for (i = firstpart; i + blocksize <= length; i += blocksize)
                transform(&input[i]);

            index = 0;
        }
        else
            i = 0;

        // buffer remaining input
        memcpy(&buffer[index], &input[i], length - i);
    }

    // convenience overload for signed char buffers
    void update(const char input[], size_type length)
    {
        update((const unsigned char*)input, length);
    }

    // apply RFC 1321 padding + length, producing the final digest; idempotent
    MD5& finalize()
    {
        static const unsigned char padding[64] = {
            0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
        };

        if (!finalized) {
            // Save number of bits
            unsigned char bits[8];
            encode(bits, count, 8);

            // pad out to 56 mod 64.
            size_type index = count[0] / 8 % 64;
            size_type padLen = (index < 56) ? (56 - index) : (120 - index);
            update(padding, padLen);

            // Append length (before padding)
            update(bits, 8);

            // Store state in digest
            encode(digest, state, 16);

            // Zeroize sensitive information.
            memset(buffer, 0, sizeof buffer);
            memset(count, 0, sizeof count);

            finalized = true;
        }

        return *this;
    }

    // 32-char lowercase hex digest; empty string if finalize() was not called
    std::string hexdigest() const
    {
        if (!finalized)
            return "";

        char buf[33];
        for (int i = 0; i < 16; i++)
            // snprintf instead of sprintf: identical output, bounded write
            snprintf(buf + i * 2, 3, "%02x", digest[i]);
        buf[32] = 0;

        return std::string(buf);
    }

    friend std::ostream& operator<<(std::ostream&, const MD5& md5);

private:
    void init()
    {
        finalized = false;

        count[0] = 0;
        count[1] = 0;

        // load magic initialization constants (RFC 1321).
        state[0] = 0x67452301;
        state[1] = 0xefcdab89;
        state[2] = 0x98badcfe;
        state[3] = 0x10325476;
    }

    typedef unsigned char uint1; //  8bit
    typedef unsigned int uint4;  // 32bit
    enum { blocksize = 64 };     // VC6 won't eat a const static int here

    // apply MD5 algo on one 64-byte block
    void transform(const uint1 block[blocksize])
    {
        uint4 a = state[0], b = state[1], c = state[2], d = state[3], x[16];
        decode(x, block, blocksize);

        /* Round 1 */
        FF (a, b, c, d, x[ 0], S11, 0xd76aa478); /* 1 */
        FF (d, a, b, c, x[ 1], S12, 0xe8c7b756); /* 2 */
        FF (c, d, a, b, x[ 2], S13, 0x242070db); /* 3 */
        FF (b, c, d, a, x[ 3], S14, 0xc1bdceee); /* 4 */
        FF (a, b, c, d, x[ 4], S11, 0xf57c0faf); /* 5 */
        FF (d, a, b, c, x[ 5], S12, 0x4787c62a); /* 6 */
        FF (c, d, a, b, x[ 6], S13, 0xa8304613); /* 7 */
        FF (b, c, d, a, x[ 7], S14, 0xfd469501); /* 8 */
        FF (a, b, c, d, x[ 8], S11, 0x698098d8); /* 9 */
        FF (d, a, b, c, x[ 9], S12, 0x8b44f7af); /* 10 */
        FF (c, d, a, b, x[10], S13, 0xffff5bb1); /* 11 */
        FF (b, c, d, a, x[11], S14, 0x895cd7be); /* 12 */
        FF (a, b, c, d, x[12], S11, 0x6b901122); /* 13 */
        FF (d, a, b, c, x[13], S12, 0xfd987193); /* 14 */
        FF (c, d, a, b, x[14], S13, 0xa679438e); /* 15 */
        FF (b, c, d, a, x[15], S14, 0x49b40821); /* 16 */

        /* Round 2 */
        GG (a, b, c, d, x[ 1], S21, 0xf61e2562); /* 17 */
        GG (d, a, b, c, x[ 6], S22, 0xc040b340); /* 18 */
        GG (c, d, a, b, x[11], S23, 0x265e5a51); /* 19 */
        GG (b, c, d, a, x[ 0], S24, 0xe9b6c7aa); /* 20 */
        GG (a, b, c, d, x[ 5], S21, 0xd62f105d); /* 21 */
        GG (d, a, b, c, x[10], S22, 0x2441453);  /* 22 */
        GG (c, d, a, b, x[15], S23, 0xd8a1e681); /* 23 */
        GG (b, c, d, a, x[ 4], S24, 0xe7d3fbc8); /* 24 */
        GG (a, b, c, d, x[ 9], S21, 0x21e1cde6); /* 25 */
        GG (d, a, b, c, x[14], S22, 0xc33707d6); /* 26 */
        GG (c, d, a, b, x[ 3], S23, 0xf4d50d87); /* 27 */
        GG (b, c, d, a, x[ 8], S24, 0x455a14ed); /* 28 */
        GG (a, b, c, d, x[13], S21, 0xa9e3e905); /* 29 */
        GG (d, a, b, c, x[ 2], S22, 0xfcefa3f8); /* 30 */
        GG (c, d, a, b, x[ 7], S23, 0x676f02d9); /* 31 */
        GG (b, c, d, a, x[12], S24, 0x8d2a4c8a); /* 32 */

        /* Round 3 */
        HH (a, b, c, d, x[ 5], S31, 0xfffa3942); /* 33 */
        HH (d, a, b, c, x[ 8], S32, 0x8771f681); /* 34 */
        HH (c, d, a, b, x[11], S33, 0x6d9d6122); /* 35 */
        HH (b, c, d, a, x[14], S34, 0xfde5380c); /* 36 */
        HH (a, b, c, d, x[ 1], S31, 0xa4beea44); /* 37 */
        HH (d, a, b, c, x[ 4], S32, 0x4bdecfa9); /* 38 */
        HH (c, d, a, b, x[ 7], S33, 0xf6bb4b60); /* 39 */
        HH (b, c, d, a, x[10], S34, 0xbebfbc70); /* 40 */
        HH (a, b, c, d, x[13], S31, 0x289b7ec6); /* 41 */
        HH (d, a, b, c, x[ 0], S32, 0xeaa127fa); /* 42 */
        HH (c, d, a, b, x[ 3], S33, 0xd4ef3085); /* 43 */
        HH (b, c, d, a, x[ 6], S34, 0x4881d05);  /* 44 */
        HH (a, b, c, d, x[ 9], S31, 0xd9d4d039); /* 45 */
        HH (d, a, b, c, x[12], S32, 0xe6db99e5); /* 46 */
        HH (c, d, a, b, x[15], S33, 0x1fa27cf8); /* 47 */
        HH (b, c, d, a, x[ 2], S34, 0xc4ac5665); /* 48 */

        /* Round 4 */
        II (a, b, c, d, x[ 0], S41, 0xf4292244); /* 49 */
        II (d, a, b, c, x[ 7], S42, 0x432aff97); /* 50 */
        II (c, d, a, b, x[14], S43, 0xab9423a7); /* 51 */
        II (b, c, d, a, x[ 5], S44, 0xfc93a039); /* 52 */
        II (a, b, c, d, x[12], S41, 0x655b59c3); /* 53 */
        II (d, a, b, c, x[ 3], S42, 0x8f0ccc92); /* 54 */
        II (c, d, a, b, x[10], S43, 0xffeff47d); /* 55 */
        II (b, c, d, a, x[ 1], S44, 0x85845dd1); /* 56 */
        II (a, b, c, d, x[ 8], S41, 0x6fa87e4f); /* 57 */
        II (d, a, b, c, x[15], S42, 0xfe2ce6e0); /* 58 */
        II (c, d, a, b, x[ 6], S43, 0xa3014314); /* 59 */
        II (b, c, d, a, x[13], S44, 0x4e0811a1); /* 60 */
        II (a, b, c, d, x[ 4], S41, 0xf7537e82); /* 61 */
        II (d, a, b, c, x[11], S42, 0xbd3af235); /* 62 */
        II (c, d, a, b, x[ 2], S43, 0x2ad7d2bb); /* 63 */
        II (b, c, d, a, x[ 9], S44, 0xeb86d391); /* 64 */

        state[0] += a;
        state[1] += b;
        state[2] += c;
        state[3] += d;

        // Zeroize sensitive information.
        memset(x, 0, sizeof x);
    }

    // decodes input (unsigned char) into output (uint4). Assumes len is a multiple of 4.
    static void decode(uint4 output[], const uint1 input[], size_type len)
    {
        for (unsigned int i = 0, j = 0; j < len; i++, j += 4)
            output[i] = ((uint4)input[j]) | (((uint4)input[j+1]) << 8) |
                        (((uint4)input[j+2]) << 16) | (((uint4)input[j+3]) << 24);
    }

    // encodes input (uint4) into output (unsigned char). Assumes len is a multiple of 4.
    static void encode(uint1 output[], const uint4 input[], size_type len)
    {
        for (size_type i = 0, j = 0; j < len; i++, j += 4) {
            output[j]   = input[i] & 0xff;
            output[j+1] = (input[i] >> 8) & 0xff;
            output[j+2] = (input[i] >> 16) & 0xff;
            output[j+3] = (input[i] >> 24) & 0xff;
        }
    }

    bool finalized;
    uint1 buffer[blocksize]; // bytes that didn't fit in last 64 byte chunk
    uint4 count[2];          // 64bit counter for number of bits (lo, hi)
    uint4 state[4];          // digest so far
    uint1 digest[16];        // the result

    // low level logic operations
    static inline uint4 F(uint4 x, uint4 y, uint4 z) { return (x&y) | (~x&z); }
    static inline uint4 G(uint4 x, uint4 y, uint4 z) { return (x&z) | (y&~z); }
    static inline uint4 H(uint4 x, uint4 y, uint4 z) { return x^y^z; }
    static inline uint4 I(uint4 x, uint4 y, uint4 z) { return y ^ (x | ~z); }

    static inline uint4 rotate_left(uint4 x, int n)
    {
        return (x << n) | (x >> (32-n));
    }

    static inline void FF(uint4 &a, uint4 b, uint4 c, uint4 d, uint4 x, uint4 s, uint4 ac)
    {
        a = rotate_left(a + F(b,c,d) + x + ac, s) + b;
    }
    static inline void GG(uint4 &a, uint4 b, uint4 c, uint4 d, uint4 x, uint4 s, uint4 ac)
    {
        a = rotate_left(a + G(b,c,d) + x + ac, s) + b;
    }
    static inline void HH(uint4 &a, uint4 b, uint4 c, uint4 d, uint4 x, uint4 s, uint4 ac)
    {
        a = rotate_left(a + H(b,c,d) + x + ac, s) + b;
    }
    static inline void II(uint4 &a, uint4 b, uint4 c, uint4 d, uint4 x, uint4 s, uint4 ac)
    {
        a = rotate_left(a + I(b,c,d) + x + ac, s) + b;
    }
};

//////////////////////////////

// inline (header-defined): multiple inclusion previously violated the ODR.
// Takes const& -- the original pass-by-value copy was unnecessary (hexdigest() is const).
inline std::ostream& operator<<(std::ostream& out, const MD5& md5)
{
    return out << md5.hexdigest();
}

//////////////////////////////

// one-shot convenience wrapper; const& avoids a needless string copy
inline std::string md5(const std::string& str)
{
    MD5 hasher(str);
    return hasher.hexdigest();
}
#endif
x : -x; 42 | } 43 | 44 | template 45 | inline float Average(const T* data, int dim) { 46 | return std::accumulate(data,data + dim, 0.f) / (float)dim; 47 | } 48 | 49 | template 50 | float Variance(const T* data, int dim) { 51 | if (dim <= 1) 52 | return 0; 53 | float ave = std::accumulate(data, data + dim,0.f) / (float)dim; 54 | double var(0); 55 | for (int i = 0; i < dim; i++) 56 | var += (data[i] - ave) * (data[i] - ave); 57 | return (float)(sqrt(var / (dim - 1))); 58 | } 59 | 60 | inline float trunc_weight(float w, float gravity){ 61 | if (w > 0) 62 | return (gravity < w) ? w - gravity : 0.f; 63 | else 64 | return (gravity < -w) ? w + gravity : 0.f; 65 | } 66 | inline float trunc_weight2(float w, float gravity){ 67 | if (w > 0) 68 | return (gravity < w) ? -gravity : -w; 69 | else 70 | return (gravity < -w) ? gravity : -w; 71 | } 72 | 73 | inline void ToUpperCase(string &str) { 74 | string dst_str; 75 | int len = str.length(); 76 | for (int i = 0; i < len; i++) 77 | dst_str.push_back(toupper(str[i])); 78 | std::swap(str,dst_str); 79 | } 80 | 81 | inline void ToLowerCase(string &str) { 82 | string dst_str; 83 | int len = str.length(); 84 | for (int i = 0; i < len; i++) 85 | dst_str.push_back(tolower(str[i])); 86 | std::swap(str,dst_str); 87 | } 88 | 89 | 90 | 91 | inline double get_current_time(){ 92 | #if _WIN32 93 | return GetTickCount() / 1000.0; 94 | #else 95 | struct timeval tim; 96 | gettimeofday(&tim, NULL); 97 | return tim.tv_sec + tim.tv_usec / 1000000.0; 98 | #endif 99 | } 100 | #endif 101 | -------------------------------------------------------------------------------- /src/data/Cacher.cpp: -------------------------------------------------------------------------------- 1 | /************************************************************************* 2 | > File Name: test.cpp 3 | > Copyright (C) 2013 Yue Wu 4 | > Created Time: Mon 04 Nov 2013 09:50:06 PM 5 | > Descriptions: 6 | ************************************************************************/ 7 | 
#if defined(_MSC_VER) && defined(_DEBUG) 8 | #define _CRTDBG_MAP_ALLOC 9 | #include 10 | #include 11 | #endif 12 | 13 | #include "libsvmread.h" 14 | #include "libsvm_binary.h" 15 | #include "DataSet.h" 16 | 17 | #include 18 | #include 19 | #include 20 | 21 | using namespace SOL; 22 | void Usage(){ 23 | cout<<"Usage: Cache input_file output_file [-d]"< dt; 80 | dt.Load(input_file,output_file); 81 | size_t dataNum = 0; 82 | 83 | if(dt.Rewind()){ 84 | while(1){ 85 | const DataChunk chunk = dt.GetChunk(); 86 | dataNum += chunk.dataNum; 87 | if (chunk.dataNum == 0){ 88 | dt.FinishRead(); 89 | break; 90 | } 91 | dt.FinishRead(); 92 | } 93 | } 94 | } 95 | 96 | void Cache(const string &input_file, const string &output_file){ 97 | cout<<"Caching file..."< data; 108 | size_t dataNum = 0; 109 | size_t featNum = 0; 110 | while(reader.GetNextData(data) == true){ 111 | dataNum++; 112 | featNum += data.indexes.size(); 113 | if(!(data.label == 1 || data.label == -1)){ 114 | cout<<"data index "< 0 ? true : false; 142 | std::ofstream writer; 143 | if (is_write){ 144 | writer.open(output_file.c_str(), ios::out); 145 | if (writer.good() == false){ 146 | cerr<<"open output file" < data; 151 | size_t dataNum = 0; 152 | size_t featNum = 0; 153 | while(reader.GetNextData(data) == true){ 154 | dataNum++; 155 | featNum += data.indexes.size(); 156 | if (is_write){ 157 | writer< File Name: DataPoint.h 3 | > Copyright (C) 2013 Yue Wu 4 | > Created Time: 2013/8/18 星期日 20:13:31 5 | > Functions: Data Point Definition 6 | ************************************************************************/ 7 | 8 | #pragma once 9 | 10 | #include "s_array.h" 11 | #include "../common/init_param.h" 12 | 13 | #include 14 | 15 | namespace SOL { 16 | /** 17 | * Definitions of DataPoint: one lable, and DataPoints 18 | * 19 | * @tparam DataType 20 | */ 21 | template 22 | class DataPoint { 23 | public: 24 | //////////////Member Variables 25 | s_array indexes; 26 | s_array features; 27 | LabelType label; 28 | FeatType 
sum_sq; //sum of square 29 | 30 | //for copy and release control 31 | int *count; 32 | 33 | IndexType max_index; //max index, also the dimension 34 | public: 35 | DataPoint() { 36 | this->count = new int; 37 | *count = 1; 38 | this->max_index = 0; 39 | this->label = 0; 40 | this->sum_sq = 0; 41 | } 42 | 43 | //copy constructor 44 | DataPoint(const DataPoint &point) { 45 | this->indexes = point.indexes; 46 | this->features = point.features; 47 | this->label = point.label; 48 | this->count = point.count; 49 | this->max_index = point.max_index; 50 | this->sum_sq = 0; 51 | ++(*count); 52 | } 53 | 54 | ~DataPoint(){ 55 | this->release(); 56 | } 57 | 58 | //assignment 59 | DataPoint& operator= 60 | (const DataPoint &data) { 61 | if (data.count == this->count) 62 | return *this; 63 | this->release(); 64 | 65 | this->indexes = data.indexes; 66 | this->features = data.features; 67 | this->label = data.label; 68 | this->max_index = data.max_index; 69 | this->sum_sq = data.sum_sq; 70 | this->count = data.count; 71 | ++(*count); 72 | return *this; 73 | } 74 | //set new index-value pair 75 | void AddNewFeat(const IndexType &index, 76 | const FeatType &feat) { 77 | this->indexes.push_back(index); 78 | this->features.push_back(feat); 79 | if(this->max_index < index){ 80 | this->max_index = index; 81 | } 82 | this->sum_sq += feat * feat; 83 | } 84 | 85 | void erase() { 86 | this->indexes.erase(); 87 | this->features.erase(); 88 | this->max_index = 0; 89 | this->sum_sq = 0; 90 | } 91 | 92 | 93 | DataPoint clone() const{ 94 | DataPoint newPt; 95 | newPt.label = this->label; 96 | newPt.max_index = this->max_index; 97 | newPt.sum_sq = this->sum_sq; 98 | newPt.indexes.resize(this->indexes.size()); 99 | memcpy(newPt.indexes.begin,this->indexes.begin, this->indexes.size() * sizeof(IndexType) ); 100 | newPt.features.resize(this->features.size()); 101 | memcpy(newPt.features.begin, this->features.begin, this->features.size() * sizeof(FeatType)); 102 | return newPt; 103 | } 104 | 105 | 
IndexType dim() const {return this->max_index;} 106 | 107 | private: 108 | void release() { 109 | --(*count); 110 | if (*count == 0) 111 | delete count; 112 | this->count = NULL; 113 | } 114 | 115 | }; 116 | template 117 | struct DataChunk{ 118 | DataPoint data[init_chunk_size]; 119 | size_t dataNum; 120 | bool is_inuse; 121 | bool is_parsed; 122 | DataChunk *next; 123 | 124 | DataChunk():dataNum(0),next(NULL), is_inuse(false), is_parsed(false){ 125 | } 126 | void erase() { 127 | for (size_t i = 0; i < dataNum; i++) 128 | data[i].erase(); 129 | dataNum = 0; 130 | } 131 | }; 132 | 133 | } 134 | -------------------------------------------------------------------------------- /src/data/DataReader.h: -------------------------------------------------------------------------------- 1 | /************************************************************************* 2 | > File Name: DataReader.h 3 | > Copyright (C) 2013 Yue Wu 4 | > Created Time: 8/21/2013 Wednesday 4:48:28 PM 5 | > Functions: Interface for data reader 6 | ************************************************************************/ 7 | 8 | #pragma once 9 | 10 | 11 | #include "DataPoint.h" 12 | #include 13 | 14 | namespace SOL { 15 | template 16 | class DataReader { 17 | public: 18 | virtual ~DataReader(){} 19 | public: 20 | /** 21 | * OpenReading: Open a dataset file and get it prepared to be read 22 | * 23 | * @Return: true if everything is ok 24 | */ 25 | virtual bool OpenReading() = 0; 26 | /** 27 | * GetNextData: for loading data sequentially 28 | * 29 | * @Param data: the variable to place the loaded data 30 | * 31 | * @Return: true if everything is ok 32 | */ 33 | virtual bool GetNextData(DataPoint &data) = 0; 34 | /** 35 | * Rewind: Rewind the dataset to the beginning of the file 36 | */ 37 | virtual void Rewind() = 0; 38 | 39 | /** 40 | * Close: Close the dataset when finished loading data 41 | */ 42 | virtual void Close() = 0; 43 | 44 | /** 45 | * Good : test the status of the data reader 46 | * 47 | * 
@Return: true if everything is ok 48 | */ 49 | virtual bool Good() = 0; 50 | }; 51 | 52 | } 53 | -------------------------------------------------------------------------------- /src/data/DataSet.h: -------------------------------------------------------------------------------- 1 | /************************************************************************* 2 | > File Name: DataSet.h 3 | > Copyright (C) 2013 Yue Wu 4 | > Created Time: 2013/8/18 星期日 15:38:09 5 | > Functions: Class to interact with datasets 6 | ************************************************************************/ 7 | 8 | #pragma once 9 | 10 | 11 | #if WIN32 12 | #include 13 | #endif 14 | 15 | #include "DataSetHelper.h" 16 | #include "DataPoint.h" 17 | #include "DataReader.h" 18 | #include "libsvm_binary.h" 19 | #include "libsvmread.h" 20 | #include "../common/util.h" 21 | 22 | #include "thread_primitive.h" 23 | 24 | #include 25 | #include 26 | #include 27 | 28 | using namespace std; 29 | 30 | /** 31 | * namespace: Sparse Online Learning 32 | */ 33 | namespace SOL { 34 | //data set, can work in both read-and-write mode and read-once mode 35 | template class DataSet { 36 | private: 37 | string fileName; 38 | string cache_fileName; 39 | bool is_cache; 40 | 41 | size_t bufSize; //buffer to load data 42 | size_t passNum; //number of passes 43 | size_t dataNum; //total data number 44 | 45 | size_t curChunkNum; //data number in buffer 46 | 47 | //pointer to the first element, circlar linked list will be used 48 | DataChunk *head; 49 | DataChunk *wt_ptr; //pointer to the write location 50 | DataChunk *rd_ptr; //pointer to the read location 51 | 52 | bool load_finished; //this is used for GetChunk to test if current loading has finished 53 | bool is_on_loading; //this is used for Rewind to test if rewind can be performed 54 | 55 | DataReader *reader; 56 | 57 | //thread-safety 58 | MUTEX data_lock; 59 | CV data_available; 60 | CV buffer_full; 61 | 62 | public: 63 | DataSet(size_t passes = 1, int buf_size = 
-1) { 64 | this->head = NULL; 65 | this->wt_ptr = NULL; 66 | this->rd_ptr = NULL; 67 | 68 | this->passNum = passes > 0 ? passes : 1; 69 | this->dataNum = 0; 70 | this->curChunkNum = 0; 71 | 72 | this->load_finished = false; 73 | this->is_on_loading = false; 74 | this->reader = NULL; 75 | this->is_cache = false; 76 | 77 | this->CreateBuffer(buf_size); 78 | 79 | //init thread-safety 80 | initialize_mutex(&this->data_lock); 81 | initialize_condition_variable(&data_available); 82 | initialize_condition_variable(&buffer_full); 83 | } 84 | ~DataSet() { 85 | delete_mutex(&data_lock); 86 | if (this->reader != NULL) 87 | delete this->reader; 88 | this->reader = NULL; 89 | this->ReleaseBuffer(); 90 | } 91 | 92 | private: 93 | bool CreateBuffer(int buf_size = 0) { 94 | this->ReleaseBuffer(); 95 | this->bufSize = buf_size > 0 ? buf_size : init_buf_size; 96 | if (this->bufSize <= 0) 97 | return true; 98 | 99 | this->head = new DataChunk; 100 | DataChunk *p = this->head; 101 | for (size_t i = 1; i < this->bufSize; i++) { 102 | p->next = new DataChunk; 103 | p = p->next; 104 | } 105 | p->next = this->head; 106 | this->wt_ptr = this->head; 107 | this->rd_ptr = this->head; 108 | 109 | return true; 110 | } 111 | 112 | private: 113 | void ClearBuffer() { 114 | DataChunk *p = this->head; 115 | if (p == NULL) 116 | return; 117 | p = p->next; 118 | while (p != this->head) { 119 | p->erase(); 120 | p = p->next; 121 | } 122 | p->erase(); 123 | this->dataNum = 0; 124 | this->curChunkNum = 0; 125 | this->wt_ptr = this->head; 126 | this->rd_ptr = this->head; 127 | } 128 | 129 | void ReleaseBuffer() { 130 | DataChunk *p = this->head; 131 | if (p == NULL) 132 | return; 133 | DataChunk *q = p->next; 134 | while (q != this->head) { 135 | p = q->next; 136 | delete q; 137 | q = p; 138 | } 139 | delete this->head; 140 | this->head = NULL; 141 | this->wt_ptr = NULL; 142 | this->rd_ptr = NULL; 143 | this->dataNum = 0; 144 | } 145 | 146 | public: 147 | template friend bool CacheLoad(DataSet *dataset); 
148 | #if WIN32 149 | template friend DWORD WINAPI thread_LoadData(LPVOID param); 150 | #else 151 | template friend void* thread_LoadData(void* param); 152 | #endif 153 | 154 | //bind a data reader to the dataset 155 | bool Load(const string& filename, const string& cache_filename) { 156 | this->fileName = filename; 157 | this->cache_fileName = cache_filename; 158 | 159 | if (this->reader != NULL) 160 | delete this->reader; 161 | this->reader = NULL; 162 | 163 | if (SOL_ACCESS(this->cache_fileName.c_str()) == 0){ //already cached 164 | this->is_cache = false; 165 | this->reader = new libsvm_binary_(this->cache_fileName); 166 | } 167 | else if(SOL_ACCESS(this->fileName.c_str()) == 0){ 168 | this->reader = new LibSVMReader_(this->fileName); 169 | if (this->cache_fileName.length() == 0 && this->passNum > 1){ 170 | this->cache_fileName = "cache_file"; 171 | #if WIN32 172 | string cmd = "del " + this->cache_fileName; 173 | #else 174 | string cmd = "rm " + this->cache_fileName; 175 | #endif 176 | system(cmd.c_str()); 177 | this->is_cache = true; 178 | } 179 | else if (this->cache_fileName.length() > 0) 180 | this->is_cache = true; 181 | } 182 | else 183 | return false; 184 | 185 | if (this->reader != NULL){ 186 | if (this->reader->OpenReading() == false){ 187 | delete this->reader; 188 | this->reader = NULL; 189 | return false; 190 | } 191 | } 192 | 193 | return true; 194 | } 195 | 196 | /////////////Data Access///////////////////// 197 | public: 198 | 199 | //get the next write chunk 200 | inline DataChunk &GetWriteChunk(){ 201 | mutex_lock(&this->data_lock); 202 | if (this->wt_ptr->is_inuse == false){ 203 | this->wt_ptr->is_inuse = true; 204 | DataChunk* p = this->wt_ptr; 205 | mutex_unlock(&this->data_lock); 206 | return *p; 207 | } 208 | else{ 209 | condition_variable_wait(&this->buffer_full,&this->data_lock); 210 | mutex_unlock(&this->data_lock); 211 | return this->GetWriteChunk(); 212 | } 213 | } 214 | 215 | inline void EndWriteChunk(){ 216 | 
mutex_lock(&this->data_lock); 217 | this->wt_ptr->is_parsed = true; 218 | this->dataNum += this->wt_ptr->dataNum; 219 | //if (this->wt_ptr->dataNum == 0){ 220 | // cout<<"chunk size is zero!"<wt_ptr = this->wt_ptr->next; 223 | condition_variable_signal_all(&this->data_available); 224 | mutex_unlock(&this->data_lock); 225 | } 226 | 227 | inline void FinishParse(){ 228 | //notice that the all the data has been loaded 229 | mutex_lock(&this->data_lock); 230 | this->load_finished = true; 231 | this->is_on_loading = false; 232 | condition_variable_signal_all(&this->data_available); 233 | mutex_unlock(&this->data_lock); 234 | } 235 | 236 | //get the data to read 237 | inline const DataChunk& GetChunk() { 238 | mutex_lock(&this->data_lock); 239 | //check if there is available data 240 | if (this->rd_ptr->is_parsed == true){ 241 | this->rd_ptr->is_parsed = false; 242 | mutex_unlock(&this->data_lock); 243 | return *(this->rd_ptr); 244 | } 245 | else{ //no available data 246 | if (this->load_finished == true){ 247 | this->rd_ptr->is_parsed = false; 248 | this->rd_ptr->erase(); 249 | mutex_unlock(&this->data_lock); 250 | return *(this->rd_ptr); //return an invalid data 251 | } 252 | else{ //suspend the current thread 253 | condition_variable_wait(&this->data_available,&this->data_lock); 254 | mutex_unlock(&this->data_lock); 255 | return this->GetChunk(); 256 | } 257 | } 258 | } 259 | 260 | void FinishRead() { 261 | mutex_lock(&this->data_lock); 262 | this->rd_ptr->is_inuse = false; 263 | //notice that the last data have been processed 264 | this->rd_ptr = this->rd_ptr->next; 265 | condition_variable_signal_all(&this->buffer_full); 266 | mutex_unlock(&this->data_lock); 267 | } 268 | 269 | //the number of features 270 | inline size_t size() const {return this->dataNum; } 271 | bool Rewind() { 272 | mutex_lock(&this->data_lock); 273 | if (this->is_on_loading == true) { 274 | cout<<"data is on loading"<data_lock); 276 | return false; 277 | } 278 | reader->Rewind(); 279 | 
this->ClearBuffer(); 280 | this->load_finished = false; 281 | this->is_on_loading = true; 282 | mutex_unlock(&this->data_lock); 283 | 284 | #if WIN32 285 | HANDLE thread = ::CreateThread(NULL, 0, static_cast(thread_LoadData), this, NULL, NULL); 286 | #else 287 | pthread_t thread; 288 | pthread_create(&thread,NULL,thread_LoadData,this); 289 | #endif 290 | return true; 291 | } 292 | }; 293 | } 294 | -------------------------------------------------------------------------------- /src/data/DataSetHelper.h: -------------------------------------------------------------------------------- 1 | /************************************************************************* 2 | > File Name: DataSetHelper.h 3 | > Copyright (C) 2013 Yue Wu 4 | > Created Time: Thu 24 Oct 2013 03:33:10 PM 5 | > Descriptions: thread function definitions 6 | ************************************************************************/ 7 | #pragma once 8 | 9 | 10 | #include "libsvm_binary.h" 11 | #include "thread_primitive.h" 12 | 13 | namespace SOL{ 14 | template class DataSet; 15 | 16 | //load a chunk of data, return if file ended 17 | template 18 | bool load_chunk(DataReader* reader, DataChunk&chunk){ 19 | bool not_file_end = true; 20 | chunk.erase(); 21 | while(chunk.dataNum < init_chunk_size && not_file_end == true){ 22 | DataPoint &data = chunk.data[chunk.dataNum]; 23 | not_file_end = reader->GetNextData(data); 24 | if (not_file_end == true) 25 | chunk.dataNum++; 26 | else 27 | break; 28 | } 29 | return not_file_end; 30 | } 31 | 32 | //save chunk to disk 33 | template 34 | bool save_chunk(libsvm_binary_ *writer, DataChunk&chunk){ 35 | size_t w_num = 0; 36 | while(w_num < chunk.dataNum){ 37 | if (writer->WriteData(chunk.data[w_num]) == true) 38 | w_num++; 39 | else 40 | return false; 41 | } 42 | return false; 43 | } 44 | 45 | template 46 | libsvm_binary_* get_cacher(const std::string &cache_filename){ 47 | string tmpFileName = cache_filename + ".writing"; 48 | libsvm_binary_* cacher = new 
libsvm_binary_(tmpFileName); 49 | if (cacher->OpenWriting() == false){ 50 | cerr<<"Open cache file failed!"< 58 | bool end_cache(libsvm_binary_**cacher, const std::string& cache_fileName){ 59 | string tmpFileName = (*cacher)->get_filename(); 60 | (*cacher)->Close(); 61 | delete *cacher; 62 | *cacher = NULL; 63 | 64 | //rename 65 | #if WIN32 66 | string cmd = "ren \""; 67 | cmd = cmd + tmpFileName + "\" \""; 68 | //in windows, the second parameter of ren should not include path 69 | cmd = cmd + cache_fileName.substr(cache_fileName.find_last_of("/\\") + 1) + "\""; 70 | #else 71 | string cmd = "mv \""; 72 | cmd = cmd + tmpFileName + "\" \""; 73 | cmd = cmd + cache_fileName + "\""; 74 | #endif 75 | 76 | if(system(cmd.c_str()) != 0){ 77 | cerr<<"rename cahe file name failed!"< 84 | bool CacheLoad(DataSet *dataset){ 85 | DataReader* reader = dataset->reader; 86 | reader->Rewind(); 87 | if (reader->Good() == false) { 88 | cerr<<"reader is incorrect!"<* writer = get_cacher(dataset->cache_fileName); 93 | if (writer == NULL) 94 | return false; 95 | 96 | //load data 97 | bool not_file_end = false; 98 | do { 99 | DataChunk &chunk = dataset->GetWriteChunk(); 100 | not_file_end = load_chunk(reader, chunk); 101 | save_chunk(writer, chunk); 102 | dataset->EndWriteChunk(); 103 | }while(not_file_end == true); 104 | 105 | return end_cache(&writer, dataset->cache_fileName); 106 | } 107 | 108 | template 109 | #if WIN32 110 | DWORD WINAPI thread_LoadData(LPVOID param) 111 | #else 112 | void* thread_LoadData(void* param) 113 | #endif 114 | { 115 | DataSet* dataset = static_cast*>(param); 116 | DataReader* reader = dataset->reader; 117 | 118 | size_t pass = 0; 119 | if (dataset->is_cache == true){ 120 | if(CacheLoad(dataset) == false){ 121 | cerr<<"caching data failed!"<FinishParse(); 123 | return NULL; 124 | } 125 | dataset->reader->Close(); 126 | delete dataset->reader; 127 | //load cache file 128 | dataset->reader = new libsvm_binary_(dataset->cache_fileName); 129 | if 
(dataset->reader->OpenReading() == false){ 130 | cerr<<"load cache data failed!"<FinishParse(); 132 | return NULL; 133 | } 134 | reader = dataset->reader; 135 | dataset->is_cache = false; 136 | pass++; 137 | } 138 | //load cache 139 | for (;pass < dataset->passNum; pass++) { 140 | reader->Rewind(); 141 | if (reader->Good()) { 142 | bool not_file_end = false; 143 | do { 144 | DataChunk &chunk = dataset->GetWriteChunk(); 145 | not_file_end = load_chunk(reader, chunk); 146 | dataset->EndWriteChunk(); 147 | }while(not_file_end == true); 148 | } 149 | else { 150 | cerr<<"reader is incorrect!"<FinishParse(); 155 | return NULL; 156 | } 157 | } 158 | -------------------------------------------------------------------------------- /src/data/MNISTConvert.cpp: -------------------------------------------------------------------------------- 1 | /************************************************************************* 2 | > File Name: Convert.cpp 3 | > Copyright (C) 2013 Yue Wu 4 | > Created Time: Thu 14 Nov 2013 06:49:38 PM 5 | > Descriptions: Convert other file formats to LIBSVM 6 | ************************************************************************/ 7 | 8 | #include 9 | #include 10 | using namespace std; 11 | 12 | #include "MNISTReader.h" 13 | using namespace SOL; 14 | #define FeatType float 15 | #define LabelType char 16 | //define your own data reader here 17 | #define ReaderType MNISTReader 18 | 19 | int main(int argc, char** args){ 20 | if (argc != 6){ 21 | cout<<"Usage: MNISTConvert train_file label_file digit1 digit2 output_file"< data; 32 | if (reader.OpenReading() == false) { 33 | return -1; 34 | } 35 | ofstream outFile(outfilename.c_str(), ios::out); 36 | if (!outFile){ 37 | cerr<<"open file "< File Name: MNISTReader.h 4 | 5 | > Copyright (C) 2013 Yue Wu 6 | 7 | > Created Time: 2013/8/18 Sunday 20:25:28 8 | 9 | > Functions: MNIST reader 10 | 11 | ************************************************************************/ 12 | 13 | #ifndef HEADER_MINST_READER 14 
| #define HEADER_MINST_READER 15 | #include "DataReader.h" 16 | 17 | #include 18 | using std::ios_base; 19 | using std::ios; 20 | using std::ifstream; 21 | using std::string; 22 | 23 | namespace SOL { 24 | 25 | template 26 | int MSB2LSB(T x) { 27 | int y = x; 28 | int byteNum = sizeof(x); 29 | char *buf = new char[byteNum]; 30 | char *buf1 = new char[byteNum]; 31 | memcpy(buf, &x, byteNum); 32 | for (int i = 0; i < byteNum; i++) 33 | buf1[i] = buf[byteNum - 1 - i]; 34 | memcpy(&y,buf1, byteNum); 35 | 36 | delete []buf; 37 | delete []buf1; 38 | return y; 39 | } 40 | 41 | 42 | template 43 | class MNISTReader: public DataReader { 44 | private: 45 | ifstream inTrainFile, inLabelFile; 46 | string trainFileName, labelFileName; 47 | int num1, num2; 48 | 49 | int featDim; 50 | int imgNum; 51 | 52 | std::streamoff trainFileStartPos; 53 | std::streamoff labelFileStartPos; 54 | 55 | unsigned char* rd_buf; 56 | 57 | public: 58 | MNISTReader(const string &trainFile, const string &labelFile, 59 | int digit1 = -1, int digit2 = -1): 60 | trainFileName(trainFile),labelFileName(labelFile), 61 | num1(digit1),num2(digit2) { 62 | featDim = 0; 63 | imgNum = 0; 64 | rd_buf = NULL; 65 | } 66 | 67 | ~MNISTReader() { 68 | this->Close(); 69 | if (rd_buf != NULL) 70 | delete []this->rd_buf; 71 | } 72 | 73 | public: 74 | virtual bool OpenReading() { 75 | this->Close(); 76 | if (this->rd_buf != NULL) 77 | delete []this->rd_buf; 78 | this->rd_buf = NULL; 79 | 80 | inTrainFile.open(trainFileName.c_str(), ios::in | ios::binary); 81 | if(!inTrainFile) { 82 | printf("can't open input file %s\n",trainFileName.c_str()); 83 | return false; 84 | } 85 | 86 | inLabelFile.open(labelFileName.c_str(), ios::in | ios::binary); 87 | if(!inLabelFile) { 88 | printf("can't open input file %s\n",labelFileName.c_str()); 89 | return false; 90 | 91 | } 92 | 93 | return this->GetFeatInfo(); 94 | } 95 | 96 | virtual void Rewind() { 97 | if(this->inTrainFile.is_open() == true) 98 | 
this->inTrainFile.seekg(trainFileStartPos,ios_base::beg); 99 | 100 | if(this->inLabelFile.is_open() == true) 101 | this->inLabelFile.seekg(labelFileStartPos,ios_base::beg); 102 | 103 | } 104 | virtual void Close() { 105 | this->inTrainFile.close(); 106 | this->inLabelFile.close(); 107 | } 108 | virtual bool Good() { 109 | if (this->inTrainFile.good() || this->inLabelFile.good() || 110 | this->inTrainFile.eof() || this->inLabelFile.eof()) 111 | return true; 112 | return false; 113 | } 114 | 115 | virtual bool GetNextData(DataPoint &data) { 116 | if (num1 == -1 || num2 == -1) 117 | return ReadData(data); 118 | else { 119 | while(ReadData(data)) { 120 | if (data.label == num1) { 121 | data.label = 1; 122 | return true; 123 | } 124 | else if (data.label == num2) { 125 | data.label = -1; 126 | return true; 127 | } 128 | } 129 | return false; 130 | } 131 | } 132 | 133 | private: 134 | bool ReadData(DataPoint &data) { 135 | if (!inTrainFile.good() || !inLabelFile.good()) 136 | return false; 137 | 138 | data.erase(); 139 | //get next label 140 | char label; 141 | inLabelFile.read((char*)&label,sizeof(char)); 142 | //get feature 143 | inTrainFile.read((char*)this->rd_buf,sizeof(unsigned char) *featDim); 144 | 145 | for (int i = 0; i < featDim; i++) { 146 | if (this->rd_buf[i] != 0) 147 | data.AddNewFeat(i + 1,this->rd_buf[i]); 148 | } 149 | data.label = label; 150 | return true; 151 | } 152 | 153 | 154 | bool GetFeatInfo() { 155 | inTrainFile.seekg(0,ios_base::beg); 156 | inLabelFile.seekg(0,ios_base::beg); 157 | 158 | if(!inTrainFile || !inLabelFile) 159 | return false; 160 | 161 | //label file 162 | //Load header 163 | int magicNum(0); 164 | inLabelFile.seekg(0,ios_base::beg); 165 | inLabelFile.read((char*)&magicNum, sizeof(int)); 166 | magicNum = MSB2LSB(magicNum); 167 | if (magicNum != 2049) { 168 | std::cerr<<"Incorrect file!"< 0) 207 | this->rd_buf = new unsigned char[featDim]; 208 | 209 | return true; 210 | } 211 | }; 212 | } 213 | #endif 214 | 
-------------------------------------------------------------------------------- /src/data/basic_io.cpp: -------------------------------------------------------------------------------- 1 | /************************************************************************* 2 | > File Name: basic_io.cpp 3 | > Copyright (C) 2013 Yue Wu 4 | > Created Time: Wed 06 Nov 2013 03:49:24 PM 5 | > Descriptions: most basic io handler, work with FILE 6 | ************************************************************************/ 7 | 8 | #include "basic_io.h" 9 | 10 | #include 11 | #include 12 | 13 | using namespace std; 14 | 15 | namespace SOL{ 16 | bool basic_io::open_file(const char* filename, const char* mode){ 17 | this->close_file(); 18 | #if _WIN32 19 | errno_t ret = fopen_s(&file,filename, mode); 20 | if ( ret != 0){ 21 | printf("error %d: can't open input file %s\n",ret,filename); 22 | return false; 23 | } 24 | 25 | #else 26 | file = fopen(filename, mode); 27 | if (file == NULL){ 28 | fprintf(stderr,"open file failed!"); 29 | return false; 30 | } 31 | #endif 32 | if (this->good() != 0){ 33 | this->close_file(); 34 | return false; 35 | } 36 | return true; 37 | } 38 | 39 | // bind_stdin: bind the input to stdin 40 | bool basic_io::open_stdin(){ 41 | file = stdin; 42 | return true; 43 | } 44 | 45 | // bind_stdin: bind the output to stdout 46 | bool basic_io::open_stdout(){ 47 | file = stdout; 48 | return true; 49 | } 50 | 51 | void basic_io::close_file(){ 52 | if (file != NULL && file != stdin && file != stdout){ 53 | fclose(file); 54 | } 55 | file = NULL; 56 | } 57 | 58 | void basic_io::rewind(){ 59 | if (file != NULL) 60 | std::rewind(file); 61 | } 62 | /** 63 | * good : test if the io is good 64 | * 65 | * @Return: zero if correct, else zero code 66 | */ 67 | int basic_io::good(){ 68 | return ferror(file); 69 | } 70 | 71 | 72 | /** 73 | * read_data : read the data from file 74 | * 75 | * @Param dst: container to place the read data 76 | * @Param length: length of data of read in 
bytes 77 | * 78 | * @Return: true if succeed 79 | */ 80 | bool basic_io::read_data(char* dst, size_t length){ 81 | return fread(dst, 1, length, file) == length; 82 | } 83 | 84 | /** 85 | * read_line : read a line from disk 86 | * 87 | * @Param dst: container to place the read data 88 | * @Param dst_len: length of dst 89 | * 90 | * @Return: size of data read in bytes 91 | */ 92 | char* basic_io::read_line(char* &dst, size_t &dst_len){ 93 | size_t len; 94 | if(fgets(dst,dst_len,file) == NULL) 95 | return NULL; 96 | while(strrchr(dst,'\n') == NULL) { 97 | dst_len *= 2; 98 | dst = (char *) realloc(dst, dst_len); 99 | len = strlen(dst); 100 | if(fgets(dst+len,dst_len-len,file) == NULL) 101 | break; 102 | } 103 | return dst; 104 | } 105 | 106 | /** 107 | * write_data : write content to disk 108 | * 109 | * @Param src: source of the data 110 | * @Param length: length to write the data 111 | * 112 | * @Return: true of succeed 113 | */ 114 | bool basic_io::write_data(char* src, size_t length){ 115 | return fwrite(src, 1, length, file) == length; 116 | } 117 | } 118 | 119 | 120 | -------------------------------------------------------------------------------- /src/data/basic_io.h: -------------------------------------------------------------------------------- 1 | /************************************************************************* 2 | > File Name: basic_io.h 3 | > Copyright (C) 2013 Yue Wu 4 | > Created Time: Wed 06 Nov 2013 03:44:46 PM 5 | > Descriptions: most basic io handler, work with FILE 6 | ************************************************************************/ 7 | 8 | #ifndef HEADER_BASIC_IO 9 | #define HEADER_BASIC_IO 10 | 11 | #include "io_interface.h" 12 | #include 13 | 14 | namespace SOL{ 15 | class basic_io: public io_interface { 16 | private: 17 | FILE* file; 18 | 19 | public: 20 | basic_io():file(NULL){} 21 | virtual ~basic_io(){ 22 | this->close_file(); 23 | } 24 | 25 | public: 26 | virtual bool open_file(const char* filename, const char* mode); 27 | 
// bind_stdin: bind the input to stdin 28 | virtual bool open_stdin(); 29 | // bind_stdin: bind the output to stdout 30 | virtual bool open_stdout(); 31 | 32 | virtual void close_file(); 33 | virtual void rewind(); 34 | 35 | /** 36 | * good : test if the io is good 37 | * 38 | * @Return: zero if correct, else zero code 39 | */ 40 | virtual int good(); 41 | 42 | public: 43 | /** 44 | * read_data : read the data from file 45 | * 46 | * @Param dst: container to place the read data 47 | * @Param length: length of data of read in bytes 48 | * 49 | * @Return: true if succeed 50 | */ 51 | virtual bool read_data(char* dst, size_t length); 52 | 53 | /** 54 | * read_line : read a line from disk 55 | * 56 | * @Param dst: container to place the read data 57 | * @Param dst_len: length of dst 58 | * 59 | * @Return: pointer to the read line, null if failed 60 | */ 61 | virtual char* read_line(char* &dst, size_t &dst_len); 62 | 63 | /** 64 | * write_data : write content to disk 65 | * 66 | * @Param src: source of the data 67 | * @Param length: length to write the data 68 | * 69 | * @Return: true if succeed 70 | */ 71 | virtual bool write_data(char* src, size_t length); 72 | }; 73 | } 74 | 75 | #endif 76 | -------------------------------------------------------------------------------- /src/data/comp.h: -------------------------------------------------------------------------------- 1 | /************************************************************************* 2 | > File Name: comp.h 3 | > Copyright (C) 2013 Yue Wu 4 | > Created Time: Thu 07 Nov 2013 11:01:37 PM 5 | > Descriptions: compression algorithms 6 | ************************************************************************/ 7 | 8 | #ifndef HEADER_COMP_ALGO 9 | #define HEADER_COMP_ALGO 10 | 11 | #include "DataPoint.h" 12 | #include 13 | #include 14 | 15 | namespace SOL{ 16 | 17 | inline uint32_t ZigZagEncode(int32_t n) { 18 | uint32_t ret = (n << 1) ^ (n >> 31); 19 | return ret; 20 | } 21 | inline int32_t 
ZigZagDecode(uint32_t n) { 22 | return (n >> 1) ^ -static_cast(n & 1); 23 | } 24 | 25 | //encode an unsigned int with run length encoding 26 | //if encode signed int, first map it to unsigned with ZigZag Encoding 27 | inline void run_len_encode(s_array &codes, uint32_t i){ 28 | // store an int 7 bits at a time. 29 | while (i >= 128) { 30 | codes.push_back((i & 127) | 128); 31 | i = i >> 7; 32 | } 33 | codes.push_back((i & 127)); 34 | } 35 | 36 | inline char* run_len_decode(char* p, uint32_t& i) { // read an int 7 bits at a time. 37 | size_t count = 0; 38 | while(*p & 128)\ 39 | i = i | ((*(p++) & 127) << 7*count++); 40 | i = i | (*(p++) << 7*count); 41 | return p; 42 | } 43 | 44 | 45 | /** 46 | * comp : compress the index list, note that the indexes must be sorted from small to big 47 | * Note: the function will not erase codes by iteself 48 | * 49 | * @Param indexes: indexes to be encoded 50 | * @Param codes: ouput codes 51 | */ 52 | inline void comp_index(const s_array& indexes, s_array &codes){ 53 | uint32_t last = 0; 54 | size_t featNum = indexes.size(); 55 | for (size_t i = 0; i< featNum; i++) { 56 | run_len_encode(codes,indexes[i] - last); 57 | last = indexes[i]; 58 | } 59 | } 60 | 61 | /** 62 | * decomp_index : de-compress the codes to indexes 63 | * 64 | * @Param codes: input codes 65 | * @Param indexes: output indexes 66 | */ 67 | inline void decomp_index(s_array &codes, s_array &indexes){ 68 | indexes.erase(); 69 | uint32_t last = 0; 70 | uint32_t index = 0; 71 | 72 | char* p = codes.begin; 73 | while(p < codes.end){ 74 | index = 0; 75 | p = run_len_decode(p,index); 76 | index += last; 77 | last = index; 78 | indexes.push_back(index); 79 | } 80 | assert(p == codes.end ); 81 | } 82 | } 83 | #endif 84 | -------------------------------------------------------------------------------- /src/data/data_analysis.cpp: -------------------------------------------------------------------------------- 1 | 
/************************************************************************* 2 | > File Name: data_analysis.cpp 3 | > Copyright (C) 2013 Yue Wu 4 | > Created Time: Thu 24 Oct 2013 08:09:38 PM 5 | > Descriptions: analyse the sparsity of data 6 | ************************************************************************/ 7 | #include "DataPoint.h" 8 | #include "DataReader.h" 9 | #include "libsvmread.h" 10 | #include "MNISTReader.h" 11 | 12 | #include 13 | using namespace std; 14 | using namespace SOL; 15 | 16 | template 17 | bool Analyze(DataReader *reader) { 18 | if (reader == NULL){ 19 | cerr<<"data reader is emptyp!"< index_set; 31 | DataPoint data; 32 | if (reader->OpenReading() == true) { 33 | reader->Rewind(); 34 | while(true) { 35 | if (reader->GetNextData(data) == true) { 36 | if (data.indexes.size() == 0) 37 | continue; 38 | if (max_index < data.dim()){ 39 | max_index = data.dim(); 40 | } 41 | size_t prev_size = index_set.size(); 42 | if (max_index > prev_size){ 43 | index_set.reserve(max_index); 44 | index_set.resize(max_index); 45 | //set the new value to zero 46 | index_set.zeros(index_set.begin + prev_size, 47 | index_set.end); 48 | } 49 | for (size_t i = 0; i < data.indexes.size(); i++){ 50 | index_set[data.indexes[i] - 1] = 1; 51 | } 52 | 53 | dataNum++; 54 | if (data.label == 1) 55 | pos_num++; 56 | else if (data.label == -1) 57 | neg_num++; 58 | else{ 59 | cerr<<"\nunrecognized label!"< max_show_count ? 
70 | max_show_count : show_count; 71 | } 72 | } 73 | else 74 | break; 75 | } 76 | } 77 | else { 78 | cerr<<"Can not open file to read!"<Close(); 83 | size_t valid_dim = 0; 84 | for (size_t i = 0; i < index_set.size(); i++) { 85 | if (index_set[i] == 1) 86 | valid_dim++; 87 | } 88 | cout<<"data number : "< 0){ 95 | printf("data sparsity: %.2lf%%\n",100 - valid_dim * 100.0 / max_index); 96 | } 97 | 98 | return true; 99 | } 100 | 101 | int main(int argc, char** args){ 102 | if (argc != 2){ 103 | cout<<"Usage: data_analysis data_file"< File Name: gzip_io.cpp 3 | > Copyright (C) 2013 Yue Wu 4 | > Created Time: Wed 06 Nov 2013 04:01:46 PM 5 | > Descriptions: read and write file in gzip format 6 | ************************************************************************/ 7 | #include "gzip_io.h" 8 | 9 | #if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(__CYGWIN__) 10 | # include 11 | # include 12 | # define SET_BINARY_MODE(file) setmode(fileno(file), O_BINARY) 13 | #else 14 | # define SET_BINARY_MODE(file) 15 | #endif 16 | 17 | #include 18 | using namespace std; 19 | 20 | namespace SOL{ 21 | bool gzip_io::open_file(const char* filename, const char* mode){ 22 | this->close_file(); 23 | 24 | file = gzopen(filename, mode); 25 | if (file == NULL){ 26 | cerr<<"open file failed!"<good() != 0){ 30 | this->close_file(); 31 | return false; 32 | } 33 | return true; 34 | } 35 | // bind_stdin: bind the input to stdin 36 | bool gzip_io::open_stdin(){ 37 | file = gzdopen(fileno(stdin),"rb"); 38 | return true; 39 | } 40 | 41 | // bind_stdin: bind the output to stdout 42 | bool gzip_io::open_stdout(){ 43 | file = gzdopen(fileno(stdout),"wb"); 44 | return true; 45 | } 46 | 47 | void gzip_io::close_file(){ 48 | if (file != NULL && file != stdin && file != stdout){ 49 | gzclose(file); 50 | } 51 | file = NULL; 52 | } 53 | 54 | void gzip_io::rewind(){ 55 | if (file != NULL) 56 | gzrewind(file); 57 | } 58 | 59 | /** 60 | * good : test if the io is good 61 | * 62 | * @Return: zero 
if correct, else zero code 63 | */ 64 | int gzip_io::good(){ 65 | int errCode; 66 | const char* errmsg = gzerror(file ,&errCode);; 67 | if (errCode != Z_OK){ 68 | if (gzeof(file) == 1) //eof is not an error 69 | return 0; 70 | printf("%s\n",errmsg); 71 | } 72 | return errCode; 73 | } 74 | 75 | /** 76 | * read_data : read the data from file 77 | * 78 | * @Param dst: container to place the read data 79 | * @Param length: length of data of read in bytes 80 | * 81 | * @Return: true if succeed 82 | */ 83 | bool gzip_io::read_data(char* dst, size_t length){ 84 | return size_t(gzread(file, dst, length)) == length; 85 | } 86 | 87 | /** 88 | * read_line : read a line from disk 89 | * 90 | * @Param dst: container to place the read data 91 | * @Param dst_len: length of dst 92 | * 93 | * @Return: size of data read in bytes 94 | */ 95 | char* gzip_io::read_line(char* &dst, size_t &dst_len){ 96 | printf("error: no read line is supported in gzip io\n"); 97 | return NULL; 98 | } 99 | 100 | /** 101 | * write_data : write content to disk 102 | * 103 | * @Param src: source of the data 104 | * @Param length: length to write the data 105 | * 106 | * @Return: true if succeed 107 | */ 108 | bool gzip_io::write_data(char* src, size_t length){ 109 | return size_t(gzwrite(file, src, length)) == length; 110 | } 111 | } 112 | 113 | 114 | -------------------------------------------------------------------------------- /src/data/gzip_io.h: -------------------------------------------------------------------------------- 1 | /************************************************************************* 2 | > File Name: /home/matthew/work/SOL/src/data/gzip_io.h 3 | > Copyright (C) 2013 Yue Wu 4 | > Created Time: Wed 06 Nov 2013 04:02:12 PM 5 | > Descriptions: 6 | ************************************************************************/ 7 | #ifndef HEADER_GZIP_IO 8 | #define HEADER_GZIP_IO 9 | 10 | #include "io_interface.h" 11 | 12 | #include 13 | #include 14 | #include 15 | #include "zlib.h" 16 | 17 | 
namespace SOL{ 18 | class gzip_io: public io_interface{ 19 | private: 20 | gzFile file; 21 | 22 | public: 23 | gzip_io():file(NULL){} 24 | ~gzip_io(){ 25 | this->close_file(); 26 | } 27 | 28 | 29 | public: 30 | virtual bool open_file(const char* filename, const char* mode); 31 | virtual void close_file(); 32 | virtual void rewind(); 33 | // bind_stdin: bind the input to stdin 34 | virtual bool open_stdin(); 35 | // bind_stdin: bind the output to stdout 36 | virtual bool open_stdout(); 37 | 38 | /** 39 | * good : test if the io is good 40 | * 41 | * @Return: zero if correct, else zero code 42 | */ 43 | virtual int good(); 44 | 45 | public: 46 | /** 47 | * read_data : read the data from file 48 | * 49 | * @Param dst: container to place the read data 50 | * @Param length: length of data of read in bytes 51 | * 52 | * @Return: true if succeed 53 | */ 54 | virtual bool read_data(char* dst, size_t length); 55 | 56 | /** 57 | * read_line : read a line from disk 58 | * 59 | * @Param dst: container to place the read data 60 | * @Param dst_len: length of dst 61 | * 62 | * @Return: pointer to the read line, null if failed 63 | */ 64 | virtual char* read_line(char* &dst, size_t &dst_len); 65 | 66 | /** 67 | * write_data : write content to disk 68 | * 69 | * @Param src: source of the data 70 | * @Param length: length to write the data 71 | * 72 | * @Return: true if succeed 73 | */ 74 | virtual bool write_data(char* src, size_t length); 75 | }; 76 | } 77 | 78 | #endif 79 | -------------------------------------------------------------------------------- /src/data/io_handler.h: -------------------------------------------------------------------------------- 1 | /************************************************************************* 2 | > File Name: io_handler.h 3 | > Copyright (C) 2013 Yue Wu 4 | > Created Time: Wed 06 Nov 2013 03:23:19 PM 5 | > Descriptions: handler for io 6 | ************************************************************************/ 7 | 8 | #include 9 | 10 | 
using namespace std; 11 | namespace SOL{ 12 | class io_handler{ 13 | public: 14 | io_handler(){} 15 | ~io_handler(){} 16 | 17 | public: 18 | bool open_file(const char* filename); 19 | void close_file(); 20 | 21 | public: 22 | int read_data(unsigned char* dst, size_t length); 23 | int write_data(unsigned char* src, size_t length); 24 | }; 25 | } 26 | 27 | -------------------------------------------------------------------------------- /src/data/io_interface.h: -------------------------------------------------------------------------------- 1 | /************************************************************************* 2 | > File Name: io_interface.h 3 | > Copyright (C) 2013 Yue Wu 4 | > Created Time: Wed 06 Nov 2013 03:26:20 PM 5 | > Descriptions: interface definition for io 6 | ************************************************************************/ 7 | #ifndef HEADER_IO_INTERFACE_ 8 | #define HEADER_IO_INTERFACE_ 9 | 10 | #include 11 | 12 | namespace SOL{ 13 | class io_interface{ 14 | public: 15 | virtual bool open_file(const char* filename, const char* mode) = 0; 16 | // bind_stdin: bind the input to stdin 17 | virtual bool open_stdin() = 0; 18 | // bind_stdin: bind the output to stdout 19 | virtual bool open_stdout() = 0; 20 | 21 | virtual void close_file() = 0; 22 | virtual void rewind() = 0; 23 | /** 24 | * good : test if the io is good 25 | * 26 | * @Return: zero if correct, else zero code 27 | */ 28 | virtual int good() = 0; 29 | 30 | public: 31 | /** 32 | * read_data : read the data from file 33 | * 34 | * @Param dst: container to place the read data 35 | * @Param length: length of data of read in bytes 36 | * 37 | * @Return: true if succeed 38 | */ 39 | virtual bool read_data(char* dst, size_t length) = 0; 40 | 41 | /** 42 | * read_line : read a line from disk 43 | * 44 | * @Param dst: container to place the read data 45 | * @Param dst_len: length of dst 46 | * 47 | * @Return: pointer to the read line, null if failed 48 | */ 49 | virtual char* 
read_line(char* &dst, size_t &dst_len) = 0; 50 | 51 | /** 52 | * write_data : write content to disk 53 | * 54 | * @Param src: source of the data 55 | * @Param length: length to write the data 56 | * 57 | * @Return: true if succeed 58 | */ 59 | virtual bool write_data(char* src, size_t length) = 0; 60 | }; 61 | } 62 | 63 | #endif 64 | -------------------------------------------------------------------------------- /src/data/libsvm_binary.h: -------------------------------------------------------------------------------- 1 | /************************************************************************* 2 | > File Name: libsvm_binary.h 3 | > Copyright (C) 2013 Yue Wu 4 | > Created Time: Sat 21 Sep 2013 10:52:41 PM SGT 5 | > Functions: io for binary libsvm dataset 6 | ************************************************************************/ 7 | 8 | #ifndef HEADER_LIBSVM_BINARY 9 | #define HEADER_LIBSVM_BINARY 10 | 11 | 12 | #include "DataReader.h" 13 | #include "basic_io.h" 14 | //#include "zlib_io.h" 15 | //#include "gzip_io.h" 16 | 17 | #include "comp.h" 18 | 19 | #include 20 | 21 | using namespace std; 22 | 23 | namespace SOL { 24 | template 25 | class libsvm_binary_:public DataReader { 26 | private: 27 | std::string fileName; 28 | basic_io io_handler; 29 | //gzip_io io_handler. 30 | //zlib_io io_handler. 
31 | 32 | //compressed codes of indexes 33 | s_array comp_codes; 34 | 35 | public: 36 | libsvm_binary_(const std::string &fileName) { 37 | this->fileName = fileName; 38 | } 39 | 40 | ~libsvm_binary_() { 41 | this->Close(); 42 | } 43 | const std::string& get_filename() const { 44 | return this->fileName; 45 | } 46 | 47 | //////////////////online mode////////////////// 48 | public: 49 | bool OpenReading() { 50 | this->Close(); 51 | return io_handler.open_file(this->fileName.c_str(), "rb"); 52 | } 53 | 54 | bool OpenWriting() { 55 | this->Close(); 56 | return io_handler.open_file(this->fileName.c_str(), "wb"); 57 | } 58 | 59 | void Rewind() { 60 | io_handler.rewind(); 61 | } 62 | 63 | void Close() { 64 | io_handler.close_file(); 65 | } 66 | 67 | inline bool Good() { 68 | return io_handler.good() == 0 ? true : false; 69 | } 70 | 71 | bool GetNextData(DataPoint &data) { 72 | data.erase(); 73 | if (io_handler.read_data((char*)&(data.label),sizeof(LabelType)) == false){ 74 | if (this->Good() == true){ 75 | return false; 76 | } 77 | else{ 78 | cerr<<"unexpected error occured when loading data!"< 0){ 90 | if(io_handler.read_data((char*)&data.max_index,sizeof(IndexType)) == false){ 91 | cerr<<"load max index failed!"<comp_codes.resize(code_len); 101 | if(io_handler.read_data(this->comp_codes.begin, 102 | code_len) == false){ 103 | cerr<<"read coded index failed!"<comp_codes, data.indexes); 107 | if (data.indexes.size() != featNum){ 108 | cerr<<"decoded index number is not correct!"< &data) { 126 | size_t featNum = data.indexes.size(); 127 | if(io_handler.write_data((char*)&data.label,sizeof(LabelType)) == false){ 128 | cerr<<"write label failed!"< 0){ 137 | if(io_handler.write_data((char*)&data.max_index, 138 | sizeof(IndexType)) == false){ 139 | cerr<<"write max index failed!"<comp_codes.erase(); 143 | comp_index(data.indexes, this->comp_codes); 144 | unsigned int code_len = (unsigned int)(this->comp_codes.size()); 145 | if(io_handler.write_data((char*)&code_len, 146 | 
sizeof(unsigned int)) == false){ 147 | cerr<<"write coded index length failed!"<comp_codes.begin, 151 | code_len) == false){ 152 | cerr<<"write coded index failed!"< libsvm_binary; 171 | } 172 | #endif 173 | -------------------------------------------------------------------------------- /src/data/libsvmread.h: -------------------------------------------------------------------------------- 1 | /************************************************************************* 2 | > File Name: libsvmread.h 3 | > Copyright (C) 2013 Yue Wu 4 | > Created Time: 2013/8/18 星期日 20:25:28 5 | > Functions: libsvm reader 6 | ************************************************************************/ 7 | #pragma once 8 | 9 | #if _WIN32 10 | #define _CRT_SECURE_NO_WARNINGS 11 | #endif 12 | 13 | #include "DataReader.h" 14 | #include "basic_io.h" 15 | #include "parser.h" 16 | 17 | #include 18 | #include 19 | #include 20 | #include 21 | #include 22 | #include 23 | #include 24 | 25 | using namespace std; 26 | 27 | namespace SOL { 28 | template 29 | class LibSVMReader_: public DataReader { 30 | private: 31 | string fileName; 32 | basic_io reader; 33 | 34 | char *line; 35 | size_t max_line_len; 36 | 37 | public: 38 | LibSVMReader_(const string &fileName) { 39 | this->max_line_len = 4096; 40 | this->fileName = fileName; 41 | line = (char *) malloc(max_line_len*sizeof(char)); 42 | } 43 | ~LibSVMReader_() { 44 | this->Close(); 45 | if (line != NULL) 46 | free(line); 47 | } 48 | 49 | //////////////////online mode////////////////// 50 | public: 51 | virtual bool OpenReading() { 52 | this->Close(); 53 | return reader.open_file(this->fileName.c_str(), "r"); 54 | } 55 | virtual void Rewind() { 56 | reader.rewind(); 57 | } 58 | virtual void Close() { 59 | reader.close_file(); 60 | } 61 | 62 | virtual inline bool Good() { 63 | return reader.good() == 0 ? 
true: false; 64 | } 65 | 66 | virtual bool GetNextData(DataPoint &data) { 67 | if(reader.read_line(line, max_line_len) == NULL) 68 | return false; 69 | 70 | LabelType labelVal; 71 | char* p = line, *endptr = NULL; 72 | if (*p == '\0') 73 | return false; 74 | labelVal = (LabelType)parseInt(p,&endptr); 75 | if (endptr == p) { 76 | return false; 77 | } 78 | 79 | data.erase(); 80 | IndexType index; 81 | FeatType feat; 82 | // features 83 | while(1) { 84 | p = strip_line(endptr); 85 | if (*p == '\0') 86 | break; 87 | index = (IndexType)(parseUint(p,&endptr)); 88 | if (endptr == p) { //parse index failed 89 | fprintf(stderr,"parse index value failed!\n%s", p); 90 | return false; 91 | } 92 | 93 | p = endptr; 94 | feat = parseFloat(p,&endptr); 95 | //feat =(float)(strtod(val,&endptr)); 96 | if (endptr == p) { 97 | fprintf(stderr,"parse feature value failed!\n"); 98 | return false; 99 | } 100 | 101 | data.AddNewFeat(index,feat); 102 | } 103 | data.label = labelVal; 104 | 105 | return true; 106 | } 107 | }; 108 | 109 | //for special definition 110 | typedef LibSVMReader_ LibSVMReader; 111 | } 112 | -------------------------------------------------------------------------------- /src/data/makefile: -------------------------------------------------------------------------------- 1 | FLAGS = -g -Wall 2 | #FLAGS = -O2 -s 3 | 4 | LIBS=basic_io.o zlib_io.o gzip_io.o 5 | TARGETS=test 6 | 7 | all: $(TARGETS) 8 | 9 | test:test.o 10 | g++ test.o -o test 11 | 12 | MNISTConvert:MNISTConvert.cpp 13 | g++ $^ $(FLAGS) -o $@ 14 | 15 | analysis:data_analysis.o $(LIBS) 16 | g++ $^ -lz -o $@ 17 | 18 | Cacher:Cacher.o $(LIBS) 19 | g++ $^ -lz -o $@ 20 | 21 | %.o:%.cpp 22 | g++ -c $^ -o $@ $(FLAGS) 23 | 24 | .PHONY: clean 25 | clean: 26 | -rm $(TARGETS) *.o 27 | -------------------------------------------------------------------------------- /src/data/parser.h: -------------------------------------------------------------------------------- 1 | 
/************************************************************************* 2 | > File Name: parser.h 3 | > Copyright (C) 2013 Yue Wu 4 | > Created Time: Thu 07 Nov 2013 08:16:26 PM 5 | > Descriptions: public funtions to parse 6 | ************************************************************************/ 7 | 8 | #ifndef HEADER_PARSER 9 | #define HEADER_PARSER 10 | #include 11 | #include 12 | 13 | namespace SOL{ 14 | 15 | inline bool is_space(char* p){ 16 | return (*p == ' ' || *p == '\t' || *p == '\n' || *p == '\r'); 17 | } 18 | 19 | inline char* strip_line(char* p){ 20 | while(is_space(p) == true) 21 | p++; 22 | return p; 23 | } 24 | 25 | //The following function is a home made strtoi 26 | inline int parseInt(char * p, char **end) { 27 | *end = p; 28 | p = strip_line(p); 29 | 30 | if (*p == '\0'){ 31 | return 0; 32 | } 33 | int s = 1; 34 | if (*p == '+')p++; 35 | if (*p == '-') { 36 | s = -1; p++; 37 | } 38 | int acc = 0; 39 | while (*p >= '0' && *p <= '9') 40 | acc = acc * 10 + *p++ - '0'; 41 | 42 | int num_dec = 0; 43 | if (*p == '.') { 44 | p++; 45 | while (*p >= '0' && *p <= '9') { 46 | acc = acc *10 + *p++ - '0' ; 47 | num_dec++; 48 | } 49 | } 50 | int exp_acc = 0; 51 | if(*p == 'e' || *p == 'E'){ 52 | p++; 53 | if (*p == '+')p++; 54 | while (*p >= '0' && *p <= '9') 55 | exp_acc = exp_acc * 10 + *p++ - '0'; 56 | 57 | } 58 | if (is_space(p)== true) {//easy case succeeded. 
59 | exp_acc -= num_dec; 60 | if (exp_acc < 0) 61 | return 0; 62 | else 63 | acc *= (int)(powf(10.f,(float)exp_acc)); 64 | 65 | *end = p; 66 | return s * acc; 67 | } 68 | else { 69 | return 0; 70 | } 71 | } 72 | 73 | //The following function is a home made strtoi 74 | inline unsigned int parseUint(char * p, char **end) { 75 | *end = p; 76 | p = strip_line(p); 77 | 78 | if (*p == '\0'){ 79 | return 0; 80 | } 81 | unsigned int acc = 0; 82 | while (*p >= '0' && *p <= '9') 83 | acc = acc * 10 + *p++ - '0'; 84 | 85 | int num_dec = 0; 86 | if (*p == '.') { 87 | p++; 88 | while (*p >= '0' && *p <= '9') { 89 | acc = acc *10 + *p++ - '0' ; 90 | num_dec++; 91 | } 92 | } 93 | int exp_acc = 0; 94 | if(*p == 'e' || *p == 'E'){ 95 | p++; 96 | if (*p == '+')p++; 97 | while (*p >= '0' && *p <= '9') 98 | exp_acc = exp_acc * 10 + *p++ - '0'; 99 | } 100 | if (*p == ':') {//easy case succeeded. 101 | if (exp_acc < num_dec) 102 | return 0; 103 | else 104 | acc *= (unsigned int)(powf(10.f,(float)(exp_acc - num_dec))); 105 | *end = ++p; 106 | return acc; 107 | } 108 | else { 109 | return 0; 110 | } 111 | } 112 | 113 | /* 114 | inline string parseString(char*p, char**end){ 115 | p = strip_line(p); 116 | char* start_pos = p; 117 | char* end_pos = p; 118 | if (*start_pos == '\"'){ 119 | start_pos++; 120 | end_pos = start_pos; 121 | while(*end_pos != '\"' && *end_pos != '\0')end_pos++; 122 | if (*end_pos != '\"'){ 123 | *end = p; 124 | return string(); 125 | } 126 | } 127 | *end = end_pos + 1; 128 | return string(start_pos,end_pos - start_pos - 1); 129 | } 130 | */ 131 | 132 | // The following function is a home made strtof. The 133 | // differences are : 134 | // - much faster (around 50% but depends on the string to parse) 135 | // - less error control, but utilised inside a very strict parser 136 | // in charge of error detection. 
137 | inline float parseFloat(char * p, char **end) { 138 | *end = p; 139 | p = strip_line(p); 140 | 141 | if (*p == '\0'){ 142 | return 0; 143 | } 144 | int s = 1; 145 | if (*p == '+') p++; 146 | if (*p == '-') { 147 | s = -1; p++; 148 | } 149 | 150 | int acc = 0; 151 | while (*p >= '0' && *p <= '9') 152 | acc = acc * 10 + *p++ - '0'; 153 | 154 | int num_dec = 0; 155 | if (*p == '.') { 156 | p++; 157 | while (*p >= '0' && *p <= '9') { 158 | acc = acc *10 + *p++ - '0' ; 159 | num_dec++; 160 | } 161 | } 162 | 163 | int exp_acc = 0; 164 | if(*p == 'e' || *p == 'E'){ 165 | p++; 166 | int exp_s = 1; 167 | if (*p == '+') p++; 168 | if (*p == '-') { 169 | exp_s = -1; p++; 170 | } 171 | while (*p >= '0' && *p <= '9') 172 | exp_acc = exp_acc * 10 + *p++ - '0'; 173 | exp_acc *= exp_s; 174 | } 175 | if (is_space(p) == true || *p == '\0'){//easy case succeeded. 176 | exp_acc -= num_dec; 177 | *end = p; 178 | return s * acc * powf(10.f,(float)(exp_acc)); 179 | } 180 | else 181 | return 0; 182 | } 183 | 184 | } 185 | #endif 186 | -------------------------------------------------------------------------------- /src/data/s_array.h: -------------------------------------------------------------------------------- 1 | /************************************************************************* 2 | > File Name: s_array.h 3 | > Copyright (C) 2013 Yue Wu 4 | > Created Time: 2013/9/19 15:14:53 5 | > Functions: customized array 6 | ************************************************************************/ 7 | 8 | #pragma once 9 | 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | 16 | namespace SOL { 17 | //the difference of s_array with vector is that vector copies the data, while 18 | //s_array only copies the pointer and increase counter 19 | template class s_array { 20 | public: 21 | T* begin; //point to the first element 22 | T* end; //point to the next postion of the last element 23 | size_t capacity; //capacity of the array 24 | int *count; 25 | 26 | T first() const 
{return *begin;} 27 | T last() const {return *(end - 1);} 28 | T pop() {return *(--end);} 29 | bool empty() const {return begin == end;} 30 | size_t size() const {return end - begin;} 31 | T& operator[] (size_t i) {return begin[i];} 32 | const T& operator[] (size_t i) const { return begin[i];} 33 | 34 | void allocate(size_t new_size){ 35 | T* new_begin = NULL; 36 | try{ 37 | new_begin = new T[new_size]; 38 | }catch(std::bad_alloc &ex){ 39 | std::cerr< 0) { 46 | std::cerr<<"realloc of "<< new_size 47 | <<" failed in resize(). out of memory?\n" 48 | <<__FILE__<<"\n"<<__LINE__<size(); 53 | //copy data 54 | memcpy(new_begin,begin,sizeof(T) * old_len); 55 | if (begin != NULL) 56 | delete []begin; 57 | begin = new_begin; 58 | end = begin + old_len; 59 | capacity = new_size; 60 | } 61 | 62 | void resize(size_t newSize) { 63 | if (capacity < newSize){ //allocate more memory 64 | this->allocate(newSize); 65 | } 66 | end = begin + newSize; 67 | } 68 | void erase(void) { resize(0); } 69 | 70 | void push_back(const T& elem) { 71 | size_t old_len = size(); 72 | if (old_len == capacity) {//full array 73 | this->allocate(2 * old_len + 3); 74 | } 75 | *(end++) = elem; 76 | } 77 | 78 | void reserve(size_t new_size){ 79 | if(this->capacity < new_size){ 80 | size_t alloc_size = this->capacity; 81 | do{ 82 | alloc_size = 2 * alloc_size + 3; 83 | }while(alloc_size < new_size); 84 | this->allocate(alloc_size); 85 | } 86 | } 87 | 88 | s_array& operator= (const s_array &arr) { 89 | if (this->count == arr.count) 90 | return *this; 91 | this->release(); 92 | 93 | this->begin =arr.begin; 94 | this->end = arr.end; 95 | this->capacity = arr.capacity; 96 | this->count = arr.count; 97 | ++(*count); 98 | return *this; 99 | } 100 | 101 | //reset all the elements in the array to zero 102 | void zeros(){ 103 | memset(this->begin, 0, sizeof(T) * this->size()); 104 | } 105 | //reset all the elements in the array to zero 106 | void zeros(T* iter_begin, T* iter_end){ 107 | memset(iter_begin, 0, 
sizeof(T) * (iter_end - iter_begin)); 108 | } 109 | 110 | //set the elements in the array to val 111 | void set_value(const T& val){ 112 | T* p = this->begin; 113 | while(p < this->end){ 114 | *p = val; 115 | p++; 116 | } 117 | } 118 | //set the elements in the given range to the val 119 | void set_value(T* iter_begin, T* iter_end, const T& val){ 120 | while(iter_begin < iter_end){ 121 | *iter_begin = val; 122 | iter_begin++; 123 | } 124 | } 125 | 126 | void release() { 127 | --(*count); 128 | if (*count == 0) { 129 | if (this->begin != NULL) 130 | delete []this->begin; 131 | delete this->count; 132 | } 133 | this->begin = NULL; 134 | this->end = NULL; 135 | this->capacity = 0; 136 | this->count = NULL; 137 | } 138 | 139 | s_array() { 140 | begin = NULL; end = NULL; count = NULL; capacity = 0; 141 | count = new int; 142 | *count = 1; 143 | } 144 | s_array(const s_array &arr) { 145 | this->begin =arr.begin; 146 | this->end = arr.end; 147 | this->capacity = arr.capacity; 148 | this->count = arr.count; 149 | ++(*count); 150 | } 151 | 152 | ~s_array() { this->release(); } 153 | }; 154 | } 155 | -------------------------------------------------------------------------------- /src/data/thread_primitive.h: -------------------------------------------------------------------------------- 1 | /************************************************************************* 2 | > File Name: thread.h 3 | > Copyright (C) 2013 Yue Wu 4 | > Created Time: Sun 22 Sep 2013 03:22:34 PM SGT 5 | > Functions: Primitives for thread 6 | ************************************************************************/ 7 | #pragma once 8 | 9 | using namespace std; 10 | 11 | namespace SOL 12 | { 13 | #ifdef _WIN32 14 | #include 15 | typedef CRITICAL_SECTION MUTEX; 16 | typedef CONDITION_VARIABLE CV; 17 | #else 18 | typedef pthread_mutex_t MUTEX; 19 | typedef pthread_cond_t CV; 20 | #endif 21 | 22 | void initialize_mutex(MUTEX *pm) 23 | { 24 | #ifdef _WIN32 25 | ::InitializeCriticalSection(pm); 26 | #else 
27 | pthread_mutex_init(pm,NULL); 28 | #endif 29 | } 30 | 31 | void delete_mutex(MUTEX *pm) 32 | { 33 | #ifdef _WIN32 34 | ::DeleteCriticalSection(pm); 35 | #else 36 | //no operation needed here 37 | #endif 38 | } 39 | 40 | void initialize_condition_variable(CV *pcv) 41 | { 42 | #ifdef _WIN32 43 | ::InitializeConditionVariable(pcv); 44 | #else 45 | pthread_cond_init(pcv,NULL); 46 | #endif 47 | } 48 | 49 | void mutex_lock(MUTEX *pm) 50 | { 51 | //cout<<"obtain lock"< File Name: zlib_io.cpp 3 | > Copyright (C) 2013 Yue Wu 4 | > Created Time: Wed 06 Nov 2013 04:15:51 PM 5 | > Descriptions: read and write file in default zlib format 6 | ************************************************************************/ 7 | 8 | #include "zlib_io.h" 9 | #include "../common/init_param.h" 10 | 11 | #if defined(MSDOS) || defined(OS2) || defined(WIN32) || defined(__CYGWIN__) 12 | # include 13 | # include 14 | # define SET_BINARY_MODE(file) setmode(fileno(file), O_BINARY) 15 | #else 16 | # define SET_BINARY_MODE(file) 17 | #endif 18 | 19 | 20 | #include 21 | 22 | using namespace std; 23 | 24 | namespace SOL{ 25 | bool zlib_io::open_file(const char* filename, const char* mode){ 26 | if (this->alloc_buf() == false){ 27 | this->free_buf(); 28 | return false; 29 | } 30 | 31 | this->close_file(); 32 | switch(mode[0]){ 33 | case 'w': 34 | /* allocate deflate state */ 35 | strm.zalloc = Z_NULL; 36 | strm.zfree = Z_NULL; 37 | strm.opaque = Z_NULL; 38 | if(deflateInit(&strm,zlib_deflate_level) != Z_OK) 39 | return false; 40 | rw_mode = mode_write; 41 | this->de_avail_count = zlib_buf_size; 42 | this->cur_de_pos = this->de_data; 43 | break; 44 | case 'r': 45 | /* allocate deflate state */ 46 | strm.zalloc = Z_NULL; 47 | strm.zfree = Z_NULL; 48 | strm.opaque = Z_NULL; 49 | if(inflateInit(&strm) != Z_OK) 50 | return false; 51 | strm.avail_in = 0; 52 | strm.next_in = NULL; 53 | 54 | rw_mode = mode_read; 55 | this->de_avail_count = 0; 56 | this->cur_de_pos = this->de_data + zlib_buf_size; 57 | 58 | 
break; 59 | default: 60 | cerr<<"unrecognized file open mode!"<good() != 0){ 70 | this->close_file(); 71 | return false; 72 | } 73 | return true; 74 | } 75 | 76 | // bind_stdin: bind the input to stdin 77 | bool zlib_io::open_stdin(){ 78 | file = stdin; 79 | return true; 80 | } 81 | 82 | // bind_stdin: bind the output to stdout 83 | bool zlib_io::open_stdout(){ 84 | file = stdout; 85 | return true; 86 | } 87 | void zlib_io::close_file(){ 88 | if (file != NULL){ 89 | if (rw_mode == mode_write){ 90 | if (this->finalize_write() != 0){ 91 | /* clean up and return */ 92 | (void)deflateEnd(&strm); 93 | } 94 | } 95 | else if (rw_mode == mode_read){ 96 | /* clean up and return */ 97 | (void)inflateEnd(&strm); 98 | } 99 | 100 | fclose(file); 101 | } 102 | file = NULL; 103 | } 104 | 105 | bool zlib_io::alloc_buf(){ 106 | if (this->en_data == NULL){ 107 | try{ 108 | this->en_data = new unsigned char[zlib_buf_size]; 109 | }catch(std::bad_alloc &ex){ 110 | cerr<<"allocate memory for encoded buffer failed\n"; 111 | cerr<de_data == NULL){ 116 | try{ 117 | this->de_data = new unsigned char[zlib_buf_size]; 118 | }catch(std::bad_alloc &ex){ 119 | cerr<<"allocate memory for decoded buffer failed\n"; 120 | cerr<en_data != NULL){ 129 | delete []this->en_data; 130 | this->en_data = NULL; 131 | } 132 | if (this->de_data != NULL){ 133 | delete []this->de_data; 134 | this->de_data = NULL; 135 | } 136 | } 137 | 138 | void zlib_io::rewind(){ 139 | if (file != NULL){ 140 | std::rewind(file); 141 | this->cur_de_pos = this->de_data; 142 | this->de_avail_count = 0; 143 | } 144 | } 145 | 146 | /** 147 | * good : test if the io is good 148 | * 149 | * @Return: zero if correct, else zero code 150 | */ 151 | int zlib_io::good(){ 152 | if (file == NULL) 153 | return -1; 154 | return ferror(file); 155 | } 156 | 157 | /** 158 | * read_data : read the data from file 159 | * 160 | * @Param dst: container to place the read data 161 | * @Param length: length of data of read in bytes 162 | * 163 | * 
@Return: true if succeed 164 | */ 165 | bool zlib_io::read_data(char* dst, size_t len){ 166 | while (this->de_avail_count < len){ 167 | memcpy(dst, this->cur_de_pos, this->de_avail_count); 168 | len -= this->de_avail_count; 169 | dst += this->de_avail_count; 170 | //this->buf_in_pos += this->buf_in_have; //can be ignored 171 | this->de_avail_count = 0; //can be ignored 172 | 173 | if (strm.avail_in == 0){ 174 | strm.avail_in = fread(this->en_data, 1,zlib_buf_size,this->file); 175 | if (ferror(this->file)) { 176 | (void)inflateEnd(&strm); 177 | cerr<<"unexpected error occured when loading cache!"<en_data; 184 | } 185 | 186 | /* run inflate() */ 187 | strm.avail_out = zlib_buf_size; 188 | strm.next_out = this->de_data; 189 | int ret = inflate(&strm, Z_NO_FLUSH); 190 | assert(ret != Z_STREAM_ERROR); /* state not clobbered */ 191 | switch (ret) { 192 | case Z_NEED_DICT: 193 | ret = Z_DATA_ERROR; /* and fall through */ 194 | case Z_DATA_ERROR: 195 | case Z_MEM_ERROR: 196 | (void)inflateEnd(&strm); 197 | cerr<<"error occured when parsing file!"<de_avail_count = zlib_buf_size - strm.avail_out; 201 | 202 | this->cur_de_pos = this->de_data; 203 | if (this->cur_de_pos == 0){ 204 | cerr<<"load compressed content failed!"<cur_de_pos, len); 209 | this->cur_de_pos += len; 210 | this->de_avail_count -= len; 211 | //len -= len; //can be ignored 212 | //dst += len; //can be ignored 213 | return true; 214 | } 215 | 216 | /** 217 | * read_line : read a line from disk 218 | * 219 | * @Param dst: container to place the read data 220 | * @Param dst_len: length of dst 221 | * 222 | * @Return: size of data read in bytes 223 | */ 224 | char* zlib_io::read_line(char* &dst, size_t &dst_len){ 225 | printf("error: no read line is supported in zlib io\n"); 226 | return NULL; 227 | } 228 | 229 | /** 230 | * write_data : write content to disk 231 | * 232 | * @Param src: source of the data 233 | * @Param length: length to write the data 234 | * 235 | * @Return: true if succeed 236 | */ 237 | bool 
zlib_io::write_data(char* src, size_t len){ 238 | while(this->de_avail_count < len){ 239 | memcpy(this->cur_de_pos,src, this->de_avail_count); 240 | src += this->de_avail_count; 241 | len -= this->de_avail_count; 242 | 243 | this->strm.avail_in = zlib_buf_size; 244 | this->strm.next_in = this->de_data; 245 | 246 | // run deflate() 247 | do { 248 | strm.avail_out = zlib_buf_size; 249 | strm.next_out = this->en_data; 250 | int ret = deflate(&(this->strm), Z_NO_FLUSH); //no bad return value 251 | assert(ret != Z_STREAM_ERROR); // state not clobbered 252 | unsigned int have = zlib_buf_size - this->strm.avail_out; 253 | if (fwrite(this->en_data, 1, have,this->file) != have 254 | || ferror(this->file)) { 255 | (void)deflateEnd(&(this->strm)); 256 | cerr<<"unexpected error occured when writing file!"<strm.avail_out == 0); 260 | assert(this->strm.avail_in == 0); // all input will be used 261 | 262 | this->de_avail_count = zlib_buf_size; 263 | this->cur_de_pos = this->de_data; 264 | } 265 | 266 | memcpy(this->cur_de_pos,src, len); 267 | this->cur_de_pos += len; 268 | this->de_avail_count -= len; 269 | this->strm.avail_in += len; 270 | 271 | return true; 272 | } 273 | /** 274 | * finalize_write : finalize write of deflate 275 | * 276 | * @Return: 0 if ok 277 | */ 278 | int zlib_io::finalize_write(){ 279 | this->strm.next_in = this->de_data; 280 | 281 | // run deflate() 282 | do { 283 | strm.avail_out = zlib_buf_size; 284 | strm.next_out = this->en_data; 285 | int ret = deflate(&(this->strm),Z_FINISH); //no bad return value 286 | assert(ret != Z_STREAM_ERROR); // state not clobbered 287 | unsigned int have = zlib_buf_size - this->strm.avail_out; 288 | if (fwrite(this->en_data, 1, have,this->file) != have 289 | || ferror(this->file)) { 290 | (void)deflateEnd(&(this->strm)); 291 | cerr<<"unexpected error occured when writing file!"<strm.avail_out == 0); 295 | assert(this->strm.avail_in == 0); // all input will be used 296 | (void)deflateEnd(&(this->strm)); 297 | return 0; 298 | 
} 299 | 300 | } 301 | 302 | 303 | 304 | -------------------------------------------------------------------------------- /src/data/zlib_io.h: -------------------------------------------------------------------------------- 1 | /************************************************************************* 2 | > File Name: /home/matthew/work/SOL/src/data/zlib_io.h 3 | > Copyright (C) 2013 Yue Wu 4 | > Created Time: Wed 06 Nov 2013 05:01:04 PM 5 | > Descriptions: 6 | ************************************************************************/ 7 | #ifndef HEADER_ZLIB_IO 8 | #define HEADER_ZLIB_IO 9 | 10 | #include "io_interface.h" 11 | 12 | #include 13 | #include 14 | #include 15 | #include "zlib.h" 16 | 17 | 18 | namespace SOL{ 19 | #define ZLIB_BUF_SIZE 16348 20 | 21 | class zlib_io: public io_interface{ 22 | private: 23 | enum RW_MODE{ 24 | mode_null = 0, 25 | mode_read = 1, 26 | mode_write = 2, 27 | }; 28 | 29 | private: 30 | FILE* file; 31 | z_stream strm; 32 | 33 | unsigned char* en_data; //encoded data 34 | unsigned char* de_data; //decoded data 35 | unsigned char* cur_de_pos; //current read position of decoded data 36 | size_t de_avail_count; //available decoded data count 37 | 38 | int rw_mode; 39 | public: 40 | zlib_io(): 41 | file(NULL), en_data(NULL), de_data(NULL), 42 | cur_de_pos(NULL), de_avail_count(0), rw_mode(mode_null){} 43 | 44 | ~zlib_io(){ 45 | this->free_buf(); 46 | } 47 | 48 | private: 49 | bool alloc_buf(); 50 | void free_buf(); 51 | public: 52 | virtual bool open_file(const char* filename, const char* mode); 53 | // bind_stdin: bind the input to stdin 54 | virtual bool open_stdin(); 55 | // bind_stdin: bind the output to stdout 56 | virtual bool open_stdout(); 57 | 58 | virtual void close_file(); 59 | virtual void rewind(); 60 | 61 | /** 62 | * good : test if the io is good 63 | * 64 | * @Return: zero if correct, else zero code 65 | */ 66 | virtual int good(); 67 | 68 | public: 69 | /** 70 | * read_data : read the data from file 71 | * 72 | * @Param 
dst: container to place the read data 73 | * @Param length: length of data of read in bytes 74 | * 75 | * @Return: true if succeed 76 | */ 77 | virtual bool read_data(char* dst, size_t length); 78 | 79 | /** 80 | * read_line : read a line from disk 81 | * 82 | * @Param dst: container to place the read data 83 | * @Param dst_len: length of dst 84 | * 85 | * @Return: pointer to the read line, null if failed 86 | */ 87 | virtual char* read_line(char* &dst, size_t &dst_len); 88 | 89 | /** 90 | * write_data : write content to disk 91 | * 92 | * @Param src: source of the data 93 | * @Param length: length to write the data 94 | * 95 | * @Return: true if succeed 96 | */ 97 | virtual bool write_data(char* src, size_t length); 98 | 99 | private: 100 | /** 101 | * finalize_write : finalize write of deflate 102 | * 103 | * @Return: 0 if ok 104 | */ 105 | int finalize_write(); 106 | }; 107 | } 108 | 109 | #endif 110 | -------------------------------------------------------------------------------- /src/kernel/kernel_RBP.h: -------------------------------------------------------------------------------- 1 | 2 | #pragma once 3 | 4 | #include "kernel_optim.h" 5 | #include 6 | #include 7 | #include 8 | 9 | namespace SOL 10 | { 11 | template 12 | class kernel_RBP: public Kernel_optim 13 | { 14 | 15 | protected: 16 | int Budget; 17 | public: 18 | kernel_RBP(const Params ¶m,DataSet &dataset, 19 | LossFunction &lossFunc); 20 | virtual ~ kernel_RBP(); 21 | 22 | protected: 23 | //this is the core of different updating algorithms 24 | virtual float UpdateWeightVec(const DataPoint &x); 25 | virtual float Predict(const DataPoint &data); 26 | virtual void begin_test(void){} 27 | }; 28 | 29 | template 30 | kernel_RBP:: kernel_RBP(const Params ¶m, 31 | DataSet &dataset, 32 | LossFunction &lossFunc): Kernel_optim(param,dataset, lossFunc) 33 | { 34 | this->id_str = " kernel_RBP"; 35 | this->Budget=param.Budget_set; 36 | } 37 | 38 | template 39 | kernel_RBP::~ kernel_RBP() 40 | { 41 | } 42 | 43 | 
//update weight vector with stochastic gradient descent 44 | template 45 | float kernel_RBP::UpdateWeightVec(const DataPoint &x) 46 | { 47 | float y = this->Predict(x); 48 | if (y*x.label<=0) 49 | { 50 | 51 | SV* support = new SV(x.label,x); 52 | this->add_SV(support); 53 | 54 | } 55 | //delete SV 56 | if(this->size_SV==Budget+1) 57 | { 58 | srand((unsigned)time(NULL)); 59 | int SV_to_delete=rand() % (Budget);//from 0 to Budget-1 60 | this->delete_SV(SV_to_delete); 61 | } 62 | return y; 63 | } 64 | template 65 | float kernel_RBP::Predict(const DataPoint &data) 66 | { 67 | float predict = 0; 68 | 69 | SV* p_predict = this->SV_begin; 70 | while (p_predict!=NULL) 71 | { 72 | predict+=p_predict->SV_alpha* this->kern(p_predict->SV_data,data); 73 | p_predict=p_predict->next; 74 | } 75 | return predict; 76 | } 77 | 78 | 79 | } 80 | -------------------------------------------------------------------------------- /src/kernel/kernel_bogd.h: -------------------------------------------------------------------------------- 1 | 2 | 3 | #pragma once 4 | 5 | #include "kernel_optim.h" 6 | 7 | namespace SOL 8 | { 9 | template 10 | class kernel_bogd: public Kernel_optim 11 | { 12 | 13 | protected: 14 | int Budget; 15 | float lambda; 16 | 17 | public: 18 | kernel_bogd(const Params ¶m,DataSet &dataset, 19 | LossFunction &lossFunc); 20 | virtual ~kernel_bogd(); 21 | 22 | protected: 23 | //this is the core of different updating algorithms 24 | virtual float UpdateWeightVec(const DataPoint &x); 25 | virtual float Predict(const DataPoint &data); 26 | virtual void begin_test(void){} 27 | }; 28 | 29 | template 30 | kernel_bogd::kernel_bogd(const Params ¶m, 31 | DataSet &dataset, 32 | LossFunction &lossFunc): Kernel_optim(param,dataset, lossFunc) 33 | { 34 | this->id_str = "kernel_bogd"; 35 | this->Budget=param.Budget_set; 36 | this->lambda=param.lambda; 37 | this->eta0=param.eta; 38 | } 39 | 40 | template 41 | kernel_bogd::~kernel_bogd() 42 | { 43 | } 44 | 45 | //update weight vector with 
stochastic gradient descent 46 | template 47 | float kernel_bogd::UpdateWeightVec(const 48 | DataPoint &x) 49 | { 50 | float y = this->Predict(x); 51 | 52 | float gt_i = this->lossFunc->GetGradient(x.label,y); 53 | 54 | SV* p_alpha=this->SV_begin; 55 | while(p_alpha!=NULL) 56 | { 57 | p_alpha->SV_alpha=p_alpha->SV_alpha*(1-this->eta0*lambda); 58 | p_alpha=p_alpha->next; 59 | } 60 | if(gt_i!=0) 61 | { 62 | SV* support = new SV(-this->eta0 * gt_i,x); 63 | this->add_SV(support); 64 | } 65 | //delete SV 66 | if(this->size_SV==Budget+1) 67 | this->delete_SV(); 68 | 69 | return y; 70 | } 71 | 72 | template 73 | float kernel_bogd::Predict(const DataPoint &data) 74 | { 75 | float predict = 0; 76 | 77 | SV* p_predict = this->SV_begin; 78 | while (p_predict!=NULL) 79 | { 80 | predict+=p_predict->SV_alpha* this->kern(p_predict->SV_data,data); 81 | p_predict=p_predict->next; 82 | } 83 | return predict; 84 | } 85 | 86 | 87 | 88 | } 89 | -------------------------------------------------------------------------------- /src/kernel/kernel_bpas.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "kernel_optim.h" 4 | #include 5 | 6 | namespace SOL 7 | { 8 | template 9 | class kernel_bpas: public Kernel_optim 10 | { 11 | 12 | protected: 13 | int Budget; 14 | float C_bpas; 15 | public: 16 | kernel_bpas(const Params ¶m,DataSet &dataset, 17 | LossFunction &lossFunc); 18 | virtual ~ kernel_bpas(); 19 | 20 | protected: 21 | //this is the core of different updating algorithms 22 | virtual float UpdateWeightVec(const DataPoint &x); 23 | virtual float Predict(const DataPoint &data); 24 | virtual void begin_test(void){} 25 | }; 26 | 27 | template 28 | kernel_bpas:: kernel_bpas(const Params ¶m, 29 | DataSet &dataset, 30 | LossFunction &lossFunc): Kernel_optim(param, 31 | dataset, lossFunc) 32 | { 33 | this->id_str = " kernel_BPAS"; 34 | this->Budget=param.Budget_set; 35 | this->C_bpas=param.C_bpas; 36 | } 37 | 38 | template 39 | 
kernel_bpas::~kernel_bpas() 40 | { 41 | } 42 | 43 | //update weight vector with stochastic gradient descent 44 | template 45 | float kernel_bpas::UpdateWeightVec( 46 | const DataPoint &x) 47 | { 48 | float y=0; 49 | float *k_t=NULL; 50 | //calculate k_t 51 | if(this->size_SV!=0) 52 | { 53 | SV* p_predict=this->SV_begin; 54 | k_t=new float [this->size_SV]; 55 | int i=0; 56 | while (p_predict!=NULL) 57 | { 58 | k_t[i]=this->kern(p_predict->SV_data,x); 59 | p_predict=p_predict->next; 60 | i++; 61 | } 62 | 63 | //k_t done 64 | 65 | //get prediction 66 | p_predict=this->SV_begin; 67 | i=0; 68 | while (p_predict!=NULL) 69 | { 70 | y+=p_predict->SV_alpha* k_t[i]; 71 | p_predict=p_predict->next; 72 | i++; 73 | } 74 | } 75 | //prediction is in y 76 | float l_t=1-x.label*y; 77 | if(l_t<0) 78 | { 79 | l_t=0; 80 | } 81 | 82 | //get the Hinge Loss 83 | 84 | if (l_t>0) 85 | { 86 | float tao= (std::min)(C_bpas,l_t); 87 | if(this->size_SV* support = new SV(x.label*tao,x); 90 | this->add_SV(support); 91 | } 92 | else //full Budget 93 | { 94 | double Q_star=1000000; 95 | int star=1; 96 | double star_alpha=1.0; 97 | 98 | SV *p_search=this->SV_begin; 99 | 100 | for(int i=0; isize_SV; i++) 101 | { 102 | double k_rt=k_t[i]; 103 | double alpha_r=p_search->SV_alpha; 104 | double beta_t=alpha_r*k_rt+tao*x.label; 105 | double distance=alpha_r*alpha_r+beta_t*beta_t-2*beta_t*alpha_r*k_rt; 106 | double f_rt=y-alpha_r*k_rt+beta_t; 107 | double l_rt=1-x.label*f_rt; 108 | if(l_rt<0) 109 | l_rt=0; 110 | double Q_r=0.5*distance+C_bpas*l_rt; 111 | if(Q_rnext; 118 | } 119 | this->delete_SV(star); 120 | SV* support = new SV(float(star_alpha),x); 121 | this->add_SV(support); 122 | } 123 | } 124 | delete [] k_t; 125 | return y; 126 | } 127 | 128 | template 129 | float kernel_bpas::Predict(const DataPoint &data) 130 | { 131 | float predict = 0; 132 | 133 | SV* p_predict = this->SV_begin; 134 | while (p_predict!=NULL) 135 | { 136 | predict+=p_predict->SV_alpha* this->kern(p_predict->SV_data,data); 137 | 
p_predict=p_predict->next; 138 | } 139 | return predict; 140 | } 141 | 142 | } 143 | -------------------------------------------------------------------------------- /src/kernel/kernel_fogd.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | #include 3 | #include 4 | #include "kernel_optim.h" 5 | #include 6 | #include 7 | 8 | #define B_cos 1.273239 9 | #define P_cos 0.225 10 | #define C_cos -0.40528 11 | #define pi_cos 3.1415926 12 | 13 | namespace SOL 14 | { 15 | template 16 | class kernel_fogd: public Kernel_optim 17 | { 18 | 19 | protected: 20 | int D; 21 | IndexType u_dimension; 22 | s_array w_fogd; 23 | s_array u; 24 | s_array w_fogd_sum; 25 | s_array ux; 26 | s_array ux_cos; 27 | int num_update; 28 | 29 | 30 | 31 | double a; 32 | std::default_random_engine generator; 33 | std::normal_distribution distribution; 34 | 35 | public: 36 | kernel_fogd(const Params ¶m,DataSet &dataset, 37 | LossFunction &lossFunc); 38 | 39 | virtual ~kernel_fogd(); 40 | 41 | protected: 42 | //this is the core of different updating algorithms 43 | virtual float UpdateWeightVec(const DataPoint &x); 44 | virtual float Predict(const DataPoint &data); 45 | virtual void begin_test(void); 46 | }; 47 | 48 | template 49 | kernel_fogd::kernel_fogd(const Params ¶m, 50 | DataSet &dataset, 51 | LossFunction &lossFunc): Kernel_optim(param,dataset, lossFunc) 52 | { 53 | this->id_str = "kernel_fogd"; 54 | this->D=param.D_set; 55 | 56 | w_fogd.resize(2 * D); 57 | w_fogd.zeros(); 58 | w_fogd_sum.resize(2*D); 59 | w_fogd_sum.zeros(); 60 | this->ux.resize(D); 61 | this->ux_cos.resize(2 * D); 62 | num_update=0; 63 | 64 | this->u_dimension=0; 65 | this->distribution=normal_distribution(0.0,sqrt(param.gamma*2)); 66 | this->generator=default_random_engine((unsigned)time(NULL)); 67 | this->eta0 = param.eta; 68 | } 69 | 70 | template 71 | kernel_fogd::~kernel_fogd() 72 | { 73 | } 74 | 75 | //update weight vector with stochastic gradient descent 76 | 
template 77 | float kernel_fogd::UpdateWeightVec(const DataPoint &x) 78 | { 79 | 80 | IndexType x_dimension=x.max_index; 81 | //generate u 82 | if(u_dimensionu.reserve(D * x_dimension); 86 | this->u.resize(D * x_dimension); 87 | for(IndexType i=(D*u_dimension); i<(D*x_dimension); i++) 88 | this->u[i]=distribution(generator); 89 | this->u_dimension=x_dimension; 90 | } 91 | 92 | this->ux.zeros(); 93 | 94 | size_t index_begin; 95 | float feature; 96 | for(size_t j=0; j 3.14159265) 114 | ux[i]-= 6.28318531; 115 | a = B_cos * ux[i] + C_cos * ux[i] * abs(ux[i]); 116 | *p1 = P_cos * (a * abs(a) - a) + a; 117 | 118 | *p2=sqrt(1-(*p1)*(*p1)); 119 | if(ux[i]<0) 120 | *p2=-(*p2); 121 | 122 | p1++; 123 | p2++; 124 | } 125 | 126 | double y=0; 127 | for(int i=0; i<2*D; i++) 128 | y=y+w_fogd[i]*ux_cos[i]; 129 | 130 | if(y*x.label<1) 131 | { 132 | num_update++; 133 | for(int i=0; i<2*D; i++) 134 | { 135 | w_fogd[i]=w_fogd[i]+this->eta0*x.label*ux_cos[i]; 136 | w_fogd_sum[i]=w_fogd_sum[i]+w_fogd[i]; 137 | } 138 | } 139 | return float(y); 140 | } 141 | 142 | template 143 | float kernel_fogd::Predict(const DataPoint &data) 144 | { 145 | 146 | this->ux.zeros(); 147 | 148 | size_t index_begin; 149 | float feature; 150 | for(size_t j=0; j 3.14159265) 168 | ux[i]-= 6.28318531; 169 | a = B_cos * ux[i] + C_cos * ux[i] * abs(ux[i]); 170 | *p1 = P_cos * (a * abs(a) - a) + a; 171 | *p2=sqrt(1-(*p1)*(*p1)); 172 | if(ux[i]<0) 173 | *p2=-(*p2); 174 | p1++; 175 | p2++; 176 | } 177 | 178 | double y=0; 179 | for(int i=0; i<2*D; i++) 180 | y=y+w_fogd[i]*ux_cos[i]; 181 | return float(y); 182 | } 183 | 184 | template 185 | void kernel_fogd::begin_test(void) 186 | { 187 | for(int i=0; i<2*D; i++) 188 | { 189 | w_fogd[i]=w_fogd_sum[i]/num_update; 190 | } 191 | } 192 | 193 | } 194 | -------------------------------------------------------------------------------- /src/kernel/kernel_forgetron.h: -------------------------------------------------------------------------------- 1 | 2 | #pragma once 3 | 4 | 
#include "kernel_optim.h"
#include <cmath>
#include <algorithm>
#include "../data/DataPoint.h"

namespace SOL
{
// Forgetron (Dekel, Shalev-Shwartz, Singer): a budgeted Perceptron that
// shrinks all coefficients and evicts the oldest support vector whenever
// the budget is exceeded.
template <typename FeatType, typename LabelType>
class kernel_forgetron : public Kernel_optim<FeatType, LabelType>
{
protected:
    int Budget;          // maximal number of support vectors kept
    int err_until_now;   // mistake counter used by the shrinking rule
    double Q;            // accumulated damage of past shrinking steps

public:
    kernel_forgetron(const Params &param, DataSet<FeatType, LabelType> &dataset,
                     LossFunction<LabelType> &lossFunc);

    virtual ~kernel_forgetron();

protected:
    // core of the updating algorithm
    virtual float UpdateWeightVec(const DataPoint<FeatType, LabelType> &x);
    virtual float Predict(const DataPoint<FeatType, LabelType> &data);
    virtual void begin_test(void) {}
};

template <typename FeatType, typename LabelType>
kernel_forgetron<FeatType, LabelType>::kernel_forgetron(const Params &param,
        DataSet<FeatType, LabelType> &dataset,
        LossFunction<LabelType> &lossFunc)
    : Kernel_optim<FeatType, LabelType>(param, dataset, lossFunc)
{
    this->id_str = " kernel_forgetron";
    this->Budget = param.Budget_set;
    this->err_until_now = 0;
    this->Q = 0;
}

template <typename FeatType, typename LabelType>
kernel_forgetron<FeatType, LabelType>::~kernel_forgetron()
{
}

// Perceptron step, then (if over budget) shrink every alpha by phi and
// drop the oldest support vector.
template <typename FeatType, typename LabelType>
float kernel_forgetron<FeatType, LabelType>::UpdateWeightVec(const DataPoint<FeatType, LabelType> &x)
{
    float y = this->Predict(x);
    if (y * x.label <= 0)
    {
        err_until_now++;

        SV<FeatType, LabelType> *support = new SV<FeatType, LabelType>(x.label, x);
        this->add_SV(support);
    }

    // budget exceeded: compute the shrinking factor phi and evict
    if (this->size_SV == Budget + 1)
    {
        float predict = this->Predict(this->SV_begin->SV_data);

        double mu = this->SV_begin->SV_data.label * predict;
        double delta = this->SV_begin->SV_alpha / this->SV_begin->SV_data.label;

        // phi solves coeA*phi^2 + coeB*phi + coeC <= 0, clipped to [0, 1]
        double coeA = delta * delta - 2 * delta * mu;
        double coeB = 2 * delta;
        double coeC = Q - (15.0 / 32.0) * err_until_now;

        double phi = 0;
        if (coeA == 0)
            phi = (std::max)(0.0, (std::min)(1.0, -coeC / coeB));
        else if (coeA > 0)
        {
            if (coeA + coeB + coeC <= 0)
                phi = 1;
            else
                phi = (-coeB + sqrt(coeB * coeB - 4 * coeA * coeC)) / (2 * coeA);
        }
        else // coeA < 0
        {
            if (coeA + coeB + coeC <= 0)
                phi = 1;
            else
                phi = (-coeB - sqrt(coeB * coeB - 4 * coeA * coeC)) / (2 * coeA);
        }

        // alpha <- phi * alpha for every support vector
        for (SV<FeatType, LabelType> *sv = this->SV_begin; sv != NULL; sv = sv->next)
            sv->SV_alpha = (float)(sv->SV_alpha * phi);

        Q = Q + (delta * phi) * (delta * phi) + 2 * delta * phi * (1 - phi * mu);
        this->delete_SV(0);
    }
    return y;
}

// Kernel expansion over the current support set.
template <typename FeatType, typename LabelType>
float kernel_forgetron<FeatType, LabelType>::Predict(const DataPoint<FeatType, LabelType> &data)
{
    float predict = 0;
    for (SV<FeatType, LabelType> *node = this->SV_begin; node != NULL; node = node->next)
        predict += node->SV_alpha * this->kern(node->SV_data, data);
    return predict;
}

}
--------------------------------------------------------------------------------
/src/kernel/kernel_nogd.h:
--------------------------------------------------------------------------------


#pragma once

#include "kernel_optim.h"
#include <Eigen/Dense>
#include "cmath"
using namespace Eigen;

namespace SOL
{
// NOGD: Nystrom Online Gradient Descent -- kernel phase until the budget is
// full, then an eigendecomposition of the budget kernel matrix yields a
// linear feature map (M_nogd) used for the remaining stream.
template <typename FeatType, typename LabelType>
class kernel_nogd : public Kernel_optim<FeatType, LabelType>
{
protected:
    int k_nogd;              // rank of the Nystrom approximation
    MatrixXf *K_budget;      // kernel matrix over the budgeted SVs
    virtual ~kernel_nogd();
    int Budget;
    VectorXf *w_nogd;        // linear weights in the Nystrom feature space
    VectorXf *w_nogd_sum;    // running sum for weight averaging
    MatrixXf *M_nogd;        // feature map: kt -> zt
    bool flag;               // 0 = kernel phase, 1 = linear phase
    int num_update;
    float eta1;              // learning rate of the linear phase

public:
    kernel_nogd(const Params &param, DataSet<FeatType, LabelType> &dataset,
                LossFunction<LabelType> &lossFunc);

protected:
    // core of the updating algorithm
    virtual float UpdateWeightVec(const DataPoint<FeatType, LabelType> &x);
    virtual float Predict(const DataPoint<FeatType, LabelType> &data);
    virtual void begin_test(void);
};

template <typename FeatType, typename LabelType>
kernel_nogd<FeatType, LabelType>::kernel_nogd(const Params &param,
        DataSet<FeatType, LabelType> &dataset,
        LossFunction<LabelType> &lossFunc)
    : Kernel_optim<FeatType, LabelType>(param, dataset, lossFunc)
{
    eta1 = param.eta1;
this->eta0=param.eta; 46 | num_update=0; 47 | this->id_str = "kernel_nogd"; 48 | this->k_nogd=param.k_nogd; 49 | this->Budget=param.Budget_set; 50 | this->K_budget=new MatrixXf(Budget,Budget); 51 | for(int i=0; iw_nogd=new VectorXf(k_nogd); 56 | for(int i=0; iw_nogd_sum=new VectorXf(k_nogd); 62 | for(int i=0; iM_nogd= new MatrixXf(k_nogd,Budget); 68 | this->flag=0; 69 | } 70 | 71 | template 72 | kernel_nogd::~kernel_nogd() 73 | { 74 | delete w_nogd; 75 | delete M_nogd; 76 | delete K_budget; 77 | } 78 | 79 | //update weight vector with stochastic gradient descent 80 | template 81 | float kernel_nogd::UpdateWeightVec(const DataPoint &x) 82 | { 83 | float y=0; 84 | VectorXf kt(this->size_SV); 85 | VectorXf zt(k_nogd); 86 | //calculate k_t 87 | if((this->size_SV!=0)&&(flag==0)) 88 | { 89 | SV* p_predict=this->SV_begin; 90 | int i=0; 91 | while (p_predict!=NULL) 92 | { 93 | kt(i)=this->kern(p_predict->SV_data,x); 94 | p_predict=p_predict->next; 95 | i++; 96 | } 97 | //k_t done 98 | 99 | //get prediction 100 | p_predict=this->SV_begin; 101 | i=0; 102 | while (p_predict!=NULL) 103 | { 104 | y+=p_predict->SV_alpha* kt(i); 105 | p_predict=p_predict->next; 106 | i++; 107 | } 108 | } 109 | if(flag!=0) //linear predict 110 | { 111 | SV* p_predict=this->SV_begin; 112 | int i=0; 113 | while (p_predict!=NULL) 114 | { 115 | kt[i]=this->kern(p_predict->SV_data,x); 116 | p_predict=p_predict->next; 117 | i++; 118 | } 119 | zt=(*M_nogd)*kt; 120 | y=(*w_nogd).dot(zt); 121 | } 122 | //update 123 | if(y*x.label<1) 124 | { 125 | if(this->size_SV* support = new SV(x.label*this->eta0,x); 128 | this->add_SV(support); 129 | 130 | for(int i=0; isize_SV-1; i++) 131 | { 132 | (*K_budget)(i,this->size_SV-1)=kt(i); 133 | (*K_budget)(this->size_SV-1,i)=kt(i); 134 | } 135 | 136 | } 137 | else 138 | { 139 | if(flag==0) //SVD 140 | { 141 | this->curIterNum=1; 142 | flag=1; 143 | EigenSolver es(*K_budget); 144 | MatrixXcf V = es.eigenvectors(); 145 | //cout< 185 | void kernel_nogd::begin_test(void) 186 
| { 187 | (*w_nogd)=(*w_nogd_sum)/float(num_update); 188 | } 189 | 190 | template 191 | float kernel_nogd::Predict(const DataPoint &data) 192 | { 193 | float y=0; 194 | SV* p_predict=this->SV_begin; 195 | int i=0; 196 | VectorXf kt(this->size_SV); 197 | VectorXf zt(k_nogd); 198 | while (p_predict!=NULL) 199 | { 200 | kt[i]=this->kern(p_predict->SV_data,data); 201 | p_predict=p_predict->next; 202 | i++; 203 | } 204 | zt=(*M_nogd)*kt; 205 | y=(*w_nogd).dot(zt); 206 | return y; 207 | } 208 | 209 | 210 | 211 | } 212 | -------------------------------------------------------------------------------- /src/kernel/kernel_optim.h: -------------------------------------------------------------------------------- 1 | 2 | 3 | #pragma once 4 | #include "../data/DataPoint.h" 5 | #include "../data/DataSet.h" 6 | #include "../loss/LossFunction.h" 7 | #include "../common/init_param.h" 8 | #include "../common/util.h" 9 | 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | namespace SOL 16 | { 17 | 18 | /** 19 | * namespace: Sparse Online Learning 20 | */ 21 | template 22 | struct SV 23 | { 24 | public: 25 | float SV_alpha_sum; 26 | float SV_alpha; 27 | DataPoint SV_data; 28 | SV * next; 29 | 30 | SV(float alpha, DataPoint x) 31 | { 32 | SV_alpha_sum=0; 33 | SV_alpha=alpha; 34 | SV_data= x.clone(); 35 | next=NULL; 36 | } 37 | }; 38 | 39 | 40 | template class Kernel_optim 41 | { 42 | //Iteration 43 | protected: 44 | //iteration number 45 | unsigned int curIterNum; 46 | float eta_now; 47 | //parameters 48 | float eta0; //learning rate 49 | float gamma; 50 | int weight; 51 | 52 | bool use_average_weight; 53 | DataSet &dataSet; 54 | 55 | //weight vector 56 | protected: 57 | SV * SV_begin; 58 | SV * SV_end; 59 | public: 60 | int size_SV; 61 | 62 | protected: 63 | LossFunction *lossFunc; 64 | 65 | protected: 66 | string id_str; 67 | 68 | public: 69 | void PrintOptInfo()const 70 | { 71 | printf("--------------------------------------------------\n"); 72 | printf("Algorithm: 
%s\n",this->Id_Str().c_str()); 73 | } 74 | 75 | public: 76 | Kernel_optim(const Params ¶m,DataSet &dataset, LossFunction &lossFunc); 77 | 78 | public: 79 | void SetParameter(float gamma_a=8, float eta_a = -1); 80 | 81 | virtual ~Kernel_optim() 82 | { 83 | SV * SV_free; 84 | for(int i=0;inext; 88 | delete SV_free; 89 | } 90 | } 91 | const string& Id_Str() const 92 | { 93 | return this->id_str; 94 | } 95 | 96 | protected: 97 | //train the data 98 | float Train(); 99 | //predict a new feature 100 | void sum_SV(); 101 | //this is the core of different updating algorithms 102 | //return the predict 103 | virtual float UpdateWeightVec(const DataPoint &x) = 0; 104 | virtual float Predict(const DataPoint &data) = 0; 105 | virtual void begin_test(void)=0; 106 | public: 107 | 108 | float kern( 109 | const DataPoint &SV_data, 110 | const DataPoint &x); 111 | void add_SV(SV *p_newSV); 112 | void delete_SV(int index_SV=0); 113 | public: 114 | //learn a model 115 | inline float Learn(int numOfTimes = 1); 116 | //learn a model and return the mistake rate and its variance 117 | float Learn(float &aveErrRate, float &varErrRate, float &sparseRate, int numOfTimes = 1); 118 | //test the performance on the given set 119 | float Test(const Params ¶m, DataSet &testSet); 120 | }; 121 | 122 | template 123 | void Kernel_optim::SetParameter(float gamma_a , float eta_a) { 124 | this->gamma = gamma_a; 125 | this->eta0 = eta_a; 126 | } 127 | 128 | 129 | template 130 | Kernel_optim::Kernel_optim(const Params ¶m,DataSet &dataset, 131 | LossFunction &lossFunc): dataSet(dataset) 132 | { 133 | this->lossFunc = &lossFunc; 134 | //this->eta0 = init_eta;///////////////////////////////////////// 135 | this->curIterNum = 0; 136 | 137 | this->size_SV=0; 138 | this->SV_begin=NULL; 139 | this->SV_end=NULL; 140 | this->weight=param.weight_sum; 141 | //this->sigma=sigma_kernel; 142 | } 143 | 144 | ////////////////////////////// 145 | 146 | template 147 | float Kernel_optim::Train() 148 | { 149 | float 
errorNum=0; 150 | if(dataSet.Rewind() == false) 151 | return 1.f; 152 | //reset 153 | while(1) 154 | { 155 | const DataChunk &chunk = dataSet.GetChunk(); 156 | //all the data has been processed! 157 | if(chunk.dataNum == 0) 158 | break; 159 | 160 | for (size_t i = 0; i < chunk.dataNum; i++) 161 | { 162 | if(curIterNum%10000==0) 163 | cout<curIterNum++; 166 | const DataPoint &data = chunk.data[i]; 167 | float y = this->UpdateWeightVec(data); 168 | //loss 169 | if (this->lossFunc->IsCorrect(data.label,y) == false) 170 | { 171 | errorNum++; 172 | } 173 | } 174 | dataSet.FinishRead(); 175 | } 176 | cout<<"\n#Training Instances:"< 182 | float Kernel_optim::Learn(float &aveErrRate, float &varErrRate, 183 | float &sparseRate, int numOfTimes) 184 | { 185 | float * errorRateVec = new float[numOfTimes]; 186 | 187 | for (int i = 0; i < numOfTimes; i++) 188 | { 189 | //random order 190 | 191 | errorRateVec[i] = this->Train(); 192 | } 193 | aveErrRate = Average(errorRateVec, numOfTimes); 194 | varErrRate = Variance(errorRateVec, numOfTimes); 195 | sparseRate=1; 196 | 197 | delete []errorRateVec; 198 | 199 | return aveErrRate; 200 | } 201 | 202 | //learn a model 203 | template 204 | float Kernel_optim::Learn(int numOfTimes) 205 | { 206 | float aveErrRate, varErrRate, sparseRate; 207 | return this->Learn(aveErrRate, varErrRate,sparseRate, numOfTimes); 208 | }//??? 209 | 210 | //test the performance on the given set 211 | template 212 | float Kernel_optim::Test(const Params ¶m,DataSet &testSet) 213 | { 214 | if(param.ave==0) 215 | { 216 | begin_test(); 217 | } 218 | if(testSet.Rewind() == false) 219 | exit(0); 220 | float errorRate(0); 221 | //test 222 | while(1) 223 | { 224 | const DataChunk &chunk = testSet.GetChunk(); 225 | if(chunk.dataNum == 0) //"all the data has been processed!" 
226 | break; 227 | for (size_t i = 0; i < chunk.dataNum; i++) 228 | { 229 | const DataPoint &data = chunk.data[i]; 230 | //predict 231 | float predict = this->Predict(data); 232 | if (this->lossFunc->IsCorrect(data.label,predict) == false) 233 | errorRate++; 234 | } 235 | testSet.FinishRead(); 236 | } 237 | errorRate /= testSet.size(); 238 | return errorRate; 239 | } 240 | 241 | 242 | template 243 | float Kernel_optim::kern(const DataPoint &SV_data,const DataPoint &x) 244 | { 245 | float sum=0; 246 | int i=0; 247 | int j=0; 248 | int size_SV_dimension=SV_data.indexes.size(); 249 | int size_data_dimension=x.indexes.size(); 250 | 251 | 252 | while((i!=size_SV_dimension)&&(j!=size_data_dimension)) 253 | { 254 | if((SV_data.indexes[i])>(x.indexes[j])) 255 | { 256 | sum=sum+x.features[j]*x.features[j]; 257 | j++; 258 | } 259 | else if((SV_data.indexes[i])<(x.indexes[j])) 260 | { 261 | sum=sum+SV_data.features[i]*SV_data.features[i]; 262 | i++; 263 | } 264 | else 265 | { 266 | sum=sum+(SV_data.features[i]-x.features[j])*(SV_data.features[i]-x.features[j]); 267 | i++; 268 | j++; 269 | } 270 | } 271 | if(i==size_SV_dimension)//i first reach the end 272 | { 273 | for(int a=j; agamma); 288 | float a=exp(sum); 289 | return a; 290 | 291 | } 292 | 293 | 294 | template 295 | void Kernel_optim::add_SV(SV *p_newSV) 296 | { 297 | if(SV_end!=NULL) 298 | { 299 | SV_end->next=p_newSV; 300 | SV_end=p_newSV; 301 | } 302 | else 303 | { 304 | SV_begin=p_newSV; 305 | SV_end=p_newSV; 306 | } 307 | size_SV++; 308 | } 309 | 310 | template 311 | void Kernel_optim::delete_SV(int index_SV) 312 | { 313 | //index_SV is the index of SV to be deleted from 0 to B-1 314 | SV* p_delete=SV_begin; 315 | SV* q_delete=NULL; 316 | if((index_SV!=0)&&(index_SV!=size_SV-1)) 317 | { 318 | int i=0; 319 | while(inext; 322 | i++; 323 | } 324 | q_delete=p_delete->next; 325 | p_delete->next=q_delete->next; 326 | delete q_delete; 327 | } 328 | else if(index_SV==0) 329 | { 330 | SV_begin=p_delete->next; 331 | delete 
p_delete; 332 | } 333 | else 334 | { 335 | int i=0; 336 | while(inext; 339 | i++; 340 | } 341 | q_delete=p_delete->next; 342 | p_delete->next=NULL; 343 | delete q_delete; 344 | SV_end=p_delete; 345 | } 346 | size_SV--; 347 | } 348 | template 349 | void Kernel_optim::sum_SV() 350 | { 351 | //float weight_now = (float(weight+1))/(float(curIterNum+weight)); 352 | SV* p_sum=SV_begin; 353 | while(p_sum!=NULL) 354 | { 355 | //p_sum->SV_alpha_sum=p_sum->SV_alpha_sum*(1-weight_now)+p_sum->SV_alpha*weight_now; 356 | p_sum->SV_alpha_sum=p_sum->SV_alpha_sum+p_sum->SV_alpha; 357 | p_sum=p_sum->next; 358 | } 359 | } 360 | 361 | 362 | 363 | } 364 | -------------------------------------------------------------------------------- /src/kernel/kernel_pa.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "kernel_optim.h" 4 | 5 | 6 | namespace SOL 7 | { 8 | template 9 | class kernel_pa: public Kernel_optim 10 | { 11 | public: 12 | kernel_pa(const Params ¶m,DataSet &dataset, 13 | LossFunction &lossFunc); 14 | virtual ~kernel_pa(); 15 | float C; 16 | protected: 17 | //this is the core of different updating algorithms 18 | virtual float UpdateWeightVec(const DataPoint &x); 19 | virtual float Predict(const DataPoint &data); 20 | virtual void begin_test(void){} 21 | }; 22 | 23 | template 24 | kernel_pa::kernel_pa(const Params ¶m, 25 | DataSet &dataset, 26 | LossFunction &lossFunc): Kernel_optim(param,dataset, lossFunc) 27 | { 28 | this->id_str = "kernel_pa"; 29 | this->C=param.C; 30 | } 31 | 32 | template 33 | kernel_pa::~kernel_pa() 34 | { 35 | } 36 | 37 | //update weight vector with stochastic gradient descent 38 | template 39 | float kernel_pa::UpdateWeightVec(const DataPoint &x) 40 | { 41 | float y = this->Predict(x); 42 | 43 | float lt=1-x.label*y; 44 | //cout<0) 49 | { 50 | SV* support = new SV(x.label*lt,x); 51 | add_SV(support); 52 | } 53 | return y; 54 | } 55 | 56 | 57 | template 58 | float kernel_pa::Predict(const 
DataPoint &data) 59 | { 60 | float predict = 0; 61 | 62 | SV* p_predict = this->SV_begin; 63 | while (p_predict!=NULL) 64 | { 65 | predict+=p_predict->SV_alpha* kern(p_predict->SV_data,data); 66 | p_predict=p_predict->next; 67 | } 68 | return predict; 69 | } 70 | 71 | 72 | 73 | } 74 | -------------------------------------------------------------------------------- /src/kernel/kernel_perceptron.h: -------------------------------------------------------------------------------- 1 | 2 | 3 | #pragma once 4 | 5 | #include "kernel_optim.h" 6 | 7 | namespace SOL 8 | { 9 | template 10 | class kernel_perceptron: public Kernel_optim 11 | { 12 | public: 13 | kernel_perceptron(const Params ¶m,DataSet &dataset, 14 | LossFunction &lossFunc); 15 | virtual ~ kernel_perceptron(); 16 | 17 | protected: 18 | //this is the core of different updating algorithms 19 | virtual float UpdateWeightVec(const DataPoint &x); 20 | virtual float Predict(const DataPoint &data); 21 | virtual void begin_test(void){} 22 | }; 23 | 24 | template 25 | kernel_perceptron:: kernel_perceptron(const Params ¶m, 26 | DataSet &dataset, 27 | LossFunction &lossFunc): Kernel_optim(param,dataset, lossFunc) 28 | { 29 | this->id_str = " kernel_perceptron"; 30 | } 31 | 32 | template 33 | kernel_perceptron::~ kernel_perceptron() 34 | { 35 | } 36 | 37 | //update weight vector with stochastic gradient descent 38 | template 39 | float kernel_perceptron::UpdateWeightVec(const DataPoint &x) 40 | { 41 | float y = this->Predict(x); 42 | 43 | if (y*x.label<=0) 44 | { 45 | SV* support = new SV(x.label,x); 46 | 47 | this->add_SV(support); 48 | } 49 | return y; 50 | } 51 | template 52 | float kernel_perceptron::Predict(const DataPoint &data) 53 | { 54 | float predict = 0; 55 | 56 | SV* p_predict = this->SV_begin; 57 | while (p_predict!=NULL) 58 | { 59 | predict+=p_predict->SV_alpha* this->kern(p_predict->SV_data,data); 60 | p_predict=p_predict->next; 61 | } 62 | return predict; 63 | } 64 | 65 | 66 | } 67 | 
-------------------------------------------------------------------------------- /src/kernel/kernel_projectron.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "kernel_optim.h" 4 | 5 | namespace SOL 6 | { 7 | template 8 | class kernel_projectron: public Kernel_optim 9 | { 10 | 11 | protected: 12 | s_array K_inverse; 13 | int Budget; 14 | public: 15 | kernel_projectron(const Params ¶m,DataSet &dataset, 16 | LossFunction &lossFunc); 17 | virtual ~ kernel_projectron(); 18 | 19 | protected: 20 | //this is the core of different updating algorithms 21 | virtual float UpdateWeightVec(const DataPoint &x); 22 | virtual float Predict(const DataPoint &data); 23 | virtual void begin_test(void){}; 24 | }; 25 | 26 | template 27 | kernel_projectron:: kernel_projectron(const Params ¶m, 28 | DataSet &dataset, 29 | LossFunction &lossFunc): Kernel_optim(param,dataset, lossFunc) 30 | { 31 | this->Budget=param.Budget_set; 32 | this->id_str = " kernel_projectron"; 33 | this->K_inverse.resize(Budget*Budget); 34 | this->K_inverse.zeros(); 35 | } 36 | 37 | template 38 | kernel_projectron::~kernel_projectron() 39 | { 40 | } 41 | 42 | //add by yuewu: 2013/12/11 43 | //Memory optimization 44 | 45 | //update weight vector with stochastic gradient descent 46 | template 47 | float kernel_projectron::UpdateWeightVec(const DataPoint &x) 48 | { 49 | float y=0; 50 | float *k_t=NULL; 51 | //calculate k_t 52 | if(this->size_SV!=0) 53 | { 54 | SV* p_predict=this->SV_begin; 55 | k_t=new float [this->size_SV]; 56 | int i=0; 57 | while (p_predict!=NULL) 58 | { 59 | k_t[i]=this->kern(p_predict->SV_data,x); 60 | p_predict=p_predict->next; 61 | i++; 62 | } 63 | 64 | //k_t done 65 | 66 | //get prediction 67 | p_predict=this->SV_begin; 68 | i=0; 69 | while (p_predict!=NULL) 70 | { 71 | y+=p_predict->SV_alpha* k_t[i]; 72 | p_predict=p_predict->next; 73 | i++; 74 | } 75 | } 76 | //prediction is in y 77 | float l_t=1-x.label*y; 78 | // if there 
is mistake, make update 79 | if (y*x.label<=0) 80 | { 81 | if(this->size_SV==0) 82 | { 83 | 84 | SV* support = new SV(x.label,x); 85 | 86 | this->add_SV(support); 87 | 88 | //ini K_inverse 89 | K_inverse[0]=1; 90 | } 91 | else //have SV 92 | { 93 | // calculate d_star=K_t_inver*k_t; 94 | float * d_star=new float [this->size_SV]; 95 | for(int i=0; isize_SV; i++) 96 | { 97 | d_star[i]=0; 98 | for(int j=0; jsize_SV; j++) 99 | { 100 | d_star[i]=d_star[i]+K_inverse[i*Budget+j]*k_t[j]; 101 | } 102 | } 103 | 104 | //caculate delta 105 | double k_t_d_star=0; 106 | for(int i=0; isize_SV; i++) 107 | { 108 | k_t_d_star=k_t_d_star+k_t[i]*d_star[i]; 109 | } 110 | double delta_project=1-k_t_d_star; 111 | 112 | 113 | //full budget projectron 114 | if(this->size_SV==Budget) 115 | { 116 | SV *p_predict=this->SV_begin; 117 | for(int i=0; isize_SV; i++) 118 | { 119 | p_predict->SV_alpha=p_predict->SV_alpha+x.label*d_star[i]; 120 | p_predict=p_predict->next; 121 | } 122 | } 123 | else // not full 124 | { 125 | //add SV 126 | 127 | SV* support = new SV(x.label,x); 128 | this->add_SV(support); 129 | 130 | 131 | //updata K_inverse 132 | for(int i=0; isize_SV-1; i++) 133 | { 134 | for(int j=0; jsize_SV-1; j++) 135 | { 136 | K_inverse[i*Budget+j]=K_inverse[i*Budget+j]+d_star[i]*d_star[j]/delta_project; 137 | } 138 | } 139 | for(int i=0; isize_SV-1; i++) 140 | { 141 | K_inverse[i*Budget+this->size_SV-1]=(-1)*d_star[i]/delta_project; 142 | K_inverse[(this->size_SV-1)*Budget+i]=(-1)*d_star[i]/delta_project; 143 | } 144 | K_inverse[(this->size_SV-1)*Budget+(this->size_SV-1)]=1/delta_project; 145 | } 146 | delete[] d_star; 147 | } 148 | } 149 | delete[] k_t; 150 | return y; 151 | } 152 | 153 | 154 | template 155 | float kernel_projectron::Predict(const DataPoint &data) 156 | { 157 | float predict = 0; 158 | 159 | SV* p_predict = this->SV_begin; 160 | while (p_predict!=NULL) 161 | { 162 | predict+=p_predict->SV_alpha* this->kern(p_predict->SV_data,data); 163 | p_predict=p_predict->next; 164 | } 
165 | return predict; 166 | } 167 | } 168 | -------------------------------------------------------------------------------- /src/kernel/kernel_projectronpp.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "kernel_optim.h" 4 | 5 | namespace SOL 6 | { 7 | template 8 | class kernel_projectronpp: public Kernel_optim 9 | { 10 | 11 | protected: 12 | int Budget; 13 | float U; 14 | s_array K_inverse; 15 | s_array K_t; 16 | 17 | public: 18 | kernel_projectronpp(const Params ¶m,DataSet &dataset, 19 | LossFunction &lossFunc); 20 | virtual ~kernel_projectronpp(); 21 | 22 | protected: 23 | //this is the core of different updating algorithms 24 | virtual float UpdateWeightVec(const DataPoint &x); 25 | virtual float Predict(const DataPoint &data); 26 | virtual void begin_test(void){} 27 | }; 28 | 29 | template 30 | kernel_projectronpp:: kernel_projectronpp(const Params ¶m, 31 | DataSet &dataset, 32 | LossFunction &lossFunc): Kernel_optim(param,dataset, lossFunc) 33 | { 34 | this->id_str = " kernel_projectronpp"; 35 | this->Budget=param.Budget_set; 36 | this->U=(1.f/4.f)*sqrtf((Budget+1.f)/logf(Budget+1.f)); 37 | 38 | this->K_inverse.resize(Budget*Budget); 39 | this->K_inverse.zeros(); 40 | 41 | this->K_t.resize(Budget * Budget); 42 | this->K_t.zeros(); 43 | } 44 | 45 | template 46 | kernel_projectronpp::~ kernel_projectronpp() 47 | { 48 | } 49 | 50 | 51 | //update weight vector with stochastic gradient descent 52 | template 53 | float kernel_projectronpp::UpdateWeightVec(const DataPoint &x) 54 | { 55 | float y=0; 56 | float *k_t=NULL; 57 | //calculate k_t 58 | if(this->size_SV!=0) 59 | { 60 | SV* p_predict=this->SV_begin; 61 | k_t=new float [this->size_SV]; 62 | int i=0; 63 | 64 | while (p_predict!=NULL) 65 | { 66 | k_t[i]=this->kern(p_predict->SV_data,x); 67 | p_predict=p_predict->next; 68 | i++; 69 | } 70 | //k_t done 71 | 72 | //get prediction 73 | p_predict=this->SV_begin; 74 | i=0; 75 | while 
(p_predict!=NULL) 76 | { 77 | y+=p_predict->SV_alpha* k_t[i]; 78 | p_predict=p_predict->next; 79 | i++; 80 | } 81 | } 82 | //prediction is in y 83 | 84 | // if there is mistake, make update 85 | if(this->size_SV==0) 86 | { 87 | 88 | SV* support = new SV(x.label,x); 89 | 90 | this->add_SV(support); 91 | 92 | //ini K_inverse 93 | K_inverse[0]=1; 94 | K_t[0]=1; 95 | } 96 | else //have SV 97 | { 98 | float l_t=1-x.label*y; 99 | if(y*x.label<=0) 100 | { 101 | // calculate d_star=K_t_inver*k_t; 102 | float * d_star=new float [this->size_SV]; 103 | for(int i=0; isize_SV; i++) 104 | { 105 | d_star[i]=0; 106 | for(int j=0; jsize_SV; j++) 107 | { 108 | d_star[i]=d_star[i]+K_inverse[i*Budget+j]*k_t[j]; 109 | } 110 | } 111 | 112 | //caculate delta 113 | float k_t_d_star=0; 114 | for(int i=0; isize_SV; i++) 115 | { 116 | k_t_d_star=k_t_d_star+k_t[i]*d_star[i]; 117 | } 118 | float delta_project=1-k_t_d_star; 119 | 120 | 121 | //full budget projectron 122 | if(this->size_SV==Budget) 123 | { 124 | SV *p_predict=this->SV_begin; 125 | for(int i=0; iSV_alpha=p_predict->SV_alpha+x.label*d_star[i]; 128 | p_predict=p_predict->next; 129 | } 130 | } 131 | else // not full 132 | { 133 | 134 | SV* support = new SV(x.label,x); 135 | this->add_SV(support); 136 | //updata K_inverse 137 | for(int i=0; isize_SV-1; i++) 138 | { 139 | for(int j=0; jsize_SV-1; j++) 140 | { 141 | K_inverse[i*Budget+j]=K_inverse[i*Budget+j]+d_star[i]*d_star[j]/delta_project; 142 | } 143 | } 144 | for(int i=0; isize_SV-1; i++) 145 | { 146 | K_inverse[i*Budget+this->size_SV-1]=(-1)*d_star[i]/delta_project; 147 | K_inverse[(this->size_SV-1)*Budget+i]=(-1)*d_star[i]/delta_project; 148 | } 149 | K_inverse[(this->size_SV-1)*Budget+(this->size_SV-1)]=1/delta_project; 150 | 151 | //updata K_t 152 | for(int i=0; isize_SV-1; i++) 153 | { 154 | K_t[i*Budget+this->size_SV-1]=k_t[i]; 155 | K_t[(this->size_SV-1)*Budget+i]=k_t[i]; 156 | } 157 | K_t[(this->size_SV-1)*Budget+(this->size_SV-1)]=1;/////////////////////// 158 | } 159 | 
delete[] d_star; 160 | }//mistake 161 | else if((l_t<1)&&(l_t>0)) 162 | { 163 | 164 | float * d_star=new float [this->size_SV]; 165 | for(int i=0; isize_SV; i++) 166 | { 167 | d_star[i]=0; 168 | for(int j=0; jsize_SV; j++) 169 | { 170 | d_star[i]=d_star[i]+K_inverse[i*Budget+j]*k_t[j]; 171 | } 172 | } 173 | 174 | //caculate delta 175 | float k_t_d_star=0; 176 | for(int i=0; isize_SV; i++) 177 | { 178 | k_t_d_star=k_t_d_star+k_t[i]*d_star[i]; 179 | } 180 | float delta_project=1-k_t_d_star; 181 | 182 | float power_p_k_t=0; 183 | 184 | for(int i=0; isize_SV; i++) 185 | { 186 | for(int j=0; jsize_SV; j++) 187 | { 188 | power_p_k_t=power_p_k_t+K_t[i*Budget+j]*d_star[i]*d_star[j]; 189 | } 190 | } 191 | 192 | float tau_t= (std::min)(l_t/power_p_k_t,1.f); 193 | float beta_t=tau_t*(2*l_t-tau_t*power_p_k_t-2*U*sqrt(delta_project)); 194 | if(beta_t>=0) 195 | { 196 | SV *p_predict=this->SV_begin; 197 | for(int i=0; isize_SV; i++) 198 | { 199 | p_predict->SV_alpha=p_predict->SV_alpha+tau_t*d_star[i]*x.label; 200 | p_predict=p_predict->next; 201 | } 202 | } 203 | delete[] d_star; 204 | }//margin loss 205 | }//have SV 206 | delete[] k_t; 207 | 208 | return y; 209 | } 210 | template 211 | float kernel_projectronpp::Predict(const DataPoint &data) 212 | { 213 | float predict = 0; 214 | 215 | SV* p_predict = this->SV_begin; 216 | while (p_predict!=NULL) 217 | { 218 | predict+=p_predict->SV_alpha* this->kern(p_predict->SV_data,data); 219 | p_predict=p_predict->next; 220 | } 221 | return predict; 222 | } 223 | 224 | 225 | } 226 | -------------------------------------------------------------------------------- /src/kernel/kernel_sgd.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | #include "kernel_optim.h" 4 | 5 | 6 | namespace SOL 7 | { 8 | template 9 | class kernel_sgd: public Kernel_optim 10 | { 11 | public: 12 | kernel_sgd(const Params ¶m,DataSet &dataset, 13 | LossFunction &lossFunc); 14 | virtual ~kernel_sgd(); 15 | 16 | 
protected: 17 | //this is the core of different updating algorithms 18 | virtual float UpdateWeightVec(const DataPoint &x); 19 | virtual float Predict(const DataPoint &data); 20 | virtual void begin_test(void){} 21 | }; 22 | 23 | template 24 | kernel_sgd::kernel_sgd(const Params ¶m, 25 | DataSet &dataset, 26 | LossFunction &lossFunc): Kernel_optim(param,dataset, lossFunc) 27 | { 28 | this->id_str = "kernel_ogd"; 29 | this->eta0=param.eta; 30 | } 31 | 32 | template 33 | kernel_sgd::~kernel_sgd() 34 | { 35 | } 36 | 37 | //update weight vector with stochastic gradient descent 38 | template 39 | float kernel_sgd::UpdateWeightVec(const DataPoint &x) 40 | { 41 | float y = this->Predict(x); 42 | 43 | float gt_i = this->lossFunc->GetGradient(x.label,y); 44 | 45 | if(gt_i!=0) 46 | { 47 | SV* support = new SV(-this->eta0 * gt_i,x); 48 | this->add_SV(support); 49 | } 50 | return y; 51 | } 52 | 53 | 54 | template 55 | float kernel_sgd::Predict(const DataPoint &data) 56 | { 57 | float predict = 0; 58 | 59 | SV* p_predict = this->SV_begin; 60 | while (p_predict!=NULL) 61 | { 62 | predict+=p_predict->SV_alpha* this->kern(p_predict->SV_data,data); 63 | p_predict=p_predict->next; 64 | } 65 | return predict; 66 | } 67 | 68 | 69 | 70 | } 71 | -------------------------------------------------------------------------------- /src/loss/HingeLoss.h: -------------------------------------------------------------------------------- 1 | /************************************************************************* 2 | > File Name: HingeLoss.h 3 | > Copyright (C) 2013 Yue Wu 4 | > Created Time: 2013/8/18 星期日 16:58:22 5 | > Functions: Hinge Loss function, for SVM 6 | ************************************************************************/ 7 | 8 | #ifndef HEADER_HINGE_LOSS 9 | #define HEADER_HINGE_LOSS 10 | 11 | #include "LossFunction.h" 12 | 13 | namespace SOL { 14 | template 15 | class HingeLoss: public LossFunction { 16 | public: 17 | virtual float GetLoss(LabelType label, float predict) { 18 | 
return (std::max)(0.0f, 1.f - predict * label); 19 | } 20 | 21 | virtual float GetGradient(LabelType label, float predict) { 22 | if (this->GetLoss(label,predict) > 0) 23 | return (float)(-label); 24 | else 25 | return 0; 26 | } 27 | }; 28 | } 29 | 30 | #endif 31 | -------------------------------------------------------------------------------- /src/loss/LogisticLoss.h: -------------------------------------------------------------------------------- 1 | /************************************************************************* 2 | > File Name: LogisticLoss.h 3 | > Copyright (C) 2013 Yue Wu 4 | > Created Time: 2013/8/18 星期日 17:11:42 5 | > Functions: Logistic loss for binary classification 6 | ************************************************************************/ 7 | 8 | #pragma once 9 | #include "LossFunction.h" 10 | 11 | namespace SOL { 12 | template 13 | class LogisticLoss: public LossFunction { 14 | public: 15 | virtual float GetLoss(LabelType label, float predict) { 16 | float tmp = -predict * label; 17 | if (tmp > 100.f) return tmp; 18 | else if (tmp < -100.f) return 0.f; 19 | else 20 | return std::log(1.f + std::exp(tmp)); 21 | } 22 | 23 | //aggressive learning 24 | virtual float GetGradient(LabelType label, float predict) { 25 | float tmp = predict * label; 26 | if (tmp > 100.f) //to reject numeric problems 27 | return 0.f; 28 | else if (tmp < -100.f) 29 | return (float)(-label); 30 | else 31 | return -label / (1.f + std::exp(tmp)); 32 | } 33 | }; 34 | } 35 | -------------------------------------------------------------------------------- /src/loss/LossFunction.h: -------------------------------------------------------------------------------- 1 | /************************************************************************* 2 | > File Name: LossFunction.h 3 | > Copyright (C) 2013 Yue Wu 4 | > Created Time: 2013/8/18 星期日 16:48:55 5 | > Functions: base class for loss function 6 | ************************************************************************/ 7 | 8 | 
#pragma once 9 | #include 10 | #include "../common/util.h" 11 | 12 | namespace SOL { 13 | template 14 | class LossFunction { 15 | inline char Sign(float x) { 16 | if (x > 0.f) 17 | return 1; 18 | else 19 | return -1; 20 | } 21 | 22 | public: 23 | virtual inline bool IsCorrect(LabelType label, float predict) { 24 | return Sign(predict) == label ? true : false; 25 | } 26 | 27 | virtual float GetLoss(LabelType label, float predict) = 0; 28 | virtual float GetGradient(LabelType label, float predict) = 0; 29 | 30 | public: 31 | virtual ~LossFunction(){} 32 | }; 33 | } 34 | -------------------------------------------------------------------------------- /src/loss/SquareLoss.h: -------------------------------------------------------------------------------- 1 | /************************************************************************* 2 | > File Name: SquareLoss.h 3 | > Copyright (C) 2013 Yue Wu 4 | > Created Time: 2013/8/18 星期日 17:19:33 5 | > Functions: Square Loss 6 | ************************************************************************/ 7 | 8 | #pragma once 9 | #include "LossFunction.h" 10 | 11 | namespace SOL { 12 | template 13 | class SquareLoss: public LossFunction { 14 | public: 15 | virtual float GetLoss(LabelType label, float predict) { 16 | return (predict - label) * (predict - label); 17 | } 18 | 19 | virtual float GetGradient(LabelType label, float predict) { 20 | return 2 * (predict - label); 21 | } 22 | }; 23 | } 24 | -------------------------------------------------------------------------------- /src/loss/SquaredHingeLoss.h: -------------------------------------------------------------------------------- 1 | /************************************************************************* 2 | > File Name: SquaredHingeLoss.h 3 | > Copyright (C) 2013 Yue Wu 4 | > Created Time: 2013/11/27 11:30:44 5 | > Functions: Squared Hinge loss 6 | ************************************************************************/ 7 | #ifndef HEADER_SQUARE_HINGE_LOSS 8 | #define 
HEADER_SQUARE_HINGE_LOSS 9 | 10 | #include "LossFunction.h" 11 | 12 | namespace SOL { 13 | template 14 | class SquaredHingeLoss: public LossFunction { 15 | public: 16 | virtual float GetLoss(LabelType label, float predict) { 17 | float loss = (std::max)(0.0f, 1.f - predict * label); 18 | return loss * loss; 19 | } 20 | 21 | virtual float GetGradient(LabelType label, float predict) { 22 | float loss = (std::max)(0.0f, 1.f - predict * label); 23 | if (loss > 0) 24 | return -label * loss * 2.f; 25 | else 26 | return 0; 27 | } 28 | }; 29 | } 30 | 31 | #endif 32 | -------------------------------------------------------------------------------- /src/main.cpp: -------------------------------------------------------------------------------- 1 | /************************************************************************* 2 | > File Name: main.cpp 3 | > Copyright (C) 2013 Yue Wu 4 | > Created Time: 2013/9/20 13:18:02 5 | > Functions: 6 | ************************************************************************/ 7 | #include "Params.h" 8 | #include "common/util.h" 9 | 10 | #include "data/DataSet.h" 11 | #include "data/libsvmread.h" 12 | 13 | #include "loss/LogisticLoss.h" 14 | #include "loss/HingeLoss.h" 15 | #include "loss/SquareLoss.h" 16 | #include "loss/SquaredHingeLoss.h" 17 | 18 | #include 19 | #include 20 | #include 21 | #include 22 | 23 | #include "kernel/kernel_optim.h" 24 | #include "kernel/kernel_perceptron.h" 25 | #include "kernel/kernel_sgd.h" 26 | #include "kernel/kernel_RBP.h" 27 | #include "kernel/kernel_forgetron.h" 28 | #include "kernel/kernel_projectron.h" 29 | #include "kernel/kernel_projectronpp.h" 30 | #include "kernel/kernel_bogd.h" 31 | #include "kernel/kernel_bpas.h" 32 | #include "kernel/kernel_nogd.h" 33 | #include "kernel/kernel_fogd.h" 34 | #include "kernel/kernel_pa.h" 35 | 36 | using namespace std; 37 | using namespace SOL; 38 | 39 | #define FeatType float 40 | #define LabelType char 41 | 42 | ///////////////////////////function 
declarications///////////////////// 43 | void FakeInput(int &argc, char **args, char** &argv); 44 | template LossFunction* GetLossFunc(const Params ¶m); 45 | template 46 | Kernel_optim* GetOptimizer(const Params ¶m, DataSet &dataset, LossFunction &lossFun); 47 | /////////////////// 48 | int main(int argc, const char** args) { 49 | 50 | //check memory leak in VC++ 51 | #if defined(_MSC_VER) && defined(_DEBUG) 52 | int tmpFlag = _CrtSetDbgFlag( _CRTDBG_REPORT_FLAG ); 53 | tmpFlag |= _CRTDBG_LEAK_CHECK_DF; 54 | _CrtSetDbgFlag( tmpFlag ); 55 | #endif 56 | Params param; 57 | if (param.Parse(argc, args) == false){ 58 | return -1; 59 | } 60 | 61 | LossFunction *lossFunc = GetLossFunc(param); 62 | if(lossFunc == NULL) 63 | return -1; 64 | 65 | DataSet dataset(param.passNum,param.buf_size); 66 | if (dataset.Load(param.fileName, param.cache_fileName) == false){ 67 | cerr<<"ERROR: Load dataset "< *opti = GetOptimizer(param,dataset,*lossFunc); 73 | if (opti == NULL) 74 | return -1; 75 | 76 | opti->SetParameter(param.gamma,param.eta); 77 | 78 | float l_errRate(0), l_varErr(0); //learning error rate 79 | float sparseRate(0); 80 | opti->PrintOptInfo(); 81 | //learning the model 82 | double time1 = get_current_time(); 83 | 84 | opti->Learn(l_errRate,l_varErr,sparseRate); 85 | 86 | double time2 = get_current_time(); 87 | 88 | printf("\nLearn acuracy: %.6f%%\n",(1-l_errRate)* 100); 89 | cout<<"#SV:"<size_SV< 0 || param.test_fileName.length() > 0; 96 | if ( is_test) { 97 | DataSet testset(1,param.buf_size); 98 | if (testset.Load(param.test_fileName, param.test_cache_fileName) == true) { 99 | float t_errRate(0); //test error rate 100 | t_errRate = opti->Test(param,testset); 101 | time3 = get_current_time(); 102 | 103 | printf("Test acuracy: %.6f %%\n",(1-t_errRate) * 100); 104 | } 105 | else 106 | cout<<"load test set failed!"< 120 | LossFunction* GetLossFunc(const Params ¶m) { 121 | if (param.str_loss == "Hinge") 122 | return new HingeLoss(); 123 | else if (param.str_loss == "Logit") 
124 | return new LogisticLoss(); 125 | else if (param.str_loss == "Square") 126 | return new SquareLoss(); 127 | else if (param.str_loss == "SquareHinge") 128 | return new SquaredHingeLoss(); 129 | else{ 130 | cerr<<"ERROR: unrecognized Loss function "< 137 | Kernel_optim* GetOptimizer(const Params ¶m, DataSet &dataset, LossFunction &lossFunc) { 138 | string method = param.str_opt; 139 | ToUpperCase(method); 140 | const char* c_str = method.c_str(); 141 | if (strcmp(c_str, "KERNEL-PERCEPTRON") == 0) 142 | return new kernel_perceptron(param,dataset,lossFunc); 143 | else if (strcmp(c_str, "KERNEL-OGD") == 0) 144 | return new kernel_sgd(param,dataset,lossFunc); 145 | else if (strcmp(c_str, "KERNEL-RBP") == 0) 146 | return new kernel_RBP(param,dataset,lossFunc); 147 | else if (strcmp(c_str, "KERNEL-FORGETRON") == 0) 148 | return new kernel_forgetron(param,dataset,lossFunc); 149 | 150 | else if (strcmp(c_str, "KERNEL-PROJECTRON") == 0) 151 | return new kernel_projectron(param,dataset,lossFunc); 152 | else if (strcmp(c_str, "KERNEL-PROJECTRONPP") == 0) 153 | return new kernel_projectronpp(param,dataset,lossFunc); 154 | else if (strcmp(c_str, "KERNEL-BOGD") == 0) 155 | return new kernel_bogd(param,dataset,lossFunc); 156 | else if (strcmp(c_str, "KERNEL-BPAS") == 0) 157 | return new kernel_bpas(param,dataset,lossFunc); 158 | else if (strcmp(c_str, "KERNEL-FOGD") == 0) 159 | return new kernel_fogd(param,dataset,lossFunc); 160 | else if (strcmp(c_str, "KERNEL-NOGD") == 0) 161 | return new kernel_nogd(param,dataset,lossFunc); 162 | else{ 163 | cerr<<"ERROR: unrecgonized optimization method "<