├── CMakeLists.txt ├── LICENSE ├── README.md ├── data.c ├── data.lua ├── dok ├── index.dok ├── liblinear.dok ├── libsvm.dok └── sgd.dok ├── init.c ├── init.lua ├── liblinear ├── CMakeLists.txt ├── COPYRIGHT ├── init.c ├── init.lua ├── liblinear │ ├── COPYRIGHT │ ├── Makefile │ ├── Makefile.win │ ├── README │ ├── blas │ │ ├── Makefile │ │ ├── blas.h │ │ ├── blasp.h │ │ ├── daxpy.c │ │ ├── ddot.c │ │ ├── dnrm2.c │ │ └── dscal.c │ ├── heart_scale │ ├── linear.cpp │ ├── linear.def │ ├── linear.h │ ├── predict.c │ ├── train.c │ ├── tron.cpp │ └── tron.h ├── liblinear_predict.c ├── liblinear_train.c ├── linear_model_torch.c └── linear_model_torch.h ├── libsvm ├── CMakeLists.txt ├── COPYRIGHT ├── init.c ├── init.lua ├── libsvm │ ├── COPYRIGHT │ ├── FAQ.html │ ├── Makefile │ ├── Makefile.win │ ├── README │ ├── heart_scale │ ├── svm-predict.c │ ├── svm-scale.c │ ├── svm-train.c │ ├── svm.cpp │ ├── svm.def │ └── svm.h ├── libsvm_predict.c ├── libsvm_train.c ├── svm_model_torch.c └── svm_model_torch.h ├── sgd ├── CMakeLists.txt ├── asgd.lua ├── init.lua ├── loss.lua ├── sgd.lua └── test.lua ├── svm-0.1-0.rockspec └── util.c /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | SET(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake ${CMAKE_MODULE_PATH}) 2 | 3 | CMAKE_MINIMUM_REQUIRED(VERSION 2.6 FATAL_ERROR) 4 | CMAKE_POLICY(VERSION 2.6) 5 | 6 | 7 | FIND_PACKAGE(Torch REQUIRED) 8 | 9 | ADD_SUBDIRECTORY(sgd) 10 | ADD_SUBDIRECTORY(liblinear) 11 | ADD_SUBDIRECTORY(libsvm) 12 | 13 | SET(utilsrc init.c data.c util.c) 14 | ADD_LIBRARY(svmutil MODULE ${utilsrc}) 15 | TARGET_LINK_LIBRARIES(svmutil luaT TH) 16 | 17 | IF(LUALIB) 18 | TARGET_LINK_LIBRARIES(svmutil ${LUALIB}) 19 | ELSE(LUALIB) 20 | FIND_LIBRARY(LUAJIT_LIBRARIES luajit ${LUA_LIBDIR}) 21 | IF (NOT ${LUAJIT_LIBRARIES} MATCHES "LUAJIT_LIBRARIES-NOTFOUND") 22 | MESSAGE("Found LuaJIT, linking with: " ${LUAJIT_LIBRARIES}) 23 | TARGET_LINK_LIBRARIES(svmutil luajit) 24 | ELSE(${LUAJIT_LIBRARIES}) 25 | MESSAGE("Did not find LuaJIT, attempting to link with vanilla Lua") 26 | TARGET_LINK_LIBRARIES(svmutil lua) 27 | ENDIF(${LUAJIT_LIBRARIES}) 28 | ENDIF(LUALIB) 29 | 30 | INSTALL(TARGETS svmutil LIBRARY DESTINATION ${Torch_INSTALL_LUA_CPATH_SUBDIR}) 31 | 32 | SET(src) 33 | SET(luasrc init.lua data.lua) 34 | 35 | ADD_TORCH_PACKAGE(svm "${src}" "${luasrc}") 36 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2012-2014, Koray Kavukcuoglu 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | * Redistributions of source code must retain the above copyright 7 | notice, this list of conditions and the following disclaimer. 8 | * Redistributions in binary form must reproduce the above copyright 9 | notice, this list of conditions and the following disclaimer in the 10 | documentation and/or other materials provided with the distribution. 11 | * Neither the name of the nor the 12 | names of its contributors may be used to endorse or promote products 13 | derived from this software without specific prior written permission. 
14 | 
15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18 | DISCLAIMED. IN NO EVENT SHALL KORAY KAVUKCUOGLU BE LIABLE FOR ANY
19 | DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20 | (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
22 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 | 
26 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | torch-svm
2 | =========
3 | 
4 | SVM packages for Torch7.
5 | 
6 | torch-svm/sgd
7 | -------------
8 | 
9 | Reimplementation of Leon Bottou's svmsgd and svmasgd (http://leon.bottou.org/projects/sgd).
10 | This implementation is 2-10 times slower depending on the sparsity of the input.
11 | 
12 | torch-svm/liblinear
13 | -------------------
14 | 
15 | This is a wrapper around the well known LIBLINEAR library (http://www.csie.ntu.edu.tw/~cjlin/liblinear/).
16 | 
17 | Requirements
18 | ------------
19 | 
20 | Only Torch7 (http://github.com/torch/torch7)
21 | 
22 | Building
23 | --------
24 | 
25 | ```
26 | git clone git://github.com/koraykv/torch-svm.git
27 | luarocks make
28 | ```
29 | 
30 | Using
31 | -----
32 | 
33 | ```
34 | require 'svm'
35 | 
36 | d = svm.ascread('liblinear/liblinear/heart_scale')
37 | model = liblinear.train(d)
38 | labels,accuracy,dec = liblinear.predict(d,model)
39 | ```
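The LIBSVM wrapper (http://www.csie.ntu.edu.tw/~cjlin/libsvm/) follows the same pattern; a minimal sketch, using the `heart_scale` file bundled with the LIBSVM sources:

```
require 'svm'

d = svm.ascread('libsvm/libsvm/heart_scale')
model = libsvm.train(d)
labels,accuracy,dec = libsvm.predict(d,model)
```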
40 | 
41 | Status
42 | ------
43 | 
44 | - svmsgd and svmasgd implementations are supposed to be complete.
45 | - The LIBLINEAR interface is supposed to be complete, but I did not check all the possible flags. If you find that something is not working, let me know.
46 | - The LIBSVM interface is mostly complete, but passing in a precomputed kernel is not supported yet.
--------------------------------------------------------------------------------
/data.c:
--------------------------------------------------------------------------------
1 | 
2 | #include "TH.h"
3 | #include "luaT.h"
4 | 
5 | #include "lualib.h"
6 | 
7 | #define max_(a,b) (a>=b ? a : b)
8 | #define min_(a,b) (a<=b ? a : b)
9 | 
10 | 
11 | static int svm_readbinary(lua_State *L)
12 | {
13 |   int normindex = 2;
14 |   int maxrows = -1;
15 | 
16 |   // read the file name or the file pointer
17 |   int ownfile = 1;
18 |   const char *fname = lua_tostring(L,1);
19 |   FILE *fp;
20 |   if (fname == NULL)
21 |   {
22 |     fp = (*(FILE **)luaL_checkudata(L, 1, LUA_FILEHANDLE));
23 |     ownfile = 0;
24 |     // check if next entry is a number, then use it as number of
25 |     // samples to read
26 |     if (lua_isnumber(L,2))
27 |     {
28 |       maxrows = (int)lua_tonumber(L,2);
29 |       normindex = 3;
30 |     }
31 |   }
32 |   else
33 |   {
34 |     fp = fopen(fname,"r");
35 |     printf("Reading %s\n",fname);
36 |   }
37 | 
38 |   luaL_argcheck(L, fp != NULL, 1, "File could not be opened");
39 | 
40 |   //
41 | 
42 |   int normalize = 0;
43 | 
44 |   if (lua_isnil(L,normindex))
45 |     normalize = 1;
46 |   else if (lua_isboolean(L,normindex))
47 |     normalize = lua_toboolean(L,normindex);
48 | 
49 |   // printf("norm=%d nil=%d bool=%d \n",normalize,lua_isnil(L,2),lua_isboolean(L,2));
50 | 
51 |   char y;
52 |   int nf;
53 |   int i;
54 |   lua_newtable(L);
55 |   int cntr = 1;
56 |   int npos = 0;
57 |   int nneg = 0;
58 |   int maxdim = 0;
59 |   int minsparse = INT_MAX;
60 |   int maxsparse = 0;
61 |   while (maxrows-- && fread((void*)&y,sizeof(char),1,fp))
62 |   {
63 |     fread((void*)&nf,sizeof(int),1,fp);
64 |     THIntTensor *indices = THIntTensor_newWithSize1d(nf);
65 |     THFloatTensor *vals = THFloatTensor_newWithSize1d(nf);
66 |     int *indices_data = THIntTensor_data(indices);
67 |     float *vals_data = THFloatTensor_data(vals);
68 |     // each feature is an int32 index followed by a float32 value
69 |     for (i=0; i<nf; i++)
70 |     {
71 |       fread((void*)&indices_data[i],sizeof(int),1,fp);
72 |       fread((void*)&vals_data[i],sizeof(float),1,fp);
73 |     }
74 | 
75 |     // the label is a single byte; nonzero means the positive class
76 |     if (y>0)
77 |       npos += 1;
78 |     else
79 |       nneg += 1;
80 | 
81 |     maxdim = max_(maxdim,indices_data[nf-1]);
82 |     minsparse = min_(minsparse,nf);
83 |     maxsparse = max_(maxsparse,nf);
84 | 
85 |     lua_newtable(L);
86 |     {
87 |       lua_pushnumber(L,(y ? 1 : -1));
88 |       lua_rawseti(L,-2,1);
89 |       lua_newtable(L);
90 |       {
91 |         luaT_pushudata(L,indices,"torch.IntTensor");
92 |         lua_rawseti(L,-2,1);
93 |         luaT_pushudata(L,vals,"torch.FloatTensor");
94 |         lua_rawseti(L,-2,2);
95 |       }
96 |       lua_rawseti(L,-2,2);
97 |     }
98 |     lua_rawseti(L,-2,cntr);
99 |     cntr++;
100 |   }
101 |   cntr--;
102 |   if (ownfile)
103 |   {
104 |     fclose(fp);
105 |   }
106 |   if (maxrows < -1)
107 |   {
108 |     printf("# of positive samples = %d\n",npos);
109 |     printf("# of negative samples = %d\n",nneg);
110 |     printf("# of total samples = %d\n",cntr);
111 |     printf("# of max dimensions = %d\n",maxdim);
112 |     printf("Min # of dims = %d\n",minsparse);
113 |     printf("Max # of dims = %d\n",maxsparse);
114 |     lua_pushnumber(L,(double)maxdim);
115 |     return 2;
116 |   }
117 |   return 1;
118 | }
119 | 
120 | static int svm_infobinary(lua_State *L)
121 | {
122 |   // read the file name or the file pointer
123 |   const char *fname = lua_tostring(L,1);
124 |   FILE *fp = fopen(fname,"r");
125 |   printf("Reading %s\n",fname);
126 | 
127 |   luaL_argcheck(L, fp != NULL, 1, "File could not be opened");
128 | 
129 |   char y;
130 |   int nf;
131 |   int cntr = 1;
132 |   int npos = 0;
133 |   int nneg = 0;
134 |   int maxdim = 0;
135 |   while (fread((void*)&y,sizeof(char),1,fp))
136 |   {
137 |     if (y>0)
138 |       npos += 1;
139 |     else
140 |       nneg += 1;
141 | 
142 |     fread((void*)&nf,sizeof(int),1,fp);
143 |     fseek(fp,(nf-1)*2*4,SEEK_CUR);
144 |     fread((void*)&nf,sizeof(int),1,fp);
145 |     fseek(fp,4,SEEK_CUR);
146 |     maxdim = max_(maxdim,nf);
147 |     cntr++;
148 |   }
149 |   cntr--;
150 |   fclose(fp);
151 |   printf("# of positive samples = %d\n",npos);
152 |   printf("# of negative samples = %d\n",nneg);
153 |   printf("# of total samples = %d\n",cntr);
154 |   printf("# of max dimensions = %d\n",maxdim);
155 |   lua_pushnumber(L,(double)cntr);
156 |   lua_pushnumber(L,(double)maxdim);
157 |   return 2;
158 | }
159 | 
160 | static const struct luaL_Reg svm_util__ [] = {
161 |   {"binread", svm_readbinary},
162 |   {"bininfo", svm_infobinary},
163 |   {NULL, NULL}
164 | };
165 | 
166 | int libsvm_data_init(lua_State *L)
167 | {
168 |   luaL_register(L, "svm", svm_util__);
169 |   return 1;
170 | }
171 | 
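As svm_readbinary and svm_infobinary above show, each record of the binary format is one label byte (nonzero for the positive class), an int32 feature count nf, and then nf pairs of int32 index and float32 value, in native byte order. A minimal Lua sketch of a writer for this layout; binwrite1 is a hypothetical helper that is not part of the package, and the 1/0 label byte encoding is an assumption read off the C code above:

-- hypothetical helper: append one sample in the layout read by svm.binread
-- f     : a torch.DiskFile opened for writing and switched with f:binary()
-- label : +1/-1 sample label, stored as a single byte (1 for positive, 0 otherwise)
-- inds  : torch.IntTensor of 1-based feature indices in increasing order
-- vals  : torch.FloatTensor of feature values, same length as inds
local function binwrite1(f, label, inds, vals)
   f:writeByte(label > 0 and 1 or 0)   -- label byte
   f:writeInt(inds:size(1))            -- int32 feature count
   for i = 1, inds:size(1) do
      f:writeInt(inds[i])              -- int32 index
      f:writeFloat(vals[i])            -- float32 value
   end
end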
--------------------------------------------------------------------------------
/data.lua:
--------------------------------------------------------------------------------
1 | 
2 | -- write a data/label table into a libsvm formatted file.
3 | -- fname : libsvm formatted file name
4 | -- data  : a table of samples as returned by svm.ascread
5 | --         data[i][1] is the label of the ith sample
6 | --         data[i][2][1] is the index tensor and data[i][2][2] is the value tensor of the ith sample
7 | function svm.ascwrite(fname,data)
8 |    print('Writing ' .. fname)
9 |    local function vectostr(i,x)
10 |       local str = {}
11 |       local cntr = 1
12 |       x:apply(function(v)
13 |          table.insert(str,string.format('%d:%g', i[cntr], v))
14 |          cntr = cntr + 1
15 |          return v
16 |       end)
17 |       return table.concat(str, ' ')
18 |    end
19 | 
20 |    local of = torch.DiskFile(fname,'w')
21 |    for i=1,#data do
22 |       local ex = data[i]
23 |       of:writeString(string.format('%+g %s\n', ex[1], vectostr(ex[2][1],ex[2][2])))
24 |    end
25 |    of:close()
26 | end
27 | 
28 | -- read a libsvm formatted data file into a table of samples
29 | -- returns two outputs, the data table and the maximum feature dimension
30 | function svm.ascread(fname)
31 |    print('Reading ' .. fname)
32 |    local function readline(line)
33 |       local label = tonumber(string.match(line,'^([%+%-]?%s?%d+)'))
34 |       if not label then
35 |          error('could not read label')
36 |       end
37 |       -- label can be anything
38 |       -- if label ~= 1 and label ~=-1 then
39 |       --    error('label has to be +1 or -1')
40 |       -- end
41 |       local vals = {}
42 |       local inds = {}
43 |       local indcntr = 0
44 |       for ind,val in string.gmatch(line,'(%d+):([%+%-]?%d?%.?%d+)') do
45 |          indcntr = indcntr + 1
46 |          ind = tonumber(ind)
47 |          val = tonumber(val)
48 |          if not ind or not val then
49 |             error('reading failed')
50 |          end
51 |          if ind < indcntr then
52 |             error('indices are not in increasing order')
53 |          end
54 |          table.insert(inds,ind)
55 |          table.insert(vals,val)
56 |       end
57 |       return label,{torch.IntTensor(inds),torch.FloatTensor(vals)}
58 |    end
59 |    local data = {}
60 |    local maxdim = 0
61 |    local npos = 0
62 |    local nneg = 0
63 |    local minsparse = math.huge
64 |    local maxsparse = 0
65 |    for line in io.lines(fname) do
66 |       local lbl,vals = readline(line)
67 |       table.insert(data,{lbl,vals})
68 |       -- stats
69 |       maxdim = math.max(maxdim,vals[1][-1])
70 |       if lbl == 1 then npos = npos + 1 else nneg = nneg + 1 end
71 |       minsparse = math.min(minsparse,vals[1]:size(1))
72 |       maxsparse = math.max(maxsparse,vals[1]:size(1))
73 |    end
74 |    io.write(string.format("# of positive samples = %d\n",npos))
75 |    io.write(string.format("# of negative samples = %d\n",nneg))
76 |    io.write(string.format("# of total samples = %d\n",#data))
77 |    io.write(string.format("# of max dimensions = %d\n",maxdim))
78 |    io.write(string.format("Min # of dims = %d\n",minsparse))
79 |    io.write(string.format("Max # of dims = %d\n",maxsparse))
80 |    return data,maxdim
81 | end
82 | 
83 | 
84 | --[[
85 | A simple dataset table.
86 | If the filename extension is .bin, the file is assumed to be binary;
87 | otherwise it is assumed to be an ascii formatted file.
88 | 
89 | The ascii format is the svmlight format; for the binary format, the
90 | format suggested by Leon Bottou is used.
91 | ]]--
92 | 
93 | function svm.dataset(fname)
94 |    if not paths.filep(fname) then
95 |       error('File does not exist ' .. fname)
96 |    end
97 | 
98 |    local data,maxdim
99 |    if fname:match('%.bin') then
100 |       data,maxdim = svm.binread(fname,true)
101 |    else
102 |       data,maxdim = svm.ascread(fname)
103 |    end
104 |    local nsamples = #data
105 |    local dataset = {}
106 |    function dataset:size() return nsamples end
107 |    function dataset:nfeature() return maxdim end
108 |    function dataset:data() return data end
109 | 
110 | 
111 |    -- be careful, this is just for experimentation, it will be very very very slooooooow.
112 |    local dense = false
113 |    function dataset:dense()
114 |       dense = true
115 |    end
116 | 
117 |    local dx
118 |    local function todense(ind,x)
119 |       dx = dx or torch.FloatTensor(maxdim)
120 |       dx:zero()
121 |       for i=1,ind:size(1) do
122 |          dx[ind[i]] = x[i]
123 |       end
124 |       return {nil,dx}
125 |    end
126 | 
127 |    setmetatable(dataset,{__index = function(self,i)
128 |       local ind = math.mod(i-1,nsamples)+1
129 |       if dense then
130 |          local ex = data[ind]
131 |          return {ex[1],todense(ex[2][1],ex[2][2])}
132 |       else
133 |          return data[ind]
134 |       end
135 |    end})
136 | 
137 |    return dataset
138 | end
139 | 
--------------------------------------------------------------------------------
/dok/index.dok:
--------------------------------------------------------------------------------
1 | ====== Support Vector Machines =======
2 | {{anchor:svm.dok}}
3 | 
4 | This package provides popular SVM implementations.
5 | 
6 | ===== What is implemented? =====
7 | 
8 |   * [[sgd#svm.sgd|svmsgd]] : Reimplementation of [[http://leon.bottou.org/projects/sgd|Leon Bottou's svmsgd and svmasgd]].
9 |   * [[liblinear#svm.liblinear|liblinear]] : A wrapper around the well known [[http://www.csie.ntu.edu.tw/~cjlin/liblinear/|LIBLINEAR library]].
10 |   * [[libsvm#svm.libsvm|libsvm]] : A wrapper around the well known [[http://www.csie.ntu.edu.tw/~cjlin/libsvm/|LIBSVM library]].
11 | 
12 | ===== I/O Utilities =====
13 | 
14 | It is very common for SVMs to use sparse data as input. For that reason, the ''svm'' package provides sparse data reading and writing in ''SVMLight'' format.
15 | 
16 | ==== d, maxdim = svm.ascread(filename) ====
17 | {{anchor:svm.ascread}}
18 | 
19 | This function returns a table ''d'' of tables representing the data given in file ''filename''.
20 | 
21 |   - ''#d'' is equal to the number of rows in ''filename'', which is equal to the number of samples.
22 |   - ''d[i]'' is a table with ''2'' entries.
23 |     - ''d[i][1]'' is a number that contains the label value for the ''ith'' sample.
24 |     - ''d[i][2]'' is a table with ''2'' entries representing sparse input features.
25 |       - ''d[i][2][1]'' is a ''torch.IntTensor'' containing ''1-based'' indices of the non-zero features.
26 |       - ''d[i][2][2]'' is a ''torch.FloatTensor'' containing the values of the non-zero features.
27 | It also returns the maximum number of dimensions in the dataset.
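A short sketch of walking over this structure, reusing the bundled ''heart_scale'' file from the sample sessions elsewhere in this documentation:

require 'svm'

d,maxdim = svm.ascread('liblinear/liblinear/heart_scale')
ex = d[1]                        -- first sample
print(ex[1])                     -- its label
print(ex[2][1][1], ex[2][2][1])  -- index and value of its first non-zero feature
print(#d, maxdim)                -- number of samples and maximum dimension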
28 | 
29 | ==== svm.ascwrite(filename,data) ====
30 | {{anchor:svm.ascwrite}}
31 | 
32 | This function writes the data object (which should be in the format returned by [[#svmascread|svm.ascread]]) into file ''filename''.
33 | 
34 | ==== d, maxdim = svm.binread(filename) ====
35 | {{anchor:svm.binread}}
36 | 
37 | This function is equivalent to [[#svmascread|svm.ascread]], except that it operates on binary data. This format is especially useful for reading large data files. The specification of the format is the same as the one used in [[http://leon.bottou.org/projects/sgd|Leon Bottou's sgd project]].
38 | 
39 | ==== nsamples, maxdim = svm.bininfo(filename) ====
40 | {{anchor:svm.bininfo}}
41 | 
42 | This function goes over a binary input file and prints out the following information:
43 |   - number of positive samples in the dataset.
44 |   - number of negative samples in the dataset.
45 |   - total number of samples.
46 |   - maximum number of feature dimensions.
47 | 
48 | It returns two numbers, the number of samples and the maximum number of dimensions.
49 | 
50 | ==== d = svm.dataset(filename) ====
51 | {{anchor:svm.dataset}}
52 | 
53 | This function returns a dataset object that can be used with [[sgd#svmsgd|stochastic gradient SVMs]]. The returned object provides useful functions to query the size and dimension of the whole data.
54 | 
55 |   - ''d[i]'' is a table with ''2'' entries.
56 |     - ''d[i][1]'' is a number that contains the label value for the ''ith'' sample.
57 |     - ''d[i][2]'' is a table with ''2'' entries representing sparse input features.
58 |       - ''d[i][2][1]'' is a ''torch.IntTensor'' containing ''1-based'' indices of the non-zero features.
59 |       - ''d[i][2][2]'' is a ''torch.FloatTensor'' containing the values of the non-zero features.
60 |   * ''d:size()'' : number of samples.
61 |   * ''d:nfeature()'' : maximum number of features in the dataset.
62 |   * ''d:data()'' : the original data structure returned from [[#svmascread|svm.ascread]] or [[#svmbinread|svm.binread]]. If the extension of ''filename'' is ''.bin'', then [[#svmbinread|svm.binread]] is used, otherwise [[#svmascread|svm.ascread]] is used.
63 |   * ''d:dense()'' : sets a flag so that ''d[i]'' returns dense data. ''d[i][2][1]'' is ''nil'' and ''d[i][2][2]'' is a ''torch.FloatTensor'' of size ''data:nfeature()''. [[sgd#svmsgd|svm.SvmSgd and svm.SvmAsgd]] accept the dense input type too. See the sketch below.
64 | 
65 | 
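A short sketch of the dataset object in use (same ''heart_scale'' file as above; note the speed caveat on ''dense()''):

require 'svm'

d = svm.dataset('liblinear/liblinear/heart_scale')
print(d:size(), d:nfeature())
d:dense()
ex = d[1]
print(ex[1], ex[2][2]:size(1))   -- label, and a dense FloatTensor of size d:nfeature()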
--------------------------------------------------------------------------------
/dok/liblinear.dok:
--------------------------------------------------------------------------------
1 | ====== LIBLINEAR Interface =======
2 | {{anchor:svm.liblinear.dok}}
3 | 
4 | This package provides an interface to the well known [[http://www.csie.ntu.edu.tw/~cjlin/liblinear/|LIBLINEAR library]]. The interface follows the MATLAB interface very closely.
5 | 
6 | Two functions are provided, ''liblinear.train'' and ''liblinear.predict''. Calling these functions with no arguments prints the usage information.
7 | 
8 | 
9 | liblinear.train()
10 | Usage: model = train(training_data, 'liblinear_options');
11 | liblinear_options:
12 |   -s type : set type of solver (default 1)
13 |      0 -- L2-regularized logistic regression (primal)
14 |      1 -- L2-regularized L2-loss support vector classification (dual)
15 |      2 -- L2-regularized L2-loss support vector classification (primal)
16 |      3 -- L2-regularized L1-loss support vector classification (dual)
17 |      4 -- multi-class support vector classification by Crammer and Singer
18 |      5 -- L1-regularized L2-loss support vector classification
19 |      6 -- L1-regularized logistic regression
20 |      7 -- L2-regularized logistic regression (dual)
21 |     11 -- L2-regularized L2-loss epsilon support vector regression (primal)
22 |     12 -- L2-regularized L2-loss epsilon support vector regression (dual)
23 |     13 -- L2-regularized L1-loss epsilon support vector regression (dual)
24 |   -c cost : set the parameter C (default 1)
25 |   -p epsilon : set the epsilon in loss function of epsilon-SVR (default 0.1)
26 |   -e epsilon : set tolerance of termination criterion
27 |     -s 0 and 2
28 |       |f'(w)|_2 <= eps*min(pos,neg)/l*|f'(w0)|_2,
29 |       where f is the primal function and pos/neg are # of
30 |       positive/negative data (default 0.01)
31 |     -s 11
32 |       |f'(w)|_2 <= eps*|f'(w0)|_2 (default 0.001)
33 |     -s 1, 3, 4 and 7
34 |       Dual maximal violation <= eps; similar to libsvm (default 0.1)
35 |     -s 5 and 6
36 |       |f'(w)|_1 <= eps*min(pos,neg)/l*|f'(w0)|_1,
37 |       where f is the primal function (default 0.01)
38 |     -s 12 and 13
39 |       |f'(alpha)|_1 <= eps |f'(alpha0)|,
40 |       where f is the dual function (default 0.1)
41 |   -B bias : if bias >= 0, instance x becomes [x; bias]; if < 0, no bias term added (default -1)
42 |   -wi weight: weights adjust the parameter C of different classes (see README for details)
43 |   -v n: n-fold cross validation mode
44 |   -q : quiet mode (no outputs)
45 | 
46 | 
47 | ===== A Sample Session =====
48 | 
49 | 
50 | 
51 | dtr=svm.ascread('liblinear/liblinear/heart_scale')
52 | dte=svm.ascread('liblinear/liblinear/heart_scale')
53 | 
54 | print('======================================')
55 | print('LIBLINEAR L2-regularized L2-loss support vector classification (dual)')
56 | model = liblinear.train(dtr)
57 | l,acc,d = liblinear.predict(dte,model)
58 | 
59 | And you should see
60 | 
61 | Accuracy=84.4444 % (228/270)
62 | 
63 | as the last line.
64 | 
65 | Note that the dataset used for ''liblinear'' is the raw output of ''svm.ascread''.
66 | 
67 | ==== model = liblinear.train( data [, options]) ====
68 | {{anchor:svm.liblinear.train}}
69 | 
70 | This function trains a model using the dataset and the given options. The options are given as a single string and have the same syntax as the command-line liblinear ''train'' program arguments. An empty call to ''liblinear.train()'' prints the usage information.
71 | 
72 | ==== label,accuracy,decision = liblinear.predict(data, model [,options]) ====
73 | {{anchor:svm.liblinear.predict}}
74 | 
75 | This function tests on the ''data'' using the ''model'' trained in the previous step. Again, the options are given as a single string and the syntax is the same as the ''predict'' program arguments. An empty call to ''liblinear.predict()'' prints the usage information.
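As a concrete illustration of the option strings (a sketch; ''-s 2'' and ''-c 10'' are ordinary ''train'' flags from the usage text above, and the reported accuracy will differ from the sample session):

d = svm.ascread('liblinear/liblinear/heart_scale')
model = liblinear.train(d,'-s 2 -c 10 -q')   -- primal L2-regularized L2-loss SVC, C=10, quiet
l,acc,dec = liblinear.predict(d,model)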
76 | 
77 | 
--------------------------------------------------------------------------------
/dok/libsvm.dok:
--------------------------------------------------------------------------------
1 | ====== LIBSVM Interface =======
2 | {{anchor:svm.libsvm.dok}}
3 | 
4 | This package provides an interface to the well known [[http://www.csie.ntu.edu.tw/~cjlin/libsvm/|LIBSVM library]]. The interface follows the MATLAB interface very closely.
5 | 
6 | Two functions are provided, ''libsvm.train'' and ''libsvm.predict''. Calling these functions with no arguments prints the usage information.
7 | 
8 | 
9 | libsvm.train()
10 | t7> libsvm.train( )
11 | Usage: model = svmtrain(training_data, 'libsvm_options');
12 | libsvm_options:
13 |   -s svm_type : set type of SVM (default 0)
14 |      0 -- C-SVC
15 |      1 -- nu-SVC
16 |      2 -- one-class SVM
17 |      3 -- epsilon-SVR
18 |      4 -- nu-SVR
19 |   -t kernel_type : set type of kernel function (default 2)
20 |      0 -- linear: u'*v
21 |      1 -- polynomial: (gamma*u'*v + coef0)^degree
22 |      2 -- radial basis function: exp(-gamma*|u-v|^2)
23 |      3 -- sigmoid: tanh(gamma*u'*v + coef0)
24 |      4 -- precomputed kernel (kernel values in training_instance_matrix)
25 |   -d degree : set degree in kernel function (default 3)
26 |   -g gamma : set gamma in kernel function (default 1/num_features)
27 |   -r coef0 : set coef0 in kernel function (default 0)
28 |   -c cost : set the parameter C of C-SVC, epsilon-SVR, and nu-SVR (default 1)
29 |   -n nu : set the parameter nu of nu-SVC, one-class SVM, and nu-SVR (default 0.5)
30 |   -p epsilon : set the epsilon in loss function of epsilon-SVR (default 0.1)
31 |   -m cachesize : set cache memory size in MB (default 100)
32 |   -e epsilon : set tolerance of termination criterion (default 0.001)
33 |   -h shrinking : whether to use the shrinking heuristics, 0 or 1 (default 1)
34 |   -b probability_estimates : whether to train a SVC or SVR model for probability estimates, 0 or 1 (default 0)
35 |   -wi weight : set the parameter C of class i to weight*C, for C-SVC (default 1)
36 |   -v n : n-fold cross validation mode
37 |   -q : quiet mode (no outputs)
38 | 
39 | 
40 | ===== A Sample Session =====
41 | 
42 | 
43 | 
44 | dtr=svm.ascread('libsvm/libsvm/heart_scale')
45 | dte=svm.ascread('libsvm/libsvm/heart_scale')
46 | 
47 | print('======================================')
48 | print('LIBSVM C-SVC RBF Kernel support vector classification')
49 | model = libsvm.train(dtr)
50 | l,acc,d = libsvm.predict(dte,model)
51 | 
52 | And you should see
53 | 
54 | Accuracy = 86.6667% (234/270) (classification)
55 | 
56 | as the last line.
57 | 
58 | Note that the dataset used for ''libsvm'' is the raw output of ''svm.ascread''.
59 | 
60 | ==== model = libsvm.train( data [, options]) ====
61 | {{anchor:svm.libsvm.train}}
62 | 
63 | This function trains a model using the dataset and the given options. The options are given as a single string and have the same syntax as the command-line libsvm ''train'' program arguments. An empty call to ''libsvm.train()'' prints the usage information.
64 | 
65 | ==== label,accuracy,decision = libsvm.predict(data, model [,options]) ====
66 | {{anchor:svm.libsvm.predict}}
67 | 
68 | This function tests on the ''data'' using the ''model'' trained in the previous step. Again, the options are given as a single string and the syntax is the same as the ''predict'' program arguments. An empty call to ''libsvm.predict()'' prints the usage information.
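A sketch of selecting a different kernel through the option string (all flags are from the usage text above; the exact accuracy will differ from the sample session):

d = svm.ascread('libsvm/libsvm/heart_scale')
model = libsvm.train(d,'-t 1 -d 3 -c 1 -q')   -- C-SVC with a degree-3 polynomial kernel
l,acc,dec = libsvm.predict(d,model)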
69 | 
70 | 
--------------------------------------------------------------------------------
/dok/sgd.dok:
--------------------------------------------------------------------------------
1 | ====== Stochastic Gradient Descent SVM =======
2 | {{anchor:svm.sgd.dok}}
3 | 
4 | This package provides a Torch7 implementation of the svmsgd and svmasgd solvers from Leon Bottou's [[http://leon.bottou.org/projects/sgd|sgd project]].
5 | 
6 | ===== A Sample Session =====
7 | 
8 | 
9 | require 'svm'
10 | 
11 | dtr=svm.dataset('liblinear/liblinear/heart_scale')
12 | dte=svm.dataset('liblinear/liblinear/heart_scale')
13 | 
14 | print('======================================')
15 | print('SVM SGD HingeLoss')
16 | mysvm = svm.SvmSgd(dtr:nfeature(),1e-4)
17 | print(mysvm)
18 | mysvm:determineEta0(1001,dtr)
19 | mysvm:train(dtr,dte,5)
20 | l,acc,d = mysvm:predict(dte)
21 | 
22 | And you should see
23 | 
24 | Accuracy=84.4444 % (228/270)
25 | 
26 | as the last line.
27 | 
28 | Note that the dataset used for the stochastic gradient SVMs has to be read through the ''svm.dataset'' function.
29 | 
30 | ===== Stochastic Gradient Descent SVM Classes =====
31 | 
32 | ==== svm.SvmSgd(nfeatures,lambda) ====
33 | {{anchor:svm.svmsgd}}
34 | 
35 | This class provides a stochastic gradient descent solver for the standard linear SVM. There are several options that can be set to further configure the class.
36 | 
37 |   * ''self.regbias'' : default ''0'', set to ''1'' for regularizing the bias.
38 |   * ''self.svmloss'' : default ''svm.hingeloss'', you can also use ''svm.logloss'' or ''svm.squaredhingeloss''.
39 | 
40 | ==== svm.SvmSgd.determineEta0(nsample, data) ====
41 | {{anchor:svm.svmsgd.detEta}}
42 | 
43 | This function evaluates several initial learning rates to find a suitable one to start the training. ''nsample'' samples from the set ''data'' are used for this process. The ''self.eta0'' field of the class is set automatically by this function. If it is not called, the default initial learning rate of ''1'' is used in training.
44 | 
45 | ==== svm.SvmSgd.train(dtr, dte, nepoch) ====
46 | {{anchor:svm.svmsgd.train}}
47 | 
48 | This function trains the SVM for ''nepoch'' epochs over the training set ''dtr''. If ''dte'' is given, the performance on ''dte'' is tested after every epoch of training.
49 | 
50 | ==== label,accuracy,decision_vals = svm.SvmSgd.predict(data) ====
51 | {{anchor:svm.svmsgd.predict}}
52 | 
53 | This function runs testing on the given dataset ''data'' and returns 3 outputs.
54 |   - ''label'' : predicted labels for each sample.
55 |   - ''accuracy'' : a table of 3 elements, ''{accuracy, loss, cost}''.
56 |   - ''decision_vals'' : the decision value (''wx+b'') for each sample.
57 | 
58 | ==== svm.SvmAsgd(nfeatures,lambda) ====
59 | {{anchor:svm.svmasgd}}
60 | 
61 | This class provides an averaging stochastic gradient descent solver for the linear SVM. In addition to the options in [[#svmsvmsgd|svm.SvmSgd]], the following option is available.
62 | 
63 |   * ''self.avstart'' : default ''1'', set to any positive integer to specify the number of epochs at which to start averaging the weights.
64 | 
65 | This class also provides the same functions as [[#svmsvmsgd|svm.SvmSgd]] for training, testing and finding the initial learning rate.
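Since ''svm.SvmAsgd'' exposes the same methods, the sample session above carries over directly; a sketch (the ''avstart'' value here is an arbitrary choice for illustration):

require 'svm'

dtr = svm.dataset('liblinear/liblinear/heart_scale')
mysvm = svm.SvmAsgd(dtr:nfeature(),1e-4)
mysvm.avstart = 2                 -- start averaging at the second epoch
mysvm:determineEta0(1001,dtr)
mysvm:train(dtr,dtr,5)
l,acc,d = mysvm:predict(dtr)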
66 | 67 | -------------------------------------------------------------------------------- /init.c: -------------------------------------------------------------------------------- 1 | #include "luaT.h" 2 | 3 | extern int libsvm_data_init(lua_State *L); 4 | extern int libsvm_util_init(lua_State *L); 5 | 6 | DLL_EXPORT int luaopen_libsvmutil(lua_State *L) 7 | { 8 | libsvm_data_init(L); 9 | libsvm_util_init(L); 10 | return 1; 11 | } 12 | -------------------------------------------------------------------------------- /init.lua: -------------------------------------------------------------------------------- 1 | require 'torch' 2 | require 'libsvmutil' 3 | require 'svmsgd' 4 | require 'liblinear' 5 | require 'libsvm' 6 | 7 | include('data.lua') 8 | -------------------------------------------------------------------------------- /liblinear/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | SET(src 3 | linear_model_torch.h 4 | linear_model_torch.c 5 | init.c 6 | liblinear_train.c 7 | liblinear_predict.c 8 | liblinear/linear.h liblinear/linear.cpp 9 | liblinear/tron.h liblinear/tron.cpp 10 | ) 11 | 12 | SET(luasrc init.lua 13 | ) 14 | 15 | SET(blassrc 16 | liblinear/blas/blas.h 17 | liblinear/blas/blasp.h 18 | liblinear/blas/daxpy.c 19 | liblinear/blas/ddot.c 20 | liblinear/blas/dnrm2.c 21 | liblinear/blas/dscal.c 22 | ) 23 | 24 | SET(CMAKE_C_FLAGS "-fPIC ${CMAKE_C_FLAGS}") 25 | ADD_LIBRARY(liblinearblas STATIC ${blassrc}) 26 | 27 | ADD_TORCH_PACKAGE(liblinear "${src}" "${luasrc}" "LIBLINEAR Interface") 28 | 29 | TARGET_LINK_LIBRARIES(liblinear luaT TH liblinearblas) 30 | -------------------------------------------------------------------------------- /liblinear/COPYRIGHT: -------------------------------------------------------------------------------- 1 | 2 | The following is the copyright for the LIBLINEAR project. We include their 3 | sources in this package. 4 | 5 | Copyright (c) 2007-2012 The LIBLINEAR Project. 6 | All rights reserved. 7 | 8 | Redistribution and use in source and binary forms, with or without 9 | modification, are permitted provided that the following conditions 10 | are met: 11 | 12 | 1. Redistributions of source code must retain the above copyright 13 | notice, this list of conditions and the following disclaimer. 14 | 15 | 2. Redistributions in binary form must reproduce the above copyright 16 | notice, this list of conditions and the following disclaimer in the 17 | documentation and/or other materials provided with the distribution. 18 | 19 | 3. Neither name of copyright holders nor the names of its contributors 20 | may be used to endorse or promote products derived from this software 21 | without specific prior written permission. 22 | 23 | 24 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 25 | ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 26 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 27 | A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR 28 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 29 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 30 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 31 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 32 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 33 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 34 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 35 | -------------------------------------------------------------------------------- /liblinear/init.c: -------------------------------------------------------------------------------- 1 | 2 | #include "luaT.h" 3 | 4 | extern int libliblinear_predict_init(lua_State *L); 5 | extern int libliblinear_train_init(lua_State *L); 6 | 7 | DLL_EXPORT int luaopen_libliblinear(lua_State *L) 8 | { 9 | libliblinear_predict_init(L); 10 | libliblinear_train_init(L); 11 | return 1; 12 | } 13 | -------------------------------------------------------------------------------- /liblinear/init.lua: -------------------------------------------------------------------------------- 1 | require 'libliblinear' -------------------------------------------------------------------------------- /liblinear/liblinear/COPYRIGHT: -------------------------------------------------------------------------------- 1 | 2 | Copyright (c) 2007-2012 The LIBLINEAR Project. 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions 7 | are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright 10 | notice, this list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright 13 | notice, this list of conditions and the following disclaimer in the 14 | documentation and/or other materials provided with the distribution. 15 | 16 | 3. Neither name of copyright holders nor the names of its contributors 17 | may be used to endorse or promote products derived from this software 18 | without specific prior written permission. 19 | 20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 | ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR 25 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 26 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 27 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 28 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 29 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 30 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 31 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
32 | -------------------------------------------------------------------------------- /liblinear/liblinear/Makefile: -------------------------------------------------------------------------------- 1 | CXX ?= g++ 2 | CC ?= gcc 3 | CFLAGS = -Wall -Wconversion -O3 -fPIC 4 | LIBS = blas/blas.a 5 | SHVER = 1 6 | OS = $(shell uname) 7 | #LIBS = -lblas 8 | 9 | all: train predict 10 | 11 | lib: linear.o tron.o blas/blas.a 12 | if [ "$(OS)" = "Darwin" ]; then \ 13 | SHARED_LIB_FLAG="-dynamiclib -Wl,-install_name,liblinear.so.$(SHVER)"; \ 14 | else \ 15 | SHARED_LIB_FLAG="-shared -Wl,-soname,liblinear.so.$(SHVER)"; \ 16 | fi; \ 17 | $(CXX) $${SHARED_LIB_FLAG} linear.o tron.o blas/blas.a -o liblinear.so.$(SHVER) 18 | 19 | train: tron.o linear.o train.c blas/blas.a 20 | $(CXX) $(CFLAGS) -o train train.c tron.o linear.o $(LIBS) 21 | 22 | predict: tron.o linear.o predict.c blas/blas.a 23 | $(CXX) $(CFLAGS) -o predict predict.c tron.o linear.o $(LIBS) 24 | 25 | tron.o: tron.cpp tron.h 26 | $(CXX) $(CFLAGS) -c -o tron.o tron.cpp 27 | 28 | linear.o: linear.cpp linear.h 29 | $(CXX) $(CFLAGS) -c -o linear.o linear.cpp 30 | 31 | blas/blas.a: blas/*.c blas/*.h 32 | make -C blas OPTFLAGS='$(CFLAGS)' CC='$(CC)'; 33 | 34 | clean: 35 | make -C blas clean 36 | make -C matlab clean 37 | rm -f *~ tron.o linear.o train predict liblinear.so.$(SHVER) 38 | -------------------------------------------------------------------------------- /liblinear/liblinear/Makefile.win: -------------------------------------------------------------------------------- 1 | #You must ensure nmake.exe, cl.exe, link.exe are in system path. 2 | #VCVARS32.bat 3 | #Under dosbox prompt 4 | #nmake -f Makefile.win 5 | 6 | ########################################## 7 | CXXC = cl.exe 8 | CFLAGS = -nologo -O2 -EHsc -I. -D __WIN32__ -D _CRT_SECURE_NO_DEPRECATE 9 | TARGET = windows 10 | 11 | all: $(TARGET)\train.exe $(TARGET)\predict.exe 12 | 13 | $(TARGET)\train.exe: tron.obj linear.obj train.c blas\*.c 14 | $(CXX) $(CFLAGS) -Fe$(TARGET)\train.exe tron.obj linear.obj train.c blas\*.c 15 | 16 | $(TARGET)\predict.exe: tron.obj linear.obj predict.c blas\*.c 17 | $(CXX) $(CFLAGS) -Fe$(TARGET)\predict.exe tron.obj linear.obj predict.c blas\*.c 18 | 19 | linear.obj: linear.cpp linear.h 20 | $(CXX) $(CFLAGS) -c linear.cpp 21 | 22 | tron.obj: tron.cpp tron.h 23 | $(CXX) $(CFLAGS) -c tron.cpp 24 | 25 | lib: linear.cpp linear.h linear.def tron.obj 26 | $(CXX) $(CFLAGS) -LD linear.cpp tron.obj blas\*.c -Fe$(TARGET)\liblinear -link -DEF:linear.def 27 | 28 | clean: 29 | -erase /Q *.obj $(TARGET)\. 30 | 31 | -------------------------------------------------------------------------------- /liblinear/liblinear/README: -------------------------------------------------------------------------------- 1 | LIBLINEAR is a simple package for solving large-scale regularized linear 2 | classification and regression. It currently supports 3 | - L2-regularized logistic regression/L2-loss support vector classification/L1-loss support vector classification 4 | - L1-regularized L2-loss support vector classification/L1-regularized logistic regression 5 | - L2-regularized L2-loss support vector regression/L1-loss support vector regression. 6 | This document explains the usage of LIBLINEAR. 7 | 8 | To get started, please read the ``Quick Start'' section first. 9 | For developers, please check the ``Library Usage'' section to learn 10 | how to integrate LIBLINEAR in your software. 
11 | 12 | Table of Contents 13 | ================= 14 | 15 | - When to use LIBLINEAR but not LIBSVM 16 | - Quick Start 17 | - Installation 18 | - `train' Usage 19 | - `predict' Usage 20 | - Examples 21 | - Library Usage 22 | - Building Windows Binaries 23 | - Additional Information 24 | - MATLAB/OCTAVE interface 25 | - PYTHON interface 26 | 27 | When to use LIBLINEAR but not LIBSVM 28 | ==================================== 29 | 30 | There are some large data for which with/without nonlinear mappings 31 | gives similar performances. Without using kernels, one can 32 | efficiently train a much larger set via linear classification/regression. 33 | These data usually have a large number of features. Document classification 34 | is an example. 35 | 36 | Warning: While generally liblinear is very fast, its default solver 37 | may be slow under certain situations (e.g., data not scaled or C is 38 | large). See Appendix B of our SVM guide about how to handle such 39 | cases. 40 | http://www.csie.ntu.edu.tw/~cjlin/papers/guide/guide.pdf 41 | 42 | Warning: If you are a beginner and your data sets are not large, you 43 | should consider LIBSVM first. 44 | 45 | LIBSVM page: 46 | http://www.csie.ntu.edu.tw/~cjlin/libsvm 47 | 48 | 49 | Quick Start 50 | =========== 51 | 52 | See the section ``Installation'' for installing LIBLINEAR. 53 | 54 | After installation, there are programs `train' and `predict' for 55 | training and testing, respectively. 56 | 57 | About the data format, please check the README file of LIBSVM. Note 58 | that feature index must start from 1 (but not 0). 59 | 60 | A sample classification data included in this package is `heart_scale'. 61 | 62 | Type `train heart_scale', and the program will read the training 63 | data and output the model file `heart_scale.model'. If you have a test 64 | set called heart_scale.t, then type `predict heart_scale.t 65 | heart_scale.model output' to see the prediction accuracy. The `output' 66 | file contains the predicted class labels. 67 | 68 | For more information about `train' and `predict', see the sections 69 | `train' Usage and `predict' Usage. 70 | 71 | To obtain good performances, sometimes one needs to scale the 72 | data. Please check the program `svm-scale' of LIBSVM. For large and 73 | sparse data, use `-l 0' to keep the sparsity. 74 | 75 | Installation 76 | ============ 77 | 78 | On Unix systems, type `make' to build the `train' and `predict' 79 | programs. Run them without arguments to show the usages. 80 | 81 | On other systems, consult `Makefile' to build them (e.g., see 82 | 'Building Windows binaries' in this file) or use the pre-built 83 | binaries (Windows binaries are in the directory `windows'). 84 | 85 | This software uses some level-1 BLAS subroutines. The needed functions are 86 | included in this package. 
If a BLAS library is available on your 87 | machine, you may use it by modifying the Makefile: Unmark the following line 88 | 89 | #LIBS ?= -lblas 90 | 91 | and mark 92 | 93 | LIBS ?= blas/blas.a 94 | 95 | `train' Usage 96 | ============= 97 | 98 | Usage: train [options] training_set_file [model_file] 99 | options: 100 | -s type : set type of solver (default 1) 101 | 0 -- L2-regularized logistic regression (primal) 102 | 1 -- L2-regularized L2-loss support vector classification (dual) 103 | 2 -- L2-regularized L2-loss support vector classification (primal) 104 | 3 -- L2-regularized L1-loss support vector classification (dual) 105 | 4 -- multi-class support vector classification by Crammer and Singer 106 | 5 -- L1-regularized L2-loss support vector classification 107 | 6 -- L1-regularized logistic regression 108 | 7 -- L2-regularized logistic regression (dual) 109 | 11 -- L2-regularized L2-loss epsilon support vector regression (primal) 110 | 12 -- L2-regularized L2-loss epsilon support vector regression (dual) 111 | 13 -- L2-regularized L1-loss epsilon support vector regression (dual) 112 | -c cost : set the parameter C (default 1) 113 | -p epsilon : set the epsilon in loss function of epsilon-SVR (default 0.1) 114 | -e epsilon : set tolerance of termination criterion 115 | -s 0 and 2 116 | |f'(w)|_2 <= eps*min(pos,neg)/l*|f'(w0)|_2, 117 | where f is the primal function and pos/neg are # of 118 | positive/negative data (default 0.01) 119 | -s 11 120 | |f'(w)|_2 <= eps*|f'(w0)|_2 (default 0.001) 121 | -s 1, 3, 4 and 7 122 | Dual maximal violation <= eps; similar to libsvm (default 0.1) 123 | -s 5 and 6 124 | |f'(w)|_inf <= eps*min(pos,neg)/l*|f'(w0)|_inf, 125 | where f is the primal function (default 0.01) 126 | -s 12 and 13\n" 127 | |f'(alpha)|_1 <= eps |f'(alpha0)|, 128 | where f is the dual function (default 0.1) 129 | -B bias : if bias >= 0, instance x becomes [x; bias]; if < 0, no bias term added (default -1) 130 | -wi weight: weights adjust the parameter C of different classes (see README for details) 131 | -v n: n-fold cross validation mode 132 | -q : quiet mode (no outputs) 133 | 134 | Option -v randomly splits the data into n parts and calculates cross 135 | validation accuracy on them. 136 | 137 | Formulations: 138 | 139 | For L2-regularized logistic regression (-s 0), we solve 140 | 141 | min_w w^Tw/2 + C \sum log(1 + exp(-y_i w^Tx_i)) 142 | 143 | For L2-regularized L2-loss SVC dual (-s 1), we solve 144 | 145 | min_alpha 0.5(alpha^T (Q + I/2/C) alpha) - e^T alpha 146 | s.t. 0 <= alpha_i, 147 | 148 | For L2-regularized L2-loss SVC (-s 2), we solve 149 | 150 | min_w w^Tw/2 + C \sum max(0, 1- y_i w^Tx_i)^2 151 | 152 | For L2-regularized L1-loss SVC dual (-s 3), we solve 153 | 154 | min_alpha 0.5(alpha^T Q alpha) - e^T alpha 155 | s.t. 0 <= alpha_i <= C, 156 | 157 | For L1-regularized L2-loss SVC (-s 5), we solve 158 | 159 | min_w \sum |w_j| + C \sum max(0, 1- y_i w^Tx_i)^2 160 | 161 | For L1-regularized logistic regression (-s 6), we solve 162 | 163 | min_w \sum |w_j| + C \sum log(1 + exp(-y_i w^Tx_i)) 164 | 165 | For L2-regularized logistic regression (-s 7), we solve 166 | 167 | min_alpha 0.5(alpha^T Q alpha) + \sum alpha_i*log(alpha_i) + \sum (C-alpha_i)*log(C-alpha_i) - a constant 168 | s.t. 0 <= alpha_i <= C, 169 | 170 | where 171 | 172 | Q is a matrix with Q_ij = y_i y_j x_i^T x_j. 
173 | 174 | For L2-regularized L2-loss SVR (-s 11), we solve 175 | 176 | min_w w^Tw/2 + C \sum max(0, |y_i-w^Tx_i|-epsilon)^2 177 | 178 | For L2-regularized L2-loss SVR dual (-s 12), we solve 179 | 180 | min_beta 0.5(beta^T (Q + lambda I/2/C) beta) - y^T beta + \sum |beta_i| 181 | 182 | For L2-regularized L1-loss SVR dual (-s 13), we solve 183 | 184 | min_beta 0.5(beta^T Q beta) - y^T beta + \sum |beta_i| 185 | s.t. -C <= beta_i <= C, 186 | 187 | where 188 | 189 | Q is a matrix with Q_ij = x_i^T x_j. 190 | 191 | If bias >= 0, w becomes [w; w_{n+1}] and x becomes [x; bias]. 192 | 193 | The primal-dual relationship implies that -s 1 and -s 2 give the same 194 | model, -s 0 and -s 7 give the same, and -s 11 and -s 12 give the same. 195 | 196 | We implement 1-vs-the rest multi-class strategy for classification. 197 | In training i vs. non_i, their C parameters are (weight from -wi)*C 198 | and C, respectively. If there are only two classes, we train only one 199 | model. Thus weight1*C vs. weight2*C is used. See examples below. 200 | 201 | We also implement multi-class SVM by Crammer and Singer (-s 4): 202 | 203 | min_{w_m, \xi_i} 0.5 \sum_m ||w_m||^2 + C \sum_i \xi_i 204 | s.t. w^T_{y_i} x_i - w^T_m x_i >= \e^m_i - \xi_i \forall m,i 205 | 206 | where e^m_i = 0 if y_i = m, 207 | e^m_i = 1 if y_i != m, 208 | 209 | Here we solve the dual problem: 210 | 211 | min_{\alpha} 0.5 \sum_m ||w_m(\alpha)||^2 + \sum_i \sum_m e^m_i alpha^m_i 212 | s.t. \alpha^m_i <= C^m_i \forall m,i , \sum_m \alpha^m_i=0 \forall i 213 | 214 | where w_m(\alpha) = \sum_i \alpha^m_i x_i, 215 | and C^m_i = C if m = y_i, 216 | C^m_i = 0 if m != y_i. 217 | 218 | `predict' Usage 219 | =============== 220 | 221 | Usage: predict [options] test_file model_file output_file 222 | options: 223 | -b probability_estimates: whether to output probability estimates, 0 or 1 (default 0); currently for logistic regression only 224 | 225 | Note that -b is only needed in the prediction phase. This is different 226 | from the setting of LIBSVM. 227 | 228 | Examples 229 | ======== 230 | 231 | > train data_file 232 | 233 | Train linear SVM with L2-loss function. 234 | 235 | > train -s 0 data_file 236 | 237 | Train a logistic regression model. 238 | 239 | > train -v 5 -e 0.001 data_file 240 | 241 | Do five-fold cross-validation using L2-loss svm. 242 | Use a smaller stopping tolerance 0.001 than the default 243 | 0.1 if you want more accurate solutions. 244 | 245 | > train -c 10 -w1 2 -w2 5 -w3 2 four_class_data_file 246 | 247 | Train four classifiers: 248 | positive negative Cp Cn 249 | class 1 class 2,3,4. 20 10 250 | class 2 class 1,3,4. 50 10 251 | class 3 class 1,2,4. 20 10 252 | class 4 class 1,2,3. 10 10 253 | 254 | > train -c 10 -w3 1 -w2 5 two_class_data_file 255 | 256 | If there are only two classes, we train ONE model. 257 | The C values for the two classes are 10 and 50. 258 | 259 | > predict -b 1 test_file data_file.model output_file 260 | 261 | Output probability estimates (for logistic regression only). 262 | 263 | Library Usage 264 | ============= 265 | 266 | - Function: model* train(const struct problem *prob, 267 | const struct parameter *param); 268 | 269 | This function constructs and returns a linear classification 270 | or regression model according to the given training data and 271 | parameters. 272 | 273 | struct problem describes the problem: 274 | 275 | struct problem 276 | { 277 | int l, n; 278 | int *y; 279 | struct feature_node **x; 280 | double bias; 281 | }; 282 | 283 | where `l' is the number of training data. 
If bias >= 0, we assume 284 | that one additional feature is added to the end of each data 285 | instance. `n' is the number of feature (including the bias feature 286 | if bias >= 0). `y' is an array containing the target values. (integers 287 | in classification, real numbers in regression) And `x' is an array 288 | of pointers, each of which points to a sparse representation (array 289 | of feature_node) of one training vector. 290 | 291 | For example, if we have the following training data: 292 | 293 | LABEL ATTR1 ATTR2 ATTR3 ATTR4 ATTR5 294 | ----- ----- ----- ----- ----- ----- 295 | 1 0 0.1 0.2 0 0 296 | 2 0 0.1 0.3 -1.2 0 297 | 1 0.4 0 0 0 0 298 | 2 0 0.1 0 1.4 0.5 299 | 3 -0.1 -0.2 0.1 1.1 0.1 300 | 301 | and bias = 1, then the components of problem are: 302 | 303 | l = 5 304 | n = 6 305 | 306 | y -> 1 2 1 2 3 307 | 308 | x -> [ ] -> (2,0.1) (3,0.2) (6,1) (-1,?) 309 | [ ] -> (2,0.1) (3,0.3) (4,-1.2) (6,1) (-1,?) 310 | [ ] -> (1,0.4) (6,1) (-1,?) 311 | [ ] -> (2,0.1) (4,1.4) (5,0.5) (6,1) (-1,?) 312 | [ ] -> (1,-0.1) (2,-0.2) (3,0.1) (4,1.1) (5,0.1) (6,1) (-1,?) 313 | 314 | struct parameter describes the parameters of a linear classification 315 | or regression model: 316 | 317 | struct parameter 318 | { 319 | int solver_type; 320 | 321 | /* these are for training only */ 322 | double eps; /* stopping criteria */ 323 | double C; 324 | int nr_weight; 325 | int *weight_label; 326 | double* weight; 327 | double p; 328 | }; 329 | 330 | solver_type can be one of L2R_LR, L2R_L2LOSS_SVC_DUAL, L2R_L2LOSS_SVC, L2R_L1LOSS_SVC_DUAL, MCSVM_CS, L1R_L2LOSS_SVC, L1R_LR, L2R_LR_DUAL, L2R_L2LOSS_SVR, L2R_L2LOSS_SVR_DUAL, L2R_L1LOSS_SVR_DUAL. 331 | 332 | L2R_LR L2-regularized logistic regression (primal) 333 | L2R_L2LOSS_SVC_DUAL L2-regularized L2-loss support vector classification (dual) 334 | L2R_L2LOSS_SVC L2-regularized L2-loss support vector classification (primal) 335 | L2R_L1LOSS_SVC_DUAL L2-regularized L1-loss support vector classification (dual) 336 | MCSVM_CS multi-class support vector classification by Crammer and Singer 337 | L1R_L2LOSS_SVC L1-regularized L2-loss support vector classification 338 | L1R_LR L1-regularized logistic regression 339 | L2R_LR_DUAL L2-regularized logistic regression (dual) 340 | L2R_L2LOSS_SVR L2-regularized L2-loss support vector regression (primal) 341 | L2R_L2LOSS_SVR_DUAL L2-regularized L2-loss support vector regression (dual) 342 | L2R_L1LOSS_SVR_DUAL L2-regularized L1-loss support vector regression (dual) 343 | 344 | C is the cost of constraints violation. 345 | p is the sensitiveness of loss of support vector regression. 346 | eps is the stopping criterion. 347 | 348 | nr_weight, weight_label, and weight are used to change the penalty 349 | for some classes (If the weight for a class is not changed, it is 350 | set to 1). This is useful for training classifier using unbalanced 351 | input data or with asymmetric misclassification cost. 352 | 353 | nr_weight is the number of elements in the array weight_label and 354 | weight. Each weight[i] corresponds to weight_label[i], meaning that 355 | the penalty of class weight_label[i] is scaled by a factor of weight[i]. 356 | 357 | If you do not want to change penalty for any of the classes, 358 | just set nr_weight to 0. 359 | 360 | *NOTE* To avoid wrong parameters, check_parameter() should be 361 | called before train(). 
362 | 363 | struct model stores the model obtained from the training procedure: 364 | 365 | struct model 366 | { 367 | struct parameter param; 368 | int nr_class; /* number of classes */ 369 | int nr_feature; 370 | double *w; 371 | int *label; /* label of each class */ 372 | double bias; 373 | }; 374 | 375 | param describes the parameters used to obtain the model. 376 | 377 | nr_class and nr_feature are the number of classes and features, 378 | respectively. nr_class = 2 for regression. 379 | 380 | The nr_feature*nr_class array w gives feature weights. We use one 381 | against the rest for multi-class classification, so each feature 382 | index corresponds to nr_class weight values. Weights are 383 | organized in the following way 384 | 385 | +------------------+------------------+------------+ 386 | | nr_class weights | nr_class weights | ... 387 | | for 1st feature | for 2nd feature | 388 | +------------------+------------------+------------+ 389 | 390 | If bias >= 0, x becomes [x; bias]. The number of features is 391 | increased by one, so w is a (nr_feature+1)*nr_class array. The 392 | value of bias is stored in the variable bias. 393 | 394 | The array label stores class labels. 395 | 396 | - Function: void cross_validation(const problem *prob, const parameter *param, int nr_fold, double *target); 397 | 398 | This function conducts cross validation. Data are separated to 399 | nr_fold folds. Under given parameters, sequentially each fold is 400 | validated using the model from training the remaining. Predicted 401 | labels in the validation process are stored in the array called 402 | target. 403 | 404 | The format of prob is same as that for train(). 405 | 406 | - Function: double predict(const model *model_, const feature_node *x); 407 | 408 | For a classification model, the predicted class for x is returned. 409 | For a regression model, the function value of x calculated using 410 | the model is returned. 411 | 412 | - Function: double predict_values(const struct model *model_, 413 | const struct feature_node *x, double* dec_values); 414 | 415 | This function gives nr_w decision values in the array dec_values. 416 | nr_w=1 if regression is applied or the number of classes is two. An exception is 417 | multi-class svm by Crammer and Singer (-s 4), where nr_w = 2 if there are two classes. For all other situations, nr_w is the 418 | number of classes. 419 | 420 | We implement one-vs-the rest multi-class strategy (-s 0,1,2,3,5,6,7) 421 | and multi-class svm by Crammer and Singer (-s 4) for multi-class SVM. 422 | The class with the highest decision value is returned. 423 | 424 | - Function: double predict_probability(const struct model *model_, 425 | const struct feature_node *x, double* prob_estimates); 426 | 427 | This function gives nr_class probability estimates in the array 428 | prob_estimates. nr_class can be obtained from the function 429 | get_nr_class. The class with the highest probability is 430 | returned. Currently, we support only the probability outputs of 431 | logistic regression. 432 | 433 | - Function: int get_nr_feature(const model *model_); 434 | 435 | The function gives the number of attributes of the model. 436 | 437 | - Function: int get_nr_class(const model *model_); 438 | 439 | The function gives the number of classes of the model. 440 | For a regression model, 2 is returned. 441 | 442 | - Function: void get_labels(const model *model_, int* label); 443 | 444 | This function outputs the name of labels into an array called label. 
445 | For a regression model, label is unchanged. 446 | 447 | - Function: const char *check_parameter(const struct problem *prob, 448 | const struct parameter *param); 449 | 450 | This function checks whether the parameters are within the feasible 451 | range of the problem. This function should be called before calling 452 | train() and cross_validation(). It returns NULL if the 453 | parameters are feasible, otherwise an error message is returned. 454 | 455 | - Function: int save_model(const char *model_file_name, 456 | const struct model *model_); 457 | 458 | This function saves a model to a file; returns 0 on success, or -1 459 | if an error occurs. 460 | 461 | - Function: struct model *load_model(const char *model_file_name); 462 | 463 | This function returns a pointer to the model read from the file, 464 | or a null pointer if the model could not be loaded. 465 | 466 | - Function: void free_model_content(struct model *model_ptr); 467 | 468 | This function frees the memory used by the entries in a model structure. 469 | 470 | - Function: void free_and_destroy_model(struct model **model_ptr_ptr); 471 | 472 | This function frees the memory used by a model and destroys the model 473 | structure. 474 | 475 | - Function: void destroy_param(struct parameter *param); 476 | 477 | This function frees the memory used by a parameter set. 478 | 479 | - Function: void set_print_string_function(void (*print_func)(const char *)); 480 | 481 | Users can specify their output format by a function. Use 482 | set_print_string_function(NULL); 483 | for default printing to stdout. 484 | 485 | Building Windows Binaries 486 | ========================= 487 | 488 | Windows binaries are in the directory `windows'. To build them via 489 | Visual C++, use the following steps: 490 | 491 | 1. Open a dos command box and change to liblinear directory. If 492 | environment variables of VC++ have not been set, type 493 | 494 | "C:\Program Files\Microsoft Visual Studio 10.0\VC\bin\vcvars32.bat" 495 | 496 | You may have to modify the above command according which version of 497 | VC++ or where it is installed. 498 | 499 | 2. Type 500 | 501 | nmake -f Makefile.win clean all 502 | 503 | 504 | MATLAB/OCTAVE Interface 505 | ======================= 506 | 507 | Please check the file README in the directory `matlab'. 508 | 509 | PYTHON Interface 510 | ================ 511 | 512 | Please check the file README in the directory `python'. 513 | 514 | Additional Information 515 | ====================== 516 | 517 | If you find LIBLINEAR helpful, please cite it as 518 | 519 | R.-E. Fan, K.-W. Chang, C.-J. Hsieh, X.-R. Wang, and C.-J. Lin. 520 | LIBLINEAR: A Library for Large Linear Classification, Journal of 521 | Machine Learning Research 9(2008), 1871-1874. 
Software available at 522 | http://www.csie.ntu.edu.tw/~cjlin/liblinear 523 | 524 | For any questions and comments, please send your email to 525 | cjlin@csie.ntu.edu.tw 526 | 527 | 528 | -------------------------------------------------------------------------------- /liblinear/liblinear/blas/Makefile: -------------------------------------------------------------------------------- 1 | AR = ar rcv 2 | RANLIB = ranlib 3 | 4 | HEADERS = blas.h blas.h blasp.h 5 | FILES = dnrm2.o daxpy.o ddot.o dscal.o 6 | 7 | CFLAGS = $(OPTFLAGS) 8 | FFLAGS = $(OPTFLAGS) 9 | 10 | blas: $(FILES) $(HEADERS) 11 | $(AR) blas.a $(FILES) 12 | $(RANLIB) blas.a 13 | 14 | clean: 15 | - rm -f *.o 16 | - rm -f *.a 17 | - rm -f *~ 18 | 19 | .c.o: 20 | $(CC) $(CFLAGS) -c $*.c 21 | 22 | 23 | -------------------------------------------------------------------------------- /liblinear/liblinear/blas/blas.h: -------------------------------------------------------------------------------- 1 | /* blas.h -- C header file for BLAS Ver 1.0 */ 2 | /* Jesse Bennett March 23, 2000 */ 3 | 4 | /** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." 5 | 6 | - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ 7 | 8 | #ifndef BLAS_INCLUDE 9 | #define BLAS_INCLUDE 10 | 11 | /* Data types specific to BLAS implementation */ 12 | typedef struct { float r, i; } fcomplex; 13 | typedef struct { double r, i; } dcomplex; 14 | typedef int blasbool; 15 | 16 | #include "blasp.h" /* Prototypes for all BLAS functions */ 17 | 18 | #define FALSE 0 19 | #define TRUE 1 20 | 21 | /* Macro functions */ 22 | #define MIN(a,b) ((a) <= (b) ? (a) : (b)) 23 | #define MAX(a,b) ((a) >= (b) ? (a) : (b)) 24 | 25 | #endif 26 | -------------------------------------------------------------------------------- /liblinear/liblinear/blas/blasp.h: -------------------------------------------------------------------------------- 1 | /* blasp.h -- C prototypes for BLAS Ver 1.0 */ 2 | /* Jesse Bennett March 23, 2000 */ 3 | 4 | /* Functions listed in alphabetical order */ 5 | 6 | #ifdef F2C_COMPAT 7 | 8 | void cdotc_(fcomplex *dotval, int *n, fcomplex *cx, int *incx, 9 | fcomplex *cy, int *incy); 10 | 11 | void cdotu_(fcomplex *dotval, int *n, fcomplex *cx, int *incx, 12 | fcomplex *cy, int *incy); 13 | 14 | double sasum_(int *n, float *sx, int *incx); 15 | 16 | double scasum_(int *n, fcomplex *cx, int *incx); 17 | 18 | double scnrm2_(int *n, fcomplex *x, int *incx); 19 | 20 | double sdot_(int *n, float *sx, int *incx, float *sy, int *incy); 21 | 22 | double snrm2_(int *n, float *x, int *incx); 23 | 24 | void zdotc_(dcomplex *dotval, int *n, dcomplex *cx, int *incx, 25 | dcomplex *cy, int *incy); 26 | 27 | void zdotu_(dcomplex *dotval, int *n, dcomplex *cx, int *incx, 28 | dcomplex *cy, int *incy); 29 | 30 | #else 31 | 32 | fcomplex cdotc_(int *n, fcomplex *cx, int *incx, fcomplex *cy, int *incy); 33 | 34 | fcomplex cdotu_(int *n, fcomplex *cx, int *incx, fcomplex *cy, int *incy); 35 | 36 | float sasum_(int *n, float *sx, int *incx); 37 | 38 | float scasum_(int *n, fcomplex *cx, int *incx); 39 | 40 | float scnrm2_(int *n, fcomplex *x, int *incx); 41 | 42 | float sdot_(int *n, float *sx, int *incx, float *sy, int *incy); 43 | 44 | float snrm2_(int *n, float *x, int *incx); 45 | 46 | dcomplex zdotc_(int *n, dcomplex *cx, int *incx, dcomplex *cy, int *incy); 47 | 48 | dcomplex zdotu_(int *n, dcomplex *cx, int *incx, dcomplex *cy, int *incy); 49 | 50 | #endif 51 | 52 | /* Remaining functions listed in alphabetical order */ 53 | 54 | int caxpy_(int *n, 
fcomplex *ca, fcomplex *cx, int *incx, fcomplex *cy, 55 | int *incy); 56 | 57 | int ccopy_(int *n, fcomplex *cx, int *incx, fcomplex *cy, int *incy); 58 | 59 | int cgbmv_(char *trans, int *m, int *n, int *kl, int *ku, 60 | fcomplex *alpha, fcomplex *a, int *lda, fcomplex *x, int *incx, 61 | fcomplex *beta, fcomplex *y, int *incy); 62 | 63 | int cgemm_(char *transa, char *transb, int *m, int *n, int *k, 64 | fcomplex *alpha, fcomplex *a, int *lda, fcomplex *b, int *ldb, 65 | fcomplex *beta, fcomplex *c, int *ldc); 66 | 67 | int cgemv_(char *trans, int *m, int *n, fcomplex *alpha, fcomplex *a, 68 | int *lda, fcomplex *x, int *incx, fcomplex *beta, fcomplex *y, 69 | int *incy); 70 | 71 | int cgerc_(int *m, int *n, fcomplex *alpha, fcomplex *x, int *incx, 72 | fcomplex *y, int *incy, fcomplex *a, int *lda); 73 | 74 | int cgeru_(int *m, int *n, fcomplex *alpha, fcomplex *x, int *incx, 75 | fcomplex *y, int *incy, fcomplex *a, int *lda); 76 | 77 | int chbmv_(char *uplo, int *n, int *k, fcomplex *alpha, fcomplex *a, 78 | int *lda, fcomplex *x, int *incx, fcomplex *beta, fcomplex *y, 79 | int *incy); 80 | 81 | int chemm_(char *side, char *uplo, int *m, int *n, fcomplex *alpha, 82 | fcomplex *a, int *lda, fcomplex *b, int *ldb, fcomplex *beta, 83 | fcomplex *c, int *ldc); 84 | 85 | int chemv_(char *uplo, int *n, fcomplex *alpha, fcomplex *a, int *lda, 86 | fcomplex *x, int *incx, fcomplex *beta, fcomplex *y, int *incy); 87 | 88 | int cher_(char *uplo, int *n, float *alpha, fcomplex *x, int *incx, 89 | fcomplex *a, int *lda); 90 | 91 | int cher2_(char *uplo, int *n, fcomplex *alpha, fcomplex *x, int *incx, 92 | fcomplex *y, int *incy, fcomplex *a, int *lda); 93 | 94 | int cher2k_(char *uplo, char *trans, int *n, int *k, fcomplex *alpha, 95 | fcomplex *a, int *lda, fcomplex *b, int *ldb, float *beta, 96 | fcomplex *c, int *ldc); 97 | 98 | int cherk_(char *uplo, char *trans, int *n, int *k, float *alpha, 99 | fcomplex *a, int *lda, float *beta, fcomplex *c, int *ldc); 100 | 101 | int chpmv_(char *uplo, int *n, fcomplex *alpha, fcomplex *ap, fcomplex *x, 102 | int *incx, fcomplex *beta, fcomplex *y, int *incy); 103 | 104 | int chpr_(char *uplo, int *n, float *alpha, fcomplex *x, int *incx, 105 | fcomplex *ap); 106 | 107 | int chpr2_(char *uplo, int *n, fcomplex *alpha, fcomplex *x, int *incx, 108 | fcomplex *y, int *incy, fcomplex *ap); 109 | 110 | int crotg_(fcomplex *ca, fcomplex *cb, float *c, fcomplex *s); 111 | 112 | int cscal_(int *n, fcomplex *ca, fcomplex *cx, int *incx); 113 | 114 | int csscal_(int *n, float *sa, fcomplex *cx, int *incx); 115 | 116 | int cswap_(int *n, fcomplex *cx, int *incx, fcomplex *cy, int *incy); 117 | 118 | int csymm_(char *side, char *uplo, int *m, int *n, fcomplex *alpha, 119 | fcomplex *a, int *lda, fcomplex *b, int *ldb, fcomplex *beta, 120 | fcomplex *c, int *ldc); 121 | 122 | int csyr2k_(char *uplo, char *trans, int *n, int *k, fcomplex *alpha, 123 | fcomplex *a, int *lda, fcomplex *b, int *ldb, fcomplex *beta, 124 | fcomplex *c, int *ldc); 125 | 126 | int csyrk_(char *uplo, char *trans, int *n, int *k, fcomplex *alpha, 127 | fcomplex *a, int *lda, fcomplex *beta, fcomplex *c, int *ldc); 128 | 129 | int ctbmv_(char *uplo, char *trans, char *diag, int *n, int *k, 130 | fcomplex *a, int *lda, fcomplex *x, int *incx); 131 | 132 | int ctbsv_(char *uplo, char *trans, char *diag, int *n, int *k, 133 | fcomplex *a, int *lda, fcomplex *x, int *incx); 134 | 135 | int ctpmv_(char *uplo, char *trans, char *diag, int *n, fcomplex *ap, 136 | fcomplex *x, int *incx); 137 | 138 
| int ctpsv_(char *uplo, char *trans, char *diag, int *n, fcomplex *ap, 139 | fcomplex *x, int *incx); 140 | 141 | int ctrmm_(char *side, char *uplo, char *transa, char *diag, int *m, 142 | int *n, fcomplex *alpha, fcomplex *a, int *lda, fcomplex *b, 143 | int *ldb); 144 | 145 | int ctrmv_(char *uplo, char *trans, char *diag, int *n, fcomplex *a, 146 | int *lda, fcomplex *x, int *incx); 147 | 148 | int ctrsm_(char *side, char *uplo, char *transa, char *diag, int *m, 149 | int *n, fcomplex *alpha, fcomplex *a, int *lda, fcomplex *b, 150 | int *ldb); 151 | 152 | int ctrsv_(char *uplo, char *trans, char *diag, int *n, fcomplex *a, 153 | int *lda, fcomplex *x, int *incx); 154 | 155 | int daxpy_(int *n, double *sa, double *sx, int *incx, double *sy, 156 | int *incy); 157 | 158 | int dcopy_(int *n, double *sx, int *incx, double *sy, int *incy); 159 | 160 | int dgbmv_(char *trans, int *m, int *n, int *kl, int *ku, 161 | double *alpha, double *a, int *lda, double *x, int *incx, 162 | double *beta, double *y, int *incy); 163 | 164 | int dgemm_(char *transa, char *transb, int *m, int *n, int *k, 165 | double *alpha, double *a, int *lda, double *b, int *ldb, 166 | double *beta, double *c, int *ldc); 167 | 168 | int dgemv_(char *trans, int *m, int *n, double *alpha, double *a, 169 | int *lda, double *x, int *incx, double *beta, double *y, 170 | int *incy); 171 | 172 | int dger_(int *m, int *n, double *alpha, double *x, int *incx, 173 | double *y, int *incy, double *a, int *lda); 174 | 175 | int drot_(int *n, double *sx, int *incx, double *sy, int *incy, 176 | double *c, double *s); 177 | 178 | int drotg_(double *sa, double *sb, double *c, double *s); 179 | 180 | int dsbmv_(char *uplo, int *n, int *k, double *alpha, double *a, 181 | int *lda, double *x, int *incx, double *beta, double *y, 182 | int *incy); 183 | 184 | int dscal_(int *n, double *sa, double *sx, int *incx); 185 | 186 | int dspmv_(char *uplo, int *n, double *alpha, double *ap, double *x, 187 | int *incx, double *beta, double *y, int *incy); 188 | 189 | int dspr_(char *uplo, int *n, double *alpha, double *x, int *incx, 190 | double *ap); 191 | 192 | int dspr2_(char *uplo, int *n, double *alpha, double *x, int *incx, 193 | double *y, int *incy, double *ap); 194 | 195 | int dswap_(int *n, double *sx, int *incx, double *sy, int *incy); 196 | 197 | int dsymm_(char *side, char *uplo, int *m, int *n, double *alpha, 198 | double *a, int *lda, double *b, int *ldb, double *beta, 199 | double *c, int *ldc); 200 | 201 | int dsymv_(char *uplo, int *n, double *alpha, double *a, int *lda, 202 | double *x, int *incx, double *beta, double *y, int *incy); 203 | 204 | int dsyr_(char *uplo, int *n, double *alpha, double *x, int *incx, 205 | double *a, int *lda); 206 | 207 | int dsyr2_(char *uplo, int *n, double *alpha, double *x, int *incx, 208 | double *y, int *incy, double *a, int *lda); 209 | 210 | int dsyr2k_(char *uplo, char *trans, int *n, int *k, double *alpha, 211 | double *a, int *lda, double *b, int *ldb, double *beta, 212 | double *c, int *ldc); 213 | 214 | int dsyrk_(char *uplo, char *trans, int *n, int *k, double *alpha, 215 | double *a, int *lda, double *beta, double *c, int *ldc); 216 | 217 | int dtbmv_(char *uplo, char *trans, char *diag, int *n, int *k, 218 | double *a, int *lda, double *x, int *incx); 219 | 220 | int dtbsv_(char *uplo, char *trans, char *diag, int *n, int *k, 221 | double *a, int *lda, double *x, int *incx); 222 | 223 | int dtpmv_(char *uplo, char *trans, char *diag, int *n, double *ap, 224 | double *x, int *incx); 225 | 
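/* ------------------------------------------------------------------
   NOTE: an illustrative sketch, not part of the original header.
   These are f2c-style prototypes: every argument, including scalar
   sizes, strides, and coefficients, is passed by pointer. A hedged
   usage example (the values are invented) for two of the
   double-precision routines declared above:

       double x[3] = {1.0, 2.0, 3.0};
       double y[3] = {4.0, 5.0, 6.0};
       double alpha = 2.0;
       int n = 3, inc = 1;

       daxpy_(&n, &alpha, x, &inc, y, &inc);    y <- alpha*x + y
       dscal_(&n, &alpha, x, &inc);             x <- alpha*x

   liblinear's tron.cpp calls dnrm2_, ddot_, daxpy_, and dscal_
   through exactly this pointer-based convention.
   ------------------------------------------------------------------ */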
226 | int dtpsv_(char *uplo, char *trans, char *diag, int *n, double *ap, 227 | double *x, int *incx); 228 | 229 | int dtrmm_(char *side, char *uplo, char *transa, char *diag, int *m, 230 | int *n, double *alpha, double *a, int *lda, double *b, 231 | int *ldb); 232 | 233 | int dtrmv_(char *uplo, char *trans, char *diag, int *n, double *a, 234 | int *lda, double *x, int *incx); 235 | 236 | int dtrsm_(char *side, char *uplo, char *transa, char *diag, int *m, 237 | int *n, double *alpha, double *a, int *lda, double *b, 238 | int *ldb); 239 | 240 | int dtrsv_(char *uplo, char *trans, char *diag, int *n, double *a, 241 | int *lda, double *x, int *incx); 242 | 243 | 244 | int saxpy_(int *n, float *sa, float *sx, int *incx, float *sy, int *incy); 245 | 246 | int scopy_(int *n, float *sx, int *incx, float *sy, int *incy); 247 | 248 | int sgbmv_(char *trans, int *m, int *n, int *kl, int *ku, 249 | float *alpha, float *a, int *lda, float *x, int *incx, 250 | float *beta, float *y, int *incy); 251 | 252 | int sgemm_(char *transa, char *transb, int *m, int *n, int *k, 253 | float *alpha, float *a, int *lda, float *b, int *ldb, 254 | float *beta, float *c, int *ldc); 255 | 256 | int sgemv_(char *trans, int *m, int *n, float *alpha, float *a, 257 | int *lda, float *x, int *incx, float *beta, float *y, 258 | int *incy); 259 | 260 | int sger_(int *m, int *n, float *alpha, float *x, int *incx, 261 | float *y, int *incy, float *a, int *lda); 262 | 263 | int srot_(int *n, float *sx, int *incx, float *sy, int *incy, 264 | float *c, float *s); 265 | 266 | int srotg_(float *sa, float *sb, float *c, float *s); 267 | 268 | int ssbmv_(char *uplo, int *n, int *k, float *alpha, float *a, 269 | int *lda, float *x, int *incx, float *beta, float *y, 270 | int *incy); 271 | 272 | int sscal_(int *n, float *sa, float *sx, int *incx); 273 | 274 | int sspmv_(char *uplo, int *n, float *alpha, float *ap, float *x, 275 | int *incx, float *beta, float *y, int *incy); 276 | 277 | int sspr_(char *uplo, int *n, float *alpha, float *x, int *incx, 278 | float *ap); 279 | 280 | int sspr2_(char *uplo, int *n, float *alpha, float *x, int *incx, 281 | float *y, int *incy, float *ap); 282 | 283 | int sswap_(int *n, float *sx, int *incx, float *sy, int *incy); 284 | 285 | int ssymm_(char *side, char *uplo, int *m, int *n, float *alpha, 286 | float *a, int *lda, float *b, int *ldb, float *beta, 287 | float *c, int *ldc); 288 | 289 | int ssymv_(char *uplo, int *n, float *alpha, float *a, int *lda, 290 | float *x, int *incx, float *beta, float *y, int *incy); 291 | 292 | int ssyr_(char *uplo, int *n, float *alpha, float *x, int *incx, 293 | float *a, int *lda); 294 | 295 | int ssyr2_(char *uplo, int *n, float *alpha, float *x, int *incx, 296 | float *y, int *incy, float *a, int *lda); 297 | 298 | int ssyr2k_(char *uplo, char *trans, int *n, int *k, float *alpha, 299 | float *a, int *lda, float *b, int *ldb, float *beta, 300 | float *c, int *ldc); 301 | 302 | int ssyrk_(char *uplo, char *trans, int *n, int *k, float *alpha, 303 | float *a, int *lda, float *beta, float *c, int *ldc); 304 | 305 | int stbmv_(char *uplo, char *trans, char *diag, int *n, int *k, 306 | float *a, int *lda, float *x, int *incx); 307 | 308 | int stbsv_(char *uplo, char *trans, char *diag, int *n, int *k, 309 | float *a, int *lda, float *x, int *incx); 310 | 311 | int stpmv_(char *uplo, char *trans, char *diag, int *n, float *ap, 312 | float *x, int *incx); 313 | 314 | int stpsv_(char *uplo, char *trans, char *diag, int *n, float *ap, 315 | float *x, int *incx); 316 | 
317 | int strmm_(char *side, char *uplo, char *transa, char *diag, int *m, 318 | int *n, float *alpha, float *a, int *lda, float *b, 319 | int *ldb); 320 | 321 | int strmv_(char *uplo, char *trans, char *diag, int *n, float *a, 322 | int *lda, float *x, int *incx); 323 | 324 | int strsm_(char *side, char *uplo, char *transa, char *diag, int *m, 325 | int *n, float *alpha, float *a, int *lda, float *b, 326 | int *ldb); 327 | 328 | int strsv_(char *uplo, char *trans, char *diag, int *n, float *a, 329 | int *lda, float *x, int *incx); 330 | 331 | int zaxpy_(int *n, dcomplex *ca, dcomplex *cx, int *incx, dcomplex *cy, 332 | int *incy); 333 | 334 | int zcopy_(int *n, dcomplex *cx, int *incx, dcomplex *cy, int *incy); 335 | 336 | int zdscal_(int *n, double *sa, dcomplex *cx, int *incx); 337 | 338 | int zgbmv_(char *trans, int *m, int *n, int *kl, int *ku, 339 | dcomplex *alpha, dcomplex *a, int *lda, dcomplex *x, int *incx, 340 | dcomplex *beta, dcomplex *y, int *incy); 341 | 342 | int zgemm_(char *transa, char *transb, int *m, int *n, int *k, 343 | dcomplex *alpha, dcomplex *a, int *lda, dcomplex *b, int *ldb, 344 | dcomplex *beta, dcomplex *c, int *ldc); 345 | 346 | int zgemv_(char *trans, int *m, int *n, dcomplex *alpha, dcomplex *a, 347 | int *lda, dcomplex *x, int *incx, dcomplex *beta, dcomplex *y, 348 | int *incy); 349 | 350 | int zgerc_(int *m, int *n, dcomplex *alpha, dcomplex *x, int *incx, 351 | dcomplex *y, int *incy, dcomplex *a, int *lda); 352 | 353 | int zgeru_(int *m, int *n, dcomplex *alpha, dcomplex *x, int *incx, 354 | dcomplex *y, int *incy, dcomplex *a, int *lda); 355 | 356 | int zhbmv_(char *uplo, int *n, int *k, dcomplex *alpha, dcomplex *a, 357 | int *lda, dcomplex *x, int *incx, dcomplex *beta, dcomplex *y, 358 | int *incy); 359 | 360 | int zhemm_(char *side, char *uplo, int *m, int *n, dcomplex *alpha, 361 | dcomplex *a, int *lda, dcomplex *b, int *ldb, dcomplex *beta, 362 | dcomplex *c, int *ldc); 363 | 364 | int zhemv_(char *uplo, int *n, dcomplex *alpha, dcomplex *a, int *lda, 365 | dcomplex *x, int *incx, dcomplex *beta, dcomplex *y, int *incy); 366 | 367 | int zher_(char *uplo, int *n, double *alpha, dcomplex *x, int *incx, 368 | dcomplex *a, int *lda); 369 | 370 | int zher2_(char *uplo, int *n, dcomplex *alpha, dcomplex *x, int *incx, 371 | dcomplex *y, int *incy, dcomplex *a, int *lda); 372 | 373 | int zher2k_(char *uplo, char *trans, int *n, int *k, dcomplex *alpha, 374 | dcomplex *a, int *lda, dcomplex *b, int *ldb, double *beta, 375 | dcomplex *c, int *ldc); 376 | 377 | int zherk_(char *uplo, char *trans, int *n, int *k, double *alpha, 378 | dcomplex *a, int *lda, double *beta, dcomplex *c, int *ldc); 379 | 380 | int zhpmv_(char *uplo, int *n, dcomplex *alpha, dcomplex *ap, dcomplex *x, 381 | int *incx, dcomplex *beta, dcomplex *y, int *incy); 382 | 383 | int zhpr_(char *uplo, int *n, double *alpha, dcomplex *x, int *incx, 384 | dcomplex *ap); 385 | 386 | int zhpr2_(char *uplo, int *n, dcomplex *alpha, dcomplex *x, int *incx, 387 | dcomplex *y, int *incy, dcomplex *ap); 388 | 389 | int zrotg_(dcomplex *ca, dcomplex *cb, double *c, dcomplex *s); 390 | 391 | int zscal_(int *n, dcomplex *ca, dcomplex *cx, int *incx); 392 | 393 | int zswap_(int *n, dcomplex *cx, int *incx, dcomplex *cy, int *incy); 394 | 395 | int zsymm_(char *side, char *uplo, int *m, int *n, dcomplex *alpha, 396 | dcomplex *a, int *lda, dcomplex *b, int *ldb, dcomplex *beta, 397 | dcomplex *c, int *ldc); 398 | 399 | int zsyr2k_(char *uplo, char *trans, int *n, int *k, dcomplex *alpha, 400 | 
dcomplex *a, int *lda, dcomplex *b, int *ldb, dcomplex *beta, 401 | dcomplex *c, int *ldc); 402 | 403 | int zsyrk_(char *uplo, char *trans, int *n, int *k, dcomplex *alpha, 404 | dcomplex *a, int *lda, dcomplex *beta, dcomplex *c, int *ldc); 405 | 406 | int ztbmv_(char *uplo, char *trans, char *diag, int *n, int *k, 407 | dcomplex *a, int *lda, dcomplex *x, int *incx); 408 | 409 | int ztbsv_(char *uplo, char *trans, char *diag, int *n, int *k, 410 | dcomplex *a, int *lda, dcomplex *x, int *incx); 411 | 412 | int ztpmv_(char *uplo, char *trans, char *diag, int *n, dcomplex *ap, 413 | dcomplex *x, int *incx); 414 | 415 | int ztpsv_(char *uplo, char *trans, char *diag, int *n, dcomplex *ap, 416 | dcomplex *x, int *incx); 417 | 418 | int ztrmm_(char *side, char *uplo, char *transa, char *diag, int *m, 419 | int *n, dcomplex *alpha, dcomplex *a, int *lda, dcomplex *b, 420 | int *ldb); 421 | 422 | int ztrmv_(char *uplo, char *trans, char *diag, int *n, dcomplex *a, 423 | int *lda, dcomplex *x, int *incx); 424 | 425 | int ztrsm_(char *side, char *uplo, char *transa, char *diag, int *m, 426 | int *n, dcomplex *alpha, dcomplex *a, int *lda, dcomplex *b, 427 | int *ldb); 428 | 429 | int ztrsv_(char *uplo, char *trans, char *diag, int *n, dcomplex *a, 430 | int *lda, dcomplex *x, int *incx); 431 | -------------------------------------------------------------------------------- /liblinear/liblinear/blas/daxpy.c: -------------------------------------------------------------------------------- 1 | #include "blas.h" 2 | 3 | int daxpy_(int *n, double *sa, double *sx, int *incx, double *sy, 4 | int *incy) 5 | { 6 | long int i, m, ix, iy, nn, iincx, iincy; 7 | register double ssa; 8 | 9 | /* constant times a vector plus a vector. 10 | uses unrolled loop for increments equal to one. 11 | jack dongarra, linpack, 3/11/78. 12 | modified 12/3/93, array(1) declarations changed to array(*) */ 13 | 14 | /* Dereference inputs */ 15 | nn = *n; 16 | ssa = *sa; 17 | iincx = *incx; 18 | iincy = *incy; 19 | 20 | if( nn > 0 && ssa != 0.0 ) 21 | { 22 | if (iincx == 1 && iincy == 1) /* code for both increments equal to 1 */ 23 | { 24 | m = nn-3; 25 | for (i = 0; i < m; i += 4) 26 | { 27 | sy[i] += ssa * sx[i]; 28 | sy[i+1] += ssa * sx[i+1]; 29 | sy[i+2] += ssa * sx[i+2]; 30 | sy[i+3] += ssa * sx[i+3]; 31 | } 32 | for ( ; i < nn; ++i) /* clean-up loop */ 33 | sy[i] += ssa * sx[i]; 34 | } 35 | else /* code for unequal increments or equal increments not equal to 1 */ 36 | { 37 | ix = iincx >= 0 ? 0 : (1 - nn) * iincx; 38 | iy = iincy >= 0 ? 0 : (1 - nn) * iincy; 39 | for (i = 0; i < nn; i++) 40 | { 41 | sy[iy] += ssa * sx[ix]; 42 | ix += iincx; 43 | iy += iincy; 44 | } 45 | } 46 | } 47 | 48 | return 0; 49 | } /* daxpy_ */ 50 | -------------------------------------------------------------------------------- /liblinear/liblinear/blas/ddot.c: -------------------------------------------------------------------------------- 1 | #include "blas.h" 2 | 3 | double ddot_(int *n, double *sx, int *incx, double *sy, int *incy) 4 | { 5 | long int i, m, nn, iincx, iincy; 6 | double stemp; 7 | long int ix, iy; 8 | 9 | /* forms the dot product of two vectors. 10 | uses unrolled loops for increments equal to one. 11 | jack dongarra, linpack, 3/11/78. 
12 |    modified 12/3/93, array(1) declarations changed to array(*) */
13 | 
14 |     /* Dereference inputs */
15 |     nn = *n;
16 |     iincx = *incx;
17 |     iincy = *incy;
18 | 
19 |     stemp = 0.0;
20 |     if (nn > 0)
21 |     {
22 |         if (iincx == 1 && iincy == 1) /* code for both increments equal to 1 */
23 |         {
24 |             m = nn-4;
25 |             for (i = 0; i < m; i += 5)
26 |                 stemp += sx[i] * sy[i] + sx[i+1] * sy[i+1] + sx[i+2] * sy[i+2] +
27 |                          sx[i+3] * sy[i+3] + sx[i+4] * sy[i+4];
28 | 
29 |             for ( ; i < nn; i++) /* clean-up loop */
30 |                 stemp += sx[i] * sy[i];
31 |         }
32 |         else /* code for unequal increments or equal increments not equal to 1 */
33 |         {
34 |             ix = 0;
35 |             iy = 0;
36 |             if (iincx < 0)
37 |                 ix = (1 - nn) * iincx;
38 |             if (iincy < 0)
39 |                 iy = (1 - nn) * iincy;
40 |             for (i = 0; i < nn; i++)
41 |             {
42 |                 stemp += sx[ix] * sy[iy];
43 |                 ix += iincx;
44 |                 iy += iincy;
45 |             }
46 |         }
47 |     }
48 | 
49 |     return stemp;
50 | } /* ddot_ */
51 | 
--------------------------------------------------------------------------------
/liblinear/liblinear/blas/dnrm2.c:
--------------------------------------------------------------------------------
1 | #include <math.h> /* Needed for fabs() and sqrt() */
2 | #include "blas.h"
3 | 
4 | double dnrm2_(int *n, double *x, int *incx)
5 | {
6 |     long int ix, nn, iincx;
7 |     double norm, scale, absxi, ssq, temp;
8 | 
9 |     /* DNRM2 returns the euclidean norm of a vector via the function
10 |       name, so that
11 | 
12 |       DNRM2 := sqrt( x'*x )
13 | 
14 |       -- This version written on 25-October-1982.
15 |       Modified on 14-October-1993 to inline the call to SLASSQ.
16 |       Sven Hammarling, Nag Ltd. */
17 | 
18 |     /* Dereference inputs */
19 |     nn = *n;
20 |     iincx = *incx;
21 | 
22 |     if( nn > 0 && iincx > 0 )
23 |     {
24 |         if (nn == 1)
25 |         {
26 |             norm = fabs(x[0]);
27 |         }
28 |         else
29 |         {
30 |             scale = 0.0;
31 |             ssq = 1.0;
32 | 
33 |             /* The following loop is equivalent to this call to the LAPACK
34 |                auxiliary routine: CALL SLASSQ( N, X, INCX, SCALE, SSQ ) */
35 | 
36 |             for (ix=(nn-1)*iincx; ix>=0; ix-=iincx)
37 |             {
38 |                 if (x[ix] != 0.0)
39 |                 {
40 |                     absxi = fabs(x[ix]);
41 |                     if (scale < absxi)
42 |                     {
43 |                         temp = scale / absxi;
44 |                         ssq = ssq * (temp * temp) + 1.0;
45 |                         scale = absxi;
46 |                     }
47 |                     else
48 |                     {
49 |                         temp = absxi / scale;
50 |                         ssq += temp * temp;
51 |                     }
52 |                 }
53 |             }
54 |             norm = scale * sqrt(ssq);
55 |         }
56 |     }
57 |     else
58 |         norm = 0.0;
59 | 
60 |     return norm;
61 | 
62 | } /* dnrm2_ */
63 | 
--------------------------------------------------------------------------------
/liblinear/liblinear/blas/dscal.c:
--------------------------------------------------------------------------------
1 | #include "blas.h"
2 | 
3 | int dscal_(int *n, double *sa, double *sx, int *incx)
4 | {
5 |     long int i, m, nincx, nn, iincx;
6 |     double ssa;
7 | 
8 |     /* scales a vector by a constant.
9 |       uses unrolled loops for increment equal to 1.
10 |      jack dongarra, linpack, 3/11/78.
11 |      modified 3/93 to return if incx .le. 0.
12 |      modified 12/3/93, array(1) declarations changed to array(*) */
13 | 
14 |     /* Dereference inputs */
15 |     nn = *n;
16 |     iincx = *incx;
17 |     ssa = *sa;
18 | 
19 |     if (nn > 0 && iincx > 0)
20 |     {
21 |         if (iincx == 1) /* code for increment equal to 1 */
22 |         {
23 |             m = nn-4;
24 |             for (i = 0; i < m; i += 5)
25 |             {
26 |                 sx[i] = ssa * sx[i];
27 |                 sx[i+1] = ssa * sx[i+1];
28 |                 sx[i+2] = ssa * sx[i+2];
29 |                 sx[i+3] = ssa * sx[i+3];
30 |                 sx[i+4] = ssa * sx[i+4];
31 |             }
32 |             for ( ; i < nn; ++i) /* clean-up loop */
33 |                 sx[i] = ssa * sx[i];
34 |         }
35 |         else /* code for increment not equal to 1 */
36 |         {
37 |             nincx = nn * iincx;
38 |             for (i = 0; i < nincx; i += iincx)
39 |                 sx[i] = ssa * sx[i];
40 |         }
41 |     }
42 | 
43 |     return 0;
44 | } /* dscal_ */
45 | 
--------------------------------------------------------------------------------
/liblinear/liblinear/linear.def:
--------------------------------------------------------------------------------
1 | LIBRARY liblinear
2 | EXPORTS
3 | train @1
4 | cross_validation @2
5 | save_model @3
6 | load_model @4
7 | get_nr_feature @5
8 | get_nr_class @6
9 | get_labels @7
10 | predict_values @8
11 | predict @9
12 | predict_probability @10
13 | free_and_destroy_model @11
14 | free_model_content @12
15 | destroy_param @13
16 | check_parameter @14
17 | check_probability_model @15
18 | set_print_string_function @16
19 | 
--------------------------------------------------------------------------------
/liblinear/liblinear/linear.h:
--------------------------------------------------------------------------------
1 | #ifndef _LIBLINEAR_H
2 | #define _LIBLINEAR_H
3 | 
4 | #ifdef __cplusplus
5 | extern "C" {
6 | #endif
7 | 
8 | struct feature_node
9 | {
10 |     int index;
11 |     double value;
12 | };
13 | 
14 | struct problem
15 | {
16 |     int l, n;
17 |     double *y;
18 |     struct feature_node **x;
19 |     double bias; /* < 0 if no bias term */
20 | };
21 | 
22 | enum { L2R_LR, L2R_L2LOSS_SVC_DUAL, L2R_L2LOSS_SVC, L2R_L1LOSS_SVC_DUAL, MCSVM_CS, L1R_L2LOSS_SVC, L1R_LR, L2R_LR_DUAL, L2R_L2LOSS_SVR = 11, L2R_L2LOSS_SVR_DUAL, L2R_L1LOSS_SVR_DUAL }; /* solver_type */
23 | 
24 | struct parameter
25 | {
26 |     int solver_type;
27 | 
28 |     /* these are for training only */
29 |     double eps;    /* stopping criteria */
30 |     double C;
31 |     int nr_weight;
32 |     int *weight_label;
33 |     double* weight;
34 |     double p;
35 | };
36 | 
37 | struct model
38 | {
39 |     struct parameter param;
40 |     int nr_class;    /* number of classes */
41 |     int nr_feature;
42 |     double *w;
43 |     int *label;      /* label of each class */
44 |     double bias;
45 | };
46 | 
47 | struct model* train(const struct problem *prob, const struct parameter *param);
48 | void cross_validation(const struct problem *prob, const struct parameter *param, int nr_fold, double *target);
49 | 
50 | double predict_values(const struct model *model_, const struct feature_node *x, double* dec_values);
51 | double predict(const struct model *model_, const struct feature_node *x);
52 | double predict_probability(const struct model *model_, const struct feature_node *x, double* prob_estimates);
53 | 
54 | int save_model(const char *model_file_name, const struct model *model_);
55 | struct model *load_model(const char *model_file_name);
56 | 
57 | int get_nr_feature(const struct model *model_);
58 | int get_nr_class(const struct model *model_);
59 | void get_labels(const struct model *model_, int* label);
60 | 
61 | void free_model_content(struct model *model_ptr);
62 | void free_and_destroy_model(struct model **model_ptr_ptr);
63 | void destroy_param(struct parameter *param);
64 | 
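/* ------------------------------------------------------------------
   NOTE: an illustrative sketch, not part of the original header. A
   hedged, minimal example (the data values are invented, the bias is
   disabled with -1, and error handling is omitted) of how the
   structures and functions declared in this file fit together:

       struct feature_node x0[] = { {1, 0.5}, {2, 1.0}, {-1, 0} };
       struct feature_node x1[] = { {2, -1.0}, {-1, 0} };
       struct feature_node *rows[] = { x0, x1 };
       double y[] = { +1, -1 };

       struct problem prob = { 2, 2, y, rows, -1 };

       struct parameter param;
       param.solver_type = L2R_L2LOSS_SVC_DUAL;
       param.C = 1; param.eps = 0.1; param.p = 0.1;
       param.nr_weight = 0; param.weight_label = NULL; param.weight = NULL;

       if (check_parameter(&prob, &param) == NULL) {
           struct model *m = train(&prob, &param);
           save_model("demo.model", m);
           free_and_destroy_model(&m);
       }
       destroy_param(&param);
   ------------------------------------------------------------------ */
65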
| const char *check_parameter(const struct problem *prob, const struct parameter *param); 66 | int check_probability_model(const struct model *model); 67 | void set_print_string_function(void (*print_func) (const char*)); 68 | 69 | #ifdef __cplusplus 70 | } 71 | #endif 72 | 73 | #endif /* _LIBLINEAR_H */ 74 | 75 | -------------------------------------------------------------------------------- /liblinear/liblinear/predict.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include "linear.h" 7 | 8 | struct feature_node *x; 9 | int max_nr_attr = 64; 10 | 11 | struct model* model_; 12 | int flag_predict_probability=0; 13 | 14 | void exit_input_error(int line_num) 15 | { 16 | fprintf(stderr,"Wrong input format at line %d\n", line_num); 17 | exit(1); 18 | } 19 | 20 | static char *line = NULL; 21 | static int max_line_len; 22 | 23 | static char* readline(FILE *input) 24 | { 25 | int len; 26 | 27 | if(fgets(line,max_line_len,input) == NULL) 28 | return NULL; 29 | 30 | while(strrchr(line,'\n') == NULL) 31 | { 32 | max_line_len *= 2; 33 | line = (char *) realloc(line,max_line_len); 34 | len = (int) strlen(line); 35 | if(fgets(line+len,max_line_len-len,input) == NULL) 36 | break; 37 | } 38 | return line; 39 | } 40 | 41 | void do_predict(FILE *input, FILE *output) 42 | { 43 | int correct = 0; 44 | int total = 0; 45 | double error = 0; 46 | double sump = 0, sumt = 0, sumpp = 0, sumtt = 0, sumpt = 0; 47 | 48 | int nr_class=get_nr_class(model_); 49 | double *prob_estimates=NULL; 50 | int j, n; 51 | int nr_feature=get_nr_feature(model_); 52 | if(model_->bias>=0) 53 | n=nr_feature+1; 54 | else 55 | n=nr_feature; 56 | 57 | if(flag_predict_probability) 58 | { 59 | int *labels; 60 | 61 | if(!check_probability_model(model_)) 62 | { 63 | fprintf(stderr, "probability output is only supported for logistic regression\n"); 64 | exit(1); 65 | } 66 | 67 | labels=(int *) malloc(nr_class*sizeof(int)); 68 | get_labels(model_,labels); 69 | prob_estimates = (double *) malloc(nr_class*sizeof(double)); 70 | fprintf(output,"labels"); 71 | for(j=0;j=max_nr_attr-2) // need one more for index = -1 97 | { 98 | max_nr_attr *= 2; 99 | x = (struct feature_node *) realloc(x,max_nr_attr*sizeof(struct feature_node)); 100 | } 101 | 102 | idx = strtok(NULL,":"); 103 | val = strtok(NULL," \t"); 104 | 105 | if(val == NULL) 106 | break; 107 | errno = 0; 108 | x[i].index = (int) strtol(idx,&endptr,10); 109 | if(endptr == idx || errno != 0 || *endptr != '\0' || x[i].index <= inst_max_index) 110 | exit_input_error(total+1); 111 | else 112 | inst_max_index = x[i].index; 113 | 114 | errno = 0; 115 | x[i].value = strtod(val,&endptr); 116 | if(endptr == val || errno != 0 || (*endptr != '\0' && !isspace(*endptr))) 117 | exit_input_error(total+1); 118 | 119 | // feature indices larger than those in training are not used 120 | if(x[i].index <= nr_feature) 121 | ++i; 122 | } 123 | 124 | if(model_->bias>=0) 125 | { 126 | x[i].index = n; 127 | x[i].value = model_->bias; 128 | i++; 129 | } 130 | x[i].index = -1; 131 | 132 | if(flag_predict_probability) 133 | { 134 | int j; 135 | predict_label = predict_probability(model_,x,prob_estimates); 136 | fprintf(output,"%g",predict_label); 137 | for(j=0;jnr_class;j++) 138 | fprintf(output," %g",prob_estimates[j]); 139 | fprintf(output,"\n"); 140 | } 141 | else 142 | { 143 | predict_label = predict(model_,x); 144 | fprintf(output,"%g\n",predict_label); 145 | } 146 | 147 | if(predict_label == target_label) 148 | 
++correct; 149 | error += (predict_label-target_label)*(predict_label-target_label); 150 | sump += predict_label; 151 | sumt += target_label; 152 | sumpp += predict_label*predict_label; 153 | sumtt += target_label*target_label; 154 | sumpt += predict_label*target_label; 155 | ++total; 156 | } 157 | if(model_->param.solver_type==L2R_L2LOSS_SVR || 158 | model_->param.solver_type==L2R_L1LOSS_SVR_DUAL || 159 | model_->param.solver_type==L2R_L2LOSS_SVR_DUAL) 160 | { 161 | printf("Mean squared error = %g (regression)\n",error/total); 162 | printf("Squared correlation coefficient = %g (regression)\n", 163 | ((total*sumpt-sump*sumt)*(total*sumpt-sump*sumt))/ 164 | ((total*sumpp-sump*sump)*(total*sumtt-sumt*sumt)) 165 | ); 166 | } 167 | else 168 | printf("Accuracy = %g%% (%d/%d)\n",(double) correct/total*100,correct,total); 169 | if(flag_predict_probability) 170 | free(prob_estimates); 171 | } 172 | 173 | void exit_with_help() 174 | { 175 | printf( 176 | "Usage: predict [options] test_file model_file output_file\n" 177 | "options:\n" 178 | "-b probability_estimates: whether to output probability estimates, 0 or 1 (default 0); currently for logistic regression only\n" 179 | ); 180 | exit(1); 181 | } 182 | 183 | int main(int argc, char **argv) 184 | { 185 | FILE *input, *output; 186 | int i; 187 | 188 | // parse options 189 | for(i=1;i=argc) 206 | exit_with_help(); 207 | 208 | input = fopen(argv[i],"r"); 209 | if(input == NULL) 210 | { 211 | fprintf(stderr,"can't open input file %s\n",argv[i]); 212 | exit(1); 213 | } 214 | 215 | output = fopen(argv[i+2],"w"); 216 | if(output == NULL) 217 | { 218 | fprintf(stderr,"can't open output file %s\n",argv[i+2]); 219 | exit(1); 220 | } 221 | 222 | if((model_=load_model(argv[i+1]))==0) 223 | { 224 | fprintf(stderr,"can't open model file %s\n",argv[i+1]); 225 | exit(1); 226 | } 227 | 228 | x = (struct feature_node *) malloc(max_nr_attr*sizeof(struct feature_node)); 229 | do_predict(input, output); 230 | free_and_destroy_model(&model_); 231 | free(line); 232 | free(x); 233 | fclose(input); 234 | fclose(output); 235 | return 0; 236 | } 237 | 238 | -------------------------------------------------------------------------------- /liblinear/liblinear/train.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include "linear.h" 8 | #define Malloc(type,n) (type *)malloc((n)*sizeof(type)) 9 | #define INF HUGE_VAL 10 | 11 | void print_null(const char *s) {} 12 | 13 | void exit_with_help() 14 | { 15 | printf( 16 | "Usage: train [options] training_set_file [model_file]\n" 17 | "options:\n" 18 | "-s type : set type of solver (default 1)\n" 19 | " 0 -- L2-regularized logistic regression (primal)\n" 20 | " 1 -- L2-regularized L2-loss support vector classification (dual)\n" 21 | " 2 -- L2-regularized L2-loss support vector classification (primal)\n" 22 | " 3 -- L2-regularized L1-loss support vector classification (dual)\n" 23 | " 4 -- multi-class support vector classification by Crammer and Singer\n" 24 | " 5 -- L1-regularized L2-loss support vector classification\n" 25 | " 6 -- L1-regularized logistic regression\n" 26 | " 7 -- L2-regularized logistic regression (dual)\n" 27 | " 11 -- L2-regularized L2-loss epsilon support vector regression (primal)\n" 28 | " 12 -- L2-regularized L2-loss epsilon support vector regression (dual)\n" 29 | " 13 -- L2-regularized L1-loss epsilon support vector regression (dual)\n" 30 | "-c cost : set the parameter C (default 1)\n" 31 | 
"-p epsilon : set the epsilon in loss function of epsilon-SVR (default 0.1)\n" 32 | "-e epsilon : set tolerance of termination criterion\n" 33 | " -s 0 and 2\n" 34 | " |f'(w)|_2 <= eps*min(pos,neg)/l*|f'(w0)|_2,\n" 35 | " where f is the primal function and pos/neg are # of\n" 36 | " positive/negative data (default 0.01)\n" 37 | " -s 11\n" 38 | " |f'(w)|_2 <= eps*|f'(w0)|_2 (default 0.001)\n" 39 | " -s 1, 3, 4, and 7\n" 40 | " Dual maximal violation <= eps; similar to libsvm (default 0.1)\n" 41 | " -s 5 and 6\n" 42 | " |f'(w)|_1 <= eps*min(pos,neg)/l*|f'(w0)|_1,\n" 43 | " where f is the primal function (default 0.01)\n" 44 | " -s 12 and 13\n" 45 | " |f'(alpha)|_1 <= eps |f'(alpha0)|,\n" 46 | " where f is the dual function (default 0.1)\n" 47 | "-B bias : if bias >= 0, instance x becomes [x; bias]; if < 0, no bias term added (default -1)\n" 48 | "-wi weight: weights adjust the parameter C of different classes (see README for details)\n" 49 | "-v n: n-fold cross validation mode\n" 50 | "-q : quiet mode (no outputs)\n" 51 | ); 52 | exit(1); 53 | } 54 | 55 | void exit_input_error(int line_num) 56 | { 57 | fprintf(stderr,"Wrong input format at line %d\n", line_num); 58 | exit(1); 59 | } 60 | 61 | static char *line = NULL; 62 | static int max_line_len; 63 | 64 | static char* readline(FILE *input) 65 | { 66 | int len; 67 | 68 | if(fgets(line,max_line_len,input) == NULL) 69 | return NULL; 70 | 71 | while(strrchr(line,'\n') == NULL) 72 | { 73 | max_line_len *= 2; 74 | line = (char *) realloc(line,max_line_len); 75 | len = (int) strlen(line); 76 | if(fgets(line+len,max_line_len-len,input) == NULL) 77 | break; 78 | } 79 | return line; 80 | } 81 | 82 | void parse_command_line(int argc, char **argv, char *input_file_name, char *model_file_name); 83 | void read_problem(const char *filename); 84 | void do_cross_validation(); 85 | 86 | struct feature_node *x_space; 87 | struct parameter param; 88 | struct problem prob; 89 | struct model* model_; 90 | int flag_cross_validation; 91 | int nr_fold; 92 | double bias; 93 | 94 | int main(int argc, char **argv) 95 | { 96 | char input_file_name[1024]; 97 | char model_file_name[1024]; 98 | const char *error_msg; 99 | 100 | parse_command_line(argc, argv, input_file_name, model_file_name); 101 | read_problem(input_file_name); 102 | error_msg = check_parameter(&prob,¶m); 103 | 104 | if(error_msg) 105 | { 106 | fprintf(stderr,"ERROR: %s\n",error_msg); 107 | exit(1); 108 | } 109 | 110 | if(flag_cross_validation) 111 | { 112 | do_cross_validation(); 113 | } 114 | else 115 | { 116 | model_=train(&prob, ¶m); 117 | if(save_model(model_file_name, model_)) 118 | { 119 | fprintf(stderr,"can't save model to file %s\n",model_file_name); 120 | exit(1); 121 | } 122 | free_and_destroy_model(&model_); 123 | } 124 | destroy_param(¶m); 125 | free(prob.y); 126 | free(prob.x); 127 | free(x_space); 128 | free(line); 129 | 130 | return 0; 131 | } 132 | 133 | void do_cross_validation() 134 | { 135 | int i; 136 | int total_correct = 0; 137 | double total_error = 0; 138 | double sumv = 0, sumy = 0, sumvv = 0, sumyy = 0, sumvy = 0; 139 | double *target = Malloc(double, prob.l); 140 | 141 | cross_validation(&prob,¶m,nr_fold,target); 142 | if(param.solver_type == L2R_L2LOSS_SVR || 143 | param.solver_type == L2R_L1LOSS_SVR_DUAL || 144 | param.solver_type == L2R_L2LOSS_SVR_DUAL) 145 | { 146 | for(i=0;i=argc) 195 | exit_with_help(); 196 | switch(argv[i-1][1]) 197 | { 198 | case 's': 199 | param.solver_type = atoi(argv[i]); 200 | break; 201 | 202 | case 'c': 203 | param.C = atof(argv[i]); 204 | 
break; 205 | 206 | case 'p': 207 | param.p = atof(argv[i]); 208 | break; 209 | 210 | case 'e': 211 | param.eps = atof(argv[i]); 212 | break; 213 | 214 | case 'B': 215 | bias = atof(argv[i]); 216 | break; 217 | 218 | case 'w': 219 | ++param.nr_weight; 220 | param.weight_label = (int *) realloc(param.weight_label,sizeof(int)*param.nr_weight); 221 | param.weight = (double *) realloc(param.weight,sizeof(double)*param.nr_weight); 222 | param.weight_label[param.nr_weight-1] = atoi(&argv[i-1][2]); 223 | param.weight[param.nr_weight-1] = atof(argv[i]); 224 | break; 225 | 226 | case 'v': 227 | flag_cross_validation = 1; 228 | nr_fold = atoi(argv[i]); 229 | if(nr_fold < 2) 230 | { 231 | fprintf(stderr,"n-fold cross validation: n must >= 2\n"); 232 | exit_with_help(); 233 | } 234 | break; 235 | 236 | case 'q': 237 | print_func = &print_null; 238 | i--; 239 | break; 240 | 241 | default: 242 | fprintf(stderr,"unknown option: -%c\n", argv[i-1][1]); 243 | exit_with_help(); 244 | break; 245 | } 246 | } 247 | 248 | set_print_string_function(print_func); 249 | 250 | // determine filenames 251 | if(i>=argc) 252 | exit_with_help(); 253 | 254 | strcpy(input_file_name, argv[i]); 255 | 256 | if(i max_index) 378 | max_index = inst_max_index; 379 | 380 | if(prob.bias >= 0) 381 | x_space[j++].value = prob.bias; 382 | 383 | x_space[j++].index = -1; 384 | } 385 | 386 | if(prob.bias >= 0) 387 | { 388 | prob.n=max_index+1; 389 | for(i=1;iindex = prob.n; 391 | x_space[j-2].index = prob.n; 392 | } 393 | else 394 | prob.n=max_index; 395 | 396 | fclose(fp); 397 | } 398 | -------------------------------------------------------------------------------- /liblinear/liblinear/tron.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include "tron.h" 6 | 7 | #ifndef min 8 | template static inline T min(T x,T y) { return (x static inline T max(T x,T y) { return (x>y)?x:y; } 13 | #endif 14 | 15 | #ifdef __cplusplus 16 | extern "C" { 17 | #endif 18 | 19 | extern double dnrm2_(int *, double *, int *); 20 | extern double ddot_(int *, double *, int *, double *, int *); 21 | extern int daxpy_(int *, double *, double *, int *, double *, int *); 22 | extern int dscal_(int *, double *, double *, int *); 23 | 24 | #ifdef __cplusplus 25 | } 26 | #endif 27 | 28 | static void default_print(const char *buf) 29 | { 30 | fputs(buf,stdout); 31 | fflush(stdout); 32 | } 33 | 34 | void TRON::info(const char *fmt,...) 35 | { 36 | char buf[BUFSIZ]; 37 | va_list ap; 38 | va_start(ap,fmt); 39 | vsprintf(buf,fmt,ap); 40 | va_end(ap); 41 | (*tron_print_string)(buf); 42 | } 43 | 44 | TRON::TRON(const function *fun_obj, double eps, int max_iter) 45 | { 46 | this->fun_obj=const_cast(fun_obj); 47 | this->eps=eps; 48 | this->max_iter=max_iter; 49 | tron_print_string = default_print; 50 | } 51 | 52 | TRON::~TRON() 53 | { 54 | } 55 | 56 | void TRON::tron(double *w) 57 | { 58 | // Parameters for updating the iterates. 59 | double eta0 = 1e-4, eta1 = 0.25, eta2 = 0.75; 60 | 61 | // Parameters for updating the trust region size delta. 
62 | double sigma1 = 0.25, sigma2 = 0.5, sigma3 = 4; 63 | 64 | int n = fun_obj->get_nr_variable(); 65 | int i, cg_iter; 66 | double delta, snorm, one=1.0; 67 | double alpha, f, fnew, prered, actred, gs; 68 | int search = 1, iter = 1, inc = 1; 69 | double *s = new double[n]; 70 | double *r = new double[n]; 71 | double *w_new = new double[n]; 72 | double *g = new double[n]; 73 | 74 | for (i=0; ifun(w); 78 | fun_obj->grad(w, g); 79 | delta = dnrm2_(&n, g, &inc); 80 | double gnorm1 = delta; 81 | double gnorm = gnorm1; 82 | 83 | if (gnorm <= eps*gnorm1) 84 | search = 0; 85 | 86 | iter = 1; 87 | 88 | while (iter <= max_iter && search) 89 | { 90 | cg_iter = trcg(delta, g, s, r); 91 | 92 | memcpy(w_new, w, sizeof(double)*n); 93 | daxpy_(&n, &one, s, &inc, w_new, &inc); 94 | 95 | gs = ddot_(&n, g, &inc, s, &inc); 96 | prered = -0.5*(gs-ddot_(&n, s, &inc, r, &inc)); 97 | fnew = fun_obj->fun(w_new); 98 | 99 | // Compute the actual reduction. 100 | actred = f - fnew; 101 | 102 | // On the first iteration, adjust the initial step bound. 103 | snorm = dnrm2_(&n, s, &inc); 104 | if (iter == 1) 105 | delta = min(delta, snorm); 106 | 107 | // Compute prediction alpha*snorm of the step. 108 | if (fnew - f - gs <= 0) 109 | alpha = sigma3; 110 | else 111 | alpha = max(sigma1, -0.5*(gs/(fnew - f - gs))); 112 | 113 | // Update the trust region bound according to the ratio of actual to predicted reduction. 114 | if (actred < eta0*prered) 115 | delta = min(max(alpha, sigma1)*snorm, sigma2*delta); 116 | else if (actred < eta1*prered) 117 | delta = max(sigma1*delta, min(alpha*snorm, sigma2*delta)); 118 | else if (actred < eta2*prered) 119 | delta = max(sigma1*delta, min(alpha*snorm, sigma3*delta)); 120 | else 121 | delta = max(delta, min(alpha*snorm, sigma3*delta)); 122 | 123 | info("iter %2d act %5.3e pre %5.3e delta %5.3e f %5.3e |g| %5.3e CG %3d\n", iter, actred, prered, delta, f, gnorm, cg_iter); 124 | 125 | if (actred > eta0*prered) 126 | { 127 | iter++; 128 | memcpy(w, w_new, sizeof(double)*n); 129 | f = fnew; 130 | fun_obj->grad(w, g); 131 | 132 | gnorm = dnrm2_(&n, g, &inc); 133 | if (gnorm <= eps*gnorm1) 134 | break; 135 | } 136 | if (f < -1.0e+32) 137 | { 138 | info("WARNING: f < -1.0e+32\n"); 139 | break; 140 | } 141 | if (fabs(actred) <= 0 && prered <= 0) 142 | { 143 | info("WARNING: actred and prered <= 0\n"); 144 | break; 145 | } 146 | if (fabs(actred) <= 1.0e-12*fabs(f) && 147 | fabs(prered) <= 1.0e-12*fabs(f)) 148 | { 149 | info("WARNING: actred and prered too small\n"); 150 | break; 151 | } 152 | } 153 | 154 | delete[] g; 155 | delete[] r; 156 | delete[] w_new; 157 | delete[] s; 158 | } 159 | 160 | int TRON::trcg(double delta, double *g, double *s, double *r) 161 | { 162 | int i, inc = 1; 163 | int n = fun_obj->get_nr_variable(); 164 | double one = 1; 165 | double *d = new double[n]; 166 | double *Hd = new double[n]; 167 | double rTr, rnewTrnew, alpha, beta, cgtol; 168 | 169 | for (i=0; iHv(d, Hd); 185 | 186 | alpha = rTr/ddot_(&n, d, &inc, Hd, &inc); 187 | daxpy_(&n, &alpha, d, &inc, s, &inc); 188 | if (dnrm2_(&n, s, &inc) > delta) 189 | { 190 | info("cg reaches trust region boundary\n"); 191 | alpha = -alpha; 192 | daxpy_(&n, &alpha, d, &inc, s, &inc); 193 | 194 | double std = ddot_(&n, s, &inc, d, &inc); 195 | double sts = ddot_(&n, s, &inc, s, &inc); 196 | double dtd = ddot_(&n, d, &inc, d, &inc); 197 | double dsq = delta*delta; 198 | double rad = sqrt(std*std + dtd*(dsq-sts)); 199 | if (std >= 0) 200 | alpha = (dsq - sts)/(std + rad); 201 | else 202 | alpha = (rad - std)/dtd; 203 | daxpy_(&n, 
&alpha, d, &inc, s, &inc); 204 | alpha = -alpha; 205 | daxpy_(&n, &alpha, Hd, &inc, r, &inc); 206 | break; 207 | } 208 | alpha = -alpha; 209 | daxpy_(&n, &alpha, Hd, &inc, r, &inc); 210 | rnewTrnew = ddot_(&n, r, &inc, r, &inc); 211 | beta = rnewTrnew/rTr; 212 | dscal_(&n, &beta, d, &inc); 213 | daxpy_(&n, &one, r, &inc, d, &inc); 214 | rTr = rnewTrnew; 215 | } 216 | 217 | delete[] d; 218 | delete[] Hd; 219 | 220 | return(cg_iter); 221 | } 222 | 223 | double TRON::norm_inf(int n, double *x) 224 | { 225 | double dmax = fabs(x[0]); 226 | for (int i=1; i= dmax) 228 | dmax = fabs(x[i]); 229 | return(dmax); 230 | } 231 | 232 | void TRON::set_print_string(void (*print_string) (const char *buf)) 233 | { 234 | tron_print_string = print_string; 235 | } 236 | -------------------------------------------------------------------------------- /liblinear/liblinear/tron.h: -------------------------------------------------------------------------------- 1 | #ifndef _TRON_H 2 | #define _TRON_H 3 | 4 | class function 5 | { 6 | public: 7 | virtual double fun(double *w) = 0 ; 8 | virtual void grad(double *w, double *g) = 0 ; 9 | virtual void Hv(double *s, double *Hs) = 0 ; 10 | 11 | virtual int get_nr_variable(void) = 0 ; 12 | virtual ~function(void){} 13 | }; 14 | 15 | class TRON 16 | { 17 | public: 18 | TRON(const function *fun_obj, double eps = 0.1, int max_iter = 1000); 19 | ~TRON(); 20 | 21 | void tron(double *w); 22 | void set_print_string(void (*i_print) (const char *buf)); 23 | 24 | private: 25 | int trcg(double delta, double *g, double *s, double *r); 26 | double norm_inf(int n, double *x); 27 | 28 | double eps; 29 | int max_iter; 30 | function *fun_obj; 31 | void info(const char *fmt,...); 32 | void (*tron_print_string)(const char *buf); 33 | }; 34 | #endif 35 | -------------------------------------------------------------------------------- /liblinear/liblinear_predict.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include "TH.h" 6 | #include "luaT.h" 7 | 8 | #include "liblinear/linear.h" 9 | #include "linear_model_torch.h" 10 | 11 | 12 | #define CMD_LEN 2048 13 | #define Malloc(type,n) (type *)malloc((n)*sizeof(type)) 14 | 15 | void read_sparse_instance(lua_State *L, int index, double *target_label, struct feature_node *x, int feature_number, double bias) 16 | { 17 | lua_pushnumber(L,index+1);lua_gettable(L,-2); 18 | luaL_argcheck(L,lua_istable(L,1),1,"Expecting table in read_sparse_instance"); 19 | int j = 0; 20 | { 21 | // get label 22 | lua_pushnumber(L,1);lua_gettable(L,-2); 23 | *target_label = (double)lua_tonumber(L,-1); 24 | lua_pop(L,1); 25 | // get values 26 | lua_pushnumber(L,2);lua_gettable(L,-2); 27 | { 28 | lua_pushnumber(L,1);lua_gettable(L,-2); 29 | THIntTensor *indices = luaT_checkudata(L,-1,"torch.IntTensor"); 30 | lua_pop(L,1); 31 | lua_pushnumber(L,2);lua_gettable(L,-2); 32 | THFloatTensor *vals = luaT_checkudata(L,-1,"torch.FloatTensor"); 33 | lua_pop(L,1); 34 | 35 | int *indices_data = THIntTensor_data(indices); 36 | float *vals_data = THFloatTensor_data(vals); 37 | int k; 38 | for (k=0; k<(int)THIntTensor_nElement(indices); k++) 39 | { 40 | x[j].index = indices_data[k]; 41 | x[j].value = vals_data[k]; 42 | j++; 43 | } 44 | if (bias >= 0) 45 | { 46 | x[j].index = feature_number+1; 47 | x[j].value = bias; 48 | j++; 49 | } 50 | x[j++].index = -1; 51 | } 52 | lua_pop(L,1); 53 | } 54 | lua_pop(L,1); 55 | } 56 | 57 | int do_predict(lua_State *L, struct model *model_, const int 
predict_probability_flag) 58 | { 59 | int label_vector_row_num; 60 | int feature_number, testing_instance_number; 61 | int instance_index; 62 | double *ptr_predict_label; 63 | double *ptr_prob_estimates, *ptr_dec_values; 64 | struct feature_node *x; 65 | THDoubleTensor *label; 66 | THDoubleTensor *dec; 67 | 68 | int correct = 0; 69 | int total = 0; 70 | double error = 0; 71 | double sump = 0, sumt = 0, sumpp = 0, sumtt = 0, sumpt = 0; 72 | 73 | int nr_class=get_nr_class(model_); 74 | int nr_w; 75 | double *prob_estimates=NULL; 76 | 77 | if(nr_class==2 && model_->param.solver_type!=MCSVM_CS) 78 | nr_w=1; 79 | else 80 | nr_w=nr_class; 81 | 82 | 83 | luaL_argcheck(L,lua_istable(L,1),1,"Expecting table in do_predict"); 84 | 85 | // prhs[1] = testing instance matrix 86 | feature_number = get_nr_feature(model_); 87 | testing_instance_number = (int) lua_objlen(L,1); 88 | label_vector_row_num = testing_instance_number; 89 | 90 | prob_estimates = Malloc(double, nr_class); 91 | 92 | label = THDoubleTensor_newWithSize1d(testing_instance_number); 93 | 94 | if (predict_probability_flag) 95 | dec = THDoubleTensor_newWithSize2d(testing_instance_number,nr_class); 96 | else 97 | dec = THDoubleTensor_newWithSize2d(testing_instance_number,nr_w); 98 | 99 | ptr_predict_label = THDoubleTensor_data(label); 100 | ptr_prob_estimates = THDoubleTensor_data(dec); 101 | ptr_dec_values = THDoubleTensor_data(dec); 102 | 103 | x = Malloc(struct feature_node, feature_number+2); 104 | for(instance_index=0;instance_indexbias); 111 | 112 | if(predict_probability_flag) 113 | { 114 | predict_label = predict_probability(model_, x, prob_estimates); 115 | ptr_predict_label[instance_index] = predict_label; 116 | for(i=0;iparam.solver_type==L2R_L2LOSS_SVR || 143 | model_->param.solver_type==L2R_L1LOSS_SVR_DUAL || 144 | model_->param.solver_type==L2R_L2LOSS_SVR_DUAL) 145 | { 146 | printf("Mean squared error = %g (regression)\n",error/total); 147 | printf("Squared correlation coefficient = %g (regression)\n", 148 | ((total*sumpt-sump*sumt)*(total*sumpt-sump*sumt))/ 149 | ((total*sumpp-sump*sump)*(total*sumtt-sumt*sumt)) 150 | ); 151 | } 152 | else 153 | printf("Accuracy = %g%% (%d/%d)\n", (double) correct/total*100,correct,total); 154 | 155 | 156 | // label = res[1] 157 | luaT_pushudata(L,label,"torch.DoubleTensor"); 158 | 159 | // acc = res[2] : {accuracy, mean squared error, squared correlation coefficient} 160 | lua_newtable(L); 161 | lua_pushnumber(L,1); 162 | lua_pushnumber(L,(double)correct/total*100); 163 | lua_settable(L,-3); 164 | 165 | lua_pushnumber(L,2); 166 | lua_pushnumber(L,(double)error/total); 167 | lua_settable(L,-3); 168 | 169 | lua_pushnumber(L,3); 170 | lua_pushnumber(L,(double) 171 | ((total*sumpt-sump*sumt)*(total*sumpt-sump*sumt))/ 172 | ((total*sumpp-sump*sump)*(total*sumtt-sumt*sumt))); 173 | lua_settable(L,-3); 174 | 175 | // prob = res[3] 176 | luaT_pushudata(L,dec,"torch.DoubleTensor"); 177 | 178 | free(x); 179 | if(prob_estimates != NULL) 180 | free(prob_estimates); 181 | 182 | return 3; 183 | } 184 | 185 | static void exit_with_help() 186 | { 187 | printf( 188 | "Usage: [predicted_label, accuracy, decision_values/prob_estimates] = predict(testing_instance_data, model, 'liblinear_options','col')\n" 189 | "liblinear_options:\n" 190 | "-b probability_estimates: whether to output probability estimates, 0 or 1 (default 0); currently for logistic regression only\n" 191 | "Returns:\n" 192 | " predicted_label: prediction output vector.\n" 193 | " accuracy: a table with accuracy, mean squared error, squared 
correlation coefficient.\n" 194 | " prob_estimates: If selected, probability estimate vector.\n" 195 | ); 196 | } 197 | 198 | static int liblinear_predict( lua_State *L ) 199 | { 200 | int nrhs = lua_gettop(L); 201 | int prob_estimate_flag = 0; 202 | struct model *model_; 203 | 204 | if(nrhs > 3 || nrhs < 2) 205 | { 206 | exit_with_help(); 207 | return 0; 208 | } 209 | 210 | // parse options 211 | if(nrhs == 3) 212 | { 213 | int i, argc = 1; 214 | char *argv[CMD_LEN/2]; 215 | 216 | // put options in argv[] 217 | size_t slen; 218 | const char *tcmd = lua_tolstring(L,3,&slen); 219 | char cmd[slen]; 220 | strcpy(cmd,tcmd); 221 | if((argv[argc] = strtok((char*)cmd, " ")) != NULL) 222 | while((argv[++argc] = strtok(NULL, " ")) != NULL) 223 | ; 224 | 225 | for(i=1;i=argc) 229 | { 230 | exit_with_help(); 231 | return 0; 232 | } 233 | switch(argv[i-1][1]) 234 | { 235 | case 'b': 236 | prob_estimate_flag = atoi(argv[i]); 237 | break; 238 | default: 239 | printf("unknown option\n"); 240 | exit_with_help(); 241 | return 0; 242 | } 243 | } 244 | lua_pop(L,1); 245 | } 246 | 247 | model_ = Malloc(struct model, 1); 248 | torch_structure_to_liblinear_model(model_, L); 249 | lua_pop(L,1); 250 | 251 | if(prob_estimate_flag) 252 | { 253 | if(!check_probability_model(model_)) 254 | { 255 | printf("probability output is only supported for logistic regression\n"); 256 | prob_estimate_flag=0; 257 | } 258 | } 259 | int nres = do_predict(L, model_, prob_estimate_flag); 260 | // destroy model_ 261 | free_and_destroy_model(&model_); 262 | 263 | return nres; 264 | } 265 | 266 | static const struct luaL_Reg liblinear_predict_util__ [] = { 267 | {"predict", liblinear_predict}, 268 | {NULL, NULL} 269 | }; 270 | 271 | 272 | int libliblinear_predict_init(lua_State *L) 273 | { 274 | luaL_register(L, "liblinear", liblinear_predict_util__); 275 | return 1; 276 | } 277 | -------------------------------------------------------------------------------- /liblinear/liblinear_train.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include "TH.h" 8 | #include "luaT.h" 9 | 10 | #include "liblinear/linear.h" 11 | #include "linear_model_torch.h" 12 | 13 | #define CMD_LEN 2048 14 | #define Malloc(type,n) (type *)malloc((n)*sizeof(type)) 15 | #define INF HUGE_VAL 16 | 17 | #define max_(a,b) (a>=b ? a : b) 18 | #define min_(a,b) (a<=b ? 
a : b) 19 | 20 | // liblinear arguments 21 | struct parameter param; // set by parse_command_line 22 | struct problem prob; // set by read_problem 23 | struct model *model_; 24 | struct feature_node *x_space; 25 | int cross_validation_flag; 26 | int nr_fold; 27 | double bias; 28 | 29 | void print_string_default(const char *s) {printf("%s",s);} 30 | 31 | void print_null(const char *s) {} 32 | 33 | static void exit_with_help() 34 | { 35 | printf( 36 | "Usage: model = train(training_data, 'liblinear_options');\n" 37 | "liblinear_options:\n" 38 | "-s type : set type of solver (default 1)\n" 39 | " 0 -- L2-regularized logistic regression (primal)\n" 40 | " 1 -- L2-regularized L2-loss support vector classification (dual)\n" 41 | " 2 -- L2-regularized L2-loss support vector classification (primal)\n" 42 | " 3 -- L2-regularized L1-loss support vector classification (dual)\n" 43 | " 4 -- multi-class support vector classification by Crammer and Singer\n" 44 | " 5 -- L1-regularized L2-loss support vector classification\n" 45 | " 6 -- L1-regularized logistic regression\n" 46 | " 7 -- L2-regularized logistic regression (dual)\n" 47 | " 11 -- L2-regularized L2-loss epsilon support vector regression (primal)\n" 48 | " 12 -- L2-regularized L2-loss epsilon support vector regression (dual)\n" 49 | " 13 -- L2-regularized L1-loss epsilon support vector regression (dual)\n" 50 | "-c cost : set the parameter C (default 1)\n" 51 | "-p epsilon : set the epsilon in loss function of epsilon-SVR (default 0.1)\n" 52 | "-e epsilon : set tolerance of termination criterion\n" 53 | " -s 0 and 2\n" 54 | " |f'(w)|_2 <= eps*min(pos,neg)/l*|f'(w0)|_2,\n" 55 | " where f is the primal function and pos/neg are # of\n" 56 | " positive/negative data (default 0.01)\n" 57 | " -s 11\n" 58 | " |f'(w)|_2 <= eps*|f'(w0)|_2 (default 0.001)\n" 59 | " -s 1, 3, 4 and 7\n" 60 | " Dual maximal violation <= eps; similar to libsvm (default 0.1)\n" 61 | " -s 5 and 6\n" 62 | " |f'(w)|_1 <= eps*min(pos,neg)/l*|f'(w0)|_1,\n" 63 | " where f is the primal function (default 0.01)\n" 64 | " -s 12 and 13\n" 65 | " |f'(alpha)|_1 <= eps |f'(alpha0)|,\n" 66 | " where f is the dual function (default 0.1)\n" 67 | "-B bias : if bias >= 0, instance x becomes [x; bias]; if < 0, no bias term added (default -1)\n" 68 | "-wi weight: weights adjust the parameter C of different classes (see README for details)\n" 69 | "-v n: n-fold cross validation mode\n" 70 | "-q : quiet mode (no outputs)\n" 71 | ); 72 | } 73 | 74 | double do_cross_validation() 75 | { 76 | int i; 77 | int total_correct = 0; 78 | double total_error = 0; 79 | double sumv = 0, sumy = 0, sumvv = 0, sumyy = 0, sumvy = 0; 80 | double *target = Malloc(double, prob.l); 81 | double retval = 0.0; 82 | 83 | cross_validation(&prob,¶m,nr_fold,target); 84 | if(param.solver_type == L2R_L2LOSS_SVR || 85 | param.solver_type == L2R_L1LOSS_SVR_DUAL || 86 | param.solver_type == L2R_L2LOSS_SVR_DUAL) 87 | { 88 | for(i=0;i 1) 145 | { 146 | size_t slen; 147 | const char *tcmd = lua_tolstring(L,2,&slen); 148 | strncpy(cmd,tcmd,slen); 149 | if((argv[argc] = strtok((char*)cmd, " ")) != NULL) 150 | while((argv[++argc] = strtok(NULL, " ")) != NULL) 151 | ; 152 | 153 | lua_pop(L,1); 154 | } 155 | 156 | // parse options 157 | for(i=1;i=argc && argv[i-1][1] != 'q') // since option -q has no parameter 162 | return 1; 163 | switch(argv[i-1][1]) 164 | { 165 | case 's': 166 | param.solver_type = atoi(argv[i]); 167 | break; 168 | case 'c': 169 | param.C = atof(argv[i]); 170 | break; 171 | case 'p': 172 | param.p = atof(argv[i]); 173 
| break; 174 | case 'e': 175 | param.eps = atof(argv[i]); 176 | break; 177 | case 'B': 178 | bias = atof(argv[i]); 179 | break; 180 | case 'v': 181 | cross_validation_flag = 1; 182 | nr_fold = atoi(argv[i]); 183 | if(nr_fold < 2) 184 | { 185 | printf("n-fold cross validation: n must >= 2\n"); 186 | return 1; 187 | } 188 | break; 189 | case 'w': 190 | ++param.nr_weight; 191 | param.weight_label = (int *) realloc(param.weight_label,sizeof(int)*param.nr_weight); 192 | param.weight = (double *) realloc(param.weight,sizeof(double)*param.nr_weight); 193 | param.weight_label[param.nr_weight-1] = atoi(&argv[i-1][2]); 194 | param.weight[param.nr_weight-1] = atof(argv[i]); 195 | break; 196 | case 'q': 197 | print_func = &print_null; 198 | i--; 199 | break; 200 | default: 201 | printf("unknown option\n"); 202 | return 1; 203 | } 204 | } 205 | 206 | set_print_string_function(print_func); 207 | 208 | if(param.eps == INF) 209 | { 210 | switch(param.solver_type) 211 | { 212 | case L2R_LR: 213 | case L2R_L2LOSS_SVC: 214 | param.eps = 0.01; 215 | break; 216 | case L2R_L2LOSS_SVR: 217 | param.eps = 0.001; 218 | break; 219 | case L2R_L2LOSS_SVC_DUAL: 220 | case L2R_L1LOSS_SVC_DUAL: 221 | case MCSVM_CS: 222 | case L2R_LR_DUAL: 223 | param.eps = 0.1; 224 | break; 225 | case L1R_L2LOSS_SVC: 226 | case L1R_LR: 227 | param.eps = 0.01; 228 | break; 229 | case L2R_L1LOSS_SVR_DUAL: 230 | case L2R_L2LOSS_SVR_DUAL: 231 | param.eps = 0.1; 232 | break; 233 | } 234 | } 235 | return 0; 236 | } 237 | 238 | int read_problem_sparse(lua_State *L) 239 | { 240 | 241 | luaL_argcheck(L,lua_istable(L,1),1,"Expecting table in read_problem_sparse"); 242 | int label_vector_row_num = lua_objlen(L,1); 243 | int num_samples = 0; 244 | int max_index = 0; 245 | int elements; 246 | 247 | prob.l = label_vector_row_num; 248 | 249 | int i; 250 | for (i=0; i< label_vector_row_num; i++) 251 | { 252 | // get the table elem 253 | lua_pushnumber(L,i+1); 254 | lua_gettable(L,-2); 255 | if (!lua_istable(L,-1)) 256 | luaL_error(L,"expected table at index %d while getting max_index\n",i+1); 257 | { 258 | // get values 259 | lua_pushnumber(L,2);lua_gettable(L,-2); 260 | { 261 | lua_pushnumber(L,1);lua_gettable(L,-2); 262 | THIntTensor *indices = luaT_toudata(L,-1,"torch.IntTensor"); 263 | num_samples += (int)THIntTensor_nElement(indices); 264 | max_index = max_(max_index,THIntTensor_get1d(indices,indices->size[0]-1)); 265 | // lua_pushnumber(L,2);lua_gettable(L,-2); 266 | // THFloatTensor *indices = luaT_checkudata(L,-1,"torch.FloatTensor"); 267 | lua_pop(L,1); 268 | } 269 | lua_pop(L,1); 270 | } 271 | lua_pop(L,1); 272 | } 273 | 274 | elements = num_samples + prob.l*2; 275 | prob.y = Malloc(double, prob.l); 276 | prob.x = Malloc(struct feature_node*, prob.l); 277 | x_space = Malloc(struct feature_node, elements); 278 | prob.bias=bias; 279 | 280 | int j = 0; 281 | for (i=0; i= 0) 314 | { 315 | x_space[j].index = max_index+1; 316 | x_space[j].value = prob.bias; 317 | j++; 318 | } 319 | x_space[j++].index = -1; 320 | } 321 | lua_pop(L,1); 322 | } 323 | lua_pop(L,1); 324 | } 325 | if (prob.bias >= 0) 326 | prob.n = max_index+1; 327 | else 328 | prob.n = max_index; 329 | 330 | return 0; 331 | } 332 | 333 | // Interface function of torch 334 | static int liblinear_train( lua_State *L ) 335 | { 336 | 337 | const char *error_msg; 338 | // fix random seed to have same results for each run 339 | // (for cross validation) 340 | srand(1); 341 | 342 | int nrhs = lua_gettop(L); 343 | 344 | // Transform the input Matrix to libsvm format 345 | if(nrhs >= 1 && nrhs < 
3) 346 | { 347 | int err=0; 348 | 349 | if(parse_command_line(L)) 350 | { 351 | printf("parsing failed\n"); 352 | exit_with_help(); 353 | destroy_param(¶m); 354 | return 0; 355 | } 356 | 357 | err = read_problem_sparse(L); 358 | 359 | // train's original code 360 | error_msg = check_parameter(&prob, ¶m); 361 | 362 | if(err || error_msg) 363 | { 364 | if (error_msg != NULL) 365 | printf("Error: %s\n", error_msg); 366 | destroy_param(¶m); 367 | free(prob.y); 368 | free(prob.x); 369 | free(x_space); 370 | return 0; 371 | } 372 | 373 | if(cross_validation_flag) 374 | { 375 | lua_pushnumber(L,do_cross_validation()); 376 | } 377 | else 378 | { 379 | model_ = train(&prob, ¶m); 380 | liblinear_model_to_torch_structure(L, model_); 381 | free_and_destroy_model(&model_); 382 | } 383 | destroy_param(¶m); 384 | free(prob.y); 385 | free(prob.x); 386 | free(x_space); 387 | return 1; 388 | } 389 | else 390 | { 391 | exit_with_help(); 392 | return 0; 393 | } 394 | return 0; 395 | } 396 | 397 | static const struct luaL_Reg liblinear_util__ [] = { 398 | {"train", liblinear_train}, 399 | {NULL, NULL} 400 | }; 401 | 402 | 403 | int libliblinear_train_init(lua_State *L) 404 | { 405 | luaL_register(L, "liblinear", liblinear_util__); 406 | return 1; 407 | } 408 | -------------------------------------------------------------------------------- /liblinear/linear_model_torch.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "liblinear/linear.h" 4 | 5 | #include "TH.h" 6 | #include "luaT.h" 7 | 8 | #define Malloc(type,n) (type *)malloc((n)*sizeof(type)) 9 | 10 | int liblinear_model_to_torch_structure(lua_State *L, struct model *model_) 11 | { 12 | int i; 13 | int nr_w; 14 | int n, w_size; 15 | 16 | // model table 17 | lua_newtable(L); 18 | 19 | // solver type (Parameters, but we only use solver_type) 20 | lua_pushstring(L,"solver_type"); 21 | lua_pushinteger(L,model_->param.solver_type); 22 | lua_settable(L,-3); 23 | 24 | // nr_class 25 | lua_pushstring(L,"nr_class"); 26 | lua_pushinteger(L,model_->nr_class); 27 | lua_settable(L,-3); 28 | 29 | if(model_->nr_class==2 && model_->param.solver_type != MCSVM_CS) 30 | nr_w=1; 31 | else 32 | nr_w=model_->nr_class; 33 | 34 | // nr_feature 35 | lua_pushstring(L,"nr_feature"); 36 | lua_pushinteger(L,model_->nr_feature); 37 | lua_settable(L,-3); 38 | 39 | // bias 40 | lua_pushstring(L,"bias"); 41 | lua_pushnumber(L,model_->bias); 42 | lua_settable(L,-3); 43 | 44 | if(model_->bias>=0) 45 | n=model_->nr_feature+1; 46 | else 47 | n=model_->nr_feature; 48 | 49 | w_size = n; 50 | // Label 51 | THIntTensor *label; 52 | if(model_->label) 53 | { 54 | label = THIntTensor_newWithSize1d((long)(model_->nr_class)); 55 | int *label_data = THIntTensor_data(label); 56 | for(i = 0; i < model_->nr_class; i++) 57 | label_data[i] = model_->label[i]; 58 | } 59 | else 60 | { 61 | label = THIntTensor_new(); 62 | } 63 | lua_pushstring(L,"label"); 64 | luaT_pushudata(L,label,"torch.IntTensor"); 65 | lua_settable(L,-3); 66 | 67 | // w 68 | THDoubleTensor *w = THDoubleTensor_newWithSize2d((long)nr_w,(long)w_size); 69 | double * w_data = THDoubleTensor_data(w); 70 | for(i = 0; i < w_size*nr_w; i++) 71 | w_data[i]=model_->w[i]; 72 | lua_pushstring(L,"weight"); 73 | luaT_pushudata(L,w,"torch.DoubleTensor"); 74 | lua_settable(L,-3); 75 | 76 | return 1; 77 | } 78 | 79 | int torch_structure_to_liblinear_model(struct model *model_, lua_State *L) 80 | { 81 | int i, num_of_fields; 82 | int nr_w; 83 | int n, w_size; 84 | 85 | num_of_fields = 
lua_gettop(L); 86 | 87 | // init 88 | model_->nr_class=0; 89 | nr_w=0; 90 | model_->nr_feature=0; 91 | model_->w=NULL; 92 | model_->label=NULL; 93 | 94 | // Parameters 95 | lua_pushstring(L,"solver_type"); 96 | lua_gettable(L,-2); 97 | if (!lua_isnumber(L,-1)) 98 | luaL_error(L,"solver_type expected to be integer %s",luaL_typename(L,-1)); 99 | model_->param.solver_type = lua_tointeger(L,-1); 100 | lua_pop(L,1); 101 | 102 | // nr_class 103 | lua_pushstring(L,"nr_class"); 104 | lua_gettable(L,-2); 105 | if (!lua_isnumber(L,-1)) 106 | luaL_error(L,"nr_class expected to be integer"); 107 | model_->nr_class = lua_tointeger(L,-1); 108 | lua_pop(L,1); 109 | 110 | if(model_->nr_class==2 && model_->param.solver_type != MCSVM_CS) 111 | nr_w=1; 112 | else 113 | nr_w=model_->nr_class; 114 | 115 | // nr_feature 116 | lua_pushstring(L,"nr_feature"); 117 | lua_gettable(L,-2); 118 | if (!lua_isnumber(L,-1)) 119 | luaL_error(L,"nr_feature expected to be integer"); 120 | model_->nr_feature = lua_tointeger(L,-1); 121 | lua_pop(L,1); 122 | 123 | // bias 124 | lua_pushstring(L,"bias"); 125 | lua_gettable(L,-2); 126 | if (!lua_isnumber(L,-1)) 127 | luaL_error(L,"bias expected to be a number"); 128 | model_->bias = lua_tonumber(L,-1); 129 | lua_pop(L,1); 130 | 131 | if(model_->bias>=0) 132 | n=model_->nr_feature+1; 133 | else 134 | n=model_->nr_feature; 135 | w_size = n; 136 | 137 | // Label 138 | lua_pushstring(L,"label"); 139 | lua_gettable(L,-2); 140 | THIntTensor *label = luaT_checkudata(L,-1,"torch.IntTensor"); 141 | lua_pop(L,1); 142 | int nlabel = (int)THIntTensor_nElement(label); 143 | if( nlabel > 0) 144 | { 145 | if (nlabel != model_->nr_class) 146 | luaL_error(L,"Number of elements in label vector is different than nr_class"); 147 | 148 | int *label_data = THIntTensor_data(label); 149 | model_->label = Malloc(int, model_->nr_class); 150 | for(i=0;inr_class;i++) 151 | model_->label[i] = label_data[i]; 152 | } 153 | 154 | //w 155 | lua_pushstring(L,"weight"); 156 | lua_gettable(L,-2); 157 | THDoubleTensor *w = luaT_checkudata(L,-1,"torch.DoubleTensor"); 158 | lua_pop(L,1); 159 | double *w_data = THDoubleTensor_data(w); 160 | model_->w=Malloc(double, w_size*nr_w); 161 | for(i = 0; i < w_size*nr_w; i++) 162 | model_->w[i] = w_data[i]; 163 | 164 | return 1; 165 | } 166 | 167 | 168 | -------------------------------------------------------------------------------- /liblinear/linear_model_torch.h: -------------------------------------------------------------------------------- 1 | #include "TH.h" 2 | #include "luaT.h" 3 | 4 | int liblinear_model_to_torch_structure( lua_State *L, struct model *model_); 5 | int torch_structure_to_liblinear_model(struct model *model_, lua_State *L); 6 | -------------------------------------------------------------------------------- /libsvm/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | SET(src 3 | svm_model_torch.h 4 | svm_model_torch.c 5 | init.c 6 | libsvm_train.c 7 | libsvm_predict.c 8 | libsvm/svm.h 9 | libsvm/svm.cpp 10 | ) 11 | 12 | SET(luasrc init.lua 13 | ) 14 | 15 | ADD_TORCH_PACKAGE(libsvm "${src}" "${luasrc}" "LIBSVM Interface") 16 | 17 | TARGET_LINK_LIBRARIES(libsvm luaT TH ) 18 | -------------------------------------------------------------------------------- /libsvm/COPYRIGHT: -------------------------------------------------------------------------------- 1 | 2 | The following is the copyright for the LIBSVM project. We include their 3 | sources in this package. 
4 | 5 | 6 | Copyright (c) 2000-2012 Chih-Chung Chang and Chih-Jen Lin 7 | All rights reserved. 8 | 9 | Redistribution and use in source and binary forms, with or without 10 | modification, are permitted provided that the following conditions 11 | are met: 12 | 13 | 1. Redistributions of source code must retain the above copyright 14 | notice, this list of conditions and the following disclaimer. 15 | 16 | 2. Redistributions in binary form must reproduce the above copyright 17 | notice, this list of conditions and the following disclaimer in the 18 | documentation and/or other materials provided with the distribution. 19 | 20 | 3. Neither name of copyright holders nor the names of its contributors 21 | may be used to endorse or promote products derived from this software 22 | without specific prior written permission. 23 | 24 | 25 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 26 | ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 27 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 28 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR 29 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 30 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 31 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 32 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 33 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 34 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 35 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 36 | -------------------------------------------------------------------------------- /libsvm/init.c: -------------------------------------------------------------------------------- 1 | 2 | #include "luaT.h" 3 | 4 | extern int liblibsvm_predict_init(lua_State *L); 5 | extern int liblibsvm_train_init(lua_State *L); 6 | 7 | DLL_EXPORT int luaopen_liblibsvm(lua_State *L) 8 | { 9 | liblibsvm_predict_init(L); 10 | liblibsvm_train_init(L); 11 | return 1; 12 | } 13 | -------------------------------------------------------------------------------- /libsvm/init.lua: -------------------------------------------------------------------------------- 1 | require 'liblibsvm' -------------------------------------------------------------------------------- /libsvm/libsvm/COPYRIGHT: -------------------------------------------------------------------------------- 1 | 2 | Copyright (c) 2000-2012 Chih-Chung Chang and Chih-Jen Lin 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions 7 | are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright 10 | notice, this list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright 13 | notice, this list of conditions and the following disclaimer in the 14 | documentation and/or other materials provided with the distribution. 15 | 16 | 3. Neither name of copyright holders nor the names of its contributors 17 | may be used to endorse or promote products derived from this software 18 | without specific prior written permission. 
19 |
20 |
21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 | ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR
25 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
26 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
27 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
28 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
29 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
30 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
31 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 |
-------------------------------------------------------------------------------- /libsvm/libsvm/Makefile: --------------------------------------------------------------------------------
1 | CXX ?= g++
2 | CFLAGS = -Wall -Wconversion -O3 -fPIC
3 | SHVER = 2
4 | OS = $(shell uname)
5 |
6 | all: svm-train svm-predict svm-scale
7 |
8 | lib: svm.o
9 | 	if [ "$(OS)" = "Darwin" ]; then \
10 | 		SHARED_LIB_FLAG="-dynamiclib -Wl,-install_name,libsvm.so.$(SHVER)"; \
11 | 	else \
12 | 		SHARED_LIB_FLAG="-shared -Wl,-soname,libsvm.so.$(SHVER)"; \
13 | 	fi; \
14 | 	$(CXX) $${SHARED_LIB_FLAG} svm.o -o libsvm.so.$(SHVER)
15 |
16 | svm-predict: svm-predict.c svm.o
17 | 	$(CXX) $(CFLAGS) svm-predict.c svm.o -o svm-predict -lm
18 | svm-train: svm-train.c svm.o
19 | 	$(CXX) $(CFLAGS) svm-train.c svm.o -o svm-train -lm
20 | svm-scale: svm-scale.c
21 | 	$(CXX) $(CFLAGS) svm-scale.c -o svm-scale
22 | svm.o: svm.cpp svm.h
23 | 	$(CXX) $(CFLAGS) -c svm.cpp
24 | clean:
25 | 	rm -f *~ svm.o svm-train svm-predict svm-scale libsvm.so.$(SHVER)
26 |
-------------------------------------------------------------------------------- /libsvm/libsvm/Makefile.win: --------------------------------------------------------------------------------
1 | #You must ensure nmake.exe, cl.exe, link.exe are in system path.
2 | #VCVARS32.bat
3 | #Under dosbox prompt
4 | #nmake -f Makefile.win
5 |
6 | ##########################################
7 | CXX = cl.exe
8 | CFLAGS = -nologo -O2 -EHsc -I. -D __WIN32__ -D _CRT_SECURE_NO_DEPRECATE
9 | TARGET = windows
10 |
11 | all: $(TARGET)\svm-train.exe $(TARGET)\svm-predict.exe $(TARGET)\svm-scale.exe $(TARGET)\svm-toy.exe lib
12 |
13 | $(TARGET)\svm-predict.exe: svm.h svm-predict.c svm.obj
14 | 	$(CXX) $(CFLAGS) svm-predict.c svm.obj -Fe$(TARGET)\svm-predict.exe
15 |
16 | $(TARGET)\svm-train.exe: svm.h svm-train.c svm.obj
17 | 	$(CXX) $(CFLAGS) svm-train.c svm.obj -Fe$(TARGET)\svm-train.exe
18 |
19 | $(TARGET)\svm-scale.exe: svm.h svm-scale.c
20 | 	$(CXX) $(CFLAGS) svm-scale.c -Fe$(TARGET)\svm-scale.exe
21 |
22 | $(TARGET)\svm-toy.exe: svm.h svm.obj svm-toy\windows\svm-toy.cpp
23 | 	$(CXX) $(CFLAGS) svm-toy\windows\svm-toy.cpp svm.obj user32.lib gdi32.lib comdlg32.lib -Fe$(TARGET)\svm-toy.exe
24 |
25 | svm.obj: svm.cpp svm.h
26 | 	$(CXX) $(CFLAGS) -c svm.cpp
27 |
28 | lib: svm.cpp svm.h svm.def
29 | 	$(CXX) $(CFLAGS) -LD svm.cpp -Fe$(TARGET)\libsvm -link -DEF:svm.def
30 |
31 | clean:
32 | 	-erase /Q *.obj $(TARGET)\.
33 | 34 | -------------------------------------------------------------------------------- /libsvm/libsvm/svm-predict.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include "svm.h" 7 | 8 | struct svm_node *x; 9 | int max_nr_attr = 64; 10 | 11 | struct svm_model* model; 12 | int predict_probability=0; 13 | 14 | static char *line = NULL; 15 | static int max_line_len; 16 | 17 | static char* readline(FILE *input) 18 | { 19 | int len; 20 | 21 | if(fgets(line,max_line_len,input) == NULL) 22 | return NULL; 23 | 24 | while(strrchr(line,'\n') == NULL) 25 | { 26 | max_line_len *= 2; 27 | line = (char *) realloc(line,max_line_len); 28 | len = (int) strlen(line); 29 | if(fgets(line+len,max_line_len-len,input) == NULL) 30 | break; 31 | } 32 | return line; 33 | } 34 | 35 | void exit_input_error(int line_num) 36 | { 37 | fprintf(stderr,"Wrong input format at line %d\n", line_num); 38 | exit(1); 39 | } 40 | 41 | void predict(FILE *input, FILE *output) 42 | { 43 | int correct = 0; 44 | int total = 0; 45 | double error = 0; 46 | double sump = 0, sumt = 0, sumpp = 0, sumtt = 0, sumpt = 0; 47 | 48 | int svm_type=svm_get_svm_type(model); 49 | int nr_class=svm_get_nr_class(model); 50 | double *prob_estimates=NULL; 51 | int j; 52 | 53 | if(predict_probability) 54 | { 55 | if (svm_type==NU_SVR || svm_type==EPSILON_SVR) 56 | printf("Prob. model for test data: target value = predicted value + z,\nz: Laplace distribution e^(-|z|/sigma)/(2sigma),sigma=%g\n",svm_get_svr_probability(model)); 57 | else 58 | { 59 | int *labels=(int *) malloc(nr_class*sizeof(int)); 60 | svm_get_labels(model,labels); 61 | prob_estimates = (double *) malloc(nr_class*sizeof(double)); 62 | fprintf(output,"labels"); 63 | for(j=0;j start from 0 78 | 79 | label = strtok(line," \t\n"); 80 | if(label == NULL) // empty line 81 | exit_input_error(total+1); 82 | 83 | target_label = strtod(label,&endptr); 84 | if(endptr == label || *endptr != '\0') 85 | exit_input_error(total+1); 86 | 87 | while(1) 88 | { 89 | if(i>=max_nr_attr-1) // need one more for index = -1 90 | { 91 | max_nr_attr *= 2; 92 | x = (struct svm_node *) realloc(x,max_nr_attr*sizeof(struct svm_node)); 93 | } 94 | 95 | idx = strtok(NULL,":"); 96 | val = strtok(NULL," \t"); 97 | 98 | if(val == NULL) 99 | break; 100 | errno = 0; 101 | x[i].index = (int) strtol(idx,&endptr,10); 102 | if(endptr == idx || errno != 0 || *endptr != '\0' || x[i].index <= inst_max_index) 103 | exit_input_error(total+1); 104 | else 105 | inst_max_index = x[i].index; 106 | 107 | errno = 0; 108 | x[i].value = strtod(val,&endptr); 109 | if(endptr == val || errno != 0 || (*endptr != '\0' && !isspace(*endptr))) 110 | exit_input_error(total+1); 111 | 112 | ++i; 113 | } 114 | x[i].index = -1; 115 | 116 | if (predict_probability && (svm_type==C_SVC || svm_type==NU_SVC)) 117 | { 118 | predict_label = svm_predict_probability(model,x,prob_estimates); 119 | fprintf(output,"%g",predict_label); 120 | for(j=0;j=argc-2) 186 | exit_with_help(); 187 | 188 | input = fopen(argv[i],"r"); 189 | if(input == NULL) 190 | { 191 | fprintf(stderr,"can't open input file %s\n",argv[i]); 192 | exit(1); 193 | } 194 | 195 | output = fopen(argv[i+2],"w"); 196 | if(output == NULL) 197 | { 198 | fprintf(stderr,"can't open output file %s\n",argv[i+2]); 199 | exit(1); 200 | } 201 | 202 | if((model=svm_load_model(argv[i+1]))==0) 203 | { 204 | fprintf(stderr,"can't open model file %s\n",argv[i+1]); 205 | exit(1); 206 | } 207 | 208 | x = (struct 
svm_node *) malloc(max_nr_attr*sizeof(struct svm_node)); 209 | if(predict_probability) 210 | { 211 | if(svm_check_probability_model(model)==0) 212 | { 213 | fprintf(stderr,"Model does not support probabiliy estimates\n"); 214 | exit(1); 215 | } 216 | } 217 | else 218 | { 219 | if(svm_check_probability_model(model)!=0) 220 | printf("Model supports probability estimates, but disabled in prediction.\n"); 221 | } 222 | predict(input,output); 223 | svm_free_and_destroy_model(&model); 224 | free(x); 225 | free(line); 226 | fclose(input); 227 | fclose(output); 228 | return 0; 229 | } 230 | -------------------------------------------------------------------------------- /libsvm/libsvm/svm-scale.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | void exit_with_help() 8 | { 9 | printf( 10 | "Usage: svm-scale [options] data_filename\n" 11 | "options:\n" 12 | "-l lower : x scaling lower limit (default -1)\n" 13 | "-u upper : x scaling upper limit (default +1)\n" 14 | "-y y_lower y_upper : y scaling limits (default: no y scaling)\n" 15 | "-s save_filename : save scaling parameters to save_filename\n" 16 | "-r restore_filename : restore scaling parameters from restore_filename\n" 17 | ); 18 | exit(1); 19 | } 20 | 21 | char *line = NULL; 22 | int max_line_len = 1024; 23 | double lower=-1.0,upper=1.0,y_lower,y_upper; 24 | int y_scaling = 0; 25 | double *feature_max; 26 | double *feature_min; 27 | double y_max = -DBL_MAX; 28 | double y_min = DBL_MAX; 29 | int max_index; 30 | long int num_nonzeros = 0; 31 | long int new_num_nonzeros = 0; 32 | 33 | #define max(x,y) (((x)>(y))?(x):(y)) 34 | #define min(x,y) (((x)<(y))?(x):(y)) 35 | 36 | void output_target(double value); 37 | void output(int index, double value); 38 | char* readline(FILE *input); 39 | 40 | int main(int argc,char **argv) 41 | { 42 | int i,index; 43 | FILE *fp, *fp_restore = NULL; 44 | char *save_filename = NULL; 45 | char *restore_filename = NULL; 46 | 47 | for(i=1;i lower) || (y_scaling && !(y_upper > y_lower))) 70 | { 71 | fprintf(stderr,"inconsistent lower/upper specification\n"); 72 | exit(1); 73 | } 74 | 75 | if(restore_filename && save_filename) 76 | { 77 | fprintf(stderr,"cannot use -r and -s simultaneously\n"); 78 | exit(1); 79 | } 80 | 81 | if(argc != i+1) 82 | exit_with_help(); 83 | 84 | fp=fopen(argv[i],"r"); 85 | 86 | if(fp==NULL) 87 | { 88 | fprintf(stderr,"can't open file %s\n", argv[i]); 89 | exit(1); 90 | } 91 | 92 | line = (char *) malloc(max_line_len*sizeof(char)); 93 | 94 | #define SKIP_TARGET\ 95 | while(isspace(*p)) ++p;\ 96 | while(!isspace(*p)) ++p; 97 | 98 | #define SKIP_ELEMENT\ 99 | while(*p!=':') ++p;\ 100 | ++p;\ 101 | while(isspace(*p)) ++p;\ 102 | while(*p && !isspace(*p)) ++p; 103 | 104 | /* assumption: min index of attributes is 1 */ 105 | /* pass 1: find out max index of attributes */ 106 | max_index = 0; 107 | 108 | if(restore_filename) 109 | { 110 | int idx, c; 111 | 112 | fp_restore = fopen(restore_filename,"r"); 113 | if(fp_restore==NULL) 114 | { 115 | fprintf(stderr,"can't open file %s\n", restore_filename); 116 | exit(1); 117 | } 118 | 119 | c = fgetc(fp_restore); 120 | if(c == 'y') 121 | { 122 | readline(fp_restore); 123 | readline(fp_restore); 124 | readline(fp_restore); 125 | } 126 | readline(fp_restore); 127 | readline(fp_restore); 128 | 129 | while(fscanf(fp_restore,"%d %*f %*f\n",&idx) == 1) 130 | max_index = max(idx,max_index); 131 | rewind(fp_restore); 132 | } 133 | 134 | 
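/* finish pass 1 on the data file itself: scan every line once so that
   max_index covers all features seen and num_nonzeros counts the
   entries, which sizes feature_max/feature_min before pass 2 below */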
while(readline(fp)!=NULL) 135 | { 136 | char *p=line; 137 | 138 | SKIP_TARGET 139 | 140 | while(sscanf(p,"%d:%*f",&index)==1) 141 | { 142 | max_index = max(max_index, index); 143 | SKIP_ELEMENT 144 | num_nonzeros++; 145 | } 146 | } 147 | rewind(fp); 148 | 149 | feature_max = (double *)malloc((max_index+1)* sizeof(double)); 150 | feature_min = (double *)malloc((max_index+1)* sizeof(double)); 151 | 152 | if(feature_max == NULL || feature_min == NULL) 153 | { 154 | fprintf(stderr,"can't allocate enough memory\n"); 155 | exit(1); 156 | } 157 | 158 | for(i=0;i<=max_index;i++) 159 | { 160 | feature_max[i]=-DBL_MAX; 161 | feature_min[i]=DBL_MAX; 162 | } 163 | 164 | /* pass 2: find out min/max value */ 165 | while(readline(fp)!=NULL) 166 | { 167 | char *p=line; 168 | int next_index=1; 169 | double target; 170 | double value; 171 | 172 | sscanf(p,"%lf",&target); 173 | y_max = max(y_max,target); 174 | y_min = min(y_min,target); 175 | 176 | SKIP_TARGET 177 | 178 | while(sscanf(p,"%d:%lf",&index,&value)==2) 179 | { 180 | for(i=next_index;i num_nonzeros) 288 | fprintf(stderr, 289 | "WARNING: original #nonzeros %ld\n" 290 | " new #nonzeros %ld\n" 291 | "Use -l 0 if many original feature values are zeros\n", 292 | num_nonzeros, new_num_nonzeros); 293 | 294 | free(line); 295 | free(feature_max); 296 | free(feature_min); 297 | fclose(fp); 298 | return 0; 299 | } 300 | 301 | char* readline(FILE *input) 302 | { 303 | int len; 304 | 305 | if(fgets(line,max_line_len,input) == NULL) 306 | return NULL; 307 | 308 | while(strrchr(line,'\n') == NULL) 309 | { 310 | max_line_len *= 2; 311 | line = (char *) realloc(line, max_line_len); 312 | len = (int) strlen(line); 313 | if(fgets(line+len,max_line_len-len,input) == NULL) 314 | break; 315 | } 316 | return line; 317 | } 318 | 319 | void output_target(double value) 320 | { 321 | if(y_scaling) 322 | { 323 | if(value == y_min) 324 | value = y_lower; 325 | else if(value == y_max) 326 | value = y_upper; 327 | else value = y_lower + (y_upper-y_lower) * 328 | (value - y_min)/(y_max-y_min); 329 | } 330 | printf("%g ",value); 331 | } 332 | 333 | void output(int index, double value) 334 | { 335 | /* skip single-valued attribute */ 336 | if(feature_max[index] == feature_min[index]) 337 | return; 338 | 339 | if(value == feature_min[index]) 340 | value = lower; 341 | else if(value == feature_max[index]) 342 | value = upper; 343 | else 344 | value = lower + (upper-lower) * 345 | (value-feature_min[index])/ 346 | (feature_max[index]-feature_min[index]); 347 | 348 | if(value != 0) 349 | { 350 | printf("%d:%g ",index, value); 351 | new_num_nonzeros++; 352 | } 353 | } 354 | -------------------------------------------------------------------------------- /libsvm/libsvm/svm-train.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include "svm.h" 7 | #define Malloc(type,n) (type *)malloc((n)*sizeof(type)) 8 | 9 | void print_null(const char *s) {} 10 | 11 | void exit_with_help() 12 | { 13 | printf( 14 | "Usage: svm-train [options] training_set_file [model_file]\n" 15 | "options:\n" 16 | "-s svm_type : set type of SVM (default 0)\n" 17 | " 0 -- C-SVC\n" 18 | " 1 -- nu-SVC\n" 19 | " 2 -- one-class SVM\n" 20 | " 3 -- epsilon-SVR\n" 21 | " 4 -- nu-SVR\n" 22 | "-t kernel_type : set type of kernel function (default 2)\n" 23 | " 0 -- linear: u'*v\n" 24 | " 1 -- polynomial: (gamma*u'*v + coef0)^degree\n" 25 | " 2 -- radial basis function: exp(-gamma*|u-v|^2)\n" 26 | " 3 -- sigmoid: 
tanh(gamma*u'*v + coef0)\n" 27 | " 4 -- precomputed kernel (kernel values in training_set_file)\n" 28 | "-d degree : set degree in kernel function (default 3)\n" 29 | "-g gamma : set gamma in kernel function (default 1/num_features)\n" 30 | "-r coef0 : set coef0 in kernel function (default 0)\n" 31 | "-c cost : set the parameter C of C-SVC, epsilon-SVR, and nu-SVR (default 1)\n" 32 | "-n nu : set the parameter nu of nu-SVC, one-class SVM, and nu-SVR (default 0.5)\n" 33 | "-p epsilon : set the epsilon in loss function of epsilon-SVR (default 0.1)\n" 34 | "-m cachesize : set cache memory size in MB (default 100)\n" 35 | "-e epsilon : set tolerance of termination criterion (default 0.001)\n" 36 | "-h shrinking : whether to use the shrinking heuristics, 0 or 1 (default 1)\n" 37 | "-b probability_estimates : whether to train a SVC or SVR model for probability estimates, 0 or 1 (default 0)\n" 38 | "-wi weight : set the parameter C of class i to weight*C, for C-SVC (default 1)\n" 39 | "-v n: n-fold cross validation mode\n" 40 | "-q : quiet mode (no outputs)\n" 41 | ); 42 | exit(1); 43 | } 44 | 45 | void exit_input_error(int line_num) 46 | { 47 | fprintf(stderr,"Wrong input format at line %d\n", line_num); 48 | exit(1); 49 | } 50 | 51 | void parse_command_line(int argc, char **argv, char *input_file_name, char *model_file_name); 52 | void read_problem(const char *filename); 53 | void do_cross_validation(); 54 | 55 | struct svm_parameter param; // set by parse_command_line 56 | struct svm_problem prob; // set by read_problem 57 | struct svm_model *model; 58 | struct svm_node *x_space; 59 | int cross_validation; 60 | int nr_fold; 61 | 62 | static char *line = NULL; 63 | static int max_line_len; 64 | 65 | static char* readline(FILE *input) 66 | { 67 | int len; 68 | 69 | if(fgets(line,max_line_len,input) == NULL) 70 | return NULL; 71 | 72 | while(strrchr(line,'\n') == NULL) 73 | { 74 | max_line_len *= 2; 75 | line = (char *) realloc(line,max_line_len); 76 | len = (int) strlen(line); 77 | if(fgets(line+len,max_line_len-len,input) == NULL) 78 | break; 79 | } 80 | return line; 81 | } 82 | 83 | int main(int argc, char **argv) 84 | { 85 | char input_file_name[1024]; 86 | char model_file_name[1024]; 87 | const char *error_msg; 88 | 89 | parse_command_line(argc, argv, input_file_name, model_file_name); 90 | read_problem(input_file_name); 91 | error_msg = svm_check_parameter(&prob,¶m); 92 | 93 | if(error_msg) 94 | { 95 | fprintf(stderr,"ERROR: %s\n",error_msg); 96 | exit(1); 97 | } 98 | 99 | if(cross_validation) 100 | { 101 | do_cross_validation(); 102 | } 103 | else 104 | { 105 | model = svm_train(&prob,¶m); 106 | if(svm_save_model(model_file_name,model)) 107 | { 108 | fprintf(stderr, "can't save model to file %s\n", model_file_name); 109 | exit(1); 110 | } 111 | svm_free_and_destroy_model(&model); 112 | } 113 | svm_destroy_param(¶m); 114 | free(prob.y); 115 | free(prob.x); 116 | free(x_space); 117 | free(line); 118 | 119 | return 0; 120 | } 121 | 122 | void do_cross_validation() 123 | { 124 | int i; 125 | int total_correct = 0; 126 | double total_error = 0; 127 | double sumv = 0, sumy = 0, sumvv = 0, sumyy = 0, sumvy = 0; 128 | double *target = Malloc(double,prob.l); 129 | 130 | svm_cross_validation(&prob,¶m,nr_fold,target); 131 | if(param.svm_type == EPSILON_SVR || 132 | param.svm_type == NU_SVR) 133 | { 134 | for(i=0;i=argc) 189 | exit_with_help(); 190 | switch(argv[i-1][1]) 191 | { 192 | case 's': 193 | param.svm_type = atoi(argv[i]); 194 | break; 195 | case 't': 196 | param.kernel_type = atoi(argv[i]); 
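/* the -t codes map one-to-one onto the kernel_type enum in svm.h:
   0=LINEAR, 1=POLY, 2=RBF, 3=SIGMOID, 4=PRECOMPUTED */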
197 | break; 198 | case 'd': 199 | param.degree = atoi(argv[i]); 200 | break; 201 | case 'g': 202 | param.gamma = atof(argv[i]); 203 | break; 204 | case 'r': 205 | param.coef0 = atof(argv[i]); 206 | break; 207 | case 'n': 208 | param.nu = atof(argv[i]); 209 | break; 210 | case 'm': 211 | param.cache_size = atof(argv[i]); 212 | break; 213 | case 'c': 214 | param.C = atof(argv[i]); 215 | break; 216 | case 'e': 217 | param.eps = atof(argv[i]); 218 | break; 219 | case 'p': 220 | param.p = atof(argv[i]); 221 | break; 222 | case 'h': 223 | param.shrinking = atoi(argv[i]); 224 | break; 225 | case 'b': 226 | param.probability = atoi(argv[i]); 227 | break; 228 | case 'q': 229 | print_func = &print_null; 230 | i--; 231 | break; 232 | case 'v': 233 | cross_validation = 1; 234 | nr_fold = atoi(argv[i]); 235 | if(nr_fold < 2) 236 | { 237 | fprintf(stderr,"n-fold cross validation: n must >= 2\n"); 238 | exit_with_help(); 239 | } 240 | break; 241 | case 'w': 242 | ++param.nr_weight; 243 | param.weight_label = (int *)realloc(param.weight_label,sizeof(int)*param.nr_weight); 244 | param.weight = (double *)realloc(param.weight,sizeof(double)*param.nr_weight); 245 | param.weight_label[param.nr_weight-1] = atoi(&argv[i-1][2]); 246 | param.weight[param.nr_weight-1] = atof(argv[i]); 247 | break; 248 | default: 249 | fprintf(stderr,"Unknown option: -%c\n", argv[i-1][1]); 250 | exit_with_help(); 251 | } 252 | } 253 | 254 | svm_set_print_string_function(print_func); 255 | 256 | // determine filenames 257 | 258 | if(i>=argc) 259 | exit_with_help(); 260 | 261 | strcpy(input_file_name, argv[i]); 262 | 263 | if(i start from 0 322 | readline(fp); 323 | prob.x[i] = &x_space[j]; 324 | label = strtok(line," \t\n"); 325 | if(label == NULL) // empty line 326 | exit_input_error(i+1); 327 | 328 | prob.y[i] = strtod(label,&endptr); 329 | if(endptr == label || *endptr != '\0') 330 | exit_input_error(i+1); 331 | 332 | while(1) 333 | { 334 | idx = strtok(NULL,":"); 335 | val = strtok(NULL," \t"); 336 | 337 | if(val == NULL) 338 | break; 339 | 340 | errno = 0; 341 | x_space[j].index = (int) strtol(idx,&endptr,10); 342 | if(endptr == idx || errno != 0 || *endptr != '\0' || x_space[j].index <= inst_max_index) 343 | exit_input_error(i+1); 344 | else 345 | inst_max_index = x_space[j].index; 346 | 347 | errno = 0; 348 | x_space[j].value = strtod(val,&endptr); 349 | if(endptr == val || errno != 0 || (*endptr != '\0' && !isspace(*endptr))) 350 | exit_input_error(i+1); 351 | 352 | ++j; 353 | } 354 | 355 | if(inst_max_index > max_index) 356 | max_index = inst_max_index; 357 | x_space[j++].index = -1; 358 | } 359 | 360 | if(param.gamma == 0 && max_index > 0) 361 | param.gamma = 1.0/max_index; 362 | 363 | if(param.kernel_type == PRECOMPUTED) 364 | for(i=0;i max_index) 372 | { 373 | fprintf(stderr,"Wrong input format: sample_serial_number out of range\n"); 374 | exit(1); 375 | } 376 | } 377 | 378 | fclose(fp); 379 | } 380 | -------------------------------------------------------------------------------- /libsvm/libsvm/svm.def: -------------------------------------------------------------------------------- 1 | LIBRARY libsvm 2 | EXPORTS 3 | svm_train @1 4 | svm_cross_validation @2 5 | svm_save_model @3 6 | svm_load_model @4 7 | svm_get_svm_type @5 8 | svm_get_nr_class @6 9 | svm_get_labels @7 10 | svm_get_svr_probability @8 11 | svm_predict_values @9 12 | svm_predict @10 13 | svm_predict_probability @11 14 | svm_free_model_content @12 15 | svm_free_and_destroy_model @13 16 | svm_destroy_param @14 17 | svm_check_parameter @15 18 | 
svm_check_probability_model @16
19 | svm_set_print_string_function @17
20 |
-------------------------------------------------------------------------------- /libsvm/libsvm/svm.h: --------------------------------------------------------------------------------
1 | #ifndef _LIBSVM_H
2 | #define _LIBSVM_H
3 |
4 | #define LIBSVM_VERSION 312
5 |
6 | #ifdef __cplusplus
7 | extern "C" {
8 | #endif
9 |
10 | extern int libsvm_version;
11 |
12 | struct svm_node
13 | {
14 | 	int index;
15 | 	double value;
16 | };
17 |
18 | struct svm_problem
19 | {
20 | 	int l;
21 | 	double *y;
22 | 	struct svm_node **x;
23 | };
24 |
25 | enum { C_SVC, NU_SVC, ONE_CLASS, EPSILON_SVR, NU_SVR };	/* svm_type */
26 | enum { LINEAR, POLY, RBF, SIGMOID, PRECOMPUTED }; /* kernel_type */
27 |
28 | struct svm_parameter
29 | {
30 | 	int svm_type;
31 | 	int kernel_type;
32 | 	int degree;	/* for poly */
33 | 	double gamma;	/* for poly/rbf/sigmoid */
34 | 	double coef0;	/* for poly/sigmoid */
35 |
36 | 	/* these are for training only */
37 | 	double cache_size;	/* in MB */
38 | 	double eps;	/* stopping criteria */
39 | 	double C;	/* for C_SVC, EPSILON_SVR and NU_SVR */
40 | 	int nr_weight;	/* for C_SVC */
41 | 	int *weight_label;	/* for C_SVC */
42 | 	double* weight;	/* for C_SVC */
43 | 	double nu;	/* for NU_SVC, ONE_CLASS, and NU_SVR */
44 | 	double p;	/* for EPSILON_SVR */
45 | 	int shrinking;	/* use the shrinking heuristics */
46 | 	int probability;	/* do probability estimates */
47 | };
48 |
49 | //
50 | // svm_model
51 | //
52 | struct svm_model
53 | {
54 | 	struct svm_parameter param;	/* parameter */
55 | 	int nr_class;	/* number of classes, = 2 in regression/one class svm */
56 | 	int l;	/* total #SV */
57 | 	struct svm_node **SV;	/* SVs (SV[l]) */
58 | 	double **sv_coef;	/* coefficients for SVs in decision functions (sv_coef[k-1][l]) */
59 | 	double *rho;	/* constants in decision functions (rho[k*(k-1)/2]) */
60 | 	double *probA;	/* pairwise probability information */
61 | 	double *probB;
62 |
63 | 	/* for classification only */
64 |
65 | 	int *label;	/* label of each class (label[k]) */
66 | 	int *nSV;	/* number of SVs for each class (nSV[k]) */
67 | 	/* nSV[0] + nSV[1] + ... + nSV[k-1] = l */
68 | 	/* XXX */
69 | 	int free_sv;	/* 1 if svm_model is created by svm_load_model*/
70 | 			/* 0 if svm_model is created by svm_train */
71 | };
72 |
73 | struct svm_model *svm_train(const struct svm_problem *prob, const struct svm_parameter *param);
74 | void svm_cross_validation(const struct svm_problem *prob, const struct svm_parameter *param, int nr_fold, double *target);
75 |
76 | int svm_save_model(const char *model_file_name, const struct svm_model *model);
77 | struct svm_model *svm_load_model(const char *model_file_name);
78 |
79 | int svm_get_svm_type(const struct svm_model *model);
80 | int svm_get_nr_class(const struct svm_model *model);
81 | void svm_get_labels(const struct svm_model *model, int *label);
82 | double svm_get_svr_probability(const struct svm_model *model);
83 |
84 | double svm_predict_values(const struct svm_model *model, const struct svm_node *x, double* dec_values);
85 | double svm_predict(const struct svm_model *model, const struct svm_node *x);
86 | double svm_predict_probability(const struct svm_model *model, const struct svm_node *x, double* prob_estimates);
87 |
88 | void svm_free_model_content(struct svm_model *model_ptr);
89 | void svm_free_and_destroy_model(struct svm_model **model_ptr_ptr);
90 | void svm_destroy_param(struct svm_parameter *param);
91 |
92 | const char *svm_check_parameter(const struct svm_problem *prob, const struct svm_parameter *param);
93 | int svm_check_probability_model(const struct svm_model *model);
94 |
95 | void svm_set_print_string_function(void (*print_func)(const char *));
96 |
97 | #ifdef __cplusplus
98 | }
99 | #endif
100 |
101 | #endif /* _LIBSVM_H */
102 |
-------------------------------------------------------------------------------- /libsvm/libsvm_predict.c: --------------------------------------------------------------------------------
1 | #include <stdio.h>
2 | #include <stdlib.h>
3 | #include <string.h>
4 |
5 | #include "TH.h"
6 | #include "luaT.h"
7 |
8 | #include "libsvm/svm.h"
9 | #include "svm_model_torch.h"
10 |
11 |
12 | #define CMD_LEN 2048
13 | #define Malloc(type,n) (type *)malloc((n)*sizeof(type))
14 | #define max_(a,b) (a>=b ? 
a : b) 15 | 16 | 17 | void read_sparse_instance(lua_State *L, int index, double *target_label, struct svm_node *x) 18 | { 19 | lua_pushnumber(L,index+1);lua_gettable(L,-2); 20 | luaL_argcheck(L,lua_istable(L,1),1,"Expecting table in read_sparse_instance"); 21 | int j = 0; 22 | { 23 | // get label 24 | lua_pushnumber(L,1);lua_gettable(L,-2); 25 | *target_label = (double)lua_tonumber(L,-1); 26 | lua_pop(L,1); 27 | // get values 28 | lua_pushnumber(L,2);lua_gettable(L,-2); 29 | { 30 | lua_pushnumber(L,1);lua_gettable(L,-2); 31 | THIntTensor *indices = luaT_checkudata(L,-1,"torch.IntTensor"); 32 | lua_pop(L,1); 33 | lua_pushnumber(L,2);lua_gettable(L,-2); 34 | THFloatTensor *vals = luaT_checkudata(L,-1,"torch.FloatTensor"); 35 | lua_pop(L,1); 36 | 37 | int *indices_data = THIntTensor_data(indices); 38 | float *vals_data = THFloatTensor_data(vals); 39 | int k; 40 | for (k=0; k<(int)THIntTensor_nElement(indices); k++) 41 | { 42 | x[j].index = indices_data[k]; 43 | x[j].value = vals_data[k]; 44 | j++; 45 | } 46 | x[j++].index = -1; 47 | } 48 | lua_pop(L,1); 49 | } 50 | lua_pop(L,1); 51 | } 52 | 53 | 54 | void predict(lua_State *L, struct svm_model *model_, const int predict_probability) 55 | { 56 | int label_vector_row_num; 57 | int feature_number, testing_instance_number; 58 | int instance_index; 59 | double *ptr_predict_label; 60 | double *ptr_prob_estimates, *ptr_dec_values; 61 | struct svm_node *x; 62 | THDoubleTensor *label; 63 | THDoubleTensor *dec; 64 | 65 | int correct = 0; 66 | int total = 0; 67 | double error = 0; 68 | double sump = 0, sumt = 0, sumpp = 0, sumtt = 0, sumpt = 0; 69 | 70 | int svm_type=svm_get_svm_type(model_); 71 | int nr_class=svm_get_nr_class(model_); 72 | double *prob_estimates=NULL; 73 | 74 | luaL_argcheck(L,lua_istable(L,1),1,"Expecting table in do_predict"); 75 | 76 | 77 | // prhs[1] = testing instance matrix 78 | testing_instance_number = (int) lua_objlen(L,1); 79 | label_vector_row_num = testing_instance_number; 80 | 81 | int i; 82 | feature_number = -1; 83 | for (i=0; i< label_vector_row_num; i++) 84 | { 85 | // get the table elem 86 | lua_pushnumber(L,i+1); 87 | lua_gettable(L,-2); 88 | if (!lua_istable(L,-1)) 89 | luaL_error(L,"expected table at index %d while getting max_index\n",i+1); 90 | { 91 | // get values 92 | lua_pushnumber(L,2);lua_gettable(L,-2); 93 | { 94 | lua_pushnumber(L,1);lua_gettable(L,-2); 95 | THIntTensor *indices = luaT_toudata(L,-1,"torch.IntTensor"); 96 | feature_number = max_(feature_number,THIntTensor_get1d(indices,indices->size[0]-1)); 97 | lua_pop(L,1); 98 | } 99 | lua_pop(L,1); 100 | } 101 | lua_pop(L,1); 102 | } 103 | 104 | if(predict_probability) 105 | { 106 | if(svm_type==NU_SVR || svm_type==EPSILON_SVR) 107 | printf("Prob. model for test data: target value = predicted value + z,\nz: Laplace distribution e^(-|z|/sigma)/(2sigma),sigma=%g\n",svm_get_svr_probability(model_)); 108 | else 109 | prob_estimates = (double *) malloc(nr_class*sizeof(double)); 110 | } 111 | 112 | label = THDoubleTensor_newWithSize1d(testing_instance_number); 113 | if(predict_probability) 114 | { 115 | // prob estimates are in plhs[2] 116 | if(svm_type==C_SVC || svm_type==NU_SVC) 117 | dec = THDoubleTensor_newWithSize2d(testing_instance_number,nr_class); 118 | else 119 | dec = THDoubleTensor_new(); 120 | } 121 | else 122 | { 123 | // decision values are in plhs[2] 124 | if(svm_type == ONE_CLASS || 125 | svm_type == EPSILON_SVR || 126 | svm_type == NU_SVR || 127 | nr_class == 1) // if only one class in training data, decision values are still returned. 
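/* these model types yield a single decision value per test instance,
   hence the one-column tensor just below; the multi-class case in the
   else branch allocates nr_class*(nr_class-1)/2 columns, one per class
   pair (e.g. 6 columns for a 4-class C-SVC) */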
128 | dec = THDoubleTensor_newWithSize2d(testing_instance_number,1); 129 | else 130 | dec = THDoubleTensor_newWithSize2d(testing_instance_number,nr_class*(nr_class-1)/2); 131 | } 132 | 133 | ptr_predict_label = THDoubleTensor_data(label); 134 | ptr_prob_estimates = THDoubleTensor_data(dec); 135 | ptr_dec_values = THDoubleTensor_data(dec); 136 | 137 | x = (struct svm_node*)malloc((feature_number+1)*sizeof(struct svm_node) ); 138 | for(instance_index=0;instance_indexparam.kernel_type != PRECOMPUTED) // prhs[1]^T is still sparse 144 | read_sparse_instance(L, instance_index, &target_label, x); 145 | else 146 | { 147 | printf("only sparse for now."); 148 | // for(i=0;i 4 || nrhs < 2) 265 | { 266 | svm_exit_with_help(); 267 | return 0; 268 | } 269 | 270 | { 271 | const char *error_msg; 272 | 273 | // parse options 274 | if(nrhs==3) 275 | { 276 | int i, argc = 1; 277 | char *argv[CMD_LEN/2]; 278 | 279 | 280 | // put options in argv[] 281 | size_t slen; 282 | const char *tcmd = lua_tolstring(L,3,&slen); 283 | char cmd[slen]; 284 | strcpy(cmd,tcmd); 285 | if((argv[argc] = strtok((char*)cmd, " ")) != NULL) 286 | while((argv[++argc] = strtok(NULL, " ")) != NULL) 287 | ; 288 | 289 | for(i=1;i=argc) 293 | { 294 | svm_exit_with_help(); 295 | return 0; 296 | } 297 | switch(argv[i-1][1]) 298 | { 299 | case 'b': 300 | prob_estimate_flag = atoi(argv[i]); 301 | break; 302 | default: 303 | printf("Unknown option: -%c\n", argv[i-1][1]); 304 | svm_exit_with_help(); 305 | return 0; 306 | } 307 | } 308 | } 309 | 310 | model_ = Malloc(struct svm_model, 1); 311 | torch_structure_to_libsvm_model(model_, L); 312 | lua_pop(L,1); 313 | 314 | if (model_ == NULL) 315 | { 316 | printf("Error: can't read model: %s\n", error_msg); 317 | return 0; 318 | } 319 | 320 | if(prob_estimate_flag) 321 | { 322 | if(svm_check_probability_model(model_)==0) 323 | { 324 | printf("Model does not support probabiliy estimates\n"); 325 | svm_free_and_destroy_model(&model_); 326 | return 0; 327 | } 328 | } 329 | else 330 | { 331 | if(svm_check_probability_model(model_)!=0) 332 | printf("Model supports probability estimates, but disabled in predicton.\n"); 333 | } 334 | 335 | predict(L, model_, prob_estimate_flag); 336 | // destroy model 337 | svm_free_and_destroy_model(&model_); 338 | } 339 | 340 | return 3; 341 | } 342 | 343 | static const struct luaL_Reg libsvm_predict_util__ [] = { 344 | {"predict", libsvm_predict}, 345 | {NULL, NULL} 346 | }; 347 | 348 | 349 | int liblibsvm_predict_init(lua_State *L) 350 | { 351 | luaL_register(L, "libsvm", libsvm_predict_util__); 352 | return 1; 353 | } 354 | 355 | -------------------------------------------------------------------------------- /libsvm/libsvm_train.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include "TH.h" 8 | #include "luaT.h" 9 | 10 | #include "libsvm/svm.h" 11 | #include "svm_model_torch.h" 12 | 13 | #define CMD_LEN 2048 14 | #define Malloc(type,n) (type *)malloc((n)*sizeof(type)) 15 | 16 | #define max_(a,b) (a>=b ? a : b) 17 | #define min_(a,b) (a<=b ? 
a : b) 18 | 19 | 20 | // svm arguments 21 | struct svm_parameter param; // set by parse_command_line 22 | struct svm_problem prob; // set by read_problem 23 | struct svm_model *model; 24 | struct svm_node *x_space; 25 | int cross_validation; 26 | int nr_fold; 27 | 28 | void print_null(const char *s) {} 29 | void print_string_default(const char *s) {printf("%s",s);} 30 | 31 | void exit_with_help() 32 | { 33 | printf( 34 | "Usage: model = svmtrain(training_data, 'libsvm_options');\n" 35 | "libsvm_options:\n" 36 | "-s svm_type : set type of SVM (default 0)\n" 37 | " 0 -- C-SVC\n" 38 | " 1 -- nu-SVC\n" 39 | " 2 -- one-class SVM\n" 40 | " 3 -- epsilon-SVR\n" 41 | " 4 -- nu-SVR\n" 42 | "-t kernel_type : set type of kernel function (default 2)\n" 43 | " 0 -- linear: u'*v\n" 44 | " 1 -- polynomial: (gamma*u'*v + coef0)^degree\n" 45 | " 2 -- radial basis function: exp(-gamma*|u-v|^2)\n" 46 | " 3 -- sigmoid: tanh(gamma*u'*v + coef0)\n" 47 | " 4 -- precomputed kernel (kernel values in training_instance_matrix)\n" 48 | "-d degree : set degree in kernel function (default 3)\n" 49 | "-g gamma : set gamma in kernel function (default 1/num_features)\n" 50 | "-r coef0 : set coef0 in kernel function (default 0)\n" 51 | "-c cost : set the parameter C of C-SVC, epsilon-SVR, and nu-SVR (default 1)\n" 52 | "-n nu : set the parameter nu of nu-SVC, one-class SVM, and nu-SVR (default 0.5)\n" 53 | "-p epsilon : set the epsilon in loss function of epsilon-SVR (default 0.1)\n" 54 | "-m cachesize : set cache memory size in MB (default 100)\n" 55 | "-e epsilon : set tolerance of termination criterion (default 0.001)\n" 56 | "-h shrinking : whether to use the shrinking heuristics, 0 or 1 (default 1)\n" 57 | "-b probability_estimates : whether to train a SVC or SVR model for probability estimates, 0 or 1 (default 0)\n" 58 | "-wi weight : set the parameter C of class i to weight*C, for C-SVC (default 1)\n" 59 | "-v n : n-fold cross validation mode\n" 60 | "-q : quiet mode (no outputs)\n" 61 | ); 62 | } 63 | 64 | 65 | 66 | double do_cross_validation() 67 | { 68 | int i; 69 | int total_correct = 0; 70 | double total_error = 0; 71 | double sumv = 0, sumy = 0, sumvv = 0, sumyy = 0, sumvy = 0; 72 | double *target = Malloc(double,prob.l); 73 | double retval = 0.0; 74 | 75 | svm_cross_validation(&prob,¶m,nr_fold,target); 76 | if(param.svm_type == EPSILON_SVR || 77 | param.svm_type == NU_SVR) 78 | { 79 | for(i=0;i= 2) 141 | { 142 | // put options in argv[] 143 | size_t slen; 144 | const char *tcmd = lua_tolstring(L,2,&slen); 145 | strncpy(cmd,tcmd,slen); 146 | if((argv[argc] = strtok((char*)cmd, " ")) != NULL) 147 | while((argv[++argc] = strtok(NULL, " ")) != NULL) 148 | ; 149 | 150 | lua_pop(L,1); 151 | } 152 | 153 | // parse options 154 | for(i=1;i=argc && argv[i-1][1] != 'q') // since option -q has no parameter 159 | return 1; 160 | switch(argv[i-1][1]) 161 | { 162 | case 's': 163 | param.svm_type = atoi(argv[i]); 164 | break; 165 | case 't': 166 | param.kernel_type = atoi(argv[i]); 167 | break; 168 | case 'd': 169 | param.degree = atoi(argv[i]); 170 | break; 171 | case 'g': 172 | param.gamma = atof(argv[i]); 173 | break; 174 | case 'r': 175 | param.coef0 = atof(argv[i]); 176 | break; 177 | case 'n': 178 | param.nu = atof(argv[i]); 179 | break; 180 | case 'm': 181 | param.cache_size = atof(argv[i]); 182 | break; 183 | case 'c': 184 | param.C = atof(argv[i]); 185 | break; 186 | case 'e': 187 | param.eps = atof(argv[i]); 188 | break; 189 | case 'p': 190 | param.p = atof(argv[i]); 191 | break; 192 | case 'h': 193 | 
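/* -h toggles LIBSVM's shrinking heuristic (1 = on, the default
   listed in exit_with_help above) */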
param.shrinking = atoi(argv[i]); 194 | break; 195 | case 'b': 196 | param.probability = atoi(argv[i]); 197 | break; 198 | case 'q': 199 | print_func = &print_null; 200 | i--; 201 | break; 202 | case 'v': 203 | cross_validation = 1; 204 | nr_fold = atoi(argv[i]); 205 | if(nr_fold < 2) 206 | { 207 | printf("n-fold cross validation: n must >= 2\n"); 208 | return 1; 209 | } 210 | break; 211 | case 'w': 212 | ++param.nr_weight; 213 | param.weight_label = (int *)realloc(param.weight_label,sizeof(int)*param.nr_weight); 214 | param.weight = (double *)realloc(param.weight,sizeof(double)*param.nr_weight); 215 | param.weight_label[param.nr_weight-1] = atoi(&argv[i-1][2]); 216 | param.weight[param.nr_weight-1] = atof(argv[i]); 217 | break; 218 | default: 219 | printf("Unknown option -%c\n", argv[i-1][1]); 220 | return 1; 221 | } 222 | } 223 | 224 | svm_set_print_string_function(print_func); 225 | 226 | return 0; 227 | } 228 | 229 | // read in a problem (in svmlight format) 230 | int read_problem_dense(lua_State *L) 231 | { 232 | int i, j, k; 233 | int elements, max_index, sc, label_vector_row_num; 234 | float *samples, *labels; 235 | 236 | prob.x = NULL; 237 | prob.y = NULL; 238 | x_space = NULL; 239 | 240 | lua_pushnumber(L,1); 241 | lua_gettable(L,-2); 242 | THFloatTensor *tlabels = luaT_checkudata(L,1,"torch.FloatTensor"); 243 | lua_pushnumber(L,2); 244 | lua_gettable(L,-2); 245 | THFloatTensor *tsamples = luaT_checkudata(L,2,"torch.FloatTensor"); 246 | 247 | labels = THFloatTensor_data(tlabels); 248 | samples = THFloatTensor_data(tsamples); 249 | sc = (int)tsamples->size[1]; 250 | 251 | elements = 0; 252 | // the number of instance 253 | prob.l = (int)tsamples->size[0]; 254 | label_vector_row_num = (int)tlabels->size[0]; 255 | 256 | if(label_vector_row_num!=prob.l) 257 | { 258 | printf("Length of label vector does not match # of instances.\n"); 259 | return -1; 260 | } 261 | 262 | if(param.kernel_type == PRECOMPUTED) 263 | elements = prob.l * (sc + 1); 264 | else 265 | { 266 | for(i = 0; i < prob.l; i++) 267 | { 268 | for(k = 0; k < sc; k++) 269 | if(samples[i * sc + k] != 0) 270 | elements++; 271 | // count the '-1' element 272 | elements++; 273 | } 274 | } 275 | 276 | prob.y = Malloc(double,prob.l); 277 | prob.x = Malloc(struct svm_node *,prob.l); 278 | x_space = Malloc(struct svm_node, elements); 279 | 280 | max_index = sc; 281 | j = 0; 282 | for(i = 0; i < prob.l; i++) 283 | { 284 | prob.x[i] = &x_space[j]; 285 | prob.y[i] = labels[i]; 286 | 287 | for(k = 0; k < sc; k++) 288 | { 289 | if(param.kernel_type == PRECOMPUTED || samples[k * prob.l + i] != 0) 290 | { 291 | x_space[j].index = k + 1; 292 | x_space[j].value = samples[i*sc + k]; 293 | j++; 294 | } 295 | } 296 | x_space[j++].index = -1; 297 | } 298 | 299 | if(param.gamma == 0 && max_index > 0) 300 | param.gamma = 1.0/max_index; 301 | 302 | if(param.kernel_type == PRECOMPUTED) 303 | for(i=0;i max_index) 306 | { 307 | printf("Wrong input format: sample_serial_number out of range\n"); 308 | return -1; 309 | } 310 | } 311 | 312 | return 0; 313 | } 314 | 315 | 316 | int read_problem_sparse(lua_State *L) 317 | { 318 | 319 | luaL_argcheck(L,lua_istable(L,1),1,"Expecting table in read_problem_sparse"); 320 | int label_vector_row_num = lua_objlen(L,1); 321 | int num_samples = 0; 322 | int max_index = 0; 323 | int elements; 324 | 325 | prob.l = label_vector_row_num; 326 | 327 | int i; 328 | for (i=0; i< label_vector_row_num; i++) 329 | { 330 | // get the table elem 331 | lua_pushnumber(L,i+1); 332 | lua_gettable(L,-2); 333 | if (!lua_istable(L,-1)) 
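/* each element of the data table is expected to look like
   { label, { torch.IntTensor indices, torch.FloatTensor values } },
   the same sparse layout read_sparse_instance in libsvm_predict.c
   consumes; anything that is not a table triggers the error below */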
334 | luaL_error(L,"expected table at index %d while getting max_index\n",i+1); 335 | { 336 | // get values 337 | lua_pushnumber(L,2);lua_gettable(L,-2); 338 | { 339 | lua_pushnumber(L,1);lua_gettable(L,-2); 340 | THIntTensor *indices = luaT_toudata(L,-1,"torch.IntTensor"); 341 | num_samples += (int)THIntTensor_nElement(indices); 342 | max_index = max_(max_index,THIntTensor_get1d(indices,indices->size[0]-1)); 343 | // lua_pushnumber(L,2);lua_gettable(L,-2); 344 | // THFloatTensor *indices = luaT_checkudata(L,-1,"torch.FloatTensor"); 345 | lua_pop(L,1); 346 | } 347 | lua_pop(L,1); 348 | } 349 | lua_pop(L,1); 350 | } 351 | 352 | elements = num_samples + prob.l*2; 353 | prob.y = Malloc(double,prob.l); 354 | prob.x = Malloc(struct svm_node *,prob.l); 355 | x_space = Malloc(struct svm_node, elements); 356 | 357 | int j = 0; 358 | for (i=0; i 0) 397 | param.gamma = 1.0/max_index; 398 | 399 | return 0; 400 | } 401 | 402 | 403 | // Interface function of torch 404 | static int libsvm_train( lua_State *L ) 405 | { 406 | const char *error_msg; 407 | 408 | // fix random seed to have same results for each run 409 | // (for cross validation and probability estimation) 410 | srand(1); 411 | 412 | int nrhs = lua_gettop(L); 413 | 414 | // Transform the input Matrix to libsvm format 415 | if(nrhs >= 1 && nrhs < 3) 416 | { 417 | int err; 418 | 419 | if(parse_command_line(L)) 420 | { 421 | printf("parsing failed\n"); 422 | exit_with_help(); 423 | svm_destroy_param(¶m); 424 | return 0; 425 | } 426 | 427 | if(param.kernel_type == PRECOMPUTED) 428 | { 429 | err = read_problem_dense(L); 430 | } 431 | else 432 | err = read_problem_sparse(L); 433 | 434 | // svmtrain's original code 435 | error_msg = svm_check_parameter(&prob, ¶m); 436 | 437 | if(err || error_msg) 438 | { 439 | if (error_msg != NULL) 440 | printf("Error: %s\n", error_msg); 441 | svm_destroy_param(¶m); 442 | free(prob.y); 443 | free(prob.x); 444 | free(x_space); 445 | return 0; 446 | } 447 | 448 | if(cross_validation) 449 | { 450 | lua_pushnumber(L,do_cross_validation()); 451 | } 452 | else 453 | { 454 | model = svm_train(&prob, ¶m); 455 | libsvm_model_to_torch_structure(L, model); 456 | svm_free_and_destroy_model(&model); 457 | } 458 | svm_destroy_param(¶m); 459 | free(prob.y); 460 | free(prob.x); 461 | free(x_space); 462 | return 1; 463 | } 464 | else 465 | { 466 | exit_with_help(); 467 | return 0; 468 | } 469 | } 470 | 471 | static const struct luaL_Reg libsvm_util__ [] = { 472 | {"train", libsvm_train}, 473 | {NULL, NULL} 474 | }; 475 | 476 | 477 | int liblibsvm_train_init(lua_State *L) 478 | { 479 | luaL_register(L, "libsvm", libsvm_util__); 480 | return 1; 481 | } 482 | -------------------------------------------------------------------------------- /libsvm/svm_model_torch.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "libsvm/svm.h" 4 | 5 | #include "TH.h" 6 | #include "luaT.h" 7 | 8 | #define Malloc(type,n) (type *)malloc((n)*sizeof(type)) 9 | 10 | int libsvm_model_to_torch_structure(lua_State *L, struct svm_model *model_) 11 | { 12 | int i,j,n; 13 | 14 | // model table 15 | lua_newtable(L); 16 | 17 | // solver type (Parameters, but we only use solver_type) 18 | lua_pushstring(L,"svm_type"); 19 | lua_pushinteger(L,model_->param.svm_type); 20 | lua_settable(L,-3); 21 | 22 | // solver type (Parameters, but we only use solver_type) 23 | lua_pushstring(L,"kernel_type"); 24 | lua_pushinteger(L,model_->param.kernel_type); 25 | lua_settable(L,-3); 26 | 27 | // solver type 
(Parameters, but we only use solver_type) 28 | lua_pushstring(L,"degree"); 29 | lua_pushinteger(L,model_->param.degree); 30 | lua_settable(L,-3); 31 | 32 | // solver type (Parameters, but we only use solver_type) 33 | lua_pushstring(L,"gamma"); 34 | lua_pushnumber(L,model_->param.gamma); 35 | lua_settable(L,-3); 36 | 37 | // solver type (Parameters, but we only use solver_type) 38 | lua_pushstring(L,"coef0"); 39 | lua_pushnumber(L,model_->param.coef0); 40 | lua_settable(L,-3); 41 | 42 | // nr_class 43 | lua_pushstring(L,"nr_class"); 44 | lua_pushinteger(L,model_->nr_class); 45 | lua_settable(L,-3); 46 | 47 | // total_SV 48 | lua_pushstring(L,"totalSV"); 49 | lua_pushinteger(L,model_->l); 50 | lua_settable(L,-3); 51 | 52 | n = model_->nr_class*(model_->nr_class-1)/2; 53 | THDoubleTensor *rho = THDoubleTensor_newWithSize1d(n); 54 | double *rho_data = THDoubleTensor_data(rho); 55 | for (i=0; irho[i]; 58 | } 59 | lua_pushstring(L,"rho"); 60 | luaT_pushudata(L,rho,"torch.DoubleTensor"); 61 | lua_settable(L,-3); 62 | 63 | // Label 64 | THIntTensor *label; 65 | if(model_->label) 66 | { 67 | label = THIntTensor_newWithSize1d((long)(model_->nr_class)); 68 | int *label_data = THIntTensor_data(label); 69 | for(i = 0; i < model_->nr_class; i++) 70 | label_data[i] = model_->label[i]; 71 | } 72 | else 73 | { 74 | label = THIntTensor_new(); 75 | } 76 | lua_pushstring(L,"label"); 77 | luaT_pushudata(L,label,"torch.IntTensor"); 78 | lua_settable(L,-3); 79 | 80 | // probA 81 | THDoubleTensor *probA; 82 | if(model_->probA != NULL) 83 | { 84 | probA = THDoubleTensor_newWithSize1d(n); 85 | double *probA_data = THDoubleTensor_data(probA); 86 | for (i=0; iprobA[i]; 89 | } 90 | } 91 | else 92 | { 93 | probA = THDoubleTensor_new(); 94 | } 95 | lua_pushstring(L,"probA"); 96 | luaT_pushudata(L,probA,"torch.DoubleTensor"); 97 | lua_settable(L,-3); 98 | 99 | // probB 100 | THDoubleTensor *probB; 101 | if(model_->probB != NULL) 102 | { 103 | probB = THDoubleTensor_newWithSize1d(n); 104 | double *probB_data = THDoubleTensor_data(probB); 105 | for (i=0; iprobB[i]; 108 | } 109 | } 110 | else 111 | { 112 | probB = THDoubleTensor_new(); 113 | } 114 | lua_pushstring(L,"probB"); 115 | luaT_pushudata(L,probB,"torch.DoubleTensor"); 116 | lua_settable(L,-3); 117 | 118 | // Label 119 | THIntTensor *nSV; 120 | if(model_->nSV) 121 | { 122 | nSV = THIntTensor_newWithSize1d((long)(model_->nr_class)); 123 | int *nSV_data = THIntTensor_data(nSV); 124 | for(i = 0; i < model_->nr_class; i++) 125 | nSV_data[i] = model_->nSV[i]; 126 | } 127 | else 128 | { 129 | nSV = THIntTensor_new(); 130 | } 131 | lua_pushstring(L,"nSV"); 132 | luaT_pushudata(L,nSV,"torch.IntTensor"); 133 | lua_settable(L,-3); 134 | 135 | // sv_coef 136 | THDoubleTensor *sv_coef = THDoubleTensor_newWithSize2d(model_->l, model_->nr_class-1); 137 | double *sv_coef_data = THDoubleTensor_data(sv_coef); 138 | for(i = 0; i < model_->nr_class-1; i++) 139 | for(j = 0; j < model_->l; j++) 140 | sv_coef_data[(i*(model_->l))+j] = model_->sv_coef[i][j]; 141 | lua_pushstring(L,"sv_coef"); 142 | luaT_pushudata(L,sv_coef,"torch.DoubleTensor"); 143 | lua_settable(L,-3); 144 | 145 | // SVs 146 | lua_pushstring(L,"SVs"); 147 | lua_newtable(L); 148 | for(i = 0;i < model_->l; i++) 149 | { 150 | lua_pushnumber(L,i+1); 151 | if(model_->param.kernel_type == PRECOMPUTED) 152 | { 153 | lua_pushnumber(L,model_->SV[i][0].value); 154 | } 155 | else 156 | { 157 | int x_index = 0; 158 | while (model_->SV[i][x_index].index != -1) 159 | x_index++; 160 | 161 | THIntTensor *indices = 
162 | int *indices_data = THIntTensor_data(indices);
163 | THDoubleTensor *vals = THDoubleTensor_newWithSize1d(x_index);
164 | double *vals_data = THDoubleTensor_data(vals);
165 |
166 | x_index = 0;
167 | while (model_->SV[i][x_index].index != -1)
168 | {
169 | indices_data[x_index] = model_->SV[i][x_index].index;
170 | vals_data[x_index] = model_->SV[i][x_index].value;
171 | x_index++;
172 | }
173 | lua_newtable(L);
174 | lua_pushnumber(L,1);
175 | luaT_pushudata(L,indices,"torch.IntTensor");
176 | lua_settable(L,-3);
177 | lua_pushnumber(L,2);
178 | luaT_pushudata(L,vals,"torch.DoubleTensor");
179 | lua_settable(L,-3);
180 | }
181 | lua_settable(L,-3);
182 | }
183 | lua_settable(L,-3);
184 | return 1;
185 | }
186 |
187 | int torch_structure_to_libsvm_model(struct svm_model *model_, lua_State *L)
188 | {
189 | int i, j, n;
190 | struct svm_node *x_space;
191 |
192 | // init
193 | model_->rho = NULL;
194 | model_->probA = NULL;
195 | model_->probB = NULL;
196 | model_->label = NULL;
197 | model_->nSV = NULL;
198 | model_->free_sv = 1; // XXX
199 |
200 | // Parameters
201 | lua_pushstring(L,"svm_type");
202 | lua_gettable(L,-2);
203 | if (!lua_isnumber(L,-1))
204 | luaL_error(L,"svm_type expected to be an integer, got %s",luaL_typename(L,-1));
205 | model_->param.svm_type = lua_tointeger(L,-1);
206 | lua_pop(L,1);
207 |
208 | lua_pushstring(L,"kernel_type");
209 | lua_gettable(L,-2);
210 | if (!lua_isnumber(L,-1))
211 | luaL_error(L,"kernel_type expected to be an integer, got %s",luaL_typename(L,-1));
212 | model_->param.kernel_type = lua_tointeger(L,-1);
213 | lua_pop(L,1);
214 |
215 | lua_pushstring(L,"degree");
216 | lua_gettable(L,-2);
217 | if (!lua_isnumber(L,-1))
218 | luaL_error(L,"degree expected to be an integer, got %s",luaL_typename(L,-1));
219 | model_->param.degree = lua_tointeger(L,-1);
220 | lua_pop(L,1);
221 |
222 | lua_pushstring(L,"gamma");
223 | lua_gettable(L,-2);
224 | if (!lua_isnumber(L,-1))
225 | luaL_error(L,"gamma expected to be a number, got %s",luaL_typename(L,-1));
226 | model_->param.gamma = lua_tonumber(L,-1);
227 | lua_pop(L,1);
228 |
229 | lua_pushstring(L,"coef0");
230 | lua_gettable(L,-2);
231 | if (!lua_isnumber(L,-1))
232 | luaL_error(L,"coef0 expected to be a number, got %s",luaL_typename(L,-1));
233 | model_->param.coef0 = lua_tonumber(L,-1);
234 | lua_pop(L,1);
235 |
236 | lua_pushstring(L,"nr_class");
237 | lua_gettable(L,-2);
238 | if (!lua_isnumber(L,-1))
239 | luaL_error(L,"nr_class expected to be an integer, got %s",luaL_typename(L,-1));
240 | model_->nr_class = lua_tointeger(L,-1);
241 | lua_pop(L,1);
242 |
243 | lua_pushstring(L,"totalSV");
244 | lua_gettable(L,-2);
245 | if (!lua_isnumber(L,-1))
246 | luaL_error(L,"totalSV expected to be an integer, got %s",luaL_typename(L,-1));
247 | model_->l = lua_tointeger(L,-1);
248 | lua_pop(L,1);
249 |
250 | lua_pushstring(L,"rho");
251 | lua_gettable(L,-2);
252 | THDoubleTensor *rho = luaT_checkudata(L,-1,"torch.DoubleTensor");
253 | lua_pop(L,1);
254 | double *rho_data = THDoubleTensor_data(rho);
255 | n = model_->nr_class * (model_->nr_class-1)/2;
256 | model_->rho = (double*) malloc(n*sizeof(double));
257 | for (i=0; i<n; i++)
258 | model_->rho[i] = rho_data[i];
259 |
260 | // Label
261 | lua_pushstring(L,"label");
262 | lua_gettable(L,-2);
263 | THIntTensor *label = luaT_checkudata(L,-1,"torch.IntTensor");
264 | lua_pop(L,1);
265 | int nlabel = (int)THIntTensor_nElement(label);
266 | if( nlabel > 0)
267 | {
268 | if (nlabel != model_->nr_class)
269 | luaL_error(L,"Number of elements in label vector does not match nr_class");
270 |
271 | int *label_data = THIntTensor_data(label);
272 | model_->label = Malloc(int, model_->nr_class);
273 | for(i=0;i<model_->nr_class;i++)
274 | model_->label[i] = label_data[i];
275 | }
276 |
277 | // probA
278 | lua_pushstring(L,"probA");
279 | lua_gettable(L,-2);
280 | THDoubleTensor *probA = luaT_checkudata(L,-1,"torch.DoubleTensor");
281 | lua_pop(L,1);
282 | int nprobA = (int)THDoubleTensor_nElement(probA);
283 | if( nprobA > 0)
284 | {
285 | if (nprobA != n)
286 | luaL_error(L,"Number of elements in probA does not match n");
287 | double *probA_data = THDoubleTensor_data(probA);
288 | model_->probA = (double*) malloc(n*sizeof(double));
289 | for(i=0;i<n;i++)
290 | model_->probA[i] = probA_data[i];
291 | }
292 |
293 | // probB
294 | lua_pushstring(L,"probB");
295 | lua_gettable(L,-2);
296 | THDoubleTensor *probB = luaT_checkudata(L,-1,"torch.DoubleTensor");
297 | lua_pop(L,1);
298 | int nprobB = (int)THDoubleTensor_nElement(probB);
299 | if( nprobB > 0)
300 | {
301 | if (nprobB != n)
302 | luaL_error(L,"Number of elements in probB does not match n");
303 | double *probB_data = THDoubleTensor_data(probB);
304 | model_->probB = (double*) malloc(n*sizeof(double));
305 | for(i=0;i<n;i++)
306 | model_->probB[i] = probB_data[i];
307 | }
308 |
309 | // nSV
310 | lua_pushstring(L,"nSV");
311 | lua_gettable(L,-2);
312 | THIntTensor *nSV = luaT_checkudata(L,-1,"torch.IntTensor");
313 | lua_pop(L,1);
314 | int nnSV = (int)THIntTensor_nElement(nSV);
315 | if( nnSV > 0)
316 | {
317 | if (nnSV != model_->nr_class)
318 | luaL_error(L,"Number of elements in nSV vector does not match nr_class");
319 |
320 | int *nSV_data = THIntTensor_data(nSV);
321 | model_->nSV = Malloc(int, model_->nr_class);
322 | for(i=0;i<model_->nr_class;i++)
323 | model_->nSV[i] = nSV_data[i];
324 | }
325 |
326 | // sv_coef
327 | lua_pushstring(L,"sv_coef");
328 | lua_gettable(L,-2);
329 | THDoubleTensor *sv_coef = luaT_checkudata(L,-1,"torch.DoubleTensor");
330 | lua_pop(L,1);
331 | double *sv_coef_data = THDoubleTensor_data(sv_coef);
332 | model_->sv_coef = (double**) malloc((model_->nr_class-1)*sizeof(double *));
333 | for( i=0 ; i< model_->nr_class -1 ; i++ )
334 | model_->sv_coef[i] = (double*) malloc((model_->l)*sizeof(double));
335 | for(i = 0; i < model_->nr_class - 1; i++)
336 | for(j = 0; j < model_->l; j++)
337 | model_->sv_coef[i][j] = sv_coef_data[i*(model_->l)+j];
338 |
339 |
340 | // SV
341 | {
342 | lua_pushstring(L,"SVs");
343 | lua_gettable(L,-2);
344 | int sr, elements, num_samples;
345 |
346 | num_samples = 0;
347 | sr = lua_objlen(L,-1);
348 | for (i=0; i<sr; i++)
349 | {
350 | lua_pushnumber(L,i+1);
351 | lua_gettable(L,-2);
352 | lua_pushnumber(L,1);
353 | lua_gettable(L,-2);
354 | THIntTensor *inds = luaT_checkudata(L,-1,"torch.IntTensor");
355 | num_samples += inds->size[0];
356 | lua_pop(L,1);
357 | lua_pop(L,1);
358 | }
359 |
360 | elements = num_samples + sr;
361 |
362 | model_->SV = (struct svm_node **) malloc(sr * sizeof(struct svm_node *));
363 | x_space = (struct svm_node *)malloc(elements * sizeof(struct svm_node));
364 |
365 | int xi = 0;
366 | for(i=0;i<sr;i++)
367 | {
368 | lua_pushnumber(L,i+1);
369 | lua_gettable(L,-2);
370 | model_->SV[i] = &x_space[xi];
371 |
372 | lua_pushnumber(L,1);
373 | lua_gettable(L,-2);
374 | THIntTensor *inds = luaT_checkudata(L,-1,"torch.IntTensor");
375 | int *inds_data = THIntTensor_data(inds);
376 | lua_pop(L,1);
377 | lua_pushnumber(L,2);
378 | lua_gettable(L,-2);
379 | THDoubleTensor *vals = luaT_checkudata(L,-1,"torch.DoubleTensor");
380 | double *vals_data = THDoubleTensor_data(vals);
381 | lua_pop(L,1);
382 | int nf = inds->size[0];
383 | for(j=0; j<nf; j++)
384 | {
385 | model_->SV[i][j].index = inds_data[j];
386 | model_->SV[i][j].value = vals_data[j];
387 | }
388 | model_->SV[i][nf].index = -1;
389 | xi += nf;
390 | xi++;
391 | lua_pop(L,1);
392 | }
393 | lua_pop(L,1);
394 | }
395 | return 1;
396 | }
397 |
398 |
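For reference, a minimal sketch of the Lua-side model table that `libsvm_model_to_torch_structure` builds and `torch_structure_to_libsvm_model` reads back. The field names and tensor types are taken from the two converters above; the sizes and values below are purely illustrative (a made-up 2-class model with two support vectors), not the output of an actual training run:

```
-- illustrative only: a tiny 2-class model with two support vectors
model = {
  svm_type = 0, kernel_type = 2,          -- e.g. C-SVC with an RBF kernel
  degree = 3, gamma = 0.5, coef0 = 0,
  nr_class = 2, totalSV = 2,
  rho   = torch.DoubleTensor{0.1},        -- nr_class*(nr_class-1)/2 entries
  label = torch.IntTensor{1,-1},
  probA = torch.DoubleTensor(),           -- empty unless probability estimates were trained
  probB = torch.DoubleTensor(),
  nSV   = torch.IntTensor{1,1},           -- support vector count per class
  sv_coef = torch.DoubleTensor(2,1),      -- totalSV x (nr_class-1)
  SVs = {                                 -- one {indices,values} pair per support vector
    {torch.IntTensor{1,3}, torch.DoubleTensor{0.5,-1.0}},
    {torch.IntTensor{2},   torch.DoubleTensor{1.5}},
  },
}
```

Optional fields (label, probA, probB, nSV) are signalled by empty tensors, which is why the C side creates them with `THIntTensor_new()`/`THDoubleTensor_new()` and checks `nElement` before copying.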
399 | -------------------------------------------------------------------------------- /libsvm/svm_model_torch.h: --------------------------------------------------------------------------------
1 | #include "TH.h"
2 | #include "luaT.h"
3 |
4 | int libsvm_model_to_torch_structure( lua_State *L, struct svm_model *model_);
5 | int torch_structure_to_libsvm_model(struct svm_model *model_, lua_State *L);
6 | -------------------------------------------------------------------------------- /sgd/CMakeLists.txt: --------------------------------------------------------------------------------
1 |
2 | SET(src)
3 |
4 | SET(luasrc init.lua
5 | sgd.lua
6 | asgd.lua
7 | loss.lua
8 | )
9 |
10 | INCLUDE_DIRECTORIES(${Torch_SOURCE_INCLUDES})
11 | ADD_TORCH_PACKAGE(svmsgd "${src}" "${luasrc}" "SGD SVM Interface")
12 |
13 | -------------------------------------------------------------------------------- /sgd/asgd.lua: --------------------------------------------------------------------------------
1 | --[[
2 | Simple linear classifiers, implemented to compare against
3 | Leon Bottou's reference implementation in terms of speed
4 | and convergence properties.
5 |
6 | Note: this implementation is roughly 2-10 times slower than the
7 | C version, depending on the sparsity of the input.
8 | ]]--
9 |
10 | local svmasgd,parent = torch.class('svm.SvmAsgd','svm.SvmSgd')
11 |
12 | function svmasgd:__init(nf,lam)
13 | parent.__init(self,nf,lam)
14 | -- weights/biases
15 | self.a = torch.FloatTensor(nf):zero()
16 | self.adiv = 1
17 | self.ab = 0
18 | self.wfrac = 0
19 | -- step size
20 | self.mu0 = 1
21 | -- counter
22 | self.nupdate = 0
23 | self.avstart = 1
24 | self.averaging = false
25 | end
26 |
27 | function svmasgd:renorm()
28 | if self.wdiv ~= 1 or self.adiv ~= 1 or self.wfrac ~= 0 then
29 | self.a:mul(1/self.adiv)
30 | self.a:add(self.wfrac/self.adiv, self.w)
31 | self.w:mul(1/self.wdiv)
32 | self.wdiv = 1
33 | self.adiv = 1
34 | self.wfrac = 0
35 | end
36 | end
37 |
38 | function svmasgd:wnorm()
39 | local w = self.w
40 | local norm = torch.dot(w,w) / self.wdiv / self.wdiv
41 | if self.regbias then
42 | norm = norm + self.b*self.b
43 | end
44 | return norm
45 | end
46 |
47 | function svmasgd:anorm()
48 | local a = self.a
49 | self:renorm()
50 | local norm = torch.dot(a,a)
51 | if self.regbias then
52 | norm = norm + self.ab*self.ab
53 | end
54 | return norm
55 | end
56 |
57 | function svmasgd:testOne(y,si,sx)
58 |
59 | -- local variables
60 | local w = self.w
61 | local a = self.a
62 | local wdiv = self.wdiv
63 | local adiv = self.adiv
64 | local wfrac = self.wfrac
65 | local ab = self.ab
66 |
67 | local s = svm.spdot(a,si,sx)
68 |
69 | if wfrac ~= 0 then
70 | s = s + svm.spdot(w,si,sx)*wfrac
71 | end
72 | s = s / adiv + ab
73 |
74 | -- calculate gradient from loss
75 | local lx,ldx = self:loss(s,y)
76 | local e = 0
77 | if s*y <= 0 then
78 | e = 1
79 | end
80 | return s,lx,e
81 | end
82 |
83 | function svmasgd:trainOne(y,si,sx,eta,mu)
84 | mu = mu or 1.0
85 | -- local variables
86 | local w = self.w
87 | local a = self.a
88 | local wdiv = self.wdiv
89 | local adiv = self.adiv
90 | local wfrac = self.wfrac
91 | local b = self.b
92 | local ab = self.ab
93 | local lambda = self.lambda
94 |
95 | if adiv > 1e5 or wdiv > 1e5 then
96 | self:renorm()
97 | adiv = self.adiv
98 | wdiv = self.wdiv
99 | wfrac = self.wfrac
100 | end
101 |
102 | local s = svm.spdot(w,si,sx)/wdiv + b
103 |
104 | -- update wdiv
105 | wdiv = wdiv / (1 - eta*lambda)
106 |
107 | -- calculate gradient from loss
108 | local lx,ldx = self:loss(s,y)
109 |
110 | local etd = eta * ldx * wdiv
111 |
112 | -- update weights
113 | if etd ~= 0 then
114 | svm.spadd(w,etd,si,sx)
115 | end
116 |
117 | -- averaging
118 | if mu >= 1 then
119 | if self.averaging then
120 | a:zero()
121 | end
122 | adiv = wdiv
123 | wfrac = 1
124 | elseif mu > 0 then
125 | if etd ~= 0 then
126 | svm.spadd(a,-wfrac*etd,si,sx)
127 | end
128 | adiv = adiv / (1-mu)
129 | wfrac = wfrac + mu * adiv / wdiv
130 | end
131 |
132 | -- update bias
133 | if self.regbias then
134 | b = b * (1- eta * 0.01 * lambda)
135 | end
136 | b = b + eta*0.01*ldx
137 | ab = ab + mu * (b - ab)
138 | -- put back modified numbers
139 | self.adiv = adiv
140 | self.wdiv = wdiv
141 | self.wfrac = wfrac
142 | self.ab = ab
143 | self.b = b
144 |
145 | end
146 |
147 | function svmasgd:trainepoch(data)
148 | print('Training on ' .. data:size() .. ' samples')
149 | -- local variables
150 | local lambda = self.lambda
151 | local eta0 = self.eta0
152 | local mu0 = self.mu0
153 | local nupdate = self.nupdate
154 |
155 | -- run over every sample
156 | for i=1,data:size() do
157 | local ex = data[i]
158 |
159 | -- update learning rate
160 | local eta = eta0 / math.pow(1 + lambda*eta0*nupdate,0.75)
161 | local mu = 1
162 | if nupdate >= self.avstart then
163 | if not self.averaging then self.averaging = true end
164 | mu = mu0 / (1 + mu0 * (nupdate - self.avstart))
165 | -- print(i)
166 | end
167 |
168 | -- train for a sample
169 | self:trainOne(ex[1], ex[2][1], ex[2][2], eta, mu)
170 |
171 | nupdate = nupdate + 1
172 | end
173 | io.write('wNorm=' .. string.format('%.2f',self:wnorm()))
174 | io.write(' aNorm=' .. string.format('%.2f',self:anorm()))
175 | io.write(' wBias=' .. string.format('%.2f',self.b))
176 | io.write(' aBias=' .. string.format('%.2f\n',self.ab))
177 | self.nupdate = nupdate
178 | end
179 |
180 | function svmasgd:train(trdata,tedata,epochs)
181 | self.avstart = self.avstart * trdata:size()
182 | parent.train(self,trdata,tedata,epochs)
183 | self:renorm()
184 | end
185 |
186 | -------------------------------------------------------------------------------- /sgd/init.lua: --------------------------------------------------------------------------------
1 | include('loss.lua')
2 | include('sgd.lua')
3 | include('asgd.lua')
4 |
5 | -------------------------------------------------------------------------------- /sgd/loss.lua: --------------------------------------------------------------------------------
1 |
2 |
3 | function svm.hingeloss(a,y)
4 | local z = a*y
5 | if z > 1 then return 0,0 end
6 | return 1-z,y
7 | end
8 |
9 | function svm.logloss(a,y)
10 | local z = a*y
11 | if z>18 then
12 | return math.exp(-z), y*math.exp(-z)
13 | end
14 | if z<-18 then
15 | return -z,y
16 | end
17 | return math.log(1+math.exp(-z)),y/(1+math.exp(z))
18 | end
19 |
20 | function svm.squaredhingeloss(a,y)
21 | local z = a*y
22 | if z > 1 then return 0,0 end
23 | local d=1-z
24 | return 0.5*d*d,y*d
25 | end
26 |
27 |
28 | -------------------------------------------------------------------------------- /sgd/sgd.lua: --------------------------------------------------------------------------------
1 | --[[
2 | Simple linear classifiers, implemented to compare against
3 | Leon Bottou's reference implementation in terms of speed
4 | and convergence properties.
5 |
6 | Note: this implementation is roughly 2-10 times slower than the
7 | C version, depending on the sparsity of the input.
8 | ]]--
9 |
10 | local svmsgd = torch.class('svm.SvmSgd')
11 |
12 | function svmsgd:__init(nf,lam)
13 | -- weights/biases
14 | self.w = torch.FloatTensor(nf):zero()
15 | self.b = 0
16 | self.wdiv = 1
17 | self.lambda = lam
18 | self.eta0 = 1
19 | self.nupdate = 0
20 | self.nf = nf
21 | self.regbias = false
22 | self.svmloss = svm.hingeloss
23 | end
24 |
25 | function svmsgd:loss(a,y)
26 | return self.svmloss(a,y)
27 | end
28 |
29 | function svmsgd:renorm()
30 | if self.wdiv ~= 1 then
31 | self.w:mul(1/self.wdiv)
32 | self.wdiv = 1
33 | end
34 | end
35 |
36 | function svmsgd:wnorm()
37 | local w = self.w
38 | local norm = torch.dot(w,w) / self.wdiv / self.wdiv
39 | if self.regbias then
40 | norm = norm + self.b * self.b
41 | end
42 | return norm
43 | end
44 |
45 | function svmsgd:testOne(y,si,sx)
46 |
47 | -- local variables
48 | local w = self.w
49 | local wdiv = self.wdiv
50 | local b = self.b
51 |
52 | local s = svm.spdot(w,si,sx)/wdiv + b
53 |
54 | -- calculate gradient from loss
55 | local lx,ldx = self:loss(s,y)
56 | local e = 0
57 | if s*y <= 0 then e = 1 end
58 | return s,lx,e
59 | end
60 |
61 | function svmsgd:trainOne(y,si,sx,eta)
62 | local w = self.w
63 | local wdiv = self.wdiv
64 | local b = self.b
65 | local lambda = self.lambda
66 |
67 | local s = svm.spdot(w,si,sx)/wdiv + b
68 |
69 | -- update wdiv
70 | wdiv = wdiv / (1 - eta*lambda)
71 | if wdiv > 1e5 then
72 | self:renorm()
73 | wdiv = self.wdiv
74 | end
75 |
76 | -- calculate gradient from loss
77 | local lx,ldx = self:loss(s,y)
78 |
79 | -- update weights
80 | if ldx ~= 0 then
81 | svm.spadd(w,eta*ldx*wdiv,si,sx)
82 | end
83 |
84 | -- update bias
85 | if self.regbias then
86 | b = b * (1- eta * 0.01 * lambda)
87 | end
88 | b = b + eta*0.01*ldx
89 |
90 | -- put back modified numbers
91 | self.wdiv = wdiv
92 | self.b = b
93 | end
94 |
95 | function svmsgd:trainepoch(data)
96 | print('Training on ' .. data:size() .. ' samples')
97 | -- local variables
98 | local eta = 0
99 | local lambda = self.lambda
100 | local eta0 = self.eta0
101 | local nupdate = self.nupdate
102 |
103 | for i=1,data:size() do
104 | -- update learning rate
105 | eta = eta0 / (1 + lambda*eta0*nupdate)
106 |
107 | -- train for a sample
108 | local ex = data[i]
109 | self:trainOne(ex[1], ex[2][1], ex[2][2], eta)
110 |
111 | nupdate = nupdate + 1
112 | end
113 | io.write('wNorm=' .. string.format('%.2f',self:wnorm()))
114 | io.write(' wBias=' .. string.format('%.2f\n',self.b))
115 | self.nupdate = nupdate
116 | end
117 |
118 | function svmsgd:test(data)
119 |
120 | io.write('Testing on ' .. data:size() .. ' samples\n')
121 |
122 | local loss = 0
123 | local nerr = 0
124 | for i=1,data:size() do
125 | local ex = data[i]
126 | local s,l,e = self:testOne(ex[1], ex[2][1], ex[2][2])
127 | loss = loss + l
128 | nerr = nerr + e
129 | end
130 |
131 | loss = loss/data:size()
132 | nerr = nerr/data:size()
133 |
134 | io.write('Loss=' .. string.format('%.8f',loss))
135 | io.write(' Cost=' .. string.format('%.8f',loss + 0.5*self.lambda*self:wnorm()))
136 | io.write(' Misclassification=' .. string.format('%.2f %%\n',100*nerr))
137 | end
138 |
139 | function svmsgd:predict(data)
140 | local tlabel = torch.IntTensor(data:size())
141 | local tdec = torch.Tensor(data:size())
142 | local loss = 0
143 | local nerr = 0
144 | for i=1,data:size() do
145 | local ex = data[i]
146 | local s,l,e = self:testOne(ex[1], ex[2][1], ex[2][2])
147 | loss = loss + l
148 | nerr = nerr + e
149 | if e == 1 then tlabel[i] = -ex[1] else tlabel[i] = ex[1] end
150 | tdec[i] = s
151 | end
152 |
153 | loss = loss/data:size()
154 | nerr = nerr/data:size()
155 | io.write('Accuracy=' .. string.format('%.4f %% (%d/%d)\n',
156 | 100-100*nerr,data:size()-nerr*data:size(),data:size()))
157 | return tlabel,{100-100*nerr,loss,loss + 0.5*self.lambda*self:wnorm()},tdec
158 | end
159 |
160 | function svmsgd:train(trdata,tedata,epochs)
161 |
162 | local trtime = torch.Timer()
163 | for i=1,epochs do
164 | print('============== Epoch #' .. i .. ' ==============')
165 |
166 | -- train
167 | trtime:resume()
168 | self:trainepoch(trdata)
169 | trtime:stop()
170 | print('Total Training Time = ' .. string.format('%.2f secs',trtime:time().real))
171 |
172 | -- test
173 | io.write('>> train: ')
174 | self:test(trdata)
175 | if tedata then
176 | io.write('>> test: ')
177 | self:test(tedata)
178 | end
179 | end
180 | end
181 |
182 | function svmsgd:evalEta(nsample,data,eta)
183 | -- clone the weight and bias
184 | local w = self.w:clone()
185 | local b = self.b
186 | local wdiv = self.wdiv
187 | for i=1,nsample do
188 | local ex = data[i]
189 | self:trainOne(ex[1], ex[2][1], ex[2][2], eta)
190 | end
191 | local loss = 0
192 | for i=1,nsample do
193 | local ex = data[i]
194 | local s,l,e = self:testOne(ex[1], ex[2][1], ex[2][2])
195 | loss = loss + l
196 | end
197 | local cost = loss/nsample + 0.5 * self.lambda * self:wnorm()
198 | self.w:copy(w)
199 | self.b = b
200 | self.wdiv = wdiv
201 | return cost
202 | end
203 |
204 | function svmsgd:determineEta0(nsample,data)
205 | local factor = 2
206 | local loeta = 1
207 | local locost = self:evalEta(nsample,data,loeta)
208 | local hieta = loeta * factor
209 | local hicost = self:evalEta(nsample,data,hieta)
210 | if locost < hicost then
211 | while locost < hicost do
212 | hieta = loeta
213 | hicost = locost
214 | loeta = hieta / factor
215 | locost = self:evalEta(nsample,data,loeta)
216 | end
217 | elseif hicost < locost then
218 | while hicost < locost do
219 | loeta = hieta
220 | locost = hicost
221 | hieta = loeta * factor
222 | hicost = self:evalEta(nsample,data,hieta)
223 | end
224 | end
225 | self.eta0 = loeta
226 | print('# Using eta0='..string.format('%.4f',self.eta0))
227 | end
228 |
229 | -------------------------------------------------------------------------------- /sgd/test.lua: --------------------------------------------------------------------------------
1 | require 'svm'
2 |
3 | if #arg == 0 then arg = nil end
4 |
5 | -- these files can be generated with Leon Bottou's sgd project.
6 | dtr=svm.dataset('../data/rcv1.train.bin')
7 | dte=svm.dataset('../data/rcv1.test.bin')
8 |
9 | if arg and (arg[1] == 'dense' or arg[2] == 'dense') then
10 | dtr:dense()
11 | dte:dense()
12 | end
13 |
14 | -- These tests compare against Leon Bottou's experiments.
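-- For reference: every trainer exercised below consumes examples of the same
-- sparse form {label, {indices, values}}, e.g.
--   local ex = dtr[1]
--   local y  = ex[1]      -- target, +1 or -1
--   local si = ex[2][1]   -- IntTensor of 1-based feature indices (nil when dense)
--   local sx = ex[2][2]   -- FloatTensor of the corresponding values
-- and computes decision values with svm.spdot(w,si,sx)/wdiv + b, as in testOne.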
15 | if not arg or (arg and arg[1] == 'sgd-hinge') then
16 | print('======================================')
17 | print('SVM SGD HingeLoss')
18 | -- sgd (hinge)
19 | mysvm = svm.SvmSgd(dtr:nfeature(),1e-4)
20 | print(mysvm)
21 | mysvm:determineEta0(1001,dtr)
22 | mysvm:train(dtr,dte,5)
23 | end
24 |
25 | if not arg or (arg and arg[1] == 'asgd-hinge') then
26 | print('======================================')
27 | print('SVM ASGD HingeLoss')
28 | -- asgd (hinge)
29 | mysvm = svm.SvmAsgd(dtr:nfeature(),1e-4)
30 | print(mysvm)
31 | mysvm:determineEta0(1001,dtr)
32 | mysvm:train(dtr,dte,3)
33 | end
34 |
35 | if not arg or (arg and arg[1] == 'sgd-log') then
36 | print('======================================')
37 | print('SVM SGD LogLoss')
38 | -- sgd(logloss)
39 | mysvm = svm.SvmSgd(dtr:nfeature(),5e-7)
40 | mysvm.svmloss = svm.logloss
41 | mysvm.regbias = false
42 | print(mysvm)
43 | mysvm:determineEta0(1001,dtr)
44 | mysvm:train(dtr,dte,12)
45 | end
46 |
47 | if not arg or (arg and arg[1] == 'asgd-log') then
48 | print('======================================')
49 | print('SVM ASGD LogLoss')
50 | -- asgd(logloss)
51 | mysvm = svm.SvmAsgd(dtr:nfeature(),5e-7)
52 | mysvm.svmloss = svm.logloss
53 | mysvm.regbias = false
54 | print(mysvm)
55 | mysvm:determineEta0(1001,dtr)
56 | mysvm:train(dtr,dte,8)
57 | end
58 | -------------------------------------------------------------------------------- /svm-0.1-0.rockspec: --------------------------------------------------------------------------------
1 | package = "svm"
2 | version = "0.1-0"
3 |
4 | source = {
5 | url = "git@github.com:koraykv/torch-svm.git",
6 | tag = "master"
7 | }
8 |
9 | description = {
10 | summary = "Torch-SVM library",
11 | detailed = [[
12 | Torch-SVM library
13 | ]],
14 | homepage = "https://github.com/koraykv/torch-svm",
15 | license = "BSD"
16 | }
17 |
18 | dependencies = {
19 | "torch >= 7.0",
20 | "xlua >= 1.0"
21 | }
22 |
23 | build = {
24 | type = "command",
25 | build_command = [[
26 | cmake -E make_directory build && cd build && cmake .. -DCMAKE_BUILD_TYPE=Release -DLUA=$(LUA) -DLUALIB=$(LUALIB) -DLUA_BINDIR="$(LUA_BINDIR)" -DLUA_INCDIR="$(LUA_INCDIR)" -DLUA_LIBDIR="$(LUA_LIBDIR)" -DLUADIR="$(LUADIR)" -DLIBDIR="$(LIBDIR)" -DCMAKE_INSTALL_PREFIX="$(PREFIX)" && $(MAKE)
27 | ]],
28 | install_command = "cd build && $(MAKE) install"
29 | }
30 | -------------------------------------------------------------------------------- /util.c: --------------------------------------------------------------------------------
1 |
2 | #include "TH.h"
3 | #include "luaT.h"
4 |
5 | static int svm_spdot(lua_State *L)
6 | {
7 | THFloatTensor *tdense = luaT_checkudata(L,1,"torch.FloatTensor");
8 | THIntTensor *indices;
9 | if (lua_isnil(L,2))
10 | {
11 | indices = NULL;
12 | }
13 | else
14 | {
15 | indices = luaT_checkudata(L,2,"torch.IntTensor");
16 | }
17 | THFloatTensor *tsparse = luaT_checkudata(L,3,"torch.FloatTensor");
18 |
19 | luaL_argcheck(L,tdense->nDimension == 1, 1, "Dense tensor is expected to be 1D");
20 | luaL_argcheck(L,!indices || indices->nDimension == 1, 2, "Index tensor is expected to be 1D");
21 | luaL_argcheck(L,tsparse->nDimension == 1, 3, "Sparse value tensor is expected to be 1D");
22 |
23 | if (!indices)
24 | {
25 | lua_pushnumber(L,(double)THFloatTensor_dot(tdense,tsparse));
26 | return 1;
27 | }
28 |
29 | float *dense_data = THFloatTensor_data(tdense);
30 | float *sparse_data = THFloatTensor_data(tsparse);
31 | int *indices_data = THIntTensor_data(indices);
32 |
33 | long i;
34 | float res = 0;
35 |
36 | for (i=0; i< indices->size[0]; i++)
37 | {
38 | res += sparse_data[i]*dense_data[indices_data[i]-1];
39 | }
40 | lua_pushnumber(L,(double)res);
41 | return 1;
42 | }
43 |
44 | static int svm_spadd(lua_State *L)
45 | {
46 | THFloatTensor *tdense = luaT_checkudata(L,1,"torch.FloatTensor");
47 | float c = (float)lua_tonumber(L,2);
48 | THIntTensor *indices;
49 | if (lua_isnil(L,3))
50 | {
51 | indices = NULL;
52 | }
53 | else
54 | {
55 | indices = luaT_checkudata(L,3,"torch.IntTensor");
56 | }
57 | THFloatTensor *tsparse = luaT_checkudata(L,4,"torch.FloatTensor");
58 |
59 | luaL_argcheck(L,tdense->nDimension == 1, 1, "Dense tensor is expected to be 1D");
60 | luaL_argcheck(L,!indices||indices->nDimension == 1, 3, "Index tensor is expected to be 1D");
61 | luaL_argcheck(L,tsparse->nDimension == 1, 4, "Sparse value tensor is expected to be 1D");
62 |
63 | if(!indices)
64 | {
65 | THFloatTensor_cadd(tdense,tdense,c,tsparse);
66 | return 0;
67 | }
68 |
69 | float *dense_data = THFloatTensor_data(tdense);
70 | float *sparse_data = THFloatTensor_data(tsparse);
71 | int *indices_data = THIntTensor_data(indices);
72 |
73 | long i;
74 |
75 | for (i=0; i< indices->size[0]; i++)
76 | {
77 | dense_data[indices_data[i]-1] += c*sparse_data[i];
78 | }
79 | return 0;
80 | }
81 |
82 | static const struct luaL_Reg svm_util__ [] = {
83 | {"spdot", svm_spdot},
84 | {"spadd", svm_spadd},
85 | {NULL, NULL}
86 | };
87 |
88 |
89 | int libsvm_util_init(lua_State *L)
90 | {
91 | luaL_register(L, "svm", svm_util__);
92 | return 1;
93 | }
94 | --------------------------------------------------------------------------------
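To make the calling convention of the two helpers in util.c concrete, here is a small usage sketch from Lua. The signatures follow `svm_spdot` and `svm_spadd` as registered above; the tensor contents are illustrative:

```
require 'svm'

local w  = torch.FloatTensor(5):zero()   -- dense accumulator
local si = torch.IntTensor{1,4}          -- 1-based sparse indices
local sx = torch.FloatTensor{0.5,-2.0}   -- matching values

svm.spadd(w, 1.0, si, sx)                -- w <- w + 1.0 * sparse(si,sx)
print(svm.spdot(w, si, sx))              -- 0.5*0.5 + (-2)*(-2) = 4.25

-- with nil indices, both functions fall back to dense operations:
print(svm.spdot(w, nil, torch.FloatTensor(5):fill(1)))   -- sum of w = -1.5
```

This pair is what the SGD/ASGD trainers use for their inner loops, which is why both accept `nil` indices: it lets the same training code run on dense and sparse datasets.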