├── README.md
├── linux
│   ├── .DS_Store
│   ├── concatenate.cpp
│   ├── evaluate
│   │   ├── .DS_Store
│   │   ├── liblinear
│   │   │   ├── COPYRIGHT
│   │   │   ├── Makefile
│   │   │   ├── Makefile.win
│   │   │   ├── README
│   │   │   ├── blas
│   │   │   │   ├── Makefile
│   │   │   │   ├── blas.h
│   │   │   │   ├── blasp.h
│   │   │   │   ├── daxpy.c
│   │   │   │   ├── ddot.c
│   │   │   │   ├── dnrm2.c
│   │   │   │   └── dscal.c
│   │   │   ├── heart_scale
│   │   │   ├── linear.cpp
│   │   │   ├── linear.def
│   │   │   ├── linear.h
│   │   │   ├── matlab
│   │   │   │   ├── Makefile
│   │   │   │   ├── README
│   │   │   │   ├── libsvmread.c
│   │   │   │   ├── libsvmwrite.c
│   │   │   │   ├── linear_model_matlab.c
│   │   │   │   ├── linear_model_matlab.h
│   │   │   │   ├── make.m
│   │   │   │   ├── predict.c
│   │   │   │   └── train.c
│   │   │   ├── predict.c
│   │   │   ├── python
│   │   │   │   ├── Makefile
│   │   │   │   ├── README
│   │   │   │   ├── liblinear.py
│   │   │   │   └── liblinearutil.py
│   │   │   ├── train.c
│   │   │   ├── tron.cpp
│   │   │   ├── tron.h
│   │   │   └── windows
│   │   │       ├── liblinear.dll
│   │   │       ├── libsvmread.mexw64
│   │   │       ├── libsvmwrite.mexw64
│   │   │       ├── predict.exe
│   │   │       ├── predict.mexw64
│   │   │       ├── test
│   │   │       │   ├── model.txt
│   │   │       │   ├── output.txt
│   │   │       │   ├── test.txt
│   │   │       │   └── train.txt
│   │   │       ├── train.exe
│   │   │       └── train.mexw64
│   │   ├── make.sh
│   │   ├── program
│   │   │   ├── .DS_Store
│   │   │   ├── label.txt
│   │   │   ├── preprocess.cpp
│   │   │   ├── score.cpp
│   │   │   └── vocab.txt
│   │   ├── run.sh
│   │   └── score.py
│   ├── line
│   ├── line.cpp
│   ├── normalize.cpp
│   ├── preprocess_youtube.py
│   ├── reconstruct.cpp
│   └── train_youtube.sh
└── windows
    ├── .DS_Store
    ├── concatenate.cpp
    ├── evaluate
    │   ├── .DS_Store
    │   ├── liblinear
    │   │   ├── COPYRIGHT
    │   │   ├── Makefile
    │   │   ├── Makefile.win
    │   │   ├── README
    │   │   ├── blas
    │   │   │   ├── Makefile
    │   │   │   ├── blas.h
    │   │   │   ├── blasp.h
    │   │   │   ├── daxpy.c
    │   │   │   ├── ddot.c
    │   │   │   ├── dnrm2.c
    │   │   │   └── dscal.c
    │   │   ├── heart_scale
    │   │   ├── linear.cpp
    │   │   ├── linear.def
    │   │   ├── linear.h
    │   │   ├── matlab
    │   │   │   ├── Makefile
    │   │   │   ├── README
    │   │   │   ├── libsvmread.c
    │   │   │   ├── libsvmwrite.c
    │   │   │   ├── linear_model_matlab.c
    │   │   │   ├── linear_model_matlab.h
    │   │   │   ├── make.m
    │   │   │   ├── predict.c
    │   │   │   └── train.c
    │   │   ├── predict.c
    │   │   ├── python
    │   │   │   ├── Makefile
    │   │   │   ├── README
    │   │   │   ├── liblinear.py
    │   │   │   └── liblinearutil.py
    │   │   ├── train.c
    │   │   ├── tron.cpp
    │   │   ├── tron.h
    │   │   └── windows
    │   │       ├── liblinear.dll
    │   │       ├── libsvmread.mexw64
    │   │       ├── libsvmwrite.mexw64
    │   │       ├── predict.exe
    │   │       ├── predict.mexw64
    │   │       ├── test
    │   │       │   ├── model.txt
    │   │       │   ├── output.txt
    │   │       │   ├── test.txt
    │   │       │   └── train.txt
    │   │       ├── train.exe
    │   │       └── train.mexw64
    │   ├── program
    │   │   ├── .DS_Store
    │   │   ├── label.txt
    │   │   ├── preprocess.cpp
    │   │   ├── score.cpp
    │   │   └── vocab.txt
    │   ├── run.bat
    │   └── score.py
    ├── line.cpp
    ├── line.exe
    ├── normalize.cpp
    ├── preprocess_youtube.py
    ├── reconstruct.cpp
    └── train_youtube.bat

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------

# LINE: Large-scale information network embedding

**Note: this repository will no longer be maintained. For node embedding methods, please use our graph embedding system GraphVite: https://github.com/DeepGraphLearning/graphvite**

**Introduction**

This is the LINE toolkit developed for embedding very large-scale information networks. It is suitable for a variety of networks, whose edges may be directed or undirected, binary or weighted. The LINE model is quite efficient: it can embed a network with millions of vertices and billions of edges on a single machine within a few hours.
```
Contact: Jian Tang, tangjianpku@gmail.com
Project page: https://sites.google.com/site/pkujiantang/line
This work was done when the author was working at Microsoft Research
```
**Usage**

We provide both Windows and Linux versions.
To compile the source code, some external packages are required; they are used to generate random numbers for the edge-sampling algorithm in the LINE model. For the Windows version, the BOOST package is used and can be downloaded at http://www.boost.org/; for Linux, the GSL package is used and can be downloaded at http://www.gnu.org/software/gsl/

**Network Input**

The input of a network consists of the edges in the network. Each line of the input file represents a DIRECTED edge in the network, specified in the format "source_node target_node weight" (separated by either a blank or a tab). For each undirected edge, users must use TWO directed edges to represent it (a minimal helper for doubling undirected edges is sketched after the Examples section below). Here is an input example of a word co-occurrence network:
```
good the 3
the good 3
good bad 1
bad good 1
bad of 4
of bad 4
```

**Run**
```
./line -train network_file -output embedding_file -binary 1 -size 200 -order 2 -negative 5 -samples 100 -rho 0.025 -threads 20
```
- -train, the input file of a network;
- -output, the output file of the embedding;
- -binary, whether to save the output file in binary mode; the default is 0 (off);
- -size, the dimension of the embedding; the default is 100;
- -order, the order of the proximity used; 1 for first order, 2 for second order; the default is 2;
- -negative, the number of negative samples used in negative sampling; the default is 5;
- -samples, the total number of training samples, in millions;
- -rho, the starting value of the learning rate; the default is 0.025;
- -threads, the total number of threads used; the default is 1.

**Files in the folder**
- line.cpp, the source code of LINE;
- reconstruct.cpp, the code for reconstructing sparse networks into dense ones, as described in Section 4.3 of the paper;
- normalize.cpp, the code for normalizing the embeddings (L2 normalization);
- concatenate.cpp, the code for concatenating the 1st-order and 2nd-order embeddings;

**Examples**

We provide an example running script for the Youtube data set (available at http://socialnetworks.mpi-sws.mpg.de/data/youtube-links.txt.gz). The script first runs LINE to learn the network embeddings and then evaluates the learned embeddings on the node classification task.

To run the script, users first need to compile the evaluation codes by running make.sh in the folder "evaluate". Afterwards, run train_youtube.sh (Linux) or train_youtube.bat (Windows) to execute the whole pipeline.
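Since the input format requires both directions of every undirected edge, a small preprocessing step is often useful before training. Below is a minimal sketch of such a helper; it is not part of the toolkit, and the file names in the usage example are only illustrative. It reads edges in the "source_node target_node weight" format and writes each one in both directions:
```
// double_edges.cpp -- minimal sketch (not part of the LINE toolkit):
// reads an undirected edge list and emits both directed edges per input
// line, producing the doubled format that ./line expects.
#include <stdio.h>

int main(int argc, char **argv) {
	if (argc != 3) {
		fprintf(stderr, "Usage: %s <undirected_edge_file> <directed_edge_file>\n", argv[0]);
		return 1;
	}
	FILE *fi = fopen(argv[1], "r");
	FILE *fo = fopen(argv[2], "w");
	if (fi == NULL || fo == NULL) {
		fprintf(stderr, "Cannot open input or output file\n");
		return 1;
	}
	char u[100], v[100];
	double w;
	while (fscanf(fi, "%99s %99s %lf", u, v, &w) == 3) {
		fprintf(fo, "%s %s %g\n", u, v, w); // original direction
		fprintf(fo, "%s %s %g\n", v, u, w); // reverse direction
	}
	fclose(fi);
	fclose(fo);
	return 0;
}
```
For example, `./double_edges edges_undirected.txt net.txt` would produce a doubled edge list like the word co-occurrence example shown above.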

**Citation**

```
@inproceedings{tang2015line,
  title={LINE: Large-scale Information Network Embedding.},
  author={Tang, Jian and Qu, Meng and Wang, Mingzhe and Zhang, Ming and Yan, Jun and Mei, Qiaozhu},
  booktitle={WWW},
  year={2015},
  organization={ACM}
}
```

--------------------------------------------------------------------------------
/linux/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tangjianpku/LINE/d5f840941e0f4026090d1b1feeaf15da38e2b24b/linux/.DS_Store
--------------------------------------------------------------------------------
/linux/concatenate.cpp:
--------------------------------------------------------------------------------

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <math.h>

#define MAX_STRING 100

const int hash_table_size = 30000000;

typedef float real; // Precision of float numbers

struct ClassVertex {
	double degree;
	char *name;
};

char vector_file1[MAX_STRING], vector_file2[MAX_STRING], output_file[MAX_STRING];
struct ClassVertex *vertex;
int binary = 0;
int *vertex_hash_table;
long long max_num_vertices = 1000, num_vertices = 0;
long long vector_dim1, vector_dim2;
real *vec1, *vec2;

/* Build a hash table, mapping each vertex name to a unique vertex id */
unsigned int Hash(char *key)
{
	unsigned int seed = 131;
	unsigned int hash = 0;
	while (*key)
	{
		hash = hash * seed + (*key++);
	}
	return hash % hash_table_size;
}

void InitHashTable()
{
	vertex_hash_table = (int *)malloc(hash_table_size * sizeof(int));
	for (int k = 0; k != hash_table_size; k++) vertex_hash_table[k] = -1;
}

void InsertHashTable(char *key, int value)
{
	int addr = Hash(key);
	while (vertex_hash_table[addr] != -1) addr = (addr + 1) % hash_table_size; /* linear probing */
	vertex_hash_table[addr] = value;
}

int SearchHashTable(char *key)
{
	int addr = Hash(key);
	while (1)
	{
		if (vertex_hash_table[addr] == -1) return -1;
		if (!strcmp(key, vertex[vertex_hash_table[addr]].name)) return vertex_hash_table[addr];
		addr = (addr + 1) % hash_table_size;
	}
	return -1;
}

/* Add a vertex to the vertex set */
int AddVertex(char *name, int vid)
{
	int length = strlen(name) + 1;
	if (length > MAX_STRING) length = MAX_STRING;
	vertex[vid].name = (char *)calloc(length, sizeof(char));
	strcpy(vertex[vid].name, name);
	vertex[vid].degree = 0;
	InsertHashTable(name, vid);
	return vid;
}

/* Read both embedding files; the vertex order of file 1 defines the vertex ids */
void ReadVector()
{
	char ch, name[MAX_STRING];
	real f_num;
	long long l;

	FILE *fi = fopen(vector_file1, "rb");
	if (fi == NULL) {
		printf("Vector file 1 not found\n");
		exit(1);
	}
	fscanf(fi, "%lld %lld", &num_vertices, &vector_dim1);
	vertex = (struct ClassVertex *)calloc(num_vertices, sizeof(struct ClassVertex));
	vec1 = (real *)calloc(num_vertices * vector_dim1, sizeof(real));
	for (long long k = 0; k != num_vertices; k++)
	{
		fscanf(fi, "%s", name);
		ch = fgetc(fi);
		AddVertex(name, k);
		l = k * vector_dim1;
		for (int c = 0; c != vector_dim1; c++)
		{
			fread(&f_num, sizeof(real), 1, fi);
			vec1[c + l] = (real)f_num;
		}
	}
	fclose(fi);
"rb"); 104 | if (fi == NULL) { 105 | printf("Vector file 2 not found\n"); 106 | exit(1); 107 | } 108 | fscanf(fi, "%lld %lld", &l, &vector_dim2); 109 | vec2 = (real *)calloc((num_vertices + 1) * vector_dim2, sizeof(real)); 110 | for (long long k = 0; k != num_vertices; k++) 111 | { 112 | fscanf(fi, "%s", name); 113 | ch = fgetc(fi); 114 | int i = SearchHashTable(name); 115 | if (i == -1) l = num_vertices * vector_dim2; 116 | else l = i * vector_dim2; 117 | for (int c = 0; c != vector_dim2; c++) 118 | { 119 | fread(&f_num, sizeof(float), 1, fi); 120 | vec2[c + l] = (real)f_num; 121 | } 122 | } 123 | fclose(fi); 124 | 125 | printf("Vocab size: %lld\n", num_vertices); 126 | printf("Vector size 1: %lld\n", vector_dim1); 127 | printf("Vector size 2: %lld\n", vector_dim2); 128 | } 129 | 130 | 131 | void TrainModel() { 132 | long long a, b; 133 | double len; 134 | 135 | InitHashTable(); 136 | ReadVector(); 137 | 138 | FILE *fo; 139 | fo = fopen(output_file, "wb"); 140 | fprintf(fo, "%lld %lld\n", num_vertices, vector_dim1 + vector_dim2); 141 | for (a = 0; a < num_vertices; a++) { 142 | fprintf(fo, "%s ", vertex[a].name); 143 | 144 | len = 0; 145 | for (b = 0; b < vector_dim1; b++) len += vec1[b + a * vector_dim1] * vec1[b + a * vector_dim1]; 146 | len = sqrt(len); 147 | for (b = 0; b < vector_dim1; b++) vec1[b + a * vector_dim1] /= len; 148 | 149 | len = 0; 150 | for (b = 0; b < vector_dim2; b++) len += vec2[b + a * vector_dim2] * vec2[b + a * vector_dim2]; 151 | len = sqrt(len); 152 | for (b = 0; b < vector_dim2; b++) vec2[b + a * vector_dim2] /= len; 153 | 154 | if (binary) 155 | { 156 | for (b = 0; b < vector_dim1; b++) 157 | fwrite(&vec1[a * vector_dim1 + b], sizeof(real), 1, fo); 158 | for (b = 0; b < vector_dim2; b++) 159 | fwrite(&vec2[a * vector_dim2 + b], sizeof(real), 1, fo); 160 | } 161 | else 162 | { 163 | for (b = 0; b < vector_dim1; b++) 164 | fprintf(fo, "%lf ", vec1[a * vector_dim1 + b]); 165 | for (b = 0; b < vector_dim2; b++) 166 | fprintf(fo, "%lf ", vec2[a * vector_dim2 + b]); 167 | } 168 | fprintf(fo, "\n"); 169 | } 170 | fclose(fo); 171 | } 172 | 173 | int ArgPos(char *str, int argc, char **argv) { 174 | int a; 175 | for (a = 1; a < argc; a++) if (!strcmp(str, argv[a])) { 176 | if (a == argc - 1) { 177 | printf("Argument missing for %s\n", str); 178 | exit(1); 179 | } 180 | return a; 181 | } 182 | return -1; 183 | } 184 | 185 | int main(int argc, char **argv) { 186 | int i; 187 | if (argc == 1) { 188 | printf("Concatenate the 1st-order embedding and the 2nd-order embeddings\n\n"); 189 | printf("Options:\n"); 190 | printf("Parameters for training:\n"); 191 | printf("\t-input1 \n"); 192 | printf("\t\tThe 1st-order embeddings\n"); 193 | printf("\t-input2 \n"); 194 | printf("\t\tThe 2nd-order embeddings\n"); 195 | printf("\t-output \n"); 196 | printf("\t\tUse to save the concatenated embeddings\n"); 197 | printf("\t-binary \n"); 198 | printf("\t\tSave the learnt embeddings in binary moded; default is 0 (off)\n"); 199 | printf("\nExamples:\n"); 200 | printf("./concatenate -input1 vec_1st.txt -input2 vec_2nd.txt -output vec_all.txt -binary 1\n\n"); 201 | return 0; 202 | } 203 | if ((i = ArgPos((char *)"-input1", argc, argv)) > 0) strcpy(vector_file1, argv[i + 1]); 204 | if ((i = ArgPos((char *)"-input2", argc, argv)) > 0) strcpy(vector_file2, argv[i + 1]); 205 | if ((i = ArgPos((char *)"-output", argc, argv)) > 0) strcpy(output_file, argv[i + 1]); 206 | if ((i = ArgPos((char *)"-binary", argc, argv)) > 0) binary = atoi(argv[i + 1]); 207 | TrainModel(); 208 | return 0; 209 | } 
-------------------------------------------------------------------------------- /linux/evaluate/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tangjianpku/LINE/d5f840941e0f4026090d1b1feeaf15da38e2b24b/linux/evaluate/.DS_Store -------------------------------------------------------------------------------- /linux/evaluate/liblinear/COPYRIGHT: -------------------------------------------------------------------------------- 1 | 2 | Copyright (c) 2007-2013 The LIBLINEAR Project. 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions 7 | are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright 10 | notice, this list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright 13 | notice, this list of conditions and the following disclaimer in the 14 | documentation and/or other materials provided with the distribution. 15 | 16 | 3. Neither name of copyright holders nor the names of its contributors 17 | may be used to endorse or promote products derived from this software 18 | without specific prior written permission. 19 | 20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 | ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR 25 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 26 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 27 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 28 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 29 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 30 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 31 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
32 | -------------------------------------------------------------------------------- /linux/evaluate/liblinear/Makefile: -------------------------------------------------------------------------------- 1 | CXX ?= g++ 2 | CC ?= gcc 3 | CFLAGS = -Wall -Wconversion -O3 -fPIC 4 | LIBS = blas/blas.a 5 | SHVER = 1 6 | OS = $(shell uname) 7 | #LIBS = -lblas 8 | 9 | all: train predict 10 | 11 | lib: linear.o tron.o blas/blas.a 12 | if [ "$(OS)" = "Darwin" ]; then \ 13 | SHARED_LIB_FLAG="-dynamiclib -Wl,-install_name,liblinear.so.$(SHVER)"; \ 14 | else \ 15 | SHARED_LIB_FLAG="-shared -Wl,-soname,liblinear.so.$(SHVER)"; \ 16 | fi; \ 17 | $(CXX) $${SHARED_LIB_FLAG} linear.o tron.o blas/blas.a -o liblinear.so.$(SHVER) 18 | 19 | train: tron.o linear.o train.c blas/blas.a 20 | $(CXX) $(CFLAGS) -o train train.c tron.o linear.o $(LIBS) 21 | 22 | predict: tron.o linear.o predict.c blas/blas.a 23 | $(CXX) $(CFLAGS) -o predict predict.c tron.o linear.o $(LIBS) 24 | 25 | tron.o: tron.cpp tron.h 26 | $(CXX) $(CFLAGS) -c -o tron.o tron.cpp 27 | 28 | linear.o: linear.cpp linear.h 29 | $(CXX) $(CFLAGS) -c -o linear.o linear.cpp 30 | 31 | blas/blas.a: blas/*.c blas/*.h 32 | make -C blas OPTFLAGS='$(CFLAGS)' CC='$(CC)'; 33 | 34 | clean: 35 | make -C blas clean 36 | make -C matlab clean 37 | rm -f *~ tron.o linear.o train predict liblinear.so.$(SHVER) 38 | -------------------------------------------------------------------------------- /linux/evaluate/liblinear/Makefile.win: -------------------------------------------------------------------------------- 1 | #You must ensure nmake.exe, cl.exe, link.exe are in system path. 2 | #VCVARS32.bat 3 | #Under dosbox prompt 4 | #nmake -f Makefile.win 5 | 6 | ########################################## 7 | CXX = cl.exe 8 | CFLAGS = -nologo -O2 -EHsc -I. -D __WIN32__ -D _CRT_SECURE_NO_DEPRECATE 9 | TARGET = windows 10 | 11 | all: $(TARGET)\train.exe $(TARGET)\predict.exe 12 | 13 | $(TARGET)\train.exe: tron.obj linear.obj train.c blas\*.c 14 | $(CXX) $(CFLAGS) -Fe$(TARGET)\train.exe tron.obj linear.obj train.c blas\*.c 15 | 16 | $(TARGET)\predict.exe: tron.obj linear.obj predict.c blas\*.c 17 | $(CXX) $(CFLAGS) -Fe$(TARGET)\predict.exe tron.obj linear.obj predict.c blas\*.c 18 | 19 | linear.obj: linear.cpp linear.h 20 | $(CXX) $(CFLAGS) -c linear.cpp 21 | 22 | tron.obj: tron.cpp tron.h 23 | $(CXX) $(CFLAGS) -c tron.cpp 24 | 25 | lib: linear.cpp linear.h linear.def tron.obj 26 | $(CXX) $(CFLAGS) -LD linear.cpp tron.obj blas\*.c -Fe$(TARGET)\liblinear -link -DEF:linear.def 27 | 28 | clean: 29 | -erase /Q *.obj $(TARGET)\. 30 | 31 | -------------------------------------------------------------------------------- /linux/evaluate/liblinear/blas/Makefile: -------------------------------------------------------------------------------- 1 | AR = ar rcv 2 | RANLIB = ranlib 3 | 4 | HEADERS = blas.h blasp.h 5 | FILES = dnrm2.o daxpy.o ddot.o dscal.o 6 | 7 | CFLAGS = $(OPTFLAGS) 8 | FFLAGS = $(OPTFLAGS) 9 | 10 | blas: $(FILES) $(HEADERS) 11 | $(AR) blas.a $(FILES) 12 | $(RANLIB) blas.a 13 | 14 | clean: 15 | - rm -f *.o 16 | - rm -f *.a 17 | - rm -f *~ 18 | 19 | .c.o: 20 | $(CC) $(CFLAGS) -c $*.c 21 | 22 | 23 | -------------------------------------------------------------------------------- /linux/evaluate/liblinear/blas/blas.h: -------------------------------------------------------------------------------- 1 | /* blas.h -- C header file for BLAS Ver 1.0 */ 2 | /* Jesse Bennett March 23, 2000 */ 3 | 4 | /** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." 
5 | 6 | - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ 7 | 8 | #ifndef BLAS_INCLUDE 9 | #define BLAS_INCLUDE 10 | 11 | /* Data types specific to BLAS implementation */ 12 | typedef struct { float r, i; } fcomplex; 13 | typedef struct { double r, i; } dcomplex; 14 | typedef int blasbool; 15 | 16 | #include "blasp.h" /* Prototypes for all BLAS functions */ 17 | 18 | #define FALSE 0 19 | #define TRUE 1 20 | 21 | /* Macro functions */ 22 | #define MIN(a,b) ((a) <= (b) ? (a) : (b)) 23 | #define MAX(a,b) ((a) >= (b) ? (a) : (b)) 24 | 25 | #endif 26 | -------------------------------------------------------------------------------- /linux/evaluate/liblinear/blas/daxpy.c: -------------------------------------------------------------------------------- 1 | #include "blas.h" 2 | 3 | int daxpy_(int *n, double *sa, double *sx, int *incx, double *sy, 4 | int *incy) 5 | { 6 | long int i, m, ix, iy, nn, iincx, iincy; 7 | register double ssa; 8 | 9 | /* constant times a vector plus a vector. 10 | uses unrolled loop for increments equal to one. 11 | jack dongarra, linpack, 3/11/78. 12 | modified 12/3/93, array(1) declarations changed to array(*) */ 13 | 14 | /* Dereference inputs */ 15 | nn = *n; 16 | ssa = *sa; 17 | iincx = *incx; 18 | iincy = *incy; 19 | 20 | if( nn > 0 && ssa != 0.0 ) 21 | { 22 | if (iincx == 1 && iincy == 1) /* code for both increments equal to 1 */ 23 | { 24 | m = nn-3; 25 | for (i = 0; i < m; i += 4) 26 | { 27 | sy[i] += ssa * sx[i]; 28 | sy[i+1] += ssa * sx[i+1]; 29 | sy[i+2] += ssa * sx[i+2]; 30 | sy[i+3] += ssa * sx[i+3]; 31 | } 32 | for ( ; i < nn; ++i) /* clean-up loop */ 33 | sy[i] += ssa * sx[i]; 34 | } 35 | else /* code for unequal increments or equal increments not equal to 1 */ 36 | { 37 | ix = iincx >= 0 ? 0 : (1 - nn) * iincx; 38 | iy = iincy >= 0 ? 0 : (1 - nn) * iincy; 39 | for (i = 0; i < nn; i++) 40 | { 41 | sy[iy] += ssa * sx[ix]; 42 | ix += iincx; 43 | iy += iincy; 44 | } 45 | } 46 | } 47 | 48 | return 0; 49 | } /* daxpy_ */ 50 | -------------------------------------------------------------------------------- /linux/evaluate/liblinear/blas/ddot.c: -------------------------------------------------------------------------------- 1 | #include "blas.h" 2 | 3 | double ddot_(int *n, double *sx, int *incx, double *sy, int *incy) 4 | { 5 | long int i, m, nn, iincx, iincy; 6 | double stemp; 7 | long int ix, iy; 8 | 9 | /* forms the dot product of two vectors. 10 | uses unrolled loops for increments equal to one. 11 | jack dongarra, linpack, 3/11/78. 
12 | modified 12/3/93, array(1) declarations changed to array(*) */ 13 | 14 | /* Dereference inputs */ 15 | nn = *n; 16 | iincx = *incx; 17 | iincy = *incy; 18 | 19 | stemp = 0.0; 20 | if (nn > 0) 21 | { 22 | if (iincx == 1 && iincy == 1) /* code for both increments equal to 1 */ 23 | { 24 | m = nn-4; 25 | for (i = 0; i < m; i += 5) 26 | stemp += sx[i] * sy[i] + sx[i+1] * sy[i+1] + sx[i+2] * sy[i+2] + 27 | sx[i+3] * sy[i+3] + sx[i+4] * sy[i+4]; 28 | 29 | for ( ; i < nn; i++) /* clean-up loop */ 30 | stemp += sx[i] * sy[i]; 31 | } 32 | else /* code for unequal increments or equal increments not equal to 1 */ 33 | { 34 | ix = 0; 35 | iy = 0; 36 | if (iincx < 0) 37 | ix = (1 - nn) * iincx; 38 | if (iincy < 0) 39 | iy = (1 - nn) * iincy; 40 | for (i = 0; i < nn; i++) 41 | { 42 | stemp += sx[ix] * sy[iy]; 43 | ix += iincx; 44 | iy += iincy; 45 | } 46 | } 47 | } 48 | 49 | return stemp; 50 | } /* ddot_ */ 51 | -------------------------------------------------------------------------------- /linux/evaluate/liblinear/blas/dnrm2.c: -------------------------------------------------------------------------------- 1 | #include <math.h> /* Needed for fabs() and sqrt() */ 2 | #include "blas.h" 3 | 4 | double dnrm2_(int *n, double *x, int *incx) 5 | { 6 | long int ix, nn, iincx; 7 | double norm, scale, absxi, ssq, temp; 8 | 9 | /* DNRM2 returns the euclidean norm of a vector via the function 10 | name, so that 11 | 12 | DNRM2 := sqrt( x'*x ) 13 | 14 | -- This version written on 25-October-1982. 15 | Modified on 14-October-1993 to inline the call to SLASSQ. 16 | Sven Hammarling, Nag Ltd. */ 17 | 18 | /* Dereference inputs */ 19 | nn = *n; 20 | iincx = *incx; 21 | 22 | if( nn > 0 && iincx > 0 ) 23 | { 24 | if (nn == 1) 25 | { 26 | norm = fabs(x[0]); 27 | } 28 | else 29 | { 30 | scale = 0.0; 31 | ssq = 1.0; 32 | 33 | /* The following loop is equivalent to this call to the LAPACK 34 | auxiliary routine: CALL SLASSQ( N, X, INCX, SCALE, SSQ ) */ 35 | 36 | for (ix=(nn-1)*iincx; ix>=0; ix-=iincx) 37 | { 38 | if (x[ix] != 0.0) 39 | { 40 | absxi = fabs(x[ix]); 41 | if (scale < absxi) 42 | { 43 | temp = scale / absxi; 44 | ssq = ssq * (temp * temp) + 1.0; 45 | scale = absxi; 46 | } 47 | else 48 | { 49 | temp = absxi / scale; 50 | ssq += temp * temp; 51 | } 52 | } 53 | } 54 | norm = scale * sqrt(ssq); 55 | } 56 | } 57 | else 58 | norm = 0.0; 59 | 60 | return norm; 61 | 62 | } /* dnrm2_ */ 63 | -------------------------------------------------------------------------------- /linux/evaluate/liblinear/blas/dscal.c: -------------------------------------------------------------------------------- 1 | #include "blas.h" 2 | 3 | int dscal_(int *n, double *sa, double *sx, int *incx) 4 | { 5 | long int i, m, nincx, nn, iincx; 6 | double ssa; 7 | 8 | /* scales a vector by a constant. 9 | uses unrolled loops for increment equal to 1. 10 | jack dongarra, linpack, 3/11/78. 11 | modified 3/93 to return if incx .le. 0.
12 | modified 12/3/93, array(1) declarations changed to array(*) */ 13 | 14 | /* Dereference inputs */ 15 | nn = *n; 16 | iincx = *incx; 17 | ssa = *sa; 18 | 19 | if (nn > 0 && iincx > 0) 20 | { 21 | if (iincx == 1) /* code for increment equal to 1 */ 22 | { 23 | m = nn-4; 24 | for (i = 0; i < m; i += 5) 25 | { 26 | sx[i] = ssa * sx[i]; 27 | sx[i+1] = ssa * sx[i+1]; 28 | sx[i+2] = ssa * sx[i+2]; 29 | sx[i+3] = ssa * sx[i+3]; 30 | sx[i+4] = ssa * sx[i+4]; 31 | } 32 | for ( ; i < nn; ++i) /* clean-up loop */ 33 | sx[i] = ssa * sx[i]; 34 | } 35 | else /* code for increment not equal to 1 */ 36 | { 37 | nincx = nn * iincx; 38 | for (i = 0; i < nincx; i += iincx) 39 | sx[i] = ssa * sx[i]; 40 | } 41 | } 42 | 43 | return 0; 44 | } /* dscal_ */ 45 | -------------------------------------------------------------------------------- /linux/evaluate/liblinear/linear.def: -------------------------------------------------------------------------------- 1 | LIBRARY liblinear 2 | EXPORTS 3 | train @1 4 | cross_validation @2 5 | save_model @3 6 | load_model @4 7 | get_nr_feature @5 8 | get_nr_class @6 9 | get_labels @7 10 | predict_values @8 11 | predict @9 12 | predict_probability @10 13 | free_and_destroy_model @11 14 | free_model_content @12 15 | destroy_param @13 16 | check_parameter @14 17 | check_probability_model @15 18 | set_print_string_function @16 19 | -------------------------------------------------------------------------------- /linux/evaluate/liblinear/linear.h: -------------------------------------------------------------------------------- 1 | #ifndef _LIBLINEAR_H 2 | #define _LIBLINEAR_H 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | struct feature_node 9 | { 10 | int index; 11 | double value; 12 | }; 13 | 14 | struct problem 15 | { 16 | int l, n; 17 | double *y; 18 | struct feature_node **x; 19 | double bias; /* < 0 if no bias term */ 20 | }; 21 | 22 | enum { L2R_LR, L2R_L2LOSS_SVC_DUAL, L2R_L2LOSS_SVC, L2R_L1LOSS_SVC_DUAL, MCSVM_CS, L1R_L2LOSS_SVC, L1R_LR, L2R_LR_DUAL, L2R_L2LOSS_SVR = 11, L2R_L2LOSS_SVR_DUAL, L2R_L1LOSS_SVR_DUAL }; /* solver_type */ 23 | 24 | struct parameter 25 | { 26 | int solver_type; 27 | 28 | /* these are for training only */ 29 | double eps; /* stopping criteria */ 30 | double C; 31 | int nr_weight; 32 | int *weight_label; 33 | double* weight; 34 | double p; 35 | }; 36 | 37 | struct model 38 | { 39 | struct parameter param; 40 | int nr_class; /* number of classes */ 41 | int nr_feature; 42 | double *w; 43 | int *label; /* label of each class */ 44 | double bias; 45 | }; 46 | 47 | struct model* train(const struct problem *prob, const struct parameter *param); 48 | void cross_validation(const struct problem *prob, const struct parameter *param, int nr_fold, double *target); 49 | 50 | double predict_values(const struct model *model_, const struct feature_node *x, double* dec_values); 51 | double predict(const struct model *model_, const struct feature_node *x); 52 | double predict_probability(const struct model *model_, const struct feature_node *x, double* prob_estimates); 53 | 54 | int save_model(const char *model_file_name, const struct model *model_); 55 | struct model *load_model(const char *model_file_name); 56 | 57 | int get_nr_feature(const struct model *model_); 58 | int get_nr_class(const struct model *model_); 59 | void get_labels(const struct model *model_, int* label); 60 | 61 | void free_model_content(struct model *model_ptr); 62 | void free_and_destroy_model(struct model **model_ptr_ptr); 63 | void destroy_param(struct parameter 
*param); 64 | 65 | const char *check_parameter(const struct problem *prob, const struct parameter *param); 66 | int check_probability_model(const struct model *model); 67 | void set_print_string_function(void (*print_func) (const char*)); 68 | 69 | #ifdef __cplusplus 70 | } 71 | #endif 72 | 73 | #endif /* _LIBLINEAR_H */ 74 | 75 | -------------------------------------------------------------------------------- /linux/evaluate/liblinear/matlab/Makefile: -------------------------------------------------------------------------------- 1 | # This Makefile is used under Linux 2 | 3 | MATLABDIR ?= /usr/local/matlab 4 | CXX ?= g++ 5 | #CXX = g++-3.3 6 | CC ?= gcc 7 | CFLAGS = -Wall -Wconversion -O3 -fPIC -I$(MATLABDIR)/extern/include -I.. 8 | 9 | MEX = $(MATLABDIR)/bin/mex 10 | MEX_OPTION = CC\#$(CXX) CXX\#$(CXX) CFLAGS\#"$(CFLAGS)" CXXFLAGS\#"$(CFLAGS)" 11 | # comment the following line if you use MATLAB on a 32-bit computer 12 | MEX_OPTION += -largeArrayDims 13 | MEX_EXT = $(shell $(MATLABDIR)/bin/mexext) 14 | 15 | OCTAVEDIR ?= /usr/include/octave 16 | OCTAVE_MEX = env CC=$(CXX) mkoctfile 17 | OCTAVE_MEX_OPTION = --mex 18 | OCTAVE_MEX_EXT = mex 19 | OCTAVE_CFLAGS = -Wall -O3 -fPIC -I$(OCTAVEDIR) -I.. 20 | 21 | all: matlab 22 | 23 | matlab: binary 24 | 25 | octave: 26 | @make MEX="$(OCTAVE_MEX)" MEX_OPTION="$(OCTAVE_MEX_OPTION)" \ 27 | MEX_EXT="$(OCTAVE_MEX_EXT)" CFLAGS="$(OCTAVE_CFLAGS)" \ 28 | binary 29 | 30 | binary: train.$(MEX_EXT) predict.$(MEX_EXT) libsvmread.$(MEX_EXT) libsvmwrite.$(MEX_EXT) 31 | 32 | train.$(MEX_EXT): train.c ../linear.h ../tron.o ../linear.o linear_model_matlab.o ../blas/blas.a 33 | $(MEX) $(MEX_OPTION) train.c ../tron.o ../linear.o linear_model_matlab.o ../blas/blas.a 34 | 35 | predict.$(MEX_EXT): predict.c ../linear.h ../tron.o ../linear.o linear_model_matlab.o ../blas/blas.a 36 | $(MEX) $(MEX_OPTION) predict.c ../tron.o ../linear.o linear_model_matlab.o ../blas/blas.a 37 | 38 | libsvmread.$(MEX_EXT): libsvmread.c 39 | $(MEX) $(MEX_OPTION) libsvmread.c 40 | 41 | libsvmwrite.$(MEX_EXT): libsvmwrite.c 42 | $(MEX) $(MEX_OPTION) libsvmwrite.c 43 | 44 | linear_model_matlab.o: linear_model_matlab.c ../linear.h 45 | $(CXX) $(CFLAGS) -c linear_model_matlab.c 46 | 47 | ../linear.o: ../linear.cpp ../linear.h 48 | make -C .. linear.o 49 | 50 | ../tron.o: ../tron.cpp ../tron.h 51 | make -C .. tron.o 52 | 53 | ../blas/blas.a: ../blas/*.c ../blas/*.h 54 | make -C ../blas OPTFLAGS='$(CFLAGS)' CC='$(CC)'; 55 | 56 | clean: 57 | make -C ../blas clean 58 | rm -f *~ *.o *.mex* *.obj ../linear.o ../tron.o 59 | -------------------------------------------------------------------------------- /linux/evaluate/liblinear/matlab/README: -------------------------------------------------------------------------------- 1 | -------------------------------------------- 2 | --- MATLAB/OCTAVE interface of LIBLINEAR --- 3 | -------------------------------------------- 4 | 5 | Table of Contents 6 | ================= 7 | 8 | - Introduction 9 | - Installation 10 | - Usage 11 | - Returned Model Structure 12 | - Other Utilities 13 | - Examples 14 | - Additional Information 15 | 16 | 17 | Introduction 18 | ============ 19 | 20 | This tool provides a simple interface to LIBLINEAR, a library for 21 | large-scale regularized linear classification and regression 22 | (http://www.csie.ntu.edu.tw/~cjlin/liblinear). It is very easy to use 23 | as the usage and the way of specifying parameters are the same as that 24 | of LIBLINEAR. 
25 | 26 | Installation 27 | ============ 28 | 29 | On Windows systems, pre-built binary files are already in the 30 | directory '..\windows', so no need to conduct installation. Now we 31 | provide binary files only for 64bit MATLAB on Windows. If you would 32 | like to re-build the package, please rely on the following steps. 33 | 34 | We recommend using make.m on both MATLAB and OCTAVE. Just type 'make' 35 | to build 'libsvmread.mex', 'libsvmwrite.mex', 'train.mex', and 36 | 'predict.mex'. 37 | 38 | On MATLAB or Octave: 39 | 40 | >> make 41 | 42 | If make.m does not work on MATLAB (especially for Windows), try 'mex 43 | -setup' to choose a suitable compiler for mex. Make sure your compiler 44 | is accessible and workable. Then type 'make' to start the 45 | installation. 46 | 47 | Example: 48 | 49 | matlab>> mex -setup 50 | (ps: MATLAB will show the following messages to setup default compiler.) 51 | Please choose your compiler for building external interface (MEX) files: 52 | Would you like mex to locate installed compilers [y]/n? y 53 | Select a compiler: 54 | [1] Microsoft Visual C/C++ version 7.1 in C:\Program Files\Microsoft Visual Studio 55 | [0] None 56 | Compiler: 1 57 | Please verify your choices: 58 | Compiler: Microsoft Visual C/C++ 7.1 59 | Location: C:\Program Files\Microsoft Visual Studio 60 | Are these correct?([y]/n): y 61 | 62 | matlab>> make 63 | 64 | On Unix systems, if neither make.m nor 'mex -setup' works, please use 65 | Makefile and type 'make' in a command window. Note that we assume 66 | your MATLAB is installed in '/usr/local/matlab'. If not, please change 67 | MATLABDIR in Makefile. 68 | 69 | Example: 70 | linux> make 71 | 72 | To use octave, type 'make octave': 73 | 74 | Example: 75 | linux> make octave 76 | 77 | For a list of supported/compatible compilers for MATLAB, please check 78 | the following page: 79 | 80 | http://www.mathworks.com/support/compilers/current_release/ 81 | 82 | Usage 83 | ===== 84 | 85 | matlab> model = train(training_label_vector, training_instance_matrix [,'liblinear_options', 'col']); 86 | 87 | -training_label_vector: 88 | An m by 1 vector of training labels. (type must be double) 89 | -training_instance_matrix: 90 | An m by n matrix of m training instances with n features. 91 | It must be a sparse matrix. (type must be double) 92 | -liblinear_options: 93 | A string of training options in the same format as that of LIBLINEAR. 94 | -col: 95 | if 'col' is set, each column of training_instance_matrix is a data instance. Otherwise each row is a data instance. 96 | 97 | matlab> [predicted_label, accuracy, decision_values/prob_estimates] = predict(testing_label_vector, testing_instance_matrix, model [, 'liblinear_options', 'col']); 98 | matlab> [predicted_label] = predict(testing_label_vector, testing_instance_matrix, model [, 'liblinear_options', 'col']); 99 | 100 | -testing_label_vector: 101 | An m by 1 vector of prediction labels. If labels of test 102 | data are unknown, simply use any random values. (type must be double) 103 | -testing_instance_matrix: 104 | An m by n matrix of m testing instances with n features. 105 | It must be a sparse matrix. (type must be double) 106 | -model: 107 | The output of train. 108 | -liblinear_options: 109 | A string of testing options in the same format as that of LIBLINEAR. 110 | -col: 111 | if 'col' is set, each column of testing_instance_matrix is a data instance. Otherwise each row is a data instance. 
112 | 113 | Returned Model Structure 114 | ======================== 115 | 116 | The 'train' function returns a model which can be used for future 117 | prediction. It is a structure and is organized as [Parameters, nr_class, 118 | nr_feature, bias, Label, w]: 119 | 120 | -Parameters: Parameters 121 | -nr_class: number of classes; = 2 for regression 122 | -nr_feature: number of features in training data (without including the bias term) 123 | -bias: If >= 0, we assume one additional feature is added to the end 124 | of each data instance. 125 | -Label: label of each class; empty for regression 126 | -w: a nr_w-by-n matrix for the weights, where n is nr_feature 127 | or nr_feature+1 depending on the existence of the bias term. 128 | nr_w is 1 if nr_class=2 and -s is not 4 (i.e., not 129 | multi-class svm by Crammer and Singer). It is 130 | nr_class otherwise. 131 | 132 | If the '-v' option is specified, cross validation is conducted and the 133 | returned model is just a scalar: cross-validation accuracy for 134 | classification and mean-squared error for regression. 135 | 136 | Result of Prediction 137 | ==================== 138 | 139 | The function 'predict' has three outputs. The first one, 140 | predicted_label, is a vector of predicted labels. The second output, 141 | accuracy, is a vector including accuracy (for classification), mean 142 | squared error, and squared correlation coefficient (for regression). 143 | The third is a matrix containing decision values or probability 144 | estimates (if '-b 1' is specified). If k is the number of classes 145 | and k' is the number of classifiers (k'=1 if k=2, otherwise k'=k), for decision values, 146 | each row includes results of k' binary linear classifiers. For probabilities, 147 | each row contains k values indicating the probability that the testing instance is in 148 | each class. Note that the order of classes here is the same as 'Label' 149 | field in the model structure. 150 | 151 | Other Utilities 152 | =============== 153 | 154 | A matlab function libsvmread reads files in LIBSVM format: 155 | 156 | [label_vector, instance_matrix] = libsvmread('data.txt'); 157 | 158 | Two outputs are labels and instances, which can then be used as inputs 159 | of svmtrain or svmpredict. 160 | 161 | A matlab function libsvmwrite writes Matlab matrix to a file in LIBSVM format: 162 | 163 | libsvmwrite('data.txt', label_vector, instance_matrix] 164 | 165 | The instance_matrix must be a sparse matrix. (type must be double) 166 | For windows, `libsvmread.mexw64' and `libsvmwrite.mexw64' are ready in 167 | the directory `..\windows'. 168 | 169 | These codes are prepared by Rong-En Fan and Kai-Wei Chang from National 170 | Taiwan University. 171 | 172 | Examples 173 | ======== 174 | 175 | Train and test on the provided data heart_scale: 176 | 177 | matlab> [heart_scale_label, heart_scale_inst] = libsvmread('../heart_scale'); 178 | matlab> model = train(heart_scale_label, heart_scale_inst, '-c 1'); 179 | matlab> [predict_label, accuracy, dec_values] = predict(heart_scale_label, heart_scale_inst, model); % test the training data 180 | 181 | Note that for testing, you can put anything in the testing_label_vector. 182 | 183 | For probability estimates, you need '-b 1' only in the testing phase: 184 | 185 | matlab> [predict_label, accuracy, prob_estimates] = predict(heart_scale_label, heart_scale_inst, model, '-b 1'); 186 | 187 | Additional Information 188 | ====================== 189 | 190 | Please cite LIBLINEAR as follows 191 | 192 | R.-E. Fan, K.-W. 
Chang, C.-J. Hsieh, X.-R. Wang, and C.-J. Lin. 193 | LIBLINEAR: A Library for Large Linear Classification, Journal of 194 | Machine Learning Research 9(2008), 1871-1874.Software available at 195 | http://www.csie.ntu.edu.tw/~cjlin/liblinear 196 | 197 | For any question, please contact Chih-Jen Lin . 198 | 199 | -------------------------------------------------------------------------------- /linux/evaluate/liblinear/matlab/libsvmread.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include "mex.h" 8 | 9 | #ifdef MX_API_VER 10 | #if MX_API_VER < 0x07030000 11 | typedef int mwIndex; 12 | #endif 13 | #endif 14 | #ifndef max 15 | #define max(x,y) (((x)>(y))?(x):(y)) 16 | #endif 17 | #ifndef min 18 | #define min(x,y) (((x)<(y))?(x):(y)) 19 | #endif 20 | 21 | void exit_with_help() 22 | { 23 | mexPrintf( 24 | "Usage: [label_vector, instance_matrix] = libsvmread('filename');\n" 25 | ); 26 | } 27 | 28 | static void fake_answer(int nlhs, mxArray *plhs[]) 29 | { 30 | int i; 31 | for(i=0;i start from 0 87 | strtok(line," \t"); // label 88 | while (1) 89 | { 90 | idx = strtok(NULL,":"); // index:value 91 | val = strtok(NULL," \t"); 92 | if(val == NULL) 93 | break; 94 | 95 | errno = 0; 96 | index = (int) strtol(idx,&endptr,10); 97 | if(endptr == idx || errno != 0 || *endptr != '\0' || index <= inst_max_index) 98 | { 99 | mexPrintf("Wrong input format at line %d\n",l+1); 100 | fake_answer(nlhs, plhs); 101 | return; 102 | } 103 | else 104 | inst_max_index = index; 105 | 106 | min_index = min(min_index, index); 107 | elements++; 108 | } 109 | max_index = max(max_index, inst_max_index); 110 | l++; 111 | } 112 | rewind(fp); 113 | 114 | // y 115 | plhs[0] = mxCreateDoubleMatrix(l, 1, mxREAL); 116 | // x^T 117 | if (min_index <= 0) 118 | plhs[1] = mxCreateSparse(max_index-min_index+1, l, elements, mxREAL); 119 | else 120 | plhs[1] = mxCreateSparse(max_index, l, elements, mxREAL); 121 | 122 | labels = mxGetPr(plhs[0]); 123 | samples = mxGetPr(plhs[1]); 124 | ir = mxGetIr(plhs[1]); 125 | jc = mxGetJc(plhs[1]); 126 | 127 | k=0; 128 | for(i=0;i start from 0 159 | 160 | errno = 0; 161 | samples[k] = strtod(val,&endptr); 162 | if (endptr == val || errno != 0 || (*endptr != '\0' && !isspace(*endptr))) 163 | { 164 | mexPrintf("Wrong input format at line %d\n",i+1); 165 | fake_answer(nlhs, plhs); 166 | return; 167 | } 168 | ++k; 169 | } 170 | } 171 | jc[l] = k; 172 | 173 | fclose(fp); 174 | free(line); 175 | 176 | { 177 | mxArray *rhs[1], *lhs[1]; 178 | rhs[0] = plhs[1]; 179 | if(mexCallMATLAB(1, lhs, 1, rhs, "transpose")) 180 | { 181 | mexPrintf("Error: cannot transpose problem\n"); 182 | fake_answer(nlhs, plhs); 183 | return; 184 | } 185 | plhs[1] = lhs[0]; 186 | } 187 | } 188 | 189 | void mexFunction( int nlhs, mxArray *plhs[], 190 | int nrhs, const mxArray *prhs[] ) 191 | { 192 | char filename[256]; 193 | 194 | if(nrhs != 1 || nlhs != 2) 195 | { 196 | exit_with_help(); 197 | fake_answer(nlhs, plhs); 198 | return; 199 | } 200 | 201 | mxGetString(prhs[0], filename, mxGetN(prhs[0]) + 1); 202 | 203 | if(filename == NULL) 204 | { 205 | mexPrintf("Error: filename is NULL\n"); 206 | return; 207 | } 208 | 209 | read_problem(filename, nlhs, plhs); 210 | 211 | return; 212 | } 213 | 214 | -------------------------------------------------------------------------------- /linux/evaluate/liblinear/matlab/libsvmwrite.c: -------------------------------------------------------------------------------- 1 | #include 2 | 
#include 3 | #include 4 | #include "mex.h" 5 | 6 | #ifdef MX_API_VER 7 | #if MX_API_VER < 0x07030000 8 | typedef int mwIndex; 9 | #endif 10 | #endif 11 | 12 | void exit_with_help() 13 | { 14 | mexPrintf( 15 | "Usage: libsvmwrite('filename', label_vector, instance_matrix);\n" 16 | ); 17 | } 18 | 19 | static void fake_answer(int nlhs, mxArray *plhs[]) 20 | { 21 | int i; 22 | for(i=0;i 0) 89 | { 90 | exit_with_help(); 91 | fake_answer(nlhs, plhs); 92 | return; 93 | } 94 | 95 | // Transform the input Matrix to libsvm format 96 | if(nrhs == 3) 97 | { 98 | char filename[256]; 99 | if(!mxIsDouble(prhs[1]) || !mxIsDouble(prhs[2])) 100 | { 101 | mexPrintf("Error: label vector and instance matrix must be double\n"); 102 | return; 103 | } 104 | 105 | mxGetString(prhs[0], filename, mxGetN(prhs[0])+1); 106 | 107 | if(mxIsSparse(prhs[2])) 108 | libsvmwrite(filename, prhs[1], prhs[2]); 109 | else 110 | { 111 | mexPrintf("Instance_matrix must be sparse\n"); 112 | return; 113 | } 114 | } 115 | else 116 | { 117 | exit_with_help(); 118 | return; 119 | } 120 | } 121 | -------------------------------------------------------------------------------- /linux/evaluate/liblinear/matlab/linear_model_matlab.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "../linear.h" 4 | 5 | #include "mex.h" 6 | 7 | #ifdef MX_API_VER 8 | #if MX_API_VER < 0x07030000 9 | typedef int mwIndex; 10 | #endif 11 | #endif 12 | 13 | #define Malloc(type,n) (type *)malloc((n)*sizeof(type)) 14 | 15 | #define NUM_OF_RETURN_FIELD 6 16 | 17 | static const char *field_names[] = { 18 | "Parameters", 19 | "nr_class", 20 | "nr_feature", 21 | "bias", 22 | "Label", 23 | "w", 24 | }; 25 | 26 | const char *model_to_matlab_structure(mxArray *plhs[], struct model *model_) 27 | { 28 | int i; 29 | int nr_w; 30 | double *ptr; 31 | mxArray *return_model, **rhs; 32 | int out_id = 0; 33 | int n, w_size; 34 | 35 | rhs = (mxArray **)mxMalloc(sizeof(mxArray *)*NUM_OF_RETURN_FIELD); 36 | 37 | // Parameters 38 | // for now, only solver_type is needed 39 | rhs[out_id] = mxCreateDoubleMatrix(1, 1, mxREAL); 40 | ptr = mxGetPr(rhs[out_id]); 41 | ptr[0] = model_->param.solver_type; 42 | out_id++; 43 | 44 | // nr_class 45 | rhs[out_id] = mxCreateDoubleMatrix(1, 1, mxREAL); 46 | ptr = mxGetPr(rhs[out_id]); 47 | ptr[0] = model_->nr_class; 48 | out_id++; 49 | 50 | if(model_->nr_class==2 && model_->param.solver_type != MCSVM_CS) 51 | nr_w=1; 52 | else 53 | nr_w=model_->nr_class; 54 | 55 | // nr_feature 56 | rhs[out_id] = mxCreateDoubleMatrix(1, 1, mxREAL); 57 | ptr = mxGetPr(rhs[out_id]); 58 | ptr[0] = model_->nr_feature; 59 | out_id++; 60 | 61 | // bias 62 | rhs[out_id] = mxCreateDoubleMatrix(1, 1, mxREAL); 63 | ptr = mxGetPr(rhs[out_id]); 64 | ptr[0] = model_->bias; 65 | out_id++; 66 | 67 | if(model_->bias>=0) 68 | n=model_->nr_feature+1; 69 | else 70 | n=model_->nr_feature; 71 | 72 | w_size = n; 73 | // Label 74 | if(model_->label) 75 | { 76 | rhs[out_id] = mxCreateDoubleMatrix(model_->nr_class, 1, mxREAL); 77 | ptr = mxGetPr(rhs[out_id]); 78 | for(i = 0; i < model_->nr_class; i++) 79 | ptr[i] = model_->label[i]; 80 | } 81 | else 82 | rhs[out_id] = mxCreateDoubleMatrix(0, 0, mxREAL); 83 | out_id++; 84 | 85 | // w 86 | rhs[out_id] = mxCreateDoubleMatrix(nr_w, w_size, mxREAL); 87 | ptr = mxGetPr(rhs[out_id]); 88 | for(i = 0; i < w_size*nr_w; i++) 89 | ptr[i]=model_->w[i]; 90 | out_id++; 91 | 92 | /* Create a struct matrix contains NUM_OF_RETURN_FIELD fields */ 93 | return_model = mxCreateStructMatrix(1, 
1, NUM_OF_RETURN_FIELD, field_names); 94 | 95 | /* Fill struct matrix with input arguments */ 96 | for(i = 0; i < NUM_OF_RETURN_FIELD; i++) 97 | mxSetField(return_model,0,field_names[i],mxDuplicateArray(rhs[i])); 98 | /* return */ 99 | plhs[0] = return_model; 100 | mxFree(rhs); 101 | 102 | return NULL; 103 | } 104 | 105 | const char *matlab_matrix_to_model(struct model *model_, const mxArray *matlab_struct) 106 | { 107 | int i, num_of_fields; 108 | int nr_w; 109 | double *ptr; 110 | int id = 0; 111 | int n, w_size; 112 | mxArray **rhs; 113 | 114 | num_of_fields = mxGetNumberOfFields(matlab_struct); 115 | rhs = (mxArray **) mxMalloc(sizeof(mxArray *)*num_of_fields); 116 | 117 | for(i=0;inr_class=0; 121 | nr_w=0; 122 | model_->nr_feature=0; 123 | model_->w=NULL; 124 | model_->label=NULL; 125 | 126 | // Parameters 127 | ptr = mxGetPr(rhs[id]); 128 | model_->param.solver_type = (int)ptr[0]; 129 | id++; 130 | 131 | // nr_class 132 | ptr = mxGetPr(rhs[id]); 133 | model_->nr_class = (int)ptr[0]; 134 | id++; 135 | 136 | if(model_->nr_class==2 && model_->param.solver_type != MCSVM_CS) 137 | nr_w=1; 138 | else 139 | nr_w=model_->nr_class; 140 | 141 | // nr_feature 142 | ptr = mxGetPr(rhs[id]); 143 | model_->nr_feature = (int)ptr[0]; 144 | id++; 145 | 146 | // bias 147 | ptr = mxGetPr(rhs[id]); 148 | model_->bias = (int)ptr[0]; 149 | id++; 150 | 151 | if(model_->bias>=0) 152 | n=model_->nr_feature+1; 153 | else 154 | n=model_->nr_feature; 155 | w_size = n; 156 | 157 | // Label 158 | if(mxIsEmpty(rhs[id]) == 0) 159 | { 160 | model_->label = Malloc(int, model_->nr_class); 161 | ptr = mxGetPr(rhs[id]); 162 | for(i=0;inr_class;i++) 163 | model_->label[i] = (int)ptr[i]; 164 | } 165 | id++; 166 | 167 | ptr = mxGetPr(rhs[id]); 168 | model_->w=Malloc(double, w_size*nr_w); 169 | for(i = 0; i < w_size*nr_w; i++) 170 | model_->w[i]=ptr[i]; 171 | id++; 172 | mxFree(rhs); 173 | 174 | return NULL; 175 | } 176 | 177 | -------------------------------------------------------------------------------- /linux/evaluate/liblinear/matlab/linear_model_matlab.h: -------------------------------------------------------------------------------- 1 | const char *model_to_matlab_structure(mxArray *plhs[], struct model *model_); 2 | const char *matlab_matrix_to_model(struct model *model_, const mxArray *matlab_struct); 3 | -------------------------------------------------------------------------------- /linux/evaluate/liblinear/matlab/make.m: -------------------------------------------------------------------------------- 1 | % This make.m is for MATLAB and OCTAVE under Windows, Mac, and Unix 2 | 3 | try 4 | Type = ver; 5 | % This part is for OCTAVE 6 | if(strcmp(Type(1).Name, 'Octave') == 1) 7 | mex libsvmread.c 8 | mex libsvmwrite.c 9 | mex train.c linear_model_matlab.c ../linear.cpp ../tron.cpp ../blas/*.c 10 | mex predict.c linear_model_matlab.c ../linear.cpp ../tron.cpp ../blas/*.c 11 | % This part is for MATLAB 12 | % Add -largeArrayDims on 64-bit machines of MATLAB 13 | else 14 | mex CFLAGS="\$CFLAGS -std=c99" -largeArrayDims libsvmread.c 15 | mex CFLAGS="\$CFLAGS -std=c99" -largeArrayDims libsvmwrite.c 16 | mex CFLAGS="\$CFLAGS -std=c99" -largeArrayDims train.c linear_model_matlab.c ../linear.cpp ../tron.cpp "../blas/*.c" 17 | mex CFLAGS="\$CFLAGS -std=c99" -largeArrayDims predict.c linear_model_matlab.c ../linear.cpp ../tron.cpp "../blas/*.c" 18 | end 19 | catch 20 | fprintf('If make.m fails, please check README about detailed instructions.\n'); 21 | end 22 | 
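The MEX gateway below (predict.c) is a thin wrapper around the C API declared in linear.h earlier in this dump. For orientation, here is a minimal sketch of driving that API directly; the toy data and everything other than the liblinear types and calls are hypothetical:
```
// api_demo.cpp -- minimal sketch (hypothetical toy data) of the liblinear
// C API from linear.h: build a 2-instance problem, train, predict.
#include <stdio.h>
#include "linear.h"

int main() {
	// Each instance is a sparse (index, value) array terminated by index = -1.
	struct feature_node x1[] = { {1, 1.0}, {2, 0.0}, {-1, 0.0} };
	struct feature_node x2[] = { {1, 0.0}, {2, 1.0}, {-1, 0.0} };
	struct feature_node *rows[] = { x1, x2 };
	double labels[] = { +1, -1 };

	struct problem prob;
	prob.l = 2;      // number of training instances
	prob.n = 2;      // number of features
	prob.y = labels;
	prob.x = rows;
	prob.bias = -1;  // no bias term

	struct parameter param;
	param.solver_type = L2R_L2LOSS_SVC_DUAL;
	param.eps = 0.1;
	param.C = 1;
	param.nr_weight = 0;
	param.weight_label = NULL;
	param.weight = NULL;
	param.p = 0.1;

	const char *error_msg = check_parameter(&prob, &param);
	if (error_msg) {
		fprintf(stderr, "%s\n", error_msg);
		return 1;
	}
	struct model *model_ = train(&prob, &param);
	printf("prediction for x1: %g\n", predict(model_, x1));

	free_and_destroy_model(&model_);
	destroy_param(&param);
	return 0;
}
```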
-------------------------------------------------------------------------------- /linux/evaluate/liblinear/matlab/predict.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "../linear.h" 5 | 6 | #include "mex.h" 7 | #include "linear_model_matlab.h" 8 | 9 | #ifdef MX_API_VER 10 | #if MX_API_VER < 0x07030000 11 | typedef int mwIndex; 12 | #endif 13 | #endif 14 | 15 | #define CMD_LEN 2048 16 | 17 | #define Malloc(type,n) (type *)malloc((n)*sizeof(type)) 18 | 19 | int print_null(const char *s,...) {} 20 | int (*info)(const char *fmt,...); 21 | 22 | int col_format_flag; 23 | 24 | void read_sparse_instance(const mxArray *prhs, int index, struct feature_node *x, int feature_number, double bias) 25 | { 26 | int i, j, low, high; 27 | mwIndex *ir, *jc; 28 | double *samples; 29 | 30 | ir = mxGetIr(prhs); 31 | jc = mxGetJc(prhs); 32 | samples = mxGetPr(prhs); 33 | 34 | // each column is one instance 35 | j = 0; 36 | low = (int) jc[index], high = (int) jc[index+1]; 37 | for(i=low; i=0) 44 | { 45 | x[j].index = feature_number+1; 46 | x[j].value = bias; 47 | j++; 48 | } 49 | x[j].index = -1; 50 | } 51 | 52 | static void fake_answer(int nlhs, mxArray *plhs[]) 53 | { 54 | int i; 55 | for(i=0;iparam.solver_type!=MCSVM_CS) 80 | nr_w=1; 81 | else 82 | nr_w=nr_class; 83 | 84 | // prhs[1] = testing instance matrix 85 | feature_number = get_nr_feature(model_); 86 | testing_instance_number = (int) mxGetM(prhs[1]); 87 | if(col_format_flag) 88 | { 89 | feature_number = (int) mxGetM(prhs[1]); 90 | testing_instance_number = (int) mxGetN(prhs[1]); 91 | } 92 | 93 | label_vector_row_num = (int) mxGetM(prhs[0]); 94 | label_vector_col_num = (int) mxGetN(prhs[0]); 95 | 96 | if(label_vector_row_num!=testing_instance_number) 97 | { 98 | mexPrintf("Length of label vector does not match # of instances.\n"); 99 | fake_answer(nlhs, plhs); 100 | return; 101 | } 102 | if(label_vector_col_num!=1) 103 | { 104 | mexPrintf("label (1st argument) should be a vector (# of column is 1).\n"); 105 | fake_answer(nlhs, plhs); 106 | return; 107 | } 108 | 109 | ptr_label = mxGetPr(prhs[0]); 110 | 111 | // transpose instance matrix 112 | if(col_format_flag) 113 | pplhs[0] = (mxArray *)prhs[1]; 114 | else 115 | { 116 | mxArray *pprhs[1]; 117 | pprhs[0] = mxDuplicateArray(prhs[1]); 118 | if(mexCallMATLAB(1, pplhs, 1, pprhs, "transpose")) 119 | { 120 | mexPrintf("Error: cannot transpose testing instance matrix\n"); 121 | fake_answer(nlhs, plhs); 122 | return; 123 | } 124 | } 125 | 126 | 127 | prob_estimates = Malloc(double, nr_class); 128 | 129 | tplhs[0] = mxCreateDoubleMatrix(testing_instance_number, 1, mxREAL); 130 | if(predict_probability_flag) 131 | tplhs[2] = mxCreateDoubleMatrix(testing_instance_number, nr_class, mxREAL); 132 | else 133 | tplhs[2] = mxCreateDoubleMatrix(testing_instance_number, nr_w, mxREAL); 134 | 135 | ptr_predict_label = mxGetPr(tplhs[0]); 136 | ptr_prob_estimates = mxGetPr(tplhs[2]); 137 | ptr_dec_values = mxGetPr(tplhs[2]); 138 | x = Malloc(struct feature_node, feature_number+2); 139 | for(instance_index=0;instance_indexbias); 148 | 149 | if(predict_probability_flag) 150 | { 151 | predict_label = predict_probability(model_, x, prob_estimates); 152 | ptr_predict_label[instance_index] = predict_label; 153 | for(i=0;iparam.solver_type==L2R_L2LOSS_SVR || 180 | model_->param.solver_type==L2R_L1LOSS_SVR_DUAL || 181 | model_->param.solver_type==L2R_L2LOSS_SVR_DUAL) 182 | { 183 | info("Mean squared error = %g (regression)\n",error/total); 184 | 
info("Squared correlation coefficient = %g (regression)\n", 185 | ((total*sumpt-sump*sumt)*(total*sumpt-sump*sumt))/ 186 | ((total*sumpp-sump*sump)*(total*sumtt-sumt*sumt)) 187 | ); 188 | } 189 | else 190 | info("Accuracy = %g%% (%d/%d)\n", (double) correct/total*100,correct,total); 191 | 192 | // return accuracy, mean squared error, squared correlation coefficient 193 | tplhs[1] = mxCreateDoubleMatrix(3, 1, mxREAL); 194 | ptr = mxGetPr(tplhs[1]); 195 | ptr[0] = (double)correct/total*100; 196 | ptr[1] = error/total; 197 | ptr[2] = ((total*sumpt-sump*sumt)*(total*sumpt-sump*sumt))/ 198 | ((total*sumpp-sump*sump)*(total*sumtt-sumt*sumt)); 199 | 200 | free(x); 201 | if(prob_estimates != NULL) 202 | free(prob_estimates); 203 | 204 | switch(nlhs) 205 | { 206 | case 3: 207 | plhs[2] = tplhs[2]; 208 | plhs[1] = tplhs[1]; 209 | case 1: 210 | case 0: 211 | plhs[0] = tplhs[0]; 212 | } 213 | } 214 | 215 | void exit_with_help() 216 | { 217 | mexPrintf( 218 | "Usage: [predicted_label, accuracy, decision_values/prob_estimates] = predict(testing_label_vector, testing_instance_matrix, model, 'liblinear_options','col')\n" 219 | " [predicted_label] = predict(testing_label_vector, testing_instance_matrix, model, 'liblinear_options','col')\n" 220 | "liblinear_options:\n" 221 | "-b probability_estimates: whether to output probability estimates, 0 or 1 (default 0); currently for logistic regression only\n" 222 | "-q quiet mode (no outputs)\n" 223 | "col: if 'col' is setted testing_instance_matrix is parsed in column format, otherwise is in row format\n" 224 | "Returns:\n" 225 | " predicted_label: prediction output vector.\n" 226 | " accuracy: a vector with accuracy, mean squared error, squared correlation coefficient.\n" 227 | " prob_estimates: If selected, probability estimate vector.\n" 228 | ); 229 | } 230 | 231 | void mexFunction( int nlhs, mxArray *plhs[], 232 | int nrhs, const mxArray *prhs[] ) 233 | { 234 | int prob_estimate_flag = 0; 235 | struct model *model_; 236 | char cmd[CMD_LEN]; 237 | info = &mexPrintf; 238 | col_format_flag = 0; 239 | 240 | if(nlhs == 2 || nlhs > 3 || nrhs > 5 || nrhs < 3) 241 | { 242 | exit_with_help(); 243 | fake_answer(nlhs, plhs); 244 | return; 245 | } 246 | if(nrhs == 5) 247 | { 248 | mxGetString(prhs[4], cmd, mxGetN(prhs[4])+1); 249 | if(strcmp(cmd, "col") == 0) 250 | { 251 | col_format_flag = 1; 252 | } 253 | } 254 | 255 | if(!mxIsDouble(prhs[0]) || !mxIsDouble(prhs[1])) { 256 | mexPrintf("Error: label vector and instance matrix must be double\n"); 257 | fake_answer(nlhs, plhs); 258 | return; 259 | } 260 | 261 | if(mxIsStruct(prhs[2])) 262 | { 263 | const char *error_msg; 264 | 265 | // parse options 266 | if(nrhs>=4) 267 | { 268 | int i, argc = 1; 269 | char *argv[CMD_LEN/2]; 270 | 271 | // put options in argv[] 272 | mxGetString(prhs[3], cmd, mxGetN(prhs[3]) + 1); 273 | if((argv[argc] = strtok(cmd, " ")) != NULL) 274 | while((argv[++argc] = strtok(NULL, " ")) != NULL) 275 | ; 276 | 277 | for(i=1;i=argc && argv[i-1][1] != 'q') 282 | { 283 | exit_with_help(); 284 | fake_answer(nlhs, plhs); 285 | return; 286 | } 287 | switch(argv[i-1][1]) 288 | { 289 | case 'b': 290 | prob_estimate_flag = atoi(argv[i]); 291 | break; 292 | case 'q': 293 | info = &print_null; 294 | i--; 295 | break; 296 | default: 297 | mexPrintf("unknown option\n"); 298 | exit_with_help(); 299 | fake_answer(nlhs, plhs); 300 | return; 301 | } 302 | } 303 | } 304 | 305 | model_ = Malloc(struct model, 1); 306 | error_msg = matlab_matrix_to_model(model_, prhs[2]); 307 | if(error_msg) 308 | { 309 | 
mexPrintf("Error: can't read model: %s\n", error_msg); 310 | free_and_destroy_model(&model_); 311 | fake_answer(nlhs, plhs); 312 | return; 313 | } 314 | 315 | if(prob_estimate_flag) 316 | { 317 | if(!check_probability_model(model_)) 318 | { 319 | mexPrintf("probability output is only supported for logistic regression\n"); 320 | prob_estimate_flag=0; 321 | } 322 | } 323 | 324 | if(mxIsSparse(prhs[1])) 325 | do_predict(nlhs, plhs, prhs, model_, prob_estimate_flag); 326 | else 327 | { 328 | mexPrintf("Testing_instance_matrix must be sparse; " 329 | "use sparse(Testing_instance_matrix) first\n"); 330 | fake_answer(nlhs, plhs); 331 | } 332 | 333 | // destroy model_ 334 | free_and_destroy_model(&model_); 335 | } 336 | else 337 | { 338 | mexPrintf("model file should be a struct array\n"); 339 | fake_answer(nlhs, plhs); 340 | } 341 | 342 | return; 343 | } 344 | -------------------------------------------------------------------------------- /linux/evaluate/liblinear/predict.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include "linear.h" 7 | 8 | int print_null(const char *s,...) {return 0;} 9 | 10 | static int (*info)(const char *fmt,...) = &printf; 11 | 12 | struct feature_node *x; 13 | int max_nr_attr = 64; 14 | 15 | struct model* model_; 16 | int flag_predict_probability=0; 17 | 18 | void exit_input_error(int line_num) 19 | { 20 | fprintf(stderr,"Wrong input format at line %d\n", line_num); 21 | exit(1); 22 | } 23 | 24 | static char *line = NULL; 25 | static int max_line_len; 26 | 27 | static char* readline(FILE *input) 28 | { 29 | int len; 30 | 31 | if(fgets(line,max_line_len,input) == NULL) 32 | return NULL; 33 | 34 | while(strrchr(line,'\n') == NULL) 35 | { 36 | max_line_len *= 2; 37 | line = (char *) realloc(line,max_line_len); 38 | len = (int) strlen(line); 39 | if(fgets(line+len,max_line_len-len,input) == NULL) 40 | break; 41 | } 42 | return line; 43 | } 44 | 45 | void do_predict(FILE *input, FILE *output) 46 | { 47 | int correct = 0; 48 | int total = 0; 49 | double error = 0; 50 | double sump = 0, sumt = 0, sumpp = 0, sumtt = 0, sumpt = 0; 51 | 52 | int nr_class=get_nr_class(model_); 53 | double *prob_estimates=NULL; 54 | int j, n; 55 | int nr_feature=get_nr_feature(model_); 56 | if(model_->bias>=0) 57 | n=nr_feature+1; 58 | else 59 | n=nr_feature; 60 | 61 | if(flag_predict_probability) 62 | { 63 | int *labels; 64 | 65 | if(!check_probability_model(model_)) 66 | { 67 | fprintf(stderr, "probability output is only supported for logistic regression\n"); 68 | exit(1); 69 | } 70 | 71 | labels=(int *) malloc(nr_class*sizeof(int)); 72 | get_labels(model_,labels); 73 | prob_estimates = (double *) malloc(nr_class*sizeof(double)); 74 | fprintf(output,"labels"); 75 | for(j=0;j=max_nr_attr-2) // need one more for index = -1 101 | { 102 | max_nr_attr *= 2; 103 | x = (struct feature_node *) realloc(x,max_nr_attr*sizeof(struct feature_node)); 104 | } 105 | 106 | idx = strtok(NULL,":"); 107 | val = strtok(NULL," \t"); 108 | 109 | if(val == NULL) 110 | break; 111 | errno = 0; 112 | x[i].index = (int) strtol(idx,&endptr,10); 113 | if(endptr == idx || errno != 0 || *endptr != '\0' || x[i].index <= inst_max_index) 114 | exit_input_error(total+1); 115 | else 116 | inst_max_index = x[i].index; 117 | 118 | errno = 0; 119 | x[i].value = strtod(val,&endptr); 120 | if(endptr == val || errno != 0 || (*endptr != '\0' && !isspace(*endptr))) 121 | exit_input_error(total+1); 122 | 123 | // feature indices larger 
123 | // feature indices larger than those in training are not used 124 | if(x[i].index <= nr_feature) 125 | ++i; 126 | } 127 | 128 | if(model_->bias>=0) 129 | { 130 | x[i].index = n; 131 | x[i].value = model_->bias; 132 | i++; 133 | } 134 | x[i].index = -1; 135 | 136 | if(flag_predict_probability) 137 | { 138 | int j; 139 | predict_label = predict_probability(model_,x,prob_estimates); 140 | fprintf(output,"%g",predict_label); 141 | for(j=0;j<model_->nr_class;j++) 142 | fprintf(output," %g",prob_estimates[j]); 143 | fprintf(output,"\n"); 144 | } 145 | else 146 | { 147 | predict_label = predict(model_,x); 148 | fprintf(output,"%g\n",predict_label); 149 | } 150 | 151 | if(predict_label == target_label) 152 | ++correct; 153 | error += (predict_label-target_label)*(predict_label-target_label); 154 | sump += predict_label; 155 | sumt += target_label; 156 | sumpp += predict_label*predict_label; 157 | sumtt += target_label*target_label; 158 | sumpt += predict_label*target_label; 159 | ++total; 160 | } 161 | if(model_->param.solver_type==L2R_L2LOSS_SVR || 162 | model_->param.solver_type==L2R_L1LOSS_SVR_DUAL || 163 | model_->param.solver_type==L2R_L2LOSS_SVR_DUAL) 164 | { 165 | info("Mean squared error = %g (regression)\n",error/total); 166 | info("Squared correlation coefficient = %g (regression)\n", 167 | ((total*sumpt-sump*sumt)*(total*sumpt-sump*sumt))/ 168 | ((total*sumpp-sump*sump)*(total*sumtt-sumt*sumt)) 169 | ); 170 | } 171 | else 172 | info("Accuracy = %g%% (%d/%d)\n",(double) correct/total*100,correct,total); 173 | if(flag_predict_probability) 174 | free(prob_estimates); 175 | } 176 | 177 | void exit_with_help() 178 | { 179 | printf( 180 | "Usage: predict [options] test_file model_file output_file\n" 181 | "options:\n" 182 | "-b probability_estimates: whether to output probability estimates, 0 or 1 (default 0); currently for logistic regression only\n" 183 | "-q : quiet mode (no outputs)\n" 184 | ); 185 | exit(1); 186 | } 187 | 188 | int main(int argc, char **argv) 189 | { 190 | FILE *input, *output; 191 | int i; 192 | 193 | // parse options 194 | for(i=1;i<argc;i++) 195 | { 196 | if(argv[i][0] != '-') break; 197 | ++i; 198 | switch(argv[i-1][1]) 199 | { 200 | case 'b': 201 | flag_predict_probability = atoi(argv[i]); 202 | break; 203 | case 'q': 204 | info = &print_null; 205 | i--; 206 | break; 207 | default: 208 | fprintf(stderr,"unknown option: -%c\n", argv[i-1][1]); 209 | exit_with_help(); 210 | break; 211 | } 212 | } 213 | if(i>=argc) 214 | exit_with_help(); 215 | 216 | input = fopen(argv[i],"r"); 217 | if(input == NULL) 218 | { 219 | fprintf(stderr,"can't open input file %s\n",argv[i]); 220 | exit(1); 221 | } 222 | 223 | output = fopen(argv[i+2],"w"); 224 | if(output == NULL) 225 | { 226 | fprintf(stderr,"can't open output file %s\n",argv[i+2]); 227 | exit(1); 228 | } 229 | 230 | if((model_=load_model(argv[i+1]))==0) 231 | { 232 | fprintf(stderr,"can't open model file %s\n",argv[i+1]); 233 | exit(1); 234 | } 235 | 236 | x = (struct feature_node *) malloc(max_nr_attr*sizeof(struct feature_node)); 237 | do_predict(input, output); 238 | free_and_destroy_model(&model_); 239 | free(line); 240 | free(x); 241 | fclose(input); 242 | fclose(output); 243 | return 0; 244 | } 245 | 246 | --------------------------------------------------------------------------------
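For quick reference, the predict tool above follows the usage string printed by exit_with_help(): `predict [options] test_file model_file output_file`. A minimal sketch of a probability-estimate run (file names are illustrative; `-b 1` requires a logistic-regression model, which is also how run.sh below invokes it):
```
./predict -b 1 test.txt model.txt output.txt
```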
/linux/evaluate/liblinear/python/Makefile: -------------------------------------------------------------------------------- 1 | all = lib 2 | 3 | lib: 4 | make -C .. lib 5 | -------------------------------------------------------------------------------- /linux/evaluate/liblinear/python/liblinear.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from ctypes import * 4 | from ctypes.util import find_library 5 | from os import path 6 | import sys 7 | 8 | try: 9 | dirname = path.dirname(path.abspath(__file__)) 10 | if sys.platform == 'win32': 11 | liblinear = CDLL(path.join(dirname, r'..\windows\liblinear.dll')) 12 | else: 13 | liblinear = CDLL(path.join(dirname, '../liblinear.so.1')) 14 | except: 15 | # For unix the prefix 'lib' is not considered. 16 | if find_library('linear'): 17 | liblinear = CDLL(find_library('linear')) 18 | elif find_library('liblinear'): 19 | liblinear = CDLL(find_library('liblinear')) 20 | else: 21 | raise Exception('LIBLINEAR library not found.') 22 | 23 | # Construct constants 24 | SOLVER_TYPE = ['L2R_LR', 'L2R_L2LOSS_SVC_DUAL', 'L2R_L2LOSS_SVC', 'L2R_L1LOSS_SVC_DUAL',\ 25 | 'MCSVM_CS', 'L1R_L2LOSS_SVC', 'L1R_LR', 'L2R_LR_DUAL', \ 26 | None, None, None, \ 27 | 'L2R_L2LOSS_SVR', 'L2R_L2LOSS_SVR_DUAL', 'L2R_L1LOSS_SVR_DUAL'] 28 | for i, s in enumerate(SOLVER_TYPE): 29 | if s is not None: exec("%s = %d" % (s , i)) 30 | 31 | PRINT_STRING_FUN = CFUNCTYPE(None, c_char_p) 32 | def print_null(s): 33 | return 34 | 35 | def genFields(names, types): 36 | return list(zip(names, types)) 37 | 38 | def fillprototype(f, restype, argtypes): 39 | f.restype = restype 40 | f.argtypes = argtypes 41 | 42 | class feature_node(Structure): 43 | _names = ["index", "value"] 44 | _types = [c_int, c_double] 45 | _fields_ = genFields(_names, _types) 46 | 47 | def __str__(self): 48 | return '%d:%g' % (self.index, self.value) 49 | 50 | def gen_feature_nodearray(xi, feature_max=None, issparse=True): 51 | if isinstance(xi, dict): 52 | index_range = xi.keys() 53 | elif isinstance(xi, (list, tuple)): 54 | xi = [0] + xi # idx should start from 1 55 | index_range = range(1, len(xi)) 56 | else: 57 | raise TypeError('xi should be a dictionary, list or tuple') 58 | 59 | if feature_max: 60 | assert(isinstance(feature_max, int)) 61 | index_range = filter(lambda j: j <= feature_max, index_range) 62 | if issparse: 63 | index_range = filter(lambda j:xi[j] != 0, index_range) 64 | 65 | index_range = sorted(index_range) 66 | ret = (feature_node * (len(index_range)+2))() 67 | ret[-1].index = -1 # for bias term 68 | ret[-2].index = -1 69 | for idx, j in enumerate(index_range): 70 | ret[idx].index = j 71 | ret[idx].value = xi[j] 72 | max_idx = 0 73 | if index_range : 74 | max_idx = index_range[-1] 75 | return ret, max_idx 76 | 77 | class problem(Structure): 78 | _names = ["l", "n", "y", "x", "bias"] 79 | _types = [c_int, c_int, POINTER(c_double), POINTER(POINTER(feature_node)), c_double] 80 | _fields_ = genFields(_names, _types) 81 | 82 | def __init__(self, y, x, bias = -1): 83 | if len(y) != len(x) : 84 | raise ValueError("len(y) != len(x)") 85 | self.l = l = len(y) 86 | self.bias = -1 87 | 88 | max_idx = 0 89 | x_space = self.x_space = [] 90 | for i, xi in enumerate(x): 91 | tmp_xi, tmp_idx = gen_feature_nodearray(xi) 92 | x_space += [tmp_xi] 93 | max_idx = max(max_idx, tmp_idx) 94 | self.n = max_idx 95 | 96 | self.y = (c_double * l)() 97 | for i, yi in enumerate(y): self.y[i] = y[i] 98 | 99 | self.x = (POINTER(feature_node) * l)() 100 | for i, xi in enumerate(self.x_space): self.x[i] = xi 101 | 102 | self.set_bias(bias) 103 | 104 | def set_bias(self, bias): 105 | if self.bias == bias: 106 | return 107 | if
bias >= 0 and self.bias < 0: 108 | self.n += 1 109 | node = feature_node(self.n, bias) 110 | if bias < 0 and self.bias >= 0: 111 | self.n -= 1 112 | node = feature_node(-1, bias) 113 | 114 | for xi in self.x_space: 115 | xi[-2] = node 116 | self.bias = bias 117 | 118 | 119 | class parameter(Structure): 120 | _names = ["solver_type", "eps", "C", "nr_weight", "weight_label", "weight", "p"] 121 | _types = [c_int, c_double, c_double, c_int, POINTER(c_int), POINTER(c_double), c_double] 122 | _fields_ = genFields(_names, _types) 123 | 124 | def __init__(self, options = None): 125 | if options == None: 126 | options = '' 127 | self.parse_options(options) 128 | 129 | def __str__(self): 130 | s = '' 131 | attrs = parameter._names + list(self.__dict__.keys()) 132 | values = map(lambda attr: getattr(self, attr), attrs) 133 | for attr, val in zip(attrs, values): 134 | s += (' %s: %s\n' % (attr, val)) 135 | s = s.strip() 136 | 137 | return s 138 | 139 | def set_to_default_values(self): 140 | self.solver_type = L2R_L2LOSS_SVC_DUAL 141 | self.eps = float('inf') 142 | self.C = 1 143 | self.p = 0.1 144 | self.nr_weight = 0 145 | self.weight_label = (c_int * 0)() 146 | self.weight = (c_double * 0)() 147 | self.bias = -1 148 | self.cross_validation = False 149 | self.nr_fold = 0 150 | self.print_func = cast(None, PRINT_STRING_FUN) 151 | 152 | def parse_options(self, options): 153 | if isinstance(options, list): 154 | argv = options 155 | elif isinstance(options, str): 156 | argv = options.split() 157 | else: 158 | raise TypeError("arg 1 should be a list or a str.") 159 | self.set_to_default_values() 160 | self.print_func = cast(None, PRINT_STRING_FUN) 161 | weight_label = [] 162 | weight = [] 163 | 164 | i = 0 165 | while i < len(argv) : 166 | if argv[i] == "-s": 167 | i = i + 1 168 | self.solver_type = int(argv[i]) 169 | elif argv[i] == "-c": 170 | i = i + 1 171 | self.C = float(argv[i]) 172 | elif argv[i] == "-p": 173 | i = i + 1 174 | self.p = float(argv[i]) 175 | elif argv[i] == "-e": 176 | i = i + 1 177 | self.eps = float(argv[i]) 178 | elif argv[i] == "-B": 179 | i = i + 1 180 | self.bias = float(argv[i]) 181 | elif argv[i] == "-v": 182 | i = i + 1 183 | self.cross_validation = 1 184 | self.nr_fold = int(argv[i]) 185 | if self.nr_fold < 2 : 186 | raise ValueError("n-fold cross validation: n must >= 2") 187 | elif argv[i].startswith("-w"): 188 | i = i + 1 189 | self.nr_weight += 1 190 | nr_weight = self.nr_weight 191 | weight_label += [int(argv[i-1][2:])] 192 | weight += [float(argv[i])] 193 | elif argv[i] == "-q": 194 | self.print_func = PRINT_STRING_FUN(print_null) 195 | else : 196 | raise ValueError("Wrong options") 197 | i += 1 198 | 199 | liblinear.set_print_string_function(self.print_func) 200 | self.weight_label = (c_int*self.nr_weight)() 201 | self.weight = (c_double*self.nr_weight)() 202 | for i in range(self.nr_weight): 203 | self.weight[i] = weight[i] 204 | self.weight_label[i] = weight_label[i] 205 | 206 | if self.eps == float('inf'): 207 | if self.solver_type in [L2R_LR, L2R_L2LOSS_SVC]: 208 | self.eps = 0.01 209 | elif self.solver_type in [L2R_L2LOSS_SVR]: 210 | self.eps = 0.001 211 | elif self.solver_type in [L2R_L2LOSS_SVC_DUAL, L2R_L1LOSS_SVC_DUAL, MCSVM_CS, L2R_LR_DUAL]: 212 | self.eps = 0.1 213 | elif self.solver_type in [L1R_L2LOSS_SVC, L1R_LR]: 214 | self.eps = 0.01 215 | elif self.solver_type in [L2R_L2LOSS_SVR_DUAL, L2R_L1LOSS_SVR_DUAL]: 216 | self.eps = 0.1 217 | 218 | class model(Structure): 219 | _names = ["param", "nr_class", "nr_feature", "w", "label", "bias"] 220 | _types = 
[parameter, c_int, c_int, POINTER(c_double), POINTER(c_int), c_double] 221 | _fields_ = genFields(_names, _types) 222 | 223 | def __init__(self): 224 | self.__createfrom__ = 'python' 225 | 226 | def __del__(self): 227 | # free memory created by C to avoid memory leak 228 | if hasattr(self, '__createfrom__') and self.__createfrom__ == 'C': 229 | liblinear.free_and_destroy_model(pointer(self)) 230 | 231 | def get_nr_feature(self): 232 | return liblinear.get_nr_feature(self) 233 | 234 | def get_nr_class(self): 235 | return liblinear.get_nr_class(self) 236 | 237 | def get_labels(self): 238 | nr_class = self.get_nr_class() 239 | labels = (c_int * nr_class)() 240 | liblinear.get_labels(self, labels) 241 | return labels[:nr_class] 242 | 243 | def is_probability_model(self): 244 | return (liblinear.check_probability_model(self) == 1) 245 | 246 | def toPyModel(model_ptr): 247 | """ 248 | toPyModel(model_ptr) -> model 249 | 250 | Convert a ctypes POINTER(model) to a Python model 251 | """ 252 | if bool(model_ptr) == False: 253 | raise ValueError("Null pointer") 254 | m = model_ptr.contents 255 | m.__createfrom__ = 'C' 256 | return m 257 | 258 | fillprototype(liblinear.train, POINTER(model), [POINTER(problem), POINTER(parameter)]) 259 | fillprototype(liblinear.cross_validation, None, [POINTER(problem), POINTER(parameter), c_int, POINTER(c_double)]) 260 | 261 | fillprototype(liblinear.predict_values, c_double, [POINTER(model), POINTER(feature_node), POINTER(c_double)]) 262 | fillprototype(liblinear.predict, c_double, [POINTER(model), POINTER(feature_node)]) 263 | fillprototype(liblinear.predict_probability, c_double, [POINTER(model), POINTER(feature_node), POINTER(c_double)]) 264 | 265 | fillprototype(liblinear.save_model, c_int, [c_char_p, POINTER(model)]) 266 | fillprototype(liblinear.load_model, POINTER(model), [c_char_p]) 267 | 268 | fillprototype(liblinear.get_nr_feature, c_int, [POINTER(model)]) 269 | fillprototype(liblinear.get_nr_class, c_int, [POINTER(model)]) 270 | fillprototype(liblinear.get_labels, None, [POINTER(model), POINTER(c_int)]) 271 | 272 | fillprototype(liblinear.free_model_content, None, [POINTER(model)]) 273 | fillprototype(liblinear.free_and_destroy_model, None, [POINTER(POINTER(model))]) 274 | fillprototype(liblinear.destroy_param, None, [POINTER(parameter)]) 275 | fillprototype(liblinear.check_parameter, c_char_p, [POINTER(problem), POINTER(parameter)]) 276 | fillprototype(liblinear.check_probability_model, c_int, [POINTER(model)]) 277 | fillprototype(liblinear.set_print_string_function, None, [CFUNCTYPE(None, c_char_p)]) 278 | --------------------------------------------------------------------------------
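For readers driving this ctypes wrapper directly rather than through liblinearutil.py below, here is a minimal sketch; it assumes liblinear.so.1 has been built with `make lib`, and the two toy instances and solver options are illustrative:
```python
from liblinear import *

# two toy instances in sparse {feature_index: value} form, with labels +1/-1
y, x = [1, -1], [{1: 1.0, 2: -1.0}, {1: -1.0, 2: 1.0}]
prob = problem(y, x)
param = parameter('-s 0 -c 1')               # L2R_LR solver, C = 1
m = toPyModel(liblinear.train(prob, param))  # wrap the returned POINTER(model)
print(m.get_nr_class(), m.get_labels())
```
Note that ctypes passes the problem and parameter structures by reference automatically, which is exactly how liblinearutil.py calls liblinear.train.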
13 | """ 14 | prob_y = [] 15 | prob_x = [] 16 | for line in open(data_file_name): 17 | line = line.split(None, 1) 18 | # In case an instance with all zero features 19 | if len(line) == 1: line += [''] 20 | label, features = line 21 | xi = {} 22 | for e in features.split(): 23 | ind, val = e.split(":") 24 | xi[int(ind)] = float(val) 25 | prob_y += [float(label)] 26 | prob_x += [xi] 27 | return (prob_y, prob_x) 28 | 29 | def load_model(model_file_name): 30 | """ 31 | load_model(model_file_name) -> model 32 | 33 | Load a LIBLINEAR model from model_file_name and return. 34 | """ 35 | model = liblinear.load_model(model_file_name.encode()) 36 | if not model: 37 | print("can't open model file %s" % model_file_name) 38 | return None 39 | model = toPyModel(model) 40 | return model 41 | 42 | def save_model(model_file_name, model): 43 | """ 44 | save_model(model_file_name, model) -> None 45 | 46 | Save a LIBLINEAR model to the file model_file_name. 47 | """ 48 | liblinear.save_model(model_file_name.encode(), model) 49 | 50 | def evaluations(ty, pv): 51 | """ 52 | evaluations(ty, pv) -> (ACC, MSE, SCC) 53 | 54 | Calculate accuracy, mean squared error and squared correlation coefficient 55 | using the true values (ty) and predicted values (pv). 56 | """ 57 | if len(ty) != len(pv): 58 | raise ValueError("len(ty) must equal to len(pv)") 59 | total_correct = total_error = 0 60 | sumv = sumy = sumvv = sumyy = sumvy = 0 61 | for v, y in zip(pv, ty): 62 | if y == v: 63 | total_correct += 1 64 | total_error += (v-y)*(v-y) 65 | sumv += v 66 | sumy += y 67 | sumvv += v*v 68 | sumyy += y*y 69 | sumvy += v*y 70 | l = len(ty) 71 | ACC = 100.0*total_correct/l 72 | MSE = total_error/l 73 | try: 74 | SCC = ((l*sumvy-sumv*sumy)*(l*sumvy-sumv*sumy))/((l*sumvv-sumv*sumv)*(l*sumyy-sumy*sumy)) 75 | except: 76 | SCC = float('nan') 77 | return (ACC, MSE, SCC) 78 | 79 | def train(arg1, arg2=None, arg3=None): 80 | """ 81 | train(y, x [, options]) -> model | ACC 82 | train(prob [, options]) -> model | ACC 83 | train(prob, param) -> model | ACC 84 | 85 | Train a model from data (y, x) or a problem prob using 86 | 'options' or a parameter param. 87 | If '-v' is specified in 'options' (i.e., cross validation) 88 | either accuracy (ACC) or mean-squared error (MSE) is returned. 89 | 90 | options: 91 | -s type : set type of solver (default 1) 92 | for multi-class classification 93 | 0 -- L2-regularized logistic regression (primal) 94 | 1 -- L2-regularized L2-loss support vector classification (dual) 95 | 2 -- L2-regularized L2-loss support vector classification (primal) 96 | 3 -- L2-regularized L1-loss support vector classification (dual) 97 | 4 -- support vector classification by Crammer and Singer 98 | 5 -- L1-regularized L2-loss support vector classification 99 | 6 -- L1-regularized logistic regression 100 | 7 -- L2-regularized logistic regression (dual) 101 | for regression 102 | 11 -- L2-regularized L2-loss support vector regression (primal) 103 | 12 -- L2-regularized L2-loss support vector regression (dual) 104 | 13 -- L2-regularized L1-loss support vector regression (dual) 105 | -c cost : set the parameter C (default 1) 106 | -p epsilon : set the epsilon in loss function of SVR (default 0.1) 107 | -e epsilon : set tolerance of termination criterion 108 | -s 0 and 2 109 | |f'(w)|_2 <= eps*min(pos,neg)/l*|f'(w0)|_2, 110 | where f is the primal function, (default 0.01) 111 | -s 11 112 | |f'(w)|_2 <= eps*|f'(w0)|_2 (default 0.001) 113 | -s 1, 3, 4, and 7 114 | Dual maximal violation <= eps; similar to liblinear (default 0.) 
115 | -s 5 and 6 116 | |f'(w)|_inf <= eps*min(pos,neg)/l*|f'(w0)|_inf, 117 | where f is the primal function (default 0.01) 118 | -s 12 and 13 119 | |f'(alpha)|_1 <= eps |f'(alpha0)|, 120 | where f is the dual function (default 0.1) 121 | -B bias : if bias >= 0, instance x becomes [x; bias]; if < 0, no bias term added (default -1) 122 | -wi weight: weights adjust the parameter C of different classes (see README for details) 123 | -v n: n-fold cross validation mode 124 | -q : quiet mode (no outputs) 125 | """ 126 | prob, param = None, None 127 | if isinstance(arg1, (list, tuple)): 128 | assert isinstance(arg2, (list, tuple)) 129 | y, x, options = arg1, arg2, arg3 130 | prob = problem(y, x) 131 | param = parameter(options) 132 | elif isinstance(arg1, problem): 133 | prob = arg1 134 | if isinstance(arg2, parameter): 135 | param = arg2 136 | else : 137 | param = parameter(arg2) 138 | if prob == None or param == None : 139 | raise TypeError("Wrong types for the arguments") 140 | 141 | prob.set_bias(param.bias) 142 | liblinear.set_print_string_function(param.print_func) 143 | err_msg = liblinear.check_parameter(prob, param) 144 | if err_msg : 145 | raise ValueError('Error: %s' % err_msg) 146 | 147 | if param.cross_validation: 148 | l, nr_fold = prob.l, param.nr_fold 149 | target = (c_double * l)() 150 | liblinear.cross_validation(prob, param, nr_fold, target) 151 | ACC, MSE, SCC = evaluations(prob.y[:l], target[:l]) 152 | if param.solver_type in [L2R_L2LOSS_SVR, L2R_L2LOSS_SVR_DUAL, L2R_L1LOSS_SVR_DUAL]: 153 | print("Cross Validation Mean squared error = %g" % MSE) 154 | print("Cross Validation Squared correlation coefficient = %g" % SCC) 155 | return MSE 156 | else: 157 | print("Cross Validation Accuracy = %g%%" % ACC) 158 | return ACC 159 | else : 160 | m = liblinear.train(prob, param) 161 | m = toPyModel(m) 162 | 163 | return m 164 | 165 | def predict(y, x, m, options=""): 166 | """ 167 | predict(y, x, m [, options]) -> (p_labels, p_acc, p_vals) 168 | 169 | Predict data (y, x) with the SVM model m. 170 | options: 171 | -b probability_estimates: whether to output probability estimates, 0 or 1 (default 0); currently for logistic regression only 172 | -q quiet mode (no outputs) 173 | 174 | The return tuple contains 175 | p_labels: a list of predicted labels 176 | p_acc: a tuple including accuracy (for classification), mean-squared 177 | error, and squared correlation coefficient (for regression). 178 | p_vals: a list of decision values or probability estimates (if '-b 1' 179 | is specified). If k is the number of classes, for decision values, 180 | each element includes results of predicting k binary-class 181 | SVMs. if k = 2 and solver is not MCSVM_CS, only one decision value 182 | is returned. For probabilities, each element contains k values 183 | indicating the probability that the testing instance is in each class. 184 | Note that the order of classes here is the same as 'model.label' 185 | field in the model structure. 
186 | """ 187 | 188 | def info(s): 189 | print(s) 190 | 191 | predict_probability = 0 192 | argv = options.split() 193 | i = 0 194 | while i < len(argv): 195 | if argv[i] == '-b': 196 | i += 1 197 | predict_probability = int(argv[i]) 198 | elif argv[i] == '-q': 199 | info = print_null 200 | else: 201 | raise ValueError("Wrong options") 202 | i+=1 203 | 204 | solver_type = m.param.solver_type 205 | nr_class = m.get_nr_class() 206 | nr_feature = m.get_nr_feature() 207 | is_prob_model = m.is_probability_model() 208 | bias = m.bias 209 | if bias >= 0: 210 | biasterm = feature_node(nr_feature+1, bias) 211 | else: 212 | biasterm = feature_node(-1, bias) 213 | pred_labels = [] 214 | pred_values = [] 215 | 216 | if predict_probability: 217 | if not is_prob_model: 218 | raise TypeError('probability output is only supported for logistic regression') 219 | prob_estimates = (c_double * nr_class)() 220 | for xi in x: 221 | xi, idx = gen_feature_nodearray(xi, feature_max=nr_feature) 222 | xi[-2] = biasterm 223 | label = liblinear.predict_probability(m, xi, prob_estimates) 224 | values = prob_estimates[:nr_class] 225 | pred_labels += [label] 226 | pred_values += [values] 227 | else: 228 | if nr_class <= 2: 229 | nr_classifier = 1 230 | else: 231 | nr_classifier = nr_class 232 | dec_values = (c_double * nr_classifier)() 233 | for xi in x: 234 | xi, idx = gen_feature_nodearray(xi, feature_max=nr_feature) 235 | xi[-2] = biasterm 236 | label = liblinear.predict_values(m, xi, dec_values) 237 | values = dec_values[:nr_classifier] 238 | pred_labels += [label] 239 | pred_values += [values] 240 | if len(y) == 0: 241 | y = [0] * len(x) 242 | ACC, MSE, SCC = evaluations(y, pred_labels) 243 | l = len(y) 244 | if solver_type in [L2R_L2LOSS_SVR, L2R_L2LOSS_SVR_DUAL, L2R_L1LOSS_SVR_DUAL]: 245 | info("Mean squared error = %g (regression)" % MSE) 246 | info("Squared correlation coefficient = %g (regression)" % SCC) 247 | else: 248 | info("Accuracy = %g%% (%d/%d) (classification)" % (ACC, int(l*ACC/100), l)) 249 | 250 | return pred_labels, (ACC, MSE, SCC), pred_values 251 | -------------------------------------------------------------------------------- /linux/evaluate/liblinear/tron.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include "tron.h" 6 | 7 | #ifndef min 8 | template static inline T min(T x,T y) { return (x static inline T max(T x,T y) { return (x>y)?x:y; } 13 | #endif 14 | 15 | #ifdef __cplusplus 16 | extern "C" { 17 | #endif 18 | 19 | extern double dnrm2_(int *, double *, int *); 20 | extern double ddot_(int *, double *, int *, double *, int *); 21 | extern int daxpy_(int *, double *, double *, int *, double *, int *); 22 | extern int dscal_(int *, double *, double *, int *); 23 | 24 | #ifdef __cplusplus 25 | } 26 | #endif 27 | 28 | static void default_print(const char *buf) 29 | { 30 | fputs(buf,stdout); 31 | fflush(stdout); 32 | } 33 | 34 | void TRON::info(const char *fmt,...) 35 | { 36 | char buf[BUFSIZ]; 37 | va_list ap; 38 | va_start(ap,fmt); 39 | vsprintf(buf,fmt,ap); 40 | va_end(ap); 41 | (*tron_print_string)(buf); 42 | } 43 | 44 | TRON::TRON(const function *fun_obj, double eps, int max_iter) 45 | { 46 | this->fun_obj=const_cast(fun_obj); 47 | this->eps=eps; 48 | this->max_iter=max_iter; 49 | tron_print_string = default_print; 50 | } 51 | 52 | TRON::~TRON() 53 | { 54 | } 55 | 56 | void TRON::tron(double *w) 57 | { 58 | // Parameters for updating the iterates. 
/linux/evaluate/liblinear/tron.cpp: -------------------------------------------------------------------------------- 1 | #include <math.h> 2 | #include <stdio.h> 3 | #include <string.h> 4 | #include <stdarg.h> 5 | #include "tron.h" 6 | 7 | #ifndef min 8 | template <class T> static inline T min(T x,T y) { return (x<y)?x:y; } 9 | #endif 10 | 11 | #ifndef max 12 | template <class T> static inline T max(T x,T y) { return (x>y)?x:y; } 13 | #endif 14 | 15 | #ifdef __cplusplus 16 | extern "C" { 17 | #endif 18 | 19 | extern double dnrm2_(int *, double *, int *); 20 | extern double ddot_(int *, double *, int *, double *, int *); 21 | extern int daxpy_(int *, double *, double *, int *, double *, int *); 22 | extern int dscal_(int *, double *, double *, int *); 23 | 24 | #ifdef __cplusplus 25 | } 26 | #endif 27 | 28 | static void default_print(const char *buf) 29 | { 30 | fputs(buf,stdout); 31 | fflush(stdout); 32 | } 33 | 34 | void TRON::info(const char *fmt,...) 35 | { 36 | char buf[BUFSIZ]; 37 | va_list ap; 38 | va_start(ap,fmt); 39 | vsprintf(buf,fmt,ap); 40 | va_end(ap); 41 | (*tron_print_string)(buf); 42 | } 43 | 44 | TRON::TRON(const function *fun_obj, double eps, int max_iter) 45 | { 46 | this->fun_obj=const_cast<function *>(fun_obj); 47 | this->eps=eps; 48 | this->max_iter=max_iter; 49 | tron_print_string = default_print; 50 | } 51 | 52 | TRON::~TRON() 53 | { 54 | } 55 | 56 | void TRON::tron(double *w) 57 | { 58 | // Parameters for updating the iterates. 59 | double eta0 = 1e-4, eta1 = 0.25, eta2 = 0.75; 60 | 61 | // Parameters for updating the trust region size delta. 62 | double sigma1 = 0.25, sigma2 = 0.5, sigma3 = 4; 63 | 64 | int n = fun_obj->get_nr_variable(); 65 | int i, cg_iter; 66 | double delta, snorm, one=1.0; 67 | double alpha, f, fnew, prered, actred, gs; 68 | int search = 1, iter = 1, inc = 1; 69 | double *s = new double[n]; 70 | double *r = new double[n]; 71 | double *w_new = new double[n]; 72 | double *g = new double[n]; 73 | 74 | for (i=0; i<n; i++) 75 | w[i] = 0; 76 | 77 | f = fun_obj->fun(w); 78 | fun_obj->grad(w, g); 79 | delta = dnrm2_(&n, g, &inc); 80 | double gnorm1 = delta; 81 | double gnorm = gnorm1; 82 | 83 | if (gnorm <= eps*gnorm1) 84 | search = 0; 85 | 86 | iter = 1; 87 | 88 | while (iter <= max_iter && search) 89 | { 90 | cg_iter = trcg(delta, g, s, r); 91 | 92 | memcpy(w_new, w, sizeof(double)*n); 93 | daxpy_(&n, &one, s, &inc, w_new, &inc); 94 | 95 | gs = ddot_(&n, g, &inc, s, &inc); 96 | prered = -0.5*(gs-ddot_(&n, s, &inc, r, &inc)); 97 | fnew = fun_obj->fun(w_new); 98 | 99 | // Compute the actual reduction. 100 | actred = f - fnew; 101 | 102 | // On the first iteration, adjust the initial step bound. 103 | snorm = dnrm2_(&n, s, &inc); 104 | if (iter == 1) 105 | delta = min(delta, snorm); 106 | 107 | // Compute prediction alpha*snorm of the step. 108 | if (fnew - f - gs <= 0) 109 | alpha = sigma3; 110 | else 111 | alpha = max(sigma1, -0.5*(gs/(fnew - f - gs))); 112 | 113 | // Update the trust region bound according to the ratio of actual to predicted reduction. 114 | if (actred < eta0*prered) 115 | delta = min(max(alpha, sigma1)*snorm, sigma2*delta); 116 | else if (actred < eta1*prered) 117 | delta = max(sigma1*delta, min(alpha*snorm, sigma2*delta)); 118 | else if (actred < eta2*prered) 119 | delta = max(sigma1*delta, min(alpha*snorm, sigma3*delta)); 120 | else 121 | delta = max(delta, min(alpha*snorm, sigma3*delta)); 122 | 123 | info("iter %2d act %5.3e pre %5.3e delta %5.3e f %5.3e |g| %5.3e CG %3d\n", iter, actred, prered, delta, f, gnorm, cg_iter); 124 | 125 | if (actred > eta0*prered) 126 | { 127 | iter++; 128 | memcpy(w, w_new, sizeof(double)*n); 129 | f = fnew; 130 | fun_obj->grad(w, g); 131 | 132 | gnorm = dnrm2_(&n, g, &inc); 133 | if (gnorm <= eps*gnorm1) 134 | break; 135 | } 136 | if (f < -1.0e+32) 137 | { 138 | info("WARNING: f < -1.0e+32\n"); 139 | break; 140 | } 141 | if (fabs(actred) <= 0 && prered <= 0) 142 | { 143 | info("WARNING: actred and prered <= 0\n"); 144 | break; 145 | } 146 | if (fabs(actred) <= 1.0e-12*fabs(f) && 147 | fabs(prered) <= 1.0e-12*fabs(f)) 148 | { 149 | info("WARNING: actred and prered too small\n"); 150 | break; 151 | } 152 | } 153 | 154 | delete[] g; 155 | delete[] r; 156 | delete[] w_new; 157 | delete[] s; 158 | } 159 | 160 | int TRON::trcg(double delta, double *g, double *s, double *r) 161 | { 162 | int i, inc = 1; 163 | int n = fun_obj->get_nr_variable(); 164 | double one = 1; 165 | double *d = new double[n]; 166 | double *Hd = new double[n]; 167 | double rTr, rnewTrnew, alpha, beta, cgtol; 168 | 169 | for (i=0; i<n; i++) 170 | { 171 | s[i] = 0; 172 | r[i] = -g[i]; 173 | d[i] = r[i]; 174 | } 175 | cgtol = 0.1*dnrm2_(&n, g, &inc); 176 | 177 | int cg_iter = 0; 178 | rTr = ddot_(&n, r, &inc, r, &inc); 179 | while (1) 180 | { 181 | if (dnrm2_(&n, r, &inc) <= cgtol) 182 | break; 183 | cg_iter++; 184 | fun_obj->Hv(d, Hd); 185 | 186 | alpha = rTr/ddot_(&n, d, &inc, Hd, &inc); 187 | daxpy_(&n, &alpha, d, &inc, s, &inc); 188 | if (dnrm2_(&n, s, &inc) > delta) 189 | { 190 | info("cg reaches trust region boundary\n"); 191 | alpha = -alpha; 192 | daxpy_(&n, &alpha, d, &inc, s, &inc); 193 | 194 | double std = ddot_(&n, s, &inc, d, &inc); 195 | double sts = ddot_(&n, s, &inc, s, &inc); 196 | double dtd = ddot_(&n, d, &inc, d, &inc); 197 | double dsq = delta*delta; 198 | double rad = sqrt(std*std + dtd*(dsq-sts));
199 | if (std >= 0) 200 | alpha = (dsq - sts)/(std + rad); 201 | else 202 | alpha = (rad - std)/dtd; 203 | daxpy_(&n, &alpha, d, &inc, s, &inc); 204 | alpha = -alpha; 205 | daxpy_(&n, &alpha, Hd, &inc, r, &inc); 206 | break; 207 | } 208 | alpha = -alpha; 209 | daxpy_(&n, &alpha, Hd, &inc, r, &inc); 210 | rnewTrnew = ddot_(&n, r, &inc, r, &inc); 211 | beta = rnewTrnew/rTr; 212 | dscal_(&n, &beta, d, &inc); 213 | daxpy_(&n, &one, r, &inc, d, &inc); 214 | rTr = rnewTrnew; 215 | } 216 | 217 | delete[] d; 218 | delete[] Hd; 219 | 220 | return(cg_iter); 221 | } 222 | 223 | double TRON::norm_inf(int n, double *x) 224 | { 225 | double dmax = fabs(x[0]); 226 | for (int i=1; i= dmax) 228 | dmax = fabs(x[i]); 229 | return(dmax); 230 | } 231 | 232 | void TRON::set_print_string(void (*print_string) (const char *buf)) 233 | { 234 | tron_print_string = print_string; 235 | } 236 | -------------------------------------------------------------------------------- /linux/evaluate/liblinear/tron.h: -------------------------------------------------------------------------------- 1 | #ifndef _TRON_H 2 | #define _TRON_H 3 | 4 | class function 5 | { 6 | public: 7 | virtual double fun(double *w) = 0 ; 8 | virtual void grad(double *w, double *g) = 0 ; 9 | virtual void Hv(double *s, double *Hs) = 0 ; 10 | 11 | virtual int get_nr_variable(void) = 0 ; 12 | virtual ~function(void){} 13 | }; 14 | 15 | class TRON 16 | { 17 | public: 18 | TRON(const function *fun_obj, double eps = 0.1, int max_iter = 1000); 19 | ~TRON(); 20 | 21 | void tron(double *w); 22 | void set_print_string(void (*i_print) (const char *buf)); 23 | 24 | private: 25 | int trcg(double delta, double *g, double *s, double *r); 26 | double norm_inf(int n, double *x); 27 | 28 | double eps; 29 | int max_iter; 30 | function *fun_obj; 31 | void info(const char *fmt,...); 32 | void (*tron_print_string)(const char *buf); 33 | }; 34 | #endif 35 | -------------------------------------------------------------------------------- /linux/evaluate/liblinear/windows/liblinear.dll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tangjianpku/LINE/d5f840941e0f4026090d1b1feeaf15da38e2b24b/linux/evaluate/liblinear/windows/liblinear.dll -------------------------------------------------------------------------------- /linux/evaluate/liblinear/windows/libsvmread.mexw64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tangjianpku/LINE/d5f840941e0f4026090d1b1feeaf15da38e2b24b/linux/evaluate/liblinear/windows/libsvmread.mexw64 -------------------------------------------------------------------------------- /linux/evaluate/liblinear/windows/libsvmwrite.mexw64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tangjianpku/LINE/d5f840941e0f4026090d1b1feeaf15da38e2b24b/linux/evaluate/liblinear/windows/libsvmwrite.mexw64 -------------------------------------------------------------------------------- /linux/evaluate/liblinear/windows/predict.exe: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tangjianpku/LINE/d5f840941e0f4026090d1b1feeaf15da38e2b24b/linux/evaluate/liblinear/windows/predict.exe -------------------------------------------------------------------------------- /linux/evaluate/liblinear/windows/predict.mexw64: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/tangjianpku/LINE/d5f840941e0f4026090d1b1feeaf15da38e2b24b/linux/evaluate/liblinear/windows/predict.mexw64 -------------------------------------------------------------------------------- /linux/evaluate/liblinear/windows/test/model.txt: -------------------------------------------------------------------------------- 1 | solver_type L2R_LR 2 | nr_class 3 3 | label 1 2 3 4 | nr_feature 3 5 | bias -1 6 | w 7 | 0 0 -0.9069706097048917 8 | 0.4 -0.4 0.3381644874876603 9 | -0.4 0.4 0.3381644874876603 10 | -------------------------------------------------------------------------------- /linux/evaluate/liblinear/windows/test/output.txt: -------------------------------------------------------------------------------- 1 | labels 1 2 3 2 | 2 0.247901 0.451705 0.300393 3 | -------------------------------------------------------------------------------- /linux/evaluate/liblinear/windows/test/test.txt: -------------------------------------------------------------------------------- 1 | 0 1:0.5 2:-0.5 3:1 -------------------------------------------------------------------------------- /linux/evaluate/liblinear/windows/test/train.txt: -------------------------------------------------------------------------------- 1 | 1 1:1 2:0 3:0 2 | 1 1:1 2:0 3:-1 3 | 2 1:1 2:-1 3:0 4 | 2 1:1 2:0 3:0 5 | 3 1:1 2:1 3:1 6 | 3 1:-1 2:-1 3:-1 -------------------------------------------------------------------------------- /linux/evaluate/liblinear/windows/train.exe: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tangjianpku/LINE/d5f840941e0f4026090d1b1feeaf15da38e2b24b/linux/evaluate/liblinear/windows/train.exe -------------------------------------------------------------------------------- /linux/evaluate/liblinear/windows/train.mexw64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tangjianpku/LINE/d5f840941e0f4026090d1b1feeaf15da38e2b24b/linux/evaluate/liblinear/windows/train.mexw64 -------------------------------------------------------------------------------- /linux/evaluate/make.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | cd liblinear 4 | make 5 | cd .. 6 | 7 | cd program 8 | g++ -O2 preprocess.cpp -o preprocess 9 | g++ -O2 score.cpp -o score -------------------------------------------------------------------------------- /linux/evaluate/program/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tangjianpku/LINE/d5f840941e0f4026090d1b1feeaf15da38e2b24b/linux/evaluate/program/.DS_Store -------------------------------------------------------------------------------- /linux/evaluate/program/score.cpp: -------------------------------------------------------------------------------- 1 | // Copyright 2013 Google Inc. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | // The input is the candidate label file and the prediction file. 16 | // The program computes the macro-F1 and micro-F1 of the predictions. 17 | 18 | #include <stdio.h> 19 | #include <stdlib.h> 20 | #include <string.h> 21 | #include <math.h> 22 | #include <set> 23 | #include <vector> 24 | #include <algorithm> 25 | using namespace std; 26 | 27 | #define MAX_STRING 200 28 | #define MAX_LABEL 1000 29 | 30 | struct Entry 31 | { 32 | int id; 33 | double value; 34 | friend bool operator < (Entry e1, Entry e2) 35 | { 36 | return e1.value > e2.value; 37 | } 38 | }; 39 | 40 | int id_size = 0, test_size = 0, label_size = 0; 41 | int lb2id[MAX_LABEL]; 42 | int pst2id[MAX_LABEL]; 43 | Entry ranked_list[MAX_LABEL]; 44 | char candidate_file[MAX_STRING], predict_file[MAX_STRING]; 45 | set<int> truth[MAX_LABEL], predict[MAX_LABEL]; 46 | vector<int> v_nlabels; 47 | 48 | // Reads a single word from a file, assuming space + tab + EOL to be word boundaries 49 | void ReadWord(char *word, FILE *fin) { 50 | int a = 0, ch; 51 | while (!feof(fin)) { 52 | ch = fgetc(fin); 53 | if (ch == 13) continue; 54 | if ((ch == ' ') || (ch == '\t') || (ch == '\n')) { 55 | if (a > 0) { 56 | if (ch == '\n') ungetc(ch, fin); 57 | break; 58 | } 59 | if (ch == '\n') { 60 | strcpy(word, (char *)""); 61 | return; 62 | } 63 | else continue; 64 | } 65 | word[a] = ch; 66 | a++; 67 | if (a >= MAX_STRING - 1) a--; // Truncate too long words 68 | } 69 | word[a] = 0; 70 | } 71 | 72 | void TrainModel() 73 | { 74 | int len, lb, id, tmp; 75 | char str[MAX_STRING]; 76 | double prob; 77 | 78 | FILE *fi = fopen(candidate_file, "rb"); 79 | while (fscanf(fi, "%d", &len) == 1) 80 | { 81 | v_nlabels.push_back(len); 82 | for (int k = 0; k != len; k++) 83 | { 84 | fscanf(fi, "%d", &lb); 85 | if (lb2id[lb] == 0) lb2id[lb] = ++id_size; 86 | id = lb2id[lb]; 87 | truth[id].insert(test_size); 88 | } 89 | test_size++; 90 | } 91 | fclose(fi); 92 | 93 | fi = fopen(predict_file, "rb"); 94 | fscanf(fi, "%s", str); 95 | while (1) 96 | { 97 | ReadWord(str, fi); 98 | if (strcmp(str, "") == 0) break; 99 | 100 | lb = atoi(str); 101 | if (lb2id[lb] == 0) lb2id[lb] = ++id_size; 102 | id = lb2id[lb]; 103 | pst2id[label_size++] = id; 104 | } 105 | for (int k = 0; k != test_size; k++) 106 | { 107 | fscanf(fi, "%d", &tmp); 108 | for (int i = 0; i != label_size; i++) 109 | { 110 | fscanf(fi, "%lf", &prob); 111 | id = pst2id[i]; 112 | ranked_list[i].id = id; 113 | ranked_list[i].value = prob; 114 | } 115 | sort(ranked_list, ranked_list + label_size); 116 | int n = v_nlabels[k]; 117 | for (int i = 0; i != n; i++) 118 | { 119 | id = ranked_list[i].id; 120 | predict[id].insert(k); 121 | } 122 | } 123 | fclose(fi); 124 | 125 | double macro_f1, micro_f1; 126 | double tp, fn, fp; 127 | double stp = 0, sfn = 0, sfp = 0, sf1 = 0; 128 | double P, R; 129 | set<int>::iterator i; 130 | 131 | for (int k = 1; k <= id_size; k++) 132 | { 133 | tp = 0; 134 | for (i = truth[k].begin(); i != truth[k].end(); i++) if (predict[k].count(*i) != 0) 135 | tp++; 136 | fn = truth[k].size() - tp; 137 | fp = predict[k].size() - tp; 138 | 139 | stp += tp; 140 | sfn += fn; 141 | sfp += fp; 142 | 143 | if (tp + fp == 0) P = 0; 144 | else P = tp / (tp + fp); 145 | if (tp + fn == 0) R = 0; 146 | else R = tp / (tp + fn); 147 | 148 | if (P + R != 0) sf1 += 2 * P * R / (P + R); 149 | } 150 | 151 | macro_f1 = sf1 / id_size; 152 | 153 | P = stp / (stp + sfp); 154 | R = stp / (stp + sfn); 155 | micro_f1 = 2 * P * R / (P + R); 156 | 157 | printf("number of tests: %d\n", test_size);
| printf("number of labels: %d\n", id_size); 159 | printf("macro-f1: %lf\n", macro_f1); 160 | printf("micro-f1: %lf\n", micro_f1); 161 | } 162 | 163 | int ArgPos(char *str, int argc, char **argv) { 164 | int a; 165 | for (a = 1; a < argc; a++) if (!strcmp(str, argv[a])) { 166 | if (a == argc - 1) { 167 | printf("Argument missing for %s\n", str); 168 | exit(1); 169 | } 170 | return a; 171 | } 172 | return -1; 173 | } 174 | 175 | int main(int argc, char **argv) { 176 | int i; 177 | if (argc == 1) { 178 | printf("WORD VECTOR estimation toolkit v 0.1b\n\n"); 179 | printf("Options:\n"); 180 | printf("Parameters for training:\n"); 181 | printf("\t-train \n"); 182 | printf("\t\tUse text data from to train the model\n"); 183 | printf("\t-test \n"); 184 | printf("\t\tUse text data from to test the model\n"); 185 | printf("\t-vector \n"); 186 | printf("\t\tUse vector data from \n"); 187 | printf("\nExamples:\n"); 188 | printf("./evl -train train.txt -test test.txt -vector vec.txt \n\n"); 189 | return 0; 190 | } 191 | if ((i = ArgPos((char *)"-predict", argc, argv)) > 0) strcpy(predict_file, argv[i + 1]); 192 | if ((i = ArgPos((char *)"-candidate", argc, argv)) > 0) strcpy(candidate_file, argv[i + 1]); 193 | TrainModel(); 194 | return 0; 195 | } -------------------------------------------------------------------------------- /linux/evaluate/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | vector_file=$1 4 | times=10 5 | portion=0.01 6 | output_file=result.txt 7 | 8 | vocab_file=program/vocab.txt 9 | label_file=program/label.txt 10 | workspace=workspace/ 11 | 12 | ./program/preprocess -vocab ${vocab_file} -vector ${vector_file} -label ${label_file} -output ${workspace} -debug 2 -binary 1 -times ${times} -portion ${portion} 13 | 14 | for (( i = 0; i < ${times} ; i ++ )) 15 | do 16 | ./liblinear/train -s 0 -q ${workspace}train${i} ${workspace}model${i} 17 | done 18 | 19 | for (( i = 0; i < ${times} ; i ++ )) 20 | do 21 | ./liblinear/predict -b 1 -q ${workspace}test${i} ${workspace}model${i} ${workspace}predict${i} 22 | done 23 | 24 | for (( i = 0; i < ${times} ; i ++ )) 25 | do 26 | ./program/score -predict ${workspace}predict${i} -candidate ${workspace}can${i} >> ${output_file} 27 | done 28 | -------------------------------------------------------------------------------- /linux/evaluate/score.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | 4 | macro_f1 = 0 5 | micro_f1 = 0 6 | a = 0 7 | b = 0 8 | 9 | input_file = sys.argv[1] 10 | fi = open(input_file, 'r') 11 | for line in fi: 12 | if line[0:9] == 'macro-f1:': 13 | macro_f1 = macro_f1 + float(line.split(':')[1]) 14 | a = a + 1 15 | if line[0:9] == 'micro-f1:': 16 | micro_f1 = micro_f1 + float(line.split(':')[1]) 17 | b = b + 1 18 | fi.close() 19 | 20 | macro_f1 = macro_f1 / a 21 | micro_f1 = micro_f1 / b 22 | 23 | print("Macro-F1: " + str(macro_f1)) 24 | print("Micro-F1: " + str(micro_f1)) 25 | -------------------------------------------------------------------------------- /linux/line: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tangjianpku/LINE/d5f840941e0f4026090d1b1feeaf15da38e2b24b/linux/line -------------------------------------------------------------------------------- /linux/normalize.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | 
/linux/evaluate/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | vector_file=$1 4 | times=10 5 | portion=0.01 6 | output_file=result.txt 7 | 8 | vocab_file=program/vocab.txt 9 | label_file=program/label.txt 10 | workspace=workspace/ 11 | 12 | ./program/preprocess -vocab ${vocab_file} -vector ${vector_file} -label ${label_file} -output ${workspace} -debug 2 -binary 1 -times ${times} -portion ${portion} 13 | 14 | for (( i = 0; i < ${times} ; i ++ )) 15 | do 16 | ./liblinear/train -s 0 -q ${workspace}train${i} ${workspace}model${i} 17 | done 18 | 19 | for (( i = 0; i < ${times} ; i ++ )) 20 | do 21 | ./liblinear/predict -b 1 -q ${workspace}test${i} ${workspace}model${i} ${workspace}predict${i} 22 | done 23 | 24 | for (( i = 0; i < ${times} ; i ++ )) 25 | do 26 | ./program/score -predict ${workspace}predict${i} -candidate ${workspace}can${i} >> ${output_file} 27 | done 28 | -------------------------------------------------------------------------------- /linux/evaluate/score.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | 4 | macro_f1 = 0 5 | micro_f1 = 0 6 | a = 0 7 | b = 0 8 | 9 | input_file = sys.argv[1] 10 | fi = open(input_file, 'r') 11 | for line in fi: 12 | if line[0:9] == 'macro-f1:': 13 | macro_f1 = macro_f1 + float(line.split(':')[1]) 14 | a = a + 1 15 | if line[0:9] == 'micro-f1:': 16 | micro_f1 = micro_f1 + float(line.split(':')[1]) 17 | b = b + 1 18 | fi.close() 19 | 20 | macro_f1 = macro_f1 / a 21 | micro_f1 = micro_f1 / b 22 | 23 | print("Macro-F1: " + str(macro_f1)) 24 | print("Micro-F1: " + str(micro_f1)) 25 | -------------------------------------------------------------------------------- /linux/line: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tangjianpku/LINE/d5f840941e0f4026090d1b1feeaf15da38e2b24b/linux/line -------------------------------------------------------------------------------- /linux/normalize.cpp: -------------------------------------------------------------------------------- 1 | #include <stdio.h> 2 | #include <stdlib.h> 3 | #include <string.h> 4 | #include <math.h> 5 | #include <ctype.h> 6 | 7 | #define MAX_STRING 100 8 | 9 | typedef float real; // Precision of float numbers 10 | 11 | char input_file[MAX_STRING], output_file[MAX_STRING]; 12 | int binary = 0; 13 | 14 | void Normalize() 15 | { 16 | long long num_vertices, vector_dim, a, b; 17 | char name[MAX_STRING], ch; 18 | real *vec; 19 | double len; 20 | 21 | FILE *fi, *fo; 22 | 23 | fi = fopen(input_file, "rb"); 24 | fo = fopen(output_file, "wb"); 25 | 26 | fscanf(fi, "%lld %lld", &num_vertices, &vector_dim); 27 | vec = (real *)malloc(vector_dim * sizeof(real)); 28 | fprintf(fo, "%lld %lld\n", num_vertices, vector_dim); 29 | for (a = 0; a < num_vertices; a++) 30 | { 31 | fscanf(fi, "%s%c", name, &ch); 32 | for (b = 0; b < vector_dim; b++) fread(&vec[b], sizeof(real), 1, fi); 33 | len = 0; 34 | for (b = 0; b < vector_dim; b++) len += vec[b] * vec[b]; 35 | len = sqrt(len); 36 | for (b = 0; b < vector_dim; b++) vec[b] /= len; 37 | 38 | fprintf(fo, "%s ", name); 39 | if (binary) 40 | { 41 | for (b = 0; b < vector_dim; b++) 42 | fwrite(&vec[b], sizeof(real), 1, fo); 43 | } 44 | else 45 | { 46 | for (b = 0; b < vector_dim; b++) 47 | fprintf(fo, "%lf ", vec[b]); 48 | } 49 | fprintf(fo, "\n"); 50 | } 51 | free(vec); 52 | fclose(fi); 53 | fclose(fo); 54 | } 55 | 56 | int ArgPos(char *str, int argc, char **argv) { 57 | int a; 58 | for (a = 1; a < argc; a++) if (!strcmp(str, argv[a])) { 59 | if (a == argc - 1) { 60 | printf("Argument missing for %s\n", str); 61 | exit(1); 62 | } 63 | return a; 64 | } 65 | return -1; 66 | } 67 | 68 | int main(int argc, char **argv) { 69 | int i; 70 | if (argc == 1) { 71 | printf("Normalize vertex embeddings by setting their L2 norm as 1\n\n"); 72 | printf("Options:\n"); 73 | printf("Parameters for training:\n"); 74 | printf("\t-input <file>\n"); 75 | printf("\t\tThe original vertex embeddings\n"); 76 | printf("\t-output <file>\n"); 77 | printf("\t\tUse <file> to save the normalized vertex embeddings\n"); 78 | printf("\t-binary <int>\n"); 79 | printf("\t\tSave the learnt embeddings in binary mode; default is 0 (off)\n"); 80 | printf("\nExamples:\n"); 81 | printf("./normalize -input vec_wo_norm.txt -output vec_norm.txt -binary 1\n\n"); 82 | return 0; 83 | } 84 | if ((i = ArgPos((char *)"-input", argc, argv)) > 0) strcpy(input_file, argv[i + 1]); 85 | if ((i = ArgPos((char *)"-output", argc, argv)) > 0) strcpy(output_file, argv[i + 1]); 86 | if ((i = ArgPos((char *)"-binary", argc, argv)) > 0) binary = atoi(argv[i + 1]); 87 | Normalize(); 88 | return 0; 89 | } -------------------------------------------------------------------------------- /linux/preprocess_youtube.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | 4 | fi = open(sys.argv[1], 'r') 5 | fo = open(sys.argv[2], 'w') 6 | for line in fi: 7 | items = line.strip().split() 8 | fo.write('{}\t{}\t1\n'.format(items[0], items[1])) 9 | fo.write('{}\t{}\t1\n'.format(items[1], items[0])) 10 | fi.close() 11 | fo.close() 12 | --------------------------------------------------------------------------------
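preprocess_youtube.py simply rewrites each undirected link as the two directed, unit-weight edges that the LINE input format requires. Assuming youtube-links.txt contained a single pair `15 22` (illustrative vertex ids), net_youtube.txt would read:
```
15	22	1
22	15	1
```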
bool operator < (Neighbor n1, Neighbor n2) 27 | { 28 | return n1.weight > n2.weight; 29 | } 30 | }; 31 | 32 | 33 | char train_file[MAX_STRING], output_file[MAX_STRING]; 34 | struct ClassVertex *vertex; 35 | int *vertex_hash_table; 36 | int max_num_vertices = 1000, num_vertices = 0; 37 | long long num_edges = 0; 38 | 39 | int max_depth = 1, max_k = 0; 40 | std::vector vertex_set; 41 | std::vector *neighbor; 42 | 43 | Neighbor *rank_list; 44 | std::map vid2weight; 45 | 46 | /* Build a hash table, mapping each vertex name to a unique vertex id */ 47 | unsigned int Hash(char *key) 48 | { 49 | unsigned int seed = 131; 50 | unsigned int hash = 0; 51 | while (*key) 52 | { 53 | hash = hash * seed + (*key++); 54 | } 55 | return hash % hash_table_size; 56 | } 57 | 58 | void InitHashTable() 59 | { 60 | vertex_hash_table = (int *)malloc(hash_table_size * sizeof(int)); 61 | for (int k = 0; k != hash_table_size; k++) vertex_hash_table[k] = -1; 62 | } 63 | 64 | void InsertHashTable(char *key, int value) 65 | { 66 | int addr = Hash(key); 67 | while (vertex_hash_table[addr] != -1) addr = (addr + 1) % hash_table_size; 68 | vertex_hash_table[addr] = value; 69 | } 70 | 71 | int SearchHashTable(char *key) 72 | { 73 | int addr = Hash(key); 74 | while (1) 75 | { 76 | if (vertex_hash_table[addr] == -1) return -1; 77 | if (!strcmp(key, vertex[vertex_hash_table[addr]].name)) return vertex_hash_table[addr]; 78 | addr = (addr + 1) % hash_table_size; 79 | } 80 | return -1; 81 | } 82 | 83 | /* Add a vertex to the vertex set */ 84 | int AddVertex(char *name) 85 | { 86 | int length = strlen(name) + 1; 87 | if (length > MAX_STRING) length = MAX_STRING; 88 | vertex[num_vertices].name = (char *)calloc(length, sizeof(char)); 89 | strcpy(vertex[num_vertices].name, name); 90 | vertex[num_vertices].sum_weight = 0; 91 | num_vertices++; 92 | if (num_vertices + 2 >= max_num_vertices) 93 | { 94 | max_num_vertices += 1000; 95 | vertex = (struct ClassVertex *)realloc(vertex, max_num_vertices * sizeof(struct ClassVertex)); 96 | } 97 | InsertHashTable(name, num_vertices - 1); 98 | return num_vertices - 1; 99 | } 100 | 101 | /* Read network from the training file */ 102 | void ReadData() 103 | { 104 | FILE *fin; 105 | char name_v1[MAX_STRING], name_v2[MAX_STRING], str[2 * MAX_STRING + 10000]; 106 | int vid, u, v; 107 | double weight; 108 | Neighbor nb; 109 | 110 | fin = fopen(train_file, "rb"); 111 | if (fin == NULL) 112 | { 113 | printf("ERROR: network file not found!\n"); 114 | exit(1); 115 | } 116 | num_edges = 0; 117 | while (fgets(str, sizeof(str), fin)) num_edges++; 118 | fclose(fin); 119 | printf("Number of edges: %lld \n", num_edges); 120 | 121 | fin = fopen(train_file, "rb"); 122 | num_vertices = 0; 123 | for (int k = 0; k != num_edges; k++) 124 | { 125 | fscanf(fin, "%s %s %lf", name_v1, name_v2, &weight); 126 | 127 | if (k % 10000 == 0) 128 | { 129 | printf("Reading edges: %.3lf%%%c", k / (double)(num_edges + 1) * 100, 13); 130 | fflush(stdout); 131 | } 132 | 133 | vid = SearchHashTable(name_v1); 134 | if (vid == -1) vid = AddVertex(name_v1); 135 | vertex[vid].degree += weight; 136 | 137 | vid = SearchHashTable(name_v2); 138 | if (vid == -1) vid = AddVertex(name_v2); 139 | vertex[vid].degree += weight; 140 | } 141 | fclose(fin); 142 | printf("Number of vertices: %d \n", num_vertices); 143 | 144 | neighbor = new std::vector[num_vertices]; 145 | rank_list = (Neighbor *)calloc(num_vertices, sizeof(Neighbor)); 146 | 147 | fin = fopen(train_file, "rb"); 148 | for (long long k = 0; k != num_edges; k++) 149 | { 150 | fscanf(fin, "%s 
%s %lf", name_v1, name_v2, &weight); 151 | 152 | if (k % 10000 == 0) 153 | { 154 | printf("Reading neighbors: %.3lf%%%c", k / (double)(num_edges + 1) * 100, 13); 155 | fflush(stdout); 156 | } 157 | 158 | u = SearchHashTable(name_v1); 159 | 160 | v = SearchHashTable(name_v2); 161 | 162 | nb.vid = v; 163 | nb.weight = weight; 164 | neighbor[u].push_back(nb); 165 | } 166 | fclose(fin); 167 | printf("\n"); 168 | 169 | for (int k = 0; k != num_vertices; k++) 170 | { 171 | vertex[k].sum_weight = 0; 172 | int len = neighbor[k].size(); 173 | for (int i = 0; i != len; i++) 174 | vertex[k].sum_weight += neighbor[k][i].weight; 175 | } 176 | } 177 | 178 | void Reconstruct() 179 | { 180 | FILE *fo = fopen(output_file, "wb"); 181 | 182 | int sv, cv, cd, len, pst; 183 | long long num_edges_renet = 0; 184 | double cw, sum; 185 | std::queue node, depth; 186 | std::queue weight; 187 | 188 | for (sv = 0; sv != num_vertices; sv++) 189 | { 190 | if (sv % 10 == 0) 191 | { 192 | printf("%cProgress: %.3lf%%", 13, (real)sv / (real)(num_vertices + 1) * 100); 193 | fflush(stdout); 194 | } 195 | 196 | while (!node.empty()) node.pop(); 197 | while (!depth.empty()) depth.pop(); 198 | while (!weight.empty()) weight.pop(); 199 | vid2weight.clear(); 200 | 201 | for (int i = 0; i != num_vertices; i++) 202 | { 203 | rank_list[i].vid = i; 204 | rank_list[i].weight = 0; 205 | } 206 | 207 | len = neighbor[sv].size(); 208 | if (len > max_k) 209 | { 210 | for (int i = 0; i != len; i++) 211 | fprintf(fo, "%s\t%s\t%lf\n", vertex[sv].name, vertex[neighbor[sv][i].vid].name, neighbor[sv][i].weight); 212 | num_edges_renet += len; 213 | continue; 214 | } 215 | 216 | vid2weight[sv] += vertex[sv].degree / 10.0; // Set weights for self-links here! 217 | 218 | len = neighbor[sv].size(); 219 | sum = vertex[sv].sum_weight; 220 | 221 | node.push(sv); 222 | depth.push(0); 223 | weight.push(sum); 224 | 225 | while (!node.empty()) 226 | { 227 | cv = node.front(); 228 | cd = depth.front(); 229 | cw = weight.front(); 230 | 231 | node.pop(); 232 | depth.pop(); 233 | weight.pop(); 234 | 235 | if (cd != 0) vid2weight[cv] += cw; 236 | 237 | if (cd < max_depth) 238 | { 239 | len = neighbor[cv].size(); 240 | sum = vertex[cv].sum_weight; 241 | 242 | for (int i = 0; i != len; i++) 243 | { 244 | node.push(neighbor[cv][i].vid); 245 | depth.push(cd + 1); 246 | weight.push(cw * neighbor[cv][i].weight / sum); 247 | } 248 | } 249 | } 250 | 251 | pst = 0; 252 | std::map::iterator iter; 253 | for (iter = vid2weight.begin(); iter != vid2weight.end(); iter++) 254 | { 255 | rank_list[pst].vid = (iter->first); 256 | rank_list[pst].weight = (iter->second); 257 | pst++; 258 | } 259 | std::sort(rank_list, rank_list + pst); 260 | 261 | for (int i = 0; i != max_k; i++) 262 | { 263 | if (i == pst) break; 264 | fprintf(fo, "%s\t%s\t%.6lf\n", vertex[sv].name, vertex[rank_list[i].vid].name, rank_list[i].weight); 265 | num_edges_renet++; 266 | } 267 | } 268 | printf("\n"); 269 | fclose(fo); 270 | 271 | printf("Number of edges in reconstructed network: %lld\n", num_edges_renet); 272 | return; 273 | } 274 | 275 | void TrainLINE() 276 | { 277 | InitHashTable(); 278 | ReadData(); 279 | Reconstruct(); 280 | } 281 | 282 | int ArgPos(char *str, int argc, char **argv) { 283 | int a; 284 | for (a = 1; a < argc; a++) if (!strcmp(str, argv[a])) { 285 | if (a == argc - 1) { 286 | printf("Argument missing for %s\n", str); 287 | exit(1); 288 | } 289 | return a; 290 | } 291 | return -1; 292 | } 293 | 294 | int main(int argc, char **argv) { 295 | int i; 296 | if (argc == 1) { 297 | 
printf("Reconstruct the network by using a Breadth-First-Search strategy\n\n"); 298 | printf("Options:\n"); 299 | printf("Parameters for training:\n"); 300 | printf("\t-train \n"); 301 | printf("\t\tReconstruct the network from \n"); 302 | printf("\t-output \n"); 303 | printf("\t\tUse to save the reconstructed network\n"); 304 | printf("\t-depth \n"); 305 | printf("\t\tThe maximum depth in the Breadth-First-Search; default is 0\n"); 306 | printf("\t-threshold \n"); 307 | printf("\t\tFor vertex whose degree is less than , we will expand its neighbors until the degree reaches \n"); 308 | printf("\nExamples:\n"); 309 | printf("./reconstruct -train net.txt -output net_dense.txt -depth 2 -threshold 1000\n\n"); 310 | return 0; 311 | } 312 | if ((i = ArgPos((char *)"-train", argc, argv)) > 0) strcpy(train_file, argv[i + 1]); 313 | if ((i = ArgPos((char *)"-output", argc, argv)) > 0) strcpy(output_file, argv[i + 1]); 314 | if ((i = ArgPos((char *)"-depth", argc, argv)) > 0) max_depth = atoi(argv[i + 1]); 315 | if ((i = ArgPos((char *)"-threshold", argc, argv)) > 0) max_k = atoi(argv[i + 1]); 316 | vertex = (struct ClassVertex *)calloc(max_num_vertices, sizeof(struct ClassVertex)); 317 | TrainLINE(); 318 | return 0; 319 | } -------------------------------------------------------------------------------- /linux/train_youtube.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | g++ -lm -pthread -Ofast -march=native -Wall -funroll-loops -ffast-math -Wno-unused-result line.cpp -o line -lgsl -lm -lgslcblas 4 | g++ -lm -pthread -Ofast -march=native -Wall -funroll-loops -ffast-math -Wno-unused-result reconstruct.cpp -o reconstruct 5 | g++ -lm -pthread -Ofast -march=native -Wall -funroll-loops -ffast-math -Wno-unused-result normalize.cpp -o normalize 6 | g++ -lm -pthread -Ofast -march=native -Wall -funroll-loops -ffast-math -Wno-unused-result concatenate.cpp -o concatenate 7 | 8 | wget http://socialnetworks.mpi-sws.mpg.de/data/youtube-links.txt.gz 9 | gunzip youtube-links.txt.gz 10 | 11 | python3 preprocess_youtube.py youtube-links.txt net_youtube.txt 12 | ./reconstruct -train net_youtube.txt -output net_youtube_dense.txt -depth 2 -threshold 1000 13 | ./line -train net_youtube_dense.txt -output vec_1st_wo_norm.txt -binary 1 -size 128 -order 1 -negative 5 -samples 10000 -threads 40 14 | ./line -train net_youtube_dense.txt -output vec_2nd_wo_norm.txt -binary 1 -size 128 -order 2 -negative 5 -samples 10000 -threads 40 15 | ./normalize -input vec_1st_wo_norm.txt -output vec_1st.txt -binary 1 16 | ./normalize -input vec_2nd_wo_norm.txt -output vec_2nd.txt -binary 1 17 | ./concatenate -input1 vec_1st.txt -input2 vec_2nd.txt -output vec_all.txt -binary 1 18 | 19 | cd evaluate 20 | ./run.sh ../vec_all.txt 21 | python3 score.py result.txt 22 | cd .. 
23 | -------------------------------------------------------------------------------- /windows/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tangjianpku/LINE/d5f840941e0f4026090d1b1feeaf15da38e2b24b/windows/.DS_Store -------------------------------------------------------------------------------- /windows/concatenate.cpp: -------------------------------------------------------------------------------- 1 | #include <stdio.h> 2 | #include <stdlib.h> 3 | #include <string.h> 4 | #include <math.h> 5 | #include <ctype.h> 6 | 7 | #define MAX_STRING 100 8 | 9 | const int hash_table_size = 30000000; 10 | 11 | typedef float real; // Precision of float numbers 12 | 13 | struct ClassVertex { 14 | double degree; 15 | char *name; 16 | }; 17 | 18 | char vector_file1[MAX_STRING], vector_file2[MAX_STRING], output_file[MAX_STRING]; 19 | struct ClassVertex *vertex; 20 | int binary = 0; 21 | int *vertex_hash_table; 22 | long long max_num_vertices = 1000, num_vertices = 0; 23 | long long vector_dim1, vector_dim2; 24 | real *vec1, *vec2; 25 | 26 | /* Build a hash table, mapping each vertex name to a unique vertex id */ 27 | unsigned int Hash(char *key) 28 | { 29 | unsigned int seed = 131; 30 | unsigned int hash = 0; 31 | while (*key) 32 | { 33 | hash = hash * seed + (*key++); 34 | } 35 | return hash % hash_table_size; 36 | } 37 | 38 | void InitHashTable() 39 | { 40 | vertex_hash_table = (int *)malloc(hash_table_size * sizeof(int)); 41 | for (int k = 0; k != hash_table_size; k++) vertex_hash_table[k] = -1; 42 | } 43 | 44 | void InsertHashTable(char *key, int value) 45 | { 46 | int addr = Hash(key); 47 | while (vertex_hash_table[addr] != -1) addr = (addr + 1) % hash_table_size; 48 | vertex_hash_table[addr] = value; 49 | } 50 | 51 | int SearchHashTable(char *key) 52 | { 53 | int addr = Hash(key); 54 | while (1) 55 | { 56 | if (vertex_hash_table[addr] == -1) return -1; 57 | if (!strcmp(key, vertex[vertex_hash_table[addr]].name)) return vertex_hash_table[addr]; 58 | addr = (addr + 1) % hash_table_size; 59 | } 60 | return -1; 61 | } 62 | 63 | /* Add a vertex to the vertex set */ 64 | int AddVertex(char *name, int vid) 65 | { 66 | int length = strlen(name) + 1; 67 | if (length > MAX_STRING) length = MAX_STRING; 68 | vertex[vid].name = (char *)calloc(length, sizeof(char)); 69 | strcpy(vertex[vid].name, name); 70 | vertex[vid].degree = 0; 71 | InsertHashTable(name, vid); 72 | return vid; 73 | } 74 | 75 | void ReadVector() 76 | { 77 | char ch, name[MAX_STRING]; 78 | real f_num; 79 | long long l; 80 | 81 | FILE *fi = fopen(vector_file1, "rb"); 82 | if (fi == NULL) { 83 | printf("Vector file 1 not found\n"); 84 | exit(1); 85 | } 86 | fscanf(fi, "%lld %lld", &num_vertices, &vector_dim1); 87 | vertex = (struct ClassVertex *)calloc(num_vertices, sizeof(struct ClassVertex)); 88 | vec1 = (real *)calloc(num_vertices * vector_dim1, sizeof(real)); 89 | for (long long k = 0; k != num_vertices; k++) 90 | { 91 | fscanf(fi, "%s", name); 92 | ch = fgetc(fi); 93 | AddVertex(name, k); 94 | l = k * vector_dim1; 95 | for (int c = 0; c != vector_dim1; c++) 96 | { 97 | fread(&f_num, sizeof(real), 1, fi); 98 | vec1[c + l] = (real)f_num; 99 | } 100 | } 101 | fclose(fi); 102 | 103 | fi = fopen(vector_file2, "rb"); 104 | if (fi == NULL) { 105 | printf("Vector file 2 not found\n"); 106 | exit(1); 107 | } 108 | fscanf(fi, "%lld %lld", &l, &vector_dim2); 109 | vec2 = (real *)calloc((num_vertices + 1) * vector_dim2, sizeof(real)); 110 | for (long long k = 0; k != num_vertices; k++) 111 | { 112 | fscanf(fi, "%s", name);
113 | 		ch = fgetc(fi);
114 | 		int i = SearchHashTable(name);
115 | 		if (i == -1) l = num_vertices * vector_dim2;
116 | 		else l = i * vector_dim2;
117 | 		for (int c = 0; c != vector_dim2; c++)
118 | 		{
119 | 			fread(&f_num, sizeof(real), 1, fi);
120 | 			vec2[c + l] = (real)f_num;
121 | 		}
122 | 	}
123 | 	fclose(fi);
124 | 
125 | 	printf("Vocab size: %lld\n", num_vertices);
126 | 	printf("Vector size 1: %lld\n", vector_dim1);
127 | 	printf("Vector size 2: %lld\n", vector_dim2);
128 | }
129 | 
130 | 
131 | void TrainModel() {
132 | 	long long a, b;
133 | 	double len;
134 | 
135 | 	InitHashTable();
136 | 	ReadVector();
137 | 
138 | 	FILE *fo;
139 | 	fo = fopen(output_file, "wb");
140 | 	fprintf(fo, "%lld %lld\n", num_vertices, vector_dim1 + vector_dim2);
141 | 	for (a = 0; a < num_vertices; a++) {
142 | 		fprintf(fo, "%s ", vertex[a].name);
143 | 
144 | 		len = 0;
145 | 		for (b = 0; b < vector_dim1; b++) len += vec1[b + a * vector_dim1] * vec1[b + a * vector_dim1];
146 | 		len = sqrt(len);
147 | 		for (b = 0; b < vector_dim1; b++) vec1[b + a * vector_dim1] /= len;
148 | 
149 | 		len = 0;
150 | 		for (b = 0; b < vector_dim2; b++) len += vec2[b + a * vector_dim2] * vec2[b + a * vector_dim2];
151 | 		len = sqrt(len);
152 | 		for (b = 0; b < vector_dim2; b++) vec2[b + a * vector_dim2] /= len;
153 | 
154 | 		if (binary)
155 | 		{
156 | 			for (b = 0; b < vector_dim1; b++)
157 | 				fwrite(&vec1[a * vector_dim1 + b], sizeof(real), 1, fo);
158 | 			for (b = 0; b < vector_dim2; b++)
159 | 				fwrite(&vec2[a * vector_dim2 + b], sizeof(real), 1, fo);
160 | 		}
161 | 		else
162 | 		{
163 | 			for (b = 0; b < vector_dim1; b++)
164 | 				fprintf(fo, "%lf ", vec1[a * vector_dim1 + b]);
165 | 			for (b = 0; b < vector_dim2; b++)
166 | 				fprintf(fo, "%lf ", vec2[a * vector_dim2 + b]);
167 | 		}
168 | 		fprintf(fo, "\n");
169 | 	}
170 | 	fclose(fo);
171 | }
172 | 
173 | int ArgPos(char *str, int argc, char **argv) {
174 | 	int a;
175 | 	for (a = 1; a < argc; a++) if (!strcmp(str, argv[a])) {
176 | 		if (a == argc - 1) {
177 | 			printf("Argument missing for %s\n", str);
178 | 			exit(1);
179 | 		}
180 | 		return a;
181 | 	}
182 | 	return -1;
183 | }
184 | 
185 | int main(int argc, char **argv) {
186 | 	int i;
187 | 	if (argc == 1) {
188 | 		printf("Concatenate the 1st-order embeddings and the 2nd-order embeddings\n\n");
189 | 		printf("Options:\n");
190 | 		printf("Parameters for training:\n");
191 | 		printf("\t-input1 <file>\n");
192 | 		printf("\t\tThe 1st-order embeddings\n");
193 | 		printf("\t-input2 <file>\n");
194 | 		printf("\t\tThe 2nd-order embeddings\n");
195 | 		printf("\t-output <file>\n");
196 | 		printf("\t\tUse <file> to save the concatenated embeddings\n");
197 | 		printf("\t-binary <int>\n");
198 | 		printf("\t\tSave the learnt embeddings in binary mode; default is 0 (off)\n");
199 | 		printf("\nExamples:\n");
200 | 		printf("./concatenate -input1 vec_1st.txt -input2 vec_2nd.txt -output vec_all.txt -binary 1\n\n");
201 | 		return 0;
202 | 	}
203 | 	if ((i = ArgPos((char *)"-input1", argc, argv)) > 0) strcpy(vector_file1, argv[i + 1]);
204 | 	if ((i = ArgPos((char *)"-input2", argc, argv)) > 0) strcpy(vector_file2, argv[i + 1]);
205 | 	if ((i = ArgPos((char *)"-output", argc, argv)) > 0) strcpy(output_file, argv[i + 1]);
206 | 	if ((i = ArgPos((char *)"-binary", argc, argv)) > 0) binary = atoi(argv[i + 1]);
207 | 	TrainModel();
208 | 	return 0;
209 | }
--------------------------------------------------------------------------------
/windows/evaluate/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tangjianpku/LINE/d5f840941e0f4026090d1b1feeaf15da38e2b24b/windows/evaluate/.DS_Store
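concatenate.cpp above, like line.cpp and reconstruct.cpp, resolves vertex names with one shared scheme: a BKDR-style string hash (seed 131) into a flat int table, collisions handled by linear probing, empty slots marked -1. A self-contained toy version, with the table shrunk from the tools' 30,000,000 slots to 101 for readability:
```
#include <stdio.h>
#include <string.h>

#define TABLE_SIZE 101
static const char *names[] = { "good", "bad", "of", "the" };
static int table[TABLE_SIZE];

/* BKDR hash: hash = hash * 131 + next byte, reduced modulo the table size */
static unsigned int bkdr_hash(const char *key) {
	unsigned int hash = 0;
	while (*key) hash = hash * 131 + (unsigned char)*key++;
	return hash % TABLE_SIZE;
}

int main(void) {
	for (int k = 0; k < TABLE_SIZE; k++) table[k] = -1;   /* -1 marks empty */
	for (int vid = 0; vid < 4; vid++) {                   /* insert */
		int addr = bkdr_hash(names[vid]);
		while (table[addr] != -1) addr = (addr + 1) % TABLE_SIZE;
		table[addr] = vid;
	}
	const char *q = "bad";                                /* lookup */
	int addr = bkdr_hash(q);
	while (table[addr] != -1 && strcmp(q, names[table[addr]]) != 0)
		addr = (addr + 1) % TABLE_SIZE;
	printf("%s -> %d\n", q, table[addr]);                 /* prints: bad -> 1 */
	return 0;
}
```
Open addressing works well here because the tools size the table far larger than any realistic vocabulary, so the load factor stays low and probe chains stay short.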
-------------------------------------------------------------------------------- /windows/evaluate/liblinear/COPYRIGHT: -------------------------------------------------------------------------------- 1 | 2 | Copyright (c) 2007-2013 The LIBLINEAR Project. 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions 7 | are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright 10 | notice, this list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright 13 | notice, this list of conditions and the following disclaimer in the 14 | documentation and/or other materials provided with the distribution. 15 | 16 | 3. Neither name of copyright holders nor the names of its contributors 17 | may be used to endorse or promote products derived from this software 18 | without specific prior written permission. 19 | 20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 | ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR 25 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 26 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 27 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 28 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 29 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 30 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 31 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 | -------------------------------------------------------------------------------- /windows/evaluate/liblinear/Makefile: -------------------------------------------------------------------------------- 1 | CXX ?= g++ 2 | CC ?= gcc 3 | CFLAGS = -Wall -Wconversion -O3 -fPIC 4 | LIBS = blas/blas.a 5 | SHVER = 1 6 | OS = $(shell uname) 7 | #LIBS = -lblas 8 | 9 | all: train predict 10 | 11 | lib: linear.o tron.o blas/blas.a 12 | if [ "$(OS)" = "Darwin" ]; then \ 13 | SHARED_LIB_FLAG="-dynamiclib -Wl,-install_name,liblinear.so.$(SHVER)"; \ 14 | else \ 15 | SHARED_LIB_FLAG="-shared -Wl,-soname,liblinear.so.$(SHVER)"; \ 16 | fi; \ 17 | $(CXX) $${SHARED_LIB_FLAG} linear.o tron.o blas/blas.a -o liblinear.so.$(SHVER) 18 | 19 | train: tron.o linear.o train.c blas/blas.a 20 | $(CXX) $(CFLAGS) -o train train.c tron.o linear.o $(LIBS) 21 | 22 | predict: tron.o linear.o predict.c blas/blas.a 23 | $(CXX) $(CFLAGS) -o predict predict.c tron.o linear.o $(LIBS) 24 | 25 | tron.o: tron.cpp tron.h 26 | $(CXX) $(CFLAGS) -c -o tron.o tron.cpp 27 | 28 | linear.o: linear.cpp linear.h 29 | $(CXX) $(CFLAGS) -c -o linear.o linear.cpp 30 | 31 | blas/blas.a: blas/*.c blas/*.h 32 | make -C blas OPTFLAGS='$(CFLAGS)' CC='$(CC)'; 33 | 34 | clean: 35 | make -C blas clean 36 | make -C matlab clean 37 | rm -f *~ tron.o linear.o train predict liblinear.so.$(SHVER) 38 | -------------------------------------------------------------------------------- /windows/evaluate/liblinear/Makefile.win: -------------------------------------------------------------------------------- 1 | #You must ensure nmake.exe, cl.exe, link.exe are in system path. 
2 | #VCVARS32.bat 3 | #Under dosbox prompt 4 | #nmake -f Makefile.win 5 | 6 | ########################################## 7 | CXX = cl.exe 8 | CFLAGS = -nologo -O2 -EHsc -I. -D __WIN32__ -D _CRT_SECURE_NO_DEPRECATE 9 | TARGET = windows 10 | 11 | all: $(TARGET)\train.exe $(TARGET)\predict.exe 12 | 13 | $(TARGET)\train.exe: tron.obj linear.obj train.c blas\*.c 14 | $(CXX) $(CFLAGS) -Fe$(TARGET)\train.exe tron.obj linear.obj train.c blas\*.c 15 | 16 | $(TARGET)\predict.exe: tron.obj linear.obj predict.c blas\*.c 17 | $(CXX) $(CFLAGS) -Fe$(TARGET)\predict.exe tron.obj linear.obj predict.c blas\*.c 18 | 19 | linear.obj: linear.cpp linear.h 20 | $(CXX) $(CFLAGS) -c linear.cpp 21 | 22 | tron.obj: tron.cpp tron.h 23 | $(CXX) $(CFLAGS) -c tron.cpp 24 | 25 | lib: linear.cpp linear.h linear.def tron.obj 26 | $(CXX) $(CFLAGS) -LD linear.cpp tron.obj blas\*.c -Fe$(TARGET)\liblinear -link -DEF:linear.def 27 | 28 | clean: 29 | -erase /Q *.obj $(TARGET)\. 30 | 31 | -------------------------------------------------------------------------------- /windows/evaluate/liblinear/blas/Makefile: -------------------------------------------------------------------------------- 1 | AR = ar rcv 2 | RANLIB = ranlib 3 | 4 | HEADERS = blas.h blasp.h 5 | FILES = dnrm2.o daxpy.o ddot.o dscal.o 6 | 7 | CFLAGS = $(OPTFLAGS) 8 | FFLAGS = $(OPTFLAGS) 9 | 10 | blas: $(FILES) $(HEADERS) 11 | $(AR) blas.a $(FILES) 12 | $(RANLIB) blas.a 13 | 14 | clean: 15 | - rm -f *.o 16 | - rm -f *.a 17 | - rm -f *~ 18 | 19 | .c.o: 20 | $(CC) $(CFLAGS) -c $*.c 21 | 22 | 23 | -------------------------------------------------------------------------------- /windows/evaluate/liblinear/blas/blas.h: -------------------------------------------------------------------------------- 1 | /* blas.h -- C header file for BLAS Ver 1.0 */ 2 | /* Jesse Bennett March 23, 2000 */ 3 | 4 | /** barf [ba:rf] 2. "He suggested using FORTRAN, and everybody barfed." 5 | 6 | - From The Shogakukan DICTIONARY OF NEW ENGLISH (Second edition) */ 7 | 8 | #ifndef BLAS_INCLUDE 9 | #define BLAS_INCLUDE 10 | 11 | /* Data types specific to BLAS implementation */ 12 | typedef struct { float r, i; } fcomplex; 13 | typedef struct { double r, i; } dcomplex; 14 | typedef int blasbool; 15 | 16 | #include "blasp.h" /* Prototypes for all BLAS functions */ 17 | 18 | #define FALSE 0 19 | #define TRUE 1 20 | 21 | /* Macro functions */ 22 | #define MIN(a,b) ((a) <= (b) ? (a) : (b)) 23 | #define MAX(a,b) ((a) >= (b) ? (a) : (b)) 24 | 25 | #endif 26 | -------------------------------------------------------------------------------- /windows/evaluate/liblinear/blas/daxpy.c: -------------------------------------------------------------------------------- 1 | #include "blas.h" 2 | 3 | int daxpy_(int *n, double *sa, double *sx, int *incx, double *sy, 4 | int *incy) 5 | { 6 | long int i, m, ix, iy, nn, iincx, iincy; 7 | register double ssa; 8 | 9 | /* constant times a vector plus a vector. 10 | uses unrolled loop for increments equal to one. 11 | jack dongarra, linpack, 3/11/78. 
12 | modified 12/3/93, array(1) declarations changed to array(*) */ 13 | 14 | /* Dereference inputs */ 15 | nn = *n; 16 | ssa = *sa; 17 | iincx = *incx; 18 | iincy = *incy; 19 | 20 | if( nn > 0 && ssa != 0.0 ) 21 | { 22 | if (iincx == 1 && iincy == 1) /* code for both increments equal to 1 */ 23 | { 24 | m = nn-3; 25 | for (i = 0; i < m; i += 4) 26 | { 27 | sy[i] += ssa * sx[i]; 28 | sy[i+1] += ssa * sx[i+1]; 29 | sy[i+2] += ssa * sx[i+2]; 30 | sy[i+3] += ssa * sx[i+3]; 31 | } 32 | for ( ; i < nn; ++i) /* clean-up loop */ 33 | sy[i] += ssa * sx[i]; 34 | } 35 | else /* code for unequal increments or equal increments not equal to 1 */ 36 | { 37 | ix = iincx >= 0 ? 0 : (1 - nn) * iincx; 38 | iy = iincy >= 0 ? 0 : (1 - nn) * iincy; 39 | for (i = 0; i < nn; i++) 40 | { 41 | sy[iy] += ssa * sx[ix]; 42 | ix += iincx; 43 | iy += iincy; 44 | } 45 | } 46 | } 47 | 48 | return 0; 49 | } /* daxpy_ */ 50 | -------------------------------------------------------------------------------- /windows/evaluate/liblinear/blas/ddot.c: -------------------------------------------------------------------------------- 1 | #include "blas.h" 2 | 3 | double ddot_(int *n, double *sx, int *incx, double *sy, int *incy) 4 | { 5 | long int i, m, nn, iincx, iincy; 6 | double stemp; 7 | long int ix, iy; 8 | 9 | /* forms the dot product of two vectors. 10 | uses unrolled loops for increments equal to one. 11 | jack dongarra, linpack, 3/11/78. 12 | modified 12/3/93, array(1) declarations changed to array(*) */ 13 | 14 | /* Dereference inputs */ 15 | nn = *n; 16 | iincx = *incx; 17 | iincy = *incy; 18 | 19 | stemp = 0.0; 20 | if (nn > 0) 21 | { 22 | if (iincx == 1 && iincy == 1) /* code for both increments equal to 1 */ 23 | { 24 | m = nn-4; 25 | for (i = 0; i < m; i += 5) 26 | stemp += sx[i] * sy[i] + sx[i+1] * sy[i+1] + sx[i+2] * sy[i+2] + 27 | sx[i+3] * sy[i+3] + sx[i+4] * sy[i+4]; 28 | 29 | for ( ; i < nn; i++) /* clean-up loop */ 30 | stemp += sx[i] * sy[i]; 31 | } 32 | else /* code for unequal increments or equal increments not equal to 1 */ 33 | { 34 | ix = 0; 35 | iy = 0; 36 | if (iincx < 0) 37 | ix = (1 - nn) * iincx; 38 | if (iincy < 0) 39 | iy = (1 - nn) * iincy; 40 | for (i = 0; i < nn; i++) 41 | { 42 | stemp += sx[ix] * sy[iy]; 43 | ix += iincx; 44 | iy += iincy; 45 | } 46 | } 47 | } 48 | 49 | return stemp; 50 | } /* ddot_ */ 51 | -------------------------------------------------------------------------------- /windows/evaluate/liblinear/blas/dnrm2.c: -------------------------------------------------------------------------------- 1 | #include /* Needed for fabs() and sqrt() */ 2 | #include "blas.h" 3 | 4 | double dnrm2_(int *n, double *x, int *incx) 5 | { 6 | long int ix, nn, iincx; 7 | double norm, scale, absxi, ssq, temp; 8 | 9 | /* DNRM2 returns the euclidean norm of a vector via the function 10 | name, so that 11 | 12 | DNRM2 := sqrt( x'*x ) 13 | 14 | -- This version written on 25-October-1982. 15 | Modified on 14-October-1993 to inline the call to SLASSQ. 16 | Sven Hammarling, Nag Ltd. 
*/ 17 | 18 | /* Dereference inputs */ 19 | nn = *n; 20 | iincx = *incx; 21 | 22 | if( nn > 0 && iincx > 0 ) 23 | { 24 | if (nn == 1) 25 | { 26 | norm = fabs(x[0]); 27 | } 28 | else 29 | { 30 | scale = 0.0; 31 | ssq = 1.0; 32 | 33 | /* The following loop is equivalent to this call to the LAPACK 34 | auxiliary routine: CALL SLASSQ( N, X, INCX, SCALE, SSQ ) */ 35 | 36 | for (ix=(nn-1)*iincx; ix>=0; ix-=iincx) 37 | { 38 | if (x[ix] != 0.0) 39 | { 40 | absxi = fabs(x[ix]); 41 | if (scale < absxi) 42 | { 43 | temp = scale / absxi; 44 | ssq = ssq * (temp * temp) + 1.0; 45 | scale = absxi; 46 | } 47 | else 48 | { 49 | temp = absxi / scale; 50 | ssq += temp * temp; 51 | } 52 | } 53 | } 54 | norm = scale * sqrt(ssq); 55 | } 56 | } 57 | else 58 | norm = 0.0; 59 | 60 | return norm; 61 | 62 | } /* dnrm2_ */ 63 | -------------------------------------------------------------------------------- /windows/evaluate/liblinear/blas/dscal.c: -------------------------------------------------------------------------------- 1 | #include "blas.h" 2 | 3 | int dscal_(int *n, double *sa, double *sx, int *incx) 4 | { 5 | long int i, m, nincx, nn, iincx; 6 | double ssa; 7 | 8 | /* scales a vector by a constant. 9 | uses unrolled loops for increment equal to 1. 10 | jack dongarra, linpack, 3/11/78. 11 | modified 3/93 to return if incx .le. 0. 12 | modified 12/3/93, array(1) declarations changed to array(*) */ 13 | 14 | /* Dereference inputs */ 15 | nn = *n; 16 | iincx = *incx; 17 | ssa = *sa; 18 | 19 | if (nn > 0 && iincx > 0) 20 | { 21 | if (iincx == 1) /* code for increment equal to 1 */ 22 | { 23 | m = nn-4; 24 | for (i = 0; i < m; i += 5) 25 | { 26 | sx[i] = ssa * sx[i]; 27 | sx[i+1] = ssa * sx[i+1]; 28 | sx[i+2] = ssa * sx[i+2]; 29 | sx[i+3] = ssa * sx[i+3]; 30 | sx[i+4] = ssa * sx[i+4]; 31 | } 32 | for ( ; i < nn; ++i) /* clean-up loop */ 33 | sx[i] = ssa * sx[i]; 34 | } 35 | else /* code for increment not equal to 1 */ 36 | { 37 | nincx = nn * iincx; 38 | for (i = 0; i < nincx; i += iincx) 39 | sx[i] = ssa * sx[i]; 40 | } 41 | } 42 | 43 | return 0; 44 | } /* dscal_ */ 45 | -------------------------------------------------------------------------------- /windows/evaluate/liblinear/linear.def: -------------------------------------------------------------------------------- 1 | LIBRARY liblinear 2 | EXPORTS 3 | train @1 4 | cross_validation @2 5 | save_model @3 6 | load_model @4 7 | get_nr_feature @5 8 | get_nr_class @6 9 | get_labels @7 10 | predict_values @8 11 | predict @9 12 | predict_probability @10 13 | free_and_destroy_model @11 14 | free_model_content @12 15 | destroy_param @13 16 | check_parameter @14 17 | check_probability_model @15 18 | set_print_string_function @16 19 | -------------------------------------------------------------------------------- /windows/evaluate/liblinear/linear.h: -------------------------------------------------------------------------------- 1 | #ifndef _LIBLINEAR_H 2 | #define _LIBLINEAR_H 3 | 4 | #ifdef __cplusplus 5 | extern "C" { 6 | #endif 7 | 8 | struct feature_node 9 | { 10 | int index; 11 | double value; 12 | }; 13 | 14 | struct problem 15 | { 16 | int l, n; 17 | double *y; 18 | struct feature_node **x; 19 | double bias; /* < 0 if no bias term */ 20 | }; 21 | 22 | enum { L2R_LR, L2R_L2LOSS_SVC_DUAL, L2R_L2LOSS_SVC, L2R_L1LOSS_SVC_DUAL, MCSVM_CS, L1R_L2LOSS_SVC, L1R_LR, L2R_LR_DUAL, L2R_L2LOSS_SVR = 11, L2R_L2LOSS_SVR_DUAL, L2R_L1LOSS_SVR_DUAL }; /* solver_type */ 23 | 24 | struct parameter 25 | { 26 | int solver_type; 27 | 28 | /* these are for training only */ 29 | double 
eps; /* stopping criteria */ 30 | double C; 31 | int nr_weight; 32 | int *weight_label; 33 | double* weight; 34 | double p; 35 | }; 36 | 37 | struct model 38 | { 39 | struct parameter param; 40 | int nr_class; /* number of classes */ 41 | int nr_feature; 42 | double *w; 43 | int *label; /* label of each class */ 44 | double bias; 45 | }; 46 | 47 | struct model* train(const struct problem *prob, const struct parameter *param); 48 | void cross_validation(const struct problem *prob, const struct parameter *param, int nr_fold, double *target); 49 | 50 | double predict_values(const struct model *model_, const struct feature_node *x, double* dec_values); 51 | double predict(const struct model *model_, const struct feature_node *x); 52 | double predict_probability(const struct model *model_, const struct feature_node *x, double* prob_estimates); 53 | 54 | int save_model(const char *model_file_name, const struct model *model_); 55 | struct model *load_model(const char *model_file_name); 56 | 57 | int get_nr_feature(const struct model *model_); 58 | int get_nr_class(const struct model *model_); 59 | void get_labels(const struct model *model_, int* label); 60 | 61 | void free_model_content(struct model *model_ptr); 62 | void free_and_destroy_model(struct model **model_ptr_ptr); 63 | void destroy_param(struct parameter *param); 64 | 65 | const char *check_parameter(const struct problem *prob, const struct parameter *param); 66 | int check_probability_model(const struct model *model); 67 | void set_print_string_function(void (*print_func) (const char*)); 68 | 69 | #ifdef __cplusplus 70 | } 71 | #endif 72 | 73 | #endif /* _LIBLINEAR_H */ 74 | 75 | -------------------------------------------------------------------------------- /windows/evaluate/liblinear/matlab/Makefile: -------------------------------------------------------------------------------- 1 | # This Makefile is used under Linux 2 | 3 | MATLABDIR ?= /usr/local/matlab 4 | CXX ?= g++ 5 | #CXX = g++-3.3 6 | CC ?= gcc 7 | CFLAGS = -Wall -Wconversion -O3 -fPIC -I$(MATLABDIR)/extern/include -I.. 8 | 9 | MEX = $(MATLABDIR)/bin/mex 10 | MEX_OPTION = CC\#$(CXX) CXX\#$(CXX) CFLAGS\#"$(CFLAGS)" CXXFLAGS\#"$(CFLAGS)" 11 | # comment the following line if you use MATLAB on a 32-bit computer 12 | MEX_OPTION += -largeArrayDims 13 | MEX_EXT = $(shell $(MATLABDIR)/bin/mexext) 14 | 15 | OCTAVEDIR ?= /usr/include/octave 16 | OCTAVE_MEX = env CC=$(CXX) mkoctfile 17 | OCTAVE_MEX_OPTION = --mex 18 | OCTAVE_MEX_EXT = mex 19 | OCTAVE_CFLAGS = -Wall -O3 -fPIC -I$(OCTAVEDIR) -I.. 
20 | 21 | all: matlab 22 | 23 | matlab: binary 24 | 25 | octave: 26 | @make MEX="$(OCTAVE_MEX)" MEX_OPTION="$(OCTAVE_MEX_OPTION)" \ 27 | MEX_EXT="$(OCTAVE_MEX_EXT)" CFLAGS="$(OCTAVE_CFLAGS)" \ 28 | binary 29 | 30 | binary: train.$(MEX_EXT) predict.$(MEX_EXT) libsvmread.$(MEX_EXT) libsvmwrite.$(MEX_EXT) 31 | 32 | train.$(MEX_EXT): train.c ../linear.h ../tron.o ../linear.o linear_model_matlab.o ../blas/blas.a 33 | $(MEX) $(MEX_OPTION) train.c ../tron.o ../linear.o linear_model_matlab.o ../blas/blas.a 34 | 35 | predict.$(MEX_EXT): predict.c ../linear.h ../tron.o ../linear.o linear_model_matlab.o ../blas/blas.a 36 | $(MEX) $(MEX_OPTION) predict.c ../tron.o ../linear.o linear_model_matlab.o ../blas/blas.a 37 | 38 | libsvmread.$(MEX_EXT): libsvmread.c 39 | $(MEX) $(MEX_OPTION) libsvmread.c 40 | 41 | libsvmwrite.$(MEX_EXT): libsvmwrite.c 42 | $(MEX) $(MEX_OPTION) libsvmwrite.c 43 | 44 | linear_model_matlab.o: linear_model_matlab.c ../linear.h 45 | $(CXX) $(CFLAGS) -c linear_model_matlab.c 46 | 47 | ../linear.o: ../linear.cpp ../linear.h 48 | make -C .. linear.o 49 | 50 | ../tron.o: ../tron.cpp ../tron.h 51 | make -C .. tron.o 52 | 53 | ../blas/blas.a: ../blas/*.c ../blas/*.h 54 | make -C ../blas OPTFLAGS='$(CFLAGS)' CC='$(CC)'; 55 | 56 | clean: 57 | make -C ../blas clean 58 | rm -f *~ *.o *.mex* *.obj ../linear.o ../tron.o 59 | -------------------------------------------------------------------------------- /windows/evaluate/liblinear/matlab/README: -------------------------------------------------------------------------------- 1 | -------------------------------------------- 2 | --- MATLAB/OCTAVE interface of LIBLINEAR --- 3 | -------------------------------------------- 4 | 5 | Table of Contents 6 | ================= 7 | 8 | - Introduction 9 | - Installation 10 | - Usage 11 | - Returned Model Structure 12 | - Other Utilities 13 | - Examples 14 | - Additional Information 15 | 16 | 17 | Introduction 18 | ============ 19 | 20 | This tool provides a simple interface to LIBLINEAR, a library for 21 | large-scale regularized linear classification and regression 22 | (http://www.csie.ntu.edu.tw/~cjlin/liblinear). It is very easy to use 23 | as the usage and the way of specifying parameters are the same as that 24 | of LIBLINEAR. 25 | 26 | Installation 27 | ============ 28 | 29 | On Windows systems, pre-built binary files are already in the 30 | directory '..\windows', so no need to conduct installation. Now we 31 | provide binary files only for 64bit MATLAB on Windows. If you would 32 | like to re-build the package, please rely on the following steps. 33 | 34 | We recommend using make.m on both MATLAB and OCTAVE. Just type 'make' 35 | to build 'libsvmread.mex', 'libsvmwrite.mex', 'train.mex', and 36 | 'predict.mex'. 37 | 38 | On MATLAB or Octave: 39 | 40 | >> make 41 | 42 | If make.m does not work on MATLAB (especially for Windows), try 'mex 43 | -setup' to choose a suitable compiler for mex. Make sure your compiler 44 | is accessible and workable. Then type 'make' to start the 45 | installation. 46 | 47 | Example: 48 | 49 | matlab>> mex -setup 50 | (ps: MATLAB will show the following messages to setup default compiler.) 51 | Please choose your compiler for building external interface (MEX) files: 52 | Would you like mex to locate installed compilers [y]/n? 
y 53 | Select a compiler: 54 | [1] Microsoft Visual C/C++ version 7.1 in C:\Program Files\Microsoft Visual Studio 55 | [0] None 56 | Compiler: 1 57 | Please verify your choices: 58 | Compiler: Microsoft Visual C/C++ 7.1 59 | Location: C:\Program Files\Microsoft Visual Studio 60 | Are these correct?([y]/n): y 61 | 62 | matlab>> make 63 | 64 | On Unix systems, if neither make.m nor 'mex -setup' works, please use 65 | Makefile and type 'make' in a command window. Note that we assume 66 | your MATLAB is installed in '/usr/local/matlab'. If not, please change 67 | MATLABDIR in Makefile. 68 | 69 | Example: 70 | linux> make 71 | 72 | To use octave, type 'make octave': 73 | 74 | Example: 75 | linux> make octave 76 | 77 | For a list of supported/compatible compilers for MATLAB, please check 78 | the following page: 79 | 80 | http://www.mathworks.com/support/compilers/current_release/ 81 | 82 | Usage 83 | ===== 84 | 85 | matlab> model = train(training_label_vector, training_instance_matrix [,'liblinear_options', 'col']); 86 | 87 | -training_label_vector: 88 | An m by 1 vector of training labels. (type must be double) 89 | -training_instance_matrix: 90 | An m by n matrix of m training instances with n features. 91 | It must be a sparse matrix. (type must be double) 92 | -liblinear_options: 93 | A string of training options in the same format as that of LIBLINEAR. 94 | -col: 95 | if 'col' is set, each column of training_instance_matrix is a data instance. Otherwise each row is a data instance. 96 | 97 | matlab> [predicted_label, accuracy, decision_values/prob_estimates] = predict(testing_label_vector, testing_instance_matrix, model [, 'liblinear_options', 'col']); 98 | matlab> [predicted_label] = predict(testing_label_vector, testing_instance_matrix, model [, 'liblinear_options', 'col']); 99 | 100 | -testing_label_vector: 101 | An m by 1 vector of prediction labels. If labels of test 102 | data are unknown, simply use any random values. (type must be double) 103 | -testing_instance_matrix: 104 | An m by n matrix of m testing instances with n features. 105 | It must be a sparse matrix. (type must be double) 106 | -model: 107 | The output of train. 108 | -liblinear_options: 109 | A string of testing options in the same format as that of LIBLINEAR. 110 | -col: 111 | if 'col' is set, each column of testing_instance_matrix is a data instance. Otherwise each row is a data instance. 112 | 113 | Returned Model Structure 114 | ======================== 115 | 116 | The 'train' function returns a model which can be used for future 117 | prediction. It is a structure and is organized as [Parameters, nr_class, 118 | nr_feature, bias, Label, w]: 119 | 120 | -Parameters: Parameters 121 | -nr_class: number of classes; = 2 for regression 122 | -nr_feature: number of features in training data (without including the bias term) 123 | -bias: If >= 0, we assume one additional feature is added to the end 124 | of each data instance. 125 | -Label: label of each class; empty for regression 126 | -w: a nr_w-by-n matrix for the weights, where n is nr_feature 127 | or nr_feature+1 depending on the existence of the bias term. 128 | nr_w is 1 if nr_class=2 and -s is not 4 (i.e., not 129 | multi-class svm by Crammer and Singer). It is 130 | nr_class otherwise. 131 | 132 | If the '-v' option is specified, cross validation is conducted and the 133 | returned model is just a scalar: cross-validation accuracy for 134 | classification and mean-squared error for regression. 
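For reference, the '-v' behaviour described above is a thin MATLAB wrapper over cross_validation() from the C API (see linear.h earlier in this listing). Below is a minimal sketch of computing the same scalar accuracy by hand; prob and param are assumed to be set up exactly as for an ordinary train() call, and cv_accuracy is an illustrative helper name, not part of the library:
```
#include <stdlib.h>
#include "linear.h"

/* Run 5-fold cross validation and return classification accuracy in percent. */
double cv_accuracy(const struct problem *prob, const struct parameter *param)
{
	double *target = (double *)malloc(prob->l * sizeof(double));
	int correct = 0;
	cross_validation(prob, param, 5, target);   /* fills predicted labels */
	for (int i = 0; i < prob->l; i++)
		if (target[i] == prob->y[i]) correct++;
	free(target);
	return 100.0 * correct / prob->l;
}
```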
135 | 
136 | Result of Prediction
137 | ====================
138 | 
139 | The function 'predict' has three outputs. The first one,
140 | predicted_label, is a vector of predicted labels. The second output,
141 | accuracy, is a vector including accuracy (for classification), mean
142 | squared error, and squared correlation coefficient (for regression).
143 | The third is a matrix containing decision values or probability
144 | estimates (if '-b 1' is specified). If k is the number of classes
145 | and k' is the number of classifiers (k'=1 if k=2, otherwise k'=k), for decision values,
146 | each row includes results of k' binary linear classifiers. For probabilities,
147 | each row contains k values indicating the probability that the testing instance is in
148 | each class. Note that the order of classes here is the same as the 'Label'
149 | field in the model structure.
150 | 
151 | Other Utilities
152 | ===============
153 | 
154 | A matlab function libsvmread reads files in LIBSVM format:
155 | 
156 | [label_vector, instance_matrix] = libsvmread('data.txt');
157 | 
158 | Two outputs are labels and instances, which can then be used as inputs
159 | of svmtrain or svmpredict.
160 | 
161 | A matlab function libsvmwrite writes a Matlab matrix to a file in LIBSVM format:
162 | 
163 | libsvmwrite('data.txt', label_vector, instance_matrix);
164 | 
165 | The instance_matrix must be a sparse matrix. (type must be double)
166 | For Windows, `libsvmread.mexw64' and `libsvmwrite.mexw64' are ready in
167 | the directory `..\windows'.
168 | 
169 | These codes were prepared by Rong-En Fan and Kai-Wei Chang from National
170 | Taiwan University.
171 | 
172 | Examples
173 | ========
174 | 
175 | Train and test on the provided data heart_scale:
176 | 
177 | matlab> [heart_scale_label, heart_scale_inst] = libsvmread('../heart_scale');
178 | matlab> model = train(heart_scale_label, heart_scale_inst, '-c 1');
179 | matlab> [predict_label, accuracy, dec_values] = predict(heart_scale_label, heart_scale_inst, model); % test the training data
180 | 
181 | Note that for testing, you can put anything in the testing_label_vector.
182 | 
183 | For probability estimates, you need '-b 1' only in the testing phase:
184 | 
185 | matlab> [predict_label, accuracy, prob_estimates] = predict(heart_scale_label, heart_scale_inst, model, '-b 1');
186 | 
187 | Additional Information
188 | ======================
189 | 
190 | Please cite LIBLINEAR as follows
191 | 
192 | R.-E. Fan, K.-W. Chang, C.-J. Hsieh, X.-R. Wang, and C.-J. Lin.
193 | LIBLINEAR: A Library for Large Linear Classification, Journal of
194 | Machine Learning Research 9(2008), 1871-1874. Software available at
195 | http://www.csie.ntu.edu.tw/~cjlin/liblinear
196 | 
197 | For any question, please contact Chih-Jen Lin <cjlin@csie.ntu.edu.tw>.
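The train/predict calls documented above are MEX wrappers around the corresponding C functions in linear.h. For readers working outside MATLAB, here is a minimal end-to-end use of that C API on two made-up instances (toy data, not part of the package; link against the objects produced by the liblinear Makefile above):
```
#include <stdio.h>
#include "linear.h"

int main(void) {
	/* two 2-feature instances in sparse form; index -1 terminates a row */
	struct feature_node x1[] = { {1, 1.0}, {2, 1.0}, {-1, 0.0} };
	struct feature_node x2[] = { {1, -1.0}, {2, -1.0}, {-1, 0.0} };
	struct feature_node *x[] = { x1, x2 };
	double y[] = { +1, -1 };

	struct problem prob = { 2, 2, y, x, -1 };      /* l, n, y, x, bias */
	struct parameter param = { L2R_L2LOSS_SVC_DUAL };
	param.C = 1; param.eps = 0.1;                  /* library defaults for this solver */

	const char *err = check_parameter(&prob, &param);
	if (err) { fprintf(stderr, "%s\n", err); return 1; }

	struct model *m = train(&prob, &param);
	printf("predict((1,1)) = %g\n", predict(m, x1));
	free_and_destroy_model(&m);
	destroy_param(&param);
	return 0;
}
```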
198 | 199 | -------------------------------------------------------------------------------- /windows/evaluate/liblinear/matlab/libsvmread.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include "mex.h" 8 | 9 | #ifdef MX_API_VER 10 | #if MX_API_VER < 0x07030000 11 | typedef int mwIndex; 12 | #endif 13 | #endif 14 | #ifndef max 15 | #define max(x,y) (((x)>(y))?(x):(y)) 16 | #endif 17 | #ifndef min 18 | #define min(x,y) (((x)<(y))?(x):(y)) 19 | #endif 20 | 21 | void exit_with_help() 22 | { 23 | mexPrintf( 24 | "Usage: [label_vector, instance_matrix] = libsvmread('filename');\n" 25 | ); 26 | } 27 | 28 | static void fake_answer(int nlhs, mxArray *plhs[]) 29 | { 30 | int i; 31 | for(i=0;i start from 0 87 | strtok(line," \t"); // label 88 | while (1) 89 | { 90 | idx = strtok(NULL,":"); // index:value 91 | val = strtok(NULL," \t"); 92 | if(val == NULL) 93 | break; 94 | 95 | errno = 0; 96 | index = (int) strtol(idx,&endptr,10); 97 | if(endptr == idx || errno != 0 || *endptr != '\0' || index <= inst_max_index) 98 | { 99 | mexPrintf("Wrong input format at line %d\n",l+1); 100 | fake_answer(nlhs, plhs); 101 | return; 102 | } 103 | else 104 | inst_max_index = index; 105 | 106 | min_index = min(min_index, index); 107 | elements++; 108 | } 109 | max_index = max(max_index, inst_max_index); 110 | l++; 111 | } 112 | rewind(fp); 113 | 114 | // y 115 | plhs[0] = mxCreateDoubleMatrix(l, 1, mxREAL); 116 | // x^T 117 | if (min_index <= 0) 118 | plhs[1] = mxCreateSparse(max_index-min_index+1, l, elements, mxREAL); 119 | else 120 | plhs[1] = mxCreateSparse(max_index, l, elements, mxREAL); 121 | 122 | labels = mxGetPr(plhs[0]); 123 | samples = mxGetPr(plhs[1]); 124 | ir = mxGetIr(plhs[1]); 125 | jc = mxGetJc(plhs[1]); 126 | 127 | k=0; 128 | for(i=0;i start from 0 159 | 160 | errno = 0; 161 | samples[k] = strtod(val,&endptr); 162 | if (endptr == val || errno != 0 || (*endptr != '\0' && !isspace(*endptr))) 163 | { 164 | mexPrintf("Wrong input format at line %d\n",i+1); 165 | fake_answer(nlhs, plhs); 166 | return; 167 | } 168 | ++k; 169 | } 170 | } 171 | jc[l] = k; 172 | 173 | fclose(fp); 174 | free(line); 175 | 176 | { 177 | mxArray *rhs[1], *lhs[1]; 178 | rhs[0] = plhs[1]; 179 | if(mexCallMATLAB(1, lhs, 1, rhs, "transpose")) 180 | { 181 | mexPrintf("Error: cannot transpose problem\n"); 182 | fake_answer(nlhs, plhs); 183 | return; 184 | } 185 | plhs[1] = lhs[0]; 186 | } 187 | } 188 | 189 | void mexFunction( int nlhs, mxArray *plhs[], 190 | int nrhs, const mxArray *prhs[] ) 191 | { 192 | char filename[256]; 193 | 194 | if(nrhs != 1 || nlhs != 2) 195 | { 196 | exit_with_help(); 197 | fake_answer(nlhs, plhs); 198 | return; 199 | } 200 | 201 | mxGetString(prhs[0], filename, mxGetN(prhs[0]) + 1); 202 | 203 | if(filename == NULL) 204 | { 205 | mexPrintf("Error: filename is NULL\n"); 206 | return; 207 | } 208 | 209 | read_problem(filename, nlhs, plhs); 210 | 211 | return; 212 | } 213 | 214 | -------------------------------------------------------------------------------- /windows/evaluate/liblinear/matlab/libsvmwrite.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "mex.h" 5 | 6 | #ifdef MX_API_VER 7 | #if MX_API_VER < 0x07030000 8 | typedef int mwIndex; 9 | #endif 10 | #endif 11 | 12 | void exit_with_help() 13 | { 14 | mexPrintf( 15 | "Usage: libsvmwrite('filename', label_vector, instance_matrix);\n" 16 | ); 17 | } 18 | 19 | 
static void fake_answer(int nlhs, mxArray *plhs[]) 20 | { 21 | int i; 22 | for(i=0;i 0) 89 | { 90 | exit_with_help(); 91 | fake_answer(nlhs, plhs); 92 | return; 93 | } 94 | 95 | // Transform the input Matrix to libsvm format 96 | if(nrhs == 3) 97 | { 98 | char filename[256]; 99 | if(!mxIsDouble(prhs[1]) || !mxIsDouble(prhs[2])) 100 | { 101 | mexPrintf("Error: label vector and instance matrix must be double\n"); 102 | return; 103 | } 104 | 105 | mxGetString(prhs[0], filename, mxGetN(prhs[0])+1); 106 | 107 | if(mxIsSparse(prhs[2])) 108 | libsvmwrite(filename, prhs[1], prhs[2]); 109 | else 110 | { 111 | mexPrintf("Instance_matrix must be sparse\n"); 112 | return; 113 | } 114 | } 115 | else 116 | { 117 | exit_with_help(); 118 | return; 119 | } 120 | } 121 | -------------------------------------------------------------------------------- /windows/evaluate/liblinear/matlab/linear_model_matlab.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include "../linear.h" 4 | 5 | #include "mex.h" 6 | 7 | #ifdef MX_API_VER 8 | #if MX_API_VER < 0x07030000 9 | typedef int mwIndex; 10 | #endif 11 | #endif 12 | 13 | #define Malloc(type,n) (type *)malloc((n)*sizeof(type)) 14 | 15 | #define NUM_OF_RETURN_FIELD 6 16 | 17 | static const char *field_names[] = { 18 | "Parameters", 19 | "nr_class", 20 | "nr_feature", 21 | "bias", 22 | "Label", 23 | "w", 24 | }; 25 | 26 | const char *model_to_matlab_structure(mxArray *plhs[], struct model *model_) 27 | { 28 | int i; 29 | int nr_w; 30 | double *ptr; 31 | mxArray *return_model, **rhs; 32 | int out_id = 0; 33 | int n, w_size; 34 | 35 | rhs = (mxArray **)mxMalloc(sizeof(mxArray *)*NUM_OF_RETURN_FIELD); 36 | 37 | // Parameters 38 | // for now, only solver_type is needed 39 | rhs[out_id] = mxCreateDoubleMatrix(1, 1, mxREAL); 40 | ptr = mxGetPr(rhs[out_id]); 41 | ptr[0] = model_->param.solver_type; 42 | out_id++; 43 | 44 | // nr_class 45 | rhs[out_id] = mxCreateDoubleMatrix(1, 1, mxREAL); 46 | ptr = mxGetPr(rhs[out_id]); 47 | ptr[0] = model_->nr_class; 48 | out_id++; 49 | 50 | if(model_->nr_class==2 && model_->param.solver_type != MCSVM_CS) 51 | nr_w=1; 52 | else 53 | nr_w=model_->nr_class; 54 | 55 | // nr_feature 56 | rhs[out_id] = mxCreateDoubleMatrix(1, 1, mxREAL); 57 | ptr = mxGetPr(rhs[out_id]); 58 | ptr[0] = model_->nr_feature; 59 | out_id++; 60 | 61 | // bias 62 | rhs[out_id] = mxCreateDoubleMatrix(1, 1, mxREAL); 63 | ptr = mxGetPr(rhs[out_id]); 64 | ptr[0] = model_->bias; 65 | out_id++; 66 | 67 | if(model_->bias>=0) 68 | n=model_->nr_feature+1; 69 | else 70 | n=model_->nr_feature; 71 | 72 | w_size = n; 73 | // Label 74 | if(model_->label) 75 | { 76 | rhs[out_id] = mxCreateDoubleMatrix(model_->nr_class, 1, mxREAL); 77 | ptr = mxGetPr(rhs[out_id]); 78 | for(i = 0; i < model_->nr_class; i++) 79 | ptr[i] = model_->label[i]; 80 | } 81 | else 82 | rhs[out_id] = mxCreateDoubleMatrix(0, 0, mxREAL); 83 | out_id++; 84 | 85 | // w 86 | rhs[out_id] = mxCreateDoubleMatrix(nr_w, w_size, mxREAL); 87 | ptr = mxGetPr(rhs[out_id]); 88 | for(i = 0; i < w_size*nr_w; i++) 89 | ptr[i]=model_->w[i]; 90 | out_id++; 91 | 92 | /* Create a struct matrix contains NUM_OF_RETURN_FIELD fields */ 93 | return_model = mxCreateStructMatrix(1, 1, NUM_OF_RETURN_FIELD, field_names); 94 | 95 | /* Fill struct matrix with input arguments */ 96 | for(i = 0; i < NUM_OF_RETURN_FIELD; i++) 97 | mxSetField(return_model,0,field_names[i],mxDuplicateArray(rhs[i])); 98 | /* return */ 99 | plhs[0] = return_model; 100 | mxFree(rhs); 101 | 102 | return 
NULL; 103 | } 104 | 105 | const char *matlab_matrix_to_model(struct model *model_, const mxArray *matlab_struct) 106 | { 107 | int i, num_of_fields; 108 | int nr_w; 109 | double *ptr; 110 | int id = 0; 111 | int n, w_size; 112 | mxArray **rhs; 113 | 114 | num_of_fields = mxGetNumberOfFields(matlab_struct); 115 | rhs = (mxArray **) mxMalloc(sizeof(mxArray *)*num_of_fields); 116 | 117 | for(i=0;inr_class=0; 121 | nr_w=0; 122 | model_->nr_feature=0; 123 | model_->w=NULL; 124 | model_->label=NULL; 125 | 126 | // Parameters 127 | ptr = mxGetPr(rhs[id]); 128 | model_->param.solver_type = (int)ptr[0]; 129 | id++; 130 | 131 | // nr_class 132 | ptr = mxGetPr(rhs[id]); 133 | model_->nr_class = (int)ptr[0]; 134 | id++; 135 | 136 | if(model_->nr_class==2 && model_->param.solver_type != MCSVM_CS) 137 | nr_w=1; 138 | else 139 | nr_w=model_->nr_class; 140 | 141 | // nr_feature 142 | ptr = mxGetPr(rhs[id]); 143 | model_->nr_feature = (int)ptr[0]; 144 | id++; 145 | 146 | // bias 147 | ptr = mxGetPr(rhs[id]); 148 | model_->bias = (int)ptr[0]; 149 | id++; 150 | 151 | if(model_->bias>=0) 152 | n=model_->nr_feature+1; 153 | else 154 | n=model_->nr_feature; 155 | w_size = n; 156 | 157 | // Label 158 | if(mxIsEmpty(rhs[id]) == 0) 159 | { 160 | model_->label = Malloc(int, model_->nr_class); 161 | ptr = mxGetPr(rhs[id]); 162 | for(i=0;inr_class;i++) 163 | model_->label[i] = (int)ptr[i]; 164 | } 165 | id++; 166 | 167 | ptr = mxGetPr(rhs[id]); 168 | model_->w=Malloc(double, w_size*nr_w); 169 | for(i = 0; i < w_size*nr_w; i++) 170 | model_->w[i]=ptr[i]; 171 | id++; 172 | mxFree(rhs); 173 | 174 | return NULL; 175 | } 176 | 177 | -------------------------------------------------------------------------------- /windows/evaluate/liblinear/matlab/linear_model_matlab.h: -------------------------------------------------------------------------------- 1 | const char *model_to_matlab_structure(mxArray *plhs[], struct model *model_); 2 | const char *matlab_matrix_to_model(struct model *model_, const mxArray *matlab_struct); 3 | -------------------------------------------------------------------------------- /windows/evaluate/liblinear/matlab/make.m: -------------------------------------------------------------------------------- 1 | % This make.m is for MATLAB and OCTAVE under Windows, Mac, and Unix 2 | 3 | try 4 | Type = ver; 5 | % This part is for OCTAVE 6 | if(strcmp(Type(1).Name, 'Octave') == 1) 7 | mex libsvmread.c 8 | mex libsvmwrite.c 9 | mex train.c linear_model_matlab.c ../linear.cpp ../tron.cpp ../blas/*.c 10 | mex predict.c linear_model_matlab.c ../linear.cpp ../tron.cpp ../blas/*.c 11 | % This part is for MATLAB 12 | % Add -largeArrayDims on 64-bit machines of MATLAB 13 | else 14 | mex CFLAGS="\$CFLAGS -std=c99" -largeArrayDims libsvmread.c 15 | mex CFLAGS="\$CFLAGS -std=c99" -largeArrayDims libsvmwrite.c 16 | mex CFLAGS="\$CFLAGS -std=c99" -largeArrayDims train.c linear_model_matlab.c ../linear.cpp ../tron.cpp "../blas/*.c" 17 | mex CFLAGS="\$CFLAGS -std=c99" -largeArrayDims predict.c linear_model_matlab.c ../linear.cpp ../tron.cpp "../blas/*.c" 18 | end 19 | catch 20 | fprintf('If make.m fails, please check README about detailed instructions.\n'); 21 | end 22 | -------------------------------------------------------------------------------- /windows/evaluate/liblinear/matlab/predict.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include "../linear.h" 5 | 6 | #include "mex.h" 7 | #include "linear_model_matlab.h" 8 | 9 | #ifdef 
MX_API_VER 10 | #if MX_API_VER < 0x07030000 11 | typedef int mwIndex; 12 | #endif 13 | #endif 14 | 15 | #define CMD_LEN 2048 16 | 17 | #define Malloc(type,n) (type *)malloc((n)*sizeof(type)) 18 | 19 | int print_null(const char *s,...) {} 20 | int (*info)(const char *fmt,...); 21 | 22 | int col_format_flag; 23 | 24 | void read_sparse_instance(const mxArray *prhs, int index, struct feature_node *x, int feature_number, double bias) 25 | { 26 | int i, j, low, high; 27 | mwIndex *ir, *jc; 28 | double *samples; 29 | 30 | ir = mxGetIr(prhs); 31 | jc = mxGetJc(prhs); 32 | samples = mxGetPr(prhs); 33 | 34 | // each column is one instance 35 | j = 0; 36 | low = (int) jc[index], high = (int) jc[index+1]; 37 | for(i=low; i=0) 44 | { 45 | x[j].index = feature_number+1; 46 | x[j].value = bias; 47 | j++; 48 | } 49 | x[j].index = -1; 50 | } 51 | 52 | static void fake_answer(int nlhs, mxArray *plhs[]) 53 | { 54 | int i; 55 | for(i=0;iparam.solver_type!=MCSVM_CS) 80 | nr_w=1; 81 | else 82 | nr_w=nr_class; 83 | 84 | // prhs[1] = testing instance matrix 85 | feature_number = get_nr_feature(model_); 86 | testing_instance_number = (int) mxGetM(prhs[1]); 87 | if(col_format_flag) 88 | { 89 | feature_number = (int) mxGetM(prhs[1]); 90 | testing_instance_number = (int) mxGetN(prhs[1]); 91 | } 92 | 93 | label_vector_row_num = (int) mxGetM(prhs[0]); 94 | label_vector_col_num = (int) mxGetN(prhs[0]); 95 | 96 | if(label_vector_row_num!=testing_instance_number) 97 | { 98 | mexPrintf("Length of label vector does not match # of instances.\n"); 99 | fake_answer(nlhs, plhs); 100 | return; 101 | } 102 | if(label_vector_col_num!=1) 103 | { 104 | mexPrintf("label (1st argument) should be a vector (# of column is 1).\n"); 105 | fake_answer(nlhs, plhs); 106 | return; 107 | } 108 | 109 | ptr_label = mxGetPr(prhs[0]); 110 | 111 | // transpose instance matrix 112 | if(col_format_flag) 113 | pplhs[0] = (mxArray *)prhs[1]; 114 | else 115 | { 116 | mxArray *pprhs[1]; 117 | pprhs[0] = mxDuplicateArray(prhs[1]); 118 | if(mexCallMATLAB(1, pplhs, 1, pprhs, "transpose")) 119 | { 120 | mexPrintf("Error: cannot transpose testing instance matrix\n"); 121 | fake_answer(nlhs, plhs); 122 | return; 123 | } 124 | } 125 | 126 | 127 | prob_estimates = Malloc(double, nr_class); 128 | 129 | tplhs[0] = mxCreateDoubleMatrix(testing_instance_number, 1, mxREAL); 130 | if(predict_probability_flag) 131 | tplhs[2] = mxCreateDoubleMatrix(testing_instance_number, nr_class, mxREAL); 132 | else 133 | tplhs[2] = mxCreateDoubleMatrix(testing_instance_number, nr_w, mxREAL); 134 | 135 | ptr_predict_label = mxGetPr(tplhs[0]); 136 | ptr_prob_estimates = mxGetPr(tplhs[2]); 137 | ptr_dec_values = mxGetPr(tplhs[2]); 138 | x = Malloc(struct feature_node, feature_number+2); 139 | for(instance_index=0;instance_indexbias); 148 | 149 | if(predict_probability_flag) 150 | { 151 | predict_label = predict_probability(model_, x, prob_estimates); 152 | ptr_predict_label[instance_index] = predict_label; 153 | for(i=0;iparam.solver_type==L2R_L2LOSS_SVR || 180 | model_->param.solver_type==L2R_L1LOSS_SVR_DUAL || 181 | model_->param.solver_type==L2R_L2LOSS_SVR_DUAL) 182 | { 183 | info("Mean squared error = %g (regression)\n",error/total); 184 | info("Squared correlation coefficient = %g (regression)\n", 185 | ((total*sumpt-sump*sumt)*(total*sumpt-sump*sumt))/ 186 | ((total*sumpp-sump*sump)*(total*sumtt-sumt*sumt)) 187 | ); 188 | } 189 | else 190 | info("Accuracy = %g%% (%d/%d)\n", (double) correct/total*100,correct,total); 191 | 192 | // return accuracy, mean squared error, squared 
correlation coefficient 193 | tplhs[1] = mxCreateDoubleMatrix(3, 1, mxREAL); 194 | ptr = mxGetPr(tplhs[1]); 195 | ptr[0] = (double)correct/total*100; 196 | ptr[1] = error/total; 197 | ptr[2] = ((total*sumpt-sump*sumt)*(total*sumpt-sump*sumt))/ 198 | ((total*sumpp-sump*sump)*(total*sumtt-sumt*sumt)); 199 | 200 | free(x); 201 | if(prob_estimates != NULL) 202 | free(prob_estimates); 203 | 204 | switch(nlhs) 205 | { 206 | case 3: 207 | plhs[2] = tplhs[2]; 208 | plhs[1] = tplhs[1]; 209 | case 1: 210 | case 0: 211 | plhs[0] = tplhs[0]; 212 | } 213 | } 214 | 215 | void exit_with_help() 216 | { 217 | mexPrintf( 218 | "Usage: [predicted_label, accuracy, decision_values/prob_estimates] = predict(testing_label_vector, testing_instance_matrix, model, 'liblinear_options','col')\n" 219 | " [predicted_label] = predict(testing_label_vector, testing_instance_matrix, model, 'liblinear_options','col')\n" 220 | "liblinear_options:\n" 221 | "-b probability_estimates: whether to output probability estimates, 0 or 1 (default 0); currently for logistic regression only\n" 222 | "-q quiet mode (no outputs)\n" 223 | "col: if 'col' is setted testing_instance_matrix is parsed in column format, otherwise is in row format\n" 224 | "Returns:\n" 225 | " predicted_label: prediction output vector.\n" 226 | " accuracy: a vector with accuracy, mean squared error, squared correlation coefficient.\n" 227 | " prob_estimates: If selected, probability estimate vector.\n" 228 | ); 229 | } 230 | 231 | void mexFunction( int nlhs, mxArray *plhs[], 232 | int nrhs, const mxArray *prhs[] ) 233 | { 234 | int prob_estimate_flag = 0; 235 | struct model *model_; 236 | char cmd[CMD_LEN]; 237 | info = &mexPrintf; 238 | col_format_flag = 0; 239 | 240 | if(nlhs == 2 || nlhs > 3 || nrhs > 5 || nrhs < 3) 241 | { 242 | exit_with_help(); 243 | fake_answer(nlhs, plhs); 244 | return; 245 | } 246 | if(nrhs == 5) 247 | { 248 | mxGetString(prhs[4], cmd, mxGetN(prhs[4])+1); 249 | if(strcmp(cmd, "col") == 0) 250 | { 251 | col_format_flag = 1; 252 | } 253 | } 254 | 255 | if(!mxIsDouble(prhs[0]) || !mxIsDouble(prhs[1])) { 256 | mexPrintf("Error: label vector and instance matrix must be double\n"); 257 | fake_answer(nlhs, plhs); 258 | return; 259 | } 260 | 261 | if(mxIsStruct(prhs[2])) 262 | { 263 | const char *error_msg; 264 | 265 | // parse options 266 | if(nrhs>=4) 267 | { 268 | int i, argc = 1; 269 | char *argv[CMD_LEN/2]; 270 | 271 | // put options in argv[] 272 | mxGetString(prhs[3], cmd, mxGetN(prhs[3]) + 1); 273 | if((argv[argc] = strtok(cmd, " ")) != NULL) 274 | while((argv[++argc] = strtok(NULL, " ")) != NULL) 275 | ; 276 | 277 | for(i=1;i=argc && argv[i-1][1] != 'q') 282 | { 283 | exit_with_help(); 284 | fake_answer(nlhs, plhs); 285 | return; 286 | } 287 | switch(argv[i-1][1]) 288 | { 289 | case 'b': 290 | prob_estimate_flag = atoi(argv[i]); 291 | break; 292 | case 'q': 293 | info = &print_null; 294 | i--; 295 | break; 296 | default: 297 | mexPrintf("unknown option\n"); 298 | exit_with_help(); 299 | fake_answer(nlhs, plhs); 300 | return; 301 | } 302 | } 303 | } 304 | 305 | model_ = Malloc(struct model, 1); 306 | error_msg = matlab_matrix_to_model(model_, prhs[2]); 307 | if(error_msg) 308 | { 309 | mexPrintf("Error: can't read model: %s\n", error_msg); 310 | free_and_destroy_model(&model_); 311 | fake_answer(nlhs, plhs); 312 | return; 313 | } 314 | 315 | if(prob_estimate_flag) 316 | { 317 | if(!check_probability_model(model_)) 318 | { 319 | mexPrintf("probability output is only supported for logistic regression\n"); 320 | prob_estimate_flag=0; 321 | 
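/* For solvers without a probability model (anything but logistic
 * regression), the flag was cleared above, so the call falls back to
 * returning plain decision values instead of probability estimates. */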
} 322 | } 323 | 324 | if(mxIsSparse(prhs[1])) 325 | do_predict(nlhs, plhs, prhs, model_, prob_estimate_flag); 326 | else 327 | { 328 | mexPrintf("Testing_instance_matrix must be sparse; " 329 | "use sparse(Testing_instance_matrix) first\n"); 330 | fake_answer(nlhs, plhs); 331 | } 332 | 333 | // destroy model_ 334 | free_and_destroy_model(&model_); 335 | } 336 | else 337 | { 338 | mexPrintf("model file should be a struct array\n"); 339 | fake_answer(nlhs, plhs); 340 | } 341 | 342 | return; 343 | } 344 | -------------------------------------------------------------------------------- /windows/evaluate/liblinear/predict.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include "linear.h" 7 | 8 | int print_null(const char *s,...) {return 0;} 9 | 10 | static int (*info)(const char *fmt,...) = &printf; 11 | 12 | struct feature_node *x; 13 | int max_nr_attr = 64; 14 | 15 | struct model* model_; 16 | int flag_predict_probability=0; 17 | 18 | void exit_input_error(int line_num) 19 | { 20 | fprintf(stderr,"Wrong input format at line %d\n", line_num); 21 | exit(1); 22 | } 23 | 24 | static char *line = NULL; 25 | static int max_line_len; 26 | 27 | static char* readline(FILE *input) 28 | { 29 | int len; 30 | 31 | if(fgets(line,max_line_len,input) == NULL) 32 | return NULL; 33 | 34 | while(strrchr(line,'\n') == NULL) 35 | { 36 | max_line_len *= 2; 37 | line = (char *) realloc(line,max_line_len); 38 | len = (int) strlen(line); 39 | if(fgets(line+len,max_line_len-len,input) == NULL) 40 | break; 41 | } 42 | return line; 43 | } 44 | 45 | void do_predict(FILE *input, FILE *output) 46 | { 47 | int correct = 0; 48 | int total = 0; 49 | double error = 0; 50 | double sump = 0, sumt = 0, sumpp = 0, sumtt = 0, sumpt = 0; 51 | 52 | int nr_class=get_nr_class(model_); 53 | double *prob_estimates=NULL; 54 | int j, n; 55 | int nr_feature=get_nr_feature(model_); 56 | if(model_->bias>=0) 57 | n=nr_feature+1; 58 | else 59 | n=nr_feature; 60 | 61 | if(flag_predict_probability) 62 | { 63 | int *labels; 64 | 65 | if(!check_probability_model(model_)) 66 | { 67 | fprintf(stderr, "probability output is only supported for logistic regression\n"); 68 | exit(1); 69 | } 70 | 71 | labels=(int *) malloc(nr_class*sizeof(int)); 72 | get_labels(model_,labels); 73 | prob_estimates = (double *) malloc(nr_class*sizeof(double)); 74 | fprintf(output,"labels"); 75 | for(j=0;j=max_nr_attr-2) // need one more for index = -1 101 | { 102 | max_nr_attr *= 2; 103 | x = (struct feature_node *) realloc(x,max_nr_attr*sizeof(struct feature_node)); 104 | } 105 | 106 | idx = strtok(NULL,":"); 107 | val = strtok(NULL," \t"); 108 | 109 | if(val == NULL) 110 | break; 111 | errno = 0; 112 | x[i].index = (int) strtol(idx,&endptr,10); 113 | if(endptr == idx || errno != 0 || *endptr != '\0' || x[i].index <= inst_max_index) 114 | exit_input_error(total+1); 115 | else 116 | inst_max_index = x[i].index; 117 | 118 | errno = 0; 119 | x[i].value = strtod(val,&endptr); 120 | if(endptr == val || errno != 0 || (*endptr != '\0' && !isspace(*endptr))) 121 | exit_input_error(total+1); 122 | 123 | // feature indices larger than those in training are not used 124 | if(x[i].index <= nr_feature) 125 | ++i; 126 | } 127 | 128 | if(model_->bias>=0) 129 | { 130 | x[i].index = n; 131 | x[i].value = model_->bias; 132 | i++; 133 | } 134 | x[i].index = -1; 135 | 136 | if(flag_predict_probability) 137 | { 138 | int j; 139 | predict_label = 
predict_probability(model_,x,prob_estimates); 140 | fprintf(output,"%g",predict_label); 141 | for(j=0;jnr_class;j++) 142 | fprintf(output," %g",prob_estimates[j]); 143 | fprintf(output,"\n"); 144 | } 145 | else 146 | { 147 | predict_label = predict(model_,x); 148 | fprintf(output,"%g\n",predict_label); 149 | } 150 | 151 | if(predict_label == target_label) 152 | ++correct; 153 | error += (predict_label-target_label)*(predict_label-target_label); 154 | sump += predict_label; 155 | sumt += target_label; 156 | sumpp += predict_label*predict_label; 157 | sumtt += target_label*target_label; 158 | sumpt += predict_label*target_label; 159 | ++total; 160 | } 161 | if(model_->param.solver_type==L2R_L2LOSS_SVR || 162 | model_->param.solver_type==L2R_L1LOSS_SVR_DUAL || 163 | model_->param.solver_type==L2R_L2LOSS_SVR_DUAL) 164 | { 165 | info("Mean squared error = %g (regression)\n",error/total); 166 | info("Squared correlation coefficient = %g (regression)\n", 167 | ((total*sumpt-sump*sumt)*(total*sumpt-sump*sumt))/ 168 | ((total*sumpp-sump*sump)*(total*sumtt-sumt*sumt)) 169 | ); 170 | } 171 | else 172 | info("Accuracy = %g%% (%d/%d)\n",(double) correct/total*100,correct,total); 173 | if(flag_predict_probability) 174 | free(prob_estimates); 175 | } 176 | 177 | void exit_with_help() 178 | { 179 | printf( 180 | "Usage: predict [options] test_file model_file output_file\n" 181 | "options:\n" 182 | "-b probability_estimates: whether to output probability estimates, 0 or 1 (default 0); currently for logistic regression only\n" 183 | "-q : quiet mode (no outputs)\n" 184 | ); 185 | exit(1); 186 | } 187 | 188 | int main(int argc, char **argv) 189 | { 190 | FILE *input, *output; 191 | int i; 192 | 193 | // parse options 194 | for(i=1;i=argc) 214 | exit_with_help(); 215 | 216 | input = fopen(argv[i],"r"); 217 | if(input == NULL) 218 | { 219 | fprintf(stderr,"can't open input file %s\n",argv[i]); 220 | exit(1); 221 | } 222 | 223 | output = fopen(argv[i+2],"w"); 224 | if(output == NULL) 225 | { 226 | fprintf(stderr,"can't open output file %s\n",argv[i+2]); 227 | exit(1); 228 | } 229 | 230 | if((model_=load_model(argv[i+1]))==0) 231 | { 232 | fprintf(stderr,"can't open model file %s\n",argv[i+1]); 233 | exit(1); 234 | } 235 | 236 | x = (struct feature_node *) malloc(max_nr_attr*sizeof(struct feature_node)); 237 | do_predict(input, output); 238 | free_and_destroy_model(&model_); 239 | free(line); 240 | free(x); 241 | fclose(input); 242 | fclose(output); 243 | return 0; 244 | } 245 | 246 | -------------------------------------------------------------------------------- /windows/evaluate/liblinear/python/Makefile: -------------------------------------------------------------------------------- 1 | all = lib 2 | 3 | lib: 4 | make -C .. lib 5 | -------------------------------------------------------------------------------- /windows/evaluate/liblinear/python/liblinear.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from ctypes import * 4 | from ctypes.util import find_library 5 | from os import path 6 | import sys 7 | 8 | try: 9 | dirname = path.dirname(path.abspath(__file__)) 10 | if sys.platform == 'win32': 11 | liblinear = CDLL(path.join(dirname, r'..\windows\liblinear.dll')) 12 | else: 13 | liblinear = CDLL(path.join(dirname, '../liblinear.so.1')) 14 | except: 15 | # For unix the prefix 'lib' is not considered. 
16 | if find_library('linear'): 17 | liblinear = CDLL(find_library('linear')) 18 | elif find_library('liblinear'): 19 | liblinear = CDLL(find_library('liblinear')) 20 | else: 21 | raise Exception('LIBLINEAR library not found.') 22 | 23 | # Construct constants 24 | SOLVER_TYPE = ['L2R_LR', 'L2R_L2LOSS_SVC_DUAL', 'L2R_L2LOSS_SVC', 'L2R_L1LOSS_SVC_DUAL',\ 25 | 'MCSVM_CS', 'L1R_L2LOSS_SVC', 'L1R_LR', 'L2R_LR_DUAL', \ 26 | None, None, None, \ 27 | 'L2R_L2LOSS_SVR', 'L2R_L2LOSS_SVR_DUAL', 'L2R_L1LOSS_SVR_DUAL'] 28 | for i, s in enumerate(SOLVER_TYPE): 29 | if s is not None: exec("%s = %d" % (s , i)) 30 | 31 | PRINT_STRING_FUN = CFUNCTYPE(None, c_char_p) 32 | def print_null(s): 33 | return 34 | 35 | def genFields(names, types): 36 | return list(zip(names, types)) 37 | 38 | def fillprototype(f, restype, argtypes): 39 | f.restype = restype 40 | f.argtypes = argtypes 41 | 42 | class feature_node(Structure): 43 | _names = ["index", "value"] 44 | _types = [c_int, c_double] 45 | _fields_ = genFields(_names, _types) 46 | 47 | def __str__(self): 48 | return '%d:%g' % (self.index, self.value) 49 | 50 | def gen_feature_nodearray(xi, feature_max=None, issparse=True): 51 | if isinstance(xi, dict): 52 | index_range = xi.keys() 53 | elif isinstance(xi, (list, tuple)): 54 | xi = [0] + xi # idx should start from 1 55 | index_range = range(1, len(xi)) 56 | else: 57 | raise TypeError('xi should be a dictionary, list or tuple') 58 | 59 | if feature_max: 60 | assert(isinstance(feature_max, int)) 61 | index_range = filter(lambda j: j <= feature_max, index_range) 62 | if issparse: 63 | index_range = filter(lambda j:xi[j] != 0, index_range) 64 | 65 | index_range = sorted(index_range) 66 | ret = (feature_node * (len(index_range)+2))() 67 | ret[-1].index = -1 # for bias term 68 | ret[-2].index = -1 69 | for idx, j in enumerate(index_range): 70 | ret[idx].index = j 71 | ret[idx].value = xi[j] 72 | max_idx = 0 73 | if index_range : 74 | max_idx = index_range[-1] 75 | return ret, max_idx 76 | 77 | class problem(Structure): 78 | _names = ["l", "n", "y", "x", "bias"] 79 | _types = [c_int, c_int, POINTER(c_double), POINTER(POINTER(feature_node)), c_double] 80 | _fields_ = genFields(_names, _types) 81 | 82 | def __init__(self, y, x, bias = -1): 83 | if len(y) != len(x) : 84 | raise ValueError("len(y) != len(x)") 85 | self.l = l = len(y) 86 | self.bias = -1 87 | 88 | max_idx = 0 89 | x_space = self.x_space = [] 90 | for i, xi in enumerate(x): 91 | tmp_xi, tmp_idx = gen_feature_nodearray(xi) 92 | x_space += [tmp_xi] 93 | max_idx = max(max_idx, tmp_idx) 94 | self.n = max_idx 95 | 96 | self.y = (c_double * l)() 97 | for i, yi in enumerate(y): self.y[i] = y[i] 98 | 99 | self.x = (POINTER(feature_node) * l)() 100 | for i, xi in enumerate(self.x_space): self.x[i] = xi 101 | 102 | self.set_bias(bias) 103 | 104 | def set_bias(self, bias): 105 | if self.bias == bias: 106 | return 107 | if bias >= 0 and self.bias < 0: 108 | self.n += 1 109 | node = feature_node(self.n, bias) 110 | if bias < 0 and self.bias >= 0: 111 | self.n -= 1 112 | node = feature_node(-1, bias) 113 | 114 | for xi in self.x_space: 115 | xi[-2] = node 116 | self.bias = bias 117 | 118 | 119 | class parameter(Structure): 120 | _names = ["solver_type", "eps", "C", "nr_weight", "weight_label", "weight", "p"] 121 | _types = [c_int, c_double, c_double, c_int, POINTER(c_int), POINTER(c_double), c_double] 122 | _fields_ = genFields(_names, _types) 123 | 124 | def __init__(self, options = None): 125 | if options == None: 126 | options = '' 127 | self.parse_options(options) 128 | 
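	# Example (hypothetical values): parameter('-s 1 -c 4 -B 1') selects
	# L2R_L2LOSS_SVC_DUAL with C=4 and a bias term; parse_options below
	# documents the accepted flags.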
129 | def __str__(self): 130 | s = '' 131 | attrs = parameter._names + list(self.__dict__.keys()) 132 | values = map(lambda attr: getattr(self, attr), attrs) 133 | for attr, val in zip(attrs, values): 134 | s += (' %s: %s\n' % (attr, val)) 135 | s = s.strip() 136 | 137 | return s 138 | 139 | def set_to_default_values(self): 140 | self.solver_type = L2R_L2LOSS_SVC_DUAL 141 | self.eps = float('inf') 142 | self.C = 1 143 | self.p = 0.1 144 | self.nr_weight = 0 145 | self.weight_label = (c_int * 0)() 146 | self.weight = (c_double * 0)() 147 | self.bias = -1 148 | self.cross_validation = False 149 | self.nr_fold = 0 150 | self.print_func = cast(None, PRINT_STRING_FUN) 151 | 152 | def parse_options(self, options): 153 | if isinstance(options, list): 154 | argv = options 155 | elif isinstance(options, str): 156 | argv = options.split() 157 | else: 158 | raise TypeError("arg 1 should be a list or a str.") 159 | self.set_to_default_values() 160 | self.print_func = cast(None, PRINT_STRING_FUN) 161 | weight_label = [] 162 | weight = [] 163 | 164 | i = 0 165 | while i < len(argv) : 166 | if argv[i] == "-s": 167 | i = i + 1 168 | self.solver_type = int(argv[i]) 169 | elif argv[i] == "-c": 170 | i = i + 1 171 | self.C = float(argv[i]) 172 | elif argv[i] == "-p": 173 | i = i + 1 174 | self.p = float(argv[i]) 175 | elif argv[i] == "-e": 176 | i = i + 1 177 | self.eps = float(argv[i]) 178 | elif argv[i] == "-B": 179 | i = i + 1 180 | self.bias = float(argv[i]) 181 | elif argv[i] == "-v": 182 | i = i + 1 183 | self.cross_validation = 1 184 | self.nr_fold = int(argv[i]) 185 | if self.nr_fold < 2 : 186 | raise ValueError("n-fold cross validation: n must >= 2") 187 | elif argv[i].startswith("-w"): 188 | i = i + 1 189 | self.nr_weight += 1 190 | nr_weight = self.nr_weight 191 | weight_label += [int(argv[i-1][2:])] 192 | weight += [float(argv[i])] 193 | elif argv[i] == "-q": 194 | self.print_func = PRINT_STRING_FUN(print_null) 195 | else : 196 | raise ValueError("Wrong options") 197 | i += 1 198 | 199 | liblinear.set_print_string_function(self.print_func) 200 | self.weight_label = (c_int*self.nr_weight)() 201 | self.weight = (c_double*self.nr_weight)() 202 | for i in range(self.nr_weight): 203 | self.weight[i] = weight[i] 204 | self.weight_label[i] = weight_label[i] 205 | 206 | if self.eps == float('inf'): 207 | if self.solver_type in [L2R_LR, L2R_L2LOSS_SVC]: 208 | self.eps = 0.01 209 | elif self.solver_type in [L2R_L2LOSS_SVR]: 210 | self.eps = 0.001 211 | elif self.solver_type in [L2R_L2LOSS_SVC_DUAL, L2R_L1LOSS_SVC_DUAL, MCSVM_CS, L2R_LR_DUAL]: 212 | self.eps = 0.1 213 | elif self.solver_type in [L1R_L2LOSS_SVC, L1R_LR]: 214 | self.eps = 0.01 215 | elif self.solver_type in [L2R_L2LOSS_SVR_DUAL, L2R_L1LOSS_SVR_DUAL]: 216 | self.eps = 0.1 217 | 218 | class model(Structure): 219 | _names = ["param", "nr_class", "nr_feature", "w", "label", "bias"] 220 | _types = [parameter, c_int, c_int, POINTER(c_double), POINTER(c_int), c_double] 221 | _fields_ = genFields(_names, _types) 222 | 223 | def __init__(self): 224 | self.__createfrom__ = 'python' 225 | 226 | def __del__(self): 227 | # free memory created by C to avoid memory leak 228 | if hasattr(self, '__createfrom__') and self.__createfrom__ == 'C': 229 | liblinear.free_and_destroy_model(pointer(self)) 230 | 231 | def get_nr_feature(self): 232 | return liblinear.get_nr_feature(self) 233 | 234 | def get_nr_class(self): 235 | return liblinear.get_nr_class(self) 236 | 237 | def get_labels(self): 238 | nr_class = self.get_nr_class() 239 | labels = (c_int * nr_class)() 
240 | liblinear.get_labels(self, labels) 241 | return labels[:nr_class] 242 | 243 | def is_probability_model(self): 244 | return (liblinear.check_probability_model(self) == 1) 245 | 246 | def toPyModel(model_ptr): 247 | """ 248 | toPyModel(model_ptr) -> model 249 | 250 | Convert a ctypes POINTER(model) to a Python model 251 | """ 252 | if bool(model_ptr) == False: 253 | raise ValueError("Null pointer") 254 | m = model_ptr.contents 255 | m.__createfrom__ = 'C' 256 | return m 257 | 258 | fillprototype(liblinear.train, POINTER(model), [POINTER(problem), POINTER(parameter)]) 259 | fillprototype(liblinear.cross_validation, None, [POINTER(problem), POINTER(parameter), c_int, POINTER(c_double)]) 260 | 261 | fillprototype(liblinear.predict_values, c_double, [POINTER(model), POINTER(feature_node), POINTER(c_double)]) 262 | fillprototype(liblinear.predict, c_double, [POINTER(model), POINTER(feature_node)]) 263 | fillprototype(liblinear.predict_probability, c_double, [POINTER(model), POINTER(feature_node), POINTER(c_double)]) 264 | 265 | fillprototype(liblinear.save_model, c_int, [c_char_p, POINTER(model)]) 266 | fillprototype(liblinear.load_model, POINTER(model), [c_char_p]) 267 | 268 | fillprototype(liblinear.get_nr_feature, c_int, [POINTER(model)]) 269 | fillprototype(liblinear.get_nr_class, c_int, [POINTER(model)]) 270 | fillprototype(liblinear.get_labels, None, [POINTER(model), POINTER(c_int)]) 271 | 272 | fillprototype(liblinear.free_model_content, None, [POINTER(model)]) 273 | fillprototype(liblinear.free_and_destroy_model, None, [POINTER(POINTER(model))]) 274 | fillprototype(liblinear.destroy_param, None, [POINTER(parameter)]) 275 | fillprototype(liblinear.check_parameter, c_char_p, [POINTER(problem), POINTER(parameter)]) 276 | fillprototype(liblinear.check_probability_model, c_int, [POINTER(model)]) 277 | fillprototype(liblinear.set_print_string_function, None, [CFUNCTYPE(None, c_char_p)]) 278 | -------------------------------------------------------------------------------- /windows/evaluate/liblinear/python/liblinearutil.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import os, sys 4 | sys.path = [os.path.dirname(os.path.abspath(__file__))] + sys.path 5 | from liblinear import * 6 | 7 | def svm_read_problem(data_file_name): 8 | """ 9 | svm_read_problem(data_file_name) -> [y, x] 10 | 11 | Read LIBSVM-format data from data_file_name and return labels y 12 | and data instances x. 13 | """ 14 | prob_y = [] 15 | prob_x = [] 16 | for line in open(data_file_name): 17 | line = line.split(None, 1) 18 | # In case an instance with all zero features 19 | if len(line) == 1: line += [''] 20 | label, features = line 21 | xi = {} 22 | for e in features.split(): 23 | ind, val = e.split(":") 24 | xi[int(ind)] = float(val) 25 | prob_y += [float(label)] 26 | prob_x += [xi] 27 | return (prob_y, prob_x) 28 | 29 | def load_model(model_file_name): 30 | """ 31 | load_model(model_file_name) -> model 32 | 33 | Load a LIBLINEAR model from model_file_name and return. 34 | """ 35 | model = liblinear.load_model(model_file_name.encode()) 36 | if not model: 37 | print("can't open model file %s" % model_file_name) 38 | return None 39 | model = toPyModel(model) 40 | return model 41 | 42 | def save_model(model_file_name, model): 43 | """ 44 | save_model(model_file_name, model) -> None 45 | 46 | Save a LIBLINEAR model to the file model_file_name. 
47 | """ 48 | liblinear.save_model(model_file_name.encode(), model) 49 | 50 | def evaluations(ty, pv): 51 | """ 52 | evaluations(ty, pv) -> (ACC, MSE, SCC) 53 | 54 | Calculate accuracy, mean squared error and squared correlation coefficient 55 | using the true values (ty) and predicted values (pv). 56 | """ 57 | if len(ty) != len(pv): 58 | raise ValueError("len(ty) must equal to len(pv)") 59 | total_correct = total_error = 0 60 | sumv = sumy = sumvv = sumyy = sumvy = 0 61 | for v, y in zip(pv, ty): 62 | if y == v: 63 | total_correct += 1 64 | total_error += (v-y)*(v-y) 65 | sumv += v 66 | sumy += y 67 | sumvv += v*v 68 | sumyy += y*y 69 | sumvy += v*y 70 | l = len(ty) 71 | ACC = 100.0*total_correct/l 72 | MSE = total_error/l 73 | try: 74 | SCC = ((l*sumvy-sumv*sumy)*(l*sumvy-sumv*sumy))/((l*sumvv-sumv*sumv)*(l*sumyy-sumy*sumy)) 75 | except: 76 | SCC = float('nan') 77 | return (ACC, MSE, SCC) 78 | 79 | def train(arg1, arg2=None, arg3=None): 80 | """ 81 | train(y, x [, options]) -> model | ACC 82 | train(prob [, options]) -> model | ACC 83 | train(prob, param) -> model | ACC 84 | 85 | Train a model from data (y, x) or a problem prob using 86 | 'options' or a parameter param. 87 | If '-v' is specified in 'options' (i.e., cross validation) 88 | either accuracy (ACC) or mean-squared error (MSE) is returned. 89 | 90 | options: 91 | -s type : set type of solver (default 1) 92 | for multi-class classification 93 | 0 -- L2-regularized logistic regression (primal) 94 | 1 -- L2-regularized L2-loss support vector classification (dual) 95 | 2 -- L2-regularized L2-loss support vector classification (primal) 96 | 3 -- L2-regularized L1-loss support vector classification (dual) 97 | 4 -- support vector classification by Crammer and Singer 98 | 5 -- L1-regularized L2-loss support vector classification 99 | 6 -- L1-regularized logistic regression 100 | 7 -- L2-regularized logistic regression (dual) 101 | for regression 102 | 11 -- L2-regularized L2-loss support vector regression (primal) 103 | 12 -- L2-regularized L2-loss support vector regression (dual) 104 | 13 -- L2-regularized L1-loss support vector regression (dual) 105 | -c cost : set the parameter C (default 1) 106 | -p epsilon : set the epsilon in loss function of SVR (default 0.1) 107 | -e epsilon : set tolerance of termination criterion 108 | -s 0 and 2 109 | |f'(w)|_2 <= eps*min(pos,neg)/l*|f'(w0)|_2, 110 | where f is the primal function, (default 0.01) 111 | -s 11 112 | |f'(w)|_2 <= eps*|f'(w0)|_2 (default 0.001) 113 | -s 1, 3, 4, and 7 114 | Dual maximal violation <= eps; similar to liblinear (default 0.) 
115 | -s 5 and 6 116 | |f'(w)|_inf <= eps*min(pos,neg)/l*|f'(w0)|_inf, 117 | where f is the primal function (default 0.01) 118 | -s 12 and 13 119 | |f'(alpha)|_1 <= eps |f'(alpha0)|, 120 | where f is the dual function (default 0.1) 121 | -B bias : if bias >= 0, instance x becomes [x; bias]; if < 0, no bias term added (default -1) 122 | -wi weight: weights adjust the parameter C of different classes (see README for details) 123 | -v n: n-fold cross validation mode 124 | -q : quiet mode (no outputs) 125 | """ 126 | prob, param = None, None 127 | if isinstance(arg1, (list, tuple)): 128 | assert isinstance(arg2, (list, tuple)) 129 | y, x, options = arg1, arg2, arg3 130 | prob = problem(y, x) 131 | param = parameter(options) 132 | elif isinstance(arg1, problem): 133 | prob = arg1 134 | if isinstance(arg2, parameter): 135 | param = arg2 136 | else : 137 | param = parameter(arg2) 138 | if prob == None or param == None : 139 | raise TypeError("Wrong types for the arguments") 140 | 141 | prob.set_bias(param.bias) 142 | liblinear.set_print_string_function(param.print_func) 143 | err_msg = liblinear.check_parameter(prob, param) 144 | if err_msg : 145 | raise ValueError('Error: %s' % err_msg) 146 | 147 | if param.cross_validation: 148 | l, nr_fold = prob.l, param.nr_fold 149 | target = (c_double * l)() 150 | liblinear.cross_validation(prob, param, nr_fold, target) 151 | ACC, MSE, SCC = evaluations(prob.y[:l], target[:l]) 152 | if param.solver_type in [L2R_L2LOSS_SVR, L2R_L2LOSS_SVR_DUAL, L2R_L1LOSS_SVR_DUAL]: 153 | print("Cross Validation Mean squared error = %g" % MSE) 154 | print("Cross Validation Squared correlation coefficient = %g" % SCC) 155 | return MSE 156 | else: 157 | print("Cross Validation Accuracy = %g%%" % ACC) 158 | return ACC 159 | else : 160 | m = liblinear.train(prob, param) 161 | m = toPyModel(m) 162 | 163 | return m 164 | 165 | def predict(y, x, m, options=""): 166 | """ 167 | predict(y, x, m [, options]) -> (p_labels, p_acc, p_vals) 168 | 169 | Predict data (y, x) with the SVM model m. 170 | options: 171 | -b probability_estimates: whether to output probability estimates, 0 or 1 (default 0); currently for logistic regression only 172 | -q quiet mode (no outputs) 173 | 174 | The return tuple contains 175 | p_labels: a list of predicted labels 176 | p_acc: a tuple including accuracy (for classification), mean-squared 177 | error, and squared correlation coefficient (for regression). 178 | p_vals: a list of decision values or probability estimates (if '-b 1' 179 | is specified). If k is the number of classes, for decision values, 180 | each element includes results of predicting k binary-class 181 | SVMs. if k = 2 and solver is not MCSVM_CS, only one decision value 182 | is returned. For probabilities, each element contains k values 183 | indicating the probability that the testing instance is in each class. 184 | Note that the order of classes here is the same as 'model.label' 185 | field in the model structure. 
186 | """ 187 | 188 | def info(s): 189 | print(s) 190 | 191 | predict_probability = 0 192 | argv = options.split() 193 | i = 0 194 | while i < len(argv): 195 | if argv[i] == '-b': 196 | i += 1 197 | predict_probability = int(argv[i]) 198 | elif argv[i] == '-q': 199 | info = print_null 200 | else: 201 | raise ValueError("Wrong options") 202 | i+=1 203 | 204 | solver_type = m.param.solver_type 205 | nr_class = m.get_nr_class() 206 | nr_feature = m.get_nr_feature() 207 | is_prob_model = m.is_probability_model() 208 | bias = m.bias 209 | if bias >= 0: 210 | biasterm = feature_node(nr_feature+1, bias) 211 | else: 212 | biasterm = feature_node(-1, bias) 213 | pred_labels = [] 214 | pred_values = [] 215 | 216 | if predict_probability: 217 | if not is_prob_model: 218 | raise TypeError('probability output is only supported for logistic regression') 219 | prob_estimates = (c_double * nr_class)() 220 | for xi in x: 221 | xi, idx = gen_feature_nodearray(xi, feature_max=nr_feature) 222 | xi[-2] = biasterm 223 | label = liblinear.predict_probability(m, xi, prob_estimates) 224 | values = prob_estimates[:nr_class] 225 | pred_labels += [label] 226 | pred_values += [values] 227 | else: 228 | if nr_class <= 2: 229 | nr_classifier = 1 230 | else: 231 | nr_classifier = nr_class 232 | dec_values = (c_double * nr_classifier)() 233 | for xi in x: 234 | xi, idx = gen_feature_nodearray(xi, feature_max=nr_feature) 235 | xi[-2] = biasterm 236 | label = liblinear.predict_values(m, xi, dec_values) 237 | values = dec_values[:nr_classifier] 238 | pred_labels += [label] 239 | pred_values += [values] 240 | if len(y) == 0: 241 | y = [0] * len(x) 242 | ACC, MSE, SCC = evaluations(y, pred_labels) 243 | l = len(y) 244 | if solver_type in [L2R_L2LOSS_SVR, L2R_L2LOSS_SVR_DUAL, L2R_L1LOSS_SVR_DUAL]: 245 | info("Mean squared error = %g (regression)" % MSE) 246 | info("Squared correlation coefficient = %g (regression)" % SCC) 247 | else: 248 | info("Accuracy = %g%% (%d/%d) (classification)" % (ACC, int(l*ACC/100), l)) 249 | 250 | return pred_labels, (ACC, MSE, SCC), pred_values 251 | -------------------------------------------------------------------------------- /windows/evaluate/liblinear/tron.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include "tron.h" 6 | 7 | #ifndef min 8 | template static inline T min(T x,T y) { return (x static inline T max(T x,T y) { return (x>y)?x:y; } 13 | #endif 14 | 15 | #ifdef __cplusplus 16 | extern "C" { 17 | #endif 18 | 19 | extern double dnrm2_(int *, double *, int *); 20 | extern double ddot_(int *, double *, int *, double *, int *); 21 | extern int daxpy_(int *, double *, double *, int *, double *, int *); 22 | extern int dscal_(int *, double *, double *, int *); 23 | 24 | #ifdef __cplusplus 25 | } 26 | #endif 27 | 28 | static void default_print(const char *buf) 29 | { 30 | fputs(buf,stdout); 31 | fflush(stdout); 32 | } 33 | 34 | void TRON::info(const char *fmt,...) 35 | { 36 | char buf[BUFSIZ]; 37 | va_list ap; 38 | va_start(ap,fmt); 39 | vsprintf(buf,fmt,ap); 40 | va_end(ap); 41 | (*tron_print_string)(buf); 42 | } 43 | 44 | TRON::TRON(const function *fun_obj, double eps, int max_iter) 45 | { 46 | this->fun_obj=const_cast(fun_obj); 47 | this->eps=eps; 48 | this->max_iter=max_iter; 49 | tron_print_string = default_print; 50 | } 51 | 52 | TRON::~TRON() 53 | { 54 | } 55 | 56 | void TRON::tron(double *w) 57 | { 58 | // Parameters for updating the iterates. 
59 | double eta0 = 1e-4, eta1 = 0.25, eta2 = 0.75; 60 | 61 | // Parameters for updating the trust region size delta. 62 | double sigma1 = 0.25, sigma2 = 0.5, sigma3 = 4; 63 | 64 | int n = fun_obj->get_nr_variable(); 65 | int i, cg_iter; 66 | double delta, snorm, one=1.0; 67 | double alpha, f, fnew, prered, actred, gs; 68 | int search = 1, iter = 1, inc = 1; 69 | double *s = new double[n]; 70 | double *r = new double[n]; 71 | double *w_new = new double[n]; 72 | double *g = new double[n]; 73 | 74 | for (i=0; i<n; i++) 75 | w[i] = 0; 76 | 77 | f = fun_obj->fun(w); 78 | fun_obj->grad(w, g); 79 | delta = dnrm2_(&n, g, &inc); 80 | double gnorm1 = delta; 81 | double gnorm = gnorm1; 82 | 83 | if (gnorm <= eps*gnorm1) 84 | search = 0; 85 | 86 | iter = 1; 87 | 88 | while (iter <= max_iter && search) 89 | { 90 | cg_iter = trcg(delta, g, s, r); 91 | 92 | memcpy(w_new, w, sizeof(double)*n); 93 | daxpy_(&n, &one, s, &inc, w_new, &inc); 94 | 95 | gs = ddot_(&n, g, &inc, s, &inc); 96 | prered = -0.5*(gs-ddot_(&n, s, &inc, r, &inc)); 97 | fnew = fun_obj->fun(w_new); 98 | 99 | // Compute the actual reduction. 100 | actred = f - fnew; 101 | 102 | // On the first iteration, adjust the initial step bound. 103 | snorm = dnrm2_(&n, s, &inc); 104 | if (iter == 1) 105 | delta = min(delta, snorm); 106 | 107 | // Compute prediction alpha*snorm of the step. 108 | if (fnew - f - gs <= 0) 109 | alpha = sigma3; 110 | else 111 | alpha = max(sigma1, -0.5*(gs/(fnew - f - gs))); 112 | 113 | // Update the trust region bound according to the ratio of actual to predicted reduction. 114 | if (actred < eta0*prered) 115 | delta = min(max(alpha, sigma1)*snorm, sigma2*delta); 116 | else if (actred < eta1*prered) 117 | delta = max(sigma1*delta, min(alpha*snorm, sigma2*delta)); 118 | else if (actred < eta2*prered) 119 | delta = max(sigma1*delta, min(alpha*snorm, sigma3*delta)); 120 | else 121 | delta = max(delta, min(alpha*snorm, sigma3*delta)); 122 | 123 | info("iter %2d act %5.3e pre %5.3e delta %5.3e f %5.3e |g| %5.3e CG %3d\n", iter, actred, prered, delta, f, gnorm, cg_iter); 124 | 125 | if (actred > eta0*prered) 126 | { 127 | iter++; 128 | memcpy(w, w_new, sizeof(double)*n); 129 | f = fnew; 130 | fun_obj->grad(w, g); 131 | 132 | gnorm = dnrm2_(&n, g, &inc); 133 | if (gnorm <= eps*gnorm1) 134 | break; 135 | } 136 | if (f < -1.0e+32) 137 | { 138 | info("WARNING: f < -1.0e+32\n"); 139 | break; 140 | } 141 | if (fabs(actred) <= 0 && prered <= 0) 142 | { 143 | info("WARNING: actred and prered <= 0\n"); 144 | break; 145 | } 146 | if (fabs(actred) <= 1.0e-12*fabs(f) && 147 | fabs(prered) <= 1.0e-12*fabs(f)) 148 | { 149 | info("WARNING: actred and prered too small\n"); 150 | break; 151 | } 152 | } 153 | 154 | delete[] g; 155 | delete[] r; 156 | delete[] w_new; 157 | delete[] s; 158 | } 159 | 160 | int TRON::trcg(double delta, double *g, double *s, double *r) 161 | { 162 | int i, inc = 1; 163 | int n = fun_obj->get_nr_variable(); 164 | double one = 1; 165 | double *d = new double[n]; 166 | double *Hd = new double[n]; 167 | double rTr, rnewTrnew, alpha, beta, cgtol; 168 | 169 | for (i=0; i<n; i++) 170 | { 171 | s[i] = 0; 172 | r[i] = -g[i]; 173 | d[i] = r[i]; 174 | } 175 | cgtol = 0.1*dnrm2_(&n, g, &inc); 176 | 177 | int cg_iter = 0; 178 | rTr = ddot_(&n, r, &inc, r, &inc); 179 | while (1) 180 | { 181 | if (dnrm2_(&n, r, &inc) <= cgtol) 182 | break; 183 | cg_iter++; 184 | fun_obj->Hv(d, Hd); 185 | 186 | alpha = rTr/ddot_(&n, d, &inc, Hd, &inc); 187 | daxpy_(&n, &alpha, d, &inc, s, &inc); 188 | if (dnrm2_(&n, s, &inc) > delta) 189 | { 190 | info("cg reaches trust region boundary\n"); 191 | alpha = -alpha; 192 | daxpy_(&n, &alpha, d, &inc, s, &inc); 193 | 194 | double std = ddot_(&n, s, &inc, d, &inc); 195 | double sts = ddot_(&n, s, &inc, s, &inc); 196 | double dtd = ddot_(&n, d, &inc, d, &inc); 197 | double dsq = delta*delta; 198 | double rad = sqrt(std*std + dtd*(dsq-sts));
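// (Explanatory note, not part of the original source.) The CG iterate has
// left the trust region, so the step length alpha is recomputed as the
// positive root of ||s + alpha*d||^2 = delta^2, i.e. of
// dtd*alpha^2 + 2*std*alpha + (sts - dsq) = 0. The two branches below are
// algebraically equivalent forms of that root, chosen by the sign of
// std = s'd to avoid cancellation.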
199 | if (std >= 0) 200 | alpha = (dsq - sts)/(std + rad); 201 | else 202 | alpha = (rad - std)/dtd; 203 | daxpy_(&n, &alpha, d, &inc, s, &inc); 204 | alpha = -alpha; 205 | daxpy_(&n, &alpha, Hd, &inc, r, &inc); 206 | break; 207 | } 208 | alpha = -alpha; 209 | daxpy_(&n, &alpha, Hd, &inc, r, &inc); 210 | rnewTrnew = ddot_(&n, r, &inc, r, &inc); 211 | beta = rnewTrnew/rTr; 212 | dscal_(&n, &beta, d, &inc); 213 | daxpy_(&n, &one, r, &inc, d, &inc); 214 | rTr = rnewTrnew; 215 | } 216 | 217 | delete[] d; 218 | delete[] Hd; 219 | 220 | return(cg_iter); 221 | } 222 | 223 | double TRON::norm_inf(int n, double *x) 224 | { 225 | double dmax = fabs(x[0]); 226 | for (int i=1; i<n; i++) 227 | if (fabs(x[i]) >= dmax) 228 | dmax = fabs(x[i]); 229 | return(dmax); 230 | } 231 | 232 | void TRON::set_print_string(void (*print_string) (const char *buf)) 233 | { 234 | tron_print_string = print_string; 235 | } 236 | -------------------------------------------------------------------------------- /windows/evaluate/liblinear/tron.h: -------------------------------------------------------------------------------- 1 | #ifndef _TRON_H 2 | #define _TRON_H 3 | 4 | class function 5 | { 6 | public: 7 | virtual double fun(double *w) = 0 ; 8 | virtual void grad(double *w, double *g) = 0 ; 9 | virtual void Hv(double *s, double *Hs) = 0 ; 10 | 11 | virtual int get_nr_variable(void) = 0 ; 12 | virtual ~function(void){} 13 | }; 14 | 15 | class TRON 16 | { 17 | public: 18 | TRON(const function *fun_obj, double eps = 0.1, int max_iter = 1000); 19 | ~TRON(); 20 | 21 | void tron(double *w); 22 | void set_print_string(void (*i_print) (const char *buf)); 23 | 24 | private: 25 | int trcg(double delta, double *g, double *s, double *r); 26 | double norm_inf(int n, double *x); 27 | 28 | double eps; 29 | int max_iter; 30 | function *fun_obj; 31 | void info(const char *fmt,...); 32 | void (*tron_print_string)(const char *buf); 33 | }; 34 | #endif 35 | -------------------------------------------------------------------------------- /windows/evaluate/liblinear/windows/liblinear.dll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tangjianpku/LINE/d5f840941e0f4026090d1b1feeaf15da38e2b24b/windows/evaluate/liblinear/windows/liblinear.dll -------------------------------------------------------------------------------- /windows/evaluate/liblinear/windows/libsvmread.mexw64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tangjianpku/LINE/d5f840941e0f4026090d1b1feeaf15da38e2b24b/windows/evaluate/liblinear/windows/libsvmread.mexw64 -------------------------------------------------------------------------------- /windows/evaluate/liblinear/windows/libsvmwrite.mexw64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tangjianpku/LINE/d5f840941e0f4026090d1b1feeaf15da38e2b24b/windows/evaluate/liblinear/windows/libsvmwrite.mexw64 -------------------------------------------------------------------------------- /windows/evaluate/liblinear/windows/predict.exe: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tangjianpku/LINE/d5f840941e0f4026090d1b1feeaf15da38e2b24b/windows/evaluate/liblinear/windows/predict.exe -------------------------------------------------------------------------------- /windows/evaluate/liblinear/windows/predict.mexw64: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/tangjianpku/LINE/d5f840941e0f4026090d1b1feeaf15da38e2b24b/windows/evaluate/liblinear/windows/predict.mexw64 -------------------------------------------------------------------------------- /windows/evaluate/liblinear/windows/test/model.txt: -------------------------------------------------------------------------------- 1 | solver_type L2R_LR 2 | nr_class 3 3 | label 1 2 3 4 | nr_feature 3 5 | bias -1 6 | w 7 | 0 0 -0.9069706097048917 8 | 0.4 -0.4 0.3381644874876603 9 | -0.4 0.4 0.3381644874876603 10 | -------------------------------------------------------------------------------- /windows/evaluate/liblinear/windows/test/output.txt: -------------------------------------------------------------------------------- 1 | labels 1 2 3 2 | 2 0.247901 0.451705 0.300393 3 | -------------------------------------------------------------------------------- /windows/evaluate/liblinear/windows/test/test.txt: -------------------------------------------------------------------------------- 1 | 0 1:0.5 2:-0.5 3:1 -------------------------------------------------------------------------------- /windows/evaluate/liblinear/windows/test/train.txt: -------------------------------------------------------------------------------- 1 | 1 1:1 2:0 3:0 2 | 1 1:1 2:0 3:-1 3 | 2 1:1 2:-1 3:0 4 | 2 1:1 2:0 3:0 5 | 3 1:1 2:1 3:1 6 | 3 1:-1 2:-1 3:-1 -------------------------------------------------------------------------------- /windows/evaluate/liblinear/windows/train.exe: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tangjianpku/LINE/d5f840941e0f4026090d1b1feeaf15da38e2b24b/windows/evaluate/liblinear/windows/train.exe -------------------------------------------------------------------------------- /windows/evaluate/liblinear/windows/train.mexw64: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tangjianpku/LINE/d5f840941e0f4026090d1b1feeaf15da38e2b24b/windows/evaluate/liblinear/windows/train.mexw64 -------------------------------------------------------------------------------- /windows/evaluate/program/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tangjianpku/LINE/d5f840941e0f4026090d1b1feeaf15da38e2b24b/windows/evaluate/program/.DS_Store -------------------------------------------------------------------------------- /windows/evaluate/program/score.cpp: -------------------------------------------------------------------------------- 1 | // Copyright 2013 Google Inc. All Rights Reserved. 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | // The input is a prediction file from liblinear together with the candidate 16 | // (ground-truth) label sets; the program reports macro-F1 and micro-F1.
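// (Explanatory note, not part of the original source.) Per test instance,
// the top-n ranked labels are predicted, where n is the size of its
// candidate set; with tp true positives, fp false positives and fn false
// negatives, precision is P = tp/(tp+fp) and recall is R = tp/(tp+fn).
// Macro-F1 below averages P and R over instances before taking
// F1 = 2PR/(P+R); micro-F1 pools tp, fp and fn over all instances first.
// Since exactly n labels are predicted here, fp == fn and P == R per instance.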
17 | 18 | #include <stdio.h> 19 | #include <stdlib.h> 20 | #include <string.h> 21 | #include <math.h> 22 | #include <set> 23 | #include <vector> 24 | #include <algorithm> 25 | using namespace std; 26 | 27 | #define MAX_STRING 200 28 | #define MAX_LABEL 1000 29 | 30 | struct Entry 31 | { 32 | int label; 33 | double value; 34 | friend bool operator < (Entry e1, Entry e2) 35 | { 36 | return e1.value > e2.value; 37 | } 38 | }; 39 | 40 | int label_size = 47, ntests = 0; 41 | int pst2lb[MAX_LABEL]; 42 | Entry ranked_list[MAX_LABEL]; 43 | char candidate_file[MAX_STRING], predict_file[MAX_STRING]; 44 | vector< set<int> > can; 45 | 46 | void TrainModel() 47 | { 48 | set<int> curset; 49 | int len, lb, tmp; 50 | char str[MAX_STRING]; 51 | double prob; 52 | 53 | FILE *fi = fopen(candidate_file, "rb"); 54 | while (fscanf(fi, "%d", &len) == 1) 55 | { 56 | for (int k = 0; k != len; k++) 57 | { 58 | fscanf(fi, "%d", &lb); 59 | curset.insert(lb); 60 | } 61 | can.push_back(curset); 62 | curset.clear(); 63 | ntests++; 64 | } 65 | fclose(fi); 66 | 67 | double macro_f1, micro_f1; 68 | double tp, fn, fp; 69 | double stp = 0, sfn = 0, sfp = 0; 70 | double P, R; 71 | double sP = 0, sR = 0; 72 | 73 | fi = fopen(predict_file, "rb"); 74 | fscanf(fi, "%s", str); 75 | for (int k = 0; k != label_size; k++) 76 | { 77 | fscanf(fi, "%d", &lb); 78 | pst2lb[k] = lb; 79 | } 80 | for (int k = 0; k != ntests; k++) 81 | { 82 | fscanf(fi, "%d", &tmp); 83 | for (int i = 0; i != label_size; i++) 84 | { 85 | fscanf(fi, "%lf", &prob); 86 | lb = pst2lb[i]; 87 | ranked_list[i].label = lb; 88 | ranked_list[i].value = prob; 89 | } 90 | sort(ranked_list, ranked_list + label_size); 91 | int n = can[k].size(); 92 | tp = 0; 93 | for (int i = 0; i != n; i++) 94 | { 95 | lb = ranked_list[i].label; 96 | if (can[k].count(lb) != 0) 97 | tp++; 98 | } 99 | fn = n - tp; 100 | fp = n - tp; 101 | 102 | stp += tp; 103 | sfn += fn; 104 | sfp += fp; 105 | 106 | P = tp / (tp + fp); 107 | R = tp / (tp + fn); 108 | 109 | sP += P; 110 | sR += R; 111 | } 112 | 113 | P = sP / ntests; 114 | R = sR / ntests; 115 | macro_f1 = 2 * P * R / (P + R); 116 | 117 | P = stp / (stp + sfp); 118 | R = stp / (stp + sfn); 119 | micro_f1 = 2 * P * R / (P + R); 120 | 121 | printf("number of tests: %d\n", ntests); 122 | printf("macro-f1: %lf\n", macro_f1); 123 | printf("micro-f1: %lf\n", micro_f1); 124 | } 125 | 126 | int ArgPos(char *str, int argc, char **argv) { 127 | int a; 128 | for (a = 1; a < argc; a++) if (!strcmp(str, argv[a])) { 129 | if (a == argc - 1) { 130 | printf("Argument missing for %s\n", str); 131 | exit(1); 132 | } 133 | return a; 134 | } 135 | return -1; 136 | } 137 | 138 | int main(int argc, char **argv) { 139 | int i; 140 | if (argc == 1) { 141 | printf("Multi-label evaluation with macro-F1 and micro-F1\n\n"); 142 | printf("Options:\n"); 143 | printf("Parameters for evaluation:\n"); 144 | printf("\t-predict <file>\n"); 145 | printf("\t\tRead predicted label probabilities from <file>\n"); 146 | printf("\t\t(the output of liblinear's predict with -b 1)\n"); 147 | printf("\t-candidate <file>\n"); 148 | printf("\t\tRead ground-truth label sets from <file>,\n"); 149 | printf("\t\tone candidate set per test instance\n"); 150 | printf("\nExamples:\n"); 151 | printf("./score -predict predict0.txt -candidate can0.txt\n\n"); 152 | return 0; 153 | } 154 | if ((i = ArgPos((char *)"-predict", argc, argv)) > 0) strcpy(predict_file, argv[i + 1]); 155 | if ((i = ArgPos((char *)"-candidate", argc, argv)) > 0) strcpy(candidate_file, argv[i + 1]); 156 | TrainModel(); 157 | return 0; 158 | } -------------------------------------------------------------------------------- /windows/evaluate/run.bat: 
-------------------------------------------------------------------------------- 1 | @echo off 2 | 3 | set vector_file=%1 4 | set times=10 5 | set portion=0.01 6 | set output_file=result.txt 7 | 8 | set vocab_file=program\vocab.txt 9 | set label_file=program\label.txt 10 | set /a T=%times%-1 11 | 12 | program\preprocess.exe -vocab %vocab_file% -vector %vector_file% -label %label_file% -output workspace\ -debug 2 -binary 1 -times %times% -portion %portion% 13 | 14 | for /l %%i in (0,1,%T%) do liblinear\windows\train.exe -s 0 -q workspace\train%%i workspace\model%%i 15 | for /l %%i in (0,1,%T%) do liblinear\windows\predict.exe -b 1 -q workspace\test%%i workspace\model%%i workspace\predict%%i 16 | for /l %%i in (0,1,%T%) do program\score.exe -predict workspace\predict%%i -candidate workspace\can%%i>>%output_file% -------------------------------------------------------------------------------- /windows/evaluate/score.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | 4 | macro_f1 = 0 5 | micro_f1 = 0 6 | a = 0 7 | b = 0 8 | 9 | input_file = sys.argv[1] 10 | fi = open(input_file, 'r') 11 | for line in fi: 12 | if line[0:9] == 'macro-f1:': 13 | macro_f1 = macro_f1 + float(line.split(':')[1]) 14 | a = a + 1 15 | if line[0:9] == 'micro-f1:': 16 | micro_f1 = micro_f1 + float(line.split(':')[1]) 17 | b = b + 1 18 | fi.close() 19 | 20 | macro_f1 = macro_f1 / a 21 | micro_f1 = micro_f1 / b 22 | 23 | print("Macro-F1: " + str(macro_f1)) 24 | print("Micro-F1: " + str(micro_f1)) 25 | -------------------------------------------------------------------------------- /windows/line.exe: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tangjianpku/LINE/d5f840941e0f4026090d1b1feeaf15da38e2b24b/windows/line.exe -------------------------------------------------------------------------------- /windows/normalize.cpp: -------------------------------------------------------------------------------- 1 | #include <stdio.h> 2 | #include <stdlib.h> 3 | #include <string.h> 4 | #include <math.h> 5 | #include <pthread.h> 6 | 7 | #define MAX_STRING 100 8 | 9 | typedef float real; // Precision of float numbers 10 | 11 | char input_file[MAX_STRING], output_file[MAX_STRING]; 12 | int binary = 0; 13 | 14 | void Normalize() 15 | { 16 | long long num_vertices, vector_dim, a, b; 17 | char name[MAX_STRING], ch; 18 | real *vec; 19 | double len; 20 | 21 | FILE *fi, *fo; 22 | 23 | fi = fopen(input_file, "rb"); 24 | fo = fopen(output_file, "wb"); 25 | 26 | fscanf(fi, "%lld %lld", &num_vertices, &vector_dim); 27 | vec = (real *)malloc(vector_dim * sizeof(real)); 28 | fprintf(fo, "%lld %lld\n", num_vertices, vector_dim); 29 | for (a = 0; a < num_vertices; a++) 30 | { 31 | fscanf(fi, "%s%c", name, &ch); 32 | for (b = 0; b < vector_dim; b++) fread(&vec[b], sizeof(real), 1, fi); 33 | len = 0; 34 | for (b = 0; b < vector_dim; b++) len += vec[b] * vec[b]; 35 | len = sqrt(len); 36 | for (b = 0; b < vector_dim; b++) vec[b] /= len; 37 | 38 | fprintf(fo, "%s ", name); 39 | if (binary) 40 | { 41 | for (b = 0; b < vector_dim; b++) 42 | fwrite(&vec[b], sizeof(real), 1, fo); 43 | } 44 | else 45 | { 46 | for (b = 0; b < vector_dim; b++) 47 | fprintf(fo, "%lf ", vec[b]); 48 | } 49 | fprintf(fo, "\n"); 50 | } 51 | free(vec); 52 | fclose(fi); 53 | fclose(fo); 54 | } 55 | 56 | int ArgPos(char *str, int argc, char **argv) { 57 | int a; 58 | for (a = 1; a < argc; a++) if (!strcmp(str, argv[a])) { 59 | if (a == argc - 1) { 60 | printf("Argument 
missing for %s\n", str); 61 | exit(1); 62 | } 63 | return a; 64 | } 65 | return -1; 66 | } 67 | 68 | int main(int argc, char **argv) { 69 | int i; 70 | if (argc == 1) { 71 | printf("Normalize vertex embeddings by setting their L2 norm as 1\n\n"); 72 | printf("Options:\n"); 73 | printf("Parameters for training:\n"); 74 | printf("\t-input <file>\n"); 75 | printf("\t\tThe original vertex embeddings\n"); 76 | printf("\t-output <file>\n"); 77 | printf("\t\tUse <file> to save the normalized vertex embeddings\n"); 78 | printf("\t-binary <int>\n"); 79 | printf("\t\tSave the learnt embeddings in binary mode; default is 0 (off)\n"); 80 | printf("\nExamples:\n"); 81 | printf("./normalize -input vec_wo_norm.txt -output vec_norm.txt -binary 1\n\n"); 82 | return 0; 83 | } 84 | if ((i = ArgPos((char *)"-input", argc, argv)) > 0) strcpy(input_file, argv[i + 1]); 85 | if ((i = ArgPos((char *)"-output", argc, argv)) > 0) strcpy(output_file, argv[i + 1]); 86 | if ((i = ArgPos((char *)"-binary", argc, argv)) > 0) binary = atoi(argv[i + 1]); 87 | Normalize(); 88 | return 0; 89 | } -------------------------------------------------------------------------------- /windows/preprocess_youtube.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | 4 | fi = open(sys.argv[1], 'r') 5 | fo = open(sys.argv[2], 'w') 6 | for line in fi: 7 | items = line.strip().split() 8 | fo.write('{}\t{}\t1\n'.format(items[0], items[1])) 9 | fo.write('{}\t{}\t1\n'.format(items[1], items[0])) 10 | fi.close() 11 | fo.close() 12 | -------------------------------------------------------------------------------- /windows/reconstruct.cpp: -------------------------------------------------------------------------------- 1 | #include <stdio.h> 2 | #include <stdlib.h> 3 | #include <string.h> 4 | #include <math.h> 5 | #include <vector> 6 | #include <map> 7 | #include <queue> 8 | #include <algorithm> 9 | 10 | #define MAX_STRING 100 11 | 12 | const int hash_table_size = 30000000; 13 | 14 | typedef float real; // Precision of float numbers 15 | 16 | struct ClassVertex 17 | { 18 | double degree, sum_weight; 19 | char *name; 20 | }; 21 | 22 | struct Neighbor 23 | { 24 | int vid; 25 | double weight; 26 | friend bool operator < (Neighbor n1, Neighbor n2) 27 | { 28 | return n1.weight > n2.weight; 29 | } 30 | }; 31 | 32 | 33 | char train_file[MAX_STRING], output_file[MAX_STRING]; 34 | struct ClassVertex *vertex; 35 | int *vertex_hash_table; 36 | int max_num_vertices = 1000, num_vertices = 0; 37 | long long num_edges = 0; 38 | 39 | int max_depth = 1, max_k = 0; 40 | std::vector<int> vertex_set; 41 | std::vector<Neighbor> *neighbor; 42 | 43 | Neighbor *rank_list; 44 | std::map<int, double> vid2weight; 45 | 46 | /* Build a hash table, mapping each vertex name to a unique vertex id */ 47 | unsigned int Hash(char *key) 48 | { 49 | unsigned int seed = 131; 50 | unsigned int hash = 0; 51 | while (*key) 52 | { 53 | hash = hash * seed + (*key++); 54 | } 55 | return hash % hash_table_size; 56 | } 57 | 58 | void InitHashTable() 59 | { 60 | vertex_hash_table = (int *)malloc(hash_table_size * sizeof(int)); 61 | for (int k = 0; k != hash_table_size; k++) vertex_hash_table[k] = -1; 62 | } 63 | 64 | void InsertHashTable(char *key, int value) 65 | { 66 | int addr = Hash(key); 67 | while (vertex_hash_table[addr] != -1) addr = (addr + 1) % hash_table_size; 68 | vertex_hash_table[addr] = value; 69 | } 70 | 71 | int SearchHashTable(char *key) 72 | { 73 | int addr = Hash(key); 74 | while (1) 75 | { 76 | if (vertex_hash_table[addr] == -1) return -1; 77 | if (!strcmp(key, vertex[vertex_hash_table[addr]].name)) return vertex_hash_table[addr];
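// (Explanatory note, not part of the original source.) Open addressing with
// linear probing: on a name mismatch, advance to the next slot and retry.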
78 | addr = (addr + 1) % hash_table_size; 79 | } 80 | return -1; 81 | } 82 | 83 | /* Add a vertex to the vertex set */ 84 | int AddVertex(char *name) 85 | { 86 | int length = strlen(name) + 1; 87 | if (length > MAX_STRING) length = MAX_STRING; 88 | vertex[num_vertices].name = (char *)calloc(length, sizeof(char)); 89 | strcpy(vertex[num_vertices].name, name); 90 | vertex[num_vertices].sum_weight = 0; 91 | num_vertices++; 92 | if (num_vertices + 2 >= max_num_vertices) 93 | { 94 | max_num_vertices += 1000; 95 | vertex = (struct ClassVertex *)realloc(vertex, max_num_vertices * sizeof(struct ClassVertex)); 96 | } 97 | InsertHashTable(name, num_vertices - 1); 98 | return num_vertices - 1; 99 | } 100 | 101 | /* Read network from the training file */ 102 | void ReadData() 103 | { 104 | FILE *fin; 105 | char name_v1[MAX_STRING], name_v2[MAX_STRING], str[2 * MAX_STRING + 10000]; 106 | int vid, u, v; 107 | double weight; 108 | Neighbor nb; 109 | 110 | fin = fopen(train_file, "rb"); 111 | if (fin == NULL) 112 | { 113 | printf("ERROR: network file not found!\n"); 114 | exit(1); 115 | } 116 | num_edges = 0; 117 | while (fgets(str, sizeof(str), fin)) num_edges++; 118 | fclose(fin); 119 | printf("Number of edges: %lld \n", num_edges); 120 | 121 | fin = fopen(train_file, "rb"); 122 | num_vertices = 0; 123 | for (int k = 0; k != num_edges; k++) 124 | { 125 | fscanf(fin, "%s %s %lf", name_v1, name_v2, &weight); 126 | 127 | if (k % 10000 == 0) 128 | { 129 | printf("Reading edges: %.3lf%%%c", k / (double)(num_edges + 1) * 100, 13); 130 | fflush(stdout); 131 | } 132 | 133 | vid = SearchHashTable(name_v1); 134 | if (vid == -1) vid = AddVertex(name_v1); 135 | vertex[vid].degree += weight; 136 | 137 | vid = SearchHashTable(name_v2); 138 | if (vid == -1) vid = AddVertex(name_v2); 139 | vertex[vid].degree += weight; 140 | } 141 | fclose(fin); 142 | printf("Number of vertices: %d \n", num_vertices); 143 | 144 | neighbor = new std::vector<Neighbor>[num_vertices]; 145 | rank_list = (Neighbor *)calloc(num_vertices, sizeof(Neighbor)); 146 | 147 | fin = fopen(train_file, "rb"); 148 | for (long long k = 0; k != num_edges; k++) 149 | { 150 | fscanf(fin, "%s %s %lf", name_v1, name_v2, &weight); 151 | 152 | if (k % 10000 == 0) 153 | { 154 | printf("Reading neighbors: %.3lf%%%c", k / (double)(num_edges + 1) * 100, 13); 155 | fflush(stdout); 156 | } 157 | 158 | u = SearchHashTable(name_v1); 159 | 160 | v = SearchHashTable(name_v2); 161 | 162 | nb.vid = v; 163 | nb.weight = weight; 164 | neighbor[u].push_back(nb); 165 | } 166 | fclose(fin); 167 | printf("\n"); 168 | 169 | for (int k = 0; k != num_vertices; k++) 170 | { 171 | vertex[k].sum_weight = 0; 172 | int len = neighbor[k].size(); 173 | for (int i = 0; i != len; i++) 174 | vertex[k].sum_weight += neighbor[k][i].weight; 175 | } 176 | } 177 | 178 | void Reconstruct() 179 | { 180 | FILE *fo = fopen(output_file, "wb"); 181 | 182 | int sv, cv, cd, len, pst; 183 | long long num_edges_renet = 0; 184 | double cw, sum; 185 | std::queue<int> node, depth; 186 | std::queue<double> weight; 187 | 188 | for (sv = 0; sv != num_vertices; sv++) 189 | { 190 | if (sv % 10 == 0) 191 | { 192 | printf("%cProgress: %.3lf%%", 13, (real)sv / (real)(num_vertices + 1) * 100); 193 | fflush(stdout); 194 | } 195 | 196 | while (!node.empty()) node.pop(); 197 | while (!depth.empty()) depth.pop(); 198 | while (!weight.empty()) weight.pop(); 199 | vid2weight.clear(); 200 | 201 | for (int i = 0; i != num_vertices; i++) 202 | { 203 | rank_list[i].vid = i; 204 | rank_list[i].weight = 0; 205 | } 206 | 207 | len = 
neighbor[sv].size(); 208 | if (len > max_k) 209 | { 210 | for (int i = 0; i != len; i++) 211 | fprintf(fo, "%s\t%s\t%lf\n", vertex[sv].name, vertex[neighbor[sv][i].vid].name, neighbor[sv][i].weight); 212 | num_edges_renet += len; 213 | continue; 214 | } 215 | 216 | vid2weight[sv] += vertex[sv].degree / 10.0; // Set weights for self-links here! 217 | 218 | len = neighbor[sv].size(); 219 | sum = vertex[sv].sum_weight; 220 | 221 | node.push(sv); 222 | depth.push(0); 223 | weight.push(sum); 224 | 225 | while (!node.empty()) 226 | { 227 | cv = node.front(); 228 | cd = depth.front(); 229 | cw = weight.front(); 230 | 231 | node.pop(); 232 | depth.pop(); 233 | weight.pop(); 234 | 235 | if (cd != 0) vid2weight[cv] += cw; 236 | 237 | if (cd < max_depth) 238 | { 239 | len = neighbor[cv].size(); 240 | sum = vertex[cv].sum_weight; 241 | 242 | for (int i = 0; i != len; i++) 243 | { 244 | node.push(neighbor[cv][i].vid); 245 | depth.push(cd + 1); 246 | weight.push(cw * neighbor[cv][i].weight / sum); 247 | } 248 | } 249 | } 250 | 251 | pst = 0; 252 | std::map<int, double>::iterator iter; 253 | for (iter = vid2weight.begin(); iter != vid2weight.end(); iter++) 254 | { 255 | rank_list[pst].vid = (iter->first); 256 | rank_list[pst].weight = (iter->second); 257 | pst++; 258 | } 259 | std::sort(rank_list, rank_list + pst); 260 | 261 | for (int i = 0; i != max_k; i++) 262 | { 263 | if (i == pst) break; 264 | fprintf(fo, "%s\t%s\t%.6lf\n", vertex[sv].name, vertex[rank_list[i].vid].name, rank_list[i].weight); 265 | num_edges_renet++; 266 | } 267 | } 268 | printf("\n"); 269 | fclose(fo); 270 | 271 | printf("Number of edges in reconstructed network: %lld\n", num_edges_renet); 272 | return; 273 | } 274 | 275 | void TrainLINE() 276 | { 277 | InitHashTable(); 278 | ReadData(); 279 | Reconstruct(); 280 | } 281 | 282 | int ArgPos(char *str, int argc, char **argv) { 283 | int a; 284 | for (a = 1; a < argc; a++) if (!strcmp(str, argv[a])) { 285 | if (a == argc - 1) { 286 | printf("Argument missing for %s\n", str); 287 | exit(1); 288 | } 289 | return a; 290 | } 291 | return -1; 292 | } 293 | 294 | int main(int argc, char **argv) { 295 | int i; 296 | if (argc == 1) { 297 | printf("Reconstruct the network by using a Breadth-First-Search strategy\n\n"); 298 | printf("Options:\n"); 299 | printf("Parameters for training:\n"); 300 | printf("\t-train <file>\n"); 301 | printf("\t\tReconstruct the network from <file>\n"); 302 | printf("\t-output <file>\n"); 303 | printf("\t\tUse <file> to save the reconstructed network\n"); 304 | printf("\t-depth <int>\n"); 305 | printf("\t\tThe maximum depth in the Breadth-First-Search; default is 1\n"); 306 | printf("\t-threshold <int>\n"); 307 | printf("\t\tFor a vertex whose degree is less than <int>, expand its neighbors until its degree reaches <int>\n"); 308 | printf("\nExamples:\n"); 309 | printf("./reconstruct -train net.txt -output net_dense.txt -depth 2 -threshold 1000\n\n"); 310 | return 0; 311 | } 312 | if ((i = ArgPos((char *)"-train", argc, argv)) > 0) strcpy(train_file, argv[i + 1]); 313 | if ((i = ArgPos((char *)"-output", argc, argv)) > 0) strcpy(output_file, argv[i + 1]); 314 | if ((i = ArgPos((char *)"-depth", argc, argv)) > 0) max_depth = atoi(argv[i + 1]); 315 | if ((i = ArgPos((char *)"-threshold", argc, argv)) > 0) max_k = atoi(argv[i + 1]); 316 | vertex = (struct ClassVertex *)calloc(max_num_vertices, sizeof(struct ClassVertex)); 317 | TrainLINE(); 318 | return 0; 319 | } -------------------------------------------------------------------------------- /windows/train_youtube.bat: 
-------------------------------------------------------------------------------- 1 | @echo off 2 | 3 | python3 preprocess_youtube.py youtube-links.txt net_youtube.txt 4 | reconstruct.exe -train net_youtube.txt -output net_youtube_dense.txt -depth 2 -threshold 1000 5 | line.exe -train net_youtube_dense.txt -output vec_1st_wo_norm.txt -binary 1 -size 128 -order 1 -negative 5 -samples 10000 -threads 40 6 | line.exe -train net_youtube_dense.txt -output vec_2nd_wo_norm.txt -binary 1 -size 128 -order 2 -negative 5 -samples 10000 -threads 40 7 | normalize.exe -input vec_1st_wo_norm.txt -output vec_1st.txt -binary 1 8 | normalize.exe -input vec_2nd_wo_norm.txt -output vec_2nd.txt -binary 1 9 | concatenate.exe -input1 vec_1st.txt -input2 vec_2nd.txt -output vec_all.txt -binary 1 10 | 11 | cd evaluate 12 | run.bat ../vec_all.txt 13 | python3 score.py result.txt 14 | cd .. --------------------------------------------------------------------------------
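The binary embedding files produced with -binary 1 (e.g. vec_all.txt above) follow the format written by normalize.cpp: a text header "num_vertices dim", then, for each vertex, its name, one space, dim float32 values, and a newline byte. Below is a minimal Python sketch of a reader, assuming float32 values on a little-endian machine; the helper name load_line_embeddings is ours, not part of the toolkit:
```
import struct

def load_line_embeddings(path):
    vectors = {}
    with open(path, 'rb') as f:
        # Header: "<num_vertices> <dim>\n" as plain text.
        num_vertices, dim = map(int, f.readline().split())
        for _ in range(num_vertices):
            # The vertex name is terminated by a single space.
            name = b''
            ch = f.read(1)
            while ch != b' ':
                name += ch
                ch = f.read(1)
            # dim little-endian float32 values follow, then one '\n' byte.
            vec = struct.unpack('<%df' % dim, f.read(4 * dim))
            f.read(1)
            vectors[name.decode()] = vec
    return vectors
```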