├── nbproject
│   ├── private
│   │   ├── config.properties
│   │   ├── private.properties
│   │   └── private.xml
│   ├── genfiles.properties
│   ├── project.xml
│   ├── project.properties
│   └── build-impl.xml
├── dist
│   └── libsvm-java.jar
├── manifest.mf
├── demo.sh
├── src
│   ├── ca
│   │   └── uwo
│   │       └── csd
│   │           └── ai
│   │               └── nlp
│   │                   ├── libsvm
│   │                   │   ├── svm_print_interface.java
│   │                   │   ├── svm_node.java
│   │                   │   ├── svm_problem.java
│   │                   │   ├── ex
│   │                   │   │   ├── Instance.java
│   │                   │   │   ├── SVMPredictor.java
│   │                   │   │   └── SVMTrainer.java
│   │                   │   ├── svm_model.java
│   │                   │   └── svm_parameter.java
│   │                   ├── kernel
│   │                   │   ├── CustomKernel.java
│   │                   │   ├── KernelManager.java
│   │                   │   ├── CompositeKernel.java
│   │                   │   ├── LinearKernel.java
│   │                   │   ├── RBFKernel.java
│   │                   │   └── TreeKernel.java
│   │                   └── common
│   │                       ├── Tree.java
│   │                       └── SparseVector.java
│   ├── utils
│   │   └── DataFileReader.java
│   ├── Demo.java
│   └── svm_scale.java
├── check.sh
├── README.md
├── COPYRIGHT
├── COPYRIGHT.libsvm
└── README.libsvm
/nbproject/private/config.properties:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/dist/libsvm-java.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/syeedibnfaiz/libsvm-java-kernel/HEAD/dist/libsvm-java.jar
--------------------------------------------------------------------------------
/manifest.mf:
--------------------------------------------------------------------------------
1 | Manifest-Version: 1.0
2 | X-COMMENT: Main-Class will be added automatically by build
3 |
4 |
--------------------------------------------------------------------------------
/demo.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #build
3 | ant
4 | java -cp dist/libsvm-java.jar Demo a1a.train a1a.test a1a.out
5 |
6 |
--------------------------------------------------------------------------------
/src/ca/uwo/csd/ai/nlp/libsvm/svm_print_interface.java:
--------------------------------------------------------------------------------
1 | package ca.uwo.csd.ai.nlp.libsvm;
2 |
3 | public interface svm_print_interface {
4 |
5 | public void print(String s);
6 | }
7 |
--------------------------------------------------------------------------------
/nbproject/private/private.properties:
--------------------------------------------------------------------------------
1 | compile.on.save=true
2 | do.depend=false
3 | do.jar=true
4 | javac.debug=true
5 | javadoc.preview=true
6 | user.properties.file=C:\\Users\\tonatuni\\.netbeans\\7.0\\build.properties
7 |
--------------------------------------------------------------------------------
/nbproject/private/private.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
--------------------------------------------------------------------------------
/src/ca/uwo/csd/ai/nlp/libsvm/svm_node.java:
--------------------------------------------------------------------------------
1 | package ca.uwo.csd.ai.nlp.libsvm;
2 |
3 | public class svm_node implements java.io.Serializable {
4 |
5 | public Object data;
6 |
7 | public svm_node() {
8 | }
9 |
10 | public svm_node(Object data) {
11 | this.data = data;
12 | }
13 | }
14 |
--------------------------------------------------------------------------------
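
Unlike upstream LibSVM, where svm_node holds a fixed index/value pair, this svm_node wraps an arbitrary data object and leaves its interpretation to the registered kernel. A minimal illustration of that design (the class name SvmNodeSketch is made up for this sketch), using the SparseVector type defined elsewhere in this repository:

    import ca.uwo.csd.ai.nlp.common.SparseVector;
    import ca.uwo.csd.ai.nlp.libsvm.svm_node;

    public class SvmNodeSketch {
        public static void main(String[] args) {
            // a sparse feature vector: index 1 -> 0.5, index 3 -> 1.2
            SparseVector v = new SparseVector();
            v.add(1, 0.5);
            v.add(3, 1.2);

            // the node only carries the object; a CustomKernel decides how to use it
            svm_node node = new svm_node(v);
            System.out.println((SparseVector) node.data);   // prints "1:0.5 3:1.2 "
        }
    }
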
/src/ca/uwo/csd/ai/nlp/kernel/CustomKernel.java:
--------------------------------------------------------------------------------
1 | package ca.uwo.csd.ai.nlp.kernel;
2 |
3 | import ca.uwo.csd.ai.nlp.libsvm.svm_node;
4 |
5 | /**
6 | * Interface for a custom kernel function
7 | * @author Syeed Ibn Faiz
8 | */
9 | public interface CustomKernel {
10 | double evaluate(svm_node x, svm_node y);
11 | }
12 |
--------------------------------------------------------------------------------
/src/ca/uwo/csd/ai/nlp/libsvm/svm_problem.java:
--------------------------------------------------------------------------------
1 | package ca.uwo.csd.ai.nlp.libsvm;
2 |
3 | public class svm_problem implements java.io.Serializable {
4 |
5 | public int l;
6 | public double[] y;
7 | public svm_node[] x;
8 |
9 | public svm_problem(int l, double[] y, svm_node[] x) {
10 | this.l = l;
11 | this.y = y;
12 | this.x = x;
13 | }
14 |
15 | public svm_problem() {
16 | }
17 |
18 | }
19 |
--------------------------------------------------------------------------------
/nbproject/genfiles.properties:
--------------------------------------------------------------------------------
1 | build.xml.data.CRC32=1390663b
2 | build.xml.script.CRC32=b6f8644c
3 | build.xml.stylesheet.CRC32=28e38971@1.44.1.45
4 | # This file is used by a NetBeans-based IDE to track changes in generated files such as build-impl.xml.
5 | # Do not edit this file. You may delete it but then the IDE will never regenerate such files for you.
6 | nbproject/build-impl.xml.data.CRC32=1390663b
7 | nbproject/build-impl.xml.script.CRC32=3a4ce78d
8 | nbproject/build-impl.xml.stylesheet.CRC32=0ae3a408@1.44.1.45
9 |
--------------------------------------------------------------------------------
/nbproject/project.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project xmlns="http://www.netbeans.org/ns/project/1">
3 |     <type>org.netbeans.modules.java.j2seproject</type>
4 |     <configuration>
5 |         <data xmlns="http://www.netbeans.org/ns/j2se-project/3">
6 |             <name>libsvm-java</name>
7 |             <minimum-ant-version>1.6.5</minimum-ant-version>
8 |             <source-roots>
9 |                 <root id="src.dir"/>
10 |             </source-roots>
11 |             <test-roots>
12 |                 <root id="test.src.dir"/>
13 |             </test-roots>
14 |         </data>
15 |     </configuration>
16 | </project>
--------------------------------------------------------------------------------
/src/ca/uwo/csd/ai/nlp/kernel/KernelManager.java:
--------------------------------------------------------------------------------
1 | package ca.uwo.csd.ai.nlp.kernel;
2 |
3 | /**
4 | * KernelManager provides the custom kernel function to svm.
5 | * @author Syeed Ibn Faiz
6 | */
7 | public class KernelManager {
8 | static private CustomKernel customKernel;
9 |
10 | public static CustomKernel getCustomKernel() {
11 | return customKernel;
12 | }
13 |
14 | /**
15 | * Registers the custom kernel
16 | * @param customKernel
17 | */
18 | public static void setCustomKernel(CustomKernel customKernel) {
19 | KernelManager.customKernel = customKernel;
20 | }
21 |
22 | }
23 |
--------------------------------------------------------------------------------
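
A registration sketch (the class name is illustrative): per the javadoc above, KernelManager is how the svm code obtains the kernel, so the kernel should be registered before any training or prediction call.

    import ca.uwo.csd.ai.nlp.kernel.CustomKernel;
    import ca.uwo.csd.ai.nlp.kernel.KernelManager;
    import ca.uwo.csd.ai.nlp.kernel.LinearKernel;

    public class RegistrationSketch {
        public static void main(String[] args) {
            // register once, up front; a later call replaces the previous kernel
            KernelManager.setCustomKernel(new LinearKernel());

            CustomKernel active = KernelManager.getCustomKernel();
            System.out.println("Active kernel: " + active.getClass().getSimpleName());
        }
    }
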
/src/ca/uwo/csd/ai/nlp/libsvm/ex/Instance.java:
--------------------------------------------------------------------------------
1 | package ca.uwo.csd.ai.nlp.libsvm.ex;
2 |
3 | /**
4 | *
5 | * @author Syeed Ibn Faiz
6 | */
7 | public class Instance {
8 | private double label;
9 | private Object data;
10 |
11 | public Instance(double label, Object data) {
12 | this.label = label;
13 | this.data = data;
14 | }
15 |
16 | public Object getData() {
17 | return data;
18 | }
19 |
20 | public void setData(Object data) {
21 | this.data = data;
22 | }
23 |
24 | public double getLabel() {
25 | return label;
26 | }
27 |
28 | public void setLabel(double label) {
29 | this.label = label;
30 | }
31 | }
32 |
--------------------------------------------------------------------------------
/check.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | #This script checks the integrity of the library
3 | #The output produced by the library is matched against
4 | #that produced by the original tool
5 |
6 | #generate output
7 | #note that the Demo uses the linear kernel
8 | java -cp dist/libsvm-java.jar Demo a1a.train a1a.test a1a.out
9 |
10 | #download libsvm
11 | wget http://www.csie.ntu.edu.tw/~cjlin/cgi-bin/libsvm.cgi?+http://www.csie.ntu.edu.tw/~cjlin/libsvm+tar.gz
12 | mv libsvm* libsvm.tar.gz
13 | tar xzf libsvm.tar.gz
14 | rm libsvm.tar.gz
15 | cd libsvm*
16 | make
17 | #generate output using the linear kernel
18 | ./svm-train -t 0 ../a1a.train a1a.train.model
19 | ./svm-predict ../a1a.test a1a.train.model a1a.out
20 | cd ..
21 | diff -w a1a.out ./libsvm*/a1a.out
22 |
23 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | A refactored Java port of LibSVM that makes it easy to plug in a custom kernel.
2 |
3 | Use demo.sh to run the demo program.
4 | To learn how to use the library, see src/Demo.java.
5 |
6 | To write a kernel, all you have to do is implement kernel.CustomKernel and
7 | then register your kernel with kernel.KernelManager.
8 |
9 | For example, the following code snippet registers an anonymous custom kernel:
10 |     KernelManager.setCustomKernel(new CustomKernel() {
11 |         @Override
12 |         public double evaluate(svm_node x, svm_node y) {
13 |             //do something fancy
14 |             return 0.3141592654;
15 |         }
16 |     });
17 |
18 |
19 | Syeed Ibn Faiz
20 | University of Western Ontario
21 | syeedibnfaiz@gmail.com
22 |
--------------------------------------------------------------------------------
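
A compact end-to-end sketch of the workflow the README describes (it mirrors src/Demo.java; the class name is illustrative and the file names are the a1a data used by demo.sh and check.sh):

    import ca.uwo.csd.ai.nlp.kernel.KernelManager;
    import ca.uwo.csd.ai.nlp.kernel.LinearKernel;
    import ca.uwo.csd.ai.nlp.libsvm.ex.Instance;
    import ca.uwo.csd.ai.nlp.libsvm.ex.SVMPredictor;
    import ca.uwo.csd.ai.nlp.libsvm.ex.SVMTrainer;
    import ca.uwo.csd.ai.nlp.libsvm.svm_model;
    import ca.uwo.csd.ai.nlp.libsvm.svm_parameter;
    import utils.DataFileReader;

    public class QuickStart {
        public static void main(String[] args) throws Exception {
            KernelManager.setCustomKernel(new LinearKernel());   // 1. register a kernel
            Instance[] train = DataFileReader.readDataFile("a1a.train");
            Instance[] test = DataFileReader.readDataFile("a1a.test");

            svm_model model = SVMTrainer.train(train, new svm_parameter()); // 2. train
            double[] predictions = SVMPredictor.predict(test, model, true); // 3. predict (prints accuracy)
            System.out.println("Predicted " + predictions.length + " labels");
        }
    }
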
/src/ca/uwo/csd/ai/nlp/kernel/CompositeKernel.java:
--------------------------------------------------------------------------------
1 | package ca.uwo.csd.ai.nlp.kernel;
2 |
3 | import java.util.ArrayList;
4 | import java.util.List;
5 | import ca.uwo.csd.ai.nlp.libsvm.svm_node;
6 |
7 | /**
8 | *
9 | * @author Syeed Ibn Faiz
10 | */
11 | public class CompositeKernel implements CustomKernel {
12 | private List<CustomKernel> kernels;
13 |
14 | public CompositeKernel() {
15 | this(new ArrayList<CustomKernel>());
16 | }
17 |
18 | public CompositeKernel(List<CustomKernel> kernels) {
19 | this.kernels = kernels;
20 | }
21 |
22 | @Override
23 | public double evaluate(svm_node x, svm_node y) {
24 | double value = 0.0;
25 | for (CustomKernel kernel : kernels) {
26 | value += kernel.evaluate(x, y);
27 | }
28 | return value;
29 | }
30 | }
31 |
--------------------------------------------------------------------------------
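
A usage sketch (illustrative class name): CompositeKernel has no weighting, so evaluate simply returns the sum of its members' values. Here a linear and an RBF kernel are combined and registered:

    import java.util.ArrayList;
    import java.util.List;
    import ca.uwo.csd.ai.nlp.kernel.CompositeKernel;
    import ca.uwo.csd.ai.nlp.kernel.CustomKernel;
    import ca.uwo.csd.ai.nlp.kernel.KernelManager;
    import ca.uwo.csd.ai.nlp.kernel.LinearKernel;
    import ca.uwo.csd.ai.nlp.kernel.RBFKernel;
    import ca.uwo.csd.ai.nlp.libsvm.svm_parameter;

    public class CompositeKernelSketch {
        public static void main(String[] args) {
            svm_parameter param = new svm_parameter();
            param.gamma = 0.5;                        // read by the RBF member

            List<CustomKernel> members = new ArrayList<CustomKernel>();
            members.add(new LinearKernel());
            members.add(new RBFKernel(param));

            // evaluate(x, y) returns the sum of the member kernels' values
            KernelManager.setCustomKernel(new CompositeKernel(members));
        }
    }
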
/src/ca/uwo/csd/ai/nlp/common/Tree.java:
--------------------------------------------------------------------------------
1 | package ca.uwo.csd.ai.nlp.common;
2 |
3 | import java.util.ArrayList;
4 | import java.util.List;
5 |
6 | /**
7 | * Dummy Tree data structure
8 | * @author Syeed Ibn Faiz
9 | */
10 | public class Tree {
11 | private String value;
12 | private List<Tree> children;
13 |
14 | public Tree(String value) {
15 | this.value = value;
16 | children = new ArrayList<Tree>();
17 | }
18 |
19 | public Tree(String value, List<Tree> children) {
20 | this.value = value;
21 | this.children = children;
22 | }
23 |
24 | public List<Tree> getChildrenAsList() {
25 | return children;
26 | }
27 |
28 | public String value() {
29 | return value;
30 | }
31 |
32 | public boolean isLeaf() {
33 | return children.isEmpty();
34 | }
35 | }
36 |
--------------------------------------------------------------------------------
/src/ca/uwo/csd/ai/nlp/kernel/LinearKernel.java:
--------------------------------------------------------------------------------
1 | package ca.uwo.csd.ai.nlp.kernel;
2 |
3 | import ca.uwo.csd.ai.nlp.common.SparseVector;
4 | import ca.uwo.csd.ai.nlp.libsvm.svm_node;
5 | import java.io.Serializable;
6 |
7 | /**
8 | * LinearKernel implements a linear kernel function.
9 | * @author Syeed Ibn Faiz
10 | */
11 | public class LinearKernel implements CustomKernel, Serializable {
12 |
13 | @Override
14 | public double evaluate(svm_node x, svm_node y) {
15 | if (!(x.data instanceof SparseVector) || !(y.data instanceof SparseVector)) {
16 | throw new RuntimeException("Could not find sparse vectors in svm_nodes");
17 | }
18 | SparseVector v1 = (SparseVector) x.data;
19 | SparseVector v2 = (SparseVector) y.data;
20 |
21 | return v1.dot(v2);
22 | }
23 | }
24 |
--------------------------------------------------------------------------------
/src/ca/uwo/csd/ai/nlp/libsvm/svm_model.java:
--------------------------------------------------------------------------------
1 | //
2 | // svm_model
3 | //
4 | package ca.uwo.csd.ai.nlp.libsvm;
5 |
6 | public class svm_model implements java.io.Serializable {
7 |
8 | public svm_parameter param; // parameter
9 | public int nr_class; // number of classes, = 2 in regression/one class svm
10 | public int l; // total #SV
11 | public svm_node[] SV; // SVs (SV[l])
12 | public double[][] sv_coef; // coefficients for SVs in decision functions (sv_coef[k-1][l])
13 | public double[] rho; // constants in decision functions (rho[k*(k-1)/2])
14 | public double[] probA; // pairwise probability information
15 | public double[] probB;
16 | // for classification only
17 | public int[] label; // label of each class (label[k])
18 | public int[] nSV; // number of SVs for each class (nSV[k])
19 | // nSV[0] + nSV[1] + ... + nSV[k-1] = l
20 | };
21 |
--------------------------------------------------------------------------------
/COPYRIGHT:
--------------------------------------------------------------------------------
1 |
2 | Copyright (c) 2000-2012 Chih-Chung Chang and Chih-Jen Lin
3 | All rights reserved.
4 |
5 | Redistribution and use in source and binary forms, with or without
6 | modification, are permitted provided that the following conditions
7 | are met:
8 |
9 | 1. Redistributions of source code must retain the above copyright
10 | notice, this list of conditions and the following disclaimer.
11 |
12 | 2. Redistributions in binary form must reproduce the above copyright
13 | notice, this list of conditions and the following disclaimer in the
14 | documentation and/or other materials provided with the distribution.
15 |
16 | 3. Neither name of copyright holders nor the names of its contributors
17 | may be used to endorse or promote products derived from this software
18 | without specific prior written permission.
19 |
20 |
21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 | ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR
25 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
26 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
27 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
28 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
29 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
30 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
31 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 |
--------------------------------------------------------------------------------
/COPYRIGHT.libsvm:
--------------------------------------------------------------------------------
1 |
2 | Copyright (c) 2000-2012 Chih-Chung Chang and Chih-Jen Lin
3 | All rights reserved.
4 |
5 | Redistribution and use in source and binary forms, with or without
6 | modification, are permitted provided that the following conditions
7 | are met:
8 |
9 | 1. Redistributions of source code must retain the above copyright
10 | notice, this list of conditions and the following disclaimer.
11 |
12 | 2. Redistributions in binary form must reproduce the above copyright
13 | notice, this list of conditions and the following disclaimer in the
14 | documentation and/or other materials provided with the distribution.
15 |
16 | 3. Neither name of copyright holders nor the names of its contributors
17 | may be used to endorse or promote products derived from this software
18 | without specific prior written permission.
19 |
20 |
21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22 | ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
24 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR
25 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
26 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
27 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
28 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
29 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
30 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
31 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32 |
--------------------------------------------------------------------------------
/src/ca/uwo/csd/ai/nlp/libsvm/svm_parameter.java:
--------------------------------------------------------------------------------
1 | package ca.uwo.csd.ai.nlp.libsvm;
2 |
3 | public class svm_parameter implements Cloneable, java.io.Serializable {
4 | /* svm_type */
5 |
6 | public static final int C_SVC = 0;
7 | public static final int NU_SVC = 1;
8 | public static final int ONE_CLASS = 2;
9 | public static final int EPSILON_SVR = 3;
10 | public static final int NU_SVR = 4;
11 |
12 | /* kernel_type */
13 | public static final int CUSTOM = 0;
14 |
15 | public int svm_type;
16 | public int kernel_type;
17 | public int degree; // for poly
18 | public double gamma; // for poly/rbf/sigmoid
19 | public double coef0; // for poly/sigmoid
20 | // these are for training only
21 | public double cache_size; // in MB
22 | public double eps; // stopping criteria
23 | public double C; // for C_SVC, EPSILON_SVR and NU_SVR
24 | public int nr_weight; // for C_SVC
25 | public int[] weight_label; // for C_SVC
26 | public double[] weight; // for C_SVC
27 | public double nu; // for NU_SVC, ONE_CLASS, and NU_SVR
28 | public double p; // for EPSILON_SVR
29 | public int shrinking; // use the shrinking heuristics
30 | public int probability; // do probability estimates
31 |
32 |
33 | public svm_parameter() {
34 | svm_type = svm_parameter.C_SVC;
35 | kernel_type = svm_parameter.CUSTOM;
36 | degree = 3;
37 | gamma = 0; // 1/num_features
38 | coef0 = 0;
39 | nu = 0.5;
40 | cache_size = 100;
41 | C = 1;
42 | eps = 1e-3;
43 | p = 0.1;
44 | shrinking = 1;
45 | probability = 0;
46 | nr_weight = 0;
47 | weight_label = new int[0];
48 | weight = new double[0];
49 | }
50 |
51 | public Object clone() {
52 | try {
53 | return super.clone();
54 | } catch (CloneNotSupportedException e) {
55 | return null;
56 | }
57 | }
58 | }
59 |
--------------------------------------------------------------------------------
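
A configuration sketch (illustrative class name): the no-argument constructor applies the defaults above (C_SVC, CUSTOM kernel, C=1, eps=1e-3, ...), so code typically just overrides a handful of fields before handing the object to SVMTrainer.train:

    import ca.uwo.csd.ai.nlp.libsvm.svm_parameter;

    public class ParameterSketch {
        public static svm_parameter makeParam() {
            svm_parameter param = new svm_parameter(); // defaults: C_SVC, CUSTOM kernel, C=1, eps=1e-3
            param.C = 10;           // heavier penalty on training errors
            param.gamma = 0.125;    // consumed by kernels that read it, e.g. RBFKernel
            param.probability = 1;  // request probability estimates at training time
            return param;
        }
    }
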
/src/ca/uwo/csd/ai/nlp/kernel/RBFKernel.java:
--------------------------------------------------------------------------------
1 | package ca.uwo.csd.ai.nlp.kernel;
2 |
3 | import ca.uwo.csd.ai.nlp.common.SparseVector;
4 | import ca.uwo.csd.ai.nlp.libsvm.svm_node;
5 | import ca.uwo.csd.ai.nlp.common.SparseVector.Element;
6 | import ca.uwo.csd.ai.nlp.libsvm.svm_parameter;
7 |
8 | /**
9 | * RBFKernel implements an RBF kernel.
10 | * @author Syeed Ibn Faiz
11 | */
12 | public class RBFKernel implements CustomKernel {
13 |
14 | svm_parameter param;
15 | public RBFKernel(svm_parameter param) {
16 | this.param = param;
17 | }
18 |
19 |
20 | @Override
21 | public double evaluate(svm_node x, svm_node y) {
22 | if (!(x.data instanceof SparseVector) || !(y.data instanceof SparseVector)) {
23 | throw new RuntimeException("svm_nodes should contain sparse vectors.");
24 | }
25 |
26 | SparseVector v1 = (SparseVector) x.data;
27 | SparseVector v2 = (SparseVector) y.data;
28 | double result = 0.0;
29 | int i = 0;
30 | int j = 0;
31 |
32 | while (i < v1.size() && j < v2.size()) {
33 | Element e1 = v1.get(i);
34 | Element e2 = v2.get(j);
35 |
36 | if (e1.index == e2.index) {
37 | double d = e1.value - e2.value;
38 | result += d * d;
39 | i++;
40 | j++;
41 | } else if (e1.index < e2.index) {
42 | result += e1.value * e1.value;
43 | i++;
44 | } else {
45 | result += e2.value * e2.value;
46 | j++;
47 | }
48 | }
49 |
50 | while (i < v1.size()) {
51 | Element e1 = v1.get(i);
52 | result += e1.value * e1.value;
53 | i++;
54 | }
55 |
56 | while (j < v2.size()) {
57 | Element e2 = v2.get(j);
58 | result += e2.value * e2.value;
59 | j++;
60 | }
61 |
62 | //System.out.println("score: " + result);
63 | return Math.exp(-param.gamma * result);
64 | }
65 |
66 | }
67 |
--------------------------------------------------------------------------------
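
A worked sketch of the computation above (illustrative class name): the merge loop accumulates the squared Euclidean distance between the two sparse vectors and the method returns exp(-gamma * distance), so for the vectors below the value is exp(-0.5 * 4.0):

    import ca.uwo.csd.ai.nlp.common.SparseVector;
    import ca.uwo.csd.ai.nlp.kernel.RBFKernel;
    import ca.uwo.csd.ai.nlp.libsvm.svm_node;
    import ca.uwo.csd.ai.nlp.libsvm.svm_parameter;

    public class RBFKernelSketch {
        public static void main(String[] args) {
            svm_parameter param = new svm_parameter();
            param.gamma = 0.5;

            SparseVector v1 = new SparseVector();
            v1.add(1, 1.0);
            v1.add(2, 2.0);
            SparseVector v2 = new SparseVector();
            v2.add(1, 1.0);                       // differs from v1 only at index 2

            RBFKernel kernel = new RBFKernel(param);
            // squared distance is 2.0^2 = 4.0, so this prints exp(-0.5 * 4.0)
            System.out.println(kernel.evaluate(new svm_node(v1), new svm_node(v2)));
        }
    }
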
/src/utils/DataFileReader.java:
--------------------------------------------------------------------------------
1 | package utils;
2 |
3 | import ca.uwo.csd.ai.nlp.common.SparseVector;
4 | import java.io.BufferedReader;
5 | import java.io.FileReader;
6 | import java.io.IOException;
7 | import java.util.ArrayList;
8 | import ca.uwo.csd.ai.nlp.libsvm.ex.Instance;
9 |
10 | /**
11 | * DataFileReader reads data files written in LibSVM format.
12 | * @author Syeed Ibn Faiz
13 | */
14 | public class DataFileReader {
15 |
16 | public static Instance[] readDataFile(String fileName) throws IOException {
17 | BufferedReader reader = new BufferedReader(new FileReader(fileName));
18 |
19 | ArrayList<Double> labels = new ArrayList<Double>();
20 | ArrayList<SparseVector> vectors = new ArrayList<SparseVector>();
21 |
22 | String line;
23 | int lineCount = 0;
24 | while ((line = reader.readLine()) != null) {
25 | lineCount++;
26 | String[] tokens = line.split("\\s+");
27 | if (tokens.length < 2) {
28 | System.err.println("Inappropriate file format: " + fileName);
29 | System.err.println("Error in line " + lineCount);
30 | System.exit(-1);
31 | }
32 |
33 | labels.add(Double.parseDouble(tokens[0]));
34 | SparseVector vector = new SparseVector(tokens.length - 1);
35 |
36 | for (int i = 1; i < tokens.length; i++) {
37 | String[] fields = tokens[i].split(":");
38 | if (fields.length < 2) {
39 | System.err.println("Inappropriate file format: " + fileName);
40 | System.err.println("Error in line " + lineCount);
41 | System.exit(-1);
42 | }
43 | int index = Integer.parseInt(fields[0]);
44 | double value = Double.parseDouble(fields[1]);
45 | vector.add(index, value);
46 | }
47 |
48 | vectors.add(vector);
49 | }
50 |
51 | Instance[] instances = new Instance[labels.size()];
52 | for (int i = 0; i < instances.length; i++) {
53 | instances[i] = new Instance(labels.get(i), vectors.get(i));
54 | }
55 |
56 | return instances;
57 | }
58 | }
59 |
--------------------------------------------------------------------------------
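
The reader expects the standard LibSVM text format: one instance per line, a numeric label followed by whitespace-separated index:value pairs (e.g. "-1 5:1 7:1 14:1"). A minimal usage sketch (illustrative class name):

    import ca.uwo.csd.ai.nlp.libsvm.ex.Instance;
    import utils.DataFileReader;

    public class ReaderSketch {
        public static void main(String[] args) throws Exception {
            Instance[] instances = DataFileReader.readDataFile("a1a.train");
            // each Instance carries the label and a SparseVector as its data object
            System.out.println(instances.length + " instances, first label = "
                    + instances[0].getLabel());
        }
    }
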
/nbproject/project.properties:
--------------------------------------------------------------------------------
1 | annotation.processing.enabled=true
2 | annotation.processing.enabled.in.editor=false
3 | annotation.processing.run.all.processors=true
4 | annotation.processing.source.output=${build.generated.sources.dir}/ap-source-output
5 | application.title=libsvm-java
6 | application.vendor=tonatuni
7 | build.classes.dir=${build.dir}/classes
8 | build.classes.excludes=**/*.java,**/*.form
9 | # This directory is removed when the project is cleaned:
10 | build.dir=build
11 | build.generated.dir=${build.dir}/generated
12 | build.generated.sources.dir=${build.dir}/generated-sources
13 | # Only compile against the classpath explicitly listed here:
14 | build.sysclasspath=ignore
15 | build.test.classes.dir=${build.dir}/test/classes
16 | build.test.results.dir=${build.dir}/test/results
17 | # Uncomment to specify the preferred debugger connection transport:
18 | #debug.transport=dt_socket
19 | debug.classpath=\
20 | ${run.classpath}
21 | debug.test.classpath=\
22 | ${run.test.classpath}
23 | # This directory is removed when the project is cleaned:
24 | dist.dir=dist
25 | dist.jar=${dist.dir}/libsvm-java.jar
26 | dist.javadoc.dir=${dist.dir}/javadoc
27 | endorsed.classpath=
28 | excludes=
29 | includes=**
30 | jar.compress=false
31 | javac.classpath=
32 | # Space-separated list of extra javac options
33 | javac.compilerargs=
34 | javac.deprecation=false
35 | javac.processorpath=\
36 | ${javac.classpath}
37 | javac.source=1.6
38 | javac.target=1.6
39 | javac.test.classpath=\
40 | ${javac.classpath}:\
41 | ${build.classes.dir}
42 | javac.test.processorpath=\
43 | ${javac.test.classpath}
44 | javadoc.additionalparam=
45 | javadoc.author=false
46 | javadoc.encoding=${source.encoding}
47 | javadoc.noindex=false
48 | javadoc.nonavbar=false
49 | javadoc.notree=false
50 | javadoc.private=false
51 | javadoc.splitindex=true
52 | javadoc.use=true
53 | javadoc.version=false
54 | javadoc.windowtitle=
55 | main.class=
56 | manifest.file=manifest.mf
57 | meta.inf.dir=${src.dir}/META-INF
58 | mkdist.disabled=false
59 | platform.active=default_platform
60 | run.classpath=\
61 | ${javac.classpath}:\
62 | ${build.classes.dir}
63 | # Space-separated list of JVM arguments used when running the project
64 | # (you may also define separate properties like run-sys-prop.name=value instead of -Dname=value
65 | # or test-sys-prop.name=value to set system properties for unit tests):
66 | run.jvmargs=
67 | run.test.classpath=\
68 | ${javac.test.classpath}:\
69 | ${build.test.classes.dir}
70 | source.encoding=UTF-8
71 | src.dir=src
72 | test.src.dir=test
73 |
--------------------------------------------------------------------------------
/src/Demo.java:
--------------------------------------------------------------------------------
1 |
2 | import java.io.BufferedWriter;
3 | import java.io.FileWriter;
4 | import java.io.IOException;
5 | import ca.uwo.csd.ai.nlp.kernel.KernelManager;
6 | import ca.uwo.csd.ai.nlp.kernel.LinearKernel;
7 | import ca.uwo.csd.ai.nlp.libsvm.ex.Instance;
8 | import ca.uwo.csd.ai.nlp.libsvm.ex.SVMPredictor;
9 | import ca.uwo.csd.ai.nlp.libsvm.ex.SVMTrainer;
10 | import ca.uwo.csd.ai.nlp.libsvm.svm_model;
11 | import ca.uwo.csd.ai.nlp.libsvm.svm_parameter;
12 | import utils.DataFileReader;
13 |
14 | /**
15 | * Demonstration of sample usage
16 | * @author Syeed Ibn Faiz
17 | */
18 | public class Demo {
19 |
20 | public static void testLinearKernel(String[] args) throws IOException, ClassNotFoundException {
21 | String trainFileName = args[0];
22 | String testFileName = args[1];
23 | String outputFileName = args[2];
24 |
25 | //Read training file
26 | Instance[] trainingInstances = DataFileReader.readDataFile(trainFileName);
27 |
28 | //Register kernel function
29 | KernelManager.setCustomKernel(new LinearKernel());
30 |
31 | //Setup parameters
32 | svm_parameter param = new svm_parameter();
33 |
34 | //Train the model
35 | System.out.println("Training started...");
36 | svm_model model = SVMTrainer.train(trainingInstances, param);
37 | System.out.println("Training completed.");
38 |
39 | //Save the trained model
40 | //SVMTrainer.saveModel(model, "a1a.model");
41 | //model = SVMPredictor.load_model("a1a.model");
42 |
43 | //Read test file
44 | Instance[] testingInstances = DataFileReader.readDataFile(testFileName);
45 | //Predict results
46 | double[] predictions = SVMPredictor.predict(testingInstances, model, true);
47 | writeOutputs(outputFileName, predictions);
48 | //SVMTrainer.doCrossValidation(trainingInstances, param, 10, true);
49 | //SVMTrainer.doInOrderCrossValidation(trainingInstances, param, 10, true);
50 | }
51 |
52 | private static void writeOutputs(String outputFileName, double[] predictions) throws IOException {
53 | BufferedWriter writer = new BufferedWriter(new FileWriter(outputFileName));
54 | for (double p : predictions) {
55 | writer.write(String.format("%.0f\n", p));
56 | }
57 | writer.close();
58 | }
59 |
60 | private static void showUsage() {
61 | System.out.println("Demo training-file testing-file output-file");
62 | }
63 |
64 | private static boolean checkArgument(String[] args) {
65 | return args.length == 3;
66 | }
67 |
68 | public static void main(String[] args) throws IOException, ClassNotFoundException {
69 | if (checkArgument(args)) {
70 | testLinearKernel(args);
71 | } else {
72 | showUsage();
73 | }
74 | }
75 |
76 | }
77 |
--------------------------------------------------------------------------------
/src/ca/uwo/csd/ai/nlp/libsvm/ex/SVMPredictor.java:
--------------------------------------------------------------------------------
1 | package ca.uwo.csd.ai.nlp.libsvm.ex;
2 |
3 | import ca.uwo.csd.ai.nlp.libsvm.svm;
4 | import ca.uwo.csd.ai.nlp.libsvm.svm_model;
5 | import ca.uwo.csd.ai.nlp.libsvm.svm_node;
6 | import java.io.IOException;
7 | import java.util.List;
8 |
9 | /**
10 | *
11 | * @author Syeed Ibn Faiz
12 | */
13 | public class SVMPredictor {
14 |
15 | public static double[] predict(List<Instance> instances, svm_model model) {
16 | return predict(instances, model, true);
17 | }
18 |
19 | public static double[] predict(List<Instance> instances, svm_model model, boolean displayResult) {
20 | Instance[] array = new Instance[instances.size()];
21 | array = instances.toArray(array);
22 | return predict(array, model, displayResult);
23 | }
24 |
25 | public static double predict(Instance instance, svm_model model, boolean displayResult) {
26 | return svm.svm_predict(model, new svm_node(instance.getData()));
27 | }
28 |
29 | public static double predictProbability(Instance instance, svm_model model, double[] probabilities) {
30 | return svm.svm_predict_probability(model, new svm_node(instance.getData()), probabilities);
31 | }
32 | public static double[] predict(Instance[] instances, svm_model model, boolean displayResult) {
33 | int total = 0;
34 | int correct = 0;
35 |
36 | int tp = 0;
37 | int fp = 0;
38 | int fn = 0;
39 |
40 | boolean binary = model.nr_class == 2;
41 | double[] predictions = new double[instances.length];
42 | int count = 0;
43 |
44 | for (Instance instance : instances) {
45 | double target = instance.getLabel();
46 | double p = svm.svm_predict(model, new svm_node(instance.getData()));
47 | predictions[count++] = p;
48 |
49 | ++total;
50 | if (p == target) {
51 | correct++;
52 | if (target > 0) {
53 | tp++;
54 | }
55 | } else if (target > 0) {
56 | fn++;
57 | } else {
58 | fp++;
59 | }
60 | }
61 | if (displayResult) {
62 | System.out.print("Accuracy = " + (double) correct / total * 100
63 | + "% (" + correct + "/" + total + ") (classification)\n");
64 |
65 | if (binary) {
66 | double precision = (double) tp / (tp + fp);
67 | double recall = (double) tp / (tp + fn);
68 | System.out.println("Precision: " + precision);
69 | System.out.println("Recall: " + recall);
70 | System.out.println("Fscore: " + 2 * precision * recall / (precision + recall));
71 | }
72 | }
73 | return predictions;
74 | }
75 |
76 | public static svm_model loadModel(String filePath) throws IOException, ClassNotFoundException {
77 | return svm.svm_load_model(filePath);
78 | }
79 | }
80 |
--------------------------------------------------------------------------------
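
A single-instance prediction sketch (illustrative class name; it assumes a model file saved earlier with SVMTrainer.saveModel). For predictProbability the caller supplies a double array of length model.nr_class, which is filled with per-class estimates as in upstream LibSVM; that call is only meaningful if the model was trained with param.probability = 1.

    import ca.uwo.csd.ai.nlp.common.SparseVector;
    import ca.uwo.csd.ai.nlp.kernel.KernelManager;
    import ca.uwo.csd.ai.nlp.kernel.LinearKernel;
    import ca.uwo.csd.ai.nlp.libsvm.ex.Instance;
    import ca.uwo.csd.ai.nlp.libsvm.ex.SVMPredictor;
    import ca.uwo.csd.ai.nlp.libsvm.svm_model;

    public class PredictSketch {
        public static void main(String[] args) throws Exception {
            KernelManager.setCustomKernel(new LinearKernel()); // same kernel as at training time
            svm_model model = SVMPredictor.loadModel("a1a.model");

            SparseVector v = new SparseVector();
            v.add(5, 1.0);
            Instance instance = new Instance(0.0, v);          // label is unused for plain prediction

            double label = SVMPredictor.predict(instance, model, false);
            double[] probabilities = new double[model.nr_class];
            double probLabel = SVMPredictor.predictProbability(instance, model, probabilities);
            System.out.println(label + " / " + probLabel);
        }
    }
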
/src/ca/uwo/csd/ai/nlp/kernel/TreeKernel.java:
--------------------------------------------------------------------------------
1 | package ca.uwo.csd.ai.nlp.kernel;
2 |
3 | //import edu.stanford.nlp.trees.Tree;
4 | import ca.uwo.csd.ai.nlp.common.Tree;
5 | import java.util.ArrayList;
6 | import java.util.List;
7 | import ca.uwo.csd.ai.nlp.libsvm.svm_node;
8 |
9 | /**
10 | * TreeKernel provides a naive implementation of the kernel function described in
11 | * 'Parsing with a single neuron: Convolution kernels for NLP problems'.
12 | * @author Syeed Ibn Faiz
13 | */
14 | public class TreeKernel implements CustomKernel {
15 | private final static int MAX_NODE = 300;
16 | double mem[][] = new double[MAX_NODE][MAX_NODE];
17 | private double lambda; //penalizing factor
18 |
19 | public TreeKernel() {
20 | this(0.5);
21 | }
22 |
23 | public TreeKernel(double lambda) {
24 | this.lambda = lambda;
25 | }
26 |
27 | @Override
28 | public double evaluate(svm_node x, svm_node y) {
29 | Object k1 = x.data;
30 | Object k2 = y.data;
31 |
32 | if (!(k1 instanceof Tree) || !(k2 instanceof Tree)) {
33 | throw new IllegalArgumentException("svm_node does not contain tree data.");
34 | }
35 |
36 | Tree t1 = (Tree) k1;
37 | Tree t2 = (Tree) k2;
38 |
39 | List<Tree> nodes1 = getNodes(t1);
40 | List<Tree> nodes2 = getNodes(t2);
41 |
42 | int N1 = Math.min(MAX_NODE, nodes1.size());
43 | int N2 = Math.min(MAX_NODE, nodes2.size());
44 |
45 | //fill mem with -1.0
46 | initMem(mem, N1, N2);
47 |
48 | double result = 0.0;
49 | for (int i = 0; i < N1; i++) {
50 | for (int j = 0; j < N2; j++) {
51 | result += compute(i, j, nodes1, nodes2, mem);
52 | }
53 | }
54 |
55 | return result;
56 | }
57 |
58 | /**
59 | * Efficient computation avoiding costly equals method of Tree
60 | * @param trees
61 | * @param t
62 | * @return
63 | */
64 | private int indexOf(List<Tree> trees, Tree t) {
65 | for (int i = 0; i < trees.size(); i++) {
66 | if (t == trees.get(i)) {
67 | return i;
68 | }
69 | }
70 | return -1;
71 | }
72 |
73 | private double compute(int i, int j, List<Tree> nodes1, List<Tree> nodes2, double[][] mem) {
74 | if (mem[i][j] >= 0) {
75 | return mem[i][j];
76 | }
77 | //if (sameProduction(nodes1.get(i), nodes2.get(j))) {
78 | if (nodes1.get(i).value().equals(nodes2.get(j).value()) &&
79 | nodes1.get(i).hashCode() == nodes2.get(j).hashCode()) { //similar hashCode -> same production
80 |
81 | mem[i][j] = lambda * lambda;
82 | if (!nodes1.get(i).isLeaf() && !nodes2.get(j).isLeaf()) {
83 | List<Tree> childList1 = nodes1.get(i).getChildrenAsList();
84 | List<Tree> childList2 = nodes2.get(j).getChildrenAsList();
85 | for (int k = 0; k < childList1.size(); k++) {
86 | //mem[i][j] *= 1 + compute(nodes1.indexOf(childList1.get(k)), nodes2.indexOf(childList2.get(k)), nodes1, nodes2, mem);
87 | mem[i][j] *= 1 + compute(indexOf(nodes1, childList1.get(k)), indexOf(nodes2, childList2.get(k)), nodes1, nodes2, mem);
88 | }
89 | }
90 | } else {
91 | mem[i][j] = 0.0;
92 | }
93 |
94 | return mem[i][j];
95 | }
96 |
97 | private boolean sameProduction(Tree t1, Tree t2) {
98 | if (t1.value().equals(t2.value())) {
99 | List<Tree> childList1 = t1.getChildrenAsList();
100 | List<Tree> childList2 = t2.getChildrenAsList();
101 | if (childList1.size() == childList2.size()) {
102 | for (int i = 0; i < childList1.size(); i++) {
103 | if (!childList1.get(i).value().equals(childList2.get(i).value())) {
104 | return false;
105 | }
106 | }
107 | return true;
108 | }
109 | }
110 | return false;
111 | }
112 | private void initMem(double[][] mem, int N1, int N2) {
113 | for (int i = 0; i < N1; i++) {
114 | for (int j = 0; j < N2; j++) {
115 | mem[i][j] = -1.0;
116 | }
117 | }
118 | }
119 | private List<Tree> getNodes(Tree t) {
120 | ArrayList<Tree> nodes = new ArrayList<Tree>();
121 | addNodes(t, nodes);
122 | return nodes;
123 | }
124 |
125 | private void addNodes(Tree t, List<Tree> nodes) {
126 | nodes.add(t);
127 | List<Tree> childList = t.getChildrenAsList();
128 | for (Tree child : childList) {
129 | addNodes(child, nodes);
130 | }
131 | }
132 | }
133 |
--------------------------------------------------------------------------------
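
A usage sketch with the dummy Tree class from common (illustrative class name). Because the production test compares node values together with hashCode, and Tree does not override hashCode, the clearest demonstration is the self-similarity of a tree with itself:

    import java.util.Arrays;
    import ca.uwo.csd.ai.nlp.common.Tree;
    import ca.uwo.csd.ai.nlp.kernel.TreeKernel;
    import ca.uwo.csd.ai.nlp.libsvm.svm_node;

    public class TreeKernelSketch {
        public static void main(String[] args) {
            // (S (NP John) (VP sleeps)) built from the dummy Tree class
            Tree np = new Tree("NP", Arrays.asList(new Tree("John")));
            Tree vp = new Tree("VP", Arrays.asList(new Tree("sleeps")));
            Tree s = new Tree("S", Arrays.asList(np, vp));

            TreeKernel kernel = new TreeKernel(0.5);
            // self-similarity: sums the lambda-decayed counts of s's common subtrees with itself
            System.out.println(kernel.evaluate(new svm_node(s), new svm_node(s)));
        }
    }
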
/src/ca/uwo/csd/ai/nlp/common/SparseVector.java:
--------------------------------------------------------------------------------
1 | package ca.uwo.csd.ai.nlp.common;
2 |
3 | import java.io.Serializable;
4 | import java.util.Arrays;
5 |
6 | /**
7 | * SparseVector stores a sparse vector in a memory
8 | * efficient manner. It stores the elements of a vector
9 | * as a list of index:value pairs.
10 | * @author Syeed Ibn Faiz
11 | */
12 | public class SparseVector implements Serializable {
13 |
14 | public static class Element implements Serializable, Comparable<Element> {
15 | public int index;
16 | public double value;
17 |
18 | public Element(int index, double value) {
19 | this.index = index;
20 | this.value = value;
21 | }
22 |
23 | @Override
24 | public int compareTo(Element o) {
25 | if (index < o.index) {
26 | return -1;
27 | } else if (index > o.index) {
28 | return 1;
29 | }
30 | return 0;
31 | }
32 | }
33 |
34 | private Element[] elements;
35 | private int size;
36 | private final int MAX_SIZE = 100;
37 |
38 | public SparseVector(int capacity) {
39 | elements = new Element[capacity];
40 | }
41 |
42 | public SparseVector() {
43 | elements = new Element[MAX_SIZE];
44 | }
45 |
46 | public void add(int index, double value) {
47 | add(new Element(index, value));
48 | }
49 |
50 | public void add(Element elem) {
51 | if (isFull()) {
52 | resize();
53 | }
54 | elements[size++] = elem;
55 | }
56 |
57 | public Element get(int n) {
58 | if (n >= size) {
59 | return null;
60 | }
61 | return elements[n];
62 | }
63 |
64 | public boolean isFull() {
65 | return size == elements.length;
66 | }
67 |
68 | public boolean isEmpty() {
69 | return size == 0;
70 | }
71 |
72 | public int size() {
73 | return size;
74 | }
75 |
76 | private void resize() {
77 | Element[] newElements = new Element[size + MAX_SIZE];
78 | for (int i = 0; i < elements.length; i++) {
79 | newElements[i] = elements[i];
80 | elements[i] = null;
81 | }
82 | elements = newElements;
83 | }
84 |
85 | public void sortByIndices() {
86 | Arrays.sort(elements, 0, size);
87 | }
88 |
89 | @Override
90 | public String toString() {
91 | StringBuilder sb = new StringBuilder();
92 | for (int i = 0; i < size; i++) {
93 | Element element = elements[i];
94 | sb.append(element.index).append(":").append(element.value).append(" ");
95 | }
96 | return sb.toString();
97 | }
98 | /**
99 | * Computes dot product between this vector and the argument vector
100 | * @param vector
101 | * @return
102 | */
103 | public double dot(SparseVector vector) {
104 | SparseVector v1 = this;
105 | SparseVector v2 = vector;
106 | double result = 0.0;
107 | int i = 0;
108 | int j = 0;
109 |
110 | while (i < v1.size() && j < v2.size()) {
111 | Element e1 = v1.get(i);
112 | Element e2 = v2.get(j);
113 |
114 | if (e1.index == e2.index) {
115 | result += e1.value * e2.value;
116 | i++;
117 | j++;
118 | } else if (e1.index < e2.index) {
119 | i++;
120 | } else {
121 | j++;
122 | }
123 | }
124 |
125 | return result;
126 | }
127 |
128 | /**
129 | * Computes normalized dot product
130 | * @param vector
131 | * @return a positive real number in the range [0.0,1.0]
132 | */
133 | public double normDot(SparseVector vector) {
134 | double dot = this.dot(vector);
135 | double d = Math.sqrt(this.size() * vector.size());
136 | if (d > 0) {
137 | dot /= d;
138 | }
139 | return dot;
140 | }
141 |
142 | /**
143 | * Computes square of the Euclidean distance
144 | * @param vector
145 | * @return
146 | */
147 | public double squaredDistance(SparseVector vector) {
148 | SparseVector v1 = this;
149 | SparseVector v2 = vector;
150 | double result = 0.0;
151 | int i = 0;
152 | int j = 0;
153 |
154 | while (i < v1.size() && j < v2.size()) {
155 | Element e1 = v1.get(i);
156 | Element e2 = v2.get(j);
157 |
158 | if (e1.index == e2.index) {
159 | double d = e1.value - e2.value;
160 | result += d*d;
161 | i++;
162 | j++;
163 | } else if (e1.index < e2.index) {
164 | result += e1.value * e1.value;
165 | i++;
166 | } else {
167 | result += e2.value * e2.value;
168 | j++;
169 | }
170 | }
171 |
172 | while (i < v1.size()) {
173 | Element e1 = v1.get(i);
174 | result += e1.value * e1.value;
175 | i++;
176 | }
177 |
178 | while (j < v2.size()) {
179 | Element e2 = v2.get(j);
180 | result += e2.value * e2.value;
181 | j++;
182 | }
183 | return result;
184 | }
185 |
186 | public void removeDuplicates() {
187 | int last = 0;
188 | for (int i = 1; i < size; i++) {
189 | if (elements[last].index != elements[i].index) {
190 | last++;
191 | elements[last] = elements[i];
192 | }
193 | }
194 | size = last + 1;
195 | }
196 |
197 | public static void main(String[] args) {
198 | SparseVector vector = new SparseVector();
199 | vector.add(3, 1.2);
200 | vector.add(3, 1.2);
201 | vector.add(1, 1.6);
202 | vector.add(1, 1.6);
203 | vector.add(5, 2.3);
204 | vector.add(3, 1.2);
205 | vector.add(5, 2.3);
206 | vector.add(1, 1.6);
207 | vector.add(1, 1.6);
208 | System.out.println("before: " + vector);
209 | vector.sortByIndices();
210 | System.out.println("after: " + vector);
211 | vector.removeDuplicates();
212 | System.out.println("after: " + vector);
213 | }
214 | }
215 |
--------------------------------------------------------------------------------
/src/ca/uwo/csd/ai/nlp/libsvm/ex/SVMTrainer.java:
--------------------------------------------------------------------------------
1 | package ca.uwo.csd.ai.nlp.libsvm.ex;
2 |
3 |
4 | import ca.uwo.csd.ai.nlp.libsvm.svm;
5 | import ca.uwo.csd.ai.nlp.libsvm.svm_model;
6 | import ca.uwo.csd.ai.nlp.libsvm.svm_node;
7 | import ca.uwo.csd.ai.nlp.libsvm.svm_parameter;
8 | import ca.uwo.csd.ai.nlp.libsvm.svm_problem;
9 | import java.io.IOException;
10 | import java.util.ArrayList;
11 | import java.util.List;
12 |
13 | /**
14 | * SVMTrainer performs training of an SVM.
15 | * @author Syeed Ibn Faiz
16 | */
17 | public class SVMTrainer {
18 |
19 | private static svm_problem prepareProblem(List<Instance> instances) {
20 | Instance[] array = new Instance[instances.size()];
21 | array = instances.toArray(array);
22 | return prepareProblem(array);
23 | }
24 |
25 | private static svm_problem prepareProblem(Instance[] instances) {
26 | return prepareProblem(instances, 0, instances.length - 1);
27 | }
28 |
29 | private static svm_problem prepareProblem(Instance[] instances, int begin, int end) {
30 | svm_problem prob = new svm_problem();
31 | prob.l = (end - begin) + 1;
32 | prob.y = new double[prob.l];
33 | prob.x = new svm_node[prob.l];
34 |
35 | for (int i = begin; i <= end; i++) {
36 | prob.y[i-begin] = instances[i].getLabel();
37 | prob.x[i-begin] = new svm_node(instances[i].getData());
38 | }
39 | return prob;
40 | }
41 |
42 | /**
43 | * Builds an SVM model
44 | * @param instances
45 | * @param param
46 | * @return
47 | */
48 | public static svm_model train(Instance[] instances, svm_parameter param) {
49 | //prepare svm_problem
50 | svm_problem prob = prepareProblem(instances);
51 |
52 | String error_msg = svm.svm_check_parameter(prob, param);
53 |
54 | if (error_msg != null) {
55 | System.err.print("ERROR: " + error_msg + "\n");
56 | System.exit(1);
57 | }
58 |
59 | return svm.svm_train(prob, param);
60 | }
61 |
62 | public static svm_model train(List<Instance> instances, svm_parameter param) {
63 | Instance[] array = new Instance[instances.size()];
64 | array = instances.toArray(array);
65 | return train(array, param);
66 | }
67 |
68 | /**
69 | * Performs N-fold cross validation
70 | * @param instances
71 | * @param param parameters
72 | * @param nr_fold number of folds (N)
73 | * @param binary whether doing binary classification
74 | */
75 | public static void doCrossValidation(Instance[] instances, svm_parameter param, int nr_fold, boolean binary) {
76 | svm_problem prob = prepareProblem(instances);
77 |
78 | int i;
79 | int total_correct = 0;
80 | double total_error = 0;
81 | double sumv = 0, sumy = 0, sumvv = 0, sumyy = 0, sumvy = 0;
82 | double[] target = new double[prob.l];
83 |
84 | svm.svm_cross_validation(prob, param, nr_fold, target);
85 | if (param.svm_type == svm_parameter.EPSILON_SVR
86 | || param.svm_type == svm_parameter.NU_SVR) {
87 | for (i = 0; i < prob.l; i++) {
88 | double y = prob.y[i];
89 | double v = target[i];
90 | total_error += (v - y) * (v - y);
91 | sumv += v;
92 | sumy += y;
93 | sumvv += v * v;
94 | sumyy += y * y;
95 | sumvy += v * y;
96 | }
97 | System.out.print("Cross Validation Mean squared error = " + total_error / prob.l + "\n");
98 | System.out.print("Cross Validation Squared correlation coefficient = "
99 | + ((prob.l * sumvy - sumv * sumy) * (prob.l * sumvy - sumv * sumy))
100 | / ((prob.l * sumvv - sumv * sumv) * (prob.l * sumyy - sumy * sumy)) + "\n");
101 | } else {
102 | int tp = 0;
103 | int fp = 0;
104 | int fn = 0;
105 |
106 | for (i = 0; i < prob.l; i++) {
107 | if (target[i] == prob.y[i]) {
108 | ++total_correct;
109 | if (prob.y[i] > 0) {
110 | tp++;
111 | }
112 | } else if (prob.y[i] > 0) {
113 | fn++;
114 | } else if (prob.y[i] < 0) {
115 | fp++;
116 | }
117 | }
118 | System.out.print("Cross Validation Accuracy = " + 100.0 * total_correct / prob.l + "%\n");
119 | if (binary) {
120 | double precision = (double) tp / (tp + fp);
121 | double recall = (double) tp / (tp + fn);
122 | System.out.println("Precision: " + precision);
123 | System.out.println("Recall: " + recall);
124 | System.out.println("FScore: " + 2 * precision * recall / (precision + recall));
125 | }
126 | }
127 | }
128 |
129 | public static void doInOrderCrossValidation(Instance[] instances, svm_parameter param, int nr_fold, boolean binary) {
130 | int size = instances.length;
131 | int chunkSize = size/nr_fold;
132 | int begin = 0;
133 | int end = chunkSize - 1;
134 | int tp = 0;
135 | int fp = 0;
136 | int fn = 0;
137 | int total = 0;
138 |
139 | for (int i = 0; i < nr_fold; i++) {
140 | System.out.println("Iteration: " + (i+1));
141 | List<Instance> trainingInstances = new ArrayList<Instance>();
142 | List<Instance> testingInstances = new ArrayList<Instance>();
143 | for (int j = 0; j < size; j++) {
144 | if (j >= begin && j <= end) {
145 | testingInstances.add(instances[j]);
146 | } else {
147 | trainingInstances.add(instances[j]);
148 | }
149 | }
150 |
151 | svm_model trainModel = train(trainingInstances, param);
152 | double[] predictions = SVMPredictor.predict(testingInstances, trainModel);
153 | for (int k = 0; k < predictions.length; k++) {
154 |
155 | if (predictions[k] == testingInstances.get(k).getLabel()) {
156 | //if (Math.abs(predictions[k] - testingInstances.get(k).getLabel()) < 0.00001) {
157 | if (testingInstances.get(k).getLabel() > 0) {
158 | tp++;
159 | }
160 | } else if (testingInstances.get(k).getLabel() > 0) {
161 | fn++;
162 | } else if (testingInstances.get(k).getLabel() < 0) {
163 | //System.out.println(testingInstances.get(k).getData());
164 | fp++;
165 | }
166 | total++;
167 | }
168 | //update
169 | begin = end+1;
170 | end = begin + chunkSize - 1;
171 | if (end >= size) {
172 | end = size-1;
173 | }
174 | }
175 |
176 | double precision = (double) tp / (tp + fp);
177 | double recall = (double) tp / (tp + fn);
178 | System.out.println("Precision: " + precision);
179 | System.out.println("Recall: " + recall);
180 | System.out.println("FScore: " + 2 * precision * recall / (precision + recall));
181 | }
182 |
183 | public static void saveModel(svm_model model, String filePath) throws IOException {
184 | svm.svm_save_model(filePath, model);
185 | }
186 | }
187 |
--------------------------------------------------------------------------------
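
A cross-validation sketch (illustrative class name): doCrossValidation delegates to svm.svm_cross_validation and prints accuracy (plus precision/recall/F-score when binary is true), while doInOrderCrossValidation evaluates consecutive chunks in the original instance order; neither returns its metrics.

    import ca.uwo.csd.ai.nlp.kernel.KernelManager;
    import ca.uwo.csd.ai.nlp.kernel.LinearKernel;
    import ca.uwo.csd.ai.nlp.libsvm.ex.Instance;
    import ca.uwo.csd.ai.nlp.libsvm.ex.SVMTrainer;
    import ca.uwo.csd.ai.nlp.libsvm.svm_parameter;
    import utils.DataFileReader;

    public class CrossValidationSketch {
        public static void main(String[] args) throws Exception {
            KernelManager.setCustomKernel(new LinearKernel());
            Instance[] data = DataFileReader.readDataFile("a1a.train");
            svm_parameter param = new svm_parameter();

            // 10-fold CV; the last flag enables the binary precision/recall/F-score report
            SVMTrainer.doCrossValidation(data, param, 10, true);
        }
    }
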
/src/svm_scale.java:
--------------------------------------------------------------------------------
1 |
2 | import java.io.*;
3 | import java.util.*;
4 |
5 | class svm_scale {
6 |
7 | private String line = null;
8 | private double lower = -1.0;
9 | private double upper = 1.0;
10 | private double y_lower;
11 | private double y_upper;
12 | private boolean y_scaling = false;
13 | private double[] feature_max;
14 | private double[] feature_min;
15 | private double y_max = -Double.MAX_VALUE;
16 | private double y_min = Double.MAX_VALUE;
17 | private int max_index;
18 | private long num_nonzeros = 0;
19 | private long new_num_nonzeros = 0;
20 |
21 | private static void exit_with_help() {
22 | System.out.print(
23 | "Usage: svm-scale [options] data_filename\n"
24 | + "options:\n"
25 | + "-l lower : x scaling lower limit (default -1)\n"
26 | + "-u upper : x scaling upper limit (default +1)\n"
27 | + "-y y_lower y_upper : y scaling limits (default: no y scaling)\n"
28 | + "-s save_filename : save scaling parameters to save_filename\n"
29 | + "-r restore_filename : restore scaling parameters from restore_filename\n");
30 | System.exit(1);
31 | }
32 |
33 | private BufferedReader rewind(BufferedReader fp, String filename) throws IOException {
34 | fp.close();
35 | return new BufferedReader(new FileReader(filename));
36 | }
37 |
38 | private void output_target(double value) {
39 | if (y_scaling) {
40 | if (value == y_min) {
41 | value = y_lower;
42 | } else if (value == y_max) {
43 | value = y_upper;
44 | } else {
45 | value = y_lower + (y_upper - y_lower)
46 | * (value - y_min) / (y_max - y_min);
47 | }
48 | }
49 |
50 | System.out.print(value + " ");
51 | }
52 |
53 | private void output(int index, double value) {
54 | /* skip single-valued attribute */
55 | if (feature_max[index] == feature_min[index]) {
56 | return;
57 | }
58 |
59 | if (value == feature_min[index]) {
60 | value = lower;
61 | } else if (value == feature_max[index]) {
62 | value = upper;
63 | } else {
64 | value = lower + (upper - lower)
65 | * (value - feature_min[index])
66 | / (feature_max[index] - feature_min[index]);
67 | }
68 |
69 | if (value != 0) {
70 | System.out.print(index + ":" + value + " ");
71 | new_num_nonzeros++;
72 | }
73 | }
74 |
75 | private String readline(BufferedReader fp) throws IOException {
76 | line = fp.readLine();
77 | return line;
78 | }
79 |
80 | private void run(String[] argv) throws IOException {
81 | int i, index;
82 | BufferedReader fp = null, fp_restore = null;
83 | String save_filename = null;
84 | String restore_filename = null;
85 | String data_filename = null;
86 |
87 |
88 | for (i = 0; i < argv.length; i++) {
89 | if (argv[i].charAt(0) != '-') {
90 | break;
91 | }
92 | ++i;
93 | switch (argv[i - 1].charAt(1)) {
94 | case 'l':
95 | lower = Double.parseDouble(argv[i]);
96 | break;
97 | case 'u':
98 | upper = Double.parseDouble(argv[i]);
99 | break;
100 | case 'y':
101 | y_lower = Double.parseDouble(argv[i]);
102 | ++i;
103 | y_upper = Double.parseDouble(argv[i]);
104 | y_scaling = true;
105 | break;
106 | case 's':
107 | save_filename = argv[i];
108 | break;
109 | case 'r':
110 | restore_filename = argv[i];
111 | break;
112 | default:
113 | System.err.println("unknown option");
114 | exit_with_help();
115 | }
116 | }
117 |
118 | if (!(upper > lower) || (y_scaling && !(y_upper > y_lower))) {
119 | System.err.println("inconsistent lower/upper specification");
120 | System.exit(1);
121 | }
122 | if (restore_filename != null && save_filename != null) {
123 | System.err.println("cannot use -r and -s simultaneously");
124 | System.exit(1);
125 | }
126 |
127 | if (argv.length != i + 1) {
128 | exit_with_help();
129 | }
130 |
131 | data_filename = argv[i];
132 | try {
133 | fp = new BufferedReader(new FileReader(data_filename));
134 | } catch (Exception e) {
135 | System.err.println("can't open file " + data_filename);
136 | System.exit(1);
137 | }
138 |
139 | /* assumption: min index of attributes is 1 */
140 | /* pass 1: find out max index of attributes */
141 | max_index = 0;
142 |
143 | if (restore_filename != null) {
144 | int idx, c;
145 |
146 | try {
147 | fp_restore = new BufferedReader(new FileReader(restore_filename));
148 | } catch (Exception e) {
149 | System.err.println("can't open file " + restore_filename);
150 | System.exit(1);
151 | }
152 | if ((c = fp_restore.read()) == 'y') {
153 | fp_restore.readLine();
154 | fp_restore.readLine();
155 | fp_restore.readLine();
156 | }
157 | fp_restore.readLine();
158 | fp_restore.readLine();
159 |
160 | String restore_line = null;
161 | while ((restore_line = fp_restore.readLine()) != null) {
162 | StringTokenizer st2 = new StringTokenizer(restore_line);
163 | idx = Integer.parseInt(st2.nextToken());
164 | max_index = Math.max(max_index, idx);
165 | }
166 | fp_restore = rewind(fp_restore, restore_filename);
167 | }
168 |
169 | while (readline(fp) != null) {
170 | StringTokenizer st = new StringTokenizer(line, " \t\n\r\f:");
171 | st.nextToken();
172 | while (st.hasMoreTokens()) {
173 | index = Integer.parseInt(st.nextToken());
174 | max_index = Math.max(max_index, index);
175 | st.nextToken();
176 | num_nonzeros++;
177 | }
178 | }
179 |
180 | try {
181 | feature_max = new double[(max_index + 1)];
182 | feature_min = new double[(max_index + 1)];
183 | } catch (OutOfMemoryError e) {
184 | System.err.println("can't allocate enough memory");
185 | System.exit(1);
186 | }
187 |
188 | for (i = 0; i <= max_index; i++) {
189 | feature_max[i] = -Double.MAX_VALUE;
190 | feature_min[i] = Double.MAX_VALUE;
191 | }
192 |
193 | fp = rewind(fp, data_filename);
194 |
195 | /* pass 2: find out min/max value */
196 | while (readline(fp) != null) {
197 | int next_index = 1;
198 | double target;
199 | double value;
200 |
201 | StringTokenizer st = new StringTokenizer(line, " \t\n\r\f:");
202 | target = Double.parseDouble(st.nextToken());
203 | y_max = Math.max(y_max, target);
204 | y_min = Math.min(y_min, target);
205 |
206 | while (st.hasMoreTokens()) {
207 | index = Integer.parseInt(st.nextToken());
208 | value = Double.parseDouble(st.nextToken());
209 |
210 | for (i = next_index; i < index; i++) {
211 | feature_max[i] = Math.max(feature_max[i], 0);
212 | feature_min[i] = Math.min(feature_min[i], 0);
213 | }
214 |
215 | feature_max[index] = Math.max(feature_max[index], value);
216 | feature_min[index] = Math.min(feature_min[index], value);
217 | next_index = index + 1;
218 | }
219 |
220 | for (i = next_index; i <= max_index; i++) {
221 | feature_max[i] = Math.max(feature_max[i], 0);
222 | feature_min[i] = Math.min(feature_min[i], 0);
223 | }
224 | }
225 |
226 | fp = rewind(fp, data_filename);
227 |
228 | /* pass 2.5: save/restore feature_min/feature_max */
229 | if (restore_filename != null) {
230 | // fp_restore rewinded in finding max_index
231 | int idx, c;
232 | double fmin, fmax;
233 |
234 | fp_restore.mark(2); // for reset
235 | if ((c = fp_restore.read()) == 'y') {
236 | fp_restore.readLine(); // pass the '\n' after 'y'
237 | StringTokenizer st = new StringTokenizer(fp_restore.readLine());
238 | y_lower = Double.parseDouble(st.nextToken());
239 | y_upper = Double.parseDouble(st.nextToken());
240 | st = new StringTokenizer(fp_restore.readLine());
241 | y_min = Double.parseDouble(st.nextToken());
242 | y_max = Double.parseDouble(st.nextToken());
243 | y_scaling = true;
244 | } else {
245 | fp_restore.reset();
246 | }
247 |
248 | if (fp_restore.read() == 'x') {
249 | fp_restore.readLine(); // pass the '\n' after 'x'
250 | StringTokenizer st = new StringTokenizer(fp_restore.readLine());
251 | lower = Double.parseDouble(st.nextToken());
252 | upper = Double.parseDouble(st.nextToken());
253 | String restore_line = null;
254 | while ((restore_line = fp_restore.readLine()) != null) {
255 | StringTokenizer st2 = new StringTokenizer(restore_line);
256 | idx = Integer.parseInt(st2.nextToken());
257 | fmin = Double.parseDouble(st2.nextToken());
258 | fmax = Double.parseDouble(st2.nextToken());
259 | if (idx <= max_index) {
260 | feature_min[idx] = fmin;
261 | feature_max[idx] = fmax;
262 | }
263 | }
264 | }
265 | fp_restore.close();
266 | }
267 |
268 | if (save_filename != null) {
269 | Formatter formatter = new Formatter(new StringBuilder());
270 | BufferedWriter fp_save = null;
271 |
272 | try {
273 | fp_save = new BufferedWriter(new FileWriter(save_filename));
274 | } catch (IOException e) {
275 | System.err.println("can't open file " + save_filename);
276 | System.exit(1);
277 | }
278 |
279 | if (y_scaling) {
280 | formatter.format("y\n");
281 | formatter.format("%.16g %.16g\n", y_lower, y_upper);
282 | formatter.format("%.16g %.16g\n", y_min, y_max);
283 | }
284 | formatter.format("x\n");
285 | formatter.format("%.16g %.16g\n", lower, upper);
286 | for (i = 1; i <= max_index; i++) {
287 | if (feature_min[i] != feature_max[i]) {
288 | formatter.format("%d %.16g %.16g\n", i, feature_min[i], feature_max[i]);
289 | }
290 | }
291 | fp_save.write(formatter.toString());
292 | fp_save.close();
293 | }
294 |
295 | /* pass 3: scale */
296 | while (readline(fp) != null) {
297 | int next_index = 1;
298 | double target;
299 | double value;
300 |
301 | StringTokenizer st = new StringTokenizer(line, " \t\n\r\f:");
302 | target = Double.parseDouble(st.nextToken());
303 | output_target(target);
304 | while (st.hasMoreElements()) {
305 | index = Integer.parseInt(st.nextToken());
306 | value = Double.parseDouble(st.nextToken());
307 | for (i = next_index; i < index; i++) {
308 | output(i, 0);
309 | }
310 | output(index, value);
311 | next_index = index + 1;
312 | }
313 |
314 | for (i = next_index; i <= max_index; i++) {
315 | output(i, 0);
316 | }
317 | System.out.print("\n");
318 | }
319 | if (new_num_nonzeros > num_nonzeros) {
320 | System.err.print(
321 | "WARNING: original #nonzeros " + num_nonzeros + "\n"
322 | + " new #nonzeros " + new_num_nonzeros + "\n"
323 | + "Use -l 0 if many original feature values are zeros\n");
324 | }
325 |
326 | fp.close();
327 | }
328 |
329 | public static void main(String argv[]) throws IOException {
330 | svm_scale s = new svm_scale();
331 | s.run(argv);
332 | }
333 | }
334 |
--------------------------------------------------------------------------------
/README.libsvm:
--------------------------------------------------------------------------------
1 | Libsvm is a simple, easy-to-use, and efficient software for SVM
2 | classification and regression. It solves C-SVM classification, nu-SVM
3 | classification, one-class-SVM, epsilon-SVM regression, and nu-SVM
4 | regression. It also provides an automatic model selection tool for
5 | C-SVM classification. This document explains the use of libsvm.
6 |
7 | Libsvm is available at
8 | http://www.csie.ntu.edu.tw/~cjlin/libsvm
9 | Please read the COPYRIGHT file before using libsvm.
10 |
11 | Table of Contents
12 | =================
13 |
14 | - Quick Start
15 | - Installation and Data Format
16 | - `svm-train' Usage
17 | - `svm-predict' Usage
18 | - `svm-scale' Usage
19 | - Tips on Practical Use
20 | - Examples
21 | - Precomputed Kernels
22 | - Library Usage
23 | - Java Version
24 | - Building Windows Binaries
25 | - Additional Tools: Sub-sampling, Parameter Selection, Format checking, etc.
26 | - MATLAB/OCTAVE Interface
27 | - Python Interface
28 | - Additional Information
29 |
30 | Quick Start
31 | ===========
32 |
33 | If you are new to SVM and if the data is not large, please go to
34 | `tools' directory and use easy.py after installation. It does
35 | everything automatic -- from data scaling to parameter selection.
36 |
37 | Usage: easy.py training_file [testing_file]
38 |
39 | More information about parameter selection can be found in
40 | `tools/README.'
41 |
42 | Installation and Data Format
43 | ============================
44 |
45 | On Unix systems, type `make' to build the `svm-train' and `svm-predict'
46 | programs. Run them without arguments to show the usages of them.
47 |
48 | On other systems, consult `Makefile' to build them (e.g., see
49 | 'Building Windows binaries' in this file) or use the pre-built
50 | binaries (Windows binaries are in the directory `windows').
51 |
52 | The format of training and testing data file is:
53 |
54 |