├── nbproject ├── private │ ├── config.properties │ ├── private.properties │ └── private.xml ├── genfiles.properties ├── project.xml ├── project.properties └── build-impl.xml ├── dist └── libsvm-java.jar ├── manifest.mf ├── demo.sh ├── src ├── ca │ └── uwo │ │ └── csd │ │ └── ai │ │ └── nlp │ │ ├── libsvm │ │ ├── svm_print_interface.java │ │ ├── svm_node.java │ │ ├── svm_problem.java │ │ ├── ex │ │ │ ├── Instance.java │ │ │ ├── SVMPredictor.java │ │ │ └── SVMTrainer.java │ │ ├── svm_model.java │ │ └── svm_parameter.java │ │ ├── kernel │ │ ├── CustomKernel.java │ │ ├── KernelManager.java │ │ ├── CompositeKernel.java │ │ ├── LinearKernel.java │ │ ├── RBFKernel.java │ │ └── TreeKernel.java │ │ └── common │ │ ├── Tree.java │ │ └── SparseVector.java ├── utils │ └── DataFileReader.java ├── Demo.java └── svm_scale.java ├── check.sh ├── README.md ├── COPYRIGHT ├── COPYRIGHT.libsvm └── README.libsvm /nbproject/private/config.properties: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /dist/libsvm-java.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/syeedibnfaiz/libsvm-java-kernel/HEAD/dist/libsvm-java.jar -------------------------------------------------------------------------------- /manifest.mf: -------------------------------------------------------------------------------- 1 | Manifest-Version: 1.0 2 | X-COMMENT: Main-Class will be added automatically by build 3 | 4 | -------------------------------------------------------------------------------- /demo.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #build 3 | ant 4 | java -cp dist/libsvm-java.jar Demo a1a.train a1a.test a1a.out 5 | 6 | -------------------------------------------------------------------------------- /src/ca/uwo/csd/ai/nlp/libsvm/svm_print_interface.java: -------------------------------------------------------------------------------- 1 | package ca.uwo.csd.ai.nlp.libsvm; 2 | 3 | public interface svm_print_interface { 4 | 5 | public void print(String s); 6 | } 7 | -------------------------------------------------------------------------------- /nbproject/private/private.properties: -------------------------------------------------------------------------------- 1 | compile.on.save=true 2 | do.depend=false 3 | do.jar=true 4 | javac.debug=true 5 | javadoc.preview=true 6 | user.properties.file=C:\\Users\\tonatuni\\.netbeans\\7.0\\build.properties 7 | -------------------------------------------------------------------------------- /nbproject/private/private.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | -------------------------------------------------------------------------------- /src/ca/uwo/csd/ai/nlp/libsvm/svm_node.java: -------------------------------------------------------------------------------- 1 | package ca.uwo.csd.ai.nlp.libsvm; 2 | 3 | public class svm_node implements java.io.Serializable { 4 | 5 | public Object data; 6 | 7 | public svm_node() { 8 | } 9 | 10 | public svm_node(Object data) { 11 | this.data = data; 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /src/ca/uwo/csd/ai/nlp/kernel/CustomKernel.java: -------------------------------------------------------------------------------- 1 | package ca.uwo.csd.ai.nlp.kernel; 2 | 3 | import 
ca.uwo.csd.ai.nlp.libsvm.svm_node; 4 | 5 | /** 6 | * Interface for a custom kernel function 7 | * @author Syeed Ibn Faiz 8 | */ 9 | public interface CustomKernel { 10 | double evaluate(svm_node x, svm_node y); 11 | } 12 | -------------------------------------------------------------------------------- /src/ca/uwo/csd/ai/nlp/libsvm/svm_problem.java: -------------------------------------------------------------------------------- 1 | package ca.uwo.csd.ai.nlp.libsvm; 2 | 3 | public class svm_problem implements java.io.Serializable { 4 | 5 | public int l; 6 | public double[] y; 7 | public svm_node[] x; 8 | 9 | public svm_problem(int l, double[] y, svm_node[] x) { 10 | this.l = l; 11 | this.y = y; 12 | this.x = x; 13 | } 14 | 15 | public svm_problem() { 16 | } 17 | 18 | } 19 | -------------------------------------------------------------------------------- /nbproject/genfiles.properties: -------------------------------------------------------------------------------- 1 | build.xml.data.CRC32=1390663b 2 | build.xml.script.CRC32=b6f8644c 3 | build.xml.stylesheet.CRC32=28e38971@1.44.1.45 4 | # This file is used by a NetBeans-based IDE to track changes in generated files such as build-impl.xml. 5 | # Do not edit this file. You may delete it but then the IDE will never regenerate such files for you. 6 | nbproject/build-impl.xml.data.CRC32=1390663b 7 | nbproject/build-impl.xml.script.CRC32=3a4ce78d 8 | nbproject/build-impl.xml.stylesheet.CRC32=0ae3a408@1.44.1.45 9 | -------------------------------------------------------------------------------- /nbproject/project.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | org.netbeans.modules.java.j2seproject 4 | 5 | 6 | libsvm-java 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /src/ca/uwo/csd/ai/nlp/kernel/KernelManager.java: -------------------------------------------------------------------------------- 1 | package ca.uwo.csd.ai.nlp.kernel; 2 | 3 | /** 4 | * KernelManager provides the custom kernel function to svm. 
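 * The kernel must be registered via setCustomKernel before training or prediction, since this port only supports the CUSTOM kernel type.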
5 | * @author Syeed Ibn Faiz 6 | */ 7 | public class KernelManager { 8 | static private CustomKernel customKernel; 9 | 10 | public static CustomKernel getCustomKernel() { 11 | return customKernel; 12 | } 13 | 14 | /** 15 | * Registers the custom kernel 16 | * @param customKernel 17 | */ 18 | public static void setCustomKernel(CustomKernel customKernel) { 19 | KernelManager.customKernel = customKernel; 20 | } 21 | 22 | } 23 | -------------------------------------------------------------------------------- /src/ca/uwo/csd/ai/nlp/libsvm/ex/Instance.java: -------------------------------------------------------------------------------- 1 | package ca.uwo.csd.ai.nlp.libsvm.ex; 2 | 3 | /** 4 | * 5 | * @author Syeed Ibn Faiz 6 | */ 7 | public class Instance { 8 | private double label; 9 | private Object data; 10 | 11 | public Instance(double label, Object data) { 12 | this.label = label; 13 | this.data = data; 14 | } 15 | 16 | public Object getData() { 17 | return data; 18 | } 19 | 20 | public void setData(Object data) { 21 | this.data = data; 22 | } 23 | 24 | public double getLabel() { 25 | return label; 26 | } 27 | 28 | public void setLabel(double label) { 29 | this.label = label; 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /check.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | #This script checks the integrity of the library 3 | #The output produced by the library is matched against 4 | #that produced by the original tool 5 | 6 | #generate output 7 | #note that the Demo uses the linear kernel 8 | java -cp dist/libsvm-java.jar Demo a1a.train a1a.test a1a.out 9 | 10 | #download libsvm 11 | wget http://www.csie.ntu.edu.tw/~cjlin/cgi-bin/libsvm.cgi?+http://www.csie.ntu.edu.tw/~cjlin/libsvm+tar.gz 12 | mv libsvm* libsvm.tar.gz 13 | tar xzf libsvm.tar.gz 14 | rm libsvm.tar.gz 15 | cd libsvm* 16 | make 17 | #generate output using the linear kernel 18 | ./svm-train -t 0 ../a1a.train a1a.train.model 19 | ./svm-predict ../a1a.test a1a.train.model a1a.out 20 | cd .. 21 | diff -w a1a.out ./libsvm*/a1a.out 22 | 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Refactored LibSVM in Java making it easy to plug in a custom kernel. 2 | 3 | Use demo.sh to run the demo program. 4 | To learn how you can use the library see src/Demo.java. 5 | 6 | To write a kernel all you have to do is to implement kernel.CustomKernel and 7 | then register your kernel with the kernel.KernelManager. 
8 | 
9 | For example, the following code snippet registers an anonymous custom kernel:
10 | KernelManager.setCustomKernel(new CustomKernel() {
11 |     @Override
12 |     public double evaluate(svm_node x, svm_node y) {
13 |         //do something fancy
14 |         return 0.3141592654;
15 |     }
16 | });
17 | 
18 | 
19 | Syeed Ibn Faiz
20 | University of Western Ontario
21 | syeedibnfaiz@gmail.com
22 | 
--------------------------------------------------------------------------------
/src/ca/uwo/csd/ai/nlp/kernel/CompositeKernel.java:
--------------------------------------------------------------------------------
1 | package ca.uwo.csd.ai.nlp.kernel;
2 | 
3 | import java.util.ArrayList;
4 | import java.util.List;
5 | import ca.uwo.csd.ai.nlp.libsvm.svm_node;
6 | 
7 | /**
8 |  *
9 |  * @author Syeed Ibn Faiz
10 |  */
11 | public class CompositeKernel implements CustomKernel {
12 |     private List<CustomKernel> kernels;
13 | 
14 |     public CompositeKernel() {
15 |         this(new ArrayList<CustomKernel>());
16 |     }
17 | 
18 |     public CompositeKernel(List<CustomKernel> kernels) {
19 |         this.kernels = kernels;
20 |     }
21 | 
22 |     @Override
23 |     public double evaluate(svm_node x, svm_node y) {
24 |         double value = 0.0;
25 |         for (CustomKernel kernel : kernels) {
26 |             value += kernel.evaluate(x, y);
27 |         }
28 |         return value;
29 |     }
30 | }
31 | 
--------------------------------------------------------------------------------
/src/ca/uwo/csd/ai/nlp/common/Tree.java:
--------------------------------------------------------------------------------
1 | package ca.uwo.csd.ai.nlp.common;
2 | 
3 | import java.util.ArrayList;
4 | import java.util.List;
5 | 
6 | /**
7 |  * Dummy Tree data structure
8 |  * @author Syeed Ibn Faiz
9 |  */
10 | public class Tree {
11 |     private String value;
12 |     private List<Tree> children;
13 | 
14 |     public Tree(String value) {
15 |         this.value = value;
16 |         children = new ArrayList<Tree>();
17 |     }
18 | 
19 |     public Tree(String value, List<Tree> children) {
20 |         this.value = value;
21 |         this.children = children;
22 |     }
23 | 
24 |     public List<Tree> getChildrenAsList() {
25 |         return children;
26 |     }
27 | 
28 |     public String value() {
29 |         return value;
30 |     }
31 | 
32 |     public boolean isLeaf() {
33 |         return children.isEmpty();
34 |     }
35 | }
36 | 
--------------------------------------------------------------------------------
/src/ca/uwo/csd/ai/nlp/kernel/LinearKernel.java:
--------------------------------------------------------------------------------
1 | package ca.uwo.csd.ai.nlp.kernel;
2 | 
3 | import ca.uwo.csd.ai.nlp.common.SparseVector;
4 | import ca.uwo.csd.ai.nlp.libsvm.svm_node;
5 | import java.io.Serializable;
6 | 
7 | /**
8 |  * LinearKernel implements a linear kernel function.
9 | * @author Syeed Ibn Faiz 10 | */ 11 | public class LinearKernel implements CustomKernel, Serializable { 12 | 13 | @Override 14 | public double evaluate(svm_node x, svm_node y) { 15 | if (!(x.data instanceof SparseVector) || !(y.data instanceof SparseVector)) { 16 | throw new RuntimeException("Could not find sparse vectors in svm_nodes"); 17 | } 18 | SparseVector v1 = (SparseVector) x.data; 19 | SparseVector v2 = (SparseVector) y.data; 20 | 21 | return v1.dot(v2); 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /src/ca/uwo/csd/ai/nlp/libsvm/svm_model.java: -------------------------------------------------------------------------------- 1 | // 2 | // svm_model 3 | // 4 | package ca.uwo.csd.ai.nlp.libsvm; 5 | 6 | public class svm_model implements java.io.Serializable { 7 | 8 | public svm_parameter param; // parameter 9 | public int nr_class; // number of classes, = 2 in regression/one class svm 10 | public int l; // total #SV 11 | public svm_node[] SV; // SVs (SV[l]) 12 | public double[][] sv_coef; // coefficients for SVs in decision functions (sv_coef[k-1][l]) 13 | public double[] rho; // constants in decision functions (rho[k*(k-1)/2]) 14 | public double[] probA; // pariwise probability information 15 | public double[] probB; 16 | // for classification only 17 | public int[] label; // label of each class (label[k]) 18 | public int[] nSV; // number of SVs for each class (nSV[k]) 19 | // nSV[0] + nSV[1] + ... + nSV[k-1] = l 20 | }; 21 | -------------------------------------------------------------------------------- /COPYRIGHT: -------------------------------------------------------------------------------- 1 | 2 | Copyright (c) 2000-2012 Chih-Chung Chang and Chih-Jen Lin 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions 7 | are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright 10 | notice, this list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright 13 | notice, this list of conditions and the following disclaimer in the 14 | documentation and/or other materials provided with the distribution. 15 | 16 | 3. Neither name of copyright holders nor the names of its contributors 17 | may be used to endorse or promote products derived from this software 18 | without specific prior written permission. 19 | 20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 | ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR 25 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 26 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 27 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 28 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 29 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 30 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 31 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
32 | -------------------------------------------------------------------------------- /COPYRIGHT.libsvm: -------------------------------------------------------------------------------- 1 | 2 | Copyright (c) 2000-2012 Chih-Chung Chang and Chih-Jen Lin 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions 7 | are met: 8 | 9 | 1. Redistributions of source code must retain the above copyright 10 | notice, this list of conditions and the following disclaimer. 11 | 12 | 2. Redistributions in binary form must reproduce the above copyright 13 | notice, this list of conditions and the following disclaimer in the 14 | documentation and/or other materials provided with the distribution. 15 | 16 | 3. Neither name of copyright holders nor the names of its contributors 17 | may be used to endorse or promote products derived from this software 18 | without specific prior written permission. 19 | 20 | 21 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 | ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR 25 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 26 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 27 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 28 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 29 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 30 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 31 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
32 | -------------------------------------------------------------------------------- /src/ca/uwo/csd/ai/nlp/libsvm/svm_parameter.java: -------------------------------------------------------------------------------- 1 | package ca.uwo.csd.ai.nlp.libsvm; 2 | 3 | public class svm_parameter implements Cloneable, java.io.Serializable { 4 | /* svm_type */ 5 | 6 | public static final int C_SVC = 0; 7 | public static final int NU_SVC = 1; 8 | public static final int ONE_CLASS = 2; 9 | public static final int EPSILON_SVR = 3; 10 | public static final int NU_SVR = 4; 11 | 12 | /* kernel_type */ 13 | public static final int CUSTOM = 0; 14 | 15 | public int svm_type; 16 | public int kernel_type; 17 | public int degree; // for poly 18 | public double gamma; // for poly/rbf/sigmoid 19 | public double coef0; // for poly/sigmoid 20 | // these are for training only 21 | public double cache_size; // in MB 22 | public double eps; // stopping criteria 23 | public double C; // for C_SVC, EPSILON_SVR and NU_SVR 24 | public int nr_weight; // for C_SVC 25 | public int[] weight_label; // for C_SVC 26 | public double[] weight; // for C_SVC 27 | public double nu; // for NU_SVC, ONE_CLASS, and NU_SVR 28 | public double p; // for EPSILON_SVR 29 | public int shrinking; // use the shrinking heuristics 30 | public int probability; // do probability estimates 31 | 32 | 33 | public svm_parameter() { 34 | svm_type = svm_parameter.C_SVC; 35 | kernel_type = svm_parameter.CUSTOM; 36 | degree = 3; 37 | gamma = 0; // 1/num_features 38 | coef0 = 0; 39 | nu = 0.5; 40 | cache_size = 100; 41 | C = 1; 42 | eps = 1e-3; 43 | p = 0.1; 44 | shrinking = 1; 45 | probability = 0; 46 | nr_weight = 0; 47 | weight_label = new int[0]; 48 | weight = new double[0]; 49 | } 50 | 51 | public Object clone() { 52 | try { 53 | return super.clone(); 54 | } catch (CloneNotSupportedException e) { 55 | return null; 56 | } 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /src/ca/uwo/csd/ai/nlp/kernel/RBFKernel.java: -------------------------------------------------------------------------------- 1 | package ca.uwo.csd.ai.nlp.kernel; 2 | 3 | import ca.uwo.csd.ai.nlp.common.SparseVector; 4 | import ca.uwo.csd.ai.nlp.libsvm.svm_node; 5 | import ca.uwo.csd.ai.nlp.common.SparseVector.Element; 6 | import ca.uwo.csd.ai.nlp.libsvm.svm_parameter; 7 | 8 | /** 9 | * RBFKernel implements an RBF kernel. 
10 | * @author Syeed Ibn Faiz 11 | */ 12 | public class RBFKernel implements CustomKernel { 13 | 14 | svm_parameter param; 15 | public RBFKernel(svm_parameter param) { 16 | this.param = param; 17 | } 18 | 19 | 20 | @Override 21 | public double evaluate(svm_node x, svm_node y) { 22 | if (!(x.data instanceof SparseVector) || !(y.data instanceof SparseVector)) { 23 | throw new RuntimeException("svm_nodes should contain sparse vectors."); 24 | } 25 | 26 | SparseVector v1 = (SparseVector) x.data; 27 | SparseVector v2 = (SparseVector) y.data; 28 | double result = 0.0; 29 | int i = 0; 30 | int j = 0; 31 | 32 | while (i < v1.size() && j < v2.size()) { 33 | Element e1 = v1.get(i); 34 | Element e2 = v2.get(j); 35 | 36 | if (e1.index == e2.index) { 37 | double d = e1.value - e2.value; 38 | result += d * d; 39 | i++; 40 | j++; 41 | } else if (e1.index < e2.index) { 42 | result += e1.value * e1.value; 43 | i++; 44 | } else { 45 | result += e2.value * e2.value; 46 | j++; 47 | } 48 | } 49 | 50 | while (i < v1.size()) { 51 | Element e1 = v1.get(i); 52 | result += e1.value * e1.value; 53 | i++; 54 | } 55 | 56 | while (j < v2.size()) { 57 | Element e2 = v2.get(j); 58 | result += e2.value * e2.value; 59 | j++; 60 | } 61 | 62 | //System.out.println("score: " + result); 63 | return Math.exp(-param.gamma * result); 64 | } 65 | 66 | } 67 | -------------------------------------------------------------------------------- /src/utils/DataFileReader.java: -------------------------------------------------------------------------------- 1 | package utils; 2 | 3 | import ca.uwo.csd.ai.nlp.common.SparseVector; 4 | import java.io.BufferedReader; 5 | import java.io.FileReader; 6 | import java.io.IOException; 7 | import java.util.ArrayList; 8 | import ca.uwo.csd.ai.nlp.libsvm.ex.Instance; 9 | 10 | /** 11 | * DataFileReader reads data files written in LibSVM format. 
12 | * @author Syeed Ibn Faiz 13 | */ 14 | public class DataFileReader { 15 | 16 | public static Instance[] readDataFile(String fileName) throws IOException { 17 | BufferedReader reader = new BufferedReader(new FileReader(fileName)); 18 | 19 | ArrayList labels = new ArrayList(); 20 | ArrayList vectors = new ArrayList(); 21 | 22 | String line; 23 | int lineCount = 0; 24 | while ((line = reader.readLine()) != null) { 25 | lineCount++; 26 | String[] tokens = line.split("\\s+"); 27 | if (tokens.length < 2) { 28 | System.err.println("Inappropriate file format: " + fileName); 29 | System.err.println("Error in line " + lineCount); 30 | System.exit(-1); 31 | } 32 | 33 | labels.add(Double.parseDouble(tokens[0])); 34 | SparseVector vector = new SparseVector(tokens.length - 1); 35 | 36 | for (int i = 1; i < tokens.length; i++) { 37 | String[] fields = tokens[i].split(":"); 38 | if (fields.length < 2) { 39 | System.err.println("Inappropriate file format: " + fileName); 40 | System.err.println("Error in line " + lineCount); 41 | System.exit(-1); 42 | } 43 | int index = Integer.parseInt(fields[0]); 44 | double value = Double.parseDouble(fields[1]); 45 | vector.add(index, value); 46 | } 47 | 48 | vectors.add(vector); 49 | } 50 | 51 | Instance[] instances = new Instance[labels.size()]; 52 | for (int i = 0; i < instances.length; i++) { 53 | instances[i] = new Instance(labels.get(i), vectors.get(i)); 54 | } 55 | 56 | return instances; 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /nbproject/project.properties: -------------------------------------------------------------------------------- 1 | annotation.processing.enabled=true 2 | annotation.processing.enabled.in.editor=false 3 | annotation.processing.run.all.processors=true 4 | annotation.processing.source.output=${build.generated.sources.dir}/ap-source-output 5 | application.title=libsvm-java 6 | application.vendor=tonatuni 7 | build.classes.dir=${build.dir}/classes 8 | build.classes.excludes=**/*.java,**/*.form 9 | # This directory is removed when the project is cleaned: 10 | build.dir=build 11 | build.generated.dir=${build.dir}/generated 12 | build.generated.sources.dir=${build.dir}/generated-sources 13 | # Only compile against the classpath explicitly listed here: 14 | build.sysclasspath=ignore 15 | build.test.classes.dir=${build.dir}/test/classes 16 | build.test.results.dir=${build.dir}/test/results 17 | # Uncomment to specify the preferred debugger connection transport: 18 | #debug.transport=dt_socket 19 | debug.classpath=\ 20 | ${run.classpath} 21 | debug.test.classpath=\ 22 | ${run.test.classpath} 23 | # This directory is removed when the project is cleaned: 24 | dist.dir=dist 25 | dist.jar=${dist.dir}/libsvm-java.jar 26 | dist.javadoc.dir=${dist.dir}/javadoc 27 | endorsed.classpath= 28 | excludes= 29 | includes=** 30 | jar.compress=false 31 | javac.classpath= 32 | # Space-separated list of extra javac options 33 | javac.compilerargs= 34 | javac.deprecation=false 35 | javac.processorpath=\ 36 | ${javac.classpath} 37 | javac.source=1.6 38 | javac.target=1.6 39 | javac.test.classpath=\ 40 | ${javac.classpath}:\ 41 | ${build.classes.dir} 42 | javac.test.processorpath=\ 43 | ${javac.test.classpath} 44 | javadoc.additionalparam= 45 | javadoc.author=false 46 | javadoc.encoding=${source.encoding} 47 | javadoc.noindex=false 48 | javadoc.nonavbar=false 49 | javadoc.notree=false 50 | javadoc.private=false 51 | javadoc.splitindex=true 52 | javadoc.use=true 53 | javadoc.version=false 54 | 
javadoc.windowtitle= 55 | main.class= 56 | manifest.file=manifest.mf 57 | meta.inf.dir=${src.dir}/META-INF 58 | mkdist.disabled=false 59 | platform.active=default_platform 60 | run.classpath=\ 61 | ${javac.classpath}:\ 62 | ${build.classes.dir} 63 | # Space-separated list of JVM arguments used when running the project 64 | # (you may also define separate properties like run-sys-prop.name=value instead of -Dname=value 65 | # or test-sys-prop.name=value to set system properties for unit tests): 66 | run.jvmargs= 67 | run.test.classpath=\ 68 | ${javac.test.classpath}:\ 69 | ${build.test.classes.dir} 70 | source.encoding=UTF-8 71 | src.dir=src 72 | test.src.dir=test 73 | -------------------------------------------------------------------------------- /src/Demo.java: -------------------------------------------------------------------------------- 1 | 2 | import java.io.BufferedWriter; 3 | import java.io.FileWriter; 4 | import java.io.IOException; 5 | import ca.uwo.csd.ai.nlp.kernel.KernelManager; 6 | import ca.uwo.csd.ai.nlp.kernel.LinearKernel; 7 | import ca.uwo.csd.ai.nlp.libsvm.ex.Instance; 8 | import ca.uwo.csd.ai.nlp.libsvm.ex.SVMPredictor; 9 | import ca.uwo.csd.ai.nlp.libsvm.ex.SVMTrainer; 10 | import ca.uwo.csd.ai.nlp.libsvm.svm_model; 11 | import ca.uwo.csd.ai.nlp.libsvm.svm_parameter; 12 | import utils.DataFileReader; 13 | 14 | /** 15 | * Demonstration of sample usage 16 | * @author Syeed Ibn Faiz 17 | */ 18 | public class Demo { 19 | 20 | public static void testLinearKernel(String[] args) throws IOException, ClassNotFoundException { 21 | String trainFileName = args[0]; 22 | String testFileName = args[1]; 23 | String outputFileName = args[2]; 24 | 25 | //Read training file 26 | Instance[] trainingInstances = DataFileReader.readDataFile(trainFileName); 27 | 28 | //Register kernel function 29 | KernelManager.setCustomKernel(new LinearKernel()); 30 | 31 | //Setup parameters 32 | svm_parameter param = new svm_parameter(); 33 | 34 | //Train the model 35 | System.out.println("Training started..."); 36 | svm_model model = SVMTrainer.train(trainingInstances, param); 37 | System.out.println("Training completed."); 38 | 39 | //Save the trained model 40 | //SVMTrainer.saveModel(model, "a1a.model"); 41 | //model = SVMPredictor.load_model("a1a.model"); 42 | 43 | //Read test file 44 | Instance[] testingInstances = DataFileReader.readDataFile(testFileName); 45 | //Predict results 46 | double[] predictions = SVMPredictor.predict(testingInstances, model, true); 47 | writeOutputs(outputFileName, predictions); 48 | //SVMTrainer.doCrossValidation(trainingInstances, param, 10, true); 49 | //SVMTrainer.doInOrderCrossValidation(trainingInstances, param, 10, true); 50 | } 51 | 52 | private static void writeOutputs(String outputFileName, double[] predictions) throws IOException { 53 | BufferedWriter writer = new BufferedWriter(new FileWriter(outputFileName)); 54 | for (double p : predictions) { 55 | writer.write(String.format("%.0f\n", p)); 56 | } 57 | writer.close(); 58 | } 59 | 60 | private static void showUsage() { 61 | System.out.println("Demo training-file testing-file output-file"); 62 | } 63 | 64 | private static boolean checkArgument(String[] args) { 65 | return args.length == 3; 66 | } 67 | 68 | public static void main(String[] args) throws IOException, ClassNotFoundException { 69 | if (checkArgument(args)) { 70 | testLinearKernel(args); 71 | } else { 72 | showUsage(); 73 | } 74 | } 75 | 76 | } 77 | -------------------------------------------------------------------------------- 
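Demo.java above is wired to the LinearKernel; swapping in any other registered kernel leaves the rest of the train/predict flow unchanged. The following is a minimal, illustrative sketch using the bundled RBFKernel: the class name RBFDemo, the gamma value, and the a1a file names (taken from demo.sh) are assumptions, not part of the repository.

import ca.uwo.csd.ai.nlp.kernel.KernelManager;
import ca.uwo.csd.ai.nlp.kernel.RBFKernel;
import ca.uwo.csd.ai.nlp.libsvm.ex.Instance;
import ca.uwo.csd.ai.nlp.libsvm.ex.SVMPredictor;
import ca.uwo.csd.ai.nlp.libsvm.ex.SVMTrainer;
import ca.uwo.csd.ai.nlp.libsvm.svm_model;
import ca.uwo.csd.ai.nlp.libsvm.svm_parameter;
import java.io.IOException;
import utils.DataFileReader;

public class RBFDemo {
    public static void main(String[] args) throws IOException {
        // svm_parameter defaults to kernel_type = CUSTOM; gamma here is a placeholder value.
        svm_parameter param = new svm_parameter();
        param.gamma = 0.125;

        // RBFKernel reads gamma from the same parameter object it is given.
        KernelManager.setCustomKernel(new RBFKernel(param));

        // a1a.* are the data files used by demo.sh and check.sh.
        Instance[] train = DataFileReader.readDataFile("a1a.train");
        Instance[] test = DataFileReader.readDataFile("a1a.test");

        svm_model model = SVMTrainer.train(train, param);

        // Prints accuracy (and precision/recall for binary problems) and returns one prediction per instance.
        double[] predictions = SVMPredictor.predict(test, model, true);
    }
}

Compared with testLinearKernel in Demo.java, only the kernel registration line changes; the reading, training, and prediction steps stay the same.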
/src/ca/uwo/csd/ai/nlp/libsvm/ex/SVMPredictor.java: -------------------------------------------------------------------------------- 1 | package ca.uwo.csd.ai.nlp.libsvm.ex; 2 | 3 | import ca.uwo.csd.ai.nlp.libsvm.svm; 4 | import ca.uwo.csd.ai.nlp.libsvm.svm_model; 5 | import ca.uwo.csd.ai.nlp.libsvm.svm_node; 6 | import java.io.IOException; 7 | import java.util.List; 8 | 9 | /** 10 | * 11 | * @author Syeed Ibn Faiz 12 | */ 13 | public class SVMPredictor { 14 | 15 | public static double[] predict(List instances, svm_model model) { 16 | return predict(instances, model, true); 17 | } 18 | 19 | public static double[] predict(List instances, svm_model model, boolean displayResult) { 20 | Instance[] array = new Instance[instances.size()]; 21 | array = instances.toArray(array); 22 | return predict(array, model, displayResult); 23 | } 24 | 25 | public static double predict(Instance instance, svm_model model, boolean displayResult) { 26 | return svm.svm_predict(model, new svm_node(instance.getData())); 27 | } 28 | 29 | public static double predictProbability(Instance instance, svm_model model, double[] probabilities) { 30 | return svm.svm_predict_probability(model, new svm_node(instance.getData()), probabilities); 31 | } 32 | public static double[] predict(Instance[] instances, svm_model model, boolean displayResult) { 33 | int total = 0; 34 | int correct = 0; 35 | 36 | int tp = 0; 37 | int fp = 0; 38 | int fn = 0; 39 | 40 | boolean binary = model.nr_class == 2; 41 | double[] predictions = new double[instances.length]; 42 | int count = 0; 43 | 44 | for (Instance instance : instances) { 45 | double target = instance.getLabel(); 46 | double p = svm.svm_predict(model, new svm_node(instance.getData())); 47 | predictions[count++] = p; 48 | 49 | ++total; 50 | if (p == target) { 51 | correct++; 52 | if (target > 0) { 53 | tp++; 54 | } 55 | } else if (target > 0) { 56 | fn++; 57 | } else { 58 | fp++; 59 | } 60 | } 61 | if (displayResult) { 62 | System.out.print("Accuracy = " + (double) correct / total * 100 63 | + "% (" + correct + "/" + total + ") (classification)\n"); 64 | 65 | if (binary) { 66 | double precision = (double) tp / (tp + fp); 67 | double recall = (double) tp / (tp + fn); 68 | System.out.println("Precision: " + precision); 69 | System.out.println("Recall: " + recall); 70 | System.out.println("Fscore: " + 2 * precision * recall / (precision + recall)); 71 | } 72 | } 73 | return predictions; 74 | } 75 | 76 | public static svm_model loadModel(String filePath) throws IOException, ClassNotFoundException { 77 | return svm.svm_load_model(filePath); 78 | } 79 | } 80 | -------------------------------------------------------------------------------- /src/ca/uwo/csd/ai/nlp/kernel/TreeKernel.java: -------------------------------------------------------------------------------- 1 | package ca.uwo.csd.ai.nlp.kernel; 2 | 3 | //import edu.stanford.nlp.trees.Tree; 4 | import ca.uwo.csd.ai.nlp.common.Tree; 5 | import java.util.ArrayList; 6 | import java.util.List; 7 | import ca.uwo.csd.ai.nlp.libsvm.svm_node; 8 | 9 | /** 10 | * TreeKernel provides a naive implementation of the kernel function described in 11 | * 'Parsing with a single neuron: Convolution kernels for NLP problems'. 
12 |  * @author Syeed Ibn Faiz
13 |  */
14 | public class TreeKernel implements CustomKernel {
15 |     private final static int MAX_NODE = 300;
16 |     double mem[][] = new double[MAX_NODE][MAX_NODE];
17 |     private double lambda; //penalizing factor
18 | 
19 |     public TreeKernel() {
20 |         this(0.5);
21 |     }
22 | 
23 |     public TreeKernel(double lambda) {
24 |         this.lambda = lambda;
25 |     }
26 | 
27 |     @Override
28 |     public double evaluate(svm_node x, svm_node y) {
29 |         Object k1 = x.data;
30 |         Object k2 = y.data;
31 | 
32 |         if (!(k1 instanceof Tree) || !(k2 instanceof Tree)) {
33 |             throw new IllegalArgumentException("svm_node does not contain tree data.");
34 |         }
35 | 
36 |         Tree t1 = (Tree) k1;
37 |         Tree t2 = (Tree) k2;
38 | 
39 |         List<Tree> nodes1 = getNodes(t1);
40 |         List<Tree> nodes2 = getNodes(t2);
41 | 
42 |         int N1 = Math.min(MAX_NODE, nodes1.size());
43 |         int N2 = Math.min(MAX_NODE, nodes2.size());
44 | 
45 |         //fill mem with -1.0
46 |         initMem(mem, N1, N2);
47 | 
48 |         double result = 0.0;
49 |         for (int i = 0; i < N1; i++) {
50 |             for (int j = 0; j < N2; j++) {
51 |                 result += compute(i, j, nodes1, nodes2, mem);
52 |             }
53 |         }
54 | 
55 |         return result;
56 |     }
57 | 
58 |     /**
59 |      * Efficient computation avoiding costly equals method of Tree
60 |      * @param trees
61 |      * @param t
62 |      * @return
63 |      */
64 |     private int indexOf(List<Tree> trees, Tree t) {
65 |         for (int i = 0; i < trees.size(); i++) {
66 |             if (t == trees.get(i)) {
67 |                 return i;
68 |             }
69 |         }
70 |         return -1;
71 |     }
72 | 
73 |     private double compute(int i, int j, List<Tree> nodes1, List<Tree> nodes2, double[][] mem) {
74 |         if (mem[i][j] >= 0) {
75 |             return mem[i][j];
76 |         }
77 |         //if (sameProduction(nodes1.get(i), nodes2.get(j))) {
78 |         if (nodes1.get(i).value().equals(nodes2.get(j).value()) &&
79 |                 nodes1.get(i).hashCode() == nodes2.get(j).hashCode()) { //similar hashCode -> same production
80 | 
81 |             mem[i][j] = lambda * lambda;
82 |             if (!nodes1.get(i).isLeaf() && !nodes2.get(j).isLeaf()) {
83 |                 List<Tree> childList1 = nodes1.get(i).getChildrenAsList();
84 |                 List<Tree> childList2 = nodes2.get(j).getChildrenAsList();
85 |                 for (int k = 0; k < childList1.size(); k++) {
86 |                     //mem[i][j] *= 1 + compute(nodes1.indexOf(childList1.get(k)), nodes2.indexOf(childList2.get(k)), nodes1, nodes2, mem);
87 |                     mem[i][j] *= 1 + compute(indexOf(nodes1, childList1.get(k)), indexOf(nodes2, childList2.get(k)), nodes1, nodes2, mem);
88 |                 }
89 |             }
90 |         } else {
91 |             mem[i][j] = 0.0;
92 |         }
93 | 
94 |         return mem[i][j];
95 |     }
96 | 
97 |     private boolean sameProduction(Tree t1, Tree t2) {
98 |         if (t1.value().equals(t2.value())) {
99 |             List<Tree> childList1 = t1.getChildrenAsList();
100 |             List<Tree> childList2 = t2.getChildrenAsList();
101 |             if (childList1.size() == childList2.size()) {
102 |                 for (int i = 0; i < childList1.size(); i++) {
103 |                     if (!childList1.get(i).value().equals(childList2.get(i).value())) {
104 |                         return false;
105 |                     }
106 |                 }
107 |                 return true;
108 |             }
109 |         }
110 |         return false;
111 |     }
112 |     private void initMem(double[][] mem, int N1, int N2) {
113 |         for (int i = 0; i < N1; i++) {
114 |             for (int j = 0; j < N2; j++) {
115 |                 mem[i][j] = -1.0;
116 |             }
117 |         }
118 |     }
119 |     private List<Tree> getNodes(Tree t) {
120 |         ArrayList<Tree> nodes = new ArrayList<Tree>();
121 |         addNodes(t, nodes);
122 |         return nodes;
123 |     }
124 | 
125 |     private void addNodes(Tree t, List<Tree> nodes) {
126 |         nodes.add(t);
127 |         List<Tree> childList = t.getChildrenAsList();
128 |         for (Tree child : childList) {
129 |             addNodes(child, nodes);
130 |         }
131 |     }
132 | }
133 | 
--------------------------------------------------------------------------------
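TreeKernel expects every svm_node to carry a ca.uwo.csd.ai.nlp.common.Tree in its data field, so instances are built from trees rather than sparse vectors. The sketch below is illustrative only: the toy trees, labels, lambda value, and the TreeKernelDemo class name are assumptions, not repository code.

import ca.uwo.csd.ai.nlp.common.Tree;
import ca.uwo.csd.ai.nlp.kernel.KernelManager;
import ca.uwo.csd.ai.nlp.kernel.TreeKernel;
import ca.uwo.csd.ai.nlp.libsvm.ex.Instance;
import ca.uwo.csd.ai.nlp.libsvm.ex.SVMTrainer;
import ca.uwo.csd.ai.nlp.libsvm.svm_model;
import ca.uwo.csd.ai.nlp.libsvm.svm_parameter;
import java.util.Arrays;

public class TreeKernelDemo {
    public static void main(String[] args) {
        // Register the tree kernel; 0.4 is an arbitrary penalizing factor (lambda).
        KernelManager.setCustomKernel(new TreeKernel(0.4));

        // Two toy "parse trees"; a Tree built without children is a leaf.
        Tree s1 = new Tree("S", Arrays.asList(
                new Tree("NP", Arrays.asList(new Tree("dog"))),
                new Tree("VP", Arrays.asList(new Tree("barks")))));
        Tree s2 = new Tree("S", Arrays.asList(
                new Tree("NP", Arrays.asList(new Tree("cat"))),
                new Tree("VP", Arrays.asList(new Tree("sleeps")))));

        // An Instance wraps a label and an arbitrary data object, here a Tree.
        Instance[] train = new Instance[] {
            new Instance(+1, s1),
            new Instance(-1, s2)
        };

        svm_parameter param = new svm_parameter(); // kernel_type already defaults to CUSTOM
        svm_model model = SVMTrainer.train(train, param);
        // The model can then be saved with SVMTrainer.saveModel(model, path).
    }
}

Because Tree is only a dummy structure, a real application would normally convert its own parse-tree type (for example Stanford trees, as hinted by the commented-out import in TreeKernel) into this Tree class before building instances.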
/src/ca/uwo/csd/ai/nlp/common/SparseVector.java: -------------------------------------------------------------------------------- 1 | package ca.uwo.csd.ai.nlp.common; 2 | 3 | import java.io.Serializable; 4 | import java.util.Arrays; 5 | 6 | /** 7 | * SparseVector stores a sparse vector in a memory 8 | * efficient manner. It stores the elements of a vector 9 | * as a list of index:value pairs. 10 | * @author Syeed Ibn Faiz 11 | */ 12 | public class SparseVector implements Serializable { 13 | 14 | public static class Element implements Serializable, Comparable { 15 | public int index; 16 | public double value; 17 | 18 | public Element(int index, double value) { 19 | this.index = index; 20 | this.value = value; 21 | } 22 | 23 | @Override 24 | public int compareTo(Element o) { 25 | if (index < o.index) { 26 | return -1; 27 | } else if (index > o.index) { 28 | return 1; 29 | } 30 | return 0; 31 | } 32 | } 33 | 34 | private Element[] elements; 35 | private int size; 36 | private final int MAX_SIZE = 100; 37 | 38 | public SparseVector(int capacity) { 39 | elements = new Element[capacity]; 40 | } 41 | 42 | public SparseVector() { 43 | elements = new Element[MAX_SIZE]; 44 | } 45 | 46 | public void add(int index, double value) { 47 | add(new Element(index, value)); 48 | } 49 | 50 | public void add(Element elem) { 51 | if (isFull()) { 52 | resize(); 53 | } 54 | elements[size++] = elem; 55 | } 56 | 57 | public Element get(int n) { 58 | if (n >= size) { 59 | return null; 60 | } 61 | return elements[n]; 62 | } 63 | 64 | public boolean isFull() { 65 | return size == elements.length; 66 | } 67 | 68 | public boolean isEmpty() { 69 | return size == 0; 70 | } 71 | 72 | public int size() { 73 | return size; 74 | } 75 | 76 | private void resize() { 77 | Element[] newElements = new Element[size + MAX_SIZE]; 78 | for (int i = 0; i < elements.length; i++) { 79 | newElements[i] = elements[i]; 80 | elements[i] = null; 81 | } 82 | elements = newElements; 83 | } 84 | 85 | public void sortByIndices() { 86 | Arrays.sort(elements, 0, size); 87 | } 88 | 89 | @Override 90 | public String toString() { 91 | StringBuilder sb = new StringBuilder(); 92 | for (int i = 0; i < size; i++) { 93 | Element element = elements[i]; 94 | sb.append(element.index).append(":").append(element.value).append(" "); 95 | } 96 | return sb.toString(); 97 | } 98 | /** 99 | * Computes dot product between this vector and the argument vector 100 | * @param vector 101 | * @return 102 | */ 103 | public double dot(SparseVector vector) { 104 | SparseVector v1 = this; 105 | SparseVector v2 = vector; 106 | double result = 0.0; 107 | int i = 0; 108 | int j = 0; 109 | 110 | while (i < v1.size() && j < v2.size()) { 111 | Element e1 = v1.get(i); 112 | Element e2 = v2.get(j); 113 | 114 | if (e1.index == e2.index) { 115 | result += e1.value * e2.value; 116 | i++; 117 | j++; 118 | } else if (e1.index < e2.index) { 119 | i++; 120 | } else { 121 | j++; 122 | } 123 | } 124 | 125 | return result; 126 | } 127 | 128 | /** 129 | * Computes normalized dot product 130 | * @param vector 131 | * @return a positive real number in the range [0.0,1.0] 132 | */ 133 | public double normDot(SparseVector vector) { 134 | double dot = this.dot(vector); 135 | double d = Math.sqrt(this.size() * vector.size()); 136 | if (d > 0) { 137 | dot /= d; 138 | } 139 | return dot; 140 | } 141 | 142 | /** 143 | * Computes square of the Euclidean distance 144 | * @param vector 145 | * @return 146 | */ 147 | public double squaredDistance(SparseVector vector) { 148 | SparseVector v1 = this; 149 | 
SparseVector v2 = vector; 150 | double result = 0.0; 151 | int i = 0; 152 | int j = 0; 153 | 154 | while (i < v1.size() && j < v2.size()) { 155 | Element e1 = v1.get(i); 156 | Element e2 = v2.get(j); 157 | 158 | if (e1.index == e2.index) { 159 | double d = e1.value - e2.value; 160 | result += d*d; 161 | i++; 162 | j++; 163 | } else if (e1.index < e2.index) { 164 | result += e1.value * e1.value; 165 | i++; 166 | } else { 167 | result += e2.value * e2.value; 168 | j++; 169 | } 170 | } 171 | 172 | while (i < v1.size()) { 173 | Element e1 = v1.get(i); 174 | result += e1.value * e1.value; 175 | i++; 176 | } 177 | 178 | while (j < v2.size()) { 179 | Element e2 = v2.get(j); 180 | result += e2.value * e2.value; 181 | j++; 182 | } 183 | return result; 184 | } 185 | 186 | public void removeDuplicates() { 187 | int last = 0; 188 | for (int i = 1; i < size; i++) { 189 | if (elements[last].index != elements[i].index) { 190 | last++; 191 | elements[last] = elements[i]; 192 | } 193 | } 194 | size = last + 1; 195 | } 196 | 197 | public static void main(String[] args) { 198 | SparseVector vector = new SparseVector(); 199 | vector.add(3, 1.2); 200 | vector.add(3, 1.2); 201 | vector.add(1, 1.6); 202 | vector.add(1, 1.6); 203 | vector.add(5, 2.3); 204 | vector.add(3, 1.2); 205 | vector.add(5, 2.3); 206 | vector.add(1, 1.6); 207 | vector.add(1, 1.6); 208 | System.out.println("before: " + vector); 209 | vector.sortByIndices(); 210 | System.out.println("after: " + vector); 211 | vector.removeDuplicates(); 212 | System.out.println("after: " + vector); 213 | } 214 | } 215 | -------------------------------------------------------------------------------- /src/ca/uwo/csd/ai/nlp/libsvm/ex/SVMTrainer.java: -------------------------------------------------------------------------------- 1 | package ca.uwo.csd.ai.nlp.libsvm.ex; 2 | 3 | 4 | import ca.uwo.csd.ai.nlp.libsvm.svm; 5 | import ca.uwo.csd.ai.nlp.libsvm.svm_model; 6 | import ca.uwo.csd.ai.nlp.libsvm.svm_node; 7 | import ca.uwo.csd.ai.nlp.libsvm.svm_parameter; 8 | import ca.uwo.csd.ai.nlp.libsvm.svm_problem; 9 | import java.io.IOException; 10 | import java.util.ArrayList; 11 | import java.util.List; 12 | 13 | /** 14 | * SVMTrainer performs training of an SVM. 
15 | * @author Syeed Ibn Faiz 16 | */ 17 | public class SVMTrainer { 18 | 19 | private static svm_problem prepareProblem(List instances) { 20 | Instance[] array = new Instance[instances.size()]; 21 | array = instances.toArray(array); 22 | return prepareProblem(array); 23 | } 24 | 25 | private static svm_problem prepareProblem(Instance[] instances) { 26 | return prepareProblem(instances, 0, instances.length - 1); 27 | } 28 | 29 | private static svm_problem prepareProblem(Instance[] instances, int begin, int end) { 30 | svm_problem prob = new svm_problem(); 31 | prob.l = (end - begin) + 1; 32 | prob.y = new double[prob.l]; 33 | prob.x = new svm_node[prob.l]; 34 | 35 | for (int i = begin; i <= end; i++) { 36 | prob.y[i-begin] = instances[i].getLabel(); 37 | prob.x[i-begin] = new svm_node(instances[i].getData()); 38 | } 39 | return prob; 40 | } 41 | 42 | /** 43 | * Builds an SVM model 44 | * @param instances 45 | * @param param 46 | * @return 47 | */ 48 | public static svm_model train(Instance[] instances, svm_parameter param) { 49 | //prepare svm_problem 50 | svm_problem prob = prepareProblem(instances); 51 | 52 | String error_msg = svm.svm_check_parameter(prob, param); 53 | 54 | if (error_msg != null) { 55 | System.err.print("ERROR: " + error_msg + "\n"); 56 | System.exit(1); 57 | } 58 | 59 | return svm.svm_train(prob, param); 60 | } 61 | 62 | public static svm_model train(List instances, svm_parameter param) { 63 | Instance[] array = new Instance[instances.size()]; 64 | array = instances.toArray(array); 65 | return train(array, param); 66 | } 67 | 68 | /** 69 | * Performs N-fold cross validation 70 | * @param instances 71 | * @param param parameters 72 | * @param nr_fold number of folds (N) 73 | * @param binary whether doing binary classification 74 | */ 75 | public static void doCrossValidation(Instance[] instances, svm_parameter param, int nr_fold, boolean binary) { 76 | svm_problem prob = prepareProblem(instances); 77 | 78 | int i; 79 | int total_correct = 0; 80 | double total_error = 0; 81 | double sumv = 0, sumy = 0, sumvv = 0, sumyy = 0, sumvy = 0; 82 | double[] target = new double[prob.l]; 83 | 84 | svm.svm_cross_validation(prob, param, nr_fold, target); 85 | if (param.svm_type == svm_parameter.EPSILON_SVR 86 | || param.svm_type == svm_parameter.NU_SVR) { 87 | for (i = 0; i < prob.l; i++) { 88 | double y = prob.y[i]; 89 | double v = target[i]; 90 | total_error += (v - y) * (v - y); 91 | sumv += v; 92 | sumy += y; 93 | sumvv += v * v; 94 | sumyy += y * y; 95 | sumvy += v * y; 96 | } 97 | System.out.print("Cross Validation Mean squared error = " + total_error / prob.l + "\n"); 98 | System.out.print("Cross Validation Squared correlation coefficient = " 99 | + ((prob.l * sumvy - sumv * sumy) * (prob.l * sumvy - sumv * sumy)) 100 | / ((prob.l * sumvv - sumv * sumv) * (prob.l * sumyy - sumy * sumy)) + "\n"); 101 | } else { 102 | int tp = 0; 103 | int fp = 0; 104 | int fn = 0; 105 | 106 | for (i = 0; i < prob.l; i++) { 107 | if (target[i] == prob.y[i]) { 108 | ++total_correct; 109 | if (prob.y[i] > 0) { 110 | tp++; 111 | } 112 | } else if (prob.y[i] > 0) { 113 | fn++; 114 | } else if (prob.y[i] < 0) { 115 | fp++; 116 | } 117 | } 118 | System.out.print("Cross Validation Accuracy = " + 100.0 * total_correct / prob.l + "%\n"); 119 | if (binary) { 120 | double precision = (double) tp / (tp + fp); 121 | double recall = (double) tp / (tp + fn); 122 | System.out.println("Precision: " + precision); 123 | System.out.println("Recall: " + recall); 124 | System.out.println("FScore: " + 2 * precision * 
recall / (precision + recall)); 125 | } 126 | } 127 | } 128 | 129 | public static void doInOrderCrossValidation(Instance[] instances, svm_parameter param, int nr_fold, boolean binary) { 130 | int size = instances.length; 131 | int chunkSize = size/nr_fold; 132 | int begin = 0; 133 | int end = chunkSize - 1; 134 | int tp = 0; 135 | int fp = 0; 136 | int fn = 0; 137 | int total = 0; 138 | 139 | for (int i = 0; i < nr_fold; i++) { 140 | System.out.println("Iteration: " + (i+1)); 141 | List trainingInstances = new ArrayList(); 142 | List testingInstances = new ArrayList(); 143 | for (int j = 0; j < size; j++) { 144 | if (j >= begin && j <= end) { 145 | testingInstances.add(instances[j]); 146 | } else { 147 | trainingInstances.add(instances[j]); 148 | } 149 | } 150 | 151 | svm_model trainModel = train(trainingInstances, param); 152 | double[] predictions = SVMPredictor.predict(testingInstances, trainModel); 153 | for (int k = 0; k < predictions.length; k++) { 154 | 155 | if (predictions[k] == testingInstances.get(k).getLabel()) { 156 | //if (Math.abs(predictions[k] - testingInstances.get(k).getLabel()) < 0.00001) { 157 | if (testingInstances.get(k).getLabel() > 0) { 158 | tp++; 159 | } 160 | } else if (testingInstances.get(k).getLabel() > 0) { 161 | fn++; 162 | } else if (testingInstances.get(k).getLabel() < 0) { 163 | //System.out.println(testingInstances.get(k).getData()); 164 | fp++; 165 | } 166 | total++; 167 | } 168 | //update 169 | begin = end+1; 170 | end = begin + chunkSize - 1; 171 | if (end >= size) { 172 | end = size-1; 173 | } 174 | } 175 | 176 | double precision = (double) tp / (tp + fp); 177 | double recall = (double) tp / (tp + fn); 178 | System.out.println("Precision: " + precision); 179 | System.out.println("Recall: " + recall); 180 | System.out.println("FScore: " + 2 * precision * recall / (precision + recall)); 181 | } 182 | 183 | public static void saveModel(svm_model model, String filePath) throws IOException { 184 | svm.svm_save_model(filePath, model); 185 | } 186 | } 187 | -------------------------------------------------------------------------------- /src/svm_scale.java: -------------------------------------------------------------------------------- 1 | 2 | import java.io.*; 3 | import java.util.*; 4 | 5 | class svm_scale { 6 | 7 | private String line = null; 8 | private double lower = -1.0; 9 | private double upper = 1.0; 10 | private double y_lower; 11 | private double y_upper; 12 | private boolean y_scaling = false; 13 | private double[] feature_max; 14 | private double[] feature_min; 15 | private double y_max = -Double.MAX_VALUE; 16 | private double y_min = Double.MAX_VALUE; 17 | private int max_index; 18 | private long num_nonzeros = 0; 19 | private long new_num_nonzeros = 0; 20 | 21 | private static void exit_with_help() { 22 | System.out.print( 23 | "Usage: svm-scale [options] data_filename\n" 24 | + "options:\n" 25 | + "-l lower : x scaling lower limit (default -1)\n" 26 | + "-u upper : x scaling upper limit (default +1)\n" 27 | + "-y y_lower y_upper : y scaling limits (default: no y scaling)\n" 28 | + "-s save_filename : save scaling parameters to save_filename\n" 29 | + "-r restore_filename : restore scaling parameters from restore_filename\n"); 30 | System.exit(1); 31 | } 32 | 33 | private BufferedReader rewind(BufferedReader fp, String filename) throws IOException { 34 | fp.close(); 35 | return new BufferedReader(new FileReader(filename)); 36 | } 37 | 38 | private void output_target(double value) { 39 | if (y_scaling) { 40 | if (value == y_min) { 41 | value 
= y_lower; 42 | } else if (value == y_max) { 43 | value = y_upper; 44 | } else { 45 | value = y_lower + (y_upper - y_lower) 46 | * (value - y_min) / (y_max - y_min); 47 | } 48 | } 49 | 50 | System.out.print(value + " "); 51 | } 52 | 53 | private void output(int index, double value) { 54 | /* skip single-valued attribute */ 55 | if (feature_max[index] == feature_min[index]) { 56 | return; 57 | } 58 | 59 | if (value == feature_min[index]) { 60 | value = lower; 61 | } else if (value == feature_max[index]) { 62 | value = upper; 63 | } else { 64 | value = lower + (upper - lower) 65 | * (value - feature_min[index]) 66 | / (feature_max[index] - feature_min[index]); 67 | } 68 | 69 | if (value != 0) { 70 | System.out.print(index + ":" + value + " "); 71 | new_num_nonzeros++; 72 | } 73 | } 74 | 75 | private String readline(BufferedReader fp) throws IOException { 76 | line = fp.readLine(); 77 | return line; 78 | } 79 | 80 | private void run(String[] argv) throws IOException { 81 | int i, index; 82 | BufferedReader fp = null, fp_restore = null; 83 | String save_filename = null; 84 | String restore_filename = null; 85 | String data_filename = null; 86 | 87 | 88 | for (i = 0; i < argv.length; i++) { 89 | if (argv[i].charAt(0) != '-') { 90 | break; 91 | } 92 | ++i; 93 | switch (argv[i - 1].charAt(1)) { 94 | case 'l': 95 | lower = Double.parseDouble(argv[i]); 96 | break; 97 | case 'u': 98 | upper = Double.parseDouble(argv[i]); 99 | break; 100 | case 'y': 101 | y_lower = Double.parseDouble(argv[i]); 102 | ++i; 103 | y_upper = Double.parseDouble(argv[i]); 104 | y_scaling = true; 105 | break; 106 | case 's': 107 | save_filename = argv[i]; 108 | break; 109 | case 'r': 110 | restore_filename = argv[i]; 111 | break; 112 | default: 113 | System.err.println("unknown option"); 114 | exit_with_help(); 115 | } 116 | } 117 | 118 | if (!(upper > lower) || (y_scaling && !(y_upper > y_lower))) { 119 | System.err.println("inconsistent lower/upper specification"); 120 | System.exit(1); 121 | } 122 | if (restore_filename != null && save_filename != null) { 123 | System.err.println("cannot use -r and -s simultaneously"); 124 | System.exit(1); 125 | } 126 | 127 | if (argv.length != i + 1) { 128 | exit_with_help(); 129 | } 130 | 131 | data_filename = argv[i]; 132 | try { 133 | fp = new BufferedReader(new FileReader(data_filename)); 134 | } catch (Exception e) { 135 | System.err.println("can't open file " + data_filename); 136 | System.exit(1); 137 | } 138 | 139 | /* assumption: min index of attributes is 1 */ 140 | /* pass 1: find out max index of attributes */ 141 | max_index = 0; 142 | 143 | if (restore_filename != null) { 144 | int idx, c; 145 | 146 | try { 147 | fp_restore = new BufferedReader(new FileReader(restore_filename)); 148 | } catch (Exception e) { 149 | System.err.println("can't open file " + restore_filename); 150 | System.exit(1); 151 | } 152 | if ((c = fp_restore.read()) == 'y') { 153 | fp_restore.readLine(); 154 | fp_restore.readLine(); 155 | fp_restore.readLine(); 156 | } 157 | fp_restore.readLine(); 158 | fp_restore.readLine(); 159 | 160 | String restore_line = null; 161 | while ((restore_line = fp_restore.readLine()) != null) { 162 | StringTokenizer st2 = new StringTokenizer(restore_line); 163 | idx = Integer.parseInt(st2.nextToken()); 164 | max_index = Math.max(max_index, idx); 165 | } 166 | fp_restore = rewind(fp_restore, restore_filename); 167 | } 168 | 169 | while (readline(fp) != null) { 170 | StringTokenizer st = new StringTokenizer(line, " \t\n\r\f:"); 171 | st.nextToken(); 172 | while 
(st.hasMoreTokens()) { 173 | index = Integer.parseInt(st.nextToken()); 174 | max_index = Math.max(max_index, index); 175 | st.nextToken(); 176 | num_nonzeros++; 177 | } 178 | } 179 | 180 | try { 181 | feature_max = new double[(max_index + 1)]; 182 | feature_min = new double[(max_index + 1)]; 183 | } catch (OutOfMemoryError e) { 184 | System.err.println("can't allocate enough memory"); 185 | System.exit(1); 186 | } 187 | 188 | for (i = 0; i <= max_index; i++) { 189 | feature_max[i] = -Double.MAX_VALUE; 190 | feature_min[i] = Double.MAX_VALUE; 191 | } 192 | 193 | fp = rewind(fp, data_filename); 194 | 195 | /* pass 2: find out min/max value */ 196 | while (readline(fp) != null) { 197 | int next_index = 1; 198 | double target; 199 | double value; 200 | 201 | StringTokenizer st = new StringTokenizer(line, " \t\n\r\f:"); 202 | target = Double.parseDouble(st.nextToken()); 203 | y_max = Math.max(y_max, target); 204 | y_min = Math.min(y_min, target); 205 | 206 | while (st.hasMoreTokens()) { 207 | index = Integer.parseInt(st.nextToken()); 208 | value = Double.parseDouble(st.nextToken()); 209 | 210 | for (i = next_index; i < index; i++) { 211 | feature_max[i] = Math.max(feature_max[i], 0); 212 | feature_min[i] = Math.min(feature_min[i], 0); 213 | } 214 | 215 | feature_max[index] = Math.max(feature_max[index], value); 216 | feature_min[index] = Math.min(feature_min[index], value); 217 | next_index = index + 1; 218 | } 219 | 220 | for (i = next_index; i <= max_index; i++) { 221 | feature_max[i] = Math.max(feature_max[i], 0); 222 | feature_min[i] = Math.min(feature_min[i], 0); 223 | } 224 | } 225 | 226 | fp = rewind(fp, data_filename); 227 | 228 | /* pass 2.5: save/restore feature_min/feature_max */ 229 | if (restore_filename != null) { 230 | // fp_restore rewinded in finding max_index 231 | int idx, c; 232 | double fmin, fmax; 233 | 234 | fp_restore.mark(2); // for reset 235 | if ((c = fp_restore.read()) == 'y') { 236 | fp_restore.readLine(); // pass the '\n' after 'y' 237 | StringTokenizer st = new StringTokenizer(fp_restore.readLine()); 238 | y_lower = Double.parseDouble(st.nextToken()); 239 | y_upper = Double.parseDouble(st.nextToken()); 240 | st = new StringTokenizer(fp_restore.readLine()); 241 | y_min = Double.parseDouble(st.nextToken()); 242 | y_max = Double.parseDouble(st.nextToken()); 243 | y_scaling = true; 244 | } else { 245 | fp_restore.reset(); 246 | } 247 | 248 | if (fp_restore.read() == 'x') { 249 | fp_restore.readLine(); // pass the '\n' after 'x' 250 | StringTokenizer st = new StringTokenizer(fp_restore.readLine()); 251 | lower = Double.parseDouble(st.nextToken()); 252 | upper = Double.parseDouble(st.nextToken()); 253 | String restore_line = null; 254 | while ((restore_line = fp_restore.readLine()) != null) { 255 | StringTokenizer st2 = new StringTokenizer(restore_line); 256 | idx = Integer.parseInt(st2.nextToken()); 257 | fmin = Double.parseDouble(st2.nextToken()); 258 | fmax = Double.parseDouble(st2.nextToken()); 259 | if (idx <= max_index) { 260 | feature_min[idx] = fmin; 261 | feature_max[idx] = fmax; 262 | } 263 | } 264 | } 265 | fp_restore.close(); 266 | } 267 | 268 | if (save_filename != null) { 269 | Formatter formatter = new Formatter(new StringBuilder()); 270 | BufferedWriter fp_save = null; 271 | 272 | try { 273 | fp_save = new BufferedWriter(new FileWriter(save_filename)); 274 | } catch (IOException e) { 275 | System.err.println("can't open file " + save_filename); 276 | System.exit(1); 277 | } 278 | 279 | if (y_scaling) { 280 | formatter.format("y\n"); 281 | 
formatter.format("%.16g %.16g\n", y_lower, y_upper); 282 | formatter.format("%.16g %.16g\n", y_min, y_max); 283 | } 284 | formatter.format("x\n"); 285 | formatter.format("%.16g %.16g\n", lower, upper); 286 | for (i = 1; i <= max_index; i++) { 287 | if (feature_min[i] != feature_max[i]) { 288 | formatter.format("%d %.16g %.16g\n", i, feature_min[i], feature_max[i]); 289 | } 290 | } 291 | fp_save.write(formatter.toString()); 292 | fp_save.close(); 293 | } 294 | 295 | /* pass 3: scale */ 296 | while (readline(fp) != null) { 297 | int next_index = 1; 298 | double target; 299 | double value; 300 | 301 | StringTokenizer st = new StringTokenizer(line, " \t\n\r\f:"); 302 | target = Double.parseDouble(st.nextToken()); 303 | output_target(target); 304 | while (st.hasMoreElements()) { 305 | index = Integer.parseInt(st.nextToken()); 306 | value = Double.parseDouble(st.nextToken()); 307 | for (i = next_index; i < index; i++) { 308 | output(i, 0); 309 | } 310 | output(index, value); 311 | next_index = index + 1; 312 | } 313 | 314 | for (i = next_index; i <= max_index; i++) { 315 | output(i, 0); 316 | } 317 | System.out.print("\n"); 318 | } 319 | if (new_num_nonzeros > num_nonzeros) { 320 | System.err.print( 321 | "WARNING: original #nonzeros " + num_nonzeros + "\n" 322 | + " new #nonzeros " + new_num_nonzeros + "\n" 323 | + "Use -l 0 if many original feature values are zeros\n"); 324 | } 325 | 326 | fp.close(); 327 | } 328 | 329 | public static void main(String argv[]) throws IOException { 330 | svm_scale s = new svm_scale(); 331 | s.run(argv); 332 | } 333 | } 334 | -------------------------------------------------------------------------------- /README.libsvm: -------------------------------------------------------------------------------- 1 | Libsvm is a simple, easy-to-use, and efficient software for SVM 2 | classification and regression. It solves C-SVM classification, nu-SVM 3 | classification, one-class-SVM, epsilon-SVM regression, and nu-SVM 4 | regression. It also provides an automatic model selection tool for 5 | C-SVM classification. This document explains the use of libsvm. 6 | 7 | Libsvm is available at 8 | http://www.csie.ntu.edu.tw/~cjlin/libsvm 9 | Please read the COPYRIGHT file before using libsvm. 10 | 11 | Table of Contents 12 | ================= 13 | 14 | - Quick Start 15 | - Installation and Data Format 16 | - `svm-train' Usage 17 | - `svm-predict' Usage 18 | - `svm-scale' Usage 19 | - Tips on Practical Use 20 | - Examples 21 | - Precomputed Kernels 22 | - Library Usage 23 | - Java Version 24 | - Building Windows Binaries 25 | - Additional Tools: Sub-sampling, Parameter Selection, Format checking, etc. 26 | - MATLAB/OCTAVE Interface 27 | - Python Interface 28 | - Additional Information 29 | 30 | Quick Start 31 | =========== 32 | 33 | If you are new to SVM and if the data is not large, please go to 34 | `tools' directory and use easy.py after installation. It does 35 | everything automatic -- from data scaling to parameter selection. 36 | 37 | Usage: easy.py training_file [testing_file] 38 | 39 | More information about parameter selection can be found in 40 | `tools/README.' 41 | 42 | Installation and Data Format 43 | ============================ 44 | 45 | On Unix systems, type `make' to build the `svm-train' and `svm-predict' 46 | programs. Run them without arguments to show the usages of them. 
47 | 48 | On other systems, consult `Makefile' to build them (e.g., see 49 | 'Building Windows binaries' in this file) or use the pre-built 50 | binaries (Windows binaries are in the directory `windows'). 51 | 52 | The format of training and testing data file is: 53 | 54 |