├── sspace └── test ├── src ├── main │ └── java │ │ ├── edu │ │ └── ucla │ │ │ └── sspace │ │ │ ├── tools │ │ │ ├── SenseEvalCleaner.java │ │ │ ├── SemEval2010Cleaner.java │ │ │ ├── MatrixTranspose.java │ │ │ ├── BasisPrinter.java │ │ │ └── SelectTopKWords.java │ │ │ ├── evaluation │ │ │ ├── DeeseEvaluator.java │ │ │ ├── NormedWordPrimingTest.java │ │ │ ├── WordAssociationTest.java │ │ │ ├── WordChoiceEvaluation.java │ │ │ ├── NormedWordPrimingReport.java │ │ │ ├── NormedPrimingQuestion.java │ │ │ ├── MultipleChoiceQuestion.java │ │ │ ├── WordSimilarity.java │ │ │ ├── WordAssociationReport.java │ │ │ ├── WordPrimingReport.java │ │ │ ├── WordSimilarityReport.java │ │ │ ├── WordPrimingTest.java │ │ │ ├── SimpleWordSimilarity.java │ │ │ ├── WordChoiceReport.java │ │ │ └── WordSimilarityEvaluation.java │ │ │ ├── svs │ │ │ └── RelationTuple.java │ │ │ ├── common │ │ │ ├── statistics │ │ │ │ ├── SignificanceTest.java │ │ │ │ ├── GTest.java │ │ │ │ └── PointwiseMutualInformationTest.java │ │ │ ├── DimensionallyInterpretableSemanticSpace.java │ │ │ └── Filterable.java │ │ │ ├── vector │ │ │ ├── SparseIntegerVector.java │ │ │ ├── SparseVector.java │ │ │ └── SparseDoubleVector.java │ │ │ ├── dependency │ │ │ ├── DependencyPermutationFunction.java │ │ │ ├── FlatPathWeight.java │ │ │ ├── UniversalRelationAcceptor.java │ │ │ ├── LengthPathWeight.java │ │ │ ├── SubjObjRelationAcceptor.java │ │ │ ├── UniversalPathAcceptor.java │ │ │ ├── DependencyTreeTransform.java │ │ │ ├── DependencyRelation.java │ │ │ ├── DependencyPathAcceptor.java │ │ │ ├── DependencyPathWeight.java │ │ │ └── DependencyTreeNode.java │ │ │ ├── text │ │ │ ├── TemporalUsenetCorpusReader.java │ │ │ ├── Stemmer.java │ │ │ ├── Document.java │ │ │ ├── LabeledParsedDocument.java │ │ │ ├── TemporalDocument.java │ │ │ ├── LabeledDocument.java │ │ │ ├── TemporalBloglinesCorpusReader.java │ │ │ ├── AnnotatedDocument.java │ │ │ ├── GermanStemmer.java │ │ │ ├── EnglishStemmer.java │ │ │ ├── ItalianStemmer.java │ │ │ ├── 
SnowballPorterStemmer.java │ │ │ ├── LabeledStringDocument.java │ │ │ └── StringDocument.java │ │ │ ├── graph │ │ │ ├── WeightedDirectedEdge.java │ │ │ ├── WeightedTypedEdge.java │ │ │ ├── WeightedEdge.java │ │ │ ├── TemporalEdge.java │ │ │ ├── DirectedEdge.java │ │ │ ├── DirectedTypedEdge.java │ │ │ ├── WeightedDirectedTypedEdge.java │ │ │ ├── GraphConstructionException.java │ │ │ └── TypedEdge.java │ │ │ ├── clustering │ │ │ ├── Assignment.java │ │ │ ├── DataMatrixLinkClustering.java │ │ │ ├── seeding │ │ │ │ └── KMeansSeed.java │ │ │ └── criterion │ │ │ │ └── H2Function.java │ │ │ ├── util │ │ │ ├── ObjectEntry.java │ │ │ ├── primitive │ │ │ │ ├── IntIterator.java │ │ │ │ └── IntPair.java │ │ │ ├── IntegerEntry.java │ │ │ ├── DoubleEntry.java │ │ │ ├── BiMap.java │ │ │ ├── Generator.java │ │ │ ├── ReflectionUtil.java │ │ │ ├── ResourceFinder.java │ │ │ ├── Duple.java │ │ │ ├── SynchronizedIterator.java │ │ │ └── FileResourceFinder.java │ │ │ ├── dv │ │ │ └── DependencyPathBasisMapping.java │ │ │ ├── matrix │ │ │ ├── MatrixEntry.java │ │ │ ├── MatrixIOException.java │ │ │ └── SimpleEntry.java │ │ │ ├── hal │ │ │ ├── EvenWeighting.java │ │ │ ├── LinearWeighting.java │ │ │ ├── WeightingFunction.java │ │ │ └── GeometricWeighting.java │ │ │ ├── gws │ │ │ ├── WordOrderBasisMapping.java │ │ │ └── WordBasisMapping.java │ │ │ ├── basis │ │ │ └── StringBasisMapping.java │ │ │ ├── index │ │ │ ├── PermutationFunction.java │ │ │ ├── DoubleVectorGenerator.java │ │ │ └── IntegerVectorGenerator.java │ │ │ ├── similarity │ │ │ ├── AbstractSymmetricSimilarityFunction.java │ │ │ ├── OneSimilarity.java │ │ │ ├── KendallsTau.java │ │ │ └── TanimotoCoefficient.java │ │ │ ├── wordsi │ │ │ ├── OccurrenceDependencyContextGenerator.java │ │ │ ├── OrderingDependencyContextGenerator.java │ │ │ ├── PartOfSpeechDependencyContextGenerator.java │ │ │ └── ContextExtractor.java │ │ │ └── mains │ │ │ └── TopicWordsiMain.java │ │ ├── org │ │ └── tartarus │ │ │ └── snowball │ │ │ ├── 
SnowballStemmer.java │ │ │ └── Among.java │ │ └── jnt │ │ └── FFT │ │ ├── README │ │ └── Test.java └── test │ └── java │ └── edu │ └── ucla │ └── sspace │ ├── vector │ ├── SparseHashVectorTests.java │ ├── VectorIOTest.java │ ├── MaskedDoubleVectorViewTest.java │ └── DenseVectorTests.java │ ├── matrix │ ├── factorization │ │ ├── SingularValueDecompositionLibJTest.java │ │ ├── SingularValueDecompositionLibCTest.java │ │ ├── SingularValueDecompositionOctaveTest.java │ │ └── SingularValueDecompositionMatlabTest.java │ ├── MatlabSparseFileTransformerTest.java │ ├── SvdlibcDenseTextFileTransformerTest.java │ ├── SvdlibcSparseTextFileTransformerTest.java │ ├── SvdlibcSparseBinaryFileTransformerTest.java │ └── SvdlibcDenseBinaryFileTransformerTest.java │ ├── ri │ └── TestRandomIndexing.java │ ├── common │ └── DummySemanticSpace.java │ ├── text │ └── PorterStemmerTest.java │ ├── dependency │ ├── AbstractPathUtil.java │ ├── AbstractPathTestBase.java │ ├── FlatPathWeightTest.java │ └── LengthPathWeightTest.java │ └── util │ └── ObjectCounterTest.java ├── opt ├── lib │ └── jaws-bin-1.2.jar └── add_non_maven_jars.sh ├── hadoop └── pom.xml └── README.md /sspace/test: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/main/java/edu/ucla/sspace/tools/SenseEvalCleaner.java: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/main/java/edu/ucla/sspace/tools/SemEval2010Cleaner.java: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /opt/lib/jaws-bin-1.2.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fozziethebeat/S-Space/HEAD/opt/lib/jaws-bin-1.2.jar 
-------------------------------------------------------------------------------- /src/main/java/org/tartarus/snowball/SnowballStemmer.java: -------------------------------------------------------------------------------- 1 | 2 | package org.tartarus.snowball; 3 | import java.lang.reflect.InvocationTargetException; 4 | 5 | public abstract class SnowballStemmer extends SnowballProgram { 6 | public abstract boolean stem(); 7 | }; 8 | -------------------------------------------------------------------------------- /opt/add_non_maven_jars.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | mvn install:install-file -DgroupId=jama -DartifactId=jama \ 4 | -Dversion=1.0 -Dpackaging=jar -Dfile=lib/jama.jar 5 | 6 | mvn install:install-file -DgroupId=jaws -DartifactId=jaws \ 7 | -Dversion=1.2 -Dpackaging=jar -Dfile=lib/jaws-bin-1.2.jar 8 | 9 | -------------------------------------------------------------------------------- /src/main/java/edu/ucla/sspace/tools/MatrixTranspose.java: -------------------------------------------------------------------------------- 1 | package edu.ucla.sspace.tools; 2 | 3 | import edu.ucla.sspace.matrix.*; 4 | 5 | import java.io.*; 6 | 7 | /** 8 | * @author Keith Stevens 9 | */ 10 | public class MatrixTranspose { 11 | public static void main(String[] args) throws Exception { 12 | Matrix m = MatrixIO.readMatrix(new File(args[0]), MatrixIO.Format.DENSE_TEXT); 13 | m = Matrices.transpose(m); 14 | File out = new File(args[1]); 15 | MatrixIO.writeMatrix(m, out, MatrixIO.Format.DENSE_TEXT); 16 | } 17 | 18 | } 19 | -------------------------------------------------------------------------------- /src/main/java/edu/ucla/sspace/tools/BasisPrinter.java: -------------------------------------------------------------------------------- 1 | package edu.ucla.sspace.tools; 2 | 3 | import edu.ucla.sspace.basis.BasisMapping; 4 | import edu.ucla.sspace.util.SerializableUtil; 5 | 6 | import java.io.File; 7 | 8 | 
9 | /** 10 | * @author Keith Stevens 11 | */ 12 | public class BasisPrinter { 13 | public static void main(String[] args) { 14 | BasisMapping basis = 15 | SerializableUtil.load(new File(args[0])); 16 | for (int i = 0; i < basis.numDimensions(); ++i) 17 | System.out.println(basis.getDimensionDescription(i)); 18 | } 19 | } 20 | 21 | -------------------------------------------------------------------------------- /src/main/java/edu/ucla/sspace/evaluation/DeeseEvaluator.java: -------------------------------------------------------------------------------- 1 | package edu.ucla.sspace.evaluation; 2 | 3 | import edu.ucla.sspace.common.SemanticSpace; 4 | import edu.ucla.sspace.common.SemanticSpaceIO; 5 | 6 | 7 | public class DeeseEvaluator { 8 | public static void main(String[] args) throws Exception { 9 | DeeseAntonymEvaluation evaluator = new DeeseAntonymEvaluation(); 10 | for (String file : args) { 11 | SemanticSpace sspace = SemanticSpaceIO.load(file); 12 | WordAssociationReport report = evaluator.evaluate(sspace); 13 | System.out.printf("%s: %.3f\n", file, report.correlation()); 14 | } 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /src/main/java/edu/ucla/sspace/svs/RelationTuple.java: -------------------------------------------------------------------------------- 1 | package edu.ucla.sspace.svs; 2 | 3 | 4 | /** 5 | * @author Keith Stevens 6 | */ 7 | public class RelationTuple { 8 | public int head; 9 | public String relation; 10 | 11 | public RelationTuple(int head, String relation) { 12 | this.head = head; 13 | this.relation = relation; 14 | } 15 | 16 | public boolean equals(Object o) { 17 | if (o == null || !(o instanceof RelationTuple)) 18 | return false; 19 | RelationTuple r = (RelationTuple) o; 20 | return this.head == r.head && this.relation == r.relation; 21 | } 22 | 23 | public int hashCode() { 24 | return head ^ relation.hashCode(); 25 | } 26 | } 27 | 
-------------------------------------------------------------------------------- /src/main/java/jnt/FFT/README: -------------------------------------------------------------------------------- 1 | README for jnt.FFT 2 | Java Numerical Toolkit subpackage for Fast Fourier Transforms. 3 | Bruce R. Miller 4 | 5 | ***NOTE*** 6 | Several routines in this subpackage were derived from 7 | Brian Gough's FFT routines in the Gnu Scientific Library (GSL). 8 | GSL is released under the Gnu General Public License 9 | (see http://www.gnu.org/copyleft/gpl.html) 10 | As such, this package must also be released under GPL. 11 | 12 | The modifications I have made to port the routines from 13 | C to Java, and the additional classes developed were 14 | developed as part of my official duties as a U.S. 15 | government employee, and are therefore not subject 16 | to copyright. 17 | 18 | Furthermore, this software is under development, and is 19 | in no way certified or guaranteed. -------------------------------------------------------------------------------- /src/main/java/jnt/FFT/Test.java: -------------------------------------------------------------------------------- 1 | package jnt.FFT; 2 | 3 | class Test { 4 | public static void main(String[] args) { 5 | RealDoubleFFT_Radix2 ffter = new RealDoubleFFT_Radix2(32); 6 | double[] cat = { 0.1151, -0.1175, -0.0573, -0.0733, -0.0406, -0.0332, -0.3583, 0.0166, -0.1998, -0.1076, -0.0756, -0.2580, 0.0614, -0.2200, -0.0827, 0.0026, 0.0850, -0.4051, -0.0536, 0.0355, -0.0947, -0.0242, 0.0421, 0.1048, -0.1097, -0.0729, 0.0020, -0.0699, -0.1137, 0.0702, 0.1843, -0.1336}; 7 | double[] are = { 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000}; 8 | ffter.transform(cat, 0, 1); 9 | for (int i = 0; i < 32; i++) { 10 | System.out.println(cat[i]); 11 | 
} 12 | } 13 | } 14 | 15 | 16 | -------------------------------------------------------------------------------- /hadoop/pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | 5 | 6 | sspace 7 | edu.ucla.sspace 8 | 2.0 9 | ../pom.xml 10 | 11 | 12 | sspace-hadoop 13 | Hadoop S-Space 14 | Hadoop based Semantic Space Implementations 15 | 16 | 17 | 18 | 19 | ${project.groupId} 20 | sspace 21 | 2.0 22 | 23 | 24 | org.apache.hadoop 25 | hadoop-core 26 | 1.0.1 27 | 28 | 29 | 30 | -------------------------------------------------------------------------------- /src/main/java/org/tartarus/snowball/Among.java: -------------------------------------------------------------------------------- 1 | package org.tartarus.snowball; 2 | 3 | import java.lang.reflect.Method; 4 | 5 | public class Among { 6 | public Among (String s, int substring_i, int result, 7 | String methodname, SnowballProgram methodobject) { 8 | this.s_size = s.length(); 9 | this.s = s.toCharArray(); 10 | this.substring_i = substring_i; 11 | this.result = result; 12 | this.methodobject = methodobject; 13 | if (methodname.length() == 0) { 14 | this.method = null; 15 | } else { 16 | try { 17 | this.method = methodobject.getClass(). 
18 | getDeclaredMethod(methodname, new Class[0]); 19 | } catch (NoSuchMethodException e) { 20 | throw new RuntimeException(e); 21 | } 22 | } 23 | } 24 | 25 | public final int s_size; /* search string */ 26 | public final char[] s; /* search string */ 27 | public final int substring_i; /* index to longest matching substring */ 28 | public final int result; /* result of the lookup */ 29 | public final Method method; /* method to use if substring matches */ 30 | public final SnowballProgram methodobject; /* object to invoke method on */ 31 | }; 32 | -------------------------------------------------------------------------------- /src/main/java/edu/ucla/sspace/common/statistics/SignificanceTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2011 David Jurgens 3 | * 4 | * This file is part of the S-Space package and is covered under the terms and 5 | * conditions therein. 6 | * 7 | * The S-Space package is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License version 2 as published 9 | * by the Free Software Foundation and distributed hereunder to you. 10 | * 11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES, 12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE 13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY 14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION 15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER 16 | * RIGHTS. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with this program. If not, see . 
20 | */ 21 | 22 | package edu.ucla.sspace.common.statistics; 23 | 24 | public interface SignificanceTest { 25 | 26 | double score (int both, int justA, int justB, int neither); 27 | 28 | } 29 | -------------------------------------------------------------------------------- /src/main/java/edu/ucla/sspace/vector/SparseIntegerVector.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2009 David Jurgens 3 | * 4 | * This file is part of the S-Space package and is covered under the terms and 5 | * conditions therein. 6 | * 7 | * The S-Space package is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License version 2 as published 9 | * by the Free Software Foundation and distributed hereunder to you. 10 | * 11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES, 12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE 13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY 14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION 15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER 16 | * RIGHTS. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with this program. If not, see . 20 | */ 21 | 22 | package edu.ucla.sspace.vector; 23 | 24 | 25 | /** 26 | * An interface for sparse {@link IntegerVector} instances. 27 | */ 28 | public interface SparseIntegerVector 29 | extends SparseVector, IntegerVector { } 30 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | The S-Space Package is a collection of algorithms for building Semantic Spaces 2 | as well as a highly-scalable library for designing new distributional semantics 3 | algorithms. 
Distributional algorithms process text corpora and represent the 4 | semantics of words as high dimensional feature vectors. These approaches are 5 | known by many names, such as word spaces, semantic spaces, or distributed 6 | semantics and rest upon the Distributional Hypothesis: words that appear in 7 | similar contexts have similar meanings. 8 | 9 | The research and development is being done by the Natural Language Processing 10 | group at UCLA led by David Jurgens and Keith Stevens, under the advisory of Dr. 11 | Michael Dyer. 12 | 13 | See the [Getting Started](../../wiki/GettingStarted) page for 14 | a quick introduction on how to use the S-Space package, see the [Package 15 | Overview](../../wiki/PackageLayout) for information on the 16 | code and available features, or dive right into the 17 | [Javadoc](http://fozziethebeat.github.com/S-Space/apidocs/) to see what's 18 | available now. For any questions, please contact us via our mailing lists: 19 | [S-Space-Users][1] and [S-Space-Research-Dev][2]. 20 | 21 | [1]:mailto:s-space-users@googlegroups.com 22 | [2]:mailto:s-space-research-dev@googlegroups.com 23 | -------------------------------------------------------------------------------- /src/main/java/edu/ucla/sspace/dependency/DependencyPermutationFunction.java: -------------------------------------------------------------------------------- 1 | package edu.ucla.sspace.dependency; 2 | 3 | import edu.ucla.sspace.vector.Vector; 4 | 5 | import java.util.LinkedList; 6 | 7 | 8 | /** 9 | * An interface for permuting a {@link Vector} based on a dependency path, 10 | * represented as a list of {@link DependencyRelation}s. Implementations are 11 | * recommended to extend existing {@link 12 | * edu.ucla.sspace.index.PermutationFunction PermutationFunction}s by simply 13 | * using an existing {@link edu.ucla.sspace.index.PermutationFunction 14 | * PermutationFunction}. Implementations are also suggested to be thread-safe. 
15 | * 16 | * @see edu.ucla.sspace.index.PermutationFunction 17 | * 18 | * @author Keith Stevens 19 | */ 20 | public interface DependencyPermutationFunction { 21 | 22 | /** 23 | * Returns a permuted form of {@code vector} based on the dependency path 24 | * provided. 25 | * 26 | * @param vector The {@link Vector} to permute 27 | * @param path The dependency path, composed of word,relation pairs, 28 | * that the permutation is based on 29 | * 30 | * @return A new {@link Vector} of the same type as {@code vector} 31 | * that is permuted according to {@code path} 32 | */ 33 | T permute(T vector, DependencyPath path); 34 | } 35 | -------------------------------------------------------------------------------- /src/main/java/edu/ucla/sspace/evaluation/NormedWordPrimingTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Keith Stevens 3 | * 4 | * This file is part of the S-Space package and is covered under the terms and 5 | * conditions therein. 6 | * 7 | * The S-Space package is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License version 2 as published 9 | * by the Free Software Foundation and distributed hereunder to you. 10 | * 11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES, 12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE 13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY 14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION 15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER 16 | * RIGHTS. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with this program. If not, see . 
20 | */ 21 | 22 | package edu.ucla.sspace.evaluation; 23 | 24 | import edu.ucla.sspace.common.SemanticSpace; 25 | 26 | 27 | /** 28 | * @author Keith Stevens 29 | */ 30 | public interface NormedWordPrimingTest { 31 | 32 | public NormedWordPrimingReport evaluate(SemanticSpace sspace); 33 | } 34 | -------------------------------------------------------------------------------- /src/main/java/edu/ucla/sspace/evaluation/WordAssociationTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 David Jurgens 3 | * 4 | * This file is part of the S-Space package and is covered under the terms and 5 | * conditions therein. 6 | * 7 | * The S-Space package is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License version 2 as published 9 | * by the Free Software Foundation and distributed hereunder to you. 10 | * 11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES, 12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE 13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY 14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION 15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER 16 | * RIGHTS. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with this program. If not, see . 
20 | */ 21 | 22 | package edu.ucla.sspace.evaluation; 23 | 24 | import edu.ucla.sspace.common.SemanticSpace; 25 | 26 | 27 | /** 28 | * 29 | * 30 | * @author David Jurgens 31 | */ 32 | public interface WordAssociationTest { 33 | 34 | public WordAssociationReport evaluate(SemanticSpace sspace); 35 | 36 | } 37 | -------------------------------------------------------------------------------- /src/main/java/edu/ucla/sspace/text/TemporalUsenetCorpusReader.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2009 Keith Stevens 3 | * 4 | * This file is part of the S-Space package and is covered under the terms and 5 | * conditions therein. 6 | * 7 | * The S-Space package is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License version 2 as published 9 | * by the Free Software Foundation and distributed hereunder to you. 10 | * 11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES, 12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE 13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY 14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION 15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER 16 | * RIGHTS. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with this program. If not, see . 20 | */ 21 | 22 | package edu.ucla.sspace.text; 23 | 24 | 25 | /** 26 | * A subclass of {@code UsenetCorpusReader} that always includes timestamps. 
27 | * 28 | * @author Keith Stevens 29 | */ 30 | public class TemporalUsenetCorpusReader extends UsenetCorpusReader { 31 | 32 | public TemporalUsenetCorpusReader() { 33 | super(true); 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /src/main/java/edu/ucla/sspace/text/Stemmer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2009 Keith Stevens 3 | * 4 | * This file is part of the S-Space package and is covered under the terms and 5 | * conditions therein. 6 | * 7 | * The S-Space package is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License version 2 as published 9 | * by the Free Software Foundation and distributed hereunder to you. 10 | * 11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES, 12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE 13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY 14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION 15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER 16 | * RIGHTS. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with this program. If not, see . 20 | */ 21 | 22 | package edu.ucla.sspace.text; 23 | 24 | 25 | /** 26 | * An interface for classes that stem tokens. 28 | */ 29 | public interface Stemmer { 30 | 31 | /** 32 | * Converts the token to its root form, or if it is already in root form, 33 | * returns the token. 
34 | */ 35 | public String stem(String token); 36 | } -------------------------------------------------------------------------------- /src/main/java/edu/ucla/sspace/text/Document.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2009 David Jurgens 3 | * 4 | * This file is part of the S-Space package and is covered under the terms and 5 | * conditions therein. 6 | * 7 | * The S-Space package is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License version 2 as published 9 | * by the Free Software Foundation and distributed hereunder to you. 10 | * 11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES, 12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE 13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY 14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION 15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER 16 | * RIGHTS. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with this program. If not, see . 20 | */ 21 | 22 | package edu.ucla.sspace.text; 23 | 24 | import java.io.BufferedReader; 25 | 26 | /** 27 | * An abstraction for a document that allows document processors to access text 28 | * in a uniform manner. 29 | */ 30 | public interface Document { 31 | 32 | /** 33 | * Returns the {@code BufferedReader} for this document's text 34 | */ 35 | BufferedReader reader(); 36 | 37 | } 38 | -------------------------------------------------------------------------------- /src/main/java/edu/ucla/sspace/text/LabeledParsedDocument.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2011 David Jurgens 3 | * 4 | * This file is part of the S-Space package and is covered under the terms and 5 | * conditions therein. 
6 | * 7 | * The S-Space package is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License version 2 as published 9 | * by the Free Software Foundation and distributed hereunder to you. 10 | * 11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES, 12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE 13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY 14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION 15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER 16 | * RIGHTS. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with this program. If not, see . 20 | */ 21 | 22 | package edu.ucla.sspace.text; 23 | 24 | 25 | /** 26 | * A union interface for a document that has been (or will be) dependency parsed 27 | * to generate an accompanying parse tree of its contents and that has an 28 | * accompanying label about its source or contents. 29 | */ 30 | public interface LabeledParsedDocument extends LabeledDocument, ParsedDocument { 31 | 32 | } 33 | -------------------------------------------------------------------------------- /src/main/java/edu/ucla/sspace/dependency/FlatPathWeight.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Keith Stevens 3 | * 4 | * This file is part of the S-Space package and is covered under the terms and 5 | * conditions therein. 6 | * 7 | * The S-Space package is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License version 2 as published 9 | * by the Free Software Foundation and distributed hereunder to you. 10 | * 11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES, 12 | * EXPRESS OR IMPLIED ARE MADE. 
BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE 13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY 14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION 15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER 16 | * RIGHTS. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with this program. If not, see . 20 | */ 21 | 22 | package edu.ucla.sspace.dependency; 23 | 24 | 25 | /** 26 | * A {@link DependencyPathWeight} that returns {@code 1} for every path. 27 | * 28 | * @author Keith Stevens 29 | */ 30 | public class FlatPathWeight implements DependencyPathWeight { 31 | 32 | /** 33 | * {@inheritDoc} 34 | */ 35 | public double scorePath(DependencyPath path) { 36 | return 1; 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /src/main/java/edu/ucla/sspace/dependency/UniversalRelationAcceptor.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Keith Stevens 3 | * 4 | * This file is part of the S-Space package and is covered under the terms and 5 | * conditions therein. 6 | * 7 | * The S-Space package is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License version 2 as published 9 | * by the Free Software Foundation and distributed hereunder to you. 10 | * 11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES, 12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE 13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY 14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION 15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER 16 | * RIGHTS. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with this program. 
If not, see . 20 | */ 21 | 22 | package edu.ucla.sspace.dependency; 23 | 24 | 25 | /** 26 | * A {@link DependencyRelationAcceptor} that accepts all relations. 27 | * 28 | * @author Keith Stevens 29 | */ 30 | public class UniversalRelationAcceptor implements DependencyRelationAcceptor { 31 | 32 | /** 33 | * {@inheritDoc} 34 | */ 35 | public boolean accept(DependencyRelation relation) { 36 | return true; 37 | } 38 | } 39 | 40 | -------------------------------------------------------------------------------- /src/main/java/edu/ucla/sspace/graph/WeightedDirectedEdge.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2011 David Jurgens 3 | * 4 | * This file is part of the S-Space package and is covered under the terms and 5 | * conditions therein. 6 | * 7 | * The S-Space package is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License version 2 as published 9 | * by the Free Software Foundation and distributed hereunder to you. 10 | * 11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES, 12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE 13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY 14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION 15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER 16 | * RIGHTS. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with this program. If not, see . 20 | */ 21 | 22 | package edu.ucla.sspace.graph; 23 | 24 | 25 | /** 26 | * An interface for representing a weighted edge between two vertices. 27 | */ 28 | public interface WeightedDirectedEdge extends WeightedEdge, DirectedEdge { 29 | 30 | /** 31 | * Returns {@code true} if {@code o} connects the same two vertices with the 32 | * same edge orientation regardless of edge weight. 
33 | */ 34 | boolean equals(Object o); 35 | 36 | } -------------------------------------------------------------------------------- /src/main/java/edu/ucla/sspace/clustering/Assignment.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2009 Keith Stevens 3 | * 4 | * This file is part of the S-Space package and is covered under the terms and 5 | * conditions therein. 6 | * 7 | * The S-Space package is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License version 2 as published 9 | * by the Free Software Foundation and distributed hereunder to you. 10 | * 11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES, 12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE 13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY 14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION 15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER 16 | * RIGHTS. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with this program. If not, see . 20 | */ 21 | 22 | package edu.ucla.sspace.clustering; 23 | 24 | 25 | /** 26 | * A cluster assignment for a data point. 27 | * 28 | * @see Clustering 29 | */ 30 | public interface Assignment { 31 | 32 | /** 33 | * Returns the cluster id's that a specific data point was assigned to. 34 | */ 35 | int[] assignments(); 36 | 37 | /** 38 | * Returns the number of assignments given for this data point. 
39 | */ 40 | int length(); 41 | } 42 | -------------------------------------------------------------------------------- /src/main/java/edu/ucla/sspace/vector/SparseVector.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2009 Keith Stevens 3 | * 4 | * This file is part of the S-Space package and is covered under the terms and 5 | * conditions therein. 6 | * 7 | * The S-Space package is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License version 2 as published 9 | * by the Free Software Foundation and distributed hereunder to you. 10 | * 11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES, 12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE 13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY 14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION 15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER 16 | * RIGHTS. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with this program. If not, see . 20 | */ 21 | 22 | package edu.ucla.sspace.vector; 23 | 24 | 25 | /** 26 | * An interface for {@code Vector} implementations whose values are sparse and 27 | * that support access to only those indices with non-zero values. 
28 | * 29 | * @author Keith Stevens 30 | */ 31 | public interface SparseVector extends Vector { 32 | 33 | /** 34 | * Returns all the indices whose values are non-zero 35 | */ 36 | int[] getNonZeroIndices(); 37 | } 38 | -------------------------------------------------------------------------------- /src/main/java/edu/ucla/sspace/vector/SparseDoubleVector.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2009 David Jurgens 3 | * 4 | * This file is part of the S-Space package and is covered under the terms and 5 | * conditions therein. 6 | * 7 | * The S-Space package is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License version 2 as published 9 | * by the Free Software Foundation and distributed hereunder to you. 10 | * 11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES, 12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE 13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY 14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION 15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER 16 | * RIGHTS. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with this program. If not, see . 20 | */ 21 | 22 | package edu.ucla.sspace.vector; 23 | 24 | 25 | /** 26 | * An interface for sparse {@link DoubleVector} instances. 27 | */ 28 | public interface SparseDoubleVector 29 | extends SparseVector, DoubleVector { 30 | 31 | /** 32 | * Returns a new instance of a vector with the same type. If the vector is 33 | * bounded by size, the returned instance will have the same bound. 
34 | */ 35 | SparseDoubleVector instanceCopy(); 36 | } 37 | -------------------------------------------------------------------------------- /src/main/java/edu/ucla/sspace/text/TemporalDocument.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2009 David Jurgens 3 | * 4 | * This file is part of the S-Space package and is covered under the terms and 5 | * conditions therein. 6 | * 7 | * The S-Space package is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License version 2 as published 9 | * by the Free Software Foundation and distributed hereunder to you. 10 | * 11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES, 12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE 13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY 14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION 15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER 16 | * RIGHTS. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with this program. If not, see . 20 | */ 21 | 22 | package edu.ucla.sspace.text; 23 | 24 | import edu.ucla.sspace.text.Document; 25 | 26 | import java.io.BufferedReader; 27 | 28 | /** 29 | * An abstraction for a document that allows document processors to access 30 | * time-annotated text in a uniform manner. 31 | */ 32 | public interface TemporalDocument extends Document { 33 | 34 | /** 35 | * Returns the time at which this document was created. 
36 | */ 37 | long timeStamp(); 38 | 39 | } 40 | -------------------------------------------------------------------------------- /src/main/java/edu/ucla/sspace/graph/WeightedTypedEdge.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2011 David Jurgens 3 | * 4 | * This file is part of the S-Space package and is covered under the terms and 5 | * conditions therein. 6 | * 7 | * The S-Space package is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License version 2 as published 9 | * by the Free Software Foundation and distributed hereunder to you. 10 | * 11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES, 12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE 13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY 14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION 15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER 16 | * RIGHTS. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with this program. If not, see . 20 | */ 21 | 22 | package edu.ucla.sspace.graph; 23 | 24 | 25 | /** 26 | * An interface for representing a weigthed edge between two vertices in a 27 | * multigraph. 28 | * 29 | * @see Multigraph 30 | */ 31 | public interface WeightedTypedEdge extends TypedEdge, WeightedEdge { 32 | 33 | /** 34 | * Returns {@code true} if {@code o} connects the same two vertices 35 | * regardless of the edge orientation, type, and weight. 
36 | */ 37 | boolean equals(Object o); 38 | 39 | } -------------------------------------------------------------------------------- /src/main/java/edu/ucla/sspace/graph/WeightedEdge.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2011 David Jurgens 3 | * 4 | * This file is part of the S-Space package and is covered under the terms and 5 | * conditions therein. 6 | * 7 | * The S-Space package is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License version 2 as published 9 | * by the Free Software Foundation and distributed hereunder to you. 10 | * 11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES, 12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE 13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY 14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION 15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER 16 | * RIGHTS. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with this program. If not, see . 20 | */ 21 | 22 | package edu.ucla.sspace.graph; 23 | 24 | 25 | /** 26 | * An interface for representing a weigthed edge between two vertices. 27 | */ 28 | public interface WeightedEdge extends Edge { 29 | 30 | /** 31 | * Returns {@code true} if {@code o} connects the same two vertices 32 | * regardless of the edge orientation and weight. 33 | */ 34 | boolean equals(Object o); 35 | 36 | /** 37 | * Returns the weight for this edge. 
38 | */ 39 | double weight(); 40 | } -------------------------------------------------------------------------------- /src/main/java/edu/ucla/sspace/util/ObjectEntry.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 David Jurgens 3 | * 4 | * This file is part of the S-Space package and is covered under the terms and 5 | * conditions therein. 6 | * 7 | * The S-Space package is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License version 2 as published 9 | * by the Free Software Foundation and distributed hereunder to you. 10 | * 11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES, 12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE 13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY 14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION 15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER 16 | * RIGHTS. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with this program. If not, see . 20 | */ 21 | 22 | package edu.ucla.sspace.util; 23 | 24 | 25 | /** 26 | * An object that represents an index that has an associated typed {@code 27 | * Object} value. 28 | * 29 | * @param <T> the type of the object that this entry maps to 30 | */ 31 | public interface ObjectEntry<T> { 32 | 33 | /** 34 | * Returns the index position of this entry. 35 | */ 36 | int index(); 37 | 38 | /** 39 | * Returns the object at this entry's index.
40 | */ 41 | T value(); 42 | } 43 | -------------------------------------------------------------------------------- /src/main/java/edu/ucla/sspace/text/LabeledDocument.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2011 David Jurgens 3 | * 4 | * This file is part of the S-Space package and is covered under the terms and 5 | * conditions therein. 6 | * 7 | * The S-Space package is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License version 2 as published 9 | * by the Free Software Foundation and distributed hereunder to you. 10 | * 11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES, 12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE 13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY 14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION 15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER 16 | * RIGHTS. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with this program. If not, see . 20 | */ 21 | 22 | package edu.ucla.sspace.text; 23 | 24 | import java.io.BufferedReader; 25 | 26 | /** 27 | * An abstraction for a document that has an accompanying label or name. 28 | */ 29 | public interface LabeledDocument extends Document { 30 | 31 | /** 32 | * Returns a label associated with this particular document. The label is 33 | * intended to provide information on the source of the document or the 34 | * contents therein. 
35 | */ 36 | String label(); 37 | 38 | } 39 | -------------------------------------------------------------------------------- /src/main/java/edu/ucla/sspace/text/TemporalBloglinesCorpusReader.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2009 Keith Stevens 3 | * 4 | * This file is part of the S-Space package and is covered under the terms and 5 | * conditions therein. 6 | * 7 | * The S-Space package is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License version 2 as published 9 | * by the Free Software Foundation and distributed hereunder to you. 10 | * 11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES, 12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE 13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY 14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION 15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER 16 | * RIGHTS. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with this program. If not, see . 20 | */ 21 | 22 | package edu.ucla.sspace.text; 23 | 24 | /** 25 | * A subclass of {@code BloglinesCorpusReader} that always includes timestamps. 26 | * 27 | * @author Keith Stevens 28 | */ 29 | public class TemporalBloglinesCorpusReader extends BloglinesCorpusReader { 30 | 31 | /** 32 | * Creates a {@code BloglinesCorpusReader} that will always include 33 | * timestamps. 
34 | */ 35 | public TemporalBloglinesCorpusReader() { 36 | super(true); 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /src/main/java/edu/ucla/sspace/util/primitive/IntIterator.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2011 David Jurgens 3 | * 4 | * This file is part of the S-Space package and is covered under the terms and 5 | * conditions therein. 6 | * 7 | * The S-Space package is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License version 2 as published 9 | * by the Free Software Foundation and distributed hereunder to you. 10 | * 11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES, 12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE 13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY 14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION 15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER 16 | * RIGHTS. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with this program. If not, see . 20 | */ 21 | 22 | package edu.ucla.sspace.util.primitive; 23 | 24 | import java.util.Iterator; 25 | 26 | 27 | /** 28 | * A refinement of the {@link Iterator} interface for iterating over primitive 29 | * {@code int} values. 30 | */ 31 | public interface IntIterator extends Iterator { 32 | 33 | /** 34 | * Returns the next {@code int} in the sequence. 
35 | * 36 | * @throws NoSuchElementException if no further {@code int} values remain 37 | */ 38 | int nextInt(); 39 | 40 | } -------------------------------------------------------------------------------- /src/main/java/edu/ucla/sspace/dependency/LengthPathWeight.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Keith Stevens 3 | * 4 | * This file is part of the S-Space package and is covered under the terms and 5 | * conditions therein. 6 | * 7 | * The S-Space package is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License version 2 as published 9 | * by the Free Software Foundation and distributed hereunder to you. 10 | * 11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES, 12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE 13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY 14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION 15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER 16 | * RIGHTS. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with this program. If not, see . 20 | */ 21 | 22 | package edu.ucla.sspace.dependency; 23 | 24 | 25 | /** 26 | * A {@link DependencyPathWeight} that scores paths inversely proportional to 27 | * their length. The scoring function is 1 / number of relations. 
28 | * 29 | * @author Keith Stevens 30 | */ 31 | public class LengthPathWeight implements DependencyPathWeight { 32 | 33 | /** 34 | * {@inheritDoc} Scores the path as {@code 1 / path.length()}. NOTE(review): presumably {@code length()} is never 0 here; a zero length would give an infinite score -- confirm against callers. 35 | */ 36 | public double scorePath(DependencyPath path) { 37 | return 1d / (path.length()); 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /src/main/java/edu/ucla/sspace/graph/TemporalEdge.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2011 David Jurgens 3 | * 4 | * This file is part of the S-Space package and is covered under the terms and 5 | * conditions therein. 6 | * 7 | * The S-Space package is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License version 2 as published 9 | * by the Free Software Foundation and distributed hereunder to you. 10 | * 11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES, 12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE 13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY 14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION 15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER 16 | * RIGHTS. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with this program. If not, see . 20 | */ 21 | 22 | package edu.ucla.sspace.graph; 23 | 24 | 25 | /** 26 | * An interface for representing an edge in a {@link DynamicGraph}, where each 27 | * edge is associated with a specific time. 28 | */ 29 | public interface TemporalEdge extends Edge { 30 | 31 | /** 32 | * Returns {@code true} if {@code o} connects the same two vertices 33 | * at the same time. 34 | */ 35 | boolean equals(Object o); 36 | 37 | /** 38 | * Returns milliseconds since the epoch when this edge occurred.
39 | */ 40 | long time(); 41 | } -------------------------------------------------------------------------------- /src/main/java/edu/ucla/sspace/dependency/SubjObjRelationAcceptor.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Keith Stevens 3 | * 4 | * This file is part of the S-Space package and is covered under the terms and 5 | * conditions therein. 6 | * 7 | * The S-Space package is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License version 2 as published 9 | * by the Free Software Foundation and distributed hereunder to you. 10 | * 11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES, 12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE 13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY 14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION 15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER 16 | * RIGHTS. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with this program. If not, see . 20 | */ 21 | 22 | package edu.ucla.sspace.dependency; 23 | 24 | 25 | /** 26 | * A {@link DependencyPathAcceptor} that accepts links with the {@code subj} or 27 | * {@code obj} relation. 
28 | * 29 | * @author Keith Stevens 30 | */ 31 | public class SubjObjRelationAcceptor implements DependencyRelationAcceptor { 32 | 33 | /** 34 | * {@inheritDoc} 35 | */ 36 | public boolean accept(DependencyRelation relation) { 37 | return relation.relation().equals("SBJ") 38 | || relation.relation().equals("OBJ"); 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /src/main/java/edu/ucla/sspace/dependency/UniversalPathAcceptor.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Keith Stevens 3 | * 4 | * This file is part of the S-Space package and is covered under the terms and 5 | * conditions therein. 6 | * 7 | * The S-Space package is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License version 2 as published 9 | * by the Free Software Foundation and distributed hereunder to you. 10 | * 11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES, 12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE 13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY 14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION 15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER 16 | * RIGHTS. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with this program. If not, see . 20 | */ 21 | 22 | package edu.ucla.sspace.dependency; 23 | 24 | 25 | /** 26 | * A {@link DependencyPathAcceptor} that accepts all links. 
27 | * 28 | * @author Keith Stevens 29 | */ 30 | public class UniversalPathAcceptor implements DependencyPathAcceptor { 31 | 32 | /** 33 | * {@inheritDoc} 34 | */ 35 | public boolean accepts(DependencyPath relation) { 36 | return true; 37 | } 38 | 39 | /** 40 | * {@inheritDoc} 41 | */ 42 | public int maxPathLength() { 43 | return Integer.MAX_VALUE; 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /src/main/java/edu/ucla/sspace/evaluation/WordChoiceEvaluation.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2009 David Jurgens 3 | * 4 | * This file is part of the S-Space package and is covered under the terms and 5 | * conditions therein. 6 | * 7 | * The S-Space package is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License version 2 as published 9 | * by the Free Software Foundation and distributed hereunder to you. 10 | * 11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES, 12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE 13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY 14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION 15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER 16 | * RIGHTS. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with this program. If not, see . 20 | */ 21 | 22 | package edu.ucla.sspace.evaluation; 23 | 24 | import java.util.Collection; 25 | 26 | import edu.ucla.sspace.common.SemanticSpace; 27 | 28 | /** 29 | * A evaluation metric that uses a selection of a word from multiple choices. 
30 | * 31 | * @author David Jurgens 32 | */ 33 | public interface WordChoiceEvaluation { 34 | 35 | /** 36 | * Returns a collection of multiple choice questions that can be used to 37 | * evaluate a {@link SemanticSpace}. 38 | */ 39 | Collection getQuestions(); 40 | 41 | } 42 | -------------------------------------------------------------------------------- /src/main/java/edu/ucla/sspace/util/IntegerEntry.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 David Jurgens 3 | * 4 | * This file is part of the S-Space package and is covered under the terms and 5 | * conditions therein. 6 | * 7 | * The S-Space package is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License version 2 as published 9 | * by the Free Software Foundation and distributed hereunder to you. 10 | * 11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES, 12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE 13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY 14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION 15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER 16 | * RIGHTS. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with this program. If not, see . 20 | */ 21 | 22 | package edu.ucla.sspace.util; 23 | 24 | 25 | /** 26 | * An object that represents an index that has an associated {@code int} 27 | * value. This class is intended to support other classes that provide iterator 28 | * access over their indexable values without needing to incur auto-boxing 29 | * overhead. 30 | */ 31 | public interface IntegerEntry { 32 | 33 | /** 34 | * Returns the index position of this entry. 35 | */ 36 | int index(); 37 | 38 | /** 39 | * Returns the value at this entry's index. 
40 | */ 41 | int value(); 42 | } 43 | -------------------------------------------------------------------------------- /src/main/java/edu/ucla/sspace/dv/DependencyPathBasisMapping.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 David Jurgens 3 | * 4 | * This file is part of the S-Space package and is covered under the terms and 5 | * conditions therein. 6 | * 7 | * The S-Space package is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License version 2 as published 9 | * by the Free Software Foundation and distributed hereunder to you. 10 | * 11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES, 12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE 13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY 14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION 15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER 16 | * RIGHTS. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with this program. If not, see . 20 | */ 21 | 22 | package edu.ucla.sspace.dv; 23 | 24 | import edu.ucla.sspace.basis.BasisMapping; 25 | 26 | import edu.ucla.sspace.dependency.DependencyPath; 27 | 28 | 29 | /** 30 | * An interface for specifying how the occurrence of a word in a specific 31 | * syntactic relationship is quantified as a dimension in the vector basis. For 32 | * example, each word may correspond to a unique dimension regardless of how it 33 | * is grammatically related. 
34 | */ 35 | public interface DependencyPathBasisMapping 36 | extends BasisMapping { 37 | } 38 | -------------------------------------------------------------------------------- /src/main/java/edu/ucla/sspace/util/DoubleEntry.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 David Jurgens 3 | * 4 | * This file is part of the S-Space package and is covered under the terms and 5 | * conditions therein. 6 | * 7 | * The S-Space package is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License version 2 as published 9 | * by the Free Software Foundation and distributed hereunder to you. 10 | * 11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES, 12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE 13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY 14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION 15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER 16 | * RIGHTS. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with this program. If not, see . 20 | */ 21 | 22 | package edu.ucla.sspace.util; 23 | 24 | 25 | /** 26 | * An object that represents an index that has an associated {@code double} 27 | * value. This class is intended to support other classes that provide iterator 28 | * access over their indexable values without needing to incur auto-boxing 29 | * overhead. 30 | */ 31 | public interface DoubleEntry { 32 | 33 | /** 34 | * Returns the index position of this entry. 35 | */ 36 | int index(); 37 | 38 | /** 39 | * Returns the value at this entry's index. 
40 | */ 41 | double value(); 42 | } 43 | -------------------------------------------------------------------------------- /src/main/java/edu/ucla/sspace/matrix/MatrixEntry.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2009 David Jurgens 3 | * 4 | * This file is part of the S-Space package and is covered under the terms and 5 | * conditions therein. 6 | * 7 | * The S-Space package is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License version 2 as published 9 | * by the Free Software Foundation and distributed hereunder to you. 10 | * 11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES, 12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE 13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY 14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION 15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER 16 | * RIGHTS. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with this program. If not, see . 20 | */ 21 | 22 | package edu.ucla.sspace.matrix; 23 | 24 | 25 | /** 26 | * An abstraction of the entries in a matrix. 
27 | * 28 | * @see MatrixIO#getMatrixFileIterator(File,MatrixIO.Format) 29 | */ 30 | public interface MatrixEntry { 31 | 32 | /** 33 | * Returns the column index of this entry in the matrix 34 | */ 35 | int column(); 36 | 37 | /** 38 | * Returns the row index of this entry in the matrix 39 | */ 40 | int row(); 41 | 42 | /** 43 | * Returns the value of the matrix at this row and column 44 | */ 45 | double value(); 46 | 47 | } -------------------------------------------------------------------------------- /src/main/java/edu/ucla/sspace/common/statistics/GTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2011 David Jurgens 3 | * 4 | * This file is part of the S-Space package and is covered under the terms and 5 | * conditions therein. 6 | * 7 | * The S-Space package is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License version 2 as published 9 | * by the Free Software Foundation and distributed hereunder to you. 10 | * 11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES, 12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE 13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY 14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION 15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER 16 | * RIGHTS. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with this program. If not, see . 
20 | */ 21 | 22 | package edu.ucla.sspace.common.statistics; 23 | 24 | public class GTest implements SignificanceTest { 25 | 26 | /** 27 | * Returns the G-test statistic 28 | */ 29 | public double score (int both, int justA, int justB, int neither) { 30 | 31 | int all = both + justA + justB + neither; 32 | double probA = (both + justA) / (double)all; 33 | double probB = (both + justB) / (double)all; 34 | 35 | double expectedBoth = (probA * probB) * all; 36 | 37 | return 2 * (both * Math.log(both / expectedBoth)); 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /src/main/java/edu/ucla/sspace/graph/DirectedEdge.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2011 David Jurgens 3 | * 4 | * This file is part of the S-Space package and is covered under the terms and 5 | * conditions therein. 6 | * 7 | * The S-Space package is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License version 2 as published 9 | * by the Free Software Foundation and distributed hereunder to you. 10 | * 11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES, 12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE 13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY 14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION 15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER 16 | * RIGHTS. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with this program. If not, see . 20 | */ 21 | 22 | package edu.ucla.sspace.graph; 23 | 24 | 25 | /** 26 | * An interface for representing a directed edge between 28 | * two vertices. 
This interface can be seen as a refinement of the {@link 29 | * #equals(Object) equals} method that takes into account the orientation of the 30 | * edge. 31 | */ 32 | public interface DirectedEdge extends Edge { 33 | 34 | /** 35 | * Returns {@code true} if {@code o} connects the same two vertices and has 36 | * the same edge orientation. 37 | */ 38 | boolean equals(Object o); 39 | 40 | } -------------------------------------------------------------------------------- /src/main/java/edu/ucla/sspace/dependency/DependencyTreeTransform.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2011 Keith Stevens 3 | * 4 | * This file is part of the S-Space package and is covered under the terms and 5 | * conditions therein. 6 | * 7 | * The S-Space package is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License version 2 as published 9 | * by the Free Software Foundation and distributed hereunder to you. 10 | * 11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES, 12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE 13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY 14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION 15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER 16 | * RIGHTS. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with this program. If not, see . 20 | */ 21 | 22 | package edu.ucla.sspace.dependency; 23 | 24 | 25 | /** 26 | * An interface for transforming a dependency tree represented by a series of 27 | * {@link DependencyTreeNode}s. 28 | * 29 | * @author Keith Stevens 30 | */ 31 | public interface DependencyTreeTransform { 32 | 33 | /** 34 | * Transforms the {@link DependencyRelation} links within a series of {@link 35 | * DependencyTreeNode}s.
Relations may be added or removed, and entire 36 | * nodes may even be removed as long as the tree remains connected. 37 | */ 38 | DependencyTreeNode[] transform(DependencyTreeNode[] tree); 39 | } 40 | -------------------------------------------------------------------------------- /src/main/java/edu/ucla/sspace/evaluation/NormedWordPrimingReport.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Keith Stevens 3 | * 4 | * This file is part of the S-Space package and is covered under the terms and 5 | * conditions therein. 6 | * 7 | * The S-Space package is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License version 2 as published 9 | * by the Free Software Foundation and distributed hereunder to you. 10 | * 11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES, 12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE 13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY 14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION 15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER 16 | * RIGHTS. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with this program. If not, see . 20 | */ 21 | 22 | package edu.ucla.sspace.evaluation; 23 | 24 | /** 25 | * @author Keith Stevens 26 | */ 27 | public interface NormedWordPrimingReport { 28 | 29 | /** 30 | * Returns the total number of cues being reported. 31 | */ 32 | int numberOfCues(); 33 | 34 | /** 35 | * Returns the number of cues that could not be answered at all. 36 | */ 37 | int numberOfUnanswerableCues(); 38 | 39 | /** 40 | * Returns the average correlation of the normed cue to target strength and 41 | * the semantic similarity results generated by a sspace.
42 | */ 43 | double averageCorrelation(); 44 | } 45 | -------------------------------------------------------------------------------- /src/main/java/edu/ucla/sspace/text/AnnotatedDocument.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2009 David Jurgens 3 | * 4 | * This file is part of the S-Space package and is covered under the terms and 5 | * conditions therein. 6 | * 7 | * The S-Space package is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License version 2 as published 9 | * by the Free Software Foundation and distributed hereunder to you. 10 | * 11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES, 12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE 13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY 14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION 15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER 16 | * RIGHTS. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with this program. If not, see . 20 | */ 21 | 22 | package edu.ucla.sspace.text; 23 | 24 | import java.util.Iterator; 25 | 26 | /** 27 | * An abstraction for a document that allows document processors to access text 28 | * in a uniform manner. 29 | */ 30 | public interface AnnotatedDocument extends Document { 31 | 32 | /** 33 | * Returns the timestamp when this document was created 34 | */ 35 | long creationDate(); 36 | 37 | /** 38 | * Returns a label associated with this particular document. The label is 39 | * intended to provide information on the source of the document or the 40 | * contents therein. 
41 | */ 42 | String label(); 43 | } 44 | -------------------------------------------------------------------------------- /src/main/java/edu/ucla/sspace/hal/EvenWeighting.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2009 David Jurgens 3 | * 4 | * This file is part of the S-Space package and is covered under the terms and 5 | * conditions therein. 6 | * 7 | * The S-Space package is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License version 2 as published 9 | * by the Free Software Foundation and distributed hereunder to you. 10 | * 11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES, 12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE 13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY 14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION 15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER 16 | * RIGHTS. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with this program. If not, see . 20 | */ 21 | 22 | package edu.ucla.sspace.hal; 23 | 24 | /** 25 | * An weighting scheme where all words have the same weight when specifying how 26 | * a {@link HyperspaceAnalogueToLanguage} instance should weigh co-occurrences 27 | * based on the word distance. 
28 | */ 29 | public class EvenWeighting implements WeightingFunction { 30 | 31 | /** 32 | * Returns a constant value for the weight regardless of distance 33 | * 34 | * @param positionOffset {@inheritDoc} 35 | * @param windowSize {@inheritDoc} 36 | * 37 | * @return {@inheritDoc} 38 | */ 39 | public double weight(int positionOffset, int windowSize) { 40 | return 1; 41 | } 42 | 43 | } -------------------------------------------------------------------------------- /src/main/java/edu/ucla/sspace/tools/SelectTopKWords.java: -------------------------------------------------------------------------------- 1 | package edu.ucla.sspace.tools; 2 | 3 | import edu.ucla.sspace.basis.BasisMapping; 4 | 5 | import edu.ucla.sspace.matrix.Matrix; 6 | import edu.ucla.sspace.matrix.MatrixIO; 7 | import edu.ucla.sspace.matrix.MatrixIO.Format; 8 | 9 | import edu.ucla.sspace.util.BoundedSortedMultiMap; 10 | import edu.ucla.sspace.util.MultiMap; 11 | import edu.ucla.sspace.util.SerializableUtil; 12 | 13 | import java.io.File; 14 | 15 | import java.util.ArrayList; 16 | import java.util.List; 17 | 18 | 19 | /** 20 | * @author Keith Stevens 21 | */ 22 | public class SelectTopKWords { 23 | public static void main(String[] args) throws Exception { 24 | // Load the basis mapping. 25 | BasisMapping basis = 26 | SerializableUtil.load(new File(args[0])); 27 | 28 | // Create the top 10 lists for each topic in the word space. 
29 | List> topTerms = new ArrayList>(); 30 | Matrix m = MatrixIO.readMatrix(new File(args[1]), Format.DENSE_TEXT); 31 | for (int c = 0; c < m.columns(); ++c) 32 | topTerms.add(new BoundedSortedMultiMap(10)); 33 | 34 | for (int r = 0; r < m.rows(); ++r) { 35 | String term = basis.getDimensionDescription(r); 36 | for (int c = 0; c < m.columns(); ++c) 37 | topTerms.get(c).put(m.get(r, c), term); 38 | } 39 | 40 | for (MultiMap topicTerms : topTerms) { 41 | for (String term : topicTerms.values()) 42 | System.out.printf("%s ", term); 43 | System.out.println(); 44 | } 45 | } 46 | } 47 | 48 | -------------------------------------------------------------------------------- /src/main/java/edu/ucla/sspace/common/statistics/PointwiseMutualInformationTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2011 David Jurgens 3 | * 4 | * This file is part of the S-Space package and is covered under the terms and 5 | * conditions therein. 6 | * 7 | * The S-Space package is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License version 2 as published 9 | * by the Free Software Foundation and distributed hereunder to you. 10 | * 11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES, 12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE 13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY 14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION 15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER 16 | * RIGHTS. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with this program. If not, see . 
20 | */ 21 | 22 | package edu.ucla.sspace.common.statistics; 23 | 24 | import edu.ucla.sspace.common.Statistics; 25 | 26 | public class PointwiseMutualInformationTest implements SignificanceTest { 27 | 28 | /** 29 | * Returns the PMI score of the both A and B. 30 | */ 31 | public double score (int both, int justA, int justB, int neither) { 32 | int all = both + justA + justB + neither; 33 | double probA = (both + justA) / (double)all; 34 | double probB = (both + justB) / (double)all; 35 | double probAandB = both / (double)all; 36 | return Statistics.log2(probAandB / (probA * probB)); 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /src/main/java/edu/ucla/sspace/graph/DirectedTypedEdge.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2011 David Jurgens 3 | * 4 | * This file is part of the S-Space package and is covered under the terms and 5 | * conditions therein. 6 | * 7 | * The S-Space package is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License version 2 as published 9 | * by the Free Software Foundation and distributed hereunder to you. 10 | * 11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES, 12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE 13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY 14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION 15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER 16 | * RIGHTS. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with this program. If not, see . 20 | */ 21 | 22 | package edu.ucla.sspace.graph; 23 | 24 | 25 | /** 26 | * An interface for directed edges in multigraphs. 
This interface refines the 27 | * {@link #equals(Object) equals} method of {@link TypedEdge} to include edge 28 | * orientation. Two {@code DirectedTypedEdge} must share the same edge 29 | * orientation to be equivalent. 30 | * 31 | * @see Multigraph 32 | */ 33 | public interface DirectedTypedEdge extends DirectedEdge, TypedEdge { 34 | 35 | /** 36 | * Returns {@code true} if {@code o} connects the same vertices, has the 37 | * same edge orientation, and has edge type information that is equivalent. 38 | */ 39 | boolean equals(Object o); 40 | } -------------------------------------------------------------------------------- /src/main/java/edu/ucla/sspace/evaluation/NormedPrimingQuestion.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Keith Stevens 3 | * 4 | * This file is part of the S-Space package and is covered under the terms and 5 | * conditions therein. 6 | * 7 | * The S-Space package is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License version 2 as published 9 | * by the Free Software Foundation and distributed hereunder to you. 10 | * 11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES, 12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE 13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY 14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION 15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER 16 | * RIGHTS. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with this program. If not, see . 20 | */ 21 | 22 | package edu.ucla.sspace.evaluation; 23 | 24 | import java.util.List; 25 | 26 | 27 | /** 28 | * @author Keith Stevens 29 | */ 30 | public interface NormedPrimingQuestion { 31 | 32 | /** 33 | * Returns the priming cue. 
34 | */ 35 | String getCue(); 36 | 37 | /** 38 | * Returns the number of targets associated with the cue. 39 | */ 40 | int numberOfTargets(); 41 | 42 | /** 43 | * Returns the {@code i}th target associated with the cue. 44 | */ 45 | String getTarget(int i); 46 | 47 | /** 48 | * Returns the {@code i}th target strength associated with the cue. 49 | */ 50 | double getStrength(int i); 51 | } 52 | 53 | -------------------------------------------------------------------------------- /src/main/java/edu/ucla/sspace/gws/WordOrderBasisMapping.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2011 David Jurgens 3 | * 4 | * This file is part of the S-Space package and is covered under the terms and 5 | * conditions therein. 6 | * 7 | * The S-Space package is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License version 2 as published 9 | * by the Free Software Foundation and distributed hereunder to you. 10 | * 11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES, 12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE 13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY 14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION 15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER 16 | * RIGHTS. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with this program. If not, see . 20 | */ 21 | 22 | package edu.ucla.sspace.gws; 23 | 24 | import edu.ucla.sspace.basis.AbstractBasisMapping; 25 | 26 | import edu.ucla.sspace.util.Duple; 27 | 28 | 29 | /** 30 | * A {@link BasisMapping} implementation where each word and position 31 | * corresponds to a unique dimension.
32 | * 33 | * @author David Jurgens 34 | */ 35 | public class WordOrderBasisMapping 36 | extends AbstractBasisMapping, String> { 37 | 38 | private static final long serialVersionUID = 1L; 39 | 40 | /** 41 | * {@inheritDoc} 42 | */ 43 | public int getDimension(Duple key) { 44 | return getDimensionInternal(key.toString()); 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /src/main/java/edu/ucla/sspace/gws/WordBasisMapping.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2011 David Jurgens 3 | * 4 | * This file is part of the S-Space package and is covered under the terms and 5 | * conditions therein. 6 | * 7 | * The S-Space package is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License version 2 as published 9 | * by the Free Software Foundation and distributed hereunder to you. 10 | * 11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES, 12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE 13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY 14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION 15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER 16 | * RIGHTS. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with this program. If not, see . 20 | */ 21 | 22 | package edu.ucla.sspace.gws; 23 | 24 | import edu.ucla.sspace.basis.AbstractBasisMapping; 25 | 26 | import edu.ucla.sspace.util.Duple; 27 | 28 | 29 | /** 30 | * A {@link BasisMapping} implementation where each word corresponds to a unique 31 | * dimension regardless of its word position. 
32 | * 33 | * @author David Jurgens 34 | */ 35 | public class WordBasisMapping 36 | extends AbstractBasisMapping, String> { 37 | 38 | private static final long serialVersionUID = 1L; 39 | 40 | /** 41 | * {@inheritDoc} 42 | */ 43 | public int getDimension(Duple key) { 44 | return getDimensionInternal(key.x); 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /src/test/java/edu/ucla/sspace/vector/SparseHashVectorTests.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 David Jurgens 3 | * 4 | * This file is part of the S-Space package and is covered under the terms and 5 | * conditions therein. 6 | * 7 | * The S-Space package is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License version 2 as published 9 | * by the Free Software Foundation and distributed hereunder to you. 10 | * 11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES, 12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE 13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY 14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION 15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER 16 | * RIGHTS. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with this program. If not, see . 20 | */ 21 | 22 | package edu.ucla.sspace.vector; 23 | 24 | import org.junit.Ignore; 25 | import org.junit.Test; 26 | 27 | import static org.junit.Assert.*; 28 | 29 | import java.util.HashMap; 30 | import java.util.Map; 31 | 32 | 33 | /** 34 | * Tests for the {@link SparseHashVector} class. 
35 | */ 36 | public class SparseHashVectorTests { 37 | 38 | @Test public void testMagnitude() { 39 | SparseHashVector v = new SparseHashVector(100); 40 | assertEquals(0, v.magnitude(), .0001); 41 | 42 | v.set(1, 1); 43 | assertEquals(1, v.magnitude(), .0001); 44 | 45 | v.set(1, 3); 46 | v.set(2, 4); 47 | assertEquals(5, v.magnitude(), .0001); 48 | } 49 | } -------------------------------------------------------------------------------- /src/main/java/edu/ucla/sspace/util/BiMap.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Keith Stevens 3 | * 4 | * This file is part of the S-Space package and is covered under the terms and 5 | * conditions therein. 6 | * 7 | * The S-Space package is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License version 2 as published 9 | * by the Free Software Foundation and distributed hereunder to you. 10 | * 11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES, 12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE 13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY 14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION 15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER 16 | * RIGHTS. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with this program. If not, see . 20 | */ 21 | 22 | package edu.ucla.sspace.util; 23 | 24 | import java.util.Map; 25 | 26 | 27 | /** 28 | * This interface allows for a bi-directional mapping, where keys can map to 29 | * values and values can map to keys. This is expected to be used with 30 | * one-to-one mappings.
31 | * 32 | * @author Keith Stevens 33 | */ 34 | public interface BiMap extends Map { 35 | 36 | /** 37 | * Returns a reversed form of this {@link BiMap}, where values in this 38 | * {@link BiMap} will map to keys in this {@link BiMap}. Calling {@code 39 | * inverse} on the returned {@link BiMap} should return a pointer to the 40 | * original {@link BiMap}. 41 | */ 42 | BiMap inverse(); 43 | } 44 | -------------------------------------------------------------------------------- /src/main/java/edu/ucla/sspace/util/Generator.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Keith Stevens 3 | * 4 | * This file is part of the S-Space package and is covered under the terms and 5 | * conditions therein. 6 | * 7 | * The S-Space package is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License version 2 as published 9 | * by the Free Software Foundation and distributed hereunder to you. 10 | * 11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES, 12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE 13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY 14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION 15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER 16 | * RIGHTS. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with this program. If not, see . 20 | */ 21 | 22 | package edu.ucla.sspace.util; 23 | 24 | 25 | /** 26 | * An interface for classes which will maintain and generate new instances of 27 | * classes that require several parameters that will be used consistently 28 | * several times. 
Implementations of this interface are also intended to be used 29 | * in conjunction with a {@link GeneratorMap}, which will create new instances 30 | * for keys not currently in the map by using an instance of a {@link 31 | * Generator}. 32 | * 33 | * @see GeneratorMap 34 | * 35 | * @author Keith Stevens 36 | */ 37 | public interface Generator { 38 | 39 | /** 40 | * Creates a new instance of type {@code T}. 41 | */ 42 | public T generate(); 43 | } 44 | -------------------------------------------------------------------------------- /src/main/java/edu/ucla/sspace/evaluation/MultipleChoiceQuestion.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2009 David Jurgens 3 | * 4 | * This file is part of the S-Space package and is covered under the terms and 5 | * conditions therein. 6 | * 7 | * The S-Space package is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License version 2 as published 9 | * by the Free Software Foundation and distributed hereunder to you. 10 | * 11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES, 12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE 13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY 14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION 15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER 16 | * RIGHTS. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with this program. If not, see . 20 | */ 21 | 22 | package edu.ucla.sspace.evaluation; 23 | 24 | import java.util.List; 25 | 26 | /** 27 | * A question that provides a prompt and one or more options to choose from as 28 | * the answer. 29 | * 30 | * @see WordChoiceEvaluation 31 | */ 32 | public interface MultipleChoiceQuestion { 33 | 34 | /** 35 | * Returns the question prompt.
This may be as short as a single word. 36 | */ 37 | String getPrompt(); 38 | 39 | /** 40 | * Returns a list of options to the prompt question. 41 | */ 42 | List getOptions(); 43 | 44 | /** 45 | * Returns the index of the correct answer in the list of options. 46 | */ 47 | int getCorrectAnswer(); 48 | 49 | } -------------------------------------------------------------------------------- /src/main/java/edu/ucla/sspace/dependency/DependencyRelation.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Keith Stevens 3 | * 4 | * This file is part of the S-Space package and is covered under the terms and 5 | * conditions therein. 6 | * 7 | * The S-Space package is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License version 2 as published 9 | * by the Free Software Foundation and distributed hereunder to you. 10 | * 11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES, 12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE 13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY 14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION 15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER 16 | * RIGHTS. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with this program. If not, see . 20 | */ 21 | 22 | package edu.ucla.sspace.dependency; 23 | 24 | 25 | /** 26 | * An interface for representing a dependency relationship between a head node 27 | * and its dependent relation. 28 | */ 29 | public interface DependencyRelation { 30 | 31 | /** 32 | * Returns the dependent node that is related to the head node. 33 | */ 34 | DependencyTreeNode dependentNode(); 35 | 36 | /** 37 | * Returns the head node on which the second node has dependent relation.
38 | */ 39 | DependencyTreeNode headNode(); 40 | 41 | /** 42 | * Returns the relation that the current token has with the next token in a {@link 43 | * DependencyPath}. 44 | */ 45 | String relation(); 46 | 47 | } 48 | -------------------------------------------------------------------------------- /src/main/java/edu/ucla/sspace/text/GermanStemmer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2009 Keith Stevens 3 | * 4 | * This file is part of the S-Space package and is covered under the terms and 5 | * conditions therein. 6 | * 7 | * The S-Space package is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License version 2 as published 9 | * by the Free Software Foundation and distributed hereunder to you. 10 | * 11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES, 12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE 13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY 14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION 15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER 16 | * RIGHTS. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with this program. If not, see . 20 | */ 21 | 22 | package edu.ucla.sspace.text; 23 | 24 | import org.tartarus.snowball.ext.germanStemmer; 25 | 26 | /** 27 | * A wrapper for the german Snowball 28 | * Stemmer. Details for this specific stemmer can be found at here. 30 | * 31 | * @author Keith Stevens.
32 | */ 33 | public class GermanStemmer implements Stemmer{ 34 | 35 | /** 36 | * {@inheritDoc} 37 | */ 38 | public String stem(String token) { 39 | germanStemmer stemmer = new germanStemmer(); 40 | stemmer.setCurrent(token); 41 | stemmer.stem(); 42 | return stemmer.getCurrent(); 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /src/main/java/edu/ucla/sspace/text/EnglishStemmer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2009 Keith Stevens 3 | * 4 | * This file is part of the S-Space package and is covered under the terms and 5 | * conditions therein. 6 | * 7 | * The S-Space package is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License version 2 as published 9 | * by the Free Software Foundation and distributed hereunder to you. 10 | * 11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES, 12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE 13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY 14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION 15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER 16 | * RIGHTS. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with this program. If not, see . 20 | */ 21 | 22 | package edu.ucla.sspace.text; 23 | 24 | import org.tartarus.snowball.ext.englishStemmer; 25 | 26 | /** 27 | * A wrapper for the english Snowball 28 | * Stemmer. Details for this specific stemmer can be found at here. 30 | * 31 | * @author Keith Stevens. 
32 | */ 33 | public class EnglishStemmer implements Stemmer{ 34 | 35 | /** 36 | * {@inheritDoc} This implementation creates a new {@code englishStemmer} for every call, sets {@code token} as its current text, runs {@code stem()} and returns the stemmer's current text afterwards. 37 | */ 38 | public String stem(String token) { 39 | englishStemmer stemmer = new englishStemmer(); 40 | stemmer.setCurrent(token); 41 | stemmer.stem(); 42 | return stemmer.getCurrent(); 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /src/main/java/edu/ucla/sspace/text/ItalianStemmer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2009 Keith Stevens 3 | * 4 | * This file is part of the S-Space package and is covered under the terms and 5 | * conditions therein. 6 | * 7 | * The S-Space package is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License version 2 as published 9 | * by the Free Software Foundation and distributed hereunder to you. 10 | * 11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES, 12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE 13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY 14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION 15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER 16 | * RIGHTS. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with this program. If not, see . 20 | */ 21 | 22 | package edu.ucla.sspace.text; 23 | 24 | import org.tartarus.snowball.ext.italianStemmer; 25 | 26 | /** 27 | * A wrapper for the italian Snowball 28 | * Stemmer. Details for this specific stemmer can be found at here. 30 | * 31 | * @author Keith Stevens.
32 | */ 33 | public class ItalianStemmer implements Stemmer{ 34 | 35 | /** 36 | * {@inheritDoc} This implementation creates a new {@code italianStemmer} for every call, sets {@code token} as its current text, runs {@code stem()} and returns the stemmer's current text afterwards. 37 | */ 38 | public String stem(String token) { 39 | italianStemmer stemmer = new italianStemmer(); 40 | stemmer.setCurrent(token); 41 | stemmer.stem(); 42 | return stemmer.getCurrent(); 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /src/main/java/edu/ucla/sspace/dependency/DependencyPathAcceptor.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 David Jurgens 3 | * 4 | * This file is part of the S-Space package and is covered under the terms and 5 | * conditions therein. 6 | * 7 | * The S-Space package is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License version 2 as published 9 | * by the Free Software Foundation and distributed hereunder to you. 10 | * 11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES, 12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE 13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY 14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION 15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER 16 | * RIGHTS. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with this program. If not, see . 20 | */ 21 | 22 | package edu.ucla.sspace.dependency; 23 | 24 | 25 | /** 26 | * An interface for deciding whether a provided path is valid for further 27 | * processing.
28 | */ 29 | public interface DependencyPathAcceptor { 30 | 31 | /** 32 | * Returns {@code true} if the path is valid according to this acceptor's 33 | * standards. 34 | * 35 | * @param path a dependency path 36 | * 37 | * @return {@code true} if the path is valid, {@code false} otherwise 38 | */ 39 | boolean accepts(DependencyPath path); 40 | 41 | /** 42 | * Returns the maximum path length allowed by this acceptor. 43 | * 44 | * @return the length above which no path will be accepted 45 | */ 46 | int maxPathLength(); 47 | 48 | } -------------------------------------------------------------------------------- /src/main/java/edu/ucla/sspace/evaluation/WordSimilarity.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2009 David Jurgens 3 | * 4 | * This file is part of the S-Space package and is covered under the terms and 5 | * conditions therein. 6 | * 7 | * The S-Space package is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License version 2 as published 9 | * by the Free Software Foundation and distributed hereunder to you. 10 | * 11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES, 12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE 13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY 14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION 15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER 16 | * RIGHTS. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with this program. If not, see . 20 | */ 21 | 22 | package edu.ucla.sspace.evaluation; 23 | 24 | /** 25 | * A human-based similarity judgement for two words. Note that the ordering 26 | * from the orginal question is preserved, which allows users to consider 27 | * possible asymmetric evalutions based on word ordering.
28 | * 29 | * @author David Jurgens 30 | */ 31 | public interface WordSimilarity { 32 | 33 | /** 34 | * Returns the first word in the pair, in the order used by the original question. 35 | */ 36 | String getFirstWord(); 37 | 38 | /** 39 | * Returns the second word in the pair, in the order used by the original question. 40 | */ 41 | String getSecondWord(); 42 | 43 | /** 44 | * Returns the human similarity judgement for the two words. 45 | */ 46 | double getSimilarity(); 47 | 48 | } 49 | -------------------------------------------------------------------------------- /src/main/java/edu/ucla/sspace/graph/WeightedDirectedTypedEdge.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2011 David Jurgens 3 | * 4 | * This file is part of the S-Space package and is covered under the terms and 5 | * conditions therein. 6 | * 7 | * The S-Space package is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License version 2 as published 9 | * by the Free Software Foundation and distributed hereunder to you. 10 | * 11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES, 12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE 13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY 14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION 15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER 16 | * RIGHTS. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with this program. If not, see . 20 | */ 21 | 22 | package edu.ucla.sspace.graph; 23 | 24 | 25 | /** 26 | * An interface for weigthed, directed edges in multigraphs. This interface 27 | * does not refine the {@link #equals(Object) equals} method of {@link 28 | * DirectedTypedEdge}, i.e. two {@code WeightedDirectedTypedEdge} are equivalent 29 | * independent of their edge weights.
30 | * 31 | * @see Multigraph 32 | */ 33 | public interface WeightedDirectedTypedEdge 34 | extends DirectedTypedEdge, WeightedDirectedEdge { 35 | 36 | /** 37 | * Returns {@code true} if {@code o} connects the same vertices, has the 38 | * same edge orientation, and has edge type information that is equivalent. Edge weights are not part of this comparison. 39 | */ 40 | boolean equals(Object o); 41 | 42 | } -------------------------------------------------------------------------------- /src/main/java/edu/ucla/sspace/text/SnowballPorterStemmer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2009 Keith Stevens 3 | * 4 | * This file is part of the S-Space package and is covered under the terms and 5 | * conditions therein. 6 | * 7 | * The S-Space package is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License version 2 as published 9 | * by the Free Software Foundation and distributed hereunder to you. 10 | * 11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES, 12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE 13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY 14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION 15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER 16 | * RIGHTS. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with this program. If not, see . 20 | */ 21 | 22 | package edu.ucla.sspace.text; 23 | 24 | import org.tartarus.snowball.ext.porterStemmer; 25 | 26 | /** 27 | * A wrapper for the porter Snowball 28 | * Stemmer. Details for this specific stemmer can be found at here. 30 | * 31 | * @author Keith Stevens.
32 | */ 33 | public class SnowballPorterStemmer implements Stemmer{ 34 | 35 | /** 36 | * {@inheritDoc} This implementation creates a new {@code porterStemmer} for every call, sets {@code token} as its current text, runs {@code stem()} and returns the stemmer's current text afterwards. 37 | */ 38 | public String stem(String token) { 39 | porterStemmer stemmer = new porterStemmer(); 40 | stemmer.setCurrent(token); 41 | stemmer.stem(); 42 | return stemmer.getCurrent(); 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /src/test/java/edu/ucla/sspace/matrix/factorization/SingularValueDecompositionLibJTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011, Lawrence Livermore National Security, LLC. Produced at 3 | * the Lawrence Livermore National Laboratory. Written by Keith Stevens, 4 | * kstevens@cs.ucla.edu OCEC-10-073 All rights reserved. 5 | * 6 | * This file is part of the S-Space package and is covered under the terms and 7 | * conditions therein. 8 | * 9 | * The S-Space package is free software: you can redistribute it and/or modify 10 | * it under the terms of the GNU General Public License version 2 as published 11 | * by the Free Software Foundation and distributed hereunder to you. 12 | * 13 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES, 14 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE 15 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY 16 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION 17 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER 18 | * RIGHTS. 19 | * 20 | * You should have received a copy of the GNU General Public License 21 | * along with this program. If not, see .
22 | */ 23 | 24 | package edu.ucla.sspace.matrix.factorization; 25 | 26 | import org.junit.Ignore; 27 | import org.junit.Test; 28 | 29 | import static org.junit.Assert.*; 30 | 31 | 32 | /** Test class for {@code SingularValueDecompositionLibJ}. Its only test method, {@code testMatrixReduction}, has no executable statements: the call to {@code SingularValueDecompositionTestUtil.testReductionMatrix} is commented out and marked as known to fail. NOTE(review): consider whether the method should be disabled explicitly instead of passing with an empty body. 33 | * @author Keith Stevens 34 | */ 35 | public class SingularValueDecompositionLibJTest { 36 | 37 | @Test public void testMatrixReduction() { 38 | // This test is known to fail. 39 | /* 40 | SingularValueDecompositionTestUtil.testReductionMatrix( 41 | new SingularValueDecompositionLibJ()); 42 | */ 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /src/test/java/edu/ucla/sspace/matrix/factorization/SingularValueDecompositionLibCTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011, Lawrence Livermore National Security, LLC. Produced at 3 | * the Lawrence Livermore National Laboratory. Written by Keith Stevens, 4 | * kstevens@cs.ucla.edu OCEC-10-073 All rights reserved. 5 | * 6 | * This file is part of the S-Space package and is covered under the terms and 7 | * conditions therein. 8 | * 9 | * The S-Space package is free software: you can redistribute it and/or modify 10 | * it under the terms of the GNU General Public License version 2 as published 11 | * by the Free Software Foundation and distributed hereunder to you. 12 | * 13 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES, 14 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE 15 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY 16 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION 17 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER 18 | * RIGHTS. 19 | * 20 | * You should have received a copy of the GNU General Public License 21 | * along with this program. If not, see .
22 | */ 23 | 24 | package edu.ucla.sspace.matrix.factorization; 25 | 26 | import org.junit.Ignore; 27 | import org.junit.Test; 28 | 29 | import edu.ucla.sspace.matrix.SVD; 30 | 31 | import static org.junit.Assert.*; 32 | 33 | 34 | /** Test class for {@code SingularValueDecompositionLibC}. The reduction check in {@code testMatrixReduction} runs only when {@code SVD.isSVDLIBCavailable()} returns {@code true}; otherwise the method does nothing and the test passes without exercising the decomposition. 35 | * @author Keith Stevens 36 | */ 37 | public class SingularValueDecompositionLibCTest { 38 | 39 | @Test public void testMatrixReduction() { 40 | if (SVD.isSVDLIBCavailable()) 41 | SingularValueDecompositionTestUtil.testReductionMatrix( 42 | new SingularValueDecompositionLibC()); 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /src/main/java/edu/ucla/sspace/basis/StringBasisMapping.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Keith Stevens 3 | * 4 | * This file is part of the S-Space package and is covered under the terms and 5 | * conditions therein. 6 | * 7 | * The S-Space package is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License version 2 as published 9 | * by the Free Software Foundation and distributed hereunder to you. 10 | * 11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES, 12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE 13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY 14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION 15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER 16 | * RIGHTS. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with this program. If not, see . 20 | */ 21 | 22 | package edu.ucla.sspace.basis; 23 | 24 | import java.util.Set; 25 | 26 | 27 | /** 28 | * A string based {@link BasisMapping}. Keys must be strings and each dimension 29 | * is described by the associated key.
30 | * 31 | * @author Keith Stevens 32 | */ 33 | public class StringBasisMapping extends AbstractBasisMapping { 34 | 35 | private static final long serialVersionUID = 1L; 36 | 37 | /** Creates a mapping; this constructor adds no keys itself. */ public StringBasisMapping() { 38 | } 39 | 40 | /** Creates a mapping and calls {@link #getDimension(String)} once for each element of {@code words}. NOTE(review): {@code words} is declared as a raw {@code Set} in this listing although it is iterated as {@code String}; its element type appears to have been lost - confirm against the original source. */ public StringBasisMapping(Set words) { 41 | for (String word : words) 42 | getDimension(word); 43 | } 44 | 45 | /** 46 | * {@inheritDoc} This implementation delegates to {@code getDimensionInternal(key)}. 47 | */ 48 | public int getDimension(String key) { 49 | return getDimensionInternal(key); 50 | } 51 | } 52 | 53 | -------------------------------------------------------------------------------- /src/test/java/edu/ucla/sspace/matrix/factorization/SingularValueDecompositionOctaveTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011, Lawrence Livermore National Security, LLC. Produced at 3 | * the Lawrence Livermore National Laboratory. Written by Keith Stevens, 4 | * kstevens@cs.ucla.edu OCEC-10-073 All rights reserved. 5 | * 6 | * This file is part of the S-Space package and is covered under the terms and 7 | * conditions therein. 8 | * 9 | * The S-Space package is free software: you can redistribute it and/or modify 10 | * it under the terms of the GNU General Public License version 2 as published 11 | * by the Free Software Foundation and distributed hereunder to you. 12 | * 13 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES, 14 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE 15 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY 16 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION 17 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER 18 | * RIGHTS. 19 | * 20 | * You should have received a copy of the GNU General Public License 21 | * along with this program. If not, see .
22 | */ 23 | 24 | package edu.ucla.sspace.matrix.factorization; 25 | 26 | import edu.ucla.sspace.matrix.SVD; 27 | 28 | import org.junit.Ignore; 29 | import org.junit.Test; 30 | 31 | import static org.junit.Assert.*; 32 | 33 | 34 | /** Test class for {@code SingularValueDecompositionOctave}. {@code testMatrixReduction} calls the reduction check only when {@code SVD.isOctaveAvailable()} returns {@code true}. NOTE(review): the method carries {@code @Ignore} but no {@code @Test} annotation; confirm whether it was meant to be a disabled test. 35 | * @author Keith Stevens 36 | */ 37 | public class SingularValueDecompositionOctaveTest { 38 | 39 | @Ignore public void testMatrixReduction() { 40 | if (SVD.isOctaveAvailable()) 41 | SingularValueDecompositionTestUtil.testReductionMatrix( 42 | new SingularValueDecompositionOctave()); 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /src/test/java/edu/ucla/sspace/matrix/factorization/SingularValueDecompositionMatlabTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011, Lawrence Livermore National Security, LLC. Produced at 3 | * the Lawrence Livermore National Laboratory. Written by Keith Stevens, 4 | * kstevens@cs.ucla.edu OCEC-10-073 All rights reserved. 5 | * 6 | * This file is part of the S-Space package and is covered under the terms and 7 | * conditions therein. 8 | * 9 | * The S-Space package is free software: you can redistribute it and/or modify 10 | * it under the terms of the GNU General Public License version 2 as published 11 | * by the Free Software Foundation and distributed hereunder to you. 12 | * 13 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES, 14 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE 15 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY 16 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION 17 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER 18 | * RIGHTS. 19 | * 20 | * You should have received a copy of the GNU General Public License 21 | * along with this program. If not, see .
22 | */ 23 | 24 | package edu.ucla.sspace.matrix.factorization; 25 | 26 | import edu.ucla.sspace.matrix.SVD; 27 | 28 | import org.junit.Ignore; 29 | import org.junit.Test; 30 | 31 | import static org.junit.Assert.*; 32 | 33 | 34 | /** Test class for {@code SingularValueDecompositionMatlab}. The reduction check in {@code testMatrixReduction} runs only when {@code SVD.isMatlabAvailable()} returns {@code true}; otherwise the method does nothing and the test passes without exercising the decomposition. 35 | * @author Keith Stevens 36 | */ 37 | public class SingularValueDecompositionMatlabTest { 38 | 39 | @Test public void testMatrixReduction() { 40 | if (SVD.isMatlabAvailable()) 41 | SingularValueDecompositionTestUtil.testReductionMatrix( 42 | new SingularValueDecompositionMatlab()); 43 | } 44 | } 45 | 46 | -------------------------------------------------------------------------------- /src/main/java/edu/ucla/sspace/util/ReflectionUtil.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2009 Keith Stevens 3 | * 4 | * This file is part of the S-Space package and is covered under the terms and 5 | * conditions therein. 6 | * 7 | * The S-Space package is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License version 2 as published 9 | * by the Free Software Foundation and distributed hereunder to you. 10 | * 11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES, 12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE 13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY 14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION 15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER 16 | * RIGHTS. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with this program. If not, see .
20 | */ 21 | 22 | package edu.ucla.sspace.util; 23 | 24 | 25 | /** 26 | * A collection of miscellaneous, but useful, functions for working with 27 | * reflection. 28 | */ 29 | public class ReflectionUtil { 30 | 31 | /** 32 | * Uninstantiable: the only constructor is private. 33 | */ 34 | private ReflectionUtil() { } 35 | 36 | /** 37 | * Returns an arbitrary object instance based on a class name. The class is loaded with {@code Class.forName(className)} and instantiated with {@code newInstance()}; the result is cast to {@code T} without a runtime check. Any {@code Exception} raised along the way is rethrown wrapped in an {@code Error}. NOTE(review): {@code T} is not declared anywhere in this listing; a method-level type parameter appears to have been lost - confirm against the original source. 38 | * 39 | * @param className The name of a desired class to instantiate. 40 | */ 41 | @SuppressWarnings("unchecked") 42 | public static T getObjectInstance(String className) { 43 | try { 44 | Class clazz = Class.forName(className); 45 | return (T) clazz.newInstance(); 46 | } catch (Exception e) { 47 | throw new Error(e); 48 | } 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /src/main/java/edu/ucla/sspace/evaluation/WordAssociationReport.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2009 David Jurgens 3 | * 4 | * This file is part of the S-Space package and is covered under the terms and 5 | * conditions therein. 6 | * 7 | * The S-Space package is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License version 2 as published 9 | * by the Free Software Foundation and distributed hereunder to you. 10 | * 11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES, 12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE 13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY 14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION 15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER 16 | * RIGHTS. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with this program. If not, see .
20 | */ 21 | 22 | package edu.ucla.sspace.evaluation; 23 | 24 | /** 25 | * A report of the performance of a {@link SemanticSpace} on a particular 26 | * {@link WordAssociationTest}. 27 | * 28 | * @author David Jurgens 29 | */ 30 | public interface WordAssociationReport { 31 | 32 | /** 33 | * Returns the total number of word pairs. 34 | */ 35 | int numberOfWordPairs(); 36 | 37 | /** 38 | * Returns the correlation between the similarity judgements from a {@link 39 | * SemanticSpace} and the provided human similarity judgements. 40 | */ 41 | double correlation(); 42 | 43 | /** 44 | * Returns the number of questions for which a {@link SemanticSpace} 45 | * could not give an answer due to missing word vectors. 46 | */ 47 | int unanswerableQuestions(); 48 | } 49 | -------------------------------------------------------------------------------- /src/main/java/edu/ucla/sspace/evaluation/WordPrimingReport.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Keith Stevens 3 | * 4 | * This file is part of the S-Space package and is covered under the terms and 5 | * conditions therein. 6 | * 7 | * The S-Space package is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License version 2 as published 9 | * by the Free Software Foundation and distributed hereunder to you. 10 | * 11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES, 12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE 13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY 14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION 15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER 16 | * RIGHTS. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with this program. If not, see .
20 | */ 21 | 22 | package edu.ucla.sspace.evaluation; 23 | 24 | /** 25 | * A report of the performance of a {@link SemanticSpace} on a particular 26 | * {@link WordPrimingTest}. 27 | * 28 | * @author Keith Stevens 29 | */ 30 | public interface WordPrimingReport { 31 | 32 | /** 33 | * Returns the total number of word pairs. 34 | */ 35 | int numberOfWordPairs(); 36 | 37 | /** 38 | * Returns the priming score for related word pairs. 39 | */ 40 | double relatedPriming(); 41 | 42 | /** 43 | * Returns the priming score for unrelated word pairs. 44 | */ 45 | double unrelatedPriming(); 46 | 47 | /** 48 | * Returns the effect of priming, which is the difference between the 49 | * priming score for related and unrelated pairs. 50 | */ 51 | double effect(); 52 | } 53 | -------------------------------------------------------------------------------- /src/main/java/edu/ucla/sspace/util/ResourceFinder.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 David Jurgens 3 | * 4 | * This file is part of the S-Space package and is covered under the terms and 5 | * conditions therein. 6 | * 7 | * The S-Space package is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License version 2 as published 9 | * by the Free Software Foundation and distributed hereunder to you. 10 | * 11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES, 12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE 13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY 14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION 15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER 16 | * RIGHTS. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with this program. If not, see .
20 | */ 21 | 22 | package edu.ucla.sspace.util; 23 | 24 | import java.io.BufferedReader; 25 | import java.io.IOException; 26 | 27 | 28 | /** 29 | * An interface for reading file-based resources regardless of the environment in 30 | * which the system is operating, e.g. a Hadoop environment. 31 | */ 32 | public interface ResourceFinder { 33 | 34 | /** 35 | * Finds the file with the specified name and returns a reader for that 36 | * file's contents. 37 | * 38 | * @param fileName the name of a file 39 | * 40 | * @return a {@code BufferedReader} to the contents of the specified file 41 | * 42 | * @throws IOException if the resource cannot be found or if an error occurs 43 | * while opening the resource 44 | */ 45 | BufferedReader open(String fileName) throws IOException; 46 | 47 | } -------------------------------------------------------------------------------- /src/test/java/edu/ucla/sspace/ri/TestRandomIndexing.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2009 David Jurgens 3 | * 4 | * This file is part of the S-Space package and is covered under the terms and 5 | * conditions therein. 6 | * 7 | * The S-Space package is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License version 2 as published 9 | * by the Free Software Foundation and distributed hereunder to you. 10 | * 11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES, 12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE 13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY 14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION 15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER 16 | * RIGHTS. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with this program. If not, see .
20 | */ 21 | 22 | package edu.ucla.sspace.ri; 23 | 24 | import java.io.*; 25 | import java.util.*; 26 | 27 | import org.junit.Ignore; 28 | import org.junit.Test; 29 | 30 | import static org.junit.Assert.*; 31 | 32 | 33 | /** Verifies that, after one document is processed, {@code RandomIndexing.getWords()} equals the set of distinct whitespace-separated tokens of that document. */ public class TestRandomIndexing { 34 | 35 | /** Seed applied to {@code ri.RANDOM} before the document is processed. */ private static final long SEED = 42L; 36 | 37 | @Test public void test() throws IOException { 38 | RandomIndexing ri = new RandomIndexing(new Properties()); 39 | ri.RANDOM.setSeed(SEED); 40 | 41 | String text = "the quick brown fox jumps over the lazy dog"; 42 | ri.processDocument(new BufferedReader(new StringReader(text))); 43 | 44 | Set words = new LinkedHashSet(); 45 | for (String s : text.split("\\s+")) 46 | words.add(s); 47 | 48 | assertEquals(words, ri.getWords()); 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /src/main/java/edu/ucla/sspace/evaluation/WordSimilarityReport.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2009 David Jurgens 3 | * 4 | * This file is part of the S-Space package and is covered under the terms and 5 | * conditions therein. 6 | * 7 | * The S-Space package is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License version 2 as published 9 | * by the Free Software Foundation and distributed hereunder to you. 10 | * 11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES, 12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE 13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY 14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION 15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER 16 | * RIGHTS. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with this program. If not, see .
20 | */ 21 | 22 | package edu.ucla.sspace.evaluation; 23 | 24 | /** 25 | * A report of the performance of a {@link SemanticSpace} on a particular 26 | * {@link WordSimilarityEvaluation} test. 27 | * 28 | * @author David Jurgens 29 | */ 30 | public interface WordSimilarityReport { 31 | 32 | /** 33 | * Returns the total number of word pairs. 34 | */ 35 | int numberOfWordPairs(); 36 | 37 | /** 38 | * Returns the correlation between the similarity judgements from a {@link 39 | * SemanticSpace} and the provided human similarity judgements. 40 | */ 41 | double correlation(); 42 | 43 | /** 44 | * Returns the number of questions for which a {@link SemanticSpace} 45 | * could not give an answer due to missing word vectors. 46 | */ 47 | int unanswerableQuestions(); 48 | } 49 | -------------------------------------------------------------------------------- /src/main/java/edu/ucla/sspace/evaluation/WordPrimingTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Keith Stevens 3 | * 4 | * This file is part of the S-Space package and is covered under the terms and 5 | * conditions therein. 6 | * 7 | * The S-Space package is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License version 2 as published 9 | * by the Free Software Foundation and distributed hereunder to you. 10 | * 11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES, 12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE 13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY 14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION 15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER 16 | * RIGHTS. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with this program. If not, see .
20 | */ 21 | 22 | package edu.ucla.sspace.evaluation; 23 | 24 | import edu.ucla.sspace.common.SemanticSpace; 25 | 26 | 27 | /** 28 | * An interface for performing priming tests where there is no normed set of 29 | * responses to compare against. These tests simply measure the associational 30 | * strength between a prime, target pair and prime, unrelated target pairs. The 31 | * key result is the effect of related primes, where a high effect suggests that 32 | * the semantic space models the particular form of priming modeled by some 33 | * implemented test. 34 | * 35 | * @author Keith Stevens 36 | */ 37 | public interface WordPrimingTest { 38 | 39 | /** 40 | * Evaluates a {@link SemanticSpace} on a particular test of word priming 41 | * pairs and returns the outcome as a {@link WordPrimingReport}. 42 | */ 43 | public WordPrimingReport evaluate(SemanticSpace sspace); 44 | } 45 | -------------------------------------------------------------------------------- /src/main/java/edu/ucla/sspace/hal/LinearWeighting.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2009 David Jurgens 3 | * 4 | * This file is part of the S-Space package and is covered under the terms and 5 | * conditions therein. 6 | * 7 | * The S-Space package is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License version 2 as published 9 | * by the Free Software Foundation and distributed hereunder to you. 10 | * 11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES, 12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE 13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY 14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION 15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER 16 | * RIGHTS. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with this program.
If not, see . 20 | */ 21 | 22 | package edu.ucla.sspace.hal; 23 | 24 | /** 25 | * A linear weighting scheme for specifying how a {@link 26 | * HyperspaceAnalogueToLanguage} instance should weigh co-occurrences based on 27 | * the word distance. 28 | */ 29 | public class LinearWeighting implements WeightingFunction { 30 | 31 | /** 32 | * Returns the weighted value where the closest words receive a weight equal 33 | * to the window size and the most distant words receive a weight of {@code 34 | * 1}, using a linear decrease for in-between values. 35 | * 36 | * @param positionOffset {@inheritDoc} 37 | * @param windowSize {@inheritDoc} 38 | * 39 | * @return {@inheritDoc} 40 | */ 41 | public double weight(int positionOffset, int windowSize) { 42 | return windowSize - (Math.abs(positionOffset) - 1); 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /src/main/java/edu/ucla/sspace/common/DimensionallyInterpretableSemanticSpace.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 David Jurgens 3 | * 4 | * This file is part of the S-Space package and is covered under the terms and 5 | * conditions therein. 6 | * 7 | * The S-Space package is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License version 2 as published 9 | * by the Free Software Foundation and distributed hereunder to you. 10 | * 11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES, 12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE 13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY 14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION 15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER 16 | * RIGHTS.
17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with this program. If not, see . 20 | */ 21 | 22 | package edu.ucla.sspace.common; 23 | 24 | 25 | /** 26 | * An interface for {@link SemanticSpace} instances that are meaningfully 27 | * interpretable. In most cases, the dimensions will be understandable by human 28 | * viewers, but this interface provides support for mapping a dimension to a 29 | * generic {@code Object} for using the description in some programmatic manner. 30 | */ 31 | public interface DimensionallyInterpretableSemanticSpace 32 | extends SemanticSpace { 33 | 34 | /** 35 | * Returns a description of the features to which the specified dimension 36 | * corresponds. 37 | * 38 | * @param dimension a dimension number 39 | * 40 | * @return a description of the features for the dimension 41 | */ 42 | T getDimensionDescription(int dimension); 43 | 44 | } 45 | -------------------------------------------------------------------------------- /src/main/java/edu/ucla/sspace/dependency/DependencyPathWeight.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Keith Stevens 3 | * 4 | * This file is part of the S-Space package and is covered under the terms and 5 | * conditions therein. 6 | * 7 | * The S-Space package is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License version 2 as published 9 | * by the Free Software Foundation and distributed hereunder to you. 10 | * 11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES, 12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE 13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY 14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION 15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER 16 | * RIGHTS.
17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with this program. If not, see . 20 | */ 21 | 22 | package edu.ucla.sspace.dependency; 23 | 24 | import java.util.List; 25 | 26 | 27 | /** 28 | * An interface for weighting, or scoring, dependency paths. Implementations 29 | * are suggested to be thread-safe and stateless. 30 | * 31 | * @author Keith Stevens 32 | */ 33 | public interface DependencyPathWeight { 34 | 35 | /** 36 | * Returns the score of the provided {@link DependencyPath}. The score may 37 | * be a function of the length of the path, arbitrary, e.g., 1 for all 38 | * paths, or may be a function of the relations and terms in the path. 39 | * 40 | * @param path A list of the term,relation links in the {@link 41 | * DependencyPath} being scored 42 | * 43 | * @return The score of the dependency path 44 | */ 45 | double scorePath(DependencyPath path); 46 | } 47 | -------------------------------------------------------------------------------- /src/main/java/edu/ucla/sspace/index/PermutationFunction.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2009 David Jurgens 3 | * 4 | * This file is part of the S-Space package and is covered under the terms and 5 | * conditions therein. 6 | * 7 | * The S-Space package is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License version 2 as published 9 | * by the Free Software Foundation and distributed hereunder to you. 10 | * 11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES, 12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE 13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY 14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION 15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER 16 | * RIGHTS.
17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with this program. If not, see . 20 | */ 21 | 22 | package edu.ucla.sspace.index; 23 | 24 | import edu.ucla.sspace.vector.Vector; 25 | 26 | 27 | /** 28 | * An interface for functions that permute the ordering of {@code 29 | * TernaryVector}s. Implementations are expected to be thread safe when 30 | * performing permutations. 31 | */ 32 | public interface PermutationFunction { 33 | 34 | /** 35 | * Permutes the provided {@code TernaryVector} the specified number of 36 | * times. 37 | * 38 | * @param v an index vector to permute 39 | * @param numPermutations the number of times the permutation function 40 | * should be applied to the provided index vector. 41 | * 42 | * @return the original index vector permuted the specified number of times 43 | */ 44 | T permute(T v, int numPermutations); 45 | } 46 | -------------------------------------------------------------------------------- /src/main/java/edu/ucla/sspace/hal/WeightingFunction.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2009 David Jurgens 3 | * 4 | * This file is part of the S-Space package and is covered under the terms and 5 | * conditions therein. 6 | * 7 | * The S-Space package is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License version 2 as published 9 | * by the Free Software Foundation and distributed hereunder to you. 10 | * 11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES, 12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE 13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY 14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION 15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER 16 | * RIGHTS. 
17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with this program. If not, see . 20 | */ 21 | 22 | package edu.ucla.sspace.hal; 23 | 24 | /** 25 | * A function interface for specifying how a {@link 26 | * HyperspaceAnalogueToLanguage} instance should weigh co-occurrences based on 27 | * the word distance. 28 | */ 29 | public interface WeightingFunction { 30 | 31 | /** 32 | * Returns the weighted value for a word at the specified offset. Negative 33 | * offsets indicate that the word appears before; positive offsets indicate 34 | * that the word appears after. 35 | * 36 | * @param positionOffset the location of the word relative to the current 37 | * position 38 | * @param windowSize the maximum number of words on one side that will be 39 | * considered for weighting 40 | * 41 | * @return the weight to apply 42 | */ 43 | double weight(int positionOffset, int windowSize); 44 | 45 | } -------------------------------------------------------------------------------- /src/main/java/edu/ucla/sspace/index/DoubleVectorGenerator.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2009 Keith Stevens 3 | * 4 | * This file is part of the S-Space package and is covered under the terms and 5 | * conditions therein. 6 | * 7 | * The S-Space package is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License version 2 as published 9 | * by the Free Software Foundation and distributed hereunder to you. 10 | * 11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES, 12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE 13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY 14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION 15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER 16 | * RIGHTS.
17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with this program. If not, see . 20 | */ 21 | 22 | package edu.ucla.sspace.index; 23 | 24 | import edu.ucla.sspace.util.Generator; 25 | 26 | import edu.ucla.sspace.vector.DoubleVector; 27 | 28 | 29 | /** 30 | * An interface for classes which will maintain and generate random {@code 31 | * DoubleVector}s. The main purpose of this interface is to allow any 32 | * algorithm that makes use of some sort of random vector, such as Random 33 | * Indexing, to easily swap out the type of indexing used for experimentation 34 | * purposes. 35 | */ 36 | public interface DoubleVectorGenerator 37 | extends Generator { 38 | 39 | /** 40 | * Creates a {@code DoubleVector} to be used as an index vector. 41 | * 42 | * NOTE(review): no length argument is accepted here; presumably the 43 | * implementation decides the vector length -- confirm. 44 | * @return an index vector 45 | */ 46 | public T generate(); 47 | } 48 | -------------------------------------------------------------------------------- /src/main/java/edu/ucla/sspace/index/IntegerVectorGenerator.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2009 Keith Stevens 3 | * 4 | * This file is part of the S-Space package and is covered under the terms and 5 | * conditions therein. 6 | * 7 | * The S-Space package is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License version 2 as published 9 | * by the Free Software Foundation and distributed hereunder to you. 10 | * 11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES, 12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE 13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY 14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION 15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER 16 | * RIGHTS.
17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with this program. If not, see . 20 | */ 21 | 22 | package edu.ucla.sspace.index; 23 | 24 | import edu.ucla.sspace.util.Generator; 25 | 26 | import edu.ucla.sspace.vector.IntegerVector; 27 | 28 | 29 | /** 30 | * An interface for classes which will maintain and generate random {@code 31 | * IntegerVector}s. The main purpose of this interface is to allow any 32 | * algorithm that makes use of some sort of random vector, such as Random 33 | * Indexing, to easily swap out the type of indexing used for experimentation 34 | * purposes. 35 | */ 36 | public interface IntegerVectorGenerator 37 | extends Generator { 38 | 39 | /** 40 | * Creates an {@code IntegerVector} to be used as an index vector. 41 | * 42 | * NOTE(review): no length argument is accepted here; presumably the 43 | * implementation decides the vector length -- confirm. 44 | * @return an index vector 45 | */ 46 | public T generate(); 47 | } 48 | -------------------------------------------------------------------------------- /src/test/java/edu/ucla/sspace/vector/VectorIOTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2009 Keith Stevens 3 | * 4 | * This file is part of the S-Space package and is covered under the terms and 5 | * conditions therein. 6 | * 7 | * The S-Space package is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License version 2 as published 9 | * by the Free Software Foundation and distributed hereunder to you. 10 | * 11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES, 12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE 13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY 14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION 15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER 16 | * RIGHTS.
17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with this program. If not, see . 20 | */ 21 | 22 | package edu.ucla.sspace.vector; 23 | 24 | import org.junit.Ignore; 25 | import org.junit.Test; 26 | 27 | import static org.junit.Assert.*; 28 | 29 | 30 | /** 31 | * @author Keith Stevens 32 | */ 33 | public class VectorIOTest { 34 | 35 | @Test public void testSparseToString() { 36 | Vector vector = new CompactSparseVector(new double[]{0, 0, 0, 1}); 37 | assertEquals("3,1.0", VectorIO.toString(vector)); 38 | } 39 | 40 | @Test public void testSparseToString2() { 41 | Vector vector = new CompactSparseVector(new double[]{0, 1, 0, 5}); 42 | assertEquals("1,1.0;3,5.0", VectorIO.toString(vector)); 43 | } 44 | 45 | @Test public void testDenseToString() { 46 | Vector vector = new DenseVector(new double[]{0, 1, 0, 5}); 47 | assertEquals("0.0 1.0 0.0 5.0", VectorIO.toString(vector)); 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /src/main/java/edu/ucla/sspace/graph/GraphConstructionException.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2011 David Jurgens 3 | * 4 | * This file is part of the S-Space package and is covered under the terms and 5 | * conditions therein. 6 | * 7 | * The S-Space package is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License version 2 as published 9 | * by the Free Software Foundation and distributed hereunder to you. 10 | * 11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES, 12 | * EXPRESS OR IMPLIED ARE MADE. 
BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE 13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY 14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION 15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER 16 | * RIGHTS. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with this program. If not, see . 20 | */ 21 | 22 | package edu.ucla.sspace.graph; 23 | 24 | 25 | /** 26 | * An exception for cases where a change to a graph has resulted in an invalid 27 | * construction according to its design contract. For example, this might 28 | * include adding an edge to an acyclic-by-contract graph that would cause it to 29 | * become cyclic. Callers should catch this exception if the construction of 30 | * the graph may be repealed, i.e. if the operation that caused this exception 31 | * may be rolled back and the program continue as expected. 32 | */ 33 | public class GraphConstructionException extends RuntimeException { 34 | 35 | private static final long serialVersionUID = 1L; 36 | 37 | public GraphConstructionException() { 38 | super(); 39 | } 40 | 41 | public GraphConstructionException(String message) { 42 | super(message); 43 | } 44 | 45 | } -------------------------------------------------------------------------------- /src/test/java/edu/ucla/sspace/common/DummySemanticSpace.java: -------------------------------------------------------------------------------- 1 | package edu.ucla.sspace.common; 2 | 3 | import edu.ucla.sspace.matrix.*; 4 | import edu.ucla.sspace.text.*; 5 | import edu.ucla.sspace.util.*; 6 | import edu.ucla.sspace.vector.*; 7 | 8 | import java.io.*; 9 | 10 | import java.util.HashMap; 11 | import java.util.Map; 12 | import java.util.Properties; 13 | import java.util.Set; 14 | 15 | /** 16 | * A test-only {@link SemanticSpace}, where all the semantic vectors must be 17 | * manually assigned.
18 | */ 19 | public class DummySemanticSpace implements SemanticSpace { 20 | 21 | private final Map wordToVector; 22 | 23 | private int dimensions; 24 | 25 | public DummySemanticSpace() { 26 | wordToVector = new HashMap(); 27 | } 28 | 29 | /** 30 | * Does nothing 31 | */ 32 | public void processDocument(BufferedReader document) throws IOException { } 33 | 34 | /** 35 | * {@inheritDoc} 36 | */ 37 | public Set getWords() { 38 | return wordToVector.keySet(); 39 | } 40 | 41 | /** 42 | * Returns the manually assigned vector for the word 43 | */ 44 | public Vector getVector(String word) { 45 | return wordToVector.get(word); 46 | } 47 | 48 | /** 49 | * Sets the vector for the word 50 | */ 51 | public Vector setVector(String word, Vector vector) { 52 | dimensions = vector.length(); 53 | return wordToVector.put(word, vector); 54 | } 55 | 56 | /** 57 | * {@inheritDoc} 58 | */ 59 | public int getVectorLength() { 60 | return dimensions; 61 | } 62 | 63 | /** 64 | * Does nothing 65 | */ 66 | public void processSpace(Properties properties) { } 67 | 68 | /** 69 | * {@inheritDoc} 70 | */ 71 | public String getSpaceName() { 72 | return "DummySemanticSpace"; 73 | } 74 | 75 | } 76 | -------------------------------------------------------------------------------- /src/main/java/edu/ucla/sspace/evaluation/SimpleWordSimilarity.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2009 David Jurgens 3 | * 4 | * This file is part of the S-Space package and is covered under the terms and 5 | * conditions therein. 6 | * 7 | * The S-Space package is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License version 2 as published 9 | * by the Free Software Foundation and distributed hereunder to you. 10 | * 11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES, 12 | * EXPRESS OR IMPLIED ARE MADE. 
BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE 13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY 14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION 15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER 16 | * RIGHTS. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with this program. If not, see . 20 | */ 21 | 22 | package edu.ucla.sspace.evaluation; 23 | 24 | /** 25 | * The default implementation of {@link WordSimilarity} 26 | */ 27 | public class SimpleWordSimilarity implements WordSimilarity { 28 | 29 | private final String first; 30 | 31 | private final String second; 32 | 33 | private final double sim; 34 | 35 | public SimpleWordSimilarity(String first, String second, double sim) { 36 | this.first = first; 37 | this.second = second; 38 | this.sim = sim; 39 | } 40 | 41 | /** 42 | * {@inheritDoc} 43 | */ 44 | public String getFirstWord() { 45 | return first; 46 | } 47 | 48 | /** 49 | * {@inheritDoc} 50 | */ 51 | public String getSecondWord() { 52 | return second; 53 | } 54 | 55 | /** 56 | * {@inheritDoc} 57 | */ 58 | public double getSimilarity() { 59 | return sim; 60 | } 61 | 62 | } -------------------------------------------------------------------------------- /src/test/java/edu/ucla/sspace/text/PorterStemmerTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2009 Keith Stevens 3 | * 4 | * This file is part of the S-Space package and is covered under the terms and 5 | * conditions therein. 6 | * 7 | * The S-Space package is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License version 2 as published 9 | * by the Free Software Foundation and distributed hereunder to you. 10 | * 11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES, 12 | * EXPRESS OR IMPLIED ARE MADE. 
BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE 13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY 14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION 15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER 16 | * RIGHTS. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with this program. If not, see . 20 | */ 21 | 22 | package edu.ucla.sspace.text; 23 | 24 | import java.io.*; 25 | import java.util.*; 26 | 27 | import org.junit.Ignore; 28 | import org.junit.Test; 29 | 30 | import static org.junit.Assert.*; 31 | 32 | 33 | public class PorterStemmerTest { 34 | @Test public void testStemming() { 35 | String[][] testWords = {{"cats", "cat"}, 36 | {"cat", "cat"}, 37 | {"opened", "open"}, 38 | {"open", "open"}, 39 | {"candies", "candi"}, 40 | {"candy", "candi"}, 41 | {"immediately", "immedi"}}; 42 | for (String[] testExpected : testWords) { 43 | String stem = new PorterStemmer().stem(testExpected[0]); 44 | assertEquals(stem, testExpected[1]); 45 | } 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /src/main/java/edu/ucla/sspace/clustering/DataMatrixLinkClustering.java: -------------------------------------------------------------------------------- 1 | package edu.ucla.sspace.clustering; 2 | 3 | import edu.ucla.sspace.matrix.AffinityMatrixCreator; 4 | import edu.ucla.sspace.matrix.Matrix; 5 | import edu.ucla.sspace.matrix.MatrixFile; 6 | import edu.ucla.sspace.matrix.NearestNeighborAffinityMatrixCreator; 7 | 8 | import edu.ucla.sspace.similarity.CosineSimilarity; 9 | import edu.ucla.sspace.similarity.SimilarityFunction; 10 | 11 | import java.util.Properties; 12 | 13 | 14 | /** 15 | * @author Keith Stevens 16 | */ 17 | public class DataMatrixLinkClustering implements Clustering { 18 | 19 | private final AffinityMatrixCreator creator; 20 | 21 | private final LinkClustering linkCluster; 22 | 23 | 
public DataMatrixLinkClustering() { 24 | this(createDefaultAffinityMatrixCreator()); 25 | } 26 | 27 | public DataMatrixLinkClustering(AffinityMatrixCreator creator) { 28 | this.creator = creator; 29 | this.linkCluster = new LinkClustering(); 30 | } 31 | 32 | public static AffinityMatrixCreator createDefaultAffinityMatrixCreator() { 33 | SimilarityFunction simFunc = new CosineSimilarity(); 34 | AffinityMatrixCreator creator = 35 | new NearestNeighborAffinityMatrixCreator(); 36 | creator.setParams(10); 37 | creator.setFunctions(simFunc, simFunc); 38 | return creator; 39 | } 40 | 41 | public Assignments cluster(Matrix matrix, 42 | int numClusters, 43 | Properties props) { 44 | MatrixFile affinityMatrix = creator.calculate(matrix); 45 | return linkCluster.cluster(affinityMatrix.load(), numClusters, props); 46 | } 47 | 48 | public Assignments cluster(Matrix matrix, Properties props) { 49 | MatrixFile affinityMatrix = creator.calculate(matrix); 50 | return linkCluster.cluster(affinityMatrix.load(), props); 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /src/test/java/edu/ucla/sspace/dependency/AbstractPathUtil.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Keith Stevens 3 | * 4 | * This file is part of the S-Space package and is covered under the terms and 5 | * conditions therein. 6 | * 7 | * The S-Space package is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License version 2 as published 9 | * by the Free Software Foundation and distributed hereunder to you. 10 | * 11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES, 12 | * EXPRESS OR IMPLIED ARE MADE. 
BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE 13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY 14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION 15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER 16 | * RIGHTS. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with this program. If not, see . 20 | */ 21 | 22 | package edu.ucla.sspace.dependency; 23 | 24 | import java.util.LinkedList; 25 | import java.util.List; 26 | 27 | import org.junit.Ignore; 28 | import org.junit.Test; 29 | 30 | import static org.junit.Assert.*; 31 | 32 | 33 | public class AbstractPathUtil { 34 | 35 | protected DependencyPath makePath(String[][] pathString) { 36 | List path = new LinkedList(); 37 | for (String[] link : pathString) { 38 | DependencyTreeNode n1 = 39 | new SimpleDependencyTreeNode(link[0], link[1], 0); 40 | String relation = link[2]; 41 | DependencyTreeNode n2 = 42 | new SimpleDependencyTreeNode(link[3], link[4], 0); 43 | 44 | path.add(new SimpleDependencyRelation(n1, relation, n2)); 45 | } 46 | return new SimpleDependencyPath(path); 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /src/main/java/edu/ucla/sspace/similarity/AbstractSymmetricSimilarityFunction.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2011, Lawrence Livermore National Security, LLC. Produced at 3 | * the Lawrence Livermore National Laboratory. Written by Keith Stevens, 4 | * kstevens@cs.ucla.edu OCEC-10-073 All rights reserved. 5 | * 6 | * This file is part of the S-Space package and is covered under the terms and 7 | * conditions therein. 
8 | * 9 | * The S-Space package is free software: you can redistribute it and/or modify 10 | * it under the terms of the GNU General Public License version 2 as published 11 | * by the Free Software Foundation and distributed hereunder to you. 12 | * 13 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES, 14 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE 15 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY 16 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION 17 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER 18 | * RIGHTS. 19 | * 20 | * You should have received a copy of the GNU General Public License 21 | * along with this program. If not, see . 22 | */ 23 | 24 | package edu.ucla.sspace.similarity; 25 | 26 | 27 | /** 28 | * A base implementation for any symmetric {@link SimilarityFunction} that 29 | * requires no parameters. Any subclass will return {@code true} for calls to 30 | * {@link #isSymmetric}. 31 | * 32 | * @author Keith Stevens 33 | */ 34 | public abstract class AbstractSymmetricSimilarityFunction 35 | implements SimilarityFunction { 36 | 37 | /** 38 | * Performs a no-op and sets no parameters. 39 | */ 40 | public void setParams(double... arguments) { 41 | } 42 | 43 | /** 44 | * Returns {@code true}. 45 | */ 46 | public boolean isSymmetric() { 47 | return true; 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /src/test/java/edu/ucla/sspace/dependency/AbstractPathTestBase.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Keith Stevens 3 | * 4 | * This file is part of the S-Space package and is covered under the terms and 5 | * conditions therein.
6 | * 7 | * The S-Space package is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License version 2 as published 9 | * by the Free Software Foundation and distributed hereunder to you. 10 | * 11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES, 12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE 13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY 14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION 15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER 16 | * RIGHTS. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with this program. If not, see . 20 | */ 21 | 22 | package edu.ucla.sspace.dependency; 23 | 24 | import java.util.LinkedList; 25 | import java.util.List; 26 | 27 | import org.junit.Ignore; 28 | import org.junit.Test; 29 | 30 | import static org.junit.Assert.*; 31 | 32 | 33 | public class AbstractPathTestBase { 34 | 35 | protected DependencyPath makePath(String[][] pathString) { 36 | List path = new LinkedList(); 37 | for (String[] link : pathString) { 38 | DependencyTreeNode n1 = 39 | new SimpleDependencyTreeNode(link[0], link[1], 0); 40 | String relation = link[2]; 41 | DependencyTreeNode n2 = 42 | new SimpleDependencyTreeNode(link[3], link[4], 0); 43 | 44 | path.add(new SimpleDependencyRelation(n1, relation, n2)); 45 | } 46 | return new SimpleDependencyPath(path); 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /src/main/java/edu/ucla/sspace/clustering/seeding/KMeansSeed.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2011 Keith Stevens 3 | * 4 | * This file is part of the S-Space package and is covered under the terms and 5 | * conditions therein. 
6 | * 7 | * The S-Space package is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License version 2 as published 9 | * by the Free Software Foundation and distributed hereunder to you. 10 | * 11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES, 12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE 13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY 14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION 15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER 16 | * RIGHTS. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with this program. If not, see . 20 | */ 21 | 22 | package edu.ucla.sspace.clustering.seeding; 23 | 24 | import edu.ucla.sspace.matrix.Matrix; 25 | 26 | import edu.ucla.sspace.vector.DoubleVector; 27 | 28 | 29 | /** 30 | * An interface for KMeans seeding algorithms. Implementations must compose 31 | * initial centroid seeds from a data set by either choosing an already existing 32 | * data point or composing a linear combination of existing data points. 33 | * 34 | *

35 | * 36 | * Implementations must be state free and threadsafe. 37 | * 38 | * @author Keith Stevens 39 | */ 40 | public interface KMeansSeed { 41 | 42 | /** 43 | * Returns an array of length {@code numCentroids} that contains centroids 44 | * composed of either vectors from {@code dataPoints} or a linear combination 45 | * of vectors from {@code dataPoints}. 46 | */ 47 | DoubleVector[] chooseSeeds(int numCentroids, Matrix dataPoints); 48 | } 49 | -------------------------------------------------------------------------------- /src/main/java/edu/ucla/sspace/util/Duple.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2009 Keith Stevens 3 | * 4 | * This file is part of the S-Space package and is covered under the terms and 5 | * conditions therein. 6 | * 7 | * The S-Space package is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License version 2 as published 9 | * by the Free Software Foundation and distributed hereunder to you. 10 | * 11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES, 12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE 13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY 14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION 15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER 16 | * RIGHTS. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with this program. If not, see . 20 | */ 21 | 22 | package edu.ucla.sspace.util; 23 | 24 | import java.io.Serializable; 25 | 26 | 27 | /** 28 | * A wrapper for containing two objects of different types. 
/**
 * An immutable, ordered pair holding two objects of possibly different types.
 * Either element may be {@code null}.  Two pairs are equal when their first
 * elements are equal and their second elements are equal.
 *
 * @param <T> the type of the first element
 * @param <U> the type of the second element
 */
public class Duple<T, U> implements Serializable {

    private static final long serialVersionUID = 1L;

    /**
     * The first element of the pair; may be {@code null}.
     */
    public final T x;

    /**
     * The second element of the pair; may be {@code null}.
     */
    public final U y;

    /**
     * Creates a pair from the two provided values.
     *
     * @param x the first element
     * @param y the second element
     */
    public Duple(T x, U y) {
        this.x = x;
        this.y = y;
    }

    /**
     * Returns {@code true} if {@code o} is a {@code Duple} whose elements
     * are pairwise equal to this pair's elements, treating two {@code null}
     * elements as equal.
     */
    @Override
    public boolean equals(Object o) {
        if (this == o)
            return true;
        // instanceof already rejects null, so no separate null check is needed
        if (!(o instanceof Duple))
            return false;
        Duple<?, ?> other = (Duple<?, ?>) o;
        return sameElement(x, other.x) && sameElement(y, other.y);
    }

    /**
     * Returns a hash code that depends on the order of the elements.  A
     * plain XOR of the two element hashes would make {@code (a, b)} collide
     * with {@code (b, a)} and would hash every {@code (a, a)} to zero, so
     * the first element's hash is scaled before the second is added.
     */
    @Override
    public int hashCode() {
        int first = (x == null) ? 0 : x.hashCode();
        int second = (y == null) ? 0 : y.hashCode();
        return 31 * first + second;
    }

    /**
     * Returns the pair formatted as <code>{x, y}</code>.
     */
    @Override
    public String toString() {
        return "{" + x + ", " + y + "}";
    }

    /**
     * Null-safe equality check for a single element of the pair.
     */
    private static boolean sameElement(Object a, Object b) {
        return a == b || (a != null && a.equals(b));
    }
}
20 | */ 21 | 22 | package edu.ucla.sspace.hal; 23 | 24 | /** 25 | * A geometically-decreasing weighting scheme for specifying how a {@link 26 | * HyperspaceAnalogueToLanguage} instance should weigh co-occurrences based on 27 | * the word distance. 28 | */ 29 | public class GeometricWeighting implements WeightingFunction { 30 | 31 | /** 32 | * Returns the weighed value where the closest words receive a weight equal 33 | * to the window size and the most distance words receive a weight of {@code 34 | * 1}, using a geometric (1 / 2n) decrease for in-between values. 35 | * 36 | * @param positionOffset {@inheritDoc} 37 | * @param windowSize {@inheritDoc} 38 | * 39 | * @return {@inheritDoc} 40 | */ 41 | public double weight(int positionOffset, int windowSize) { 42 | return ((1 << (windowSize - (Math.abs(positionOffset) - 1))) / 43 | (double)(1 << windowSize)) * windowSize; 44 | } 45 | 46 | } -------------------------------------------------------------------------------- /src/main/java/edu/ucla/sspace/text/LabeledStringDocument.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2011 David Jurgens 3 | * 4 | * This file is part of the S-Space package and is covered under the terms and 5 | * conditions therein. 6 | * 7 | * The S-Space package is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License version 2 as published 9 | * by the Free Software Foundation and distributed hereunder to you. 10 | * 11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES, 12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE 13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY 14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION 15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER 16 | * RIGHTS. 
17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with this program. If not, see . 20 | */ 21 | 22 | package edu.ucla.sspace.text; 23 | 24 | import java.io.BufferedReader; 25 | import java.io.StringReader; 26 | 27 | 28 | /** 29 | * A {@code LabeledDocument} implementation backed by a {@code String} whose 30 | * contents are used for the document text. 31 | */ 32 | public class LabeledStringDocument extends StringDocument 33 | implements LabeledDocument { 34 | 35 | /** 36 | * The label of the document 37 | */ 38 | private final String label; 39 | 40 | /** 41 | * Constructs a {@code Document} using the provided string as the document 42 | * text 43 | * 44 | * @param docText the document text 45 | */ 46 | public LabeledStringDocument(String label, String docText) { 47 | super(docText); 48 | this.label = label; 49 | } 50 | 51 | /** 52 | * {@inheritDoc} 53 | */ 54 | public String label() { 55 | return label; 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /src/test/java/edu/ucla/sspace/matrix/MatlabSparseFileTransformerTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2010, Lawrence Livermore National Security, LLC. Produced at 3 | * the Lawrence Livermore National Laboratory. Written by Keith Stevens, 4 | * kstevens@cs.ucla.edu OCEC-10-073 All rights reserved. 5 | * 6 | * This file is part of the C-Cat package and is covered under the terms and 7 | * conditions therein. 8 | * 9 | * The S-Space package is free software: you can redistribute it and/or modify 10 | * it under the terms of the GNU General Public License version 2 as published 11 | * by the Free Software Foundation and distributed hereunder to you. 12 | * 13 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES, 14 | * EXPRESS OR IMPLIED ARE MADE. 
BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE 15 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY 16 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION 17 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER 18 | * RIGHTS. 19 | * 20 | * You should have received a copy of the GNU General Public License 21 | * along with this program. If not, see . 22 | */ 23 | 24 | package edu.ucla.sspace.matrix; 25 | 26 | import edu.ucla.sspace.matrix.MatrixIO.Format; 27 | 28 | import org.junit.Test; 29 | 30 | 31 | /** 32 | * @author Keith Stevens 33 | */ 34 | public class MatlabSparseFileTransformerTest { 35 | 36 | public static final double[][] VALUES = { 37 | {1, 1, 1, 4, 5}, 38 | {5, 3, 1, 0, 0}, 39 | {0, 1, 5, 0, 2}, 40 | }; 41 | 42 | public static final Matrix MATRIX = new ArrayMatrix(VALUES); 43 | 44 | @Test public void testTransform() { 45 | FileTransformer transformer = new MatlabSparseFileTransformer(); 46 | FileTransformUtil.testTransform(MATRIX, Format.MATLAB_SPARSE, 47 | transformer); 48 | } 49 | } 50 | 51 | -------------------------------------------------------------------------------- /src/test/java/edu/ucla/sspace/matrix/SvdlibcDenseTextFileTransformerTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2010, Lawrence Livermore National Security, LLC. Produced at 3 | * the Lawrence Livermore National Laboratory. Written by Keith Stevens, 4 | * kstevens@cs.ucla.edu OCEC-10-073 All rights reserved. 5 | * 6 | * This file is part of the C-Cat package and is covered under the terms and 7 | * conditions therein. 8 | * 9 | * The S-Space package is free software: you can redistribute it and/or modify 10 | * it under the terms of the GNU General Public License version 2 as published 11 | * by the Free Software Foundation and distributed hereunder to you. 
12 | * 13 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES, 14 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE 15 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY 16 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION 17 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER 18 | * RIGHTS. 19 | * 20 | * You should have received a copy of the GNU General Public License 21 | * along with this program. If not, see . 22 | */ 23 | 24 | package edu.ucla.sspace.matrix; 25 | 26 | import edu.ucla.sspace.matrix.MatrixIO.Format; 27 | 28 | import org.junit.Test; 29 | 30 | 31 | /** 32 | * @author Keith Stevens 33 | */ 34 | public class SvdlibcDenseTextFileTransformerTest { 35 | 36 | public static final double[][] VALUES = { 37 | {1, 1, 1, 4, 5}, 38 | {5, 3, 1, 0, 0}, 39 | {0, 1, 5, 0, 2}, 40 | }; 41 | 42 | public static final Matrix MATRIX = new ArrayMatrix(VALUES); 43 | 44 | @Test public void testTransform() { 45 | FileTransformer transformer = new SvdlibcDenseTextFileTransformer(); 46 | FileTransformUtil.testTransform(MATRIX, Format.SVDLIBC_DENSE_TEXT, 47 | transformer); 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /src/main/java/edu/ucla/sspace/evaluation/WordChoiceReport.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2009 David Jurgens 3 | * 4 | * This file is part of the S-Space package and is covered under the terms and 5 | * conditions therein. 6 | * 7 | * The S-Space package is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License version 2 as published 9 | * by the Free Software Foundation and distributed hereunder to you. 10 | * 11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES, 12 | * EXPRESS OR IMPLIED ARE MADE. 
/**
 * Summarizes how a {@link SemanticSpace} performed on one particular {@link
 * WordChoiceEvaluation} test.
 *
 * @author David Jurgens
 */
public interface WordChoiceReport {

    /**
     * Returns the total number of questions on the test.
     */
    int numberOfQuestions();

    /**
     * Returns the number of questions that were answered correctly.
     */
    int correctAnswers();

    /**
     * Returns the number of questions for which the {@link SemanticSpace}
     * could not give an answer due to missing word vectors in either the
     * prompt or the options.
     */
    int unanswerableQuestions();

    /**
     * Returns the score, ranged between 0 and 100, achieved on a particular
     * evaluation.
     */
    double score();
}
8 | * 9 | * The S-Space package is free software: you can redistribute it and/or modify 10 | * it under the terms of the GNU General Public License version 2 as published 11 | * by the Free Software Foundation and distributed hereunder to you. 12 | * 13 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES, 14 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE 15 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY 16 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION 17 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER 18 | * RIGHTS. 19 | * 20 | * You should have received a copy of the GNU General Public License 21 | * along with this program. If not, see . 22 | */ 23 | 24 | package edu.ucla.sspace.matrix; 25 | 26 | import edu.ucla.sspace.matrix.MatrixIO.Format; 27 | 28 | import org.junit.Test; 29 | 30 | 31 | /** 32 | * @author Keith Stevens 33 | */ 34 | public class SvdlibcSparseTextFileTransformerTest { 35 | 36 | public static final double[][] VALUES = { 37 | {1, 1, 1, 4, 5}, 38 | {5, 3, 1, 0, 0}, 39 | {0, 1, 5, 0, 2}, 40 | }; 41 | 42 | public static final Matrix MATRIX = new ArrayMatrix(VALUES); 43 | 44 | @Test public void testTransform() { 45 | FileTransformer transformer = new SvdlibcSparseTextFileTransformer(); 46 | FileTransformUtil.testTransform(MATRIX, Format.SVDLIBC_SPARSE_TEXT, 47 | transformer); 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /src/test/java/edu/ucla/sspace/matrix/SvdlibcSparseBinaryFileTransformerTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2010, Lawrence Livermore National Security, LLC. Produced at 3 | * the Lawrence Livermore National Laboratory. Written by Keith Stevens, 4 | * kstevens@cs.ucla.edu OCEC-10-073 All rights reserved. 
5 | * 6 | * This file is part of the C-Cat package and is covered under the terms and 7 | * conditions therein. 8 | * 9 | * The S-Space package is free software: you can redistribute it and/or modify 10 | * it under the terms of the GNU General Public License version 2 as published 11 | * by the Free Software Foundation and distributed hereunder to you. 12 | * 13 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES, 14 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE 15 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY 16 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION 17 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER 18 | * RIGHTS. 19 | * 20 | * You should have received a copy of the GNU General Public License 21 | * along with this program. If not, see . 22 | */ 23 | 24 | package edu.ucla.sspace.matrix; 25 | 26 | import edu.ucla.sspace.matrix.MatrixIO.Format; 27 | 28 | import org.junit.Test; 29 | 30 | 31 | /** 32 | * @author Keith Stevens 33 | */ 34 | public class SvdlibcSparseBinaryFileTransformerTest { 35 | 36 | public static final double[][] VALUES = { 37 | {1, 1, 1, 4, 5}, 38 | {5, 3, 1, 0, 0}, 39 | {0, 1, 5, 0, 2}, 40 | }; 41 | 42 | public static final Matrix MATRIX = new ArrayMatrix(VALUES); 43 | 44 | @Test public void testTransform() { 45 | FileTransformer transformer = new SvdlibcSparseBinaryFileTransformer(); 46 | FileTransformUtil.testTransform(MATRIX, Format.SVDLIBC_SPARSE_BINARY, 47 | transformer); 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /src/test/java/edu/ucla/sspace/matrix/SvdlibcDenseBinaryFileTransformerTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2010, Lawrence Livermore National Security, LLC. Produced at 3 | * the Lawrence Livermore National Laboratory. 
Written by Keith Stevens, 4 | * kstevens@cs.ucla.edu OCEC-10-073 All rights reserved. 5 | * 6 | * This file is part of the C-Cat package and is covered under the terms and 7 | * conditions therein. 8 | * 9 | * The S-Space package is free software: you can redistribute it and/or modify 10 | * it under the terms of the GNU General Public License version 2 as published 11 | * by the Free Software Foundation and distributed hereunder to you. 12 | * 13 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES, 14 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE 15 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY 16 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION 17 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER 18 | * RIGHTS. 19 | * 20 | * You should have received a copy of the GNU General Public License 21 | * along with this program. If not, see . 22 | */ 23 | 24 | package edu.ucla.sspace.matrix; 25 | 26 | import edu.ucla.sspace.matrix.MatrixIO.Format; 27 | 28 | import org.junit.Test; 29 | 30 | 31 | /** 32 | * @author Keith Stevens 33 | */ 34 | public class SvdlibcDenseBinaryFileTransformerTest { 35 | 36 | public static final double[][] VALUES = { 37 | {1, 1, 1, 4, 5}, 38 | {5, 3, 1, 0, 0}, 39 | {0, 1, 5, 0, 2}, 40 | }; 41 | 42 | public static final Matrix MATRIX = new ArrayMatrix(VALUES); 43 | 44 | @Test public void testTransform() { 45 | FileTransformer transformer = new SvdlibcDenseBinaryFileTransformer(); 46 | FileTransformUtil.testTransform(MATRIX, Format.SVDLIBC_DENSE_BINARY, 47 | transformer); 48 | } 49 | } 50 | 51 | -------------------------------------------------------------------------------- /src/main/java/edu/ucla/sspace/clustering/criterion/H2Function.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2011 Keith Stevens 3 | * 4 | * This file is part of the 
S-Space package and is covered under the terms and 5 | * conditions therein. 6 | * 7 | * The S-Space package is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License version 2 as published 9 | * by the Free Software Foundation and distributed hereunder to you. 10 | * 11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES, 12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE 13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY 14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION 15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER 16 | * RIGHTS. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with this program. If not, see . 20 | */ 21 | 22 | package edu.ucla.sspace.clustering.criterion; 23 | 24 | 25 | /** 26 | * This {@link HybridBaseFunction} uses the {@link E1Function} and the {@link 27 | * I2Function}.
28 | * 29 | * @author Keith Stevens 30 | */ 31 | public class H2Function extends HybridBaseFunction { 32 | 33 | /** 34 | * {@inheritDoc} 35 | */ 36 | protected BaseFunction getInternalFunction() { 37 | return new I2Function(matrix, centroids, i1Costs, 38 | assignments, clusterSizes); 39 | } 40 | 41 | /** 42 | * {@inheritDoc} 43 | */ 44 | protected BaseFunction getExternalFunction() { 45 | return new E1Function(matrix, centroids, e1Costs, 46 | assignments, clusterSizes, 47 | completeCentroid, simToComplete); 48 | } 49 | 50 | /** 51 | * {@inheritDoc} 52 | */ 53 | public boolean isMaximize() { 54 | return true; 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /src/main/java/edu/ucla/sspace/dependency/DependencyTreeNode.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Keith Stevens 3 | * 4 | * This file is part of the S-Space package and is covered under the terms and 5 | * conditions therein. 6 | * 7 | * The S-Space package is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License version 2 as published 9 | * by the Free Software Foundation and distributed hereunder to you. 10 | * 11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES, 12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE 13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY 14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION 15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER 16 | * RIGHTS. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with this program. If not, see . 20 | */ 21 | 22 | package edu.ucla.sspace.dependency; 23 | 24 | import java.util.List; 25 | 26 | 27 | /** 28 | * The interface for a word in a dependency parse tree. 
/**
 * Describes a single word in a dependency parse tree.
 */
public interface DependencyTreeNode {

    /**
     * Returns the list of neighbors to the current node.  Note that this
     * list includes both relations where the current node is the head node
     * and relations where the current node is the dependent.
     */
    // NOTE(review): the element type of this list is not visible here
    // (presumably DependencyRelation) -- confirm against the original file.
    List neighbors();

    /**
     * Returns the word stored in this node.
     */
    String word();

    /**
     * Returns the lemmatized version of the word, if there is any.
     */
    String lemma();

    /**
     * Returns the part of speech tag for this node.
     */
    String pos();

    /**
     * Returns the index used by this {@link DependencyTreeNode} in an array.
     */
    int index();
}
20 | */ 21 | 22 | package edu.ucla.sspace.vector; 23 | 24 | import org.junit.Ignore; 25 | import org.junit.Test; 26 | 27 | import static org.junit.Assert.*; 28 | 29 | import java.util.HashMap; 30 | import java.util.Map; 31 | 32 | 33 | /** 34 | * Tests for the {@link MaskedDoubleVectorView} class. 35 | */ 36 | public class MaskedDoubleVectorViewTest { 37 | 38 | @Test public void testScaledCreate() { 39 | double[] values = new double[] {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}; 40 | DoubleVector v = new DenseVector(values); 41 | int[] mask = new int[3]; 42 | mask[0] = 5; 43 | mask[1] = 9; 44 | mask[2] = 1; 45 | DoubleVector masked = new MaskedDoubleVectorView(v, mask); 46 | 47 | assertEquals(mask.length, masked.length()); 48 | assertEquals(values[mask[0]], masked.get(0), .00001); 49 | assertEquals(values[mask[1]], masked.get(1), .00001); 50 | assertEquals(values[mask[2]], masked.get(2), .00001); 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /src/main/java/edu/ucla/sspace/evaluation/WordSimilarityEvaluation.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2009 David Jurgens 3 | * 4 | * This file is part of the S-Space package and is covered under the terms and 5 | * conditions therein. 6 | * 7 | * The S-Space package is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License version 2 as published 9 | * by the Free Software Foundation and distributed hereunder to you. 10 | * 11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES, 12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE 13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY 14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION 15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER 16 | * RIGHTS. 
/**
 * An evaluation metric that compares the human-judged similarity of word
 * pairs against the similarity judgements from a {@link SemanticSpace}.
 *
 * @author David Jurgens
 */
public interface WordSimilarityEvaluation {

    /**
     * Returns a collection of human similarity judgements for word pairs.
     */
    // NOTE(review): the element type of this collection is not visible here
    // (presumably WordSimilarity) -- confirm against the original file.
    Collection getPairs();

    /**
     * Returns the numeric similarity judgement that is equivalent to two
     * words being completely similar (i.e. identical).
     */
    double getMostSimilarValue();

    /**
     * Returns the numeric similarity judgement that is equivalent to two
     * words being completely dissimilar (i.e. unrelated).
     */
    double getLeastSimilarValue();

}
BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE 13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY 14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION 15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER 16 | * RIGHTS. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with this program. If not, see . 20 | */ 21 | 22 | package edu.ucla.sspace.vector; 23 | 24 | import org.junit.Ignore; 25 | import org.junit.Test; 26 | 27 | import static org.junit.Assert.*; 28 | 29 | import java.util.HashMap; 30 | import java.util.Map; 31 | 32 | 33 | /** 34 | * Tests for the {@link DenseVector} class. 35 | */ 36 | public class DenseVectorTests { 37 | 38 | @Test public void testMagnitude() { 39 | DenseVector v = new DenseVector(100); 40 | assertEquals(0, v.magnitude(), .0001); 41 | 42 | v.set(1, 1); 43 | assertEquals(1, v.magnitude(), .0001); 44 | 45 | v.set(1, 3); 46 | v.set(2, 4); 47 | assertEquals(5, v.magnitude(), .0001); 48 | 49 | DenseVector v2 = new DenseVector(v); 50 | assertEquals(5, v2.magnitude(), .0001); 51 | } 52 | 53 | @Test public void testArrayMagnitude() { 54 | double[] values = new double[] {0, 3, 4, 0, 0}; 55 | DenseVector v = new DenseVector(values); 56 | assertEquals(5, v.magnitude(), .0001); 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /src/main/java/edu/ucla/sspace/matrix/MatrixIOException.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2011 David Jurgens 3 | * 4 | * This file is part of the S-Space package and is covered under the terms and 5 | * conditions therein. 6 | * 7 | * The S-Space package is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License version 2 as published 9 | * by the Free Software Foundation and distributed hereunder to you. 
/**
 * Signals that an error occurred while reading or writing a matrix file.
 */
public class MatrixIOException extends IOException {

    private static final long serialVersionUID = 1L;

    /**
     * Creates a {@code MatrixIOException} with no message.
     */
    public MatrixIOException() {
        super();
    }

    /**
     * Creates a {@code MatrixIOException} with the provided message to
     * report to the user.
     *
     * @param message the description of the failure
     */
    public MatrixIOException(String message) {
        super(message);
    }

    /**
     * Creates a {@code MatrixIOException} with the provided message to
     * report to the user, listing the {@code Throwable} as the original
     * cause of the exception.
     *
     * @param message the description of the failure
     * @param cause the underlying error that triggered this exception
     */
    public MatrixIOException(String message, Throwable cause) {
        super(message, cause);
    }
}
/**
 * An {@code Iterator} decorator that provides synchronized access to each
 * element.  Every operation acquires this decorator's monitor before
 * touching the wrapped iterator.
 *
 * <p>Each call is individually atomic, but a {@code hasNext()} followed by a
 * {@code next()} is not atomic as a pair: another thread may consume the last
 * element in between.  Callers sharing an instance should either hold this
 * object's monitor around the pair or be prepared for {@code next()} to
 * throw {@link java.util.NoSuchElementException}.
 *
 * @param <T> the type of element returned by the iterator
 *
 * @author Keith Stevens
 */
public class SynchronizedIterator<T> implements Iterator<T> {

    /**
     * The wrapped iterator; only accessed while holding this object's
     * monitor.
     */
    private final Iterator<T> iter;

    /**
     * Constructs a {@code SynchronizedIterator} from the provided iterator.
     *
     * @param iterator the iterator whose elements are to be returned
     */
    public SynchronizedIterator(Iterator<T> iterator) {
        iter = iterator;
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public synchronized boolean hasNext() {
        return iter.hasNext();
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public synchronized T next() {
        // Must hold the same lock as hasNext(); otherwise two threads can
        // advance the wrapped iterator concurrently.
        return iter.next();
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public synchronized void remove() {
        iter.remove();
    }
}
/**
 * A marker interface that indicates that this class supports selectively
 * filtering which words have their semantics retained.  The {@link
 * #setSemanticFilter(Set)} method can be used to specify which words should
 * have their semantics retained.  Note that the words that are filtered out
 * will still be used in computing the semantics of other words.  This
 * behavior is intended for use with large corpora where retaining the
 * semantics of all words in memory is infeasible.
 *
 * @see SemanticSpace
 */
public interface Filterable {

    /**
     * Specifies the set of words that should have their semantics retained,
     * where all other words do not.
     *
     * @param semanticsToRetain the set of words that should have their
     *        semantics retained in memory
     */
    // NOTE(review): the parameter is a raw Set here; the element type is
    // presumably String (a set of words) -- confirm against the implementers.
    void setSemanticFilter(Set semanticsToRetain);

}
/**
 * A {@link DependencyContextGenerator} whose feature for a node is simply the
 * node's word, with no ordering or part-of-speech information attached.
 *
 * @author Keith Stevens
 */
public class OccurrenceDependencyContextGenerator
        extends AbstractOccurrenceDependencyContextGenerator {

    /**
     * Constructs a new {@link OccurrenceDependencyContextGenerator}.  Both
     * arguments are passed unchanged to the superclass constructor.
     */
    public OccurrenceDependencyContextGenerator(
            BasisMapping basis,
            int windowSize) {
        super(basis, windowSize);
    }

    /**
     * Returns the node's word as the feature.  The {@code index} argument is
     * ignored by this implementation.
     */
    protected String getFeature(DependencyTreeNode node, int index) {
        return node.word();
    }
}
20 | */ 21 | 22 | package edu.ucla.sspace.wordsi; 23 | 24 | import edu.ucla.sspace.basis.BasisMapping; 25 | 26 | import edu.ucla.sspace.dependency.DependencyTreeNode; 27 | 28 | 29 | /** 30 | * A {@link DependencyContextGenerator} that marks each co-occurrence with 31 | * ordering information. 32 | * 33 | * @author Keith Stevens 34 | */ 35 | public class OrderingDependencyContextGenerator 36 | extends AbstractOccurrenceDependencyContextGenerator{ 37 | 38 | /** 39 | * Constructs a new {@link OrderingDependencyContextGenerator}. 40 | */ 41 | public OrderingDependencyContextGenerator( 42 | BasisMapping basis, 43 | int windowSize) { 44 | super(basis, windowSize); 45 | } 46 | 47 | /** 48 | * Returns a string with the node's word plus it's distance from the focus 49 | * word, with a hyphen between the two. 50 | */ 51 | protected String getFeature(DependencyTreeNode node, int index) { 52 | return node.word() + "-" + index; 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /src/main/java/edu/ucla/sspace/mains/TopicWordsiMain.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Keith Stevens 3 | * 4 | * This file is part of the S-Space package and is covered under the terms and 5 | * conditions therein. 6 | * 7 | * The S-Space package is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License version 2 as published 9 | * by the Free Software Foundation and distributed hereunder to you. 10 | * 11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES, 12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE 13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY 14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION 15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER 16 | * RIGHTS. 
17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with this program. If not, see . 20 | */ 21 | 22 | package edu.ucla.sspace.mains; 23 | 24 | import edu.ucla.sspace.common.ArgOptions; 25 | import edu.ucla.sspace.common.SemanticSpaceIO.SSpaceFormat; 26 | 27 | import edu.ucla.sspace.wordsi.ContextExtractor; 28 | import edu.ucla.sspace.wordsi.TopicModelContextExtractor; 29 | 30 | import java.util.Map; 31 | 32 | 33 | /** 34 | * A main for running a wordsi model over topic signatures for documents. 35 | * 36 | * @author Keith Stevens 37 | */ 38 | public class TopicWordsiMain extends GenericWordsiMain { 39 | 40 | /** 41 | * {@inheritDoc} 42 | */ 43 | protected ContextExtractor getExtractor() { 44 | // Create the new generator. 45 | return new TopicModelContextExtractor(); 46 | } 47 | 48 | /** 49 | * {@inheritDoc} 50 | */ 51 | protected SSpaceFormat getSpaceFormat() { 52 | return SSpaceFormat.SPARSE_BINARY; 53 | } 54 | 55 | public static void main(String[] args) throws Exception { 56 | TopicWordsiMain main = new TopicWordsiMain(); 57 | main.run(args); 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /src/test/java/edu/ucla/sspace/dependency/FlatPathWeightTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Keith Stevens 3 | * 4 | * This file is part of the S-Space package and is covered under the terms and 5 | * conditions therein. 6 | * 7 | * The S-Space package is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License version 2 as published 9 | * by the Free Software Foundation and distributed hereunder to you. 10 | * 11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES, 12 | * EXPRESS OR IMPLIED ARE MADE. 
BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE 13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY 14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION 15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER 16 | * RIGHTS. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with this program. If not, see . 20 | */ 21 | 22 | package edu.ucla.sspace.dependency; 23 | 24 | import java.util.List; 25 | 26 | import org.junit.Ignore; 27 | import org.junit.Test; 28 | 29 | import static org.junit.Assert.*; 30 | 31 | 32 | public class FlatPathWeightTest extends AbstractPathUtil { 33 | 34 | @Test public void testSimplePath() { 35 | String[][] pathString = {{"cat", "n", "Rel", "dog", "n"}}; 36 | DependencyPath path = makePath(pathString); 37 | DependencyPathWeight weighter = new FlatPathWeight(); 38 | assertEquals(1, weighter.scorePath(path), .000001); 39 | } 40 | 41 | @Test public void testLongPath() { 42 | String[][] pathString = {{"cat", "n", "Rel", "dog", "n"}, 43 | {"dog", "n", "noarelation", "whale", "n"}, 44 | {"whale", "n", "noarelation", "pig", "n"}}; 45 | DependencyPath path = makePath(pathString); 46 | DependencyPathWeight weighter = new FlatPathWeight(); 47 | assertEquals(1, weighter.scorePath(path), .000001); 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /src/main/java/edu/ucla/sspace/graph/TypedEdge.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2011 David Jurgens 3 | * 4 | * This file is part of the S-Space package and is covered under the terms and 5 | * conditions therein. 6 | * 7 | * The S-Space package is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License version 2 as published 9 | * by the Free Software Foundation and distributed hereunder to you. 
10 | * 11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES, 12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE 13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY 14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION 15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER 16 | * RIGHTS. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with this program. If not, see . 20 | */ 21 | 22 | package edu.ucla.sspace.graph; 23 | 24 | 25 | /** 26 | * An interface for edges in multigraphs. In a multigraph, two vertices 27 | * v1 and v2 may have mulitple edges between then provided 28 | * that the for any two edges {@code !e1.equals(e2)}. 29 | * 30 | *

This interface allows for a multigraph to have mutliple types of 31 | * edges that extend from a common type. For example, a graph that represents 32 | * cities may contain edges indicating the different types of transportation 33 | * (e.g. car, train, bus) between two cities, where those types each have their 34 | * own subtypes (e.g., airline carrier, bus company, etc.) 35 | * 36 | * @see Multigraph 37 | */ 38 | public interface TypedEdge extends Edge { 39 | 40 | /** 41 | * Returns the type of information conveyed by this edge. 42 | */ 43 | T edgeType(); 44 | 45 | /** 46 | * Returns {@code true} if the other edge is considered equivalent to this 47 | * edge in a multigrpah. 48 | */ 49 | boolean equals(Object o); 50 | 51 | } -------------------------------------------------------------------------------- /src/main/java/edu/ucla/sspace/wordsi/PartOfSpeechDependencyContextGenerator.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Keith Stevens 3 | * 4 | * This file is part of the S-Space package and is covered under the terms and 5 | * conditions therein. 6 | * 7 | * The S-Space package is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License version 2 as published 9 | * by the Free Software Foundation and distributed hereunder to you. 10 | * 11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES, 12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE 13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY 14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION 15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER 16 | * RIGHTS. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with this program. If not, see . 
20 | */ 21 | 22 | package edu.ucla.sspace.wordsi; 23 | 24 | import edu.ucla.sspace.basis.BasisMapping; 25 | 26 | import edu.ucla.sspace.dependency.DependencyTreeNode; 27 | 28 | 29 | /** 30 | * A {@link DependencyContextGenerator} that marks each co-occurrence with part 31 | * of speech information. 32 | * 33 | * @author Keith Stevens 34 | */ 35 | public class PartOfSpeechDependencyContextGenerator 36 | extends AbstractOccurrenceDependencyContextGenerator{ 37 | 38 | /** 39 | * Constructs a new {@link PartOfSpeechDependencyContextGenerator}. 40 | */ 41 | public PartOfSpeechDependencyContextGenerator( 42 | BasisMapping basis, 43 | int windowSize) { 44 | super(basis, windowSize); 45 | } 46 | 47 | /** 48 | * Returns a string with the node's word plus it's part of speech, with a 49 | * hyphen between the two. 50 | */ 51 | protected String getFeature(DependencyTreeNode node, int index) { 52 | return node.word() + "-" + node.pos(); 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /src/main/java/edu/ucla/sspace/util/FileResourceFinder.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 David Jurgens 3 | * 4 | * This file is part of the S-Space package and is covered under the terms and 5 | * conditions therein. 6 | * 7 | * The S-Space package is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License version 2 as published 9 | * by the Free Software Foundation and distributed hereunder to you. 10 | * 11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES, 12 | * EXPRESS OR IMPLIED ARE MADE. 
BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE 13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY 14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION 15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER 16 | * RIGHTS. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with this program. If not, see . 20 | */ 21 | 22 | package edu.ucla.sspace.util; 23 | 24 | import java.io.BufferedReader; 25 | import java.io.FileReader; 26 | import java.io.IOException; 27 | 28 | 29 | /** 30 | * A {@link ResourceFinder} implementation that maps file name to {@link 31 | * java.io.File} instances. This class is the default implementation for all 32 | * systems operating with a standard JVM environment. 33 | */ 34 | public class FileResourceFinder implements ResourceFinder { 35 | 36 | public FileResourceFinder() { } 37 | 38 | /** 39 | * Finds the file with the specified name and returns a reader for that 40 | * files contents. 41 | * 42 | * @param fileName the name of a file 43 | * 44 | * @return a {@code BufferedReader} to the contents of the specified file 45 | * 46 | * @throws IOException if the resource cannot be found or if an error occurs 47 | * while opening the resource 48 | */ 49 | public BufferedReader open(String fileName) throws IOException { 50 | return new BufferedReader(new FileReader(fileName)); 51 | } 52 | 53 | } -------------------------------------------------------------------------------- /src/main/java/edu/ucla/sspace/matrix/SimpleEntry.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2009 David Jurgens 3 | * 4 | * This file is part of the S-Space package and is covered under the terms and 5 | * conditions therein. 
6 | * 7 | * The S-Space package is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License version 2 as published 9 | * by the Free Software Foundation and distributed hereunder to you. 10 | * 11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES, 12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE 13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY 14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION 15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER 16 | * RIGHTS. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with this program. If not, see . 20 | */ 21 | 22 | package edu.ucla.sspace.matrix; 23 | 24 | 25 | /** 26 | * A basic implemention of {@link MatrixEntry}. 27 | * 28 | * @see MatrixIO#getIterator(File,MatrixIO.Format) 29 | */ 30 | class SimpleEntry implements MatrixEntry, java.io.Serializable { 31 | 32 | private static final long serialVersionUID = 1L; 33 | 34 | private final int row; 35 | private final int column; 36 | private final double value; 37 | 38 | public SimpleEntry(int row, int column, double value) { 39 | this.row = row; 40 | this.column = column; 41 | this.value = value; 42 | } 43 | 44 | /** 45 | * {@inheritDoc} 46 | */ 47 | public int column() { 48 | return column; 49 | } 50 | 51 | /** 52 | * {@inheritDoc} 53 | */ 54 | public int row() { 55 | return row; 56 | } 57 | 58 | /** 59 | * {@inheritDoc} 60 | */ 61 | public double value() { 62 | return value; 63 | } 64 | 65 | public String toString() { 66 | return "(" + row + "," + column + ":" + value + ")"; 67 | } 68 | } -------------------------------------------------------------------------------- /src/test/java/edu/ucla/sspace/dependency/LengthPathWeightTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * 
Copyright 2010 Keith Stevens 3 | * 4 | * This file is part of the S-Space package and is covered under the terms and 5 | * conditions therein. 6 | * 7 | * The S-Space package is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License version 2 as published 9 | * by the Free Software Foundation and distributed hereunder to you. 10 | * 11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES, 12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE 13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY 14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION 15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER 16 | * RIGHTS. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with this program. If not, see . 20 | */ 21 | 22 | package edu.ucla.sspace.dependency; 23 | 24 | import java.util.LinkedList; 25 | import java.util.List; 26 | 27 | import org.junit.Ignore; 28 | import org.junit.Test; 29 | 30 | import static org.junit.Assert.*; 31 | 32 | 33 | public class LengthPathWeightTest extends AbstractPathUtil { 34 | 35 | @Test public void testSimplePath() { 36 | String[][] pathString = {{"cat", "n", "Rel", "dog", "n"}}; 37 | DependencyPath path = makePath(pathString); 38 | DependencyPathWeight weighter = new LengthPathWeight(); 39 | assertEquals(1, weighter.scorePath(path), .000001); 40 | } 41 | 42 | @Test public void testLongPath() { 43 | String[][] pathString = {{"cat", "n", "Rel", "dog", "n"}, 44 | {"dog", "n", "noarelation", "whale", "n"}, 45 | {"whale", "n", "noarelation", "pig", "n"}}; 46 | DependencyPath path = makePath(pathString); 47 | DependencyPathWeight weighter = new LengthPathWeight(); 48 | assertEquals(1d/3, weighter.scorePath(path), .000001); 49 | } 50 | } 51 | 
/**
 * Returns {@code 1}, always.  Neither argument is inspected by any of the
 * {@code sim} overloads, so the result does not depend on the vectors passed
 * in.
 *
 * <p>This metric is symmetric.
 *
 * @author Keith Stevens
 */
public class OneSimilarity extends AbstractSymmetricSimilarityFunction {

    /**
     * {@inheritDoc}
     *
     * @return {@code 1} for every pair of vectors
     */
    public double sim(DoubleVector v1, DoubleVector v2) {
        return 1;
    }

    /**
     * {@inheritDoc}
     *
     * @return {@code 1} for every pair of vectors
     */
    public double sim(IntegerVector v1, IntegerVector v2) {
        return 1;
    }

    /**
     * {@inheritDoc}
     *
     * @return {@code 1} for every pair of vectors
     */
    public double sim(Vector v1, Vector v2) {
        return 1;
    }
}
30 | */ 31 | public class StringDocument implements Document { 32 | 33 | /** 34 | * The text of the document 35 | */ 36 | private final String text; 37 | 38 | /** 39 | * Constructs a {@code Document} using the provided string as the document 40 | * text 41 | * 42 | * @param docText the document text 43 | */ 44 | public StringDocument(String docText) { 45 | this.text = docText; 46 | } 47 | 48 | /** 49 | * {@inheritDoc} This method may be repeatedly called to re-read the 50 | * contents of the document. 51 | */ 52 | public BufferedReader reader() { 53 | return new BufferedReader(new StringReader(text)); 54 | } 55 | 56 | /** 57 | * Returns the entire document 58 | */ 59 | public String toString() { 60 | return text; 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /src/test/java/edu/ucla/sspace/util/ObjectCounterTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2011 David Jurgens 3 | * 4 | * This file is part of the S-Space package and is covered under the terms and 5 | * conditions therein. 6 | * 7 | * The S-Space package is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License version 2 as published 9 | * by the Free Software Foundation and distributed hereunder to you. 10 | * 11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES, 12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE 13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY 14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION 15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER 16 | * RIGHTS. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with this program. If not, see . 
20 | */ 21 | 22 | package edu.ucla.sspace.util; 23 | 24 | import java.util.*; 25 | 26 | import org.junit.Ignore; 27 | import org.junit.Test; 28 | 29 | import static org.junit.Assert.*; 30 | 31 | 32 | /** 33 | * A collection of unit tests for {@link ObjectCounter} 34 | */ 35 | public class ObjectCounterTest { 36 | 37 | @Test public void testCount() { 38 | Counter c = new ObjectCounter(); 39 | c.count(1); 40 | assertEquals(1, c.sum()); 41 | assertEquals(1, c.items().size()); 42 | assertEquals(1, c.getCount(1)); 43 | 44 | c.count(1); 45 | assertEquals(2, c.sum()); 46 | assertEquals(1, c.items().size()); 47 | assertEquals(2, c.getCount(1)); 48 | 49 | c.count(2); 50 | assertEquals(3, c.sum()); 51 | assertEquals(2, c.items().size()); 52 | assertEquals(1, c.getCount(2)); 53 | } 54 | 55 | @Test public void testMax() { 56 | Counter c = new ObjectCounter(); 57 | c.count(5); 58 | c.count(5); 59 | c.count(3); 60 | assertEquals(5, c.max().intValue()); 61 | } 62 | } -------------------------------------------------------------------------------- /src/main/java/edu/ucla/sspace/wordsi/ContextExtractor.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2010 Keith Stevens 3 | * 4 | * This file is part of the S-Space package and is covered under the terms and 5 | * conditions therein. 6 | * 7 | * The S-Space package is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License version 2 as published 9 | * by the Free Software Foundation and distributed hereunder to you. 10 | * 11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES, 12 | * EXPRESS OR IMPLIED ARE MADE. 
/**
 * An interface for extracting context vectors from a document and passing on
 * the vector to a {@link Wordsi} implementation.  Implementations are
 * recommended to use either a {@link ContextGenerator} or a {@link BasisMapping}
 * that is serializable.  Use of a {@link ContextGenerator} or a {@link
 * BasisMapping} separates the feature space from the text traversal, allowing
 * the feature space to be reused, even if a different text traversal method
 * needs to be used.
 *
 * @author Keith Stevens
 */
public interface ContextExtractor {

    /**
     * Processes the content of {@code document} and calls {@link
     * Wordsi#handleContextVector} for each context vector that can be extracted
     * from {@code document}.
     *
     * @param document a reader over the text of the document to process
     * @param wordsi the {@link Wordsi} instance that receives each extracted
     *        context vector
     */
    void processDocument(BufferedReader document, Wordsi wordsi);

    /**
     * Returns the maximum number of dimensions used to represent any given
     * context.
     */
    int getVectorLength();
}
6 | * 7 | * The S-Space package is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License version 2 as published 9 | * by the Free Software Foundation and distributed hereunder to you. 10 | * 11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES, 12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE 13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY 14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION 15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER 16 | * RIGHTS. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with this program. If not, see . 20 | */ 21 | 22 | package edu.ucla.sspace.util.primitive; 23 | 24 | 25 | /** 26 | * A utility class for holding two {@code int}s. 27 | */ 28 | public class IntPair { 29 | 30 | /** 31 | * The first {@code int} in the pair 32 | */ 33 | public final int x; 34 | 35 | /** 36 | * The second {@code int} in the pair 37 | */ 38 | public final int y; 39 | 40 | /** 41 | * Creates a pair out of {@code x} and {@code y} 42 | */ 43 | public IntPair(int x, int y) { 44 | this.x = x; 45 | this.y = y; 46 | } 47 | 48 | /** 49 | * Returns {@code true} if {@code o} is a {@link Pair} and its {@code x} and 50 | * {@code y} elements are equal to those of this pair. Note that equality 51 | * is specific to the ordering of {@code x} and {@code y}. 
52 | */ 53 | public boolean equals(Object o) { 54 | if (!(o instanceof IntPair)) 55 | return false; 56 | IntPair p = (IntPair)o; 57 | return x == p.x && y == p.y; 58 | } 59 | 60 | public int hashCode() { 61 | return x ^ y; 62 | } 63 | 64 | public String toString() { 65 | return "{" + x + ", " + y + "}"; 66 | } 67 | } -------------------------------------------------------------------------------- /src/main/java/edu/ucla/sspace/similarity/KendallsTau.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2011 David Jurgens 3 | * 4 | * This file is part of the S-Space package and is covered under the terms and 5 | * conditions therein. 6 | * 7 | * The S-Space package is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License version 2 as published 9 | * by the Free Software Foundation and distributed hereunder to you. 10 | * 11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES, 12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE 13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY 14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION 15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER 16 | * RIGHTS. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with this program. If not, see . 20 | */ 21 | 22 | package edu.ucla.sspace.similarity; 23 | 24 | import edu.ucla.sspace.common.Similarity; 25 | 26 | import edu.ucla.sspace.vector.DoubleVector; 27 | import edu.ucla.sspace.vector.IntegerVector; 28 | import edu.ucla.sspace.vector.Vector; 29 | 30 | 31 | /** 32 | * A functional class for computing Kendall's tau of the 34 | * values in the two vectors. This method uses tau-b, which is suitable for 35 | * vectors with duplicate values. 
36 | * 37 | * @author David Jurgens 38 | */ 39 | public class KendallsTau extends AbstractSymmetricSimilarityFunction { 40 | 41 | /** 42 | * {@inheritDoc} 43 | */ 44 | public double sim(DoubleVector v1, DoubleVector v2) { 45 | return Similarity.kendallsTau(v1, v2); 46 | } 47 | 48 | /** 49 | * {@inheritDoc} 50 | */ 51 | public double sim(IntegerVector v1, IntegerVector v2) { 52 | return Similarity.kendallsTau(v1, v2); 53 | } 54 | 55 | /** 56 | * {@inheritDoc} 57 | */ 58 | public double sim(Vector v1, Vector v2) { 59 | return Similarity.kendallsTau(v1, v2); 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /src/main/java/edu/ucla/sspace/similarity/TanimotoCoefficient.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright 2011 David Jurgens 3 | * 4 | * This file is part of the S-Space package and is covered under the terms and 5 | * conditions therein. 6 | * 7 | * The S-Space package is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU General Public License version 2 as published 9 | * by the Free Software Foundation and distributed hereunder to you. 10 | * 11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES, 12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE 13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY 14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION 15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER 16 | * RIGHTS. 17 | * 18 | * You should have received a copy of the GNU General Public License 19 | * along with this program. If not, see . 
20 | */ 21 | 22 | package edu.ucla.sspace.similarity; 23 | 24 | import edu.ucla.sspace.common.Similarity; 25 | 26 | import edu.ucla.sspace.vector.DoubleVector; 27 | import edu.ucla.sspace.vector.IntegerVector; 28 | import edu.ucla.sspace.vector.Vector; 29 | 30 | 31 | /** 32 | * Returns the Tanimoto 34 | * Coefficient between any two {@link Vector}s. 35 | * 36 | * @author David Jurgens 37 | */ 38 | public class TanimotoCoefficient extends AbstractSymmetricSimilarityFunction { 39 | 40 | /** 41 | * {@inheritDoc} 42 | */ 43 | public double sim(DoubleVector v1, DoubleVector v2) { 44 | return Similarity.tanimotoCoefficient(v1, v2); 45 | } 46 | 47 | /** 48 | * {@inheritDoc} 49 | */ 50 | public double sim(IntegerVector v1, IntegerVector v2) { 51 | return Similarity.tanimotoCoefficient(v1, v2); 52 | } 53 | 54 | /** 55 | * {@inheritDoc} 56 | */ 57 | public double sim(Vector v1, Vector v2) { 58 | return Similarity.tanimotoCoefficient(v1, v2); 59 | } 60 | } 61 | --------------------------------------------------------------------------------