├── sspace
└── test
├── src
├── main
│ └── java
│ │ ├── edu
│ │ └── ucla
│ │ │ └── sspace
│ │ │ ├── tools
│ │ │ ├── SenseEvalCleaner.java
│ │ │ ├── SemEval2010Cleaner.java
│ │ │ ├── MatrixTranspose.java
│ │ │ ├── BasisPrinter.java
│ │ │ └── SelectTopKWords.java
│ │ │ ├── evaluation
│ │ │ ├── DeeseEvaluator.java
│ │ │ ├── NormedWordPrimingTest.java
│ │ │ ├── WordAssociationTest.java
│ │ │ ├── WordChoiceEvaluation.java
│ │ │ ├── NormedWordPrimingReport.java
│ │ │ ├── NormedPrimingQuestion.java
│ │ │ ├── MultipleChoiceQuestion.java
│ │ │ ├── WordSimilarity.java
│ │ │ ├── WordAssociationReport.java
│ │ │ ├── WordPrimingReport.java
│ │ │ ├── WordSimilarityReport.java
│ │ │ ├── WordPrimingTest.java
│ │ │ ├── SimpleWordSimilarity.java
│ │ │ ├── WordChoiceReport.java
│ │ │ └── WordSimilarityEvaluation.java
│ │ │ ├── svs
│ │ │ └── RelationTuple.java
│ │ │ ├── common
│ │ │ ├── statistics
│ │ │ │ ├── SignificanceTest.java
│ │ │ │ ├── GTest.java
│ │ │ │ └── PointwiseMutualInformationTest.java
│ │ │ ├── DimensionallyInterpretableSemanticSpace.java
│ │ │ └── Filterable.java
│ │ │ ├── vector
│ │ │ ├── SparseIntegerVector.java
│ │ │ ├── SparseVector.java
│ │ │ └── SparseDoubleVector.java
│ │ │ ├── dependency
│ │ │ ├── DependencyPermutationFunction.java
│ │ │ ├── FlatPathWeight.java
│ │ │ ├── UniversalRelationAcceptor.java
│ │ │ ├── LengthPathWeight.java
│ │ │ ├── SubjObjRelationAcceptor.java
│ │ │ ├── UniversalPathAcceptor.java
│ │ │ ├── DependencyTreeTransform.java
│ │ │ ├── DependencyRelation.java
│ │ │ ├── DependencyPathAcceptor.java
│ │ │ ├── DependencyPathWeight.java
│ │ │ └── DependencyTreeNode.java
│ │ │ ├── text
│ │ │ ├── TemporalUsenetCorpusReader.java
│ │ │ ├── Stemmer.java
│ │ │ ├── Document.java
│ │ │ ├── LabeledParsedDocument.java
│ │ │ ├── TemporalDocument.java
│ │ │ ├── LabeledDocument.java
│ │ │ ├── TemporalBloglinesCorpusReader.java
│ │ │ ├── AnnotatedDocument.java
│ │ │ ├── GermanStemmer.java
│ │ │ ├── EnglishStemmer.java
│ │ │ ├── ItalianStemmer.java
│ │ │ ├── SnowballPorterStemmer.java
│ │ │ ├── LabeledStringDocument.java
│ │ │ └── StringDocument.java
│ │ │ ├── graph
│ │ │ ├── WeightedDirectedEdge.java
│ │ │ ├── WeightedTypedEdge.java
│ │ │ ├── WeightedEdge.java
│ │ │ ├── TemporalEdge.java
│ │ │ ├── DirectedEdge.java
│ │ │ ├── DirectedTypedEdge.java
│ │ │ ├── WeightedDirectedTypedEdge.java
│ │ │ ├── GraphConstructionException.java
│ │ │ └── TypedEdge.java
│ │ │ ├── clustering
│ │ │ ├── Assignment.java
│ │ │ ├── DataMatrixLinkClustering.java
│ │ │ ├── seeding
│ │ │ │ └── KMeansSeed.java
│ │ │ └── criterion
│ │ │ │ └── H2Function.java
│ │ │ ├── util
│ │ │ ├── ObjectEntry.java
│ │ │ ├── primitive
│ │ │ │ ├── IntIterator.java
│ │ │ │ └── IntPair.java
│ │ │ ├── IntegerEntry.java
│ │ │ ├── DoubleEntry.java
│ │ │ ├── BiMap.java
│ │ │ ├── Generator.java
│ │ │ ├── ReflectionUtil.java
│ │ │ ├── ResourceFinder.java
│ │ │ ├── Duple.java
│ │ │ ├── SynchronizedIterator.java
│ │ │ └── FileResourceFinder.java
│ │ │ ├── dv
│ │ │ └── DependencyPathBasisMapping.java
│ │ │ ├── matrix
│ │ │ ├── MatrixEntry.java
│ │ │ ├── MatrixIOException.java
│ │ │ └── SimpleEntry.java
│ │ │ ├── hal
│ │ │ ├── EvenWeighting.java
│ │ │ ├── LinearWeighting.java
│ │ │ ├── WeightingFunction.java
│ │ │ └── GeometricWeighting.java
│ │ │ ├── gws
│ │ │ ├── WordOrderBasisMapping.java
│ │ │ └── WordBasisMapping.java
│ │ │ ├── basis
│ │ │ └── StringBasisMapping.java
│ │ │ ├── index
│ │ │ ├── PermutationFunction.java
│ │ │ ├── DoubleVectorGenerator.java
│ │ │ └── IntegerVectorGenerator.java
│ │ │ ├── similarity
│ │ │ ├── AbstractSymmetricSimilarityFunction.java
│ │ │ ├── OneSimilarity.java
│ │ │ ├── KendallsTau.java
│ │ │ └── TanimotoCoefficient.java
│ │ │ ├── wordsi
│ │ │ ├── OccurrenceDependencyContextGenerator.java
│ │ │ ├── OrderingDependencyContextGenerator.java
│ │ │ ├── PartOfSpeechDependencyContextGenerator.java
│ │ │ └── ContextExtractor.java
│ │ │ └── mains
│ │ │ └── TopicWordsiMain.java
│ │ ├── org
│ │ └── tartarus
│ │ │ └── snowball
│ │ │ ├── SnowballStemmer.java
│ │ │ └── Among.java
│ │ └── jnt
│ │ └── FFT
│ │ ├── README
│ │ └── Test.java
└── test
│ └── java
│ └── edu
│ └── ucla
│ └── sspace
│ ├── vector
│ ├── SparseHashVectorTests.java
│ ├── VectorIOTest.java
│ ├── MaskedDoubleVectorViewTest.java
│ └── DenseVectorTests.java
│ ├── matrix
│ ├── factorization
│ │ ├── SingularValueDecompositionLibJTest.java
│ │ ├── SingularValueDecompositionLibCTest.java
│ │ ├── SingularValueDecompositionOctaveTest.java
│ │ └── SingularValueDecompositionMatlabTest.java
│ ├── MatlabSparseFileTransformerTest.java
│ ├── SvdlibcDenseTextFileTransformerTest.java
│ ├── SvdlibcSparseTextFileTransformerTest.java
│ ├── SvdlibcSparseBinaryFileTransformerTest.java
│ └── SvdlibcDenseBinaryFileTransformerTest.java
│ ├── ri
│ └── TestRandomIndexing.java
│ ├── common
│ └── DummySemanticSpace.java
│ ├── text
│ └── PorterStemmerTest.java
│ ├── dependency
│ ├── AbstractPathUtil.java
│ ├── AbstractPathTestBase.java
│ ├── FlatPathWeightTest.java
│ └── LengthPathWeightTest.java
│ └── util
│ └── ObjectCounterTest.java
├── opt
├── lib
│ └── jaws-bin-1.2.jar
└── add_non_maven_jars.sh
├── hadoop
└── pom.xml
└── README.md
/sspace/test:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/src/main/java/edu/ucla/sspace/tools/SenseEvalCleaner.java:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/src/main/java/edu/ucla/sspace/tools/SemEval2010Cleaner.java:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/opt/lib/jaws-bin-1.2.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fozziethebeat/S-Space/HEAD/opt/lib/jaws-bin-1.2.jar
--------------------------------------------------------------------------------
/src/main/java/org/tartarus/snowball/SnowballStemmer.java:
--------------------------------------------------------------------------------
1 |
2 | package org.tartarus.snowball;
3 | import java.lang.reflect.InvocationTargetException;
4 |
5 | public abstract class SnowballStemmer extends SnowballProgram {
6 | public abstract boolean stem();
7 | };
8 |
--------------------------------------------------------------------------------
/opt/add_non_maven_jars.sh:
--------------------------------------------------------------------------------
#!/bin/bash
#
# Installs the bundled third-party jars (jama 1.0 and jaws 1.2) into the local
# Maven repository so that the build can resolve them as ordinary dependencies.

# Abort on the first failed install instead of silently continuing.
set -e

# The -Dfile paths below are relative to the directory holding this script, so
# switch there first; this lets the script be started from any working directory.
cd "$(dirname "${BASH_SOURCE[0]}")"

mvn install:install-file -DgroupId=jama -DartifactId=jama \
    -Dversion=1.0 -Dpackaging=jar -Dfile=lib/jama.jar

mvn install:install-file -DgroupId=jaws -DartifactId=jaws \
    -Dversion=1.2 -Dpackaging=jar -Dfile=lib/jaws-bin-1.2.jar
8 |
9 |
--------------------------------------------------------------------------------
/src/main/java/edu/ucla/sspace/tools/MatrixTranspose.java:
--------------------------------------------------------------------------------
1 | package edu.ucla.sspace.tools;
2 |
3 | import edu.ucla.sspace.matrix.*;
4 |
5 | import java.io.*;
6 |
7 | /**
8 | * @author Keith Stevens
9 | */
10 | public class MatrixTranspose {
11 | public static void main(String[] args) throws Exception {
12 | Matrix m = MatrixIO.readMatrix(new File(args[0]), MatrixIO.Format.DENSE_TEXT);
13 | m = Matrices.transpose(m);
14 | File out = new File(args[1]);
15 | MatrixIO.writeMatrix(m, out, MatrixIO.Format.DENSE_TEXT);
16 | }
17 |
18 | }
19 |
--------------------------------------------------------------------------------
/src/main/java/edu/ucla/sspace/tools/BasisPrinter.java:
--------------------------------------------------------------------------------
1 | package edu.ucla.sspace.tools;
2 |
3 | import edu.ucla.sspace.basis.BasisMapping;
4 | import edu.ucla.sspace.util.SerializableUtil;
5 |
6 | import java.io.File;
7 |
8 |
9 | /**
10 | * @author Keith Stevens
11 | */
12 | public class BasisPrinter {
13 | public static void main(String[] args) {
14 | BasisMapping basis =
15 | SerializableUtil.load(new File(args[0]));
16 | for (int i = 0; i < basis.numDimensions(); ++i)
17 | System.out.println(basis.getDimensionDescription(i));
18 | }
19 | }
20 |
21 |
--------------------------------------------------------------------------------
/src/main/java/edu/ucla/sspace/evaluation/DeeseEvaluator.java:
--------------------------------------------------------------------------------
1 | package edu.ucla.sspace.evaluation;
2 |
3 | import edu.ucla.sspace.common.SemanticSpace;
4 | import edu.ucla.sspace.common.SemanticSpaceIO;
5 |
6 |
7 | public class DeeseEvaluator {
8 | public static void main(String[] args) throws Exception {
9 | DeeseAntonymEvaluation evaluator = new DeeseAntonymEvaluation();
10 | for (String file : args) {
11 | SemanticSpace sspace = SemanticSpaceIO.load(file);
12 | WordAssociationReport report = evaluator.evaluate(sspace);
13 | System.out.printf("%s: %.3f\n", file, report.correlation());
14 | }
15 | }
16 | }
17 |
--------------------------------------------------------------------------------
/src/main/java/edu/ucla/sspace/svs/RelationTuple.java:
--------------------------------------------------------------------------------
1 | package edu.ucla.sspace.svs;
2 |
3 |
4 | /**
5 | * @author Keith Stevens
6 | */
7 | public class RelationTuple {
8 | public int head;
9 | public String relation;
10 |
11 | public RelationTuple(int head, String relation) {
12 | this.head = head;
13 | this.relation = relation;
14 | }
15 |
16 | public boolean equals(Object o) {
17 | if (o == null || !(o instanceof RelationTuple))
18 | return false;
19 | RelationTuple r = (RelationTuple) o;
20 | return this.head == r.head && this.relation == r.relation;
21 | }
22 |
23 | public int hashCode() {
24 | return head ^ relation.hashCode();
25 | }
26 | }
27 |
--------------------------------------------------------------------------------
/src/main/java/jnt/FFT/README:
--------------------------------------------------------------------------------
1 | README for jnt.FFT
2 | Java Numerical Toolkit subpackage for Fast Fourier Transforms.
3 | Bruce R. Miller
4 |
5 | ***NOTE***
6 | Several routines in this subpackage were derived from
7 | Brian Gough's FFT routines in the Gnu Scientific Library (GSL).
8 | GSL is released under the Gnu General Public License
9 | (see http://www.gnu.org/copyleft/gpl.html)
10 | As such, this package must also be released under GPL.
11 |
12 | The modifications I have made to port the routines from
13 | C to Java, and the additional classes developed were
14 | developed as part of my official duties as a U.S.
15 | government employee, and are therefore not subject
16 | to copyright.
17 |
18 | Furthermore, this software is under development, and is
19 | in no way certified or guaranteed.
--------------------------------------------------------------------------------
/src/main/java/jnt/FFT/Test.java:
--------------------------------------------------------------------------------
1 | package jnt.FFT;
2 |
3 | class Test {
4 | public static void main(String[] args) {
5 | RealDoubleFFT_Radix2 ffter = new RealDoubleFFT_Radix2(32);
6 | double[] cat = { 0.1151, -0.1175, -0.0573, -0.0733, -0.0406, -0.0332, -0.3583, 0.0166, -0.1998, -0.1076, -0.0756, -0.2580, 0.0614, -0.2200, -0.0827, 0.0026, 0.0850, -0.4051, -0.0536, 0.0355, -0.0947, -0.0242, 0.0421, 0.1048, -0.1097, -0.0729, 0.0020, -0.0699, -0.1137, 0.0702, 0.1843, -0.1336};
7 | double[] are = { 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000};
8 | ffter.transform(cat, 0, 1);
9 | for (int i = 0; i < 32; i++) {
10 | System.out.println(cat[i]);
11 | }
12 | }
13 | }
14 |
15 |
16 |
--------------------------------------------------------------------------------
/hadoop/pom.xml:
--------------------------------------------------------------------------------
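1 | <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
2 |          xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
3 |   <modelVersion>4.0.0</modelVersion>
4 |
5 |   <parent>
6 |     <artifactId>sspace</artifactId>
7 |     <groupId>edu.ucla.sspace</groupId>
8 |     <version>2.0</version>
9 |     <relativePath>../pom.xml</relativePath>
10 |   </parent>
11 |
12 |   <artifactId>sspace-hadoop</artifactId>
13 |   <name>Hadoop S-Space</name>
14 |   <description>Hadoop based Semantic Space Implementations</description>
15 |
16 |
17 |   <dependencies>
18 |     <dependency>
19 |       <groupId>${project.groupId}</groupId>
20 |       <artifactId>sspace</artifactId>
21 |       <version>2.0</version>
22 |     </dependency>
23 |     <dependency>
24 |       <groupId>org.apache.hadoop</groupId>
25 |       <artifactId>hadoop-core</artifactId>
26 |       <version>1.0.1</version>
27 |     </dependency>
28 |   </dependencies>
29 |
30 | </project>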
--------------------------------------------------------------------------------
/src/main/java/org/tartarus/snowball/Among.java:
--------------------------------------------------------------------------------
1 | package org.tartarus.snowball;
2 |
3 | import java.lang.reflect.Method;
4 |
5 | public class Among {
6 | public Among (String s, int substring_i, int result,
7 | String methodname, SnowballProgram methodobject) {
8 | this.s_size = s.length();
9 | this.s = s.toCharArray();
10 | this.substring_i = substring_i;
11 | this.result = result;
12 | this.methodobject = methodobject;
13 | if (methodname.length() == 0) {
14 | this.method = null;
15 | } else {
16 | try {
17 | this.method = methodobject.getClass().
18 | getDeclaredMethod(methodname, new Class[0]);
19 | } catch (NoSuchMethodException e) {
20 | throw new RuntimeException(e);
21 | }
22 | }
23 | }
24 |
25 | public final int s_size; /* search string */
26 | public final char[] s; /* search string */
27 | public final int substring_i; /* index to longest matching substring */
28 | public final int result; /* result of the lookup */
29 | public final Method method; /* method to use if substring matches */
30 | public final SnowballProgram methodobject; /* object to invoke method on */
31 | };
32 |
--------------------------------------------------------------------------------
/src/main/java/edu/ucla/sspace/common/statistics/SignificanceTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2011 David Jurgens
3 | *
4 | * This file is part of the S-Space package and is covered under the terms and
5 | * conditions therein.
6 | *
7 | * The S-Space package is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License version 2 as published
9 | * by the Free Software Foundation and distributed hereunder to you.
10 | *
11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
16 | * RIGHTS.
17 | *
18 | * You should have received a copy of the GNU General Public License
19 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | package edu.ucla.sspace.common.statistics;
23 |
/**
 * A test that computes a single score from four integer counts.
 *
 * <p>NOTE(review): the parameter names suggest the four cells of a two-by-two
 * table of observations for two events, A and B -- confirm against the
 * implementing classes.
 */
public interface SignificanceTest {

    /**
     * Computes the score for the given counts.
     *
     * @param both presumably the number of observations with both A and B
     * @param justA presumably the number of observations with A but not B
     * @param justB presumably the number of observations with B but not A
     * @param neither presumably the number of observations with neither
     *
     * @return the score assigned to these counts by the implementation
     */
    double score(int both, int justA, int justB, int neither);
}
29 |
--------------------------------------------------------------------------------
/src/main/java/edu/ucla/sspace/vector/SparseIntegerVector.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2009 David Jurgens
3 | *
4 | * This file is part of the S-Space package and is covered under the terms and
5 | * conditions therein.
6 | *
7 | * The S-Space package is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License version 2 as published
9 | * by the Free Software Foundation and distributed hereunder to you.
10 | *
11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
16 | * RIGHTS.
17 | *
18 | * You should have received a copy of the GNU General Public License
19 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | package edu.ucla.sspace.vector;
23 |
24 |
25 | /**
26 | * An interface for sparse {@link IntegerVector} instances.
27 | */
28 | public interface SparseIntegerVector
29 | extends SparseVector, IntegerVector { }
30 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | The S-Space Package is a collection of algorithms for building Semantic Spaces
2 | as well as a highly-scalable library for designing new distributional semantics
3 | algorithms. Distributional algorithms process text corpora and represent the
4 | semantics of words as high-dimensional feature vectors. These approaches are
5 | known by many names, such as word spaces, semantic spaces, or distributed
6 | semantics and rest upon the Distributional Hypothesis: words that appear in
7 | similar contexts have similar meanings.
8 |
9 | The research and development is being done by the Natural Language Processing
10 | group at UCLA led by David Jurgens and Keith Stevens, under the advisement of Dr.
11 | Michael Dyer.
12 |
13 | See the [Getting Started](../../wiki/GettingStarted) page for
14 | a quick introduction on how to use the S-Space package, see the [Package
15 | Overview](../../wiki/PackageLayout) for information on the
16 | code and available features, or dive right into the
17 | [Javadoc](http://fozziethebeat.github.com/S-Space/apidocs/) to see what's
18 | available now. For any questions, please contact us via our mailing lists:
19 | [S-Space-Users][1] and [S-Space-Research-Dev][2].
20 |
21 | [1]:mailto:s-space-users@googlegroups.com
22 | [2]:mailto:s-space-research-dev@googlegroups.com
23 |
--------------------------------------------------------------------------------
/src/main/java/edu/ucla/sspace/dependency/DependencyPermutationFunction.java:
--------------------------------------------------------------------------------
1 | package edu.ucla.sspace.dependency;
2 |
3 | import edu.ucla.sspace.vector.Vector;
4 |
5 | import java.util.LinkedList;
6 |
7 |
8 | /**
9 | * An interface for permuting a {@link Vector} based on a dependecny path,
10 | * represented as a list of {@link DependencyRelation}s. Implemenations are
11 | * recomended to extend existing {@link
12 | * edu.ucla.sspace.index.PermutationFunction PermutationFunction}s but simply
13 | * using an existing {@link edu.ucla.sspace.index.PermutationFunction
14 | * PermutationFunction}. Implementations are also suggested to be thread-safe.
15 | *
16 | * @see edu.ucla.sspace.index.PermutationFunction
17 | *
18 | * @author Keith Stevens
19 | */
20 | public interface DependencyPermutationFunction {
21 |
22 | /**
23 | * Returns a permuted form of {code vector} based on the dependency path
24 | * provided.
25 | *
26 | * @param path A linked list of word,relation pairs that compose a
27 | * dependency path
28 | * @param vector The {@link Vector} to permute
29 | *
30 | * @return A new permuted {@link Vector} of the same type as {@code vector}
31 | * that is
32 | */
33 | T permute(T vector, DependencyPath path);
34 | }
35 |
--------------------------------------------------------------------------------
/src/main/java/edu/ucla/sspace/evaluation/NormedWordPrimingTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2010 Keith Stevens
3 | *
4 | * This file is part of the S-Space package and is covered under the terms and
5 | * conditions therein.
6 | *
7 | * The S-Space package is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License version 2 as published
9 | * by the Free Software Foundation and distributed hereunder to you.
10 | *
11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
16 | * RIGHTS.
17 | *
18 | * You should have received a copy of the GNU General Public License
19 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | package edu.ucla.sspace.evaluation;
23 |
24 | import edu.ucla.sspace.common.SemanticSpace;
25 |
26 |
27 | /**
28 | * @author Keith Stevens
29 | */
30 | public interface NormedWordPrimingTest {
31 |
32 | public NormedWordPrimingReport evaluate(SemanticSpace sspace);
33 | }
34 |
--------------------------------------------------------------------------------
/src/main/java/edu/ucla/sspace/evaluation/WordAssociationTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2010 David Jurgens
3 | *
4 | * This file is part of the S-Space package and is covered under the terms and
5 | * conditions therein.
6 | *
7 | * The S-Space package is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License version 2 as published
9 | * by the Free Software Foundation and distributed hereunder to you.
10 | *
11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
16 | * RIGHTS.
17 | *
18 | * You should have received a copy of the GNU General Public License
19 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | package edu.ucla.sspace.evaluation;
23 |
24 | import edu.ucla.sspace.common.SemanticSpace;
25 |
26 |
27 | /**
28 | *
29 | *
30 | * @author David Jurgens
31 | */
32 | public interface WordAssociationTest {
33 |
34 | public WordAssociationReport evaluate(SemanticSpace sspace);
35 |
36 | }
37 |
--------------------------------------------------------------------------------
/src/main/java/edu/ucla/sspace/text/TemporalUsenetCorpusReader.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2009 Keith Stevens
3 | *
4 | * This file is part of the S-Space package and is covered under the terms and
5 | * conditions therein.
6 | *
7 | * The S-Space package is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License version 2 as published
9 | * by the Free Software Foundation and distributed hereunder to you.
10 | *
11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
16 | * RIGHTS.
17 | *
18 | * You should have received a copy of the GNU General Public License
19 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | package edu.ucla.sspace.text;
23 |
24 |
25 | /**
26 | * A subclass of {@code UsenetCorpusReader} that always includes timestamps.
27 | *
28 | * @author Keith Stevens
29 | */
30 | public class TemporalUsenetCorpusReader extends UsenetCorpusReader {
31 |
32 | public TemporalUsenetCorpusReader() {
33 | super(true);
34 | }
35 | }
36 |
--------------------------------------------------------------------------------
/src/main/java/edu/ucla/sspace/text/Stemmer.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2009 Keith Stevens
3 | *
4 | * This file is part of the S-Space package and is covered under the terms and
5 | * conditions therein.
6 | *
7 | * The S-Space package is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License version 2 as published
9 | * by the Free Software Foundation and distributed hereunder to you.
10 | *
11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
16 | * RIGHTS.
17 | *
18 | * You should have received a copy of the GNU General Public License
19 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | package edu.ucla.sspace.text;
23 |
24 |
/**
 * An interface for classes that reduce tokens to their root form.
 */
public interface Stemmer {

    /**
     * Converts the token to its root form.  A token that is already in root
     * form is returned as is.
     *
     * @param token the token to stem
     *
     * @return the root form of {@code token}
     */
    String stem(String token);
}
--------------------------------------------------------------------------------
/src/main/java/edu/ucla/sspace/text/Document.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2009 David Jurgens
3 | *
4 | * This file is part of the S-Space package and is covered under the terms and
5 | * conditions therein.
6 | *
7 | * The S-Space package is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License version 2 as published
9 | * by the Free Software Foundation and distributed hereunder to you.
10 | *
11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
16 | * RIGHTS.
17 | *
18 | * You should have received a copy of the GNU General Public License
19 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | package edu.ucla.sspace.text;
23 |
24 | import java.io.BufferedReader;
25 |
/**
 * An abstraction for a document, giving document processors a uniform way to
 * access its text.
 */
public interface Document {

    /**
     * Returns the {@code BufferedReader} for this document's text.
     *
     * @return a reader over the text of this document
     */
    BufferedReader reader();
}
38 |
--------------------------------------------------------------------------------
/src/main/java/edu/ucla/sspace/text/LabeledParsedDocument.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2011 David Jurgens
3 | *
4 | * This file is part of the S-Space package and is covered under the terms and
5 | * conditions therein.
6 | *
7 | * The S-Space package is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License version 2 as published
9 | * by the Free Software Foundation and distributed hereunder to you.
10 | *
11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
16 | * RIGHTS.
17 | *
18 | * You should have received a copy of the GNU General Public License
19 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | package edu.ucla.sspace.text;
23 |
24 |
25 | /**
26 | * A union interface for a document that has been (or will be) dependency parsed
27 | * to generate an accompanying parse tree of its contents and that has an
28 | * accompanying label about its source or contents.
29 | */
30 | public interface LabeledParsedDocument extends LabeledDocument, ParsedDocument {
31 |
32 | }
33 |
--------------------------------------------------------------------------------
/src/main/java/edu/ucla/sspace/dependency/FlatPathWeight.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2010 Keith Stevens
3 | *
4 | * This file is part of the S-Space package and is covered under the terms and
5 | * conditions therein.
6 | *
7 | * The S-Space package is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License version 2 as published
9 | * by the Free Software Foundation and distributed hereunder to you.
10 | *
11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
16 | * RIGHTS.
17 | *
18 | * You should have received a copy of the GNU General Public License
19 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | package edu.ucla.sspace.dependency;
23 |
24 |
25 | /**
26 | * A {@link DependencyPathWeight} that returns {@code 1} for every path.
27 | *
28 | * @author Keith Stevens
29 | */
30 | public class FlatPathWeight implements DependencyPathWeight {
31 |
32 | /**
33 | * {@inheritDoc}
34 | */
35 | public double scorePath(DependencyPath path) {
36 | return 1;
37 | }
38 | }
39 |
--------------------------------------------------------------------------------
/src/main/java/edu/ucla/sspace/dependency/UniversalRelationAcceptor.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2010 Keith Stevens
3 | *
4 | * This file is part of the S-Space package and is covered under the terms and
5 | * conditions therein.
6 | *
7 | * The S-Space package is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License version 2 as published
9 | * by the Free Software Foundation and distributed hereunder to you.
10 | *
11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
16 | * RIGHTS.
17 | *
18 | * You should have received a copy of the GNU General Public License
19 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | package edu.ucla.sspace.dependency;
23 |
24 |
25 | /**
26 | * A {@link DependencyPathAcceptor} that accepts all links.
27 | *
28 | * @author Keith Stevens
29 | */
30 | public class UniversalRelationAcceptor implements DependencyRelationAcceptor {
31 |
32 | /**
33 | * {@inheritDoc}
34 | */
35 | public boolean accept(DependencyRelation relation) {
36 | return true;
37 | }
38 | }
39 |
40 |
--------------------------------------------------------------------------------
/src/main/java/edu/ucla/sspace/graph/WeightedDirectedEdge.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2011 David Jurgens
3 | *
4 | * This file is part of the S-Space package and is covered under the terms and
5 | * conditions therein.
6 | *
7 | * The S-Space package is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License version 2 as published
9 | * by the Free Software Foundation and distributed hereunder to you.
10 | *
11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
16 | * RIGHTS.
17 | *
18 | * You should have received a copy of the GNU General Public License
19 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | package edu.ucla.sspace.graph;
23 |
24 |
25 | /**
26 | * An interface for representing a weigthed edge between two vertices.
27 | */
28 | public interface WeightedDirectedEdge extends WeightedEdge, DirectedEdge {
29 |
30 | /**
31 | * Returns {@code true} if {@code o} connects the same two vertices with the
32 | * same the edge orientation regardless of edge weight.
33 | */
34 | boolean equals(Object o);
35 |
36 | }
--------------------------------------------------------------------------------
/src/main/java/edu/ucla/sspace/clustering/Assignment.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2009 Keith Stevens
3 | *
4 | * This file is part of the S-Space package and is covered under the terms and
5 | * conditions therein.
6 | *
7 | * The S-Space package is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License version 2 as published
9 | * by the Free Software Foundation and distributed hereunder to you.
10 | *
11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
16 | * RIGHTS.
17 | *
18 | * You should have received a copy of the GNU General Public License
19 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | package edu.ucla.sspace.clustering;
23 |
24 |
25 | /**
26 | * A cluster assignment for a data point.
27 | *
28 | * @see Clustering
29 | */
30 | public interface Assignment {
31 |
32 | /**
33 | * Returns the cluster id's that a specific data point was assigned to.
34 | */
35 | int[] assignments();
36 |
37 | /**
38 | * Returns the number of assignments given for this data point.
39 | */
40 | int length();
41 | }
42 |
--------------------------------------------------------------------------------
/src/main/java/edu/ucla/sspace/vector/SparseVector.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2009 Keith Stevens
3 | *
4 | * This file is part of the S-Space package and is covered under the terms and
5 | * conditions therein.
6 | *
7 | * The S-Space package is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License version 2 as published
9 | * by the Free Software Foundation and distributed hereunder to you.
10 | *
11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
16 | * RIGHTS.
17 | *
18 | * You should have received a copy of the GNU General Public License
19 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | package edu.ucla.sspace.vector;
23 |
24 |
25 | /**
26 | * An interface for {@code Vector} implementations whose values are sparse and
27 | * that support access to only those indices with non-zero values.
28 | *
29 | * @author Keith Stevens
30 | */
31 | public interface SparseVector extends Vector {
32 |
33 | /**
34 | * Returns all the indices whose values are non-zero
35 | */
36 | int[] getNonZeroIndices();
37 | }
38 |
--------------------------------------------------------------------------------
/src/main/java/edu/ucla/sspace/vector/SparseDoubleVector.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2009 David Jurgens
3 | *
4 | * This file is part of the S-Space package and is covered under the terms and
5 | * conditions therein.
6 | *
7 | * The S-Space package is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License version 2 as published
9 | * by the Free Software Foundation and distributed hereunder to you.
10 | *
11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
16 | * RIGHTS.
17 | *
18 | * You should have received a copy of the GNU General Public License
19 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | package edu.ucla.sspace.vector;
23 |
24 |
25 | /**
26 | * An interface for sparse {@link DoubleVector} instances.
27 | */
28 | public interface SparseDoubleVector
29 | extends SparseVector, DoubleVector {
30 |
31 | /**
32 | * Returns a new instance of a vector with the same type. If the vector is
33 | * bounded by size, the returned instance will have the same bound.
34 | */
35 | SparseDoubleVector instanceCopy();
36 | }
37 |
--------------------------------------------------------------------------------
/src/main/java/edu/ucla/sspace/text/TemporalDocument.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2009 David Jurgens
3 | *
4 | * This file is part of the S-Space package and is covered under the terms and
5 | * conditions therein.
6 | *
7 | * The S-Space package is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License version 2 as published
9 | * by the Free Software Foundation and distributed hereunder to you.
10 | *
11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
16 | * RIGHTS.
17 | *
18 | * You should have received a copy of the GNU General Public License
19 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | package edu.ucla.sspace.text;
23 |
24 | import edu.ucla.sspace.text.Document;
25 |
26 | import java.io.BufferedReader;
27 |
28 | /**
29 | * An abstraction for a document that allows document processors to access
30 | * time-annotated text in a uniform manner.
31 | */
32 | public interface TemporalDocument extends Document {
33 |
34 | /**
35 | * Returns the time at which this document was created.
36 | */
37 | long timeStamp();
38 |
39 | }
40 |
--------------------------------------------------------------------------------
/src/main/java/edu/ucla/sspace/graph/WeightedTypedEdge.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2011 David Jurgens
3 | *
4 | * This file is part of the S-Space package and is covered under the terms and
5 | * conditions therein.
6 | *
7 | * The S-Space package is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License version 2 as published
9 | * by the Free Software Foundation and distributed hereunder to you.
10 | *
11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
16 | * RIGHTS.
17 | *
18 | * You should have received a copy of the GNU General Public License
19 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | package edu.ucla.sspace.graph;
23 |
24 |
25 | /**
26 | * An interface for representing a weigthed edge between two vertices in a
27 | * multigraph.
28 | *
29 | * @see Multigraph
30 | */
31 | public interface WeightedTypedEdge extends TypedEdge, WeightedEdge {
32 |
33 | /**
34 | * Returns {@code true} if {@code o} connects the same two vertices
35 | * regardless of the edge orientation, type, and weight.
36 | */
37 | boolean equals(Object o);
38 |
39 | }
--------------------------------------------------------------------------------
/src/main/java/edu/ucla/sspace/graph/WeightedEdge.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2011 David Jurgens
3 | *
4 | * This file is part of the S-Space package and is covered under the terms and
5 | * conditions therein.
6 | *
7 | * The S-Space package is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License version 2 as published
9 | * by the Free Software Foundation and distributed hereunder to you.
10 | *
11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
16 | * RIGHTS.
17 | *
18 | * You should have received a copy of the GNU General Public License
19 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | package edu.ucla.sspace.graph;
23 |
24 |
25 | /**
26 | * An interface for representing a weigthed edge between two vertices.
27 | */
28 | public interface WeightedEdge extends Edge {
29 |
30 | /**
31 | * Returns {@code true} if {@code o} connects the same two vertices
32 | * regardless of the edge orientation and weight.
33 | */
34 | boolean equals(Object o);
35 |
36 | /**
37 | * Returns the weight for this edge.
38 | */
39 | double weight();
40 | }
--------------------------------------------------------------------------------
/src/main/java/edu/ucla/sspace/util/ObjectEntry.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2010 David Jurgens
3 | *
4 | * This file is part of the S-Space package and is covered under the terms and
5 | * conditions therein.
6 | *
7 | * The S-Space package is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License version 2 as published
9 | * by the Free Software Foundation and distributed hereunder to you.
10 | *
11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
16 | * RIGHTS.
17 | *
18 | * You should have received a copy of the GNU General Public License
19 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | package edu.ucla.sspace.util;
23 |
24 |
/**
 * An object that represents an index that has an associated typed {@code
 * Object} value.
 *
 * @param <T> the type of the object that this entry maps to
 */
public interface ObjectEntry<T> {

    /**
     * Returns the index position of this entry.
     */
    int index();

    /**
     * Returns the object at this entry's index.
     */
    T value();
}
43 |
--------------------------------------------------------------------------------
/src/main/java/edu/ucla/sspace/text/LabeledDocument.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2011 David Jurgens
3 | *
4 | * This file is part of the S-Space package and is covered under the terms and
5 | * conditions therein.
6 | *
7 | * The S-Space package is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License version 2 as published
9 | * by the Free Software Foundation and distributed hereunder to you.
10 | *
11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
16 | * RIGHTS.
17 | *
18 | * You should have received a copy of the GNU General Public License
19 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | package edu.ucla.sspace.text;
23 |
24 | import java.io.BufferedReader;
25 |
26 | /**
27 | * An abstraction for a document that has an accompanying label or name.
28 | */
29 | public interface LabeledDocument extends Document {
30 |
31 | /**
32 | * Returns a label associated with this particular document. The label is
33 | * intended to provide information on the source of the document or the
34 | * contents therein.
35 | */
36 | String label();
37 |
38 | }
39 |
--------------------------------------------------------------------------------
/src/main/java/edu/ucla/sspace/text/TemporalBloglinesCorpusReader.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2009 Keith Stevens
3 | *
4 | * This file is part of the S-Space package and is covered under the terms and
5 | * conditions therein.
6 | *
7 | * The S-Space package is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License version 2 as published
9 | * by the Free Software Foundation and distributed hereunder to you.
10 | *
11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
16 | * RIGHTS.
17 | *
18 | * You should have received a copy of the GNU General Public License
19 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | package edu.ucla.sspace.text;
23 |
24 | /**
25 | * A subclass of {@code BloglinesCorpusReader} that always includes timestamps.
26 | *
27 | * @author Keith Stevens
28 | */
29 | public class TemporalBloglinesCorpusReader extends BloglinesCorpusReader {
30 |
31 | /**
32 | * Creates a {@code BloglinesCorpusReader} that will always include
33 | * timestamps.
34 | */
35 | public TemporalBloglinesCorpusReader() {
36 | super(true);
37 | }
38 | }
39 |
--------------------------------------------------------------------------------
/src/main/java/edu/ucla/sspace/util/primitive/IntIterator.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2011 David Jurgens
3 | *
4 | * This file is part of the S-Space package and is covered under the terms and
5 | * conditions therein.
6 | *
7 | * The S-Space package is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License version 2 as published
9 | * by the Free Software Foundation and distributed hereunder to you.
10 | *
11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
16 | * RIGHTS.
17 | *
18 | * You should have received a copy of the GNU General Public License
19 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | package edu.ucla.sspace.util.primitive;
23 |
24 | import java.util.Iterator;
25 |
26 |
/**
 * A refinement of the {@link Iterator} interface for iterating over primitive
 * {@code int} values.  The inherited {@link Iterator#next()} is typed to
 * return {@link Integer}, while {@link #nextInt()} returns the value as a
 * primitive {@code int}.
 */
public interface IntIterator extends Iterator<Integer> {

    /**
     * Returns the next {@code int} in the sequence.
     *
     * @throws java.util.NoSuchElementException if no further {@code int}
     *         values remain
     */
    int nextInt();

}
--------------------------------------------------------------------------------
/src/main/java/edu/ucla/sspace/dependency/LengthPathWeight.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2010 Keith Stevens
3 | *
4 | * This file is part of the S-Space package and is covered under the terms and
5 | * conditions therein.
6 | *
7 | * The S-Space package is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License version 2 as published
9 | * by the Free Software Foundation and distributed hereunder to you.
10 | *
11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
16 | * RIGHTS.
17 | *
18 | * You should have received a copy of the GNU General Public License
19 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | package edu.ucla.sspace.dependency;
23 |
24 |
25 | /**
26 | * A {@link DependencyPathWeight} that scores paths inversely proportional to
27 | * their length. The scoring function is 1 / number of relations.
28 | *
29 | * @author Keith Stevens
30 | */
31 | public class LengthPathWeight implements DependencyPathWeight {
32 |
33 | /**
34 | * {@inheritDoc}
35 | */
36 | public double scorePath(DependencyPath path) {
37 | return 1d / (path.length());
38 | }
39 | }
40 |
--------------------------------------------------------------------------------
/src/main/java/edu/ucla/sspace/graph/TemporalEdge.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2011 David Jurgens
3 | *
4 | * This file is part of the S-Space package and is covered under the terms and
5 | * conditions therein.
6 | *
7 | * The S-Space package is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License version 2 as published
9 | * by the Free Software Foundation and distributed hereunder to you.
10 | *
11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
16 | * RIGHTS.
17 | *
18 | * You should have received a copy of the GNU General Public License
19 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | package edu.ucla.sspace.graph;
23 |
24 |
25 | /**
26 | * An interface for representing a edge in a {@link DynamicGraph}, where each
27 | * edge is associated with a specific time.
28 | */
29 | public interface TemporalEdge extends Edge {
30 |
31 | /**
32 | * Returns {@code true} if {@code o} connects the same two vertices
33 | * at the same time.
34 | */
35 | boolean equals(Object o);
36 |
37 | /**
38 | * Returns milliseconds since the epoch when this edge occurrend.
39 | */
40 | long time();
41 | }
--------------------------------------------------------------------------------
/src/main/java/edu/ucla/sspace/dependency/SubjObjRelationAcceptor.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2010 Keith Stevens
3 | *
4 | * This file is part of the S-Space package and is covered under the terms and
5 | * conditions therein.
6 | *
7 | * The S-Space package is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License version 2 as published
9 | * by the Free Software Foundation and distributed hereunder to you.
10 | *
11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
16 | * RIGHTS.
17 | *
18 | * You should have received a copy of the GNU General Public License
19 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | package edu.ucla.sspace.dependency;
23 |
24 |
25 | /**
26 | * A {@link DependencyPathAcceptor} that accepts links with the {@code subj} or
27 | * {@code obj} relation.
28 | *
29 | * @author Keith Stevens
30 | */
31 | public class SubjObjRelationAcceptor implements DependencyRelationAcceptor {
32 |
33 | /**
34 | * {@inheritDoc}
35 | */
36 | public boolean accept(DependencyRelation relation) {
37 | return relation.relation().equals("SBJ")
38 | || relation.relation().equals("OBJ");
39 | }
40 | }
41 |
--------------------------------------------------------------------------------
/src/main/java/edu/ucla/sspace/dependency/UniversalPathAcceptor.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2010 Keith Stevens
3 | *
4 | * This file is part of the S-Space package and is covered under the terms and
5 | * conditions therein.
6 | *
7 | * The S-Space package is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License version 2 as published
9 | * by the Free Software Foundation and distributed hereunder to you.
10 | *
11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
16 | * RIGHTS.
17 | *
18 | * You should have received a copy of the GNU General Public License
19 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | package edu.ucla.sspace.dependency;
23 |
24 |
25 | /**
26 | * A {@link DependencyPathAcceptor} that accepts all links.
27 | *
28 | * @author Keith Stevens
29 | */
30 | public class UniversalPathAcceptor implements DependencyPathAcceptor {
31 |
32 | /**
33 | * {@inheritDoc}
34 | */
35 | public boolean accepts(DependencyPath relation) {
36 | return true;
37 | }
38 |
39 | /**
40 | * {@inheritDoc}
41 | */
42 | public int maxPathLength() {
43 | return Integer.MAX_VALUE;
44 | }
45 | }
46 |
--------------------------------------------------------------------------------
/src/main/java/edu/ucla/sspace/evaluation/WordChoiceEvaluation.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2009 David Jurgens
3 | *
4 | * This file is part of the S-Space package and is covered under the terms and
5 | * conditions therein.
6 | *
7 | * The S-Space package is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License version 2 as published
9 | * by the Free Software Foundation and distributed hereunder to you.
10 | *
11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
16 | * RIGHTS.
17 | *
18 | * You should have received a copy of the GNU General Public License
19 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | package edu.ucla.sspace.evaluation;
23 |
24 | import java.util.Collection;
25 |
26 | import edu.ucla.sspace.common.SemanticSpace;
27 |
28 | /**
29 | * A evaluation metric that uses a selection of a word from multiple choices.
30 | *
31 | * @author David Jurgens
32 | */
33 | public interface WordChoiceEvaluation {
34 |
35 | /**
36 | * Returns a collection of multiple choice questions that can be used to
37 | * evaluate a {@link SemanticSpace}.
38 | */
39 | Collection getQuestions();
40 |
41 | }
42 |
--------------------------------------------------------------------------------
/src/main/java/edu/ucla/sspace/util/IntegerEntry.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2010 David Jurgens
3 | *
4 | * This file is part of the S-Space package and is covered under the terms and
5 | * conditions therein.
6 | *
7 | * The S-Space package is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License version 2 as published
9 | * by the Free Software Foundation and distributed hereunder to you.
10 | *
11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
16 | * RIGHTS.
17 | *
18 | * You should have received a copy of the GNU General Public License
19 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | package edu.ucla.sspace.util;
23 |
24 |
/**
 * Pairs an index with an {@code int} value.  It exists so that classes
 * offering iterator access over their indexable values can do so without
 * incurring auto-boxing overhead.
 */
public interface IntegerEntry {

    /**
     * Returns the position of this entry.
     */
    int index();

    /**
     * Returns the value stored at this entry's index.
     */
    int value();
}
43 |
--------------------------------------------------------------------------------
/src/main/java/edu/ucla/sspace/dv/DependencyPathBasisMapping.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2010 David Jurgens
3 | *
4 | * This file is part of the S-Space package and is covered under the terms and
5 | * conditions therein.
6 | *
7 | * The S-Space package is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License version 2 as published
9 | * by the Free Software Foundation and distributed hereunder to you.
10 | *
11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
16 | * RIGHTS.
17 | *
18 | * You should have received a copy of the GNU General Public License
19 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | package edu.ucla.sspace.dv;
23 |
24 | import edu.ucla.sspace.basis.BasisMapping;
25 |
26 | import edu.ucla.sspace.dependency.DependencyPath;
27 |
28 |
29 | /**
30 | * An interface for specifying how the occurrence of a word in a specific
31 | * syntactic relationship is quantified as a dimension in the vector basis. For
32 | * example, each word may correspond to a unique dimension regardless of how it
33 | * is grammatically related.
34 | */
35 | public interface DependencyPathBasisMapping
36 | extends BasisMapping {
37 | }
38 |
--------------------------------------------------------------------------------
/src/main/java/edu/ucla/sspace/util/DoubleEntry.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2010 David Jurgens
3 | *
4 | * This file is part of the S-Space package and is covered under the terms and
5 | * conditions therein.
6 | *
7 | * The S-Space package is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License version 2 as published
9 | * by the Free Software Foundation and distributed hereunder to you.
10 | *
11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
16 | * RIGHTS.
17 | *
18 | * You should have received a copy of the GNU General Public License
19 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | package edu.ucla.sspace.util;
23 |
24 |
/**
 * Pairs an index with a {@code double} value.  It exists so that classes
 * offering iterator access over their indexable values can do so without
 * incurring auto-boxing overhead.
 */
public interface DoubleEntry {

    /**
     * Returns the position of this entry.
     */
    int index();

    /**
     * Returns the value stored at this entry's index.
     */
    double value();
}
43 |
--------------------------------------------------------------------------------
/src/main/java/edu/ucla/sspace/matrix/MatrixEntry.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2009 David Jurgens
3 | *
4 | * This file is part of the S-Space package and is covered under the terms and
5 | * conditions therein.
6 | *
7 | * The S-Space package is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License version 2 as published
9 | * by the Free Software Foundation and distributed hereunder to you.
10 | *
11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
16 | * RIGHTS.
17 | *
18 | * You should have received a copy of the GNU General Public License
19 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | package edu.ucla.sspace.matrix;
23 |
24 |
/**
 * A single cell of a matrix: a row index, a column index and the value found
 * at that position.
 *
 * @see MatrixIO#getMatrixFileIterator(File,MatrixIO.Format)
 */
public interface MatrixEntry {

    /**
     * Returns the index of the row in which this entry lies.
     */
    int row();

    /**
     * Returns the index of the column in which this entry lies.
     */
    int column();

    /**
     * Returns the matrix value located at this entry's row and column.
     */
    double value();
}
--------------------------------------------------------------------------------
/src/main/java/edu/ucla/sspace/common/statistics/GTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2011 David Jurgens
3 | *
4 | * This file is part of the S-Space package and is covered under the terms and
5 | * conditions therein.
6 | *
7 | * The S-Space package is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License version 2 as published
9 | * by the Free Software Foundation and distributed hereunder to you.
10 | *
11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
16 | * RIGHTS.
17 | *
18 | * You should have received a copy of the GNU General Public License
19 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | package edu.ucla.sspace.common.statistics;
23 |
24 | public class GTest implements SignificanceTest {
25 |
26 | /**
27 | * Returns the G-test statistic
28 | */
29 | public double score (int both, int justA, int justB, int neither) {
30 |
31 | int all = both + justA + justB + neither;
32 | double probA = (both + justA) / (double)all;
33 | double probB = (both + justB) / (double)all;
34 |
35 | double expectedBoth = (probA * probB) * all;
36 |
37 | return 2 * (both * Math.log(both / expectedBoth));
38 | }
39 | }
40 |
--------------------------------------------------------------------------------
/src/main/java/edu/ucla/sspace/graph/DirectedEdge.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2011 David Jurgens
3 | *
4 | * This file is part of the S-Space package and is covered under the terms and
5 | * conditions therein.
6 | *
7 | * The S-Space package is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License version 2 as published
9 | * by the Free Software Foundation and distributed hereunder to you.
10 | *
11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
16 | * RIGHTS.
17 | *
18 | * You should have received a copy of the GNU General Public License
19 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | package edu.ucla.sspace.graph;
23 |
24 |
25 | /**
26 | * An interface for representing a directed edge between
28 | * two vertices. This interface can be seen as a refinement of the {@link
29 | * #equal(Object) equals} method that takes into account the orientation of the
30 | * edge.
31 | */
32 | public interface DirectedEdge extends Edge {
33 |
34 | /**
35 | * Returns {@code true} if {@code o} connects the same two vertices and have
36 | * the same edge orientation.
37 | */
38 | boolean equals(Object o);
39 |
40 | }
--------------------------------------------------------------------------------
/src/main/java/edu/ucla/sspace/dependency/DependencyTreeTransform.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2011 Keith Stevens
3 | *
4 | * This file is part of the S-Space package and is covered under the terms and
5 | * conditions therein.
6 | *
7 | * The S-Space package is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License version 2 as published
9 | * by the Free Software Foundation and distributed hereunder to you.
10 | *
11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
16 | * RIGHTS.
17 | *
18 | * You should have received a copy of the GNU General Public License
19 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | package edu.ucla.sspace.dependency;
23 |
24 |
25 | /**
26 | * An interface for transforming a dependency tree represented by a series of
27 | * {@link DependencyTreeNode}s.
28 | *
29 | * @author Keith Stevens
30 | */
31 | public interface DependencyTreeTransform {
32 |
33 | /**
34 | * Transforms the {@link DependencyRelation} links within a series of {@link
35 | * DependencyTreeNode}s. Relations may be added or removed, and entire
36 | * nodes may even be removed as long as the tree remaains connected.
37 | */
38 | DependencyTreeNode[] transform(DependencyTreeNode[] tree);
39 | }
40 |
--------------------------------------------------------------------------------
/src/main/java/edu/ucla/sspace/evaluation/NormedWordPrimingReport.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2010 Keith Stevens
3 | *
4 | * This file is part of the S-Space package and is covered under the terms and
5 | * conditions therein.
6 | *
7 | * The S-Space package is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License version 2 as published
9 | * by the Free Software Foundation and distributed hereunder to you.
10 | *
11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
16 | * RIGHTS.
17 | *
18 | * You should have received a copy of the GNU General Public License
19 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | package edu.ucla.sspace.evaluation;
23 |
/**
 * A summary of the outcome of a normed word priming evaluation.
 *
 * @author Keith Stevens
 */
public interface NormedWordPrimingReport {

    /**
     * Returns the average correlation between the normed cue-to-target
     * strengths and the semantic similarity results produced by a semantic
     * space.
     */
    double averageCorrelation();

    /**
     * Returns how many cues are covered by this report in total.
     */
    int numberOfCues();

    /**
     * Returns how many of the cues could not be answered at all.
     */
    int numberOfUnanswerableCues();
}
45 |
--------------------------------------------------------------------------------
/src/main/java/edu/ucla/sspace/text/AnnotatedDocument.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2009 David Jurgens
3 | *
4 | * This file is part of the S-Space package and is covered under the terms and
5 | * conditions therein.
6 | *
7 | * The S-Space package is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License version 2 as published
9 | * by the Free Software Foundation and distributed hereunder to you.
10 | *
11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
16 | * RIGHTS.
17 | *
18 | * You should have received a copy of the GNU General Public License
19 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | package edu.ucla.sspace.text;
23 |
24 | import java.util.Iterator;
25 |
26 | /**
27 | * An abstraction for a document that allows document processors to access text
28 | * in a uniform manner.
29 | */
30 | public interface AnnotatedDocument extends Document {
31 |
32 | /**
33 | * Returns the timestamp when this document was created
34 | */
35 | long creationDate();
36 |
37 | /**
38 | * Returns a label associated with this particular document. The label is
39 | * intended to provide information on the source of the document or the
40 | * contents therein.
41 | */
42 | String label();
43 | }
44 |
--------------------------------------------------------------------------------
/src/main/java/edu/ucla/sspace/hal/EvenWeighting.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2009 David Jurgens
3 | *
4 | * This file is part of the S-Space package and is covered under the terms and
5 | * conditions therein.
6 | *
7 | * The S-Space package is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License version 2 as published
9 | * by the Free Software Foundation and distributed hereunder to you.
10 | *
11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
16 | * RIGHTS.
17 | *
18 | * You should have received a copy of the GNU General Public License
19 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | package edu.ucla.sspace.hal;
23 |
24 | /**
25 | * An weighting scheme where all words have the same weight when specifying how
26 | * a {@link HyperspaceAnalogueToLanguage} instance should weigh co-occurrences
27 | * based on the word distance.
28 | */
29 | public class EvenWeighting implements WeightingFunction {
30 |
31 | /**
32 | * Returns a constant value for the weight regardless of distance
33 | *
34 | * @param positionOffset {@inheritDoc}
35 | * @param windowSize {@inheritDoc}
36 | *
37 | * @return {@inheritDoc}
38 | */
39 | public double weight(int positionOffset, int windowSize) {
40 | return 1;
41 | }
42 |
43 | }
--------------------------------------------------------------------------------
/src/main/java/edu/ucla/sspace/tools/SelectTopKWords.java:
--------------------------------------------------------------------------------
1 | package edu.ucla.sspace.tools;
2 |
3 | import edu.ucla.sspace.basis.BasisMapping;
4 |
5 | import edu.ucla.sspace.matrix.Matrix;
6 | import edu.ucla.sspace.matrix.MatrixIO;
7 | import edu.ucla.sspace.matrix.MatrixIO.Format;
8 |
9 | import edu.ucla.sspace.util.BoundedSortedMultiMap;
10 | import edu.ucla.sspace.util.MultiMap;
11 | import edu.ucla.sspace.util.SerializableUtil;
12 |
13 | import java.io.File;
14 |
15 | import java.util.ArrayList;
16 | import java.util.List;
17 |
18 |
19 | /**
20 | * @author Keith Stevens
21 | */
22 | public class SelectTopKWords {
23 | public static void main(String[] args) throws Exception {
24 | // Load the basis mapping.
25 | BasisMapping basis =
26 | SerializableUtil.load(new File(args[0]));
27 |
28 | // Create the top 10 lists for each topic in the word space.
29 | List> topTerms = new ArrayList>();
30 | Matrix m = MatrixIO.readMatrix(new File(args[1]), Format.DENSE_TEXT);
31 | for (int c = 0; c < m.columns(); ++c)
32 | topTerms.add(new BoundedSortedMultiMap(10));
33 |
34 | for (int r = 0; r < m.rows(); ++r) {
35 | String term = basis.getDimensionDescription(r);
36 | for (int c = 0; c < m.columns(); ++c)
37 | topTerms.get(c).put(m.get(r, c), term);
38 | }
39 |
40 | for (MultiMap topicTerms : topTerms) {
41 | for (String term : topicTerms.values())
42 | System.out.printf("%s ", term);
43 | System.out.println();
44 | }
45 | }
46 | }
47 |
48 |
--------------------------------------------------------------------------------
/src/main/java/edu/ucla/sspace/common/statistics/PointwiseMutualInformationTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2011 David Jurgens
3 | *
4 | * This file is part of the S-Space package and is covered under the terms and
5 | * conditions therein.
6 | *
7 | * The S-Space package is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License version 2 as published
9 | * by the Free Software Foundation and distributed hereunder to you.
10 | *
11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
16 | * RIGHTS.
17 | *
18 | * You should have received a copy of the GNU General Public License
19 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | package edu.ucla.sspace.common.statistics;
23 |
24 | import edu.ucla.sspace.common.Statistics;
25 |
26 | public class PointwiseMutualInformationTest implements SignificanceTest {
27 |
28 | /**
29 | * Returns the PMI score of the both A and B.
30 | */
31 | public double score (int both, int justA, int justB, int neither) {
32 | int all = both + justA + justB + neither;
33 | double probA = (both + justA) / (double)all;
34 | double probB = (both + justB) / (double)all;
35 | double probAandB = both / (double)all;
36 | return Statistics.log2(probAandB / (probA * probB));
37 | }
38 | }
39 |
--------------------------------------------------------------------------------
/src/main/java/edu/ucla/sspace/graph/DirectedTypedEdge.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2011 David Jurgens
3 | *
4 | * This file is part of the S-Space package and is covered under the terms and
5 | * conditions therein.
6 | *
7 | * The S-Space package is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License version 2 as published
9 | * by the Free Software Foundation and distributed hereunder to you.
10 | *
11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
16 | * RIGHTS.
17 | *
18 | * You should have received a copy of the GNU General Public License
19 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | package edu.ucla.sspace.graph;
23 |
24 |
25 | /**
26 | * An interface for directed edges in multigraphs. This interface refines the
27 | * {@link #equals(Object) equals} method of {@link TypedEdge} to include edge
28 | * orientation. Two {@code DirectedTypedEdge} must share the same edge
29 | * orientation to be equivalent.
30 | *
31 | * @see Multigraph
32 | */
33 | public interface DirectedTypedEdge extends DirectedEdge, TypedEdge {
34 |
35 | /**
36 | * Returns {@code true} if {@code o} connects the same vertices, has the
37 | * same edge orientation, and has edge type information that is equivalent.
38 | */
39 | boolean equals(Object o);
40 | }
--------------------------------------------------------------------------------
/src/main/java/edu/ucla/sspace/evaluation/NormedPrimingQuestion.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2010 Keith Stevens
3 | *
4 | * This file is part of the S-Space package and is covered under the terms and
5 | * conditions therein.
6 | *
7 | * The S-Space package is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License version 2 as published
9 | * by the Free Software Foundation and distributed hereunder to you.
10 | *
11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
16 | * RIGHTS.
17 | *
18 | * You should have received a copy of the GNU General Public License
19 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | package edu.ucla.sspace.evaluation;
23 |
24 | import java.util.List;
25 |
26 |
/**
 * A priming question made up of a cue and the targets associated with that
 * cue, each target having its own strength.
 *
 * @author Keith Stevens
 */
public interface NormedPrimingQuestion {

    /**
     * Returns the cue that primes the targets.
     */
    String getCue();

    /**
     * Returns how many targets are associated with the cue.
     */
    int numberOfTargets();

    /**
     * Returns the {@code i}th target associated with the cue.
     */
    String getTarget(int i);

    /**
     * Returns the strength of the {@code i}th target associated with the
     * cue.
     */
    double getStrength(int i);
}
52 |
53 |
--------------------------------------------------------------------------------
/src/main/java/edu/ucla/sspace/gws/WordOrderBasisMapping.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2011 David Jurgens
3 | *
4 | * This file is part of the S-Space package and is covered under the terms and
5 | * conditions therein.
6 | *
7 | * The S-Space package is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License version 2 as published
9 | * by the Free Software Foundation and distributed hereunder to you.
10 | *
11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
16 | * RIGHTS.
17 | *
18 | * You should have received a copy of the GNU General Public License
19 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | package edu.ucla.sspace.gws;
23 |
24 | import edu.ucla.sspace.basis.AbstractBasisMapping;
25 |
26 | import edu.ucla.sspace.util.Duple;
27 |
28 |
29 | /**
30 | * A {@link BasisMapping} implementation where each word and position
31 | * corresponds to a unique dimension.
32 | *
33 | * @author David Jurgens
34 | */
35 | public class WordOrderBasisMapping
36 | extends AbstractBasisMapping, String> {
37 |
38 | private static final long serialVersionUID = 1L;
39 |
40 | /**
41 | * {@inheritDoc}
42 | */
43 | public int getDimension(Duple key) {
44 | return getDimensionInternal(key.toString());
45 | }
46 | }
47 |
--------------------------------------------------------------------------------
/src/main/java/edu/ucla/sspace/gws/WordBasisMapping.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2011 David Jurgens
3 | *
4 | * This file is part of the S-Space package and is covered under the terms and
5 | * conditions therein.
6 | *
7 | * The S-Space package is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License version 2 as published
9 | * by the Free Software Foundation and distributed hereunder to you.
10 | *
11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
16 | * RIGHTS.
17 | *
18 | * You should have received a copy of the GNU General Public License
19 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | package edu.ucla.sspace.gws;
23 |
24 | import edu.ucla.sspace.basis.AbstractBasisMapping;
25 |
26 | import edu.ucla.sspace.util.Duple;
27 |
28 |
29 | /**
30 | * A {@link BasisMapping} implementation where each word corresponds to a unique
31 | * dimension regardless of its word position.
32 | *
33 | * @author David Jurgens
34 | */
35 | public class WordBasisMapping
36 | extends AbstractBasisMapping, String> {
37 |
38 | private static final long serialVersionUID = 1L;
39 |
40 | /**
41 | * {@inheritDoc}
42 | */
43 | public int getDimension(Duple key) {
44 | return getDimensionInternal(key.x);
45 | }
46 | }
47 |
--------------------------------------------------------------------------------
/src/test/java/edu/ucla/sspace/vector/SparseHashVectorTests.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2010 David Jurgens
3 | *
4 | * This file is part of the S-Space package and is covered under the terms and
5 | * conditions therein.
6 | *
7 | * The S-Space package is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License version 2 as published
9 | * by the Free Software Foundation and distributed hereunder to you.
10 | *
11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
16 | * RIGHTS.
17 | *
18 | * You should have received a copy of the GNU General Public License
19 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | package edu.ucla.sspace.vector;
23 |
24 | import org.junit.Ignore;
25 | import org.junit.Test;
26 |
27 | import static org.junit.Assert.*;
28 |
29 | import java.util.HashMap;
30 | import java.util.Map;
31 |
32 |
33 | /**
34 | * Tests for the {@link SparseHashVector} class.
35 | */
36 | public class SparseHashVectorTests {
37 |
38 | @Test public void testMagnitude() {
39 | SparseHashVector v = new SparseHashVector(100);
40 | assertEquals(0, v.magnitude(), .0001);
41 |
42 | v.set(1, 1);
43 | assertEquals(1, v.magnitude(), .0001);
44 |
45 | v.set(1, 3);
46 | v.set(2, 4);
47 | assertEquals(5, v.magnitude(), .0001);
48 | }
49 | }
--------------------------------------------------------------------------------
/src/main/java/edu/ucla/sspace/util/BiMap.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2010 Keith Stevens
3 | *
4 | * This file is part of the S-Space package and is covered under the terms and
5 | * conditions therein.
6 | *
7 | * The S-Space package is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License version 2 as published
9 | * by the Free Software Foundation and distributed hereunder to you.
10 | *
11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
16 | * RIGHTS.
17 | *
18 | * You should have received a copy of the GNU General Public License
19 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | package edu.ucla.sspace.util;
23 |
24 | import java.util.Map;
25 |
26 |
/**
 * This interface allows for a bi-directional mapping, where keys can map to
 * values and values can map to keys.  This is expected to be used with
 * one-to-one mappings.
 *
 * @param <K> the type of the keys in this map
 * @param <V> the type of the values in this map
 *
 * @author Keith Stevens
 */
public interface BiMap<K, V> extends Map<K, V> {

    /**
     * Returns a reversed form of this {@link BiMap}, where values in this
     * {@link BiMap} will map to keys in this {@link BiMap}.  Calling {@code
     * inverse} on the returned {@link BiMap} should return a pointer to the
     * original {@link BiMap}.
     *
     * @return the value-to-key view of this mapping
     */
    BiMap<V, K> inverse();
}
44 |
--------------------------------------------------------------------------------
/src/main/java/edu/ucla/sspace/util/Generator.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2010 Keith Stevens
3 | *
4 | * This file is part of the S-Space package and is covered under the terms and
5 | * conditions therein.
6 | *
7 | * The S-Space package is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License version 2 as published
9 | * by the Free Software Foundation and distributed hereunder to you.
10 | *
11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
16 | * RIGHTS.
17 | *
18 | * You should have received a copy of the GNU General Public License
19 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | package edu.ucla.sspace.util;
23 |
24 |
/**
 * An interface for classes which will maintain and generate new instances of
 * classes that require several parameters that will be used consistently
 * several times.  Implementations of this interface are also intended to be
 * used in conjunction with a {@link GeneratorMap}, which will create new
 * instances for keys not currently in the map by using an instance of a
 * {@link Generator}.
 *
 * @param <T> the type of object produced by this generator
 *
 * @see GeneratorMap
 *
 * @author Keith Stevens
 */
public interface Generator<T> {

    /**
     * Creates a new instance of type {@code T}.
     *
     * @return the newly created instance
     */
    public T generate();
}
44 |
--------------------------------------------------------------------------------
/src/main/java/edu/ucla/sspace/evaluation/MultipleChoiceQuestion.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2009 David Jurgens
3 | *
4 | * This file is part of the S-Space package and is covered under the terms and
5 | * conditions therein.
6 | *
7 | * The S-Space package is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License version 2 as published
9 | * by the Free Software Foundation and distributed hereunder to you.
10 | *
11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
16 | * RIGHTS.
17 | *
18 | * You should have received a copy of the GNU General Public License
19 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | package edu.ucla.sspace.evaluation;
23 |
24 | import java.util.List;
25 |
/**
 * A question that provides a prompt and one or more options to choose from as
 * the answer.
 *
 * @see WordChoiceEvaluation
 */
public interface MultipleChoiceQuestion {

    /**
     * Returns the question prompt.  This may be as short as a single word.
     */
    String getPrompt();

    /**
     * Returns a list of options to the prompt question.
     *
     * <p>NOTE(review): the element type was missing from this declaration
     * and has been reconstructed as {@code String}, matching the type of the
     * prompt; confirm against the implementations.
     */
    List<String> getOptions();

    /**
     * Returns the index of the correct answer in the list of options.
     */
    int getCorrectAnswer();

}
--------------------------------------------------------------------------------
/src/main/java/edu/ucla/sspace/dependency/DependencyRelation.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2010 Keith Stevens
3 | *
4 | * This file is part of the S-Space package and is covered under the terms and
5 | * conditions therein.
6 | *
7 | * The S-Space package is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License version 2 as published
9 | * by the Free Software Foundation and distributed hereunder to you.
10 | *
11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
16 | * RIGHTS.
17 | *
18 | * You should have received a copy of the GNU General Public License
19 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | package edu.ucla.sspace.dependency;
23 |
24 |
25 | /**
26 | * An interface for representing a dependency relationship between a head node
27 | * and its dependent relation.
28 | */
29 | public interface DependencyRelation {
30 |
31 | /**
32 | * Returns the dependent node that is related to the head node.
33 | */
34 | DependencyTreeNode dependentNode();
35 |
36 | /**
37 | * Returns the head node on which the second node has dependent relation.
38 | */
39 | DependencyTreeNode headNode();
40 |
41 | /**
42 | * Returns the relation the the current has with the next token in a {@link
43 | * DependencyPath}.
44 | */
45 | String relation();
46 |
47 | }
48 |
--------------------------------------------------------------------------------
/src/main/java/edu/ucla/sspace/text/GermanStemmer.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2009 Keith Stevens
3 | *
4 | * This file is part of the S-Space package and is covered under the terms and
5 | * conditions therein.
6 | *
7 | * The S-Space package is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License version 2 as published
9 | * by the Free Software Foundation and distributed hereunder to you.
10 | *
11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
16 | * RIGHTS.
17 | *
18 | * You should have received a copy of the GNU General Public License
19 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | package edu.ucla.sspace.text;
23 |
24 | import org.tartarus.snowball.ext.germanStemmer;
25 |
26 | /**
27 | * A wrapper for the German Snowball stemmer. Details for this specific
28 | * stemmer can be found in the Snowball project documentation. Each call to
29 | * {@link #stem(String)} creates its own {@code germanStemmer} instance.
30 | *
31 | * @author Keith Stevens.
32 | */
33 | public class GermanStemmer implements Stemmer{
34 |
35 | /**
36 | * {@inheritDoc}
37 | */
38 | public String stem(String token) {
39 | germanStemmer stemmer = new germanStemmer();
40 | stemmer.setCurrent(token);
41 | stemmer.stem();
42 | return stemmer.getCurrent();
43 | }
44 | }
45 |
--------------------------------------------------------------------------------
/src/main/java/edu/ucla/sspace/text/EnglishStemmer.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2009 Keith Stevens
3 | *
4 | * This file is part of the S-Space package and is covered under the terms and
5 | * conditions therein.
6 | *
7 | * The S-Space package is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License version 2 as published
9 | * by the Free Software Foundation and distributed hereunder to you.
10 | *
11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
16 | * RIGHTS.
17 | *
18 | * You should have received a copy of the GNU General Public License
19 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | package edu.ucla.sspace.text;
23 |
24 | import org.tartarus.snowball.ext.englishStemmer;
25 |
26 | /**
27 | * A wrapper for the English Snowball stemmer. Details for this specific
28 | * stemmer can be found in the Snowball project documentation. Each call to
29 | * {@link #stem(String)} creates its own {@code englishStemmer} instance.
30 | *
31 | * @author Keith Stevens.
32 | */
33 | public class EnglishStemmer implements Stemmer{
34 |
35 | /**
36 | * {@inheritDoc}
37 | */
38 | public String stem(String token) {
39 | englishStemmer stemmer = new englishStemmer();
40 | stemmer.setCurrent(token);
41 | stemmer.stem();
42 | return stemmer.getCurrent();
43 | }
44 | }
45 |
--------------------------------------------------------------------------------
/src/main/java/edu/ucla/sspace/text/ItalianStemmer.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2009 Keith Stevens
3 | *
4 | * This file is part of the S-Space package and is covered under the terms and
5 | * conditions therein.
6 | *
7 | * The S-Space package is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License version 2 as published
9 | * by the Free Software Foundation and distributed hereunder to you.
10 | *
11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
16 | * RIGHTS.
17 | *
18 | * You should have received a copy of the GNU General Public License
19 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | package edu.ucla.sspace.text;
23 |
24 | import org.tartarus.snowball.ext.italianStemmer;
25 |
26 | /**
27 | * A wrapper for the Italian Snowball stemmer. Details for this specific
28 | * stemmer can be found in the Snowball project documentation. Each call to
29 | * {@link #stem(String)} creates its own {@code italianStemmer} instance.
30 | *
31 | * @author Keith Stevens.
32 | */
33 | public class ItalianStemmer implements Stemmer{
34 |
35 | /**
36 | * {@inheritDoc}
37 | */
38 | public String stem(String token) {
39 | italianStemmer stemmer = new italianStemmer();
40 | stemmer.setCurrent(token);
41 | stemmer.stem();
42 | return stemmer.getCurrent();
43 | }
44 | }
45 |
--------------------------------------------------------------------------------
/src/main/java/edu/ucla/sspace/dependency/DependencyPathAcceptor.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2010 David Jurgens
3 | *
4 | * This file is part of the S-Space package and is covered under the terms and
5 | * conditions therein.
6 | *
7 | * The S-Space package is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License version 2 as published
9 | * by the Free Software Foundation and distributed hereunder to you.
10 | *
11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
16 | * RIGHTS.
17 | *
18 | * You should have received a copy of the GNU General Public License
19 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | package edu.ucla.sspace.dependency;
23 |
24 |
25 | /**
26 | * An interface for deciding whether a provided {@link DependencyPath} is
27 | * valid for further processing.
28 | */
29 | public interface DependencyPathAcceptor {
30 |
31 | /**
32 | * Returns {@code true} if the path is valid according to this acceptor's
33 | * standards.
34 | *
35 | * @param path the dependency path to check
36 | *
37 | * @return {@code true} if the path is valid
38 | */
39 | boolean accepts(DependencyPath path);
40 |
41 | /**
42 | * Returns the maximum path length allowed by this acceptor.
43 | *
44 | * @return the length above which no path will be accepted
45 | */
46 | int maxPathLength();
47 |
48 | }
--------------------------------------------------------------------------------
/src/main/java/edu/ucla/sspace/evaluation/WordSimilarity.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2009 David Jurgens
3 | *
4 | * This file is part of the S-Space package and is covered under the terms and
5 | * conditions therein.
6 | *
7 | * The S-Space package is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License version 2 as published
9 | * by the Free Software Foundation and distributed hereunder to you.
10 | *
11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
16 | * RIGHTS.
17 | *
18 | * You should have received a copy of the GNU General Public License
19 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | package edu.ucla.sspace.evaluation;
23 |
24 | /**
25 | * A human-based similarity judgement for two words. Note that the ordering
26 | * from the original question is preserved, which allows users to consider
27 | * possible asymmetric evaluations based on word ordering.
28 | *
29 | * @author David Jurgens
30 | */
31 | public interface WordSimilarity {
32 |
33 | /**
34 | * Returns the first word in the pair.
35 | */
36 | String getFirstWord();
37 |
38 | /**
39 | * Returns the second word in the pair.
40 | */
41 | String getSecondWord();
42 |
43 | /**
44 | * Returns the human similarity judgement for the two words.
45 | */
46 | double getSimilarity();
47 |
48 | }
49 |
--------------------------------------------------------------------------------
/src/main/java/edu/ucla/sspace/graph/WeightedDirectedTypedEdge.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2011 David Jurgens
3 | *
4 | * This file is part of the S-Space package and is covered under the terms and
5 | * conditions therein.
6 | *
7 | * The S-Space package is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License version 2 as published
9 | * by the Free Software Foundation and distributed hereunder to you.
10 | *
11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
16 | * RIGHTS.
17 | *
18 | * You should have received a copy of the GNU General Public License
19 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | package edu.ucla.sspace.graph;
23 |
24 |
25 | /**
26 | * An interface for weighted, directed edges in multigraphs. This interface
27 | * does not refine the {@link #equals(Object) equals} method of {@link
28 | * DirectedTypedEdge}, i.e. two {@code WeightedDirectedTypedEdge} instances are
29 | * equivalent independent of their edge weights.
30 | *
31 | * @see Multigraph
32 | */
33 | public interface WeightedDirectedTypedEdge
34 | extends DirectedTypedEdge, WeightedDirectedEdge {
35 |
36 | /**
37 | * Returns {@code true} if {@code o} connects the same vertices, has the
38 | * same edge orientation, and has edge type information that is equivalent.
39 | */
40 | boolean equals(Object o);
41 |
42 | }
--------------------------------------------------------------------------------
/src/main/java/edu/ucla/sspace/text/SnowballPorterStemmer.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2009 Keith Stevens
3 | *
4 | * This file is part of the S-Space package and is covered under the terms and
5 | * conditions therein.
6 | *
7 | * The S-Space package is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License version 2 as published
9 | * by the Free Software Foundation and distributed hereunder to you.
10 | *
11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
16 | * RIGHTS.
17 | *
18 | * You should have received a copy of the GNU General Public License
19 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | package edu.ucla.sspace.text;
23 |
24 | import org.tartarus.snowball.ext.porterStemmer;
25 |
26 | /**
27 | * A wrapper for the Porter Snowball stemmer. Details for this specific
28 | * stemmer can be found in the Snowball project documentation. Each call to
29 | * {@link #stem(String)} creates its own {@code porterStemmer} instance.
30 | *
31 | * @author Keith Stevens.
32 | */
33 | public class SnowballPorterStemmer implements Stemmer{
34 |
35 | /**
36 | * {@inheritDoc}
37 | */
38 | public String stem(String token) {
39 | porterStemmer stemmer = new porterStemmer();
40 | stemmer.setCurrent(token);
41 | stemmer.stem();
42 | return stemmer.getCurrent();
43 | }
44 | }
45 |
--------------------------------------------------------------------------------
/src/test/java/edu/ucla/sspace/matrix/factorization/SingularValueDecompositionLibJTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2011, Lawrence Livermore National Security, LLC. Produced at
3 | * the Lawrence Livermore National Laboratory. Written by Keith Stevens,
4 | * kstevens@cs.ucla.edu OCEC-10-073 All rights reserved.
5 | *
6 | * This file is part of the S-Space package and is covered under the terms and
7 | * conditions therein.
8 | *
9 | * The S-Space package is free software: you can redistribute it and/or modify
10 | * it under the terms of the GNU General Public License version 2 as published
11 | * by the Free Software Foundation and distributed hereunder to you.
12 | *
13 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
14 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
15 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
16 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
17 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
18 | * RIGHTS.
19 | *
20 | * You should have received a copy of the GNU General Public License
21 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
22 | */
23 |
24 | package edu.ucla.sspace.matrix.factorization;
25 |
26 | import org.junit.Ignore;
27 | import org.junit.Test;
28 |
29 | import static org.junit.Assert.*;
30 |
31 |
32 | /** Placeholder test class for {@code SingularValueDecompositionLibJ}; its only check is commented out.
33 | * @author Keith Stevens
34 | */
35 | public class SingularValueDecompositionLibJTest {
36 |
37 | @Test public void testMatrixReduction() {
38 | // This test is known to fail, so the reduction check below is commented out and the method body is empty.
39 | /*
40 | SingularValueDecompositionTestUtil.testReductionMatrix(
41 | new SingularValueDecompositionLibJ());
42 | */
43 | }
44 | }
45 |
--------------------------------------------------------------------------------
/src/test/java/edu/ucla/sspace/matrix/factorization/SingularValueDecompositionLibCTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2011, Lawrence Livermore National Security, LLC. Produced at
3 | * the Lawrence Livermore National Laboratory. Written by Keith Stevens,
4 | * kstevens@cs.ucla.edu OCEC-10-073 All rights reserved.
5 | *
6 | * This file is part of the S-Space package and is covered under the terms and
7 | * conditions therein.
8 | *
9 | * The S-Space package is free software: you can redistribute it and/or modify
10 | * it under the terms of the GNU General Public License version 2 as published
11 | * by the Free Software Foundation and distributed hereunder to you.
12 | *
13 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
14 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
15 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
16 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
17 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
18 | * RIGHTS.
19 | *
20 | * You should have received a copy of the GNU General Public License
21 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
22 | */
23 |
24 | package edu.ucla.sspace.matrix.factorization;
25 |
26 | import org.junit.Ignore;
27 | import org.junit.Test;
28 |
29 | import edu.ucla.sspace.matrix.SVD;
30 |
31 | import static org.junit.Assert.*;
32 |
33 |
34 | /** Runs the shared reduction-matrix check against {@code SingularValueDecompositionLibC}.
35 | * @author Keith Stevens
36 | */
37 | public class SingularValueDecompositionLibCTest {
38 | // The check runs only when SVD.isSVDLIBCavailable() returns true; otherwise the test does nothing.
39 | @Test public void testMatrixReduction() {
40 | if (SVD.isSVDLIBCavailable())
41 | SingularValueDecompositionTestUtil.testReductionMatrix(
42 | new SingularValueDecompositionLibC());
43 | }
44 | }
45 |
--------------------------------------------------------------------------------
/src/main/java/edu/ucla/sspace/basis/StringBasisMapping.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2010 Keith Stevens
3 | *
4 | * This file is part of the S-Space package and is covered under the terms and
5 | * conditions therein.
6 | *
7 | * The S-Space package is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License version 2 as published
9 | * by the Free Software Foundation and distributed hereunder to you.
10 | *
11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
16 | * RIGHTS.
17 | *
18 | * You should have received a copy of the GNU General Public License
19 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | package edu.ucla.sspace.basis;
23 |
24 | import java.util.Set;
25 |
26 |
27 | /**
28 | * A string-based {@link BasisMapping}. Keys must be strings and each dimension
29 | * is described by the associated key.
30 | *
31 | * @author Keith Stevens
32 | */
33 | public class StringBasisMapping extends AbstractBasisMapping {
34 |
35 | private static final long serialVersionUID = 1L;
36 | /** No-argument constructor; performs no initialization of its own. */
37 | public StringBasisMapping() {
38 | }
39 | /** Calls {@link #getDimension(String)} once for every word in {@code words}. */
40 | public StringBasisMapping(Set words) {
41 | for (String word : words)
42 | getDimension(word);
43 | }
44 |
45 | /**
46 | * {@inheritDoc} Delegates to {@code getDimensionInternal}.
47 | */
48 | public int getDimension(String key) {
49 | return getDimensionInternal(key);
50 | }
51 | }
52 |
53 |
--------------------------------------------------------------------------------
/src/test/java/edu/ucla/sspace/matrix/factorization/SingularValueDecompositionOctaveTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2011, Lawrence Livermore National Security, LLC. Produced at
3 | * the Lawrence Livermore National Laboratory. Written by Keith Stevens,
4 | * kstevens@cs.ucla.edu OCEC-10-073 All rights reserved.
5 | *
6 | * This file is part of the S-Space package and is covered under the terms and
7 | * conditions therein.
8 | *
9 | * The S-Space package is free software: you can redistribute it and/or modify
10 | * it under the terms of the GNU General Public License version 2 as published
11 | * by the Free Software Foundation and distributed hereunder to you.
12 | *
13 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
14 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
15 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
16 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
17 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
18 | * RIGHTS.
19 | *
20 | * You should have received a copy of the GNU General Public License
21 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
22 | */
23 |
24 | package edu.ucla.sspace.matrix.factorization;
25 |
26 | import edu.ucla.sspace.matrix.SVD;
27 |
28 | import org.junit.Ignore;
29 | import org.junit.Test;
30 |
31 | import static org.junit.Assert.*;
32 |
33 |
34 | /** Holds the shared reduction-matrix check for {@code SingularValueDecompositionOctave}.
35 | * @author Keith Stevens
36 | */
37 | public class SingularValueDecompositionOctaveTest {
38 | // NOTE(review): annotated with @Ignore and not @Test; confirm this check is meant to stay disabled.
39 | @Ignore public void testMatrixReduction() {
40 | if (SVD.isOctaveAvailable())
41 | SingularValueDecompositionTestUtil.testReductionMatrix(
42 | new SingularValueDecompositionOctave());
43 | }
44 | }
45 |
--------------------------------------------------------------------------------
/src/test/java/edu/ucla/sspace/matrix/factorization/SingularValueDecompositionMatlabTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2011, Lawrence Livermore National Security, LLC. Produced at
3 | * the Lawrence Livermore National Laboratory. Written by Keith Stevens,
4 | * kstevens@cs.ucla.edu OCEC-10-073 All rights reserved.
5 | *
6 | * This file is part of the S-Space package and is covered under the terms and
7 | * conditions therein.
8 | *
9 | * The S-Space package is free software: you can redistribute it and/or modify
10 | * it under the terms of the GNU General Public License version 2 as published
11 | * by the Free Software Foundation and distributed hereunder to you.
12 | *
13 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
14 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
15 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
16 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
17 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
18 | * RIGHTS.
19 | *
20 | * You should have received a copy of the GNU General Public License
21 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
22 | */
23 |
24 | package edu.ucla.sspace.matrix.factorization;
25 |
26 | import edu.ucla.sspace.matrix.SVD;
27 |
28 | import org.junit.Ignore;
29 | import org.junit.Test;
30 |
31 | import static org.junit.Assert.*;
32 |
33 |
34 | /** Runs the shared reduction-matrix check against {@code SingularValueDecompositionMatlab}.
35 | * @author Keith Stevens
36 | */
37 | public class SingularValueDecompositionMatlabTest {
38 | // The check runs only when SVD.isMatlabAvailable() returns true; otherwise the test does nothing.
39 | @Test public void testMatrixReduction() {
40 | if (SVD.isMatlabAvailable())
41 | SingularValueDecompositionTestUtil.testReductionMatrix(
42 | new SingularValueDecompositionMatlab());
43 | }
44 | }
45 |
46 |
--------------------------------------------------------------------------------
/src/main/java/edu/ucla/sspace/util/ReflectionUtil.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2009 Keith Stevens
3 | *
4 | * This file is part of the S-Space package and is covered under the terms and
5 | * conditions therein.
6 | *
7 | * The S-Space package is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License version 2 as published
9 | * by the Free Software Foundation and distributed hereunder to you.
10 | *
11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
16 | * RIGHTS.
17 | *
18 | * You should have received a copy of the GNU General Public License
19 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | package edu.ucla.sspace.util;
23 |
24 |
25 | /**
26 | * A collection of miscellaneous, but useful, functions for working with
27 | * reflection.
28 | */
29 | public class ReflectionUtil {
30 |
31 | /**
32 | * Uninstantiable
33 | */
34 | private ReflectionUtil() { }
35 |
36 | /**
37 | * Returns a new instance of the named class, created with {@code Class.newInstance()} and cast to the caller's expected type.
38 | * Any exception raised while loading or instantiating the class is rethrown wrapped in an {@link Error}.
39 | * @param className The name of a desired class to instantiate.
40 | */
41 | @SuppressWarnings("unchecked")
42 | public static T getObjectInstance(String className) {
43 | try {
44 | Class clazz = Class.forName(className);
45 | return (T) clazz.newInstance();
46 | } catch (Exception e) {
47 | throw new Error(e);
48 | }
49 | }
50 | }
51 |
--------------------------------------------------------------------------------
/src/main/java/edu/ucla/sspace/evaluation/WordAssociationReport.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2009 David Jurgens
3 | *
4 | * This file is part of the S-Space package and is covered under the terms and
5 | * conditions therein.
6 | *
7 | * The S-Space package is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License version 2 as published
9 | * by the Free Software Foundation and distributed hereunder to you.
10 | *
11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
16 | * RIGHTS.
17 | *
18 | * You should have received a copy of the GNU General Public License
19 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | package edu.ucla.sspace.evaluation;
23 |
24 | /**
25 | * A report of the performance of a {@link SemanticSpace} on a particular
26 | * {@link WordAssociationTest}.
27 | *
28 | * @author David Jurgens
29 | */
30 | public interface WordAssociationReport {
31 |
32 | /**
33 | * Returns the total number of word pairs.
34 | */
35 | int numberOfWordPairs();
36 |
37 | /**
38 | * Returns the correlation between the similarity judgements from a {@link
39 | * SemanticSpace} and the provided human similarity judgements.
40 | */
41 | double correlation();
42 |
43 | /**
44 | * Returns the number of questions for which a {@link SemanticSpace}
45 | * could not give an answer due to missing word vectors.
46 | */
47 | int unanswerableQuestions();
48 | }
49 |
--------------------------------------------------------------------------------
/src/main/java/edu/ucla/sspace/evaluation/WordPrimingReport.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2010 Keith Stevens
3 | *
4 | * This file is part of the S-Space package and is covered under the terms and
5 | * conditions therein.
6 | *
7 | * The S-Space package is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License version 2 as published
9 | * by the Free Software Foundation and distributed hereunder to you.
10 | *
11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
16 | * RIGHTS.
17 | *
18 | * You should have received a copy of the GNU General Public License
19 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | package edu.ucla.sspace.evaluation;
23 |
24 | /**
25 | * A report of the performance of a {@link SemanticSpace} on a particular
26 | * {@link WordPrimingTest}.
27 | *
28 | * @author Keith Stevens
29 | */
30 | public interface WordPrimingReport {
31 |
32 | /**
33 | * Returns the total number of word pairs.
34 | */
35 | int numberOfWordPairs();
36 |
37 | /**
38 | * Returns the priming score for related word pairs.
39 | */
40 | double relatedPriming();
41 |
42 | /**
43 | * Returns the priming score for unrelated word pairs.
44 | */
45 | double unrelatedPriming();
46 |
47 | /**
48 | * Returns the effect of priming, which is the difference between the
49 | * priming score for related and unrelated pairs.
50 | */
51 | double effect();
52 | }
53 |
--------------------------------------------------------------------------------
/src/main/java/edu/ucla/sspace/util/ResourceFinder.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2010 David Jurgens
3 | *
4 | * This file is part of the S-Space package and is covered under the terms and
5 | * conditions therein.
6 | *
7 | * The S-Space package is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License version 2 as published
9 | * by the Free Software Foundation and distributed hereunder to you.
10 | *
11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
16 | * RIGHTS.
17 | *
18 | * You should have received a copy of the GNU General Public License
19 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | package edu.ucla.sspace.util;
23 |
24 | import java.io.BufferedReader;
25 | import java.io.IOException;
26 |
27 |
28 | /**
29 | * An interface for reading file-based resources regardless of the environment
30 | * in which the system is operating, e.g. a Hadoop environment.
31 | */
32 | public interface ResourceFinder {
33 |
34 | /**
35 | * Finds the file with the specified name and returns a reader for that
36 | * file's contents.
37 | *
38 | * @param fileName the name of a file
39 | *
40 | * @return a {@code BufferedReader} to the contents of the specified file
41 | *
42 | * @throws IOException if the resource cannot be found or if an error occurs
43 | * while opening the resource
44 | */
45 | BufferedReader open(String fileName) throws IOException;
46 |
47 | }
--------------------------------------------------------------------------------
/src/test/java/edu/ucla/sspace/ri/TestRandomIndexing.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2009 David Jurgens
3 | *
4 | * This file is part of the S-Space package and is covered under the terms and
5 | * conditions therein.
6 | *
7 | * The S-Space package is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License version 2 as published
9 | * by the Free Software Foundation and distributed hereunder to you.
10 | *
11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
16 | * RIGHTS.
17 | *
18 | * You should have received a copy of the GNU General Public License
19 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | package edu.ucla.sspace.ri;
23 |
24 | import java.io.*;
25 | import java.util.*;
26 |
27 | import org.junit.Ignore;
28 | import org.junit.Test;
29 |
30 | import static org.junit.Assert.*;
31 |
32 |
33 | public class TestRandomIndexing {
34 | /** Seed applied to {@code ri.RANDOM} before the document is processed. */
35 | private static final long SEED = 42L;
36 | /** Processes one sentence and checks that {@code getWords()} equals the set of its whitespace-separated tokens. */
37 | @Test public void test() throws IOException {
38 | RandomIndexing ri = new RandomIndexing(new Properties());
39 | ri.RANDOM.setSeed(SEED);
40 |
41 | String text = "the quick brown fox jumps over the lazy dog";
42 | ri.processDocument(new BufferedReader(new StringReader(text)));
43 | // Build the expected word set by splitting the same text on whitespace.
44 | Set words = new LinkedHashSet();
45 | for (String s : text.split("\\s+"))
46 | words.add(s);
47 | // Set equality ignores order, so only membership is compared.
48 | assertEquals(words, ri.getWords());
49 | }
50 | }
51 |
--------------------------------------------------------------------------------
/src/main/java/edu/ucla/sspace/evaluation/WordSimilarityReport.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2009 David Jurgens
3 | *
4 | * This file is part of the S-Space package and is covered under the terms and
5 | * conditions therein.
6 | *
7 | * The S-Space package is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License version 2 as published
9 | * by the Free Software Foundation and distributed hereunder to you.
10 | *
11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
16 | * RIGHTS.
17 | *
18 | * You should have received a copy of the GNU General Public License
19 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | package edu.ucla.sspace.evaluation;
23 |
/**
 * Summarizes how a {@link SemanticSpace} performed on a single {@link
 * WordSimilarityEvaluation} test.
 *
 * @author David Jurgens
 */
public interface WordSimilarityReport {

    /**
     * Returns how many word pairs the test contained in total.
     */
    int numberOfWordPairs();

    /**
     * Returns the correlation between the similarity judgments derived from
     * a {@link SemanticSpace} and the human similarity judgments supplied
     * with the test.
     */
    double correlation();

    /**
     * Returns how many questions the {@link SemanticSpace} was unable to
     * answer because word vectors were missing.
     */
    int unanswerableQuestions();
}
49 |
--------------------------------------------------------------------------------
/src/main/java/edu/ucla/sspace/evaluation/WordPrimingTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2010 Keith Stevens
3 | *
4 | * This file is part of the S-Space package and is covered under the terms and
5 | * conditions therein.
6 | *
7 | * The S-Space package is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License version 2 as published
9 | * by the Free Software Foundation and distributed hereunder to you.
10 | *
11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
16 | * RIGHTS.
17 | *
18 | * You should have received a copy of the GNU General Public License
19 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | package edu.ucla.sspace.evaluation;
23 |
24 | import edu.ucla.sspace.common.SemanticSpace;
25 |
26 |
27 | /**
28 | * An interface for performing priming tests where there is no normed set of
29 | * responses to compare against. These tests simply measure the associational
30 | * strength between a prime,target pair and prime, unrelated target pairs. The
31 | * key result is the effect of related primes, where a high effect suggests that
32 | * that semantic space models the particular form of priming modeled by some
33 | * implemented test.
34 | *
35 | * @author Keith Stevens
36 | */
37 | public interface WordPrimingTest {
38 |
39 | /**
40 | * Evaluates a {@link SemanticSpace} on a particular test of word priming
41 | * pairs.
42 | */
43 | public WordPrimingReport evaluate(SemanticSpace sspace);
44 | }
45 |
--------------------------------------------------------------------------------
/src/main/java/edu/ucla/sspace/hal/LinearWeighting.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2009 David Jurgens
3 | *
4 | * This file is part of the S-Space package and is covered under the terms and
5 | * conditions therein.
6 | *
7 | * The S-Space package is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License version 2 as published
9 | * by the Free Software Foundation and distributed hereunder to you.
10 | *
11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
16 | * RIGHTS.
17 | *
18 | * You should have received a copy of the GNU General Public License
19 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | package edu.ucla.sspace.hal;
23 |
24 | /**
25 | * A linear weighting scheme for specifying how a {@link
26 | * HyperspaceAnalogueToLanguage} instance should weigh co-occurrences based on
27 | * the word distance.
28 | */
29 | public class LinearWeighting implements WeightingFunction {
30 |
31 | /**
32 | * Returns the weighed value where the closest words receive a weight equal
33 | * to the window size and the most distance words receive a weight of {@code
34 | * 1}, using a linear decrease for in-between values.
35 | *
36 | * @param positionOffset {@inheritDoc}
37 | * @param windowSize {@inheritDoc}
38 | *
39 | * @return {@inheritDoc}
40 | */
41 | public double weight(int positionOffset, int windowSize) {
42 | return windowSize - (Math.abs(positionOffset) - 1);
43 | }
44 | }
45 |
--------------------------------------------------------------------------------
/src/main/java/edu/ucla/sspace/common/DimensionallyInterpretableSemanticSpace.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2010 David Jurgens
3 | *
4 | * This file is part of the S-Space package and is covered under the terms and
5 | * conditions therein.
6 | *
7 | * The S-Space package is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License version 2 as published
9 | * by the Free Software Foundation and distributed hereunder to you.
10 | *
11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
16 | * RIGHTS.
17 | *
18 | * You should have received a copy of the GNU General Public License
19 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | package edu.ucla.sspace.common;
23 |
24 |
25 | /**
26 | * An interface for {@link SemanticSpace} instances that are meaningfully
27 | * interpretable. In most cases, the dimensions will be understandable by human
28 | * viewers, but this interface provides support for mapping a dimension to a
29 | * generic {@code Object} for using the description in some programatic manner.
30 | */
31 | public interface DimensionallyInterpretableSemanticSpace
32 | extends SemanticSpace {
33 |
34 | /**
35 | * Returns a description of what features with which the specified dimension
36 | * corresponds.
37 | *
38 | * @param dimension a dimension number
39 | *
40 | * @return a description of the features for the dimension
41 | */
42 | T getDimensionDescription(int dimension);
43 |
44 | }
45 |
--------------------------------------------------------------------------------
/src/main/java/edu/ucla/sspace/dependency/DependencyPathWeight.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2010 Keith Stevens
3 | *
4 | * This file is part of the S-Space package and is covered under the terms and
5 | * conditions therein.
6 | *
7 | * The S-Space package is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License version 2 as published
9 | * by the Free Software Foundation and distributed hereunder to you.
10 | *
11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
16 | * RIGHTS.
17 | *
18 | * You should have received a copy of the GNU General Public License
19 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | package edu.ucla.sspace.dependency;
23 |
24 | import java.util.List;
25 |
26 |
27 | /**
28 | * An interface for weighting, or scoring, dependency paths. Implementations
29 | * are suggested to be thread-safe and stateless .
30 | *
31 | * @author Keith Stevens
32 | */
33 | public interface DependencyPathWeight {
34 |
35 | /**
36 | * Returns the score of the provided {@link DependencyPath}. The score may
37 | * be a function of the length of the path, arbitrary, e.g., 1 for all
38 | * paths, or may be a function of the relations and terms in the path.
39 | *
40 | * @param path A list of the term,relation links in the {@link
41 | * DependencyPath} being scored
42 | *
43 | * @return The score of the dependecy path
44 | */
45 | double scorePath(DependencyPath path);
46 | }
47 |
--------------------------------------------------------------------------------
/src/main/java/edu/ucla/sspace/index/PermutationFunction.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2009 David Jurgens
3 | *
4 | * This file is part of the S-Space package and is covered under the terms and
5 | * conditions therein.
6 | *
7 | * The S-Space package is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License version 2 as published
9 | * by the Free Software Foundation and distributed hereunder to you.
10 | *
11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
16 | * RIGHTS.
17 | *
18 | * You should have received a copy of the GNU General Public License
19 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | package edu.ucla.sspace.index;
23 |
24 | import edu.ucla.sspace.vector.Vector;
25 |
26 |
27 | /**
28 | * An interface for functions that permute the ordering of {@code
29 | * TernaryVector}s. Implementations are expected to be thread safe when
30 | * performing permutations.
31 | */
32 | public interface PermutationFunction {
33 |
34 | /**
35 | * Permutes the provided {@code TernaryVector} the specified number of
36 | * times.
37 | *
38 | * @param v an index vector to permute
39 | * @param numPermutations the number of times the permutation function
40 | * should be applied to the provided index vector.
41 | *
42 | * @return the original index vector permuted the specified number of times
43 | */
44 | T permute(T v, int numPermutations);
45 | }
46 |
--------------------------------------------------------------------------------
/src/main/java/edu/ucla/sspace/hal/WeightingFunction.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2009 David Jurgens
3 | *
4 | * This file is part of the S-Space package and is covered under the terms and
5 | * conditions therein.
6 | *
7 | * The S-Space package is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License version 2 as published
9 | * by the Free Software Foundation and distributed hereunder to you.
10 | *
11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
16 | * RIGHTS.
17 | *
18 | * You should have received a copy of the GNU General Public License
19 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | package edu.ucla.sspace.hal;
23 |
/**
 * A function interface that tells a {@link HyperspaceAnalogueToLanguage}
 * instance how strongly to weigh a co-occurrence, given the distance between
 * the two words.
 */
public interface WeightingFunction {

    /**
     * Computes the weight for a word located at the given offset.  A negative
     * offset means that the word appears before the current position; a
     * positive offset means that it appears after.
     *
     * @param positionOffset the location of the word relative to the current
     *        position
     * @param windowSize the maximum number of words on one side that will be
     *        considered for weighting
     *
     * @return the weight to apply
     */
    double weight(int positionOffset, int windowSize);

}
--------------------------------------------------------------------------------
/src/main/java/edu/ucla/sspace/index/DoubleVectorGenerator.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2009 Keith Stevens
3 | *
4 | * This file is part of the S-Space package and is covered under the terms and
5 | * conditions therein.
6 | *
7 | * The S-Space package is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License version 2 as published
9 | * by the Free Software Foundation and distributed hereunder to you.
10 | *
11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
16 | * RIGHTS.
17 | *
18 | * You should have received a copy of the GNU General Public License
19 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | package edu.ucla.sspace.index;
23 |
24 | import edu.ucla.sspace.util.Generator;
25 |
26 | import edu.ucla.sspace.vector.DoubleVector;
27 |
28 |
29 | /**
30 | * An interface for classes which will maintain and generate random {@code
31 | * DoubleVector}s. The main purpose of this of this class is to allow any
32 | * algorithm that makes use of some sort of random vector, such as Random
33 | * Indexing, can easily swap out the type of indexing used for experimentation
34 | * purposes.
35 | */
36 | public interface DoubleVectorGenerator
37 | extends Generator {
38 |
39 | /**
40 | * Creates an {@code VectorVector} with the provided length.
41 | *
42 | * @param length the length of the index vector
43 | *
44 | * @return an index vector
45 | */
46 | public T generate();
47 | }
48 |
--------------------------------------------------------------------------------
/src/main/java/edu/ucla/sspace/index/IntegerVectorGenerator.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2009 Keith Stevens
3 | *
4 | * This file is part of the S-Space package and is covered under the terms and
5 | * conditions therein.
6 | *
7 | * The S-Space package is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License version 2 as published
9 | * by the Free Software Foundation and distributed hereunder to you.
10 | *
11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
16 | * RIGHTS.
17 | *
18 | * You should have received a copy of the GNU General Public License
19 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | package edu.ucla.sspace.index;
23 |
24 | import edu.ucla.sspace.util.Generator;
25 |
26 | import edu.ucla.sspace.vector.IntegerVector;
27 |
28 |
29 | /**
30 | * An interface for classes which will maintain and generate random {@code
31 | * IntegerVector}s. The main purpose of this of this class is to allow any
32 | * algorithm that makes use of some sort of random vector, such as Random
33 | * Indexing, can easily swap out the type of indexing used for experimentation
34 | * purposes.
35 | */
36 | public interface IntegerVectorGenerator
37 | extends Generator {
38 |
39 | /**
40 | * Creates an {@code VectorVector} with the provided length.
41 | *
42 | * @param length the length of the index vector
43 | *
44 | * @return an index vector
45 | */
46 | public T generate();
47 | }
48 |
--------------------------------------------------------------------------------
/src/test/java/edu/ucla/sspace/vector/VectorIOTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2009 Keith Stevens
3 | *
4 | * This file is part of the S-Space package and is covered under the terms and
5 | * conditions therein.
6 | *
7 | * The S-Space package is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License version 2 as published
9 | * by the Free Software Foundation and distributed hereunder to you.
10 | *
11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
16 | * RIGHTS.
17 | *
18 | * You should have received a copy of the GNU General Public License
19 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | package edu.ucla.sspace.vector;
23 |
24 | import org.junit.Ignore;
25 | import org.junit.Test;
26 |
27 | import static org.junit.Assert.*;
28 |
29 |
30 | /**
31 | * @author Keith Stevens
32 | */
33 | public class VectorIOTest {
34 |
35 | @Test public void testSparseToString() {
36 | Vector vector = new CompactSparseVector(new double[]{0, 0, 0, 1});
37 | assertEquals("3,1.0", VectorIO.toString(vector));
38 | }
39 |
40 | @Test public void testSparseToString2() {
41 | Vector vector = new CompactSparseVector(new double[]{0, 1, 0, 5});
42 | assertEquals("1,1.0;3,5.0", VectorIO.toString(vector));
43 | }
44 |
45 | @Test public void testDenseToString() {
46 | Vector vector = new DenseVector(new double[]{0, 1, 0, 5});
47 | assertEquals("0.0 1.0 0.0 5.0", VectorIO.toString(vector));
48 | }
49 | }
50 |
--------------------------------------------------------------------------------
/src/main/java/edu/ucla/sspace/graph/GraphConstructionException.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2011 David Jurgens
3 | *
4 | * This file is part of the S-Space package and is covered under the terms and
5 | * conditions therein.
6 | *
7 | * The S-Space package is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License version 2 as published
9 | * by the Free Software Foundation and distributed hereunder to you.
10 | *
11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
16 | * RIGHTS.
17 | *
18 | * You should have received a copy of the GNU General Public License
19 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | package edu.ucla.sspace.graph;
23 |
24 |
/**
 * An exception for cases where a change to a graph has resulted in an invalid
 * construction according to its design contract.  For example, this might
 * include adding an edge to an acyclic-by-contract graph that would cause it
 * to become cyclic.  Callers should catch this exception if the construction
 * of the graph may be repealed, i.e. if the operation that caused this
 * exception may be rolled back and the program continue as expected.
 */
public class GraphConstructionException extends RuntimeException {

    private static final long serialVersionUID = 1L;

    /**
     * Creates an exception with no detail message and no cause.
     */
    public GraphConstructionException() {
        super();
    }

    /**
     * Creates an exception with the given detail message and no cause.
     *
     * @param message a description of the invalid construction
     */
    public GraphConstructionException(String message) {
        super(message);
    }

    /**
     * Creates an exception with the given detail message that also records
     * the underlying failure, so the original stack trace is not lost when
     * this exception is thrown in its place.
     *
     * @param message a description of the invalid construction
     * @param cause the failure that led to this exception; may be {@code
     *        null}
     */
    public GraphConstructionException(String message, Throwable cause) {
        super(message, cause);
    }

    /**
     * Creates an exception that wraps the underlying failure.
     *
     * @param cause the failure that led to this exception; may be {@code
     *        null}
     */
    public GraphConstructionException(Throwable cause) {
        super(cause);
    }

}
--------------------------------------------------------------------------------
/src/test/java/edu/ucla/sspace/common/DummySemanticSpace.java:
--------------------------------------------------------------------------------
1 | package edu.ucla.sspace.common;
2 |
3 | import edu.ucla.sspace.matrix.*;
4 | import edu.ucla.sspace.text.*;
5 | import edu.ucla.sspace.util.*;
6 | import edu.ucla.sspace.vector.*;
7 |
8 | import java.io.*;
9 |
10 | import java.util.HashMap;
11 | import java.util.Map;
12 | import java.util.Properties;
13 | import java.util.Set;
14 |
15 | /**
16 | * A test-only {@link SemanticSpace}, where all the semantic vectors must be
17 | * manually asssigned.
18 | */
19 | public class DummySemanticSpace implements SemanticSpace {
20 |
21 | private final Map wordToVector;
22 |
23 | private int dimensions;
24 |
25 | public DummySemanticSpace() {
26 | wordToVector = new HashMap();
27 | }
28 |
29 | /**
30 | * Does nothing
31 | */
32 | public void processDocument(BufferedReader document) throws IOException { }
33 |
34 | /**
35 | * {@inheritDoc}
36 | */
37 | public Set getWords() {
38 | return wordToVector.keySet();
39 | }
40 |
41 | /**
42 | * Returns the manually assigned vector for the word
43 | */
44 | public Vector getVector(String word) {
45 | return wordToVector.get(word);
46 | }
47 |
48 | /**
49 | * Sets the vector for the word
50 | */
51 | public Vector setVector(String word, Vector vector) {
52 | dimensions = vector.length();
53 | return wordToVector.put(word, vector);
54 | }
55 |
56 | /**
57 | * {@inheritDoc}
58 | */
59 | public int getVectorLength() {
60 | return dimensions;
61 | }
62 |
63 | /**
64 | * Does nothing
65 | */
66 | public void processSpace(Properties properties) { }
67 |
68 | /**
69 | * {@inheritDoc}
70 | */
71 | public String getSpaceName() {
72 | return "DummySemanticSpace";
73 | }
74 |
75 | }
76 |
--------------------------------------------------------------------------------
/src/main/java/edu/ucla/sspace/evaluation/SimpleWordSimilarity.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2009 David Jurgens
3 | *
4 | * This file is part of the S-Space package and is covered under the terms and
5 | * conditions therein.
6 | *
7 | * The S-Space package is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License version 2 as published
9 | * by the Free Software Foundation and distributed hereunder to you.
10 | *
11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
16 | * RIGHTS.
17 | *
18 | * You should have received a copy of the GNU General Public License
19 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | package edu.ucla.sspace.evaluation;
23 |
24 | /**
25 | * The default implementation of {@link WordSimilarity}
26 | */
27 | public class SimpleWordSimilarity implements WordSimilarity {
28 |
29 | private final String first;
30 |
31 | private final String second;
32 |
33 | private final double sim;
34 |
35 | public SimpleWordSimilarity(String first, String second, double sim) {
36 | this.first = first;
37 | this.second = second;
38 | this.sim = sim;
39 | }
40 |
41 | /**
42 | * {@inheritDoc}
43 | */
44 | public String getFirstWord() {
45 | return first;
46 | }
47 |
48 | /**
49 | * {@inheritDoc}
50 | */
51 | public String getSecondWord() {
52 | return second;
53 | }
54 |
55 | /**
56 | * {@inheritDoc}
57 | */
58 | public double getSimilarity() {
59 | return sim;
60 | }
61 |
62 | }
--------------------------------------------------------------------------------
/src/test/java/edu/ucla/sspace/text/PorterStemmerTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2009 Keith Stevens
3 | *
4 | * This file is part of the S-Space package and is covered under the terms and
5 | * conditions therein.
6 | *
7 | * The S-Space package is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License version 2 as published
9 | * by the Free Software Foundation and distributed hereunder to you.
10 | *
11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
16 | * RIGHTS.
17 | *
18 | * You should have received a copy of the GNU General Public License
19 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | package edu.ucla.sspace.text;
23 |
24 | import java.io.*;
25 | import java.util.*;
26 |
27 | import org.junit.Ignore;
28 | import org.junit.Test;
29 |
30 | import static org.junit.Assert.*;
31 |
32 |
33 | public class PorterStemmerTest {
34 | @Test public void testStemming() {
35 | String[][] testWords = {{"cats", "cat"},
36 | {"cat", "cat"},
37 | {"opened", "open"},
38 | {"open", "open"},
39 | {"candies", "candi"},
40 | {"candy", "candi"},
41 | {"immediately", "immedi"}};
42 | for (String[] testExpected : testWords) {
43 | String stem = new PorterStemmer().stem(testExpected[0]);
44 | assertEquals(stem, testExpected[1]);
45 | }
46 | }
47 | }
48 |
--------------------------------------------------------------------------------
/src/main/java/edu/ucla/sspace/clustering/DataMatrixLinkClustering.java:
--------------------------------------------------------------------------------
1 | package edu.ucla.sspace.clustering;
2 |
3 | import edu.ucla.sspace.matrix.AffinityMatrixCreator;
4 | import edu.ucla.sspace.matrix.Matrix;
5 | import edu.ucla.sspace.matrix.MatrixFile;
6 | import edu.ucla.sspace.matrix.NearestNeighborAffinityMatrixCreator;
7 |
8 | import edu.ucla.sspace.similarity.CosineSimilarity;
9 | import edu.ucla.sspace.similarity.SimilarityFunction;
10 |
11 | import java.util.Properties;
12 |
13 |
14 | /**
15 | * @author Keith Stevens
16 | */
17 | public class DataMatrixLinkClustering implements Clustering {
18 |
19 | private final AffinityMatrixCreator creator;
20 |
21 | private final LinkClustering linkCluster;
22 |
23 | public DataMatrixLinkClustering() {
24 | this(createDefaultAffinityMatrixCreator());
25 | }
26 |
27 | public DataMatrixLinkClustering(AffinityMatrixCreator creator) {
28 | this.creator = creator;
29 | this.linkCluster = new LinkClustering();
30 | }
31 |
32 | public static AffinityMatrixCreator createDefaultAffinityMatrixCreator() {
33 | SimilarityFunction simFunc = new CosineSimilarity();
34 | AffinityMatrixCreator creator =
35 | new NearestNeighborAffinityMatrixCreator();
36 | creator.setParams(10);
37 | creator.setFunctions(simFunc, simFunc);
38 | return creator;
39 | }
40 |
41 | public Assignments cluster(Matrix matrix,
42 | int numClusters,
43 | Properties props) {
44 | MatrixFile affinityMatrix = creator.calculate(matrix);
45 | return linkCluster.cluster(affinityMatrix.load(), numClusters, props);
46 | }
47 |
48 | public Assignments cluster(Matrix matrix, Properties props) {
49 | MatrixFile affinityMatrix = creator.calculate(matrix);
50 | return linkCluster.cluster(affinityMatrix.load(), props);
51 | }
52 | }
53 |
--------------------------------------------------------------------------------
/src/test/java/edu/ucla/sspace/dependency/AbstractPathUtil.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2010 Keith Stevens
3 | *
4 | * This file is part of the S-Space package and is covered under the terms and
5 | * conditions therein.
6 | *
7 | * The S-Space package is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License version 2 as published
9 | * by the Free Software Foundation and distributed hereunder to you.
10 | *
11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
16 | * RIGHTS.
17 | *
18 | * You should have received a copy of the GNU General Public License
19 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | package edu.ucla.sspace.dependency;
23 |
24 | import java.util.LinkedList;
25 | import java.util.List;
26 |
27 | import org.junit.Ignore;
28 | import org.junit.Test;
29 |
30 | import static org.junit.Assert.*;
31 |
32 |
33 | public class AbstractPathUtil {
34 |
35 | protected DependencyPath makePath(String[][] pathString) {
36 | List path = new LinkedList();
37 | for (String[] link : pathString) {
38 | DependencyTreeNode n1 =
39 | new SimpleDependencyTreeNode(link[0], link[1], 0);
40 | String relation = link[2];
41 | DependencyTreeNode n2 =
42 | new SimpleDependencyTreeNode(link[3], link[4], 0);
43 |
44 | path.add(new SimpleDependencyRelation(n1, relation, n2));
45 | }
46 | return new SimpleDependencyPath(path);
47 | }
48 | }
49 |
--------------------------------------------------------------------------------
/src/main/java/edu/ucla/sspace/similarity/AbstractSymmetricSimilarityFunction.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2011, Lawrence Livermore National Security, LLC. Produced at
3 | * the Lawrence Livermore National Laboratory. Written by Keith Stevens,
4 | * kstevens@cs.ucla.edu OCEC-10-073 All rights reserved.
5 | *
6 | * This file is part of the S-Space package and is covered under the terms and
7 | * conditions therein.
8 | *
9 | * The S-Space package is free software: you can redistribute it and/or modify
10 | * it under the terms of the GNU General Public License version 2 as published
11 | * by the Free Software Foundation and distributed hereunder to you.
12 | *
13 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
14 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
15 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
16 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
17 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
18 | * RIGHTS.
19 | *
20 | * You should have received a copy of the GNU General Public License
21 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
22 | */
23 |
24 | package edu.ucla.sspace.similarity;
25 |
26 |
27 | /**
28 | * A base implementation for any symmetic {@link SimilarityFunction} that
29 | * requires no parameters. Any subclass will return {@code true} for calls to
30 | * {@link #isSymmetric}.
31 | *
32 | * @author Keith Stevens
33 | */
34 | public abstract class AbstractSymmetricSimilarityFunction
35 | implements SimilarityFunction {
36 |
37 | /**
38 | * Performs a no-op and sets no parameters
39 | */
40 | public void setParams(double... arguments) {
41 | }
42 |
43 | /**
44 | * Returns {@code true}.
45 | */
46 | public boolean isSymmetric() {
47 | return true;
48 | }
49 | }
50 |
--------------------------------------------------------------------------------
/src/test/java/edu/ucla/sspace/dependency/AbstractPathTestBase.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2010 Keith Stevens
3 | *
4 | * This file is part of the S-Space package and is covered under the terms and
5 | * conditions therein.
6 | *
7 | * The S-Space package is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License version 2 as published
9 | * by the Free Software Foundation and distributed hereunder to you.
10 | *
11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
16 | * RIGHTS.
17 | *
18 | * You should have received a copy of the GNU General Public License
19 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | package edu.ucla.sspace.dependency;
23 |
24 | import java.util.LinkedList;
25 | import java.util.List;
26 |
27 | import org.junit.Ignore;
28 | import org.junit.Test;
29 |
30 | import static org.junit.Assert.*;
31 |
32 |
33 | public class AbstractPathTestBase {
34 |
35 | protected DependencyPath makePath(String[][] pathString) {
36 | List path = new LinkedList();
37 | for (String[] link : pathString) {
38 | DependencyTreeNode n1 =
39 | new SimpleDependencyTreeNode(link[0], link[1], 0);
40 | String relation = link[2];
41 | DependencyTreeNode n2 =
42 | new SimpleDependencyTreeNode(link[3], link[4], 0);
43 |
44 | path.add(new SimpleDependencyRelation(n1, relation, n2));
45 | }
46 | return new SimpleDependencyPath(path);
47 | }
48 | }
49 |
--------------------------------------------------------------------------------
/src/main/java/edu/ucla/sspace/clustering/seeding/KMeansSeed.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2011 Keith Stevens
3 | *
4 | * This file is part of the S-Space package and is covered under the terms and
5 | * conditions therein.
6 | *
7 | * The S-Space package is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License version 2 as published
9 | * by the Free Software Foundation and distributed hereunder to you.
10 | *
11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
16 | * RIGHTS.
17 | *
18 | * You should have received a copy of the GNU General Public License
19 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | package edu.ucla.sspace.clustering.seeding;
23 |
24 | import edu.ucla.sspace.matrix.Matrix;
25 |
26 | import edu.ucla.sspace.vector.DoubleVector;
27 |
28 |
29 | /**
30 | * An interface for KMeans seeding algorithms. Implementations must compose
31 | * initial centroid seeds from a data set by either choosing an already existing
32 | * data point or composing a linear combination of existing data points.
33 | *
34 | *
35 | *
36 | * Implementations must be state free and threadsafe.
37 | *
38 | * @author Keith Stevens
39 | */
40 | public interface KMeansSeed {
41 |
42 | /**
43 | * Returns an array of length {@code numCentroids} that contains centroids
44 | * composed of either vectors from {@code dataPoints} or a linear combination
45 | * of vectors from {@code dataPoints}.
46 | */
47 | DoubleVector[] chooseSeeds(int numCentroids, Matrix dataPoints);
48 | }
49 |
--------------------------------------------------------------------------------
/src/main/java/edu/ucla/sspace/util/Duple.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2009 Keith Stevens
3 | *
4 | * This file is part of the S-Space package and is covered under the terms and
5 | * conditions therein.
6 | *
7 | * The S-Space package is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License version 2 as published
9 | * by the Free Software Foundation and distributed hereunder to you.
10 | *
11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
16 | * RIGHTS.
17 | *
18 | * You should have received a copy of the GNU General Public License
19 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | package edu.ucla.sspace.util;
23 |
24 | import java.io.Serializable;
25 |
26 |
27 | /**
28 | * A wrapper for containing two objects of different types.
29 | */
30 | public class Duple implements Serializable {
31 |
32 | private static final long serialVersionUID = 1L;
33 |
34 | public final T x;
35 |
36 | public final U y;
37 |
38 | public Duple(T x, U y) {
39 | this.x = x;
40 | this.y = y;
41 | }
42 |
43 | public boolean equals(Object o) {
44 | if (o == null || !(o instanceof Duple))
45 | return false;
46 | Duple d = (Duple)o;
47 | return (x == d.x || (x != null && x.equals(d.x))) &&
48 | (y == d.y || (y != null && y.equals(d.y)));
49 | }
50 |
51 | public int hashCode() {
52 | return ((x == null) ? 0 : x.hashCode()) ^
53 | ((y == null) ? 0 : y.hashCode());
54 | }
55 |
56 | public String toString() {
57 | return "{" + x + ", " + y + "}";
58 | }
59 | }
60 |
--------------------------------------------------------------------------------
/src/main/java/edu/ucla/sspace/hal/GeometricWeighting.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2009 David Jurgens
3 | *
4 | * This file is part of the S-Space package and is covered under the terms and
5 | * conditions therein.
6 | *
7 | * The S-Space package is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License version 2 as published
9 | * by the Free Software Foundation and distributed hereunder to you.
10 | *
11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
16 | * RIGHTS.
17 | *
18 | * You should have received a copy of the GNU General Public License
19 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | package edu.ucla.sspace.hal;
23 |
24 | /**
25 | * A geometically-decreasing weighting scheme for specifying how a {@link
26 | * HyperspaceAnalogueToLanguage} instance should weigh co-occurrences based on
27 | * the word distance.
28 | */
29 | public class GeometricWeighting implements WeightingFunction {
30 |
31 | /**
32 | * Returns the weighed value where the closest words receive a weight equal
33 | * to the window size and the most distance words receive a weight of {@code
34 | * 1}, using a geometric (1 / 2n) decrease for in-between values.
35 | *
36 | * @param positionOffset {@inheritDoc}
37 | * @param windowSize {@inheritDoc}
38 | *
39 | * @return {@inheritDoc}
40 | */
41 | public double weight(int positionOffset, int windowSize) {
42 | return ((1 << (windowSize - (Math.abs(positionOffset) - 1))) /
43 | (double)(1 << windowSize)) * windowSize;
44 | }
45 |
46 | }
--------------------------------------------------------------------------------
/src/main/java/edu/ucla/sspace/text/LabeledStringDocument.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2011 David Jurgens
3 | *
4 | * This file is part of the S-Space package and is covered under the terms and
5 | * conditions therein.
6 | *
7 | * The S-Space package is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License version 2 as published
9 | * by the Free Software Foundation and distributed hereunder to you.
10 | *
11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
16 | * RIGHTS.
17 | *
18 | * You should have received a copy of the GNU General Public License
19 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | package edu.ucla.sspace.text;
23 |
24 | import java.io.BufferedReader;
25 | import java.io.StringReader;
26 |
27 |
28 | /**
29 | * A {@code LabeledDocument} implementation backed by a {@code String} whose
30 | * contents are used for the document text.
31 | */
32 | public class LabeledStringDocument extends StringDocument
33 | implements LabeledDocument {
34 |
35 | /**
36 | * The label of the document
37 | */
38 | private final String label;
39 |
40 | /**
41 | * Constructs a {@code Document} using the provided string as the document
42 | * text
43 | *
44 | * @param docText the document text
45 | */
46 | public LabeledStringDocument(String label, String docText) {
47 | super(docText);
48 | this.label = label;
49 | }
50 |
51 | /**
52 | * {@inheritDoc}
53 | */
54 | public String label() {
55 | return label;
56 | }
57 | }
58 |
--------------------------------------------------------------------------------
/src/test/java/edu/ucla/sspace/matrix/MatlabSparseFileTransformerTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2010, Lawrence Livermore National Security, LLC. Produced at
3 | * the Lawrence Livermore National Laboratory. Written by Keith Stevens,
4 | * kstevens@cs.ucla.edu OCEC-10-073 All rights reserved.
5 | *
6 | * This file is part of the C-Cat package and is covered under the terms and
7 | * conditions therein.
8 | *
9 | * The S-Space package is free software: you can redistribute it and/or modify
10 | * it under the terms of the GNU General Public License version 2 as published
11 | * by the Free Software Foundation and distributed hereunder to you.
12 | *
13 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
14 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
15 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
16 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
17 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
18 | * RIGHTS.
19 | *
20 | * You should have received a copy of the GNU General Public License
21 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
22 | */
23 |
24 | package edu.ucla.sspace.matrix;
25 |
26 | import edu.ucla.sspace.matrix.MatrixIO.Format;
27 |
28 | import org.junit.Test;
29 |
30 |
31 | /**
32 | * @author Keith Stevens
33 | */
34 | public class MatlabSparseFileTransformerTest {
35 |
36 | public static final double[][] VALUES = {
37 | {1, 1, 1, 4, 5},
38 | {5, 3, 1, 0, 0},
39 | {0, 1, 5, 0, 2},
40 | };
41 |
42 | public static final Matrix MATRIX = new ArrayMatrix(VALUES);
43 |
44 | @Test public void testTransform() {
45 | FileTransformer transformer = new MatlabSparseFileTransformer();
46 | FileTransformUtil.testTransform(MATRIX, Format.MATLAB_SPARSE,
47 | transformer);
48 | }
49 | }
50 |
51 |
--------------------------------------------------------------------------------
/src/test/java/edu/ucla/sspace/matrix/SvdlibcDenseTextFileTransformerTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2010, Lawrence Livermore National Security, LLC. Produced at
3 | * the Lawrence Livermore National Laboratory. Written by Keith Stevens,
4 | * kstevens@cs.ucla.edu OCEC-10-073 All rights reserved.
5 | *
6 | * This file is part of the C-Cat package and is covered under the terms and
7 | * conditions therein.
8 | *
9 | * The S-Space package is free software: you can redistribute it and/or modify
10 | * it under the terms of the GNU General Public License version 2 as published
11 | * by the Free Software Foundation and distributed hereunder to you.
12 | *
13 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
14 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
15 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
16 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
17 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
18 | * RIGHTS.
19 | *
20 | * You should have received a copy of the GNU General Public License
21 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
22 | */
23 |
24 | package edu.ucla.sspace.matrix;
25 |
26 | import edu.ucla.sspace.matrix.MatrixIO.Format;
27 |
28 | import org.junit.Test;
29 |
30 |
31 | /**
32 | * @author Keith Stevens
33 | */
34 | public class SvdlibcDenseTextFileTransformerTest {
35 |
36 | public static final double[][] VALUES = {
37 | {1, 1, 1, 4, 5},
38 | {5, 3, 1, 0, 0},
39 | {0, 1, 5, 0, 2},
40 | };
41 |
42 | public static final Matrix MATRIX = new ArrayMatrix(VALUES);
43 |
44 | @Test public void testTransform() {
45 | FileTransformer transformer = new SvdlibcDenseTextFileTransformer();
46 | FileTransformUtil.testTransform(MATRIX, Format.SVDLIBC_DENSE_TEXT,
47 | transformer);
48 | }
49 | }
50 |
--------------------------------------------------------------------------------
/src/main/java/edu/ucla/sspace/evaluation/WordChoiceReport.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2009 David Jurgens
3 | *
4 | * This file is part of the S-Space package and is covered under the terms and
5 | * conditions therein.
6 | *
7 | * The S-Space package is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License version 2 as published
9 | * by the Free Software Foundation and distributed hereunder to you.
10 | *
11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
16 | * RIGHTS.
17 | *
18 | * You should have received a copy of the GNU General Public License
19 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | package edu.ucla.sspace.evaluation;
23 |
24 |
/**
 * Summarizes how a {@link SemanticSpace} performed on one {@link
 * WordChoiceEvaluation} test.
 *
 * @author David Jurgens
 */
public interface WordChoiceReport {

    /**
     * Returns how many questions the test contained in total.
     */
    int numberOfQuestions();

    /**
     * Returns how many of the questions were answered correctly.
     */
    int correctAnswers();

    /**
     * Returns how many questions the {@link SemanticSpace} was unable to
     * answer because a word vector was missing for the prompt or for one of
     * the options.
     */
    int unanswerableQuestions();

    /**
     * Returns the score achieved on a particular evaluation, ranged between 0
     * and 100.
     */
    double score();
}
56 |
57 |
--------------------------------------------------------------------------------
/src/test/java/edu/ucla/sspace/matrix/SvdlibcSparseTextFileTransformerTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2010, Lawrence Livermore National Security, LLC. Produced at
3 | * the Lawrence Livermore National Laboratory. Written by Keith Stevens,
4 | * kstevens@cs.ucla.edu OCEC-10-073 All rights reserved.
5 | *
6 | * This file is part of the C-Cat package and is covered under the terms and
7 | * conditions therein.
8 | *
9 | * The S-Space package is free software: you can redistribute it and/or modify
10 | * it under the terms of the GNU General Public License version 2 as published
11 | * by the Free Software Foundation and distributed hereunder to you.
12 | *
13 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
14 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
15 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
16 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
17 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
18 | * RIGHTS.
19 | *
20 | * You should have received a copy of the GNU General Public License
21 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
22 | */
23 |
24 | package edu.ucla.sspace.matrix;
25 |
26 | import edu.ucla.sspace.matrix.MatrixIO.Format;
27 |
28 | import org.junit.Test;
29 |
30 |
31 | /**
32 | * @author Keith Stevens
33 | */
34 | public class SvdlibcSparseTextFileTransformerTest {
35 |
36 | public static final double[][] VALUES = {
37 | {1, 1, 1, 4, 5},
38 | {5, 3, 1, 0, 0},
39 | {0, 1, 5, 0, 2},
40 | };
41 |
42 | public static final Matrix MATRIX = new ArrayMatrix(VALUES);
43 |
44 | @Test public void testTransform() {
45 | FileTransformer transformer = new SvdlibcSparseTextFileTransformer();
46 | FileTransformUtil.testTransform(MATRIX, Format.SVDLIBC_SPARSE_TEXT,
47 | transformer);
48 | }
49 | }
50 |
--------------------------------------------------------------------------------
/src/test/java/edu/ucla/sspace/matrix/SvdlibcSparseBinaryFileTransformerTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2010, Lawrence Livermore National Security, LLC. Produced at
3 | * the Lawrence Livermore National Laboratory. Written by Keith Stevens,
4 | * kstevens@cs.ucla.edu OCEC-10-073 All rights reserved.
5 | *
6 | * This file is part of the C-Cat package and is covered under the terms and
7 | * conditions therein.
8 | *
9 | * The S-Space package is free software: you can redistribute it and/or modify
10 | * it under the terms of the GNU General Public License version 2 as published
11 | * by the Free Software Foundation and distributed hereunder to you.
12 | *
13 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
14 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
15 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
16 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
17 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
18 | * RIGHTS.
19 | *
20 | * You should have received a copy of the GNU General Public License
21 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
22 | */
23 |
24 | package edu.ucla.sspace.matrix;
25 |
26 | import edu.ucla.sspace.matrix.MatrixIO.Format;
27 |
28 | import org.junit.Test;
29 |
30 |
31 | /**
32 | * @author Keith Stevens
33 | */
34 | public class SvdlibcSparseBinaryFileTransformerTest {
35 |
36 | public static final double[][] VALUES = {
37 | {1, 1, 1, 4, 5},
38 | {5, 3, 1, 0, 0},
39 | {0, 1, 5, 0, 2},
40 | };
41 |
42 | public static final Matrix MATRIX = new ArrayMatrix(VALUES);
43 |
44 | @Test public void testTransform() {
45 | FileTransformer transformer = new SvdlibcSparseBinaryFileTransformer();
46 | FileTransformUtil.testTransform(MATRIX, Format.SVDLIBC_SPARSE_BINARY,
47 | transformer);
48 | }
49 | }
50 |
--------------------------------------------------------------------------------
/src/test/java/edu/ucla/sspace/matrix/SvdlibcDenseBinaryFileTransformerTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2010, Lawrence Livermore National Security, LLC. Produced at
3 | * the Lawrence Livermore National Laboratory. Written by Keith Stevens,
4 | * kstevens@cs.ucla.edu OCEC-10-073 All rights reserved.
5 | *
6 | * This file is part of the C-Cat package and is covered under the terms and
7 | * conditions therein.
8 | *
9 | * The S-Space package is free software: you can redistribute it and/or modify
10 | * it under the terms of the GNU General Public License version 2 as published
11 | * by the Free Software Foundation and distributed hereunder to you.
12 | *
13 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
14 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
15 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
16 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
17 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
18 | * RIGHTS.
19 | *
20 | * You should have received a copy of the GNU General Public License
21 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
22 | */
23 |
24 | package edu.ucla.sspace.matrix;
25 |
26 | import edu.ucla.sspace.matrix.MatrixIO.Format;
27 |
28 | import org.junit.Test;
29 |
30 |
31 | /**
32 | * @author Keith Stevens
33 | */
34 | public class SvdlibcDenseBinaryFileTransformerTest {
35 |
36 | public static final double[][] VALUES = {
37 | {1, 1, 1, 4, 5},
38 | {5, 3, 1, 0, 0},
39 | {0, 1, 5, 0, 2},
40 | };
41 |
42 | public static final Matrix MATRIX = new ArrayMatrix(VALUES);
43 |
44 | @Test public void testTransform() {
45 | FileTransformer transformer = new SvdlibcDenseBinaryFileTransformer();
46 | FileTransformUtil.testTransform(MATRIX, Format.SVDLIBC_DENSE_BINARY,
47 | transformer);
48 | }
49 | }
50 |
51 |
--------------------------------------------------------------------------------
/src/main/java/edu/ucla/sspace/clustering/criterion/H2Function.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2011 Keith Stevens
3 | *
4 | * This file is part of the S-Space package and is covered under the terms and
5 | * conditions therein.
6 | *
7 | * The S-Space package is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License version 2 as published
9 | * by the Free Software Foundation and distributed hereunder to you.
10 | *
11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
16 | * RIGHTS.
17 | *
18 | * You should have received a copy of the GNU General Public License
19 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | package edu.ucla.sspace.clustering.criterion;
23 |
24 |
25 | /**
26 | * This {@link HybridBaseFunction} uses the {@link E1Function} and the {@link
27 | * I1Function}.
28 | *
29 | * @author Keith Stevens
30 | */
31 | public class H2Function extends HybridBaseFunction {
32 |
33 | /**
34 | * {@inheritDoc}
35 | */
36 | protected BaseFunction getInternalFunction() {
37 | return new I2Function(matrix, centroids, i1Costs,
38 | assignments, clusterSizes);
39 | }
40 |
41 | /**
42 | * {@inheritDoc}
43 | */
44 | protected BaseFunction getExternalFunction() {
45 | return new E1Function(matrix, centroids, e1Costs,
46 | assignments, clusterSizes,
47 | completeCentroid, simToComplete);
48 | }
49 |
50 | /**
51 | * {@inheritDoc}
52 | */
53 | public boolean isMaximize() {
54 | return true;
55 | }
56 | }
57 |
--------------------------------------------------------------------------------
/src/main/java/edu/ucla/sspace/dependency/DependencyTreeNode.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2010 Keith Stevens
3 | *
4 | * This file is part of the S-Space package and is covered under the terms and
5 | * conditions therein.
6 | *
7 | * The S-Space package is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License version 2 as published
9 | * by the Free Software Foundation and distributed hereunder to you.
10 | *
11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
16 | * RIGHTS.
17 | *
18 | * You should have received a copy of the GNU General Public License
19 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | package edu.ucla.sspace.dependency;
23 |
24 | import java.util.List;
25 |
26 |
27 | /**
28 | * The interface for a word in a dependency parse tree.
29 | */
30 | public interface DependencyTreeNode {
31 |
32 | /**
33 | * Returns the list of neighbors to the current node. Note that this list
34 | * include both relations where the current node is the head node and
35 | * relations where the current node is the dependent.
36 | */
37 | List neighbors();
38 |
39 | /**
40 | * Returns the word stored in this node.
41 | */
42 | String word();
43 |
44 | /**
45 | * The lemmatized version of the word, if there is any.
46 | */
47 | String lemma();
48 |
49 | /**
50 | * Returns the part of speech tag for this node.
51 | */
52 | String pos();
53 |
54 | /**
55 | * Returns the index used by this {@link DependencyTreeNode} in an array.
56 | */
57 | int index();
58 | }
59 |
--------------------------------------------------------------------------------
/src/test/java/edu/ucla/sspace/vector/MaskedDoubleVectorViewTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2010 Keith Stevens
3 | *
4 | * This file is part of the S-Space package and is covered under the terms and
5 | * conditions therein.
6 | *
7 | * The S-Space package is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License version 2 as published
9 | * by the Free Software Foundation and distributed hereunder to you.
10 | *
11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
16 | * RIGHTS.
17 | *
18 | * You should have received a copy of the GNU General Public License
19 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | package edu.ucla.sspace.vector;
23 |
24 | import org.junit.Ignore;
25 | import org.junit.Test;
26 |
27 | import static org.junit.Assert.*;
28 |
29 | import java.util.HashMap;
30 | import java.util.Map;
31 |
32 |
33 | /**
34 | * Tests for the {@link MaskedDoubleVectorView} class.
35 | */
36 | public class MaskedDoubleVectorViewTest {
37 |
38 | @Test public void testScaledCreate() {
39 | double[] values = new double[] {1, 2, 3, 4, 5, 6, 7, 8, 9, 10};
40 | DoubleVector v = new DenseVector(values);
41 | int[] mask = new int[3];
42 | mask[0] = 5;
43 | mask[1] = 9;
44 | mask[2] = 1;
45 | DoubleVector masked = new MaskedDoubleVectorView(v, mask);
46 |
47 | assertEquals(mask.length, masked.length());
48 | assertEquals(values[mask[0]], masked.get(0), .00001);
49 | assertEquals(values[mask[1]], masked.get(1), .00001);
50 | assertEquals(values[mask[2]], masked.get(2), .00001);
51 | }
52 | }
53 |
--------------------------------------------------------------------------------
/src/main/java/edu/ucla/sspace/evaluation/WordSimilarityEvaluation.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2009 David Jurgens
3 | *
4 | * This file is part of the S-Space package and is covered under the terms and
5 | * conditions therein.
6 | *
7 | * The S-Space package is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License version 2 as published
9 | * by the Free Software Foundation and distributed hereunder to you.
10 | *
11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
16 | * RIGHTS.
17 | *
18 | * You should have received a copy of the GNU General Public License
19 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | package edu.ucla.sspace.evaluation;
23 |
24 | import java.util.Collection;
25 |
26 | import edu.ucla.sspace.common.SemanticSpace;
27 |
28 | /**
29 | * An evaluation metric that compares the human-judged similarity of word pairs
30 | * against the similarity judgements from a {@link SemanticSpace}.
31 | *
32 | * @author David Jurgens
33 | */
34 | public interface WordSimilarityEvaluation {
35 |
36 | /**
37 | * Returns a collection of human similarity judgements for word pairs.
38 | */
39 | Collection getPairs();
40 |
41 | /**
42 | * Returns the numeric similarity judgement that is equivalent to two words
43 | * being completely similar (i.e. identical).
44 | */
45 | double getMostSimilarValue();
46 |
47 | /**
48 | * Returns the numeric similarity judgement that is equivalent to two words
49 | * being completely dissimilar (i.e. identical).
50 | */
51 | double getLeastSimilarValue();
52 |
53 | }
54 |
--------------------------------------------------------------------------------
/src/test/java/edu/ucla/sspace/vector/DenseVectorTests.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2010 David Jurgens
3 | *
4 | * This file is part of the S-Space package and is covered under the terms and
5 | * conditions therein.
6 | *
7 | * The S-Space package is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License version 2 as published
9 | * by the Free Software Foundation and distributed hereunder to you.
10 | *
11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
16 | * RIGHTS.
17 | *
18 | * You should have received a copy of the GNU General Public License
19 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | package edu.ucla.sspace.vector;
23 |
24 | import org.junit.Ignore;
25 | import org.junit.Test;
26 |
27 | import static org.junit.Assert.*;
28 |
29 | import java.util.HashMap;
30 | import java.util.Map;
31 |
32 |
33 | /**
34 | * Tests for the magnitude computation of the {@link DenseVector} class.
35 | */
36 | public class DenseVectorTests {
37 |
38 | @Test public void testMagnitude() {
39 | DenseVector v = new DenseVector(100);
40 | assertEquals(0, v.magnitude(), .0001); // no values set yet, so zero magnitude is expected
41 |
42 | v.set(1, 1);
43 | assertEquals(1, v.magnitude(), .0001); // a single entry of 1
44 |
45 | v.set(1, 3);
46 | v.set(2, 4);
47 | assertEquals(5, v.magnitude(), .0001); // entries 3 and 4: sqrt(3*3 + 4*4) = 5
48 |
49 | DenseVector v2 = new DenseVector(v); // second vector constructed from the first
50 | assertEquals(5, v2.magnitude(), .0001); // expected to report the same magnitude as v
51 | }
52 |
53 | @Test public void testArrayMagnitude() {
54 | double[] values = new double[] {0, 3, 4, 0, 0};
55 | DenseVector v = new DenseVector(values); // vector constructed directly from an array
56 | assertEquals(5, v.magnitude(), .0001); // entries 3 and 4 again give 5
57 | }
58 | }
59 |
--------------------------------------------------------------------------------
/src/main/java/edu/ucla/sspace/matrix/MatrixIOException.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2011 David Jurgens
3 | *
4 | * This file is part of the S-Space package and is covered under the terms and
5 | * conditions therein.
6 | *
7 | * The S-Space package is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License version 2 as published
9 | * by the Free Software Foundation and distributed hereunder to you.
10 | *
11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
16 | * RIGHTS.
17 | *
18 | * You should have received a copy of the GNU General Public License
19 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | package edu.ucla.sspace.matrix;
23 |
24 | import java.io.IOException;
25 |
26 |
27 | /**
28 | * An {@link IOException} indicating that some error has occurred during reading
29 | * or writing of a matrix file.
30 | */
31 | public class MatrixIOException extends IOException {
32 |
33 | private static final long serialVersionUID = 1L;
34 |
35 | /**
36 | * Creates a {@code MatrixIOException} with no message.
37 | */
38 | public MatrixIOException() { }
39 |
40 | /**
41 | * Creates a {@code MatrixIOException} with the provided message to report
42 | * to the user.
43 | */
44 | public MatrixIOException(String message) {
45 | super(message);
46 | }
47 |
48 | /**
49 | * Creates a {@code MatrixIOException} with the provided message to report
50 | * to the user, recording the {@code Throwable} as the original cause of the
51 | * exception.
52 | */
53 | public MatrixIOException(String message, Throwable cause) {
54 | super(message, cause);
55 | }
56 | }
--------------------------------------------------------------------------------
/src/main/java/edu/ucla/sspace/util/SynchronizedIterator.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2010 Keith Stevens
3 | *
4 | * This file is part of the S-Space package and is covered under the terms and
5 | * conditions therein.
6 | *
7 | * The S-Space package is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License version 2 as published
9 | * by the Free Software Foundation and distributed hereunder to you.
10 | *
11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
16 | * RIGHTS.
17 | *
18 | * You should have received a copy of the GNU General Public License
19 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | package edu.ucla.sspace.util;
23 |
24 | import java.util.Iterator;
25 |
26 |
/**
 * An {@code Iterator} decorator that serializes every call made through it, so
 * that a single iterator can be shared between threads.  Each of {@link
 * #hasNext()}, {@link #next()} and {@link #remove()} holds this object's
 * monitor while it delegates to the wrapped iterator.
 *
 * <p>Only individual calls are atomic.  A {@code hasNext()} check followed by a
 * {@code next()} call may still be interleaved with calls from another thread,
 * so a thread sharing an instance should be prepared for {@code next()} to fail
 * as specified by {@link Iterator#next()} when the elements run out.
 *
 * @param <T> the type of element returned by this iterator
 *
 * @author Keith Stevens
 */
public class SynchronizedIterator<T> implements Iterator<T> {

    /**
     * The wrapped iterator that every call is delegated to.
     */
    private final Iterator<T> iter;

    /**
     * Constructs a {@code SynchronizedIterator} from the provided iterator.
     *
     * @param iterator the iterator whose calls should be serialized
     */
    public SynchronizedIterator(Iterator<T> iterator) {
        iter = iterator;
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public synchronized boolean hasNext() {
        return iter.hasNext();
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public synchronized T next() {
        // Must hold the same monitor as hasNext(); otherwise two threads could
        // advance the wrapped iterator at the same time.
        return iter.next();
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public synchronized void remove() {
        // Guarded for the same reason as next(): removal mutates the state of
        // the wrapped iterator.
        iter.remove();
    }
}
68 |
--------------------------------------------------------------------------------
/src/main/java/edu/ucla/sspace/common/Filterable.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2009 David Jurgens
3 | *
4 | * This file is part of the S-Space package and is covered under the terms and
5 | * conditions therein.
6 | *
7 | * The S-Space package is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License version 2 as published
9 | * by the Free Software Foundation and distributed hereunder to you.
10 | *
11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
16 | * RIGHTS.
17 | *
18 | * You should have received a copy of the GNU General Public License
19 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | package edu.ucla.sspace.common;
23 |
24 | import java.util.Set;
25 |
26 |
27 | /**
28 | * An interface that indicates that this class supports selectively
29 | * filtering which words have their semantics retained. The {@link
30 | * #setSemanticFilter(Set)} method can be used to specify which words should
31 | * have their semantics retained. Note that the words that are filtered out
32 | * will still be used in computing the semantics of other words. This
33 | * behavior is intended for use with large corpora where retaining the
34 | * semantics of all words in memory is infeasible.
35 | *
36 | * @see SemanticSpace
37 | */
38 | public interface Filterable {
39 |
40 | /**
41 | * Specifies the set of words that should have their semantics retained;
42 | * the semantics of all other words are not retained.
43 | *
44 | * @param semanticsToRetain the set of words that should have their
45 | * semantics retained in memory
46 | */
47 | void setSemanticFilter(Set semanticsToRetain);
48 |
49 | }
50 |
--------------------------------------------------------------------------------
/src/main/java/edu/ucla/sspace/wordsi/OccurrenceDependencyContextGenerator.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2010 Keith Stevens
3 | *
4 | * This file is part of the S-Space package and is covered under the terms and
5 | * conditions therein.
6 | *
7 | * The S-Space package is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License version 2 as published
9 | * by the Free Software Foundation and distributed hereunder to you.
10 | *
11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
16 | * RIGHTS.
17 | *
18 | * You should have received a copy of the GNU General Public License
19 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | package edu.ucla.sspace.wordsi;
23 |
24 | import edu.ucla.sspace.basis.BasisMapping;
25 |
26 | import edu.ucla.sspace.dependency.DependencyTreeNode;
27 |
28 |
29 | /**
30 | * A {@link DependencyContextGenerator} that represents each co-occurrence by
31 | * the co-occurring word alone, with no ordering or part of speech marking.
32 | *
33 | * @author Keith Stevens
34 | */
35 | public class OccurrenceDependencyContextGenerator
36 | extends AbstractOccurrenceDependencyContextGenerator{
37 |
38 | /**
39 | * Constructs a new {@link OccurrenceDependencyContextGenerator}.
40 | */
41 | public OccurrenceDependencyContextGenerator(
42 | BasisMapping basis,
43 | int windowSize) {
44 | super(basis, windowSize);
45 | }
46 |
47 | /**
48 | * Returns the node's word unchanged as the feature. The {@code index}
49 | * argument is not used.
50 | */
51 | protected String getFeature(DependencyTreeNode node, int index) {
52 | return node.word();
53 | }
54 | }
55 |
--------------------------------------------------------------------------------
/src/main/java/edu/ucla/sspace/wordsi/OrderingDependencyContextGenerator.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2010 Keith Stevens
3 | *
4 | * This file is part of the S-Space package and is covered under the terms and
5 | * conditions therein.
6 | *
7 | * The S-Space package is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License version 2 as published
9 | * by the Free Software Foundation and distributed hereunder to you.
10 | *
11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
16 | * RIGHTS.
17 | *
18 | * You should have received a copy of the GNU General Public License
19 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | package edu.ucla.sspace.wordsi;
23 |
24 | import edu.ucla.sspace.basis.BasisMapping;
25 |
26 | import edu.ucla.sspace.dependency.DependencyTreeNode;
27 |
28 |
29 | /**
30 | * A {@link DependencyContextGenerator} that marks each co-occurrence with
31 | * ordering information.
32 | *
33 | * @author Keith Stevens
34 | */
35 | public class OrderingDependencyContextGenerator
36 | extends AbstractOccurrenceDependencyContextGenerator{
37 |
38 | /**
39 | * Constructs a new {@link OrderingDependencyContextGenerator}.
40 | */
41 | public OrderingDependencyContextGenerator(
42 | BasisMapping basis,
43 | int windowSize) {
44 | super(basis, windowSize);
45 | }
46 |
47 | /**
48 | * Returns a string with the node's word plus its distance from the focus
49 | * word (the {@code index} argument), with a hyphen between the two.
50 | */
51 | protected String getFeature(DependencyTreeNode node, int index) {
52 | return node.word() + "-" + index;
53 | }
54 | }
55 |
--------------------------------------------------------------------------------
/src/main/java/edu/ucla/sspace/mains/TopicWordsiMain.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2010 Keith Stevens
3 | *
4 | * This file is part of the S-Space package and is covered under the terms and
5 | * conditions therein.
6 | *
7 | * The S-Space package is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License version 2 as published
9 | * by the Free Software Foundation and distributed hereunder to you.
10 | *
11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
16 | * RIGHTS.
17 | *
18 | * You should have received a copy of the GNU General Public License
19 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | package edu.ucla.sspace.mains;
23 |
24 | import edu.ucla.sspace.common.ArgOptions;
25 | import edu.ucla.sspace.common.SemanticSpaceIO.SSpaceFormat;
26 |
27 | import edu.ucla.sspace.wordsi.ContextExtractor;
28 | import edu.ucla.sspace.wordsi.TopicModelContextExtractor;
29 |
30 | import java.util.Map;
31 |
32 |
33 | /**
34 | * A main for running a wordsi model over topic signatures for documents.
35 | *
36 | * @author Keith Stevens
37 | */
38 | public class TopicWordsiMain extends GenericWordsiMain {
39 |
40 | /**
41 | * {@inheritDoc} This implementation returns a new {@link TopicModelContextExtractor}.
42 | */
43 | protected ContextExtractor getExtractor() {
44 | // Create the extractor; it is constructed without any arguments.
45 | return new TopicModelContextExtractor();
46 | }
47 |
48 | /**
49 | * {@inheritDoc} This implementation always selects {@code SPARSE_BINARY}.
50 | */
51 | protected SSpaceFormat getSpaceFormat() {
52 | return SSpaceFormat.SPARSE_BINARY;
53 | }
54 |
55 | public static void main(String[] args) throws Exception { // entry point: hands the raw arguments to run()
56 | TopicWordsiMain main = new TopicWordsiMain();
57 | main.run(args);
58 | }
59 | }
60 |
--------------------------------------------------------------------------------
/src/test/java/edu/ucla/sspace/dependency/FlatPathWeightTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2010 Keith Stevens
3 | *
4 | * This file is part of the S-Space package and is covered under the terms and
5 | * conditions therein.
6 | *
7 | * The S-Space package is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License version 2 as published
9 | * by the Free Software Foundation and distributed hereunder to you.
10 | *
11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
16 | * RIGHTS.
17 | *
18 | * You should have received a copy of the GNU General Public License
19 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | package edu.ucla.sspace.dependency;
23 |
24 | import java.util.List;
25 |
26 | import org.junit.Ignore;
27 | import org.junit.Test;
28 |
29 | import static org.junit.Assert.*;
30 |
31 |
32 | public class FlatPathWeightTest extends AbstractPathUtil { // unit tests for FlatPathWeight path scoring
33 |
34 | @Test public void testSimplePath() {
35 | String[][] pathString = {{"cat", "n", "Rel", "dog", "n"}};
36 | DependencyPath path = makePath(pathString);
37 | DependencyPathWeight weighter = new FlatPathWeight();
38 | assertEquals(1, weighter.scorePath(path), .000001); // a path built from one row is expected to score 1
39 | }
40 |
41 | @Test public void testLongPath() {
42 | String[][] pathString = {{"cat", "n", "Rel", "dog", "n"},
43 | {"dog", "n", "noarelation", "whale", "n"},
44 | {"whale", "n", "noarelation", "pig", "n"}};
45 | DependencyPath path = makePath(pathString);
46 | DependencyPathWeight weighter = new FlatPathWeight();
47 | assertEquals(1, weighter.scorePath(path), .000001); // three rows: the expected score is still 1
48 | }
49 | }
50 |
--------------------------------------------------------------------------------
/src/main/java/edu/ucla/sspace/graph/TypedEdge.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2011 David Jurgens
3 | *
4 | * This file is part of the S-Space package and is covered under the terms and
5 | * conditions therein.
6 | *
7 | * The S-Space package is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License version 2 as published
9 | * by the Free Software Foundation and distributed hereunder to you.
10 | *
11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
16 | * RIGHTS.
17 | *
18 | * You should have received a copy of the GNU General Public License
19 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | package edu.ucla.sspace.graph;
23 |
24 |
25 | /**
26 | * An interface for edges in multigraphs. In a multigraph, two vertices
27 | * v1 and v2 may have multiple edges between them provided
28 | * that for any two of those edges {@code !e1.equals(e2)}.
29 | *
30 | * This interface allows for a multigraph to have multiple types of
31 | * edges that extend from a common type. For example, a graph that represents
32 | * cities may contain edges indicating the different types of transportation
33 | * (e.g. car, train, bus) between two cities, where those types each have their
34 | * own subtypes (e.g., airline carrier, bus company, etc.)
35 | *
36 | * @see Multigraph
37 | */
38 | public interface TypedEdge extends Edge {
39 |
40 | /**
41 | * Returns the type of information conveyed by this edge.
42 | */
43 | T edgeType(); // NOTE(review): T has no visible declaration in this listing; presumably the interface's type parameter - confirm
44 |
45 | /**
46 | * Returns {@code true} if the other edge is considered equivalent to this
47 | * edge in a multigraph.
48 | */
49 | boolean equals(Object o);
50 |
51 | }
--------------------------------------------------------------------------------
/src/main/java/edu/ucla/sspace/wordsi/PartOfSpeechDependencyContextGenerator.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2010 Keith Stevens
3 | *
4 | * This file is part of the S-Space package and is covered under the terms and
5 | * conditions therein.
6 | *
7 | * The S-Space package is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License version 2 as published
9 | * by the Free Software Foundation and distributed hereunder to you.
10 | *
11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
16 | * RIGHTS.
17 | *
18 | * You should have received a copy of the GNU General Public License
19 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | package edu.ucla.sspace.wordsi;
23 |
24 | import edu.ucla.sspace.basis.BasisMapping;
25 |
26 | import edu.ucla.sspace.dependency.DependencyTreeNode;
27 |
28 |
29 | /**
30 | * A {@link DependencyContextGenerator} that marks each co-occurrence with part
31 | * of speech information.
32 | *
33 | * @author Keith Stevens
34 | */
35 | public class PartOfSpeechDependencyContextGenerator
36 | extends AbstractOccurrenceDependencyContextGenerator{
37 |
38 | /**
39 | * Constructs a new {@link PartOfSpeechDependencyContextGenerator}.
40 | */
41 | public PartOfSpeechDependencyContextGenerator(
42 | BasisMapping basis,
43 | int windowSize) {
44 | super(basis, windowSize);
45 | }
46 |
47 | /**
48 | * Returns a string with the node's word plus its part of speech, with a
49 | * hyphen between the two. The {@code index} argument is not used.
50 | */
51 | protected String getFeature(DependencyTreeNode node, int index) {
52 | return node.word() + "-" + node.pos();
53 | }
54 | }
55 |
--------------------------------------------------------------------------------
/src/main/java/edu/ucla/sspace/util/FileResourceFinder.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2010 David Jurgens
3 | *
4 | * This file is part of the S-Space package and is covered under the terms and
5 | * conditions therein.
6 | *
7 | * The S-Space package is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License version 2 as published
9 | * by the Free Software Foundation and distributed hereunder to you.
10 | *
11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
16 | * RIGHTS.
17 | *
18 | * You should have received a copy of the GNU General Public License
19 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | package edu.ucla.sspace.util;
23 |
24 | import java.io.BufferedReader;
25 | import java.io.FileReader;
26 | import java.io.IOException;
27 |
28 |
29 | /**
30 | * A {@link ResourceFinder} implementation that maps file names to {@link
31 | * java.io.File} instances. This class is the default implementation for all
32 | * systems operating with a standard JVM environment.
33 | */
34 | public class FileResourceFinder implements ResourceFinder {
35 |
36 | public FileResourceFinder() { } // no state to initialize
37 |
38 | /**
39 | * Finds the file with the specified name and returns a reader for that
40 | * file's contents, decoded with the default charset used by {@link FileReader}.
41 | *
42 | * @param fileName the name of a file
43 | *
44 | * @return a {@code BufferedReader} to the contents of the specified file
45 | *
46 | * @throws IOException if the resource cannot be found or if an error occurs
47 | * while opening the resource
48 | */
49 | public BufferedReader open(String fileName) throws IOException {
50 | return new BufferedReader(new FileReader(fileName)); // returned open; the caller is responsible for closing it
51 | }
52 |
53 | }
--------------------------------------------------------------------------------
/src/main/java/edu/ucla/sspace/matrix/SimpleEntry.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2009 David Jurgens
3 | *
4 | * This file is part of the S-Space package and is covered under the terms and
5 | * conditions therein.
6 | *
7 | * The S-Space package is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License version 2 as published
9 | * by the Free Software Foundation and distributed hereunder to you.
10 | *
11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
16 | * RIGHTS.
17 | *
18 | * You should have received a copy of the GNU General Public License
19 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | package edu.ucla.sspace.matrix;
23 |
24 |
25 | /**
26 | * A basic implementation of {@link MatrixEntry} with a fixed row, column and value.
27 | *
28 | * @see MatrixIO#getIterator(File,MatrixIO.Format)
29 | */
30 | class SimpleEntry implements MatrixEntry, java.io.Serializable {
31 |
32 | private static final long serialVersionUID = 1L;
33 |
34 | private final int row;
35 | private final int column;
36 | private final double value;
37 |
38 | public SimpleEntry(int row, int column, double value) {
39 | this.row = row;
40 | this.column = column;
41 | this.value = value;
42 | }
43 |
44 | /**
45 | * {@inheritDoc}
46 | */
47 | public int column() {
48 | return column;
49 | }
50 |
51 | /**
52 | * {@inheritDoc}
53 | */
54 | public int row() {
55 | return row;
56 | }
57 |
58 | /**
59 | * {@inheritDoc}
60 | */
61 | public double value() {
62 | return value;
63 | }
64 |
65 | public String toString() { // renders the entry as (row,column:value)
66 | return "(" + row + "," + column + ":" + value + ")";
67 | }
68 | }
--------------------------------------------------------------------------------
/src/test/java/edu/ucla/sspace/dependency/LengthPathWeightTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2010 Keith Stevens
3 | *
4 | * This file is part of the S-Space package and is covered under the terms and
5 | * conditions therein.
6 | *
7 | * The S-Space package is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License version 2 as published
9 | * by the Free Software Foundation and distributed hereunder to you.
10 | *
11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
16 | * RIGHTS.
17 | *
18 | * You should have received a copy of the GNU General Public License
19 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | package edu.ucla.sspace.dependency;
23 |
24 | import java.util.LinkedList;
25 | import java.util.List;
26 |
27 | import org.junit.Ignore;
28 | import org.junit.Test;
29 |
30 | import static org.junit.Assert.*;
31 |
32 |
33 | public class LengthPathWeightTest extends AbstractPathUtil { // unit tests for LengthPathWeight path scoring
34 |
35 | @Test public void testSimplePath() {
36 | String[][] pathString = {{"cat", "n", "Rel", "dog", "n"}};
37 | DependencyPath path = makePath(pathString);
38 | DependencyPathWeight weighter = new LengthPathWeight();
39 | assertEquals(1, weighter.scorePath(path), .000001); // a path built from one row is expected to score 1
40 | }
41 |
42 | @Test public void testLongPath() {
43 | String[][] pathString = {{"cat", "n", "Rel", "dog", "n"},
44 | {"dog", "n", "noarelation", "whale", "n"},
45 | {"whale", "n", "noarelation", "pig", "n"}};
46 | DependencyPath path = makePath(pathString);
47 | DependencyPathWeight weighter = new LengthPathWeight();
48 | assertEquals(1d/3, weighter.scorePath(path), .000001); // three rows: the expected score drops to 1/3
49 | }
50 | }
51 |
--------------------------------------------------------------------------------
/src/main/java/edu/ucla/sspace/similarity/OneSimilarity.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2011, Lawrence Livermore National Security, LLC. Produced at
3 | * the Lawrence Livermore National Laboratory. Written by Keith Stevens,
4 | * kstevens@cs.ucla.edu OCEC-10-073 All rights reserved.
5 | *
6 | * This file is part of the S-Space package and is covered under the terms and
7 | * conditions therein.
8 | *
9 | * The S-Space package is free software: you can redistribute it and/or modify
10 | * it under the terms of the GNU General Public License version 2 as published
11 | * by the Free Software Foundation and distributed hereunder to you.
12 | *
13 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
14 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
15 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
16 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
17 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
18 | * RIGHTS.
19 | *
20 | * You should have received a copy of the GNU General Public License
21 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
22 | */
23 |
24 | package edu.ucla.sspace.similarity;
25 |
26 | import edu.ucla.sspace.vector.DoubleVector;
27 | import edu.ucla.sspace.vector.IntegerVector;
28 | import edu.ucla.sspace.vector.Vector;
29 | import edu.ucla.sspace.vector.VectorMath;
30 |
31 |
32 | /**
33 | * A similarity function that returns {@code 1} for every pair of vectors,
34 | * regardless of their contents.
35 | *
36 | *
37 | * This metric is symmetric.
38 | *
39 | * @author Keith Stevens
40 | */
41 | public class OneSimilarity extends AbstractSymmetricSimilarityFunction {
42 |
43 | /**
44 | * {@inheritDoc} This implementation ignores both vectors and returns {@code 1}.
45 | */
46 | public double sim(DoubleVector v1, DoubleVector v2) {
47 | return 1;
48 | }
49 |
50 | /**
51 | * {@inheritDoc} This implementation ignores both vectors and returns {@code 1}.
52 | */
53 | public double sim(IntegerVector v1, IntegerVector v2) {
54 | return 1;
55 | }
56 |
57 | /**
58 | * {@inheritDoc} This implementation ignores both vectors and returns {@code 1}.
59 | */
60 | public double sim(Vector v1, Vector v2) {
61 | return 1;
62 | }
63 | }
64 |
--------------------------------------------------------------------------------
/src/main/java/edu/ucla/sspace/text/StringDocument.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2009 David Jurgens
3 | *
4 | * This file is part of the S-Space package and is covered under the terms and
5 | * conditions therein.
6 | *
7 | * The S-Space package is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License version 2 as published
9 | * by the Free Software Foundation and distributed hereunder to you.
10 | *
11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
16 | * RIGHTS.
17 | *
18 | * You should have received a copy of the GNU General Public License
19 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | package edu.ucla.sspace.text;
23 |
24 | import java.io.BufferedReader;
25 | import java.io.StringReader;
26 |
27 | /**
28 | * A {@code Document} implementation backed by a {@code String} whose contents
29 | * are used for the document text.
30 | */
31 | public class StringDocument implements Document {
32 |
33 | /**
34 | * The text of the document.
35 | */
36 | private final String text;
37 |
38 | /**
39 | * Constructs a {@code Document} using the provided string as the document
40 | * text.
41 | *
42 | * @param docText the document text
43 | */
44 | public StringDocument(String docText) {
45 | this.text = docText;
46 | }
47 |
48 | /**
49 | * {@inheritDoc} This method may be repeatedly called to re-read the
50 | * contents of the document; each call returns a new reader over the text.
51 | */
52 | public BufferedReader reader() {
53 | return new BufferedReader(new StringReader(text));
54 | }
55 |
56 | /**
57 | * Returns the entire document text.
58 | */
59 | public String toString() {
60 | return text;
61 | }
62 | }
63 |
--------------------------------------------------------------------------------
/src/test/java/edu/ucla/sspace/util/ObjectCounterTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2011 David Jurgens
3 | *
4 | * This file is part of the S-Space package and is covered under the terms and
5 | * conditions therein.
6 | *
7 | * The S-Space package is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License version 2 as published
9 | * by the Free Software Foundation and distributed hereunder to you.
10 | *
11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
16 | * RIGHTS.
17 | *
18 | * You should have received a copy of the GNU General Public License
19 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | package edu.ucla.sspace.util;
23 |
24 | import java.util.*;
25 |
26 | import org.junit.Ignore;
27 | import org.junit.Test;
28 |
29 | import static org.junit.Assert.*;
30 |
31 |
32 | /**
33 | * A collection of unit tests for {@link ObjectCounter}
34 | */
35 | public class ObjectCounterTest {
36 |
37 | @Test public void testCount() {
38 | Counter c = new ObjectCounter();
39 | c.count(1);
40 | assertEquals(1, c.sum());
41 | assertEquals(1, c.items().size());
42 | assertEquals(1, c.getCount(1));
43 |
44 | c.count(1);
45 | assertEquals(2, c.sum());
46 | assertEquals(1, c.items().size());
47 | assertEquals(2, c.getCount(1));
48 |
49 | c.count(2);
50 | assertEquals(3, c.sum());
51 | assertEquals(2, c.items().size());
52 | assertEquals(1, c.getCount(2));
53 | }
54 |
55 | @Test public void testMax() {
56 | Counter c = new ObjectCounter();
57 | c.count(5);
58 | c.count(5);
59 | c.count(3);
60 | assertEquals(5, c.max().intValue());
61 | }
62 | }
--------------------------------------------------------------------------------
/src/main/java/edu/ucla/sspace/wordsi/ContextExtractor.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2010 Keith Stevens
3 | *
4 | * This file is part of the S-Space package and is covered under the terms and
5 | * conditions therein.
6 | *
7 | * The S-Space package is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License version 2 as published
9 | * by the Free Software Foundation and distributed hereunder to you.
10 | *
11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
16 | * RIGHTS.
17 | *
18 | * You should have received a copy of the GNU General Public License
19 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | package edu.ucla.sspace.wordsi;
23 |
24 | import java.io.BufferedReader;
25 |
26 |
27 | /**
28 | * An interface for extracting context vectors from a document and passing on
29 | * the vector to a {@link Wordsi} implementation. Implementations are
30 | * recomended to use either a {@link ContextGenerator} or a {@link BasisMapping}
31 | * that is serializable. Use of a {@link ContextGenerator} or a {@link
32 | * BasisMapping} separates the feature space from the text traveral, allowing
33 | * the feature space to be reused, even if a different text traversal method
34 | * needs to be used.
35 | *
36 | * @author Keith Stevens
37 | */
38 | public interface ContextExtractor {
39 |
40 | /**
41 | * Processes the content of {@code document} and calls {@link
42 | * Wordsi#handleContextVector} for each context vector that can be extracted
43 | * from {@code document}.
44 | */
45 | void processDocument(BufferedReader document, Wordsi wordsi);
46 |
47 | /**
48 | * Returns the maximum number of dimensions used to represent any given
49 | * context.
50 | */
51 | int getVectorLength();
52 | }
53 |
--------------------------------------------------------------------------------
/src/main/java/edu/ucla/sspace/util/primitive/IntPair.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2011 David Jurgens
3 | *
4 | * This file is part of the S-Space package and is covered under the terms and
5 | * conditions therein.
6 | *
7 | * The S-Space package is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License version 2 as published
9 | * by the Free Software Foundation and distributed hereunder to you.
10 | *
11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
16 | * RIGHTS.
17 | *
18 | * You should have received a copy of the GNU General Public License
19 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | package edu.ucla.sspace.util.primitive;
23 |
24 |
25 | /**
26 | * A utility class for holding two {@code int}s.
27 | */
28 | public class IntPair {
29 |
30 | /**
31 | * The first {@code int} in the pair
32 | */
33 | public final int x;
34 |
35 | /**
36 | * The second {@code int} in the pair
37 | */
38 | public final int y;
39 |
40 | /**
41 | * Creates a pair out of {@code x} and {@code y}
42 | */
43 | public IntPair(int x, int y) {
44 | this.x = x;
45 | this.y = y;
46 | }
47 |
48 | /**
49 | * Returns {@code true} if {@code o} is a {@link Pair} and its {@code x} and
50 | * {@code y} elements are equal to those of this pair. Note that equality
51 | * is specific to the ordering of {@code x} and {@code y}.
52 | */
53 | public boolean equals(Object o) {
54 | if (!(o instanceof IntPair))
55 | return false;
56 | IntPair p = (IntPair)o;
57 | return x == p.x && y == p.y;
58 | }
59 |
60 | public int hashCode() {
61 | return x ^ y;
62 | }
63 |
64 | public String toString() {
65 | return "{" + x + ", " + y + "}";
66 | }
67 | }
--------------------------------------------------------------------------------
/src/main/java/edu/ucla/sspace/similarity/KendallsTau.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2011 David Jurgens
3 | *
4 | * This file is part of the S-Space package and is covered under the terms and
5 | * conditions therein.
6 | *
7 | * The S-Space package is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License version 2 as published
9 | * by the Free Software Foundation and distributed hereunder to you.
10 | *
11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
16 | * RIGHTS.
17 | *
18 | * You should have received a copy of the GNU General Public License
19 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | package edu.ucla.sspace.similarity;
23 |
24 | import edu.ucla.sspace.common.Similarity;
25 |
26 | import edu.ucla.sspace.vector.DoubleVector;
27 | import edu.ucla.sspace.vector.IntegerVector;
28 | import edu.ucla.sspace.vector.Vector;
29 |
30 |
31 | /**
32 | * A functional class for computing Kendall's tau of the
34 | * values in the two vectors. This method uses tau-b, which is suitable for
35 | * vectors with duplicate values.
36 | *
37 | * @author David Jurgens
38 | */
39 | public class KendallsTau extends AbstractSymmetricSimilarityFunction {
40 |
41 | /**
42 | * {@inheritDoc}
43 | */
44 | public double sim(DoubleVector v1, DoubleVector v2) {
45 | return Similarity.kendallsTau(v1, v2);
46 | }
47 |
48 | /**
49 | * {@inheritDoc}
50 | */
51 | public double sim(IntegerVector v1, IntegerVector v2) {
52 | return Similarity.kendallsTau(v1, v2);
53 | }
54 |
55 | /**
56 | * {@inheritDoc}
57 | */
58 | public double sim(Vector v1, Vector v2) {
59 | return Similarity.kendallsTau(v1, v2);
60 | }
61 | }
62 |
--------------------------------------------------------------------------------
/src/main/java/edu/ucla/sspace/similarity/TanimotoCoefficient.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright 2011 David Jurgens
3 | *
4 | * This file is part of the S-Space package and is covered under the terms and
5 | * conditions therein.
6 | *
7 | * The S-Space package is free software: you can redistribute it and/or modify
8 | * it under the terms of the GNU General Public License version 2 as published
9 | * by the Free Software Foundation and distributed hereunder to you.
10 | *
11 | * THIS SOFTWARE IS PROVIDED "AS IS" AND NO REPRESENTATIONS OR WARRANTIES,
12 | * EXPRESS OR IMPLIED ARE MADE. BY WAY OF EXAMPLE, BUT NOT LIMITATION, WE MAKE
13 | * NO REPRESENTATIONS OR WARRANTIES OF MERCHANT- ABILITY OR FITNESS FOR ANY
14 | * PARTICULAR PURPOSE OR THAT THE USE OF THE LICENSED SOFTWARE OR DOCUMENTATION
15 | * WILL NOT INFRINGE ANY THIRD PARTY PATENTS, COPYRIGHTS, TRADEMARKS OR OTHER
16 | * RIGHTS.
17 | *
18 | * You should have received a copy of the GNU General Public License
19 | * along with this program. If not, see <http://www.gnu.org/licenses/>.
20 | */
21 |
22 | package edu.ucla.sspace.similarity;
23 |
24 | import edu.ucla.sspace.common.Similarity;
25 |
26 | import edu.ucla.sspace.vector.DoubleVector;
27 | import edu.ucla.sspace.vector.IntegerVector;
28 | import edu.ucla.sspace.vector.Vector;
29 |
30 |
31 | /**
32 | * Returns the Tanimoto
34 | * Coefficient between any two {@link Vector}s.
35 | *
36 | * @author David Jurgens
37 | */
38 | public class TanimotoCoefficient extends AbstractSymmetricSimilarityFunction {
39 |
40 | /**
41 | * {@inheritDoc}
42 | */
43 | public double sim(DoubleVector v1, DoubleVector v2) {
44 | return Similarity.tanimotoCoefficient(v1, v2);
45 | }
46 |
47 | /**
48 | * {@inheritDoc}
49 | */
50 | public double sim(IntegerVector v1, IntegerVector v2) {
51 | return Similarity.tanimotoCoefficient(v1, v2);
52 | }
53 |
54 | /**
55 | * {@inheritDoc}
56 | */
57 | public double sim(Vector v1, Vector v2) {
58 | return Similarity.tanimotoCoefficient(v1, v2);
59 | }
60 | }
61 |
--------------------------------------------------------------------------------