├── .gitignore ├── README-AI2.md ├── README.md ├── USAGE-GUIDE.md ├── build.sbt ├── images ├── ikeguidepic1.png ├── ikeguidepic2.png ├── ikeguidepic3.png ├── ikeguidepic4.png ├── ikeguidepic5.png ├── ikeguidepic6.png ├── ikeguidepic7.png ├── ikeguidepic8.png └── ikeguidepic9.png ├── installing_ike_contributed_by_UTAustin.pdf ├── license.txt ├── project ├── Dependencies.scala ├── build.properties └── plugins.sbt ├── src ├── main │ ├── bin │ │ └── ike_control.sh │ ├── etc │ │ ├── default │ │ │ └── ike.template │ │ └── init.d │ │ │ └── ike │ ├── resources │ │ ├── all-corpora.conf │ │ ├── application.conf │ │ ├── logback.xml │ │ └── pgpass-ENV.sh.template │ └── scala │ │ └── org │ │ └── allenai │ │ └── ike │ │ ├── BlackLabDocument.scala │ │ ├── BlackLabResult.scala │ │ ├── BlackLabSemantics.scala │ │ ├── DataFile.scala │ │ ├── GroupedBlackLabResult.scala │ │ ├── IkeBatchSearch.scala │ │ ├── IkeKryoRegistrator.scala │ │ ├── IkeToolWebapp.scala │ │ ├── Interval.scala │ │ ├── JsonSerialization.scala │ │ ├── QExprJsonSerialization.scala │ │ ├── QueryLanguage.scala │ │ ├── SearchApp.scala │ │ ├── SearchResultGrouper.scala │ │ ├── SimilarPhrasesSearcher.scala │ │ ├── Table.scala │ │ ├── TableExpander.scala │ │ ├── WordData.scala │ │ ├── index │ │ ├── AnnotationIndexer.scala │ │ ├── CliUtils.scala │ │ ├── CreateIndex.scala │ │ ├── CreatePhraseVectors.scala │ │ ├── IdText.scala │ │ ├── IndexableText.scala │ │ ├── IndexableToken.scala │ │ ├── NlpAnnotate.scala │ │ ├── NlpAnnotatedText.scala │ │ └── XmlSerialization.scala │ │ ├── ml │ │ ├── HitAnalyzer.scala │ │ ├── QueryEvaluator.scala │ │ ├── QueryGeneralizer.scala │ │ ├── QuerySuggester.scala │ │ ├── README.md │ │ ├── TokenizedQuery.scala │ │ ├── compoundop │ │ │ ├── CompoundQueryOp.scala │ │ │ ├── EvaluatedOp.scala │ │ │ ├── NullOp.scala │ │ │ ├── OpConjunction.scala │ │ │ └── OpConjunctionOfDisjunctions.scala │ │ ├── queryop │ │ │ ├── GeneralizingOpGenerator.scala │ │ │ ├── OpGenerator.scala │ │ │ ├── QLeafGenerator.scala │ │ │ ├── QueryOp.scala │ │ │ ├── SimilarPhraseMatchTracker.scala │ │ │ └── SpecifyingOpGenerator.scala │ │ └── subsample │ │ │ ├── GeneralizedQuerySampler.scala │ │ │ ├── MatchesSampler.scala │ │ │ ├── Sampler.scala │ │ │ ├── SpanQueryFilterByCaptureGroups.scala │ │ │ ├── SpanQueryMinimumValidCaptures.scala │ │ │ ├── SpanQueryStartAt.scala │ │ │ ├── SpanQueryTrackingDisjunction.scala │ │ │ ├── SpansFilterByCaptureGroups.scala │ │ │ ├── SpansMinimumValidCaptures.scala │ │ │ ├── SpansStartAt.scala │ │ │ ├── SpansTrackingDisjunction.scala │ │ │ └── TextPatternTrackingDisjunction.scala │ │ ├── patterns │ │ ├── NamedPattern.scala │ │ └── PatternUtilities.scala │ │ └── persistence │ │ ├── IkePostgresDriver.scala │ │ └── Tablestore.scala └── test │ ├── resources │ └── testPatterns.conf │ └── scala │ └── org │ └── allenai │ └── ike │ ├── TestBlackLabSemantics.scala │ ├── TestQExprParser.scala │ ├── TestQueryLanguage.scala │ ├── index │ ├── BlackLabExample.scala │ ├── TestCreateIndex.scala │ ├── TestData.scala │ └── TestSearcher.scala │ ├── ml │ ├── SimilarPhrasesSearcherStub.scala │ ├── TestHitAnalyzer.scala │ ├── TestQueryGeneralizer.scala │ ├── TestQuerySuggester.scala │ ├── TestTokenizedQuery.scala │ ├── compoundop │ │ ├── TestCompoundQueryTokenOp.scala │ │ ├── TestOpConjunction.scala │ │ └── TestOpConjunctionOfDisjunctions.scala │ ├── queryop │ │ ├── TestGeneralizingOpGenerator.scala │ │ ├── TestSimilarPhraseTracker.scala │ │ └── TestSpecifyOpGenerator.scala │ └── subsample │ │ ├── DocFieldLengthGetterStub.scala │ │ ├── 
SpansStub.scala │ │ ├── TestGeneralizedQuerySampler.scala │ │ ├── TestMatchesSampler.scala │ │ ├── TestMinimumValidCaptures.scala │ │ ├── TestSpanQueryFilterByCaptureGroups.scala │ │ └── TestSpansTrackingDisjunction.scala │ └── patterns │ └── TestPatternUtilities.scala ├── version.sbt └── webapp ├── .gitignore ├── .jshintrc ├── app ├── assets │ ├── blank_user.png │ ├── glyphicons-halflings-regular.eot │ ├── glyphicons-halflings-regular.svg │ ├── glyphicons-halflings-regular.ttf │ ├── glyphicons-halflings-regular.woff │ ├── glyphicons-halflings-regular.woff2 │ └── logo.png ├── css │ ├── _app.less │ ├── _corpora.less │ ├── _header.less │ ├── _tree.less │ └── main.less ├── index.html └── js │ ├── DictApp.js │ ├── components │ ├── Header.js │ ├── config │ │ └── ConfigInterface.js │ ├── corpora │ │ ├── Corpora.js │ │ └── CorpusSelector.js │ ├── help │ │ └── HelpInterface.js │ ├── misc │ │ ├── DeleteButton.js │ │ ├── EditableList.js │ │ └── ProvenanceButton.js │ ├── pattern │ │ ├── PatternEditor.js │ │ └── PatternsInterface.js │ ├── search │ │ ├── AddResultButton.js │ │ ├── QExpr.js │ │ ├── QueryViewer.js │ │ ├── ResultContext.js │ │ ├── ResultContextSet.js │ │ ├── ResultGroup.js │ │ ├── SearchForm.js │ │ ├── SearchInterface.js │ │ ├── SearchResults.js │ │ ├── SuggestQueryButton.js │ │ ├── SuggestQueryButtonGroup.js │ │ ├── TargetSelector.js │ │ └── Tree.js │ └── table │ │ ├── DeleteTableButton.js │ │ ├── DownloadTableButton.js │ │ ├── RowAdder.js │ │ ├── SubTable.js │ │ ├── Table.js │ │ ├── TableAdder.js │ │ ├── TableButtonToolbar.js │ │ ├── TableLoader.js │ │ ├── TableRow.js │ │ └── TablesInterface.js │ ├── constants │ ├── AuthConstants.js │ └── CorporaConstants.js │ ├── dispatcher │ └── AppDispatcher.js │ ├── managers │ └── TableManager.js │ └── stores │ ├── AuthStore.js │ ├── CorporaStore.js │ └── NamedPatternsStore.js ├── gulpfile.js └── package.json /.gitignore: -------------------------------------------------------------------------------- 1 | *.class 2 | *.log 3 | *.swp 4 | *.tsv 5 | *.txt 6 | 7 | # sbt specific 8 | .cache/ 9 | .history/ 10 | .lib/ 11 | dist/* 12 | target/ 13 | lib_managed/ 14 | src_managed/ 15 | project/boot/ 16 | project/plugins/project/ 17 | 18 | # Scala-IDE specific 19 | .scala_dependencies 20 | .worksheet 21 | public/ 22 | 23 | # eclipse 24 | .cache 25 | .classpath 26 | .project 27 | .settings/ 28 | /bin/ 29 | 30 | # intellij 31 | .idea 32 | 33 | # OS X 34 | .DS_Store 35 | 36 | # deploy stuff 37 | /conf/deploy.conf 38 | /scripts/run_on_instance.sh 39 | /scripts/set_up_instance.sh 40 | /src/main/resources/prod.conf 41 | /src/main/resources/test.conf 42 | /src/main/etc/default/ike.prod 43 | /src/main/etc/default/ike.test 44 | -------------------------------------------------------------------------------- /README-AI2.md: -------------------------------------------------------------------------------- 1 | IKE AI2-internal Details 2 | ======================== 3 | 4 | [Planning Document](https://docs.google.com/a/allenai.org/document/d/1DXx43Nrk-05ynk3KQm6_S6s3bQG15lf9dBEbTcKr24Y/edit#) 5 | 6 | 7 | ## Deploy 8 | 1. Get the IKE deployment key. It's in the `ai2-secure` bucket in S3, called `OkCorpusDeploymentKey.zip`. 9 | 2. Set the `AWS_PEM_FILE` variable to point at the private key file. 10 | 3. All deploy configs and scripts are in the AI2 private `deploy` repo, under the `ike` directory. In order to use them, clone the `deploy` project locally. 11 | 4. 
Run the [`setup.sh`](https://github.com/allenai/deploy/blob/master/ike/setup.sh) script from the deploy repo, which will set up symlinks to the paths expected by the deploy script. 12 | 5. If you want to create a new machine, run the script [`set_up_instance.sh`](https://github.com/allenai/deploy/blob/master/ike/scripts/set_up_instance.sh). It will create a new instance in EC2 and set it up for deployment. If you want to deploy to the existing machine, skip this step. 13 | 6. Run `sbt "deploy prod"` / `sbt "deploy test"`. 14 | 15 | ## Logging 16 | 17 | IKE logs to standard out and to a rotated log file in `/local/deploy/ike/logs`, just like all other AI2 services. In addition, it logs to Papertrail at https://papertrailapp.com/groups/1690753. Papertrail is configured to write archives to an S3 bucket named `ai2-papertrail-backup`. All archives go there, not only IKE's, so to get the IKE logs you have to filter them out. 18 | 19 | ### Usage logging 20 | 21 | IKE logs usage information, such as who is using the tool, how much they are using it, and which features are most popular. All that information goes into the logs together with all other logging information, but it uses the special logger named "Usage". There is a preconfigured search in Papertrail that shows this information at https://papertrailapp.com/groups/1690753/events?q=Usage%3A. 22 | 23 | The key thing about the usage logger is that the first token in the log message is always the thing being used, e.g., `groupedSearch` or `similarPhrases`. The rest of the message is extra information specific to the thing being used. 24 | 25 | So far we have no tools to analyze this information further. 26 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Build Status](https://semaphoreci.com/api/v1/allenai/ike/branches/master/shields_badge.svg)](https://semaphoreci.com/allenai/ike) 2 | IKE (Interactive Knowledge Extraction) 3 | ====================================== 4 | 5 | ## Usage Guide 6 | [First-time users, please refer to the IKE Getting Started Guide](USAGE-GUIDE.md) 7 | 8 | [Installation instructions (thanks to Ben Yang, Univ. Texas at Austin)](installing_ike_contributed_by_UTAustin.pdf) 9 | 10 | ## Run Locally 11 | 1. Install PostgreSQL locally and create a database for use by IKE. IKE needs this to store the tables you create. 12 | 2. Modify the `Tablestore` settings in the [IKE config](https://github.com/allenai/ike/blob/master/src/main/resources/application.conf) with the appropriate database JDBC URL and credentials. 13 | 3. Run `sbt`. 14 | 4. Enter the `reStart` command. 15 | 5. Open http://localhost:8080 in a browser. 16 | 17 | The webapp will download some large files from the [datastore](https://github.com/allenai/datastore) upon first request. This could take several minutes. You will see a series of messages that look like the following: 18 | 19 | ``` 20 | ike 2016-05-11 13:46:27,070 INFO org.allenai.datastore.Datastore - Downloading org.allenai.dictionary.indexes/WaterlooFilteredV2Shard4-d1.zip from the public datastore. 1.23 GB bytes read. 21 | ike 2016-05-11 13:46:28,260 INFO org.allenai.datastore.Datastore - Downloading org.allenai.dictionary.indexes/WaterlooFilteredV2Shard4-d1.zip from the public datastore. 1.23 GB bytes read.
22 | ike 2016-05-11 13:46:44,521 INFO org.allenai.datastore.Datastore - Downloading org.allenai.dictionary.indexes/WaterlooFilteredV2Shard4-d1.zip from the public datastore. 1.23 GB bytes read. 23 | ``` 24 | On subsequent runs, the service will start up quickly as the downloaded indexes are cached locally. 25 | 26 | ## Creating and using an Index 27 | To create an index, you need the source text either as a directory of text files, or as one file with one document per line. Once you have that, run this in `sbt`: 28 | ``` 29 | ike/runMain org.allenai.ike.index.CreateIndex --help 30 | ``` 31 | At the time of writing, this prints 32 | ``` 33 | Usage: CreateIndex [options] 34 | -d <value> | --destination <value> 35 | Directory to create the index in 36 | -b <value> | --batchSize <value> 37 | Batch size 38 | -t <value> | --textSource <value> 39 | URL of a file or directory to load the text from 40 | --help 41 | ``` 42 | The URL for the corpus can be either a file URL or a datastore URL. A datastore URL looks like this: `datastore://{public|private}/<group>/<name>-v<version>.<ext>` for files, and `datastore://{public|private}/<group>/<name>-d<version>` for directories. 43 | 44 | NOTE: The private datastore resources are for AI2 users only. 45 | 46 | You can also point to a corpus directory in your file system by using a `file://` URL, e.g., `file://path/to/my/corpus/directory`. An example invocation is shown at the end of this README. 47 | 48 | When you have created the index, you can use it by modifying [`application.conf`](src/main/resources/application.conf) and restarting. 49 | 50 | ### Index Size Limits 51 | 52 | A Blacklab index will typically be 12-13x the size of the input corpus being indexed. 53 | Our testing on an Amazon [`r3.2xlarge`](https://aws.amazon.com/ec2/instance-types/) instance indicated that an index of up to 2 GB could be processed with reasonable speed when loaded into IKE. If you have a large corpus, one way to get around the size limits is to shard the corpus and create multiple indexes, each within the size limit, to load into IKE. 54 | 55 | 56 | ## Supported Platforms 57 | 58 | IKE has been built, tested, and verified to work on Linux and Mac systems. However, if you are interested in developing / running on Windows, please see the instructions from [diniluca1789](https://github.com/diniluca1789), an external IKE user who got it successfully building and running on Windows, as described in [this thread](https://github.com/allenai/ike/issues/225). 59 | 60 | 61 | ## AI2 Internal Information 62 | AI2 internal users, please go to [this link](README-AI2.md).
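For illustration, here is what a complete indexing run might look like, run from `sbt`; the corpus and index paths are hypothetical, and `-b 1000` is just the default batch size. Note the `file:///` form with three slashes for an absolute local path:

```
ike/runMain org.allenai.ike.index.CreateIndex -d /data/my-index -b 1000 -t file:///data/my-corpus.txt
```

To use the new index, add an entry to `IkeToolWebapp.indices` following the file-based example from the comments in `application.conf`:

```
{
  name = MyCorpus
  location = file
  description = "My Blacklab-indexed corpus"
  path = /data/my-index
}
```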
63 | -------------------------------------------------------------------------------- /build.sbt: -------------------------------------------------------------------------------- 1 | import Dependencies._ 2 | 3 | scalaVersion := "2.11.5" 4 | 5 | val ike = project.in(file(".")).enablePlugins(WebappPlugin) 6 | 7 | organization := "org.allenai" 8 | 9 | name := "ike" 10 | 11 | description := "buildin' them electric tables" 12 | 13 | scmInfo := Some(ScmInfo( 14 | url("https://github.com/allenai/ike"), 15 | "https://github.com/allenai/ike.git")) 16 | 17 | pomExtra := 18 | <developers> 19 | <developer> 20 | <id>allenai-dev-role</id> 21 | <name>Allen Institute for Artificial Intelligence</name> 22 | <email>dev-role@allenai.org</email> 23 | </developer> 24 | </developers> 25 | 26 | 27 | libraryDependencies ++= Seq( 28 | allenAiCommon, 29 | allenAiTestkit, 30 | allenAiDatastore, 31 | hadoopModule("hadoop-aws"), 32 | hadoopModule("hadoop-mapreduce"), 33 | nlpstackModule("tokenize") exclude("org.allenai", "datastore_2.11"), 34 | nlpstackModule("postag") exclude("org.allenai", "datastore_2.11"), 35 | nlpstackModule("chunk") exclude("org.allenai", "datastore_2.11"), 36 | nlpstackModule("lemmatize") exclude("org.allenai", "datastore_2.11"), 37 | nlpstackModule("segment") exclude("org.allenai", "datastore_2.11"), 38 | lucene("core"), 39 | lucene("analyzers-common"), 40 | lucene("highlighter"), 41 | lucene("queries"), 42 | lucene("queryparser"), 43 | sparkModule("core"), 44 | "com.typesafe.slick" %% "slick" % "2.1.0", 45 | "com.github.tminglei" %% "slick-pg" % "0.8.2", 46 | "com.typesafe.play" %% "play-json" % "2.3.8", 47 | "org.postgresql" % "postgresql" % "9.4-1201-jdbc41", 48 | "org.allenai.blacklab" %% "blacklab" % "1.0-ALLENAI-13", 49 | "org.allenai.word2vec" %% "word2vecjava" % "1.0.1", 50 | "com.google.guava" % "guava" % "18.0", 51 | "org.apache.thrift" % "libthrift" % "0.9.1", 52 | sprayModule("caching"), 53 | "com.papertrailapp" % "logback-syslog4j" % "1.0.0", 54 | scopt) 55 | 56 | javaOptions in Revolver.reStart += "-Xmx14G" 57 | 58 | mainClass in Revolver.reStart := Some("org.allenai.ike.IkeToolWebapp") 59 | 60 | fork in run := true 61 | 62 | scalacOptions ++= Seq("-unchecked", "-deprecation", "-feature") 63 | 64 | conflictManager := ConflictManager.default 65 | 66 | dependencyOverrides ++= Set( 67 | "org.allenai.common" %% "common-core" % "1.0.13", 68 | sprayJson, 69 | "com.fasterxml.jackson.core" % "jackson-databind" % "2.4.4", 70 | "org.scala-lang.modules" %% "scala-parser-combinators" % "1.0.3", 71 | "org.scala-lang.modules" %% "scala-xml" % "1.0.2", 72 | "commons-codec" % "commons-codec" % "1.6", 73 | "org.apache.commons" % "commons-compress" % "1.8", 74 | "org.scala-lang" % "scala-reflect" % "2.11.5" 75 | ) 76 | 77 | deployDirs += "etc" 78 | 79 | mappings in Universal ++= 80 | (sourceDirectory.value / "main" / "etc" ** "*" pair relativeTo(sourceDirectory.value / "main")) 81 | -------------------------------------------------------------------------------- /images/ikeguidepic1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/ike/f3a7f85a50831b56cef04e599f70f75de6c655a3/images/ikeguidepic1.png -------------------------------------------------------------------------------- /images/ikeguidepic2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/ike/f3a7f85a50831b56cef04e599f70f75de6c655a3/images/ikeguidepic2.png -------------------------------------------------------------------------------- /images/ikeguidepic3.png:
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/ike/f3a7f85a50831b56cef04e599f70f75de6c655a3/images/ikeguidepic3.png -------------------------------------------------------------------------------- /images/ikeguidepic4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/ike/f3a7f85a50831b56cef04e599f70f75de6c655a3/images/ikeguidepic4.png -------------------------------------------------------------------------------- /images/ikeguidepic5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/ike/f3a7f85a50831b56cef04e599f70f75de6c655a3/images/ikeguidepic5.png -------------------------------------------------------------------------------- /images/ikeguidepic6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/ike/f3a7f85a50831b56cef04e599f70f75de6c655a3/images/ikeguidepic6.png -------------------------------------------------------------------------------- /images/ikeguidepic7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/ike/f3a7f85a50831b56cef04e599f70f75de6c655a3/images/ikeguidepic7.png -------------------------------------------------------------------------------- /images/ikeguidepic8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/ike/f3a7f85a50831b56cef04e599f70f75de6c655a3/images/ikeguidepic8.png -------------------------------------------------------------------------------- /images/ikeguidepic9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/ike/f3a7f85a50831b56cef04e599f70f75de6c655a3/images/ikeguidepic9.png -------------------------------------------------------------------------------- /installing_ike_contributed_by_UTAustin.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/ike/f3a7f85a50831b56cef04e599f70f75de6c655a3/installing_ike_contributed_by_UTAustin.pdf -------------------------------------------------------------------------------- /project/Dependencies.scala: -------------------------------------------------------------------------------- 1 | import org.allenai.plugins.CoreDependencies 2 | 3 | import sbt._ 4 | import sbt.Keys._ 5 | 6 | object Dependencies extends CoreDependencies { 7 | val allenAiDatastore = "org.allenai.datastore" %% "datastore" % "1.0.7" excludeAll ( 8 | // This conflicts with aws-java-sdk 1.7.4 in hadoop. 
9 | ExclusionRule(organization = "com.amazonaws", name = "aws-java-sdk-s3") 10 | ) 11 | 12 | def hadoopModule(id: String) = "org.apache.hadoop" % id % "2.7.2" excludeAll ( 13 | ExclusionRule(organization = "com.google.guava"), 14 | ExclusionRule(organization = "javax.servlet"), 15 | ExclusionRule(organization = "org.slf4j", name = "slf4j-log4j12") 16 | ) 17 | 18 | val luceneGroup = "org.apache.lucene" 19 | val luceneVersion = "4.2.1" 20 | def lucene(part: String) = luceneGroup % s"lucene-${part}" % luceneVersion 21 | 22 | val nlpstackVersion = "1.10" 23 | def nlpstackModule(id: String) = "org.allenai.nlpstack" %% s"nlpstack-${id}" % nlpstackVersion 24 | 25 | def sparkModule(id: String) = "org.apache.spark" %% s"spark-$id" % "1.6.1" excludeAll ( 26 | ExclusionRule(organization = "com.google.guava"), 27 | ExclusionRule(organization = "org.apache.commons"), 28 | ExclusionRule(organization = "org.codehaus.jackson"), 29 | ExclusionRule(organization = "org.slf4j", name = "slf4j-log4j12") 30 | ) 31 | } 32 | -------------------------------------------------------------------------------- /project/build.properties: -------------------------------------------------------------------------------- 1 | sbt.version=0.13.8 2 | -------------------------------------------------------------------------------- /project/plugins.sbt: -------------------------------------------------------------------------------- 1 | addSbtPlugin("org.allenai.plugins" % "allenai-sbt-plugins" % "1.3.0") 2 | -------------------------------------------------------------------------------- /src/main/bin/ike_control.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | #usage: $0 [start|stop|status] 4 | 5 | export APPROOT=$(cd $(dirname $0)/..; pwd -P) 6 | 7 | if [ -e /etc/default/ike ]; then 8 | . /etc/default/ike 9 | else 10 | echo "Missing config file: /etc/default/ike. Contact your administrator" 11 | exit 1 12 | fi 13 | 14 | if [ $UID -eq 0 ]; then 15 | echo "Do not run as root. Exiting" 16 | exit 1 17 | fi 18 | 19 | # vars defined in /etc/default/ike: 20 | # START - used by rc.d to determine whether $0 should be invoked on system startup 21 | # APPROOT - fully qualified path to install location 22 | # CREDENTIALS - path to postgres pw file 23 | 24 | #user defined variables 25 | CLASS_NAME="org.allenai.ike.IkeToolWebapp" 26 | #CLASS_PATH is determined programmatically below in start() 27 | JVM_ARGS="-Xmx60G -Xms60G" 28 | prog="ike" 29 | LOGBACK_APP="$prog" 30 | LOGBACK_CONF="$APPROOT/conf/logback.xml" 31 | CONF_FILE="$APPROOT/conf/env.conf" 32 | 33 | ike_action=$1 34 | 35 | exec="/usr/bin/java" 36 | SCRIPT_DIR="$APPROOT/bin" 37 | pidfile=$APPROOT/${prog}.pid 38 | stdoutfile=$APPROOT/${prog}.out 39 | stderrfile=$APPROOT/${prog}.err 40 | 41 | LOGBACK_ARGS="-Dlogback.appname=$LOGBACK_APP -Dlogback.configurationFile=$LOGBACK_CONF" 42 | CONF_ARGS="-Dconfig.file=$CONF_FILE" 43 | 44 | start() { 45 | status > /dev/null 46 | if [ $? -eq 0 ]; then 47 | echo "$0 is already running." 48 | return 0 49 | fi 50 | 51 | if [ -e "$CREDENTIALS" ]; then 52 | source "$CREDENTIALS" 53 | else 54 | echo "Error: $CREDENTIALS not found; should be defined in /etc/default/$prog. $0 will fail to start."
55 | fi 56 | 57 | cd $APPROOT 58 | CLASS_PATH=`find lib -name '*.jar' | tr "\\n" :` 59 | JAVA_ARGS="$JVM_ARGS -classpath $CLASS_PATH $CONF_ARGS $LOGBACK_ARGS" 60 | 61 | # --- start the program --- 62 | echo -n "Starting $prog: " 63 | nohup $exec $JAVA_ARGS $CLASS_NAME > "$stdoutfile" 2> "$stderrfile" & 64 | retval=$? 65 | echo 66 | 67 | sleep 2 68 | 69 | ps -p $! > /dev/null 70 | if [ $? -eq 0 ]; then 71 | echo $! > "$pidfile" 72 | fi 73 | 74 | return $retval 75 | } 76 | 77 | stop() { 78 | if ! [ -e $pidfile ]; then 79 | echo "pidfile: $pidfile not found. Assumption: $prog is NOT running" 80 | return 0 81 | fi 82 | kill `cat "$pidfile"` > /dev/null 83 | retval=$? 84 | rm $pidfile 85 | return $retval 86 | } 87 | 88 | # returns 0 if running; 1 or 2 if not 89 | status() { 90 | if ! [ -e $pidfile ]; then 91 | echo "pidfile: $pidfile not found. Assumption: $prog is NOT running" 92 | return 2 93 | fi 94 | ps -p `cat "$pidfile"` > /dev/null 95 | if [ $? -eq 0 ]; then 96 | echo "$prog is running" 97 | return 0 98 | else 99 | echo "$prog is NOT running" 100 | return 1 101 | fi 102 | } 103 | 104 | case "$1" in 105 | start) 106 | $1 107 | ;; 108 | stop) 109 | $1 110 | ;; 111 | status) 112 | $1 113 | ;; 114 | *) 115 | echo "Unsupported Input: $1. expected: start|stop|status" 116 | ;; 117 | esac 118 | -------------------------------------------------------------------------------- /src/main/etc/default/ike.template: -------------------------------------------------------------------------------- 1 | CREDENTIALS="/path/to/pgpass-.sh" 2 | -------------------------------------------------------------------------------- /src/main/etc/init.d/ike: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ### BEGIN INIT INFO 4 | # Provides: ike 5 | # Required-Start: $remote_fs 6 | # Required-Stop: $remote_fs 7 | # Default-Start: 2 3 4 5 8 | # Default-Stop: 0 1 6 9 | # Short-Description: ike webapp 10 | # Description: ike ike ike 11 | ### END INIT INFO 12 | 13 | export APPROOT=$(cd $(dirname $(readlink -f $0))/../..; pwd) 14 | 15 | . /etc/default/ike 16 | 17 | exec="$APPROOT/bin/ike_control.sh" 18 | exec_user="ai2service" 19 | 20 | if ! [ $# -eq 1 ]; then 21 | echo "usage: $0 stop|start|status" 22 | exit 1 23 | fi 24 | 25 | sudo -u $exec_user $exec $1 26 | exit $? 
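For reference, a typical control session with the two scripts above (assuming the package is installed with the init script linked as `/etc/init.d/ike`, as these files expect) might look like:

```
sudo /etc/init.d/ike start    # delegates to bin/ike_control.sh start as the ai2service user
sudo /etc/init.d/ike status   # exit code 0 if the pid recorded in ike.pid is a live process
sudo /etc/init.d/ike stop     # kills the recorded pid and removes the pidfile
```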
27 | -------------------------------------------------------------------------------- /src/main/resources/application.conf: -------------------------------------------------------------------------------- 1 | akka.loglevel = DEBUG 2 | 3 | IkeToolWebapp = { 4 | port = 8080 5 | indices = [ 6 | { 7 | name = WebCorpus1 8 | location = datastore 9 | description = "A collection of ~1.5 million sentences from web documents skewed towards elementary science topics" 10 | item = { 11 | type = directory 12 | datastore = public 13 | group = org.allenai.dictionary.indexes 14 | name = WaterlooFiltered 15 | version = 1 16 | } 17 | }, 18 | { 19 | name = WebCorpus2 20 | location = datastore 21 | description = "A collection of ~1.1 million sentences from web documents skewed towards elementary science topics" 22 | item = { 23 | type = directory 24 | datastore = public 25 | group = org.allenai.dictionary.indexes 26 | name = WaterlooFilteredV2Shard5 27 | version = 1 28 | } 29 | } 30 | // NOTE: You can also specify a path in the file system to point to an index by using `location = file`, for e.g.: 31 | //{ 32 | // name = MyCorpus 33 | // location = file 34 | // description = "My Blacklab-indexed corpus" 35 | // path = /path/to/index-directory 36 | //} 37 | ] 38 | } 39 | 40 | Tablestore = { 41 | db = { 42 | // NOTE: Before running locally, uncomment the below lines and specify values for keys `url` to point to your PostgresSql JDBC 43 | // link, `user` and `password`. 44 | // AI2 Internal users: use the database in the `test` deploy environment and the same username. You can get the password 45 | // from this file in the ops-keystore in S3. 46 | url = "jdbc:postgresql://"${POSTGRES_DB}":5432/okcorpus" 47 | user = "okcorpus" 48 | password = ${POSTGRES_PASSWORD} 49 | } 50 | } 51 | 52 | QuerySuggester = { 53 | narrow = { 54 | prefixSize = 3 55 | suffixSize = 3 56 | minSimilarityDifference = 5 57 | suggestWord = true 58 | suggestPos = true 59 | suggestSetRepeatedOp = true 60 | } 61 | broaden = { 62 | suggestWord = true 63 | suggestPos = true 64 | minSimilarityDifference = 5 65 | wordPOSSampleSize = 200 66 | } 67 | maxUnlabelledBiasCorrection = 50 68 | pruneOperatorsIfMoreMatchesThan = 1500 69 | pruneOperatorsIfLessThan = 2 70 | minMaxOpReuse = 3 71 | maxOpReuseReturn = 4 72 | maxOpReusePercentOfBeamSize = 0.35 73 | numToSuggest = 11 74 | percentUnlabelled = 0.4 75 | timeoutInSeconds = 55 s 76 | } 77 | 78 | word2vecPhrasesSearcher = { 79 | vectors = { 80 | // word2vec embedding (300 dim) bin file 81 | type = file 82 | datastore = public 83 | group = org.allenai.okcorpus 84 | name = citeseer-ten-percent.3.mc100.bin 85 | version = 1 86 | } 87 | format = "binary" 88 | embeddingSize = 300 89 | } 90 | 91 | pmiPhrasesSearcher = { 92 | vectors = { 93 | // function PPMI SVD embedding (300 dim) text file 94 | type = file 95 | group = org.allenai.okcorpus 96 | datastore = public 97 | name = functionPPMI.300.vec.txt 98 | version = 1 99 | } 100 | format= "text" 101 | embeddingSize = 300 102 | } 103 | 104 | combinationPhraseSearcher = { 105 | // combinationStrategy can take values from [sum, average, min, max] 106 | // default is "average" 107 | combinationStrategy = "average" 108 | } 109 | 110 | spray.can.client { 111 | request-timeout = 30 s 112 | } 113 | -------------------------------------------------------------------------------- /src/main/resources/logback.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | logs/${logback.appname}.log 9 | 10 | 11 | 
logs/${logback.appname}.%d{yyyy-MM-dd}.log 12 | 13 | 14 | 30 15 | 16 | 17 | 18 | %-5level [%date] %logger{35}: %msg%n 19 | 20 | 21 | 22 | 23 | 24 | %date{ISO8601} [%thread] %-5level %logger{36} - %msg%n 25 | 26 | 27 | 28 | 29 | 30 | %-5level [%thread] %logger{35}: %m%n%xEx 31 | 32 | 33 | 34 | logs3.papertrailapp.com 35 | 38670 36 | OKCorpus 37 | 128000 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | -------------------------------------------------------------------------------- /src/main/resources/pgpass-ENV.sh.template: -------------------------------------------------------------------------------- 1 | export POSTGRES_PASSWORD='PASSWORD' 2 | -------------------------------------------------------------------------------- /src/main/scala/org/allenai/ike/BlackLabDocument.scala: -------------------------------------------------------------------------------- 1 | package org.allenai.ike 2 | 3 | case class BlackLabDocument(name: String, sentences: Seq[Seq[WordData]]) 4 | -------------------------------------------------------------------------------- /src/main/scala/org/allenai/ike/BlackLabResult.scala: -------------------------------------------------------------------------------- 1 | package org.allenai.ike 2 | 3 | import org.allenai.blacklab.search.{ Hit, Hits, Kwic, Span } 4 | 5 | import scala.collection.JavaConverters._ 6 | 7 | case class BlackLabResult( 8 | wordData: Seq[WordData], 9 | matchOffset: Interval, 10 | captureGroups: Map[String, Interval], 11 | corpusName: String 12 | ) { 13 | def matchData: Seq[WordData] = wordData.slice(matchOffset.start, matchOffset.end) 14 | def matchWords: Seq[String] = matchData.map(_.word) 15 | } 16 | 17 | case object BlackLabResult { 18 | def wordData(hits: Hits, kwic: Kwic): Seq[WordData] = { 19 | val attrNames = kwic.getProperties.asScala 20 | val attrValues = kwic.getTokens.asScala.grouped(attrNames.size) 21 | val attrGroups = attrValues.map(attrNames.zip(_).toMap).toSeq 22 | for { 23 | attrGroup <- attrGroups 24 | word = attrGroup.get("word") match { 25 | case Some(value) => value 26 | case _ => throw new IllegalStateException(s"kwic $kwic does not have 'word' attribute") 27 | } 28 | data = WordData(word, attrGroup - "word") 29 | } yield data 30 | } 31 | 32 | def toInterval(span: Span): Interval = Interval(span.start, span.end) 33 | 34 | def captureGroups(hits: Hits, hit: Hit, shift: Int): Map[String, Option[Interval]] = { 35 | val names = hits.getCapturedGroupNames.asScala 36 | // For some reason BlackLab will sometimes return null values here, so wrap in Options 37 | val optSpans = hits.getCapturedGroups(hit) map wrapNull 38 | val result = for { 39 | (name, optSpan) <- names.zip(optSpans) 40 | optInterval = optSpan map toInterval 41 | shifted = optInterval map (_.shift(-shift)) 42 | } yield (name, shifted) 43 | result.toMap 44 | } 45 | 46 | /** Converts a hit to a BlackLabResult. Returns None if the dreaded BlackLab NPE is returned 47 | * when computing the capture groups. 48 | */ 49 | def fromHit( 50 | hits: Hits, 51 | hit: Hit, 52 | corpusName: String, 53 | kwicSize: Int = 20 54 | ): Option[BlackLabResult] = { 55 | val kwic = hits.getKwic(hit, kwicSize) 56 | val data = wordData(hits, kwic) 57 | val offset = Interval(kwic.getHitStart, kwic.getHitEnd) 58 | // TODO: https://github.com/allenai/okcorpus/issues/30 59 | if (hits.hasCapturedGroups) { 60 | val shift = hit.start - kwic.getHitStart 61 | val optGroups = captureGroups(hits, hit, shift) 62 | // If all of the capture groups are defined, then return the result. 
Otherwise, the 63 | // mysterious NPE was thrown and we can't compute the result's capture groups. 64 | if (optGroups.values.forall(_.isDefined)) { 65 | val groups = optGroups.mapValues(_.get) 66 | Some(BlackLabResult(data, offset, groups, corpusName)) 67 | } else { 68 | None 69 | } 70 | } else { 71 | // No capture groups? No problem. 72 | Some(BlackLabResult(data, offset, Map.empty[String, Interval], corpusName)) 73 | } 74 | } 75 | 76 | /** Converts the hits into BlackLabResult objects. If ignoreNpe is true, then it will skip over 77 | * any hits that throw the weird NPE. If ignoreNpe is false, will throw an IllegalStateException. 78 | */ 79 | def fromHits( 80 | hits: Hits, 81 | corpusName: String, 82 | ignoreNpe: Boolean = true 83 | ): Iterator[BlackLabResult] = for { 84 | hit <- hits.iterator.asScala 85 | result <- fromHit(hits, hit, corpusName) match { 86 | case x if (x.isDefined || ignoreNpe) => x 87 | case _ => throw new IllegalStateException(s"Could not compute capture groups for $hit") 88 | } 89 | } yield result 90 | 91 | def wrapNull[A](a: A): Option[A] = if (a == null) None else Some(a) 92 | } 93 | -------------------------------------------------------------------------------- /src/main/scala/org/allenai/ike/BlackLabSemantics.scala: -------------------------------------------------------------------------------- 1 | package org.allenai.ike 2 | 3 | import org.allenai.blacklab.search.{ 4 | TextPattern, 5 | TextPatternAnd, 6 | TextPatternCaptureGroup, 7 | TextPatternOr, 8 | TextPatternProperty, 9 | TextPatternTerm 10 | } 11 | import org.allenai.blacklab.search.sequences.{ 12 | TextPatternAnyToken, 13 | TextPatternRepetition, 14 | TextPatternSequence 15 | } 16 | 17 | object BlackLabSemantics { 18 | var maxRepetition = 128 19 | def notImplemented: Exception = new UnsupportedOperationException 20 | 21 | // Prefix for auto-generated names for unnamed Capture Groups. 
22 | val genericCaptureGroupNamePrefix = "Capture Group" 23 | 24 | def chunkPatternTerm(p: String) = { 25 | p match { 26 | //Covered ("NP", "VP", "PP", "ADJP" , "ADVP") 27 | case "NP" => new TextPatternOr( 28 | new TextPatternSequence( 29 | new TextPatternTerm("B-NP"), 30 | new TextPatternRepetition(new TextPatternTerm("I-NP"), 0, -1), 31 | new TextPatternTerm("E-NP") 32 | ), 33 | new TextPatternTerm("BE-NP") 34 | ) 35 | 36 | case "VP" => new TextPatternOr( 37 | new TextPatternSequence( 38 | new TextPatternTerm("B-VP"), 39 | new TextPatternRepetition(new TextPatternTerm("I-VP"), 0, -1), 40 | new TextPatternTerm("E-VP") 41 | ), 42 | new TextPatternTerm("BE-VP") 43 | ) 44 | 45 | case "PP" => new TextPatternOr( 46 | new TextPatternSequence( 47 | new TextPatternTerm("B-PP"), 48 | new TextPatternRepetition(new TextPatternTerm("I-PP"), 0, -1), 49 | new TextPatternTerm("E-PP") 50 | ), 51 | new TextPatternTerm("BE-PP") 52 | ) 53 | 54 | case "ADJP" => new TextPatternOr( 55 | new TextPatternSequence( 56 | new TextPatternTerm("B-ADJP"), 57 | new TextPatternRepetition(new TextPatternTerm("I-ADJP"), 0, -1), 58 | new TextPatternTerm("E-ADJP") 59 | ), 60 | new TextPatternTerm("BE-ADJP") 61 | ) 62 | 63 | case "ADVP" => new TextPatternOr( 64 | new TextPatternSequence( 65 | new TextPatternTerm("B-ADVP"), 66 | new TextPatternRepetition(new TextPatternTerm("I-ADVP"), 0, -1), 67 | new TextPatternTerm("E-ADVP") 68 | ), 69 | new TextPatternTerm("BE-ADVP") 70 | ) 71 | } 72 | } 73 | 74 | def blackLabQuery(qexpr: QExpr): TextPattern = { 75 | var unnamedCnt = 0 76 | def blqHelper(qexpr: QExpr): TextPattern = qexpr match { 77 | case QWord(w) => new TextPatternTerm(w) 78 | case QPos(p) => new TextPatternProperty("pos", new TextPatternTerm(p)) 79 | case QChunk(p) => new TextPatternProperty("chunk", chunkPatternTerm(p)) 80 | case QDict(_) => 81 | throw new IllegalArgumentException("Can not convert QDict to TextPattern") 82 | case QNamedPattern(_) => 83 | throw new IllegalArgumentException("Can not convert QNamedPattern to TextPattern") 84 | case QGeneralizePhrase(_, _) => 85 | throw new IllegalArgumentException("Can not convert QGeneralizePhrase to TextPattern") 86 | case QGeneralizeTable(_, _) => 87 | throw new IllegalArgumentException("Can not convert QGeneralizeTable to TextPattern") 88 | case QWildcard() => new TextPatternAnyToken(1, 1) 89 | case QNamed(e: QExpr, name: String) => new TextPatternCaptureGroup(blqHelper(e), name) 90 | case QUnnamed(e) => 91 | unnamedCnt += 1 92 | val result = blqHelper(QNamed(e, s"$genericCaptureGroupNamePrefix $unnamedCnt")) 93 | result 94 | case QNonCap(e: QExpr) => blqHelper(e) 95 | case QStar(e: QExpr) => new TextPatternRepetition(blqHelper(e), 0, maxRepetition) 96 | case QPlus(e: QExpr) => new TextPatternRepetition(blqHelper(e), 1, maxRepetition) 97 | case QRepetition(e, min, max) => new TextPatternRepetition(blqHelper(e), min, max) 98 | case QSeq(es: Seq[QExpr]) => new TextPatternSequence(es.map(blqHelper): _*) 99 | case QDisj(es: Seq[QExpr]) => new TextPatternOr(es.map(blqHelper): _*) 100 | case QAnd(expr1, expr2) => new TextPatternAnd(blqHelper(expr1), blqHelper(expr2)) 101 | case QPosFromWord(value, word, posTags) => value match { 102 | case Some(string) => blqHelper(QPos(string)) 103 | case None => blqHelper(QWord(word)) 104 | } 105 | case QSimilarPhrases(qwords, pos, phrases) => { 106 | val selected = phrases.slice(0, pos) 107 | val seqs = QSeq(qwords) +: selected.map(p => QSeq(p.qwords)) 108 | val disj = QDisj(seqs) 109 | blqHelper(disj) 110 | } 111 | } 112 | 
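// Kick off the translation of the full expression tree: blqHelper recurses structurally, and the enclosing unnamedCnt counter gives each QUnnamed group a distinct generated name ("Capture Group 1", "Capture Group 2", ...).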
blqHelper(qexpr) 113 | } 114 | } 115 | -------------------------------------------------------------------------------- /src/main/scala/org/allenai/ike/DataFile.scala: -------------------------------------------------------------------------------- 1 | package org.allenai.ike 2 | 3 | import org.allenai.datastore.Datastore 4 | 5 | import com.typesafe.config.Config 6 | 7 | import java.io.File 8 | 9 | object DataFile { 10 | def fromConfig(config: Config): File = config.getString("location") match { 11 | case "file" => new File(config.getString("path")) 12 | case "datastore" => fromDatastore(config.getConfig("item")) 13 | } 14 | def fromDatastore(config: Config): File = { 15 | val storeName = config.getString("datastore") 16 | val group = config.getString("group") 17 | val name = config.getString("name") 18 | val version = config.getInt("version") 19 | val itemType = config.getString("type") 20 | val ds = Datastore(storeName) 21 | val path = itemType match { 22 | case "file" => ds.filePath(group, name, version) 23 | case "directory" => ds.directoryPath(group, name, version) 24 | case _ => throw new IllegalArgumentException(s"itemType must be file or directory") 25 | } 26 | path.toFile 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /src/main/scala/org/allenai/ike/GroupedBlackLabResult.scala: -------------------------------------------------------------------------------- 1 | package org.allenai.ike 2 | 3 | case class KeyedBlackLabResult(keys: Seq[Interval], result: BlackLabResult) 4 | 5 | case class GroupedBlackLabResult( 6 | keys: Seq[String], 7 | size: Int, 8 | relevanceScore: Double, 9 | results: Iterable[KeyedBlackLabResult] 10 | ) 11 | -------------------------------------------------------------------------------- /src/main/scala/org/allenai/ike/IkeKryoRegistrator.scala: -------------------------------------------------------------------------------- 1 | package org.allenai.ike 2 | 3 | import org.allenai.ike.patterns.NamedPattern 4 | 5 | import com.esotericsoftware.kryo.io.{ Input, Output } 6 | import com.esotericsoftware.kryo.{ Kryo, Serializer } 7 | import org.apache.spark.serializer.KryoRegistrator 8 | 9 | /** Helper to register custom serializers for Some and None. Since Kryo is a Java library, it 10 | * doesn't provide default serializers for these classes. 11 | */ 12 | object OptionSerializers { 13 | def register(kryo: Kryo) { 14 | kryo.register(Class.forName("scala.None$"), new NoneSerializer()) 15 | kryo.register(classOf[Some[_]], new SomeSerializer(kryo)) 16 | } 17 | } 18 | 19 | class NoneSerializer extends Serializer[None.type] { 20 | override def write(kryo: Kryo, output: Output, `object`: None.type): Unit = () 21 | 22 | override def read(kryo: Kryo, input: Input, `type`: Class[None.type]): None.type = None 23 | } 24 | 25 | class SomeSerializer(kryo: Kryo) extends Serializer[Some[_]] { 26 | override def write(kryo: Kryo, output: Output, `object`: Some[_]): Unit = { 27 | kryo.writeClassAndObject(output, `object`.get) 28 | } 29 | 30 | override def read(kryo: Kryo, input: Input, `type`: Class[Some[_]]): Some[_] = 31 | Some(kryo.readClassAndObject(input)) 32 | } 33 | 34 | /** Helper class to register custom classes with Kryo. This allows Kryo to pack these objects more 35 | * efficiently with numbered indices, rather than using the fully qualified class name. 
36 | */ 37 | class IkeKryoRegistrator extends KryoRegistrator { 38 | override def registerClasses(kryo: Kryo): Unit = { 39 | OptionSerializers.register(kryo) 40 | kryo.register(Class.forName("scala.collection.immutable.Nil$")) 41 | 42 | val classes: Array[Class[_]] = Array( 43 | classOf[BlackLabResult], 44 | classOf[Interval], 45 | classOf[WordData], 46 | classOf[java.time.Instant], 47 | classOf[java.time.LocalDate], 48 | classOf[java.time.Year] 49 | ) 50 | 51 | classes.foreach(kryo.register) 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /src/main/scala/org/allenai/ike/Interval.scala: -------------------------------------------------------------------------------- 1 | package org.allenai.ike 2 | 3 | /** A simple interval class which supports ordering. */ 4 | case class Interval(start: Int, end: Int) extends Ordered[Interval] { 5 | def shift(by: Int): Interval = Interval(this.start + by, this.end + by) 6 | 7 | override def compare(that: Interval): Int = 8 | if (this.start > that.start) { 9 | 1 10 | } else if (this.start < that.start) { 11 | -1 12 | } else { 13 | this.length - that.length 14 | } 15 | 16 | def length: Int = end - start 17 | } 18 | -------------------------------------------------------------------------------- /src/main/scala/org/allenai/ike/JsonSerialization.scala: -------------------------------------------------------------------------------- 1 | package org.allenai.ike 2 | 3 | import org.allenai.ike.patterns.NamedPattern 4 | 5 | import spray.json.DefaultJsonProtocol._ 6 | 7 | object JsonSerialization { 8 | import org.allenai.ike.QExprJsonSerialization._ 9 | 10 | implicit val intervalFormat = jsonFormat2(Interval.apply) 11 | implicit val wordDataFormat = jsonFormat2(WordData.apply) 12 | implicit val blackLabResultFormat = jsonFormat4(BlackLabResult.apply) 13 | implicit val keyedBlackLabResultFormat = jsonFormat2(KeyedBlackLabResult.apply) 14 | implicit val groupedBlackLabResultFormat = jsonFormat4(GroupedBlackLabResult.apply) 15 | implicit val qwordFormat = jsonFormat1(QWord.apply) 16 | implicit val tableValueFormat = jsonFormat1(TableValue.apply) 17 | implicit val tableRowForamt = jsonFormat2(TableRow.apply) 18 | implicit val tableFormat = jsonFormat4(Table.apply) 19 | implicit val searchConfigFormat = jsonFormat2(SearchConfig.apply) 20 | implicit val searchRequestFormat = jsonFormat4(SearchRequest.apply) 21 | implicit val searchResponse = jsonFormat2(SearchResponse.apply) 22 | implicit val wordInfoRequest = jsonFormat2(WordInfoRequest.apply) 23 | implicit val wordInfoResponse = jsonFormat2(WordInfoResponse.apply) 24 | implicit val inferConfig = jsonFormat6(SuggestQueryConfig.apply) 25 | implicit val inferQueryRequest = jsonFormat5(SuggestQueryRequest.apply) 26 | implicit val scoredQuery = jsonFormat5(ScoredStringQuery.apply) 27 | implicit val inferQueryResponse = jsonFormat3(SuggestQueryResponse.apply) 28 | implicit val corpusDescription = jsonFormat3(CorpusDescription.apply) 29 | implicit val similarPhrasesResponse = jsonFormat1(SimilarPhrasesResponse.apply) 30 | 31 | implicit val namedPattern = jsonFormat2(NamedPattern.apply) 32 | } 33 | -------------------------------------------------------------------------------- /src/main/scala/org/allenai/ike/QExprJsonSerialization.scala: -------------------------------------------------------------------------------- 1 | package org.allenai.ike 2 | import org.allenai.common.json._ 3 | 4 | import spray.json.DefaultJsonProtocol._ 5 | import spray.json._ 6 | 7 | object 
QExprJsonSerialization { 8 | implicit object QExprFormat extends RootJsonFormat[QExpr] { 9 | override def write(qexpr: QExpr): JsValue = qexpr match { 10 | case q: QWord => q.toJson 11 | case q: QPos => q.toJson 12 | case q: QChunk => q.toJson 13 | case q: QDict => q.toJson 14 | case q: QNamedPattern => q.toJson 15 | case q: QWildcard => q.toJson 16 | case q: QNamed => q.toJson 17 | case q: QUnnamed => q.toJson 18 | case q: QNonCap => q.toJson 19 | case q: QStar => q.toJson 20 | case q: QPlus => q.toJson 21 | case q: QSeq => q.toJson 22 | case q: QDisj => q.toJson 23 | case q: QPosFromWord => q.toJson 24 | case q: QAnd => q.toJson 25 | case q: QSimilarPhrases => q.toJson 26 | case q: QRepetition => q.toJson 27 | case q: QGeneralizePhrase => q.toJson 28 | case q: QGeneralizeTable => q.toJson 29 | } 30 | override def read(jsValue: JsValue): QExpr = jsValue.asJsObject.unpackAs[QExpr] 31 | } 32 | 33 | implicit val qwordFormat = jsonFormat1(QWord.apply).pack("type" -> "QWord") 34 | implicit val qposFormat = jsonFormat1(QPos.apply).pack("type" -> "QPos") 35 | implicit val qchunkFormat = jsonFormat1(QChunk.apply).pack("type" -> "QChunk") 36 | implicit val qdictFormat = jsonFormat1(QDict.apply).pack("type" -> "QDict") 37 | implicit val qnamedPatternFormat = 38 | jsonFormat1(QNamedPattern.apply).pack("type" -> "QNamedPattern") 39 | implicit val qandFormat = jsonFormat2(QAnd.apply).pack("type" -> "QAnd") 40 | implicit val qwildcardFormat = new RootJsonFormat[QWildcard] { 41 | def write(wc: QWildcard): JsValue = JsObject() 42 | def read(value: JsValue): QWildcard = QWildcard() 43 | }.pack("type" -> "QWildcard") 44 | implicit val qnamedFormat = jsonFormat2(QNamed.apply).pack("type" -> "QNamed") 45 | implicit val qunnamedFormat = jsonFormat1(QUnnamed.apply).pack("type" -> "QUnnamed") 46 | implicit val qnonCapFormat = jsonFormat1(QNonCap.apply).pack("type" -> "QNonCap") 47 | implicit val qstarFormat = jsonFormat1(QStar.apply).pack("type" -> "QStar") 48 | implicit val qplusFormat = jsonFormat1(QPlus.apply).pack("type" -> "QPlus") 49 | implicit val qrepetitionFormat = jsonFormat3(QRepetition.apply).pack("type" -> "QRepetition") 50 | implicit val qseqFormat = jsonFormat1(QSeq.apply).pack("type" -> "QSeq") 51 | implicit val qdisjFormat = jsonFormat1(QDisj.apply).pack("type" -> "QDisj") 52 | implicit val qpfwFormat = jsonFormat3(QPosFromWord.apply).pack("type" -> "QPosFromWord") 53 | implicit val simPhraseFormat = jsonFormat2(SimilarPhrase.apply) 54 | implicit val qGeneralizePhrase = jsonFormat2(QGeneralizePhrase.apply). 55 | pack("type" -> "QGeneralizePhrase") 56 | implicit val qGeneralizeTable = jsonFormat2(QGeneralizeTable.apply). 
57 | pack("type" -> "QGeneralizeTable") 58 | implicit val qspFormat = jsonFormat3(QSimilarPhrases.apply).pack("type" -> "QSimilarPhrases") 59 | implicit val unpackers = Seq( 60 | qwordFormat, 61 | qposFormat, 62 | qchunkFormat, 63 | qdictFormat, 64 | qnamedPatternFormat, 65 | qwildcardFormat, 66 | qnamedFormat, 67 | qunnamedFormat, 68 | qnonCapFormat, 69 | qstarFormat, 70 | qplusFormat, 71 | qseqFormat, 72 | qdisjFormat, 73 | qpfwFormat, 74 | qspFormat 75 | ) 76 | } 77 | -------------------------------------------------------------------------------- /src/main/scala/org/allenai/ike/Table.scala: -------------------------------------------------------------------------------- 1 | package org.allenai.ike 2 | 3 | import spray.json.JsValue 4 | 5 | case class Table( 6 | name: String, cols: Seq[String], positive: Seq[TableRow], negative: Seq[TableRow] 7 | ) { 8 | def getIndexOfColumn(columnName: String): Int = { 9 | val ix = cols.indexWhere(c => c.equalsIgnoreCase(columnName)) 10 | if (ix == -1) { 11 | throw new IllegalArgumentException( 12 | s"Could not find column $columnName in table $name" 13 | ) 14 | } 15 | ix 16 | } 17 | 18 | def getIndexOfColumnOption(columnName: String): Option[Int] = { 19 | val ix = cols.indexWhere(c => c.equalsIgnoreCase(columnName)) 20 | if (ix == -1) { 21 | None 22 | } else { 23 | Some(ix) 24 | } 25 | } 26 | } 27 | 28 | case class TableRow(values: Seq[TableValue], provenance: Option[JsValue] = None) 29 | 30 | case class TableValue(qwords: Seq[QWord]) 31 | -------------------------------------------------------------------------------- /src/main/scala/org/allenai/ike/TableExpander.scala: -------------------------------------------------------------------------------- 1 | package org.allenai.ike 2 | 3 | import org.allenai.common.Logging 4 | 5 | import scala.collection.GenTraversableOnce 6 | import scala.collection.immutable.Iterable 7 | 8 | /** Implement this trait for expanding (generalizing) tables with seed entries. 9 | * Various similarity measures may be used. Each can be implemented as a separate TableExpander. 10 | */ 11 | trait TableExpander { 12 | def expandTableColumn(table: Table, columnName: String): Seq[SimilarPhrase] 13 | } 14 | 15 | /** Class that generalizes a given table (column) entries using similar phrases. 16 | * The basic idea here is: 17 | * expand each seed row in the given column using SimilarPhrasesSearcher, then determine / return 18 | * the intersection set (phrases returned as matches for a "large" fraction of rows in the table-- 19 | * hard-coded to 75%. 20 | * Uses SimilarPhrasesSearcher internally to expand each table entry. 21 | * @param similarPhrasesSearcher 22 | */ 23 | class SimilarPhrasesBasedIntersectionTableExpander(similarPhrasesSearcher: SimilarPhrasesSearcher) 24 | extends Logging with TableExpander { 25 | 26 | override def expandTableColumn(table: Table, columnName: String): Seq[SimilarPhrase] = { 27 | // Get index of the required column in the table. 28 | val colIndex = table.getIndexOfColumn(columnName) 29 | 30 | // Helper Method to compute arithmetic mean similarity score. 31 | def averageSimilarity(similarityScores: Seq[Double]): Double = { 32 | val numPhrases = similarityScores.length 33 | if (numPhrases > 0) { 34 | similarityScores.sum / numPhrases 35 | } else { 36 | 0.0 37 | } 38 | } 39 | 40 | val currentTableRows = table.positive.map(_.values(colIndex)) 41 | 42 | // Construct a map of all similar phrases retrieved with corresponding scores. 
43 | // Filter those that do not occur at least (75% number of rows) times -- 44 | // a good chance these were found in the similar phrase sets for 75% of the rows, 45 | // assuming word2vec doesn't return duplicates in getSimilarPhrases query. 46 | // Score is arithmetic mean of all similarity scores obtained. 47 | val phraseScoreMap = (currentTableRows.flatMap { 48 | tableEntry => 49 | similarPhrasesSearcher.getSimilarPhrases(tableEntry.qwords.map(_.value).mkString(" ")) 50 | }).groupBy(_.qwords) 51 | .filter(_._2.size >= 0.75 * currentTableRows.size) 52 | .mapValues(phrases => averageSimilarity(phrases.map(_.similarity))) 53 | 54 | // Convert the phrase score map into SimilarPhrase objects and return. 55 | phraseScoreMap.map(x => new SimilarPhrase(x._1, x._2)).toSeq 56 | } 57 | } 58 | 59 | /** Class that generalizes a given table (column) entries using SimilarPhrasesSearcher. The basic 60 | * idea here is: get the phrases similar to the set of all seed entries, by using 61 | * the logic implemented in SimilarPhrasesSearcher.getSimilarPhrases 62 | * @param similarPhrasesSearcher 63 | */ 64 | class SimilarPhrasesBasedTableExpander(similarPhrasesSearcher: SimilarPhrasesSearcher) 65 | extends Logging with TableExpander { 66 | 67 | override def expandTableColumn(table: Table, columnName: String): Seq[SimilarPhrase] = { 68 | // Get index of the required column in the table. 69 | val colIndex = table.getIndexOfColumn(columnName) 70 | // Construct set of all table rows. 71 | val currentTableEntries = new scala.collection.mutable.HashSet[Seq[QWord]]() 72 | 73 | // Retrieve the set of entries in the particular column being expanded. 74 | val columnEntries = for { 75 | row <- table.positive 76 | } yield { 77 | val tableEntry = row.values(colIndex) 78 | currentTableEntries.add(tableEntry.qwords) 79 | tableEntry.qwords.map(_.value).mkString(" ") 80 | } 81 | 82 | val expandedSet = similarPhrasesSearcher.getSimilarPhrases(columnEntries) 83 | 84 | // If the table entries are missing from the similar phrase-set, they should be added to the 85 | // set. i.e. ExpandedSet should be a superset of original set. 
86 | val withSeeds = columnEntries.foldLeft(expandedSet) { (acc, entry) => 87 | val entryQWords = entry.split(" ").map(QWord).toSeq 88 | if (acc.exists(_.qwords == entryQWords)) acc else SimilarPhrase(entryQWords, 1) +: acc 89 | } 90 | withSeeds 91 | 92 | } 93 | } 94 | -------------------------------------------------------------------------------- /src/main/scala/org/allenai/ike/WordData.scala: -------------------------------------------------------------------------------- 1 | package org.allenai.ike 2 | 3 | case class WordData(word: String, attributes: Map[String, String]) 4 | -------------------------------------------------------------------------------- /src/main/scala/org/allenai/ike/index/AnnotationIndexer.scala: -------------------------------------------------------------------------------- 1 | package org.allenai.ike.index 2 | 3 | import org.allenai.blacklab.index.complex.ComplexFieldProperty 4 | import org.allenai.blacklab.index.complex.ComplexFieldProperty.SensitivitySetting 5 | import org.allenai.blacklab.index.{ DocIndexerXmlHandlers, Indexer } 6 | import org.xml.sax.Attributes 7 | 8 | import java.io.Reader 9 | 10 | class AnnotationIndexer(indexer: Indexer, fileName: String, reader: Reader) 11 | extends DocIndexerXmlHandlers(indexer, fileName, reader) { 12 | val mainProp = getMainProperty 13 | val punctProp = getPropPunct 14 | val posProp = addProperty("pos", SensitivitySetting.ONLY_INSENSITIVE) 15 | val chunkProp = addProperty("chunk", SensitivitySetting.ONLY_INSENSITIVE) 16 | val lemmaProp = addProperty("lemma", SensitivitySetting.ONLY_SENSITIVE) 17 | addHandler("/document", new DocumentElementHandler()) 18 | addHandler("word", new WordHandlerBase() { 19 | def addAttribute(name: String, attrs: Attributes, prop: ComplexFieldProperty): Unit = { 20 | if (attrs.getValue(name) != null) prop.addValue(attrs.getValue(name)) 21 | } 22 | def addPos(attrs: Attributes): Unit = addAttribute("pos", attrs, posProp) 23 | def addChunk(attrs: Attributes): Unit = addAttribute("chunk", attrs, chunkProp) 24 | def addLemma(attrs: Attributes): Unit = addAttribute("lemma", attrs, lemmaProp) 25 | def addAttrs(attrs: Attributes): Unit = { 26 | addPos(attrs) 27 | addChunk(attrs) 28 | addLemma(attrs) 29 | } 30 | override def startElement(uri: String, ln: String, qName: String, attrs: Attributes): Unit = { 31 | super.startElement(uri, ln, qName, attrs) 32 | addAttrs(attrs) 33 | punctProp.addValue(consumeCharacterContent) 34 | } 35 | override def endElement(uri: String, localName: String, qName: String): Unit = { 36 | super.endElement(uri, localName, qName) 37 | mainProp.addValue(consumeCharacterContent) 38 | } 39 | }) 40 | addHandler("sentence", new InlineTagHandler) 41 | } 42 | -------------------------------------------------------------------------------- /src/main/scala/org/allenai/ike/index/CliUtils.scala: -------------------------------------------------------------------------------- 1 | package org.allenai.ike.index 2 | 3 | import org.allenai.datastore.Datastore 4 | 5 | import java.net.URI 6 | import java.nio.file.Paths 7 | 8 | object CliUtils { 9 | def pathFromUri(uri: URI) = uri.getScheme match { 10 | case "file" => Paths.get(uri) 11 | case "datastore" => Datastore.locatorFromUrl(uri).path 12 | case otherAuthority => throw new RuntimeException(s"URL scheme not supported: $otherAuthority") 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /src/main/scala/org/allenai/ike/index/CreateIndex.scala: -------------------------------------------------------------------------------- 1 | package
org.allenai.ike.index 2 | 3 | import org.allenai.datastore.Datastore 4 | import org.allenai.nlpstack.core.{ ChunkedToken, Lemmatized } 5 | 6 | import org.allenai.blacklab.index.Indexer 7 | 8 | import java.io.{ File, StringReader } 9 | import java.net.URI 10 | import java.nio.file.{ Files, Paths } 11 | 12 | object CreateIndex extends App { 13 | def addTo(indexer: Indexer)(text: IndexableText): Unit = { 14 | val xml = XmlSerialization.xml(text) 15 | val id = text.idText.id 16 | indexer.index(id, new StringReader(xml.toString())) 17 | } 18 | 19 | case class Options( 20 | destinationDir: File = null, 21 | batchSize: Int = 1000, 22 | textSource: URI = null, 23 | oneSentencePerDoc: Boolean = true 24 | ) 25 | 26 | val parser = new scopt.OptionParser[Options](this.getClass.getSimpleName.stripSuffix("$")) { 27 | opt[File]('d', "destination") required () action { (d, o) => 28 | o.copy(destinationDir = d) 29 | } text "Directory to create the index in" 30 | 31 | opt[Int]('b', "batchSize") action { (b, o) => 32 | o.copy(batchSize = b) 33 | } text "Batch size" 34 | 35 | opt[URI]('t', "textSource") required () action { (t, o) => 36 | o.copy(textSource = t) 37 | } text "URL of a file or directory to load the text from" 38 | 39 | opt[Unit]('o', "oneSentencePerDoc") action { (_, o) => 40 | o.copy(oneSentencePerDoc = true) 41 | } 42 | help("help") 43 | } 44 | 45 | def getIdTextsForTextSource(textSource: URI): Iterator[IdText] = { 46 | textSource.getScheme match { 47 | case "file" => 48 | val path = Paths.get(textSource) 49 | if (Files.isDirectory(path)) { 50 | IdText.fromDirectory(path.toFile) 51 | } else { 52 | IdText.fromFlatFile(path.toFile) 53 | } 54 | case "datastore" => 55 | val locator = Datastore.locatorFromUrl(textSource) 56 | if (locator.directory) { 57 | IdText.fromDirectory(locator.path.toFile) 58 | } else { 59 | IdText.fromFlatFile(locator.path.toFile) 60 | } 61 | case otherAuthority => 62 | throw new RuntimeException(s"URL scheme not supported: $otherAuthority") 63 | } 64 | } 65 | 66 | private def indexableToken(lemmatized: Lemmatized[ChunkedToken]): IndexableToken = { 67 | val word = lemmatized.token.string 68 | val pos = lemmatized.token.postag 69 | val lemma = lemmatized.lemma 70 | val chunk = lemmatized.token.chunk 71 | IndexableToken(word, pos, lemma, chunk) 72 | } 73 | 74 | def process(idText: IdText, oneSentencePerDoc: Boolean): Seq[IndexableText] = { 75 | if (oneSentencePerDoc) { 76 | val sents: Seq[Seq[Lemmatized[ChunkedToken]]] = NlpAnnotate.annotate(idText.text) 77 | sents.zipWithIndex.filter(_._1.nonEmpty).map { 78 | case (sent, index) => 79 | val text = idText.text.substring( 80 | sent.head.token.offset, 81 | sent.last.token.offset + sent.last.token.string.length 82 | ) 83 | val sentenceIdText = IdText(s"${idText.id}-$index", text) 84 | 85 | IndexableText(sentenceIdText, Seq(sent map indexableToken)) 86 | } 87 | } else { 88 | val text = idText.text 89 | val sents = for { 90 | sent <- NlpAnnotate.annotate(text) 91 | indexableSent = sent map indexableToken 92 | } yield indexableSent 93 | Seq(IndexableText(idText, sents)) 94 | } 95 | } 96 | 97 | private def processBatch(batch: Seq[IdText], oneSentencePerDoc: Boolean): Seq[IndexableText] = { 98 | batch.toArray.par.flatMap(idText => process(idText, oneSentencePerDoc)).seq 99 | } 100 | 101 | parser.parse(args, Options()) foreach { options => 102 | val indexDir = options.destinationDir 103 | val batchSize = options.batchSize 104 | val idTexts = getIdTextsForTextSource(options.textSource) 105 | 106 | val indexer = new Indexer(indexDir, 
true, classOf[AnnotationIndexer])
107 |     val indexableTexts = for {
108 |       batch <- idTexts.grouped(batchSize)
109 |       batchResults = processBatch(batch, options.oneSentencePerDoc)
110 |       result <- batchResults
111 |     } yield result
112 | 
113 |     indexableTexts foreach addTo(indexer)
114 |     indexer.close()
115 |   }
116 | }
117 | 
--------------------------------------------------------------------------------
/src/main/scala/org/allenai/ike/index/IdText.scala:
--------------------------------------------------------------------------------
1 | package org.allenai.ike.index
2 | 
3 | import org.allenai.common.ParIterator._
4 | import org.allenai.common.{ Logging, Resource, StreamClosingIterator }
5 | import org.allenai.ike.DataFile
6 | import org.allenai.ike.index.WikipediaCorpus.DocumentIterator
7 | 
8 | import com.typesafe.config.Config
9 | import org.apache.commons.io.LineIterator
10 | 
11 | import java.io.{ File, FileInputStream, InputStream, InputStreamReader }
12 | import java.net.URL
13 | import java.nio.charset.MalformedInputException
14 | import java.util.zip.GZIPInputStream
15 | import scala.concurrent.ExecutionContext.Implicits.global
16 | import scala.io.Source
17 | 
18 | case class IdText(id: String, text: String)
19 | 
20 | case object IdText extends Logging {
21 |   def fromConfig(config: Config): Iterator[IdText] = {
22 |     val format = config.getString("format")
23 |     val file = DataFile.fromConfig(config)
24 |     format match {
25 |       case "flat" => fromFlatFile(file)
26 |       case "directory" => fromDirectory(file)
27 |       case "wikipedia" => fromWikipedia(file)
28 |       case _ => throw new IllegalArgumentException(s"format must be flat, directory, or wikipedia")
29 |     }
30 |   }
31 | 
32 |   def fromFlatFile(file: File): Iterator[IdText] = for {
33 |     (line, i) <- Source.fromFile(file).getLines().zipWithIndex
34 |     id = s"${file.getAbsolutePath}.$i"
35 |     idText = IdText(id, line)
36 |   } yield idText
37 | 
38 |   def fromDirectory(file: File): Iterator[IdText] = recursiveListFiles(file).parMap({ subFile =>
39 |     if (subFile.isDirectory) {
40 |       None
41 |     } else {
42 |       val id = subFile.getAbsolutePath
43 |       for {
44 |         text <- safeLinesFromFile(subFile)
45 |         idText = IdText(id, text)
46 |       } yield idText
47 |     }
48 |   }, 32).flatten
49 | 
50 |   def fromWikipedia(file: File): Iterator[IdText] =
51 |     StreamClosingIterator(new FileInputStream(file)) { input =>
52 |       val documents = new DocumentIterator(new GZIPInputStream(input))
53 |       documents.map { document =>
54 |         IdText(document.id.toString, document.body)
55 |       }
56 |     }
57 | 
58 |   def recursiveListFiles(f: File): Iterator[File] = {
59 |     val these = f.listFiles
60 |     these.iterator ++ these.iterator.filter(_.isDirectory).flatMap(recursiveListFiles)
61 |   }
62 | 
63 |   def safeLinesFromFile(file: File): Option[String] = try {
64 |     Resource.using(Source.fromFile(file)) { source =>
65 |       Some(source.getLines().mkString("\n"))
66 |     }
67 |   } catch {
68 |     case _: MalformedInputException =>
69 |       logger.warn(s"Skipping unreadable file ${file.getName}")
70 |       None
71 |   }
72 | }
73 | 
74 | object WikipediaCorpus {
75 |   case class Document(id: Int, url: URL, title: String, body: String)
76 | 
77 |   class DocumentIterator(inputStream: InputStream) extends Iterator[Document] {
78 |     private val lines = new LineIterator(new InputStreamReader(inputStream, "UTF-8"))
79 | 
80 |     private var nextDocument: Option[Document] = None
81 |     private def advanceToNextDoc(): Unit = {
82 |       nextDocument = None
83 | 
84 |       if (lines.hasNext) {
85 |         val docLine = lines.next().trim
86 | 
87 |         val DocLinePattern = """<doc id="(.*)" url="(.*)" title="(.*)">""".r
88 | 
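        // The document header line is assumed to follow the WikiExtractor dump format, e.g.:
        // <doc id="12" url="https://en.wikipedia.org/wiki?curid=12" title="Anarchism">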
89 | // pattern matching on Int 90 | object Int { 91 | def unapply(s: String): Option[Int] = try { 92 | Some(s.toInt) 93 | } catch { 94 | case _: java.lang.NumberFormatException => None 95 | } 96 | } 97 | 98 | // pattern matching on Url 99 | object URL { 100 | def unapply(s: String): Option[URL] = try { 101 | Some(new URL(s)) 102 | } catch { 103 | case _: java.net.MalformedURLException => None 104 | } 105 | } 106 | 107 | docLine match { 108 | case DocLinePattern(Int(id), URL(url), title) => 109 | // read in the body of the document 110 | val body = StringBuilder.newBuilder 111 | while (nextDocument.isEmpty && lines.hasNext) { 112 | val line = lines.next().trim 113 | if (line == "") { 114 | nextDocument = Some(Document(id, url, title, body.mkString.trim)) 115 | } else { 116 | body.append(line) 117 | body.append('\n') 118 | } 119 | } 120 | } 121 | } 122 | 123 | if (!lines.hasNext) lines.close() 124 | } 125 | advanceToNextDoc() 126 | 127 | override def hasNext: Boolean = nextDocument.isDefined 128 | override def next(): Document = nextDocument match { 129 | case None => throw new NoSuchElementException() 130 | case Some(doc) => advanceToNextDoc(); doc 131 | } 132 | } 133 | } 134 | -------------------------------------------------------------------------------- /src/main/scala/org/allenai/ike/index/IndexableText.scala: -------------------------------------------------------------------------------- 1 | package org.allenai.ike.index 2 | 3 | case class IndexableText(idText: IdText, sentences: Seq[Seq[IndexableToken]]) 4 | -------------------------------------------------------------------------------- /src/main/scala/org/allenai/ike/index/IndexableToken.scala: -------------------------------------------------------------------------------- 1 | package org.allenai.ike.index 2 | 3 | case class IndexableToken(word: String, pos: String, lemma: String, chunk: String) 4 | -------------------------------------------------------------------------------- /src/main/scala/org/allenai/ike/index/NlpAnnotate.scala: -------------------------------------------------------------------------------- 1 | package org.allenai.ike.index 2 | 3 | import org.allenai.nlpstack.chunk.{ defaultChunker => chunker } 4 | import org.allenai.nlpstack.core._ 5 | import org.allenai.nlpstack.lemmatize.{ MorphaStemmer => lemmatizer } 6 | import org.allenai.nlpstack.postag.{ defaultPostagger => postagger } 7 | import org.allenai.nlpstack.segment.{ defaultSegmenter => segmenter } 8 | import org.allenai.nlpstack.tokenize.{ defaultTokenizer => tokenizer } 9 | 10 | import scala.util.control.NonFatal 11 | 12 | object NlpAnnotate { 13 | def segment(text: String): Seq[Segment] = segmenter.segment(text).toSeq 14 | 15 | def tokenize(segment: Segment): Seq[Token] = tokenizer.tokenize(segment.text) 16 | 17 | def postag(tokens: Seq[Token]): Seq[PostaggedToken] = postagger.postagTokenized(tokens) 18 | 19 | def chunk(tokens: Seq[PostaggedToken]): Seq[ChunkedToken] = chunker.chunkPostagged(tokens) 20 | 21 | def addEndingMarkers(tokens: Seq[ChunkedToken]): Seq[ChunkedToken] = { 22 | if (tokens.isEmpty) { 23 | List() 24 | } else { 25 | def swI(x: String) = x.startsWith("I-") 26 | def swB(x: String) = x.startsWith("B-") 27 | 28 | (tokens.sliding(2).toList :+ Seq(tokens.last)).map { 29 | case Seq(ChunkedToken(a, b, c, d), ChunkedToken(x, _, _, _)) if swI(a) && swB(x) => 30 | ChunkedToken("E-" + a.substring(2), b, c, d) 31 | case Seq(ChunkedToken(a, b, c, d), ChunkedToken(x, _, _, _)) if swB(a) && swB(x) => 32 | ChunkedToken("BE-" + a.substring(2), b, c, 
d)
33 |           case Seq(ChunkedToken(a, b, c, d), ChunkedToken(x, _, _, _)) =>
34 |             ChunkedToken(a, b, c, d)
35 |           case Seq(ChunkedToken(a, b, c, d)) if swB(a) =>
36 |             ChunkedToken("BE-" + a.substring(2), b, c, d)
37 |           case Seq(ChunkedToken(a, b, c, d)) =>
38 |             ChunkedToken(a, b, c, d)
39 |       }
40 |     }
41 |   }
42 | 
43 |   def lemmatize(chunked: Seq[ChunkedToken]): Seq[Lemmatized[ChunkedToken]] =
44 |     chunked.map(lemmatizer.lemmatizePostaggedToken)
45 | 
46 |   def annotate(text: String): Seq[Seq[Lemmatized[ChunkedToken]]] = segment(text).flatMap {
47 |     segment =>
48 |       val tokens = tokenize(segment)
49 |       val tagged = postag(tokens)
50 |       try {
51 |         val chunked = chunk(tagged)
52 |         val chunkedWithEndingMarkers = addEndingMarkers(chunked)
53 |         Some(lemmatize(chunkedWithEndingMarkers))
54 |       } catch {
55 |         case NonFatal(e) => None
56 |       }
57 |   }
58 | }
59 | 
--------------------------------------------------------------------------------
/src/main/scala/org/allenai/ike/index/NlpAnnotatedText.scala:
--------------------------------------------------------------------------------
1 | package org.allenai.ike.index
2 | 
3 | import org.allenai.nlpstack.core.{ Lemmatized, PostaggedToken }
4 | 
5 | case class NlpAnnotatedText(idText: IdText, sentences: Seq[Seq[Lemmatized[PostaggedToken]]])
6 | 
--------------------------------------------------------------------------------
/src/main/scala/org/allenai/ike/index/XmlSerialization.scala:
--------------------------------------------------------------------------------
1 | package org.allenai.ike.index
2 | 
3 | import scala.xml.{ Elem, Node, Text }
4 | 
5 | object XmlSerialization {
6 |   def xml(text: IndexableText): Elem = {
7 |     val children = addSpaces(text.sentences map xml)
8 |     <document>{ children }</document>
9 |   }
10 |   def xml(tokens: Seq[IndexableToken]): Elem = {
11 |     val children = addSpaces(tokens map xml)
12 |     <sentence>{ children }</sentence>
13 |   }
14 |   def xml(token: IndexableToken): Elem =
15 |     <word pos={ token.pos } lemma={ token.lemma } chunk={ token.chunk }>{ token.word }</word>
16 |   def addSpaces(elems: Seq[Elem]): Seq[Node] = {
17 |     val n = elems.size
18 |     val spaces = List.fill(n)(Text(" "))
19 |     for {
20 |       (elem, space) <- elems.zip(spaces)
21 |       node <- List(elem, space)
22 |     } yield node
23 |   }
24 | }
25 | 
--------------------------------------------------------------------------------
/src/main/scala/org/allenai/ike/ml/compoundop/EvaluatedOp.scala:
--------------------------------------------------------------------------------
1 | package org.allenai.ike.ml.compoundop
2 | 
3 | import org.allenai.ike.ml.queryop.QueryOp
4 | 
5 | import scala.collection.immutable.IntMap
6 | 
7 | object EvaluatedOp {
8 |   def fromList(op: QueryOp, matches: Seq[Int]): EvaluatedOp = {
9 |     EvaluatedOp(op, IntMap(matches.map((_, 0)): _*))
10 |   }
11 | 
12 |   def fromPairs(op: QueryOp, matches: Seq[(Int, Int)]): EvaluatedOp = {
13 |     EvaluatedOp(op, IntMap(matches: _*))
14 |   }
15 | }
16 | 
17 | /** QueryOp that is paired with a cache of which sentences, inside the Hits object the op was
18 |   * created from, this operator matches
19 |   * @param op operator
20 |   * @param matches map of (sentence index) -> 1 if this operator fills a requirement needed for
21 |   * the sentence to match, and (sentence index) -> 0 if the query this operator was built from
22 |   * would match the sentence even without this operator applied
23 |   */
24 | case class EvaluatedOp(op: QueryOp, matches: IntMap[Int])
25 | 
--------------------------------------------------------------------------------
/src/main/scala/org/allenai/ike/ml/compoundop/NullOp.scala:
--------------------------------------------------------------------------------
1 | package
org.allenai.ike.ml.compoundop 2 | 3 | import org.allenai.ike.ml.queryop.{ QueryOp, TokenQueryOp } 4 | 5 | import scala.collection.immutable.IntMap 6 | 7 | /** Special op with fixed edits counts that makes no changes to the query */ 8 | case class NullOp(numEdits: IntMap[Int]) extends CompoundQueryOp() { 9 | override def ops: Set[TokenQueryOp] = Set() 10 | override def canAdd(op: QueryOp): Boolean = false 11 | override def add(op: QueryOp, matches: IntMap[Int]): CompoundQueryOp = 12 | throw new RuntimeException() 13 | } 14 | -------------------------------------------------------------------------------- /src/main/scala/org/allenai/ike/ml/compoundop/OpConjunction.scala: -------------------------------------------------------------------------------- 1 | package org.allenai.ike.ml.compoundop 2 | 3 | import org.allenai.ike.ml.queryop.TokenCombination._ 4 | import org.allenai.ike.ml.queryop._ 5 | 6 | import scala.collection.immutable.IntMap 7 | 8 | object OpConjunction { 9 | def apply(op: EvaluatedOp): Option[OpConjunction] = op.op match { 10 | case tq: TokenQueryOp => Some(new OpConjunction(Set(tq), op.matches)) 11 | case _ => None 12 | } 13 | } 14 | 15 | /** Class that combines operations that can be combined by ANDing them together 16 | */ 17 | // Note that this OpConjunction is currently not used in favour of OpConjunctionOfDisjunction 18 | // with carefully restricted Disjunction slots. 19 | case class OpConjunction private ( 20 | ops: Set[TokenQueryOp], 21 | numEdits: IntMap[Int] 22 | ) extends CompoundQueryOp() { 23 | 24 | override def canAdd(op: QueryOp): Boolean = op match { 25 | case rt: RemoveToken => !ops.exists(x => x.slot == rt.slot) 26 | case tq: TokenQueryOp => !ops.exists(x => x.slot == tq.slot && x.combinable(tq) != AND) 27 | } 28 | 29 | override def add(op: QueryOp, matches: IntMap[Int]): OpConjunction = { 30 | require(canAdd(op)) 31 | val addEdits = op match { 32 | case tq: TokenQueryOp => !ops.exists(_.slot == tq.slot) 33 | case _ => true 34 | } 35 | val newNumEdits = 36 | if (addEdits) { 37 | numEdits.intersectionWith(matches, (_, v1: Int, v2: Int) => v1 + v2) 38 | } else { 39 | numEdits.intersectionWith(matches, (_, v1: Int, v2: Int) => math.min(1, v1 + v2)) 40 | } 41 | val newOp = op match { 42 | case tq: TokenQueryOp => tq 43 | } 44 | new OpConjunction(ops + newOp, newNumEdits) 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /src/main/scala/org/allenai/ike/ml/compoundop/OpConjunctionOfDisjunctions.scala: -------------------------------------------------------------------------------- 1 | package org.allenai.ike.ml.compoundop 2 | 3 | import org.allenai.ike.ml.Slot 4 | import org.allenai.ike.ml.queryop.TokenCombination._ 5 | import org.allenai.ike.ml.queryop._ 6 | 7 | import scala.collection.immutable.IntMap 8 | 9 | object OpConjunctionOfDisjunctions { 10 | def apply( 11 | op: EvaluatedOp, 12 | restrictDisjunctionsTo: Option[Set[Slot]] = None 13 | ): Option[OpConjunctionOfDisjunctions] = op.op match { 14 | case tq: TokenQueryOp => Some(new OpConjunctionOfDisjunctions(Set(tq), op.matches, 15 | Map(tq.slot -> op.matches), restrictDisjunctionsTo)) 16 | case _ => None // Cannot initialize with a non-TokenQueryOp 17 | } 18 | } 19 | 20 | /** CompoundOp that combines ops that are compatible by either AND or OR operations. 
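  * (For instance, two ops that constrain the same slot may be ORed together into a per-slot
  * disjunction, when that slot allows disjunctions, while ops on different slots are ANDed.)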
21 |   * This comes at the price of some additional computational expense relative to OpConjunction
22 |   *
23 |   * @param ops TokenQueryOps that this contains
24 |   * @param numEdits maps sentence indices to the number of required edits this op has made to
25 |   * that sentence
26 |   * @param perSlotEdits maps each slot to a map from sentence index to the number of edits made
27 |   * to that sentence by operations that were applied to that slot.
28 |   * @param restrictDisjunctionsTo Optionally limits the slots we can use disjunctions for
29 |   */
30 | case class OpConjunctionOfDisjunctions private (
31 |   ops: Set[TokenQueryOp],
32 |   numEdits: IntMap[Int],
33 |   perSlotEdits: Map[Slot, IntMap[Int]],
34 |   restrictDisjunctionsTo: Option[Set[Slot]]
35 | ) extends CompoundQueryOp() {
36 | 
37 |   override def canAdd(op: QueryOp): Boolean = op match {
38 |     case rt: RemoveToken => !(perSlotEdits contains rt.slot)
39 |     case tq: TokenQueryOp =>
40 |       if (perSlotEdits contains tq.slot) {
41 |         val otherOps = ops.filter(_.slot == tq.slot)
42 |         val combinations = otherOps.map(x => x.combinable(tq))
43 |         combinations.forall(_ == AND) ||
44 |           ((restrictDisjunctionsTo.isEmpty || restrictDisjunctionsTo.get.contains(tq.slot))
45 |             && combinations.forall(_ == OR))
46 |       } else {
47 |         true
48 |       }
49 |   }
50 | 
51 |   override def add(op: QueryOp, matches: IntMap[Int]): OpConjunctionOfDisjunctions = {
52 |     require(canAdd(op))
53 |     val newOp = op match {
54 |       case tq: TokenQueryOp => tq
55 |     }
56 | 
57 |     val (newPerSlot, recalculate) =
58 |       if (perSlotEdits contains newOp.slot) {
59 |         val otherOps = ops.filter(_.slot == newOp.slot)
60 |         if (newOp.combinable(otherOps.head) == OR) {
61 |           val newSlot = perSlotEdits(newOp.slot).unionWith(
62 |             matches, (_, v1: Int, v2: Int) => math.min(1, v1 + v2)
63 |           )
64 |           (perSlotEdits + (newOp.slot -> newSlot), true)
65 |         } else {
66 |           val newSlot = perSlotEdits(newOp.slot).intersectionWith(
67 |             matches, (_, v1: Int, v2: Int) => math.min(1, v1 + v2)
68 |           )
69 |           (perSlotEdits + (newOp.slot -> newSlot), false)
70 |         }
71 |       } else {
72 |         (perSlotEdits + (newOp.slot -> matches), false)
73 |       }
74 | 
75 |     val newMatches = if (recalculate) {
76 |       // A previously existing slot's IntMap was changed, so we can't just AND numEdits
77 |       // and matches; we have to recompute it from each per-slot IntMap
78 |       newPerSlot.values.reduce((a, b) =>
79 |         a.intersectionWith(b, (_, v1: Int, v2: Int) => v1 + v2))
80 |     } else {
81 |       numEdits.intersectionWith(matches, (_, v1: Int, v2: Int) => v1 + v2)
82 |     }
83 |     new OpConjunctionOfDisjunctions(ops + newOp, newMatches, newPerSlot, restrictDisjunctionsTo)
84 |   }
85 | }
86 | 
--------------------------------------------------------------------------------
/src/main/scala/org/allenai/ike/ml/queryop/OpGenerator.scala:
--------------------------------------------------------------------------------
1 | package org.allenai.ike.ml.queryop
2 | 
3 | import org.allenai.ike._
4 | import org.allenai.ike.ml._
5 | 
6 | import scala.collection.immutable.IntMap
7 | 
8 | object OpGenerator {
9 | 
10 |   /** Calculates which QLeaf could be used to match with QueryMatches.
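  * (For example, if every token covered by a QueryMatch is the word "cat" tagged NN, both
  * QWord("cat") and QPos("NN") become candidate leaves, each mapped to the indices of the
  * matches they cover.)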
11 |   * Note 1: QueryMatches with no tokens are never matched to a QLeaf
12 |   * 2: QueryMatches with multiple tokens will be matched to a QLeaf if repeating that QLeaf a
13 |   * sufficient number of times could match all of their tokens
14 |   *
15 |   * @param leafGenerator QLeafGenerator that determines which leaves to build the map for
16 |   * @param matches Sequence of QueryMatches to build the map for
17 |   * @return map from each candidate QLeaf to an IntMap from match index to 0 if that match already matched, else 1
18 |   */
19 |   def buildLeafMap(
20 |     leafGenerator: QLeafGenerator,
21 |     matches: Seq[QueryMatch]
22 |   ): Map[QLeaf, IntMap[Int]] = {
23 |     // Mutable for update speed since this is performance-relevant code
24 |     val operatorMap = scala.collection.mutable.Map[QLeaf, List[(Int, Int)]]()
25 |     matches.view.zipWithIndex.foreach {
26 |       case (queryMatch, index) =>
27 |         val tokens = queryMatch.tokens
28 |         val leaves = leafGenerator.generateLeaves(tokens)
29 |         leaves.foreach { qLeaf =>
30 |           val currentList = operatorMap.getOrElse(qLeaf, List[(Int, Int)]())
31 |           operatorMap.put(qLeaf, (index, if (queryMatch.didMatch) 0 else 1) :: currentList)
32 |         }
33 |     }
34 |     operatorMap.map { case (k, v) => k -> IntMap(v: _*) }.toMap
35 |   }
36 | }
37 | 
38 | /** Abstract class for classes that 'generate' possible operations that could be applied
39 |   * to a query, and that calculate which sentences the starting query would match once an operation is applied
40 |   */
41 | abstract class OpGenerator {
42 |   def generate(
43 |     matches: QueryMatches,
44 |     examples: IndexedSeq[WeightedExample]
45 |   ): Map[QueryOp, IntMap[Int]]
46 | }
47 | 
48 | 
--------------------------------------------------------------------------------
/src/main/scala/org/allenai/ike/ml/queryop/QLeafGenerator.scala:
--------------------------------------------------------------------------------
1 | package org.allenai.ike.ml.queryop
2 | 
3 | import org.allenai.ike._
4 | import org.allenai.ike.ml.Token
5 | 
6 | object QLeafGenerator {
7 | 
8 |   /** @return True if word is a word that can be used in a string QExpr */
9 |   def validWord(word: String): Boolean = {
10 |     QueryLanguage.parser.wordRegex.pattern.matcher(word).matches()
11 |   }
12 | 
13 |   /** @return True if pos is a POS tag that can be used in a string QExpr */
14 |   def validPos(pos: String): Boolean = QExprParser.posTagSet contains pos
15 | 
16 | }
17 | 
18 | /** Given Tokens, builds QLeafs that would match those tokens
19 |   *
20 |   * @param pos whether to generate QPos
21 |   * @param word whether to generate QWord
22 |   * @param avoidSuggesting a specific QLeaf this should never suggest
23 |   */
24 | case class QLeafGenerator(pos: Boolean, word: Boolean,
25 |     avoidSuggesting: Set[QLeaf] = Set()) {
26 | 
27 |   def generateLeaves(tokens: Seq[Token]): Iterable[QLeaf] = {
28 |     if (tokens.isEmpty) {
29 |       Seq()
30 |     } else {
31 |       val posOp = if (pos) {
32 |         val head = tokens.head.pos
33 |         if (tokens.tail.forall(_.pos == head) && QLeafGenerator.validPos(head)) {
34 |           Some(QPos(head))
35 |         } else {
36 |           None
37 |         }
38 |       } else {
39 |         None
40 |       }
41 |       val wordOp = if (word) {
42 |         val head = tokens.head.word
43 |         if (tokens.tail.forall(_.word == head) && QLeafGenerator.validWord(head)) {
44 |           Some(QWord(head))
45 |         } else {
46 |           None
47 |         }
48 |       } else {
49 |         None
50 |       }
51 |       (posOp ++ wordOp).filterNot(avoidSuggesting.contains(_))
52 |     }
53 |   }
54 | 
55 |   def generateLeaves(token: Token): Iterable[QLeaf] = {
56 |     val posOp = if (pos && QLeafGenerator.validPos(token.pos)) {
57 |       Some(QPos(token.pos))
58 |     } else {
59 |       None
60 |     }
61 | 
62 |     val wordOp = if (word && QLeafGenerator.validWord(token.word)) {
63 |       Some(QWord(token.word))
64 |     } else {
65 |       None
66 |     }
67 |     (posOp ++ wordOp).filterNot(avoidSuggesting.contains(_))
68 |   }
69 | }
70 | 
--------------------------------------------------------------------------------
/src/main/scala/org/allenai/ike/ml/subsample/MatchesSampler.scala:
--------------------------------------------------------------------------------
1 | package org.allenai.ike.ml.subsample
2 | 
3 | import org.allenai.ike._
4 | import org.allenai.ike.ml._
5 | import org.allenai.ike.patterns.NamedPattern
6 | 
7 | import org.allenai.blacklab.search.{ Hits, Searcher }
8 | 
9 | object MatchesSampler {
10 | 
11 |   /** Returns a query that matches the same hits as `queryTokenSequence`, but where any
12 |   * query-token that could match a variable number of tokens is wrapped in a capture group
13 |   * named by the corresponding entry in `names`
14 |   */
15 |   def captureQueryTokens(queryTokenSequence: QueryTokenSequence, names: Seq[String]): Seq[QExpr] = {
16 |     queryTokenSequence.queryTokens.zip(names).map {
17 |       case (qexpr, name) =>
18 |         val (min, max) = QueryLanguage.getQueryLength(qexpr)
19 |         if (min == max) {
20 |           qexpr
21 |         } else {
22 |           QNamed(qexpr, name)
23 |         }
24 |     }
25 |   }
26 | 
27 |   /** Returns a QExpr that matches tokenizedQuery and where any individual query-tokens
28 |   * that are of variable length are wrapped in capture groups
29 |   */
30 |   def getNamedQuery(tokenizedQuery: TokenizedQuery): QExpr = {
31 |     QSeq(tokenizedQuery.getSequencesWithNames.flatMap {
32 |       case (tseq, names) =>
33 |         val tokensWithName = captureQueryTokens(tseq, names)
34 |         tseq match {
35 |           case CapturedTokenSequence(_, name, _) =>
36 |             Seq(QNamed(TokenizedQuery.qexprFromSequence(tokensWithName), name))
37 |           case TokenSequence(_) => tokensWithName
38 |         }
39 |     })
40 |   }
41 | 
42 |   /** For a one-column table, returns a query similar to `getNamedQuery`, but whose capture
43 |   * group is already limited to the rows of the single-column table `table`
44 |   */
45 |   def getNamedColumnMatchingQuery(tokenizedQuery: TokenizedQuery, table: Table): QExpr = {
46 |     // We can optimize this case by ANDing the capture group with a query matching the table rows
47 |     // instead of using SpanQueryFilterByCaptureGroups
48 |     require(table.cols.size == 1)
49 |     val filteredRows = Sampler.getFilteredRows(tokenizedQuery, table)
50 |     val captureQuery = QDisj(filteredRows.map(x => QSeq(x.head)))
51 |     QSeq(tokenizedQuery.getSequencesWithNames.flatMap {
52 |       case (tseq, names) =>
53 |         val tokensWithName = captureQueryTokens(tseq, names)
54 |         tseq match {
55 |           case CapturedTokenSequence(_, name, _) =>
56 |             val captureAndQuery =
57 |               QAnd(TokenizedQuery.qexprFromSequence(tokensWithName), captureQuery)
58 |             Seq(QNamed(captureAndQuery, name))
59 |           case TokenSequence(_) => tokensWithName
60 |         }
61 |     })
62 |   }
63 | }
64 | 
65 | /** Samples hits that the given query already matches
66 |   */
67 | case class MatchesSampler() extends Sampler() {
68 |   override def getSample(
69 |     qexpr: TokenizedQuery,
70 |     searcher: Searcher,
71 |     targetTable: Table,
72 |     tables: Map[String, Table],
73 |     patterns: Map[String, NamedPattern]
74 |   ): Hits = {
75 |     val query = QueryLanguage.interpolateTables(
76 |       MatchesSampler.getNamedQuery(qexpr),
77 |       tables,
78 |       patterns,
79 |       None
80 |     ).get
81 |     searcher.find(BlackLabSemantics.blackLabQuery(query))
82 |   }
83 | 
84 |   override def getLabelledSample(
85 |     qexpr: TokenizedQuery,
86 |     searcher: Searcher,
87 |     targetTable: Table,
88 |     tables: Map[String, Table],
89 |     patterns: Map[String, NamedPattern],
90 |     startFromDoc: Int,
91 |     startFromToken: Int
92 |   ): Hits = {
93 |     val spanQuery = if (targetTable.cols.size == 1) {
94 |       val oneColQexpr = MatchesSampler.getNamedColumnMatchingQuery(qexpr, targetTable)
95 |       val interQuery = QueryLanguage.interpolateTables(oneColQexpr, tables, patterns, None).get
96 |       val spanQuery = searcher.createSpanQuery(BlackLabSemantics.blackLabQuery(interQuery))
97 |       new SpanQueryStartAt(spanQuery, startFromDoc, startFromToken)
98 |     } else {
99 |       val query = QueryLanguage.interpolateTables(
100 |         MatchesSampler.getNamedQuery(qexpr), tables, patterns, None
101 |       ).get
102 |       val spanQuery = searcher.createSpanQuery(BlackLabSemantics.blackLabQuery(query))
103 |       val tableQuery = Sampler.buildLabelledQuery(qexpr, targetTable)
104 |       val tableSpanQuery = searcher.createSpanQuery(BlackLabSemantics.blackLabQuery(tableQuery))
105 |       new SpanQueryFilterByCaptureGroups(spanQuery, tableSpanQuery,
106 |         targetTable.cols, startFromDoc, startFromToken)
107 |     }
108 |     searcher.find(spanQuery)
109 |   }
110 | }
111 | 
--------------------------------------------------------------------------------
/src/main/scala/org/allenai/ike/ml/subsample/SpanQueryFilterByCaptureGroups.scala:
--------------------------------------------------------------------------------
1 | package org.allenai.ike.ml.subsample
2 | 
3 | import org.allenai.blacklab.search.lucene.{ BLSpansWrapper, SpanQueryBase }
4 | import org.apache.lucene.index.{ AtomicReaderContext, Term, TermContext }
5 | import org.apache.lucene.search.spans.{ SpanQuery, Spans }
6 | import org.apache.lucene.util.Bits
7 | 
8 | import java.util
9 | 
10 | /** SpanQuery that filters out hits from a query that do not capture the same spans as another
11 |   * query. In other words, a capture-group-level AND between two queries.
12 |   *
13 |   * @param _query The query to filter
14 |   * @param _filter The filter to AND the query against
15 |   * @param captureGroups Capture groups to filter the query by; both the filter and the query should
16 |   * contain capture groups with the names in this list
17 |   * @param startFromDoc document to start from, returned hits have doc >= startFromDoc
18 |   * @param startFromToken token to start from, returned hits have doc > startFromDoc or,
19 |   * within startFromDoc, start >= startFromToken
20 |   */
21 | class SpanQueryFilterByCaptureGroups(
22 |   _query: SpanQuery,
23 |   _filter: SpanQuery,
24 |   captureGroups: Seq[String],
25 |   startFromDoc: Int = 0,
26 |   startFromToken: Int = 0
27 | ) extends SpanQueryBase(_query, _filter) {
28 | 
29 |   def query: SpanQuery = clauses(0)
30 |   def filter: SpanQuery = clauses(1)
31 | 
32 |   override def getSpans(context: AtomicReaderContext, acceptDocs: Bits,
33 |     termContexts: util.Map[Term, TermContext]): Spans = {
34 |     val leftSpans = query.getSpans(context, acceptDocs, termContexts)
35 |     val filterSpans = filter.getSpans(context, acceptDocs, termContexts)
36 |     new SpansFilterByCaptureGroups(
37 |       BLSpansWrapper.optWrap(leftSpans),
38 |       BLSpansWrapper.optWrap(filterSpans),
39 |       captureGroups,
40 |       startFromDoc,
41 |       startFromToken
42 |     )
43 |   }
44 | 
45 |   override def toString(field: String): String = {
46 |     query.toString(field) + s" FILTERED BY <${filter.toString(field)}>"
47 |   }
48 | }
49 | 
--------------------------------------------------------------------------------
/src/main/scala/org/allenai/ike/ml/subsample/SpanQueryMinimumValidCaptures.scala:
--------------------------------------------------------------------------------
1 | package org.allenai.ike.ml.subsample
2 | 
3 | import org.allenai.blacklab.search.lucene.{ BLSpansWrapper, SpanQueryBase }
4 | import org.apache.lucene.index.{ AtomicReaderContext, Term, TermContext }
5 | import org.apache.lucene.search.spans.{ SpanQuery, Spans }
6 | import org.apache.lucene.util.Bits
7 | 
8 | import java.util
9 | 
10 | /** SpanQuery that filters out hits from another query when they have too few valid captures,
11 |   * where a valid capture is one that is non-negative and non-null. See SpansMinimumValidCaptures.
12 |   */
13 | class SpanQueryMinimumValidCaptures(
14 |   spans: SpanQuery,
15 |   requiredMatches: Int,
16 |   groupsToCheck: Seq[String]
17 | ) extends SpanQueryBase(spans) {
18 | 
19 |   override def getSpans(atomicReaderContext: AtomicReaderContext, bits: Bits,
20 |     map: util.Map[Term, TermContext]): Spans = {
21 |     val spans = BLSpansWrapper.optWrap(clauses.head.getSpans(atomicReaderContext, bits, map))
22 |     new SpansMinimumValidCaptures(spans, requiredMatches, groupsToCheck)
23 |   }
24 | 
25 |   override def toString(s: String): String = {
26 |     s"AtLeast($requiredMatches) Captures From ${spans.toString(s)}"
27 |   }
28 | }
29 | 
--------------------------------------------------------------------------------
/src/main/scala/org/allenai/ike/ml/subsample/SpanQueryStartAt.scala:
--------------------------------------------------------------------------------
1 | package org.allenai.ike.ml.subsample
2 | 
3 | import org.allenai.blacklab.search.lucene.{ BLSpansWrapper, SpanQueryBase }
4 | import org.apache.lucene.index.{ AtomicReaderContext, Term, TermContext }
5 | import org.apache.lucene.search.spans.{ SpanQuery, Spans }
6 | import org.apache.lucene.util.Bits
7 | 
8 | import java.util
9 | 
10 | /** Modifies a SpanQuery so that the returned Spans only occur after the given document and token
11 |   *
12 |   * @param query SpanQuery to modify
13 |   * @param startDoc document to start from, returned hits have doc >= startDoc
14 |   * @param startToken token to start from, returned hits have doc > startDoc or,
15 |   * within startDoc, start >= startToken
16 |   */
17 | class SpanQueryStartAt(query: SpanQuery, startDoc: Int, startToken: Int)
18 |     extends SpanQueryBase(query) {
19 |   override def getSpans(
20 |     atomicReaderContext: AtomicReaderContext,
21 |     bits: Bits,
22 |     map: util.Map[Term, TermContext]
23 |   ): Spans = {
24 |     val clauseSpans = BLSpansWrapper.optWrap(clauses(0).getSpans(atomicReaderContext, bits, map))
25 |     new SpansStartAt(clauseSpans, startDoc, startToken)
26 |   }
27 | 
28 |   override def toString(s: String): String = s"${query.toString(s)}"
30 | }
31 | 
--------------------------------------------------------------------------------
/src/main/scala/org/allenai/ike/ml/subsample/SpanQueryTrackingDisjunction.scala:
--------------------------------------------------------------------------------
1 | package org.allenai.ike.ml.subsample
2 | 
3 | import org.allenai.blacklab.search.lucene.{ BLSpansWrapper, SpanQueryBase }
4 | import org.apache.lucene.index.{ AtomicReaderContext, Term, TermContext }
5 | import org.apache.lucene.search.spans.{ SpanQuery, Spans }
6 | import org.apache.lucene.util.Bits
7 | 
8 | import java.util
9 | import scala.collection.JavaConverters._
10 | 
11 | /** Disjunction of SpanQueries that uses a capture group to mark whether 'firstSpan' created each
12 |   * returned Span or if one of the 'alternatives' Spans did. See `SpansTrackingDisjunction`.
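  * (For example, if 'firstSpan' matches "cat" and an alternative matches "dog", hits over "dog"
  * are still returned, with the capture group recording that they came from an alternative.)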
13 | */ 14 | class SpanQueryTrackingDisjunction( 15 | firstSpan: SpanQuery, 16 | alternatives: Seq[SpanQuery], 17 | captureName: String 18 | ) extends SpanQueryBase((firstSpan +: alternatives).asJava) { 19 | 20 | override def getSpans( 21 | atomicReaderContext: AtomicReaderContext, 22 | bits: Bits, map: util.Map[Term, TermContext] 23 | ): Spans = { 24 | val spans = clauses.map(spanQuery => 25 | BLSpansWrapper.optWrap(spanQuery.getSpans(atomicReaderContext, bits, map))) 26 | new SpansTrackingDisjunction(spans.head, spans.drop(1), captureName) 27 | } 28 | 29 | override def toString(s: String): String = { 30 | s"${firstSpan.toString(s)} EXTENDED BY<$captureName ${alternatives.map(_.toString(s))})" 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /src/main/scala/org/allenai/ike/ml/subsample/SpansMinimumValidCaptures.scala: -------------------------------------------------------------------------------- 1 | package org.allenai.ike.ml.subsample 2 | 3 | import org.allenai.blacklab.search.Span 4 | import org.allenai.blacklab.search.lucene.{ BLSpans, HitQueryContext } 5 | 6 | import java.util 7 | 8 | /** Returns spans that have a minimum number of valid capture groups, where a valid capture 9 | * group is one that is non-null and whose end is non-negative 10 | * 11 | * @param clause Spans to filter 12 | * @param requiredMatches Number of required matches 13 | * @param capturesToCheck Names of the capture groups to check, should be registered by clause 14 | */ 15 | class SpansMinimumValidCaptures( 16 | clause: BLSpans, 17 | requiredMatches: Int, 18 | capturesToCheck: Seq[String] 19 | ) extends BLSpans { 20 | 21 | var more = true 22 | var captureGroupHolder: Array[Span] = Array() 23 | var captureIndicesToCheck: Seq[Int] = Seq() 24 | var clauseNumCaptureGroups = -1 25 | var clauseFirstCaptureGroupIndex = -1 26 | 27 | override def passHitQueryContextToClauses(context: HitQueryContext): Unit = { 28 | clauseFirstCaptureGroupIndex = context.getCaptureRegisterNumber 29 | clause.setHitQueryContext(context) 30 | captureGroupHolder = Array.fill[Span](context.numberOfCapturedGroups())(null) 31 | clauseNumCaptureGroups = context.getCaptureRegisterNumber - clauseFirstCaptureGroupIndex 32 | val captureGroupsNames = context.getCapturedGroupNames 33 | require(capturesToCheck.forall(captureGroupsNames.contains)) 34 | captureIndicesToCheck = capturesToCheck.map(captureGroupsNames.indexOf) 35 | } 36 | 37 | override def getCapturedGroups(capturedGroups: Array[Span]): Unit = { 38 | // Any valid hit will have already filled captureGroupHolder 39 | System.arraycopy(captureGroupHolder, clauseFirstCaptureGroupIndex, 40 | capturedGroups, clauseFirstCaptureGroupIndex, clauseNumCaptureGroups) 41 | } 42 | 43 | def onValidMatch: Boolean = { 44 | captureGroupHolder.indices.foreach(captureGroupHolder.update(_, null)) 45 | clause.getCapturedGroups(captureGroupHolder) 46 | val numValidCaptures = captureIndicesToCheck.count(i => { 47 | val span = captureGroupHolder(i) 48 | span != null && span.end > 0 49 | }) 50 | numValidCaptures >= requiredMatches 51 | } 52 | 53 | override def skipTo(target: Int): Boolean = { 54 | if (more) { 55 | more = clause.skipTo(target) 56 | while (more && !onValidMatch) { 57 | more = clause.next() 58 | } 59 | } 60 | more 61 | } 62 | 63 | override def next(): Boolean = { 64 | if (more) { 65 | more = clause.next() 66 | while (more && !onValidMatch) { 67 | more = clause.next() 68 | } 69 | } 70 | more 71 | } 72 | 73 | override def doc(): Int = clause.doc 74 | 75 
|   override def start(): Int = clause.start
76 | 
77 |   override def end(): Int = clause.end
78 | 
79 |   override def hitsHaveUniqueStart(): Boolean = clause.hitsHaveUniqueStart()
80 | 
81 |   override def hitsHaveUniqueEnd(): Boolean = clause.hitsHaveUniqueEnd()
82 | 
83 |   override def hitsAllSameLength(): Boolean = clause.hitsAllSameLength()
84 | 
85 |   override def hitsLength(): Int = clause.hitsLength()
86 | 
87 |   override def hitsAreUnique(): Boolean = clause.hitsAreUnique()
88 | 
89 |   override def hitsEndPointSorted(): Boolean = clause.hitsEndPointSorted()
90 | 
91 |   override def getPayload: util.Collection[Array[Byte]] = clause.getPayload
92 | 
93 |   override def isPayloadAvailable: Boolean = clause.isPayloadAvailable
94 | }
95 | 
--------------------------------------------------------------------------------
/src/main/scala/org/allenai/ike/ml/subsample/SpansStartAt.scala:
--------------------------------------------------------------------------------
1 | package org.allenai.ike.ml.subsample
2 | 
3 | import org.allenai.blacklab.search.Span
4 | import org.allenai.blacklab.search.lucene.{ BLSpans, HitQueryContext }
5 | 
6 | import java.util
7 | 
8 | /** Modifies `clause` so that it only matches Spans after the given document and token
9 |   *
10 |   * @param clause Spans to modify
11 |   * @param startDoc document to start from, returned hits have doc >= startDoc
12 |   * @param startToken token to start from, returned hits have doc > startDoc or,
13 |   * within startDoc, start >= startToken
14 |   */
15 | class SpansStartAt(clause: BLSpans, startDoc: Int, startToken: Int) extends BLSpans {
16 | 
17 |   var initialized = false
18 | 
19 |   def initialize(): Boolean = {
20 |     if (!clause.skipTo(startDoc)) {
21 |       false
22 |     } else {
23 |       while (clause.start() < startToken && clause.doc == startDoc) {
24 |         if (!clause.next()) return false
25 |       }
26 |       true
27 |     }
28 |   }
29 | 
30 |   override def next(): Boolean = {
31 |     if (!initialized) {
32 |       initialized = true
33 |       initialize()
34 |     } else {
35 |       clause.next()
36 |     }
37 |   }
38 | 
39 |   override def skipTo(target: Int): Boolean = {
40 |     if (!initialized) {
41 |       initialized = true
42 |       if (target > doc) {
43 |         clause.skipTo(target)
44 |       } else {
45 |         initialize()
46 |       }
47 |     } else {
48 |       clause.skipTo(target)
49 |     }
50 |   }
51 | 
52 |   override def passHitQueryContextToClauses(hitQueryContext: HitQueryContext) = {
53 |     clause.setHitQueryContext(hitQueryContext)
54 |   }
55 | 
56 |   override def getCapturedGroups(spans: Array[Span]): Unit = clause.getCapturedGroups(spans)
57 | 
58 |   override def doc(): Int = clause.doc
59 | 
60 |   override def start(): Int = clause.start
61 | 
62 |   override def end(): Int = clause.end
63 | 
64 |   override def hitsHaveUniqueStart(): Boolean = clause.hitsHaveUniqueStart
65 | 
66 |   override def hitsHaveUniqueEnd(): Boolean = clause.hitsHaveUniqueEnd
67 | 
68 |   override def hitsAllSameLength(): Boolean = clause.hitsAllSameLength
69 | 
70 |   override def hitsLength(): Int = clause.hitsLength
71 | 
72 |   override def hitsAreUnique(): Boolean = clause.hitsAreUnique
73 | 
74 |   override def hitsEndPointSorted(): Boolean = clause.hitsEndPointSorted
75 | 
76 |   override def getPayload: util.Collection[Array[Byte]] = clause.getPayload
77 | 
78 |   override def isPayloadAvailable: Boolean = clause.isPayloadAvailable
79 | 
80 | }
81 | 
--------------------------------------------------------------------------------
/src/main/scala/org/allenai/ike/ml/subsample/TextPatternTrackingDisjunction.scala:
--------------------------------------------------------------------------------
1 | package
org.allenai.ike.ml.subsample
2 | 
3 | import org.allenai.blacklab.search.{ QueryExecutionContext, TextPattern, TextPatternTranslator }
4 | import org.apache.lucene.search.spans.SpanQuery
5 | 
6 | /** TextPattern for SpanQueryTrackingDisjunction. Note this only works when translated into
7 |   * SpanQueries; making it work in general would require an API change to TextPatternTranslator
8 |   */
9 | class TextPatternTrackingDisjunction(
10 |   firstSpan: TextPattern,
11 |   alternatives: Seq[TextPattern],
12 |   captureName: String
13 | ) extends TextPattern {
14 | 
15 |   override def translate[T](
16 |     translator: TextPatternTranslator[T],
17 |     context: QueryExecutionContext
18 |   ): T = {
19 |     val translatedFirstSpan = firstSpan.translate(translator).asInstanceOf[SpanQuery]
20 |     val translatedAlternatives = alternatives.map(_.translate(translator).asInstanceOf[SpanQuery])
21 |     new SpanQueryTrackingDisjunction(
22 |       translatedFirstSpan, translatedAlternatives, captureName
23 |     ).asInstanceOf[T]
24 |   }
25 | }
26 | 
--------------------------------------------------------------------------------
/src/main/scala/org/allenai/ike/patterns/NamedPattern.scala:
--------------------------------------------------------------------------------
1 | package org.allenai.ike.patterns
2 | 
3 | case class NamedPattern(name: String, pattern: String)
4 | 
--------------------------------------------------------------------------------
/src/main/scala/org/allenai/ike/patterns/PatternUtilities.scala:
--------------------------------------------------------------------------------
1 | package org.allenai.ike.patterns
2 | 
3 | import com.typesafe.config.ConfigFactory
4 | import org.allenai.ike.{SearchConfig, SearchRequest}
5 | import scala.collection.JavaConverters._
6 | 
7 | object PatternUtilities {
8 |   /** Given a Seq of NamedPattern, create a Map where the key is the pattern's name, and the
9 |   * value is a SearchRequest.
10 |   *
11 |   * In this manner you can access a SearchRequest by its name
12 |   *
13 |   * @param namedPatterns A Seq[NamedPattern]
14 |   * @return a Map[String, SearchRequest] where the key is the name from a NamedPattern
15 |   */
16 |   def createSearchers(namedPatterns: Seq[NamedPattern]): Map[String, SearchRequest] = {
17 |     val hugeLimit = Int.MaxValue
18 |     namedPatterns.map { namedPattern =>
19 |       (namedPattern.name, SearchRequest(Left(namedPattern.pattern), None, None, SearchConfig(hugeLimit)))
20 |     }.toMap
21 |   }
22 | 
23 |   /** Takes a config file and creates the NamedPattern extraction patterns
24 |   *
25 |   * @note See the configuration in test/resources for an example of the format
26 |   *
27 |   * @param configFile The config file that holds your patterns.
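  * (e.g. loadNamedPatterns("testPatterns.conf") would load the two patterns defined in
  * src/test/resources/testPatterns.conf)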
28 | * @return a Seq[NamedPattern] 29 | */ 30 | def loadNamedPatterns(configFile: String): Seq[NamedPattern] = { 31 | val patternConfig = ConfigFactory.load(configFile) 32 | patternConfig.getConfigList("ExtractionPatterns.patterns").asScala.map { config => 33 | NamedPattern(config.getString("name"), config.getString("pattern")) 34 | } 35 | } 36 | 37 | } 38 | -------------------------------------------------------------------------------- /src/main/scala/org/allenai/ike/persistence/IkePostgresDriver.scala: -------------------------------------------------------------------------------- 1 | package org.allenai.ike.persistence 2 | 3 | import com.github.tminglei.slickpg._ 4 | 5 | import scala.slick.driver.PostgresDriver 6 | 7 | trait IkePostgresDriver extends PostgresDriver with PgArraySupport with PgPlayJsonSupport { 8 | override val pgjson = "jsonb" //to keep back compatibility, pgjson's value was "json" by default 9 | 10 | trait ImplicitsPlus extends Implicits with ArrayImplicits with JsonImplicits 11 | trait SimpleQLPlus extends SimpleQL with ImplicitsPlus 12 | 13 | override lazy val Implicit = new ImplicitsPlus {} 14 | override val simple = new SimpleQLPlus {} 15 | } 16 | 17 | object IkePostgresDriver extends IkePostgresDriver 18 | -------------------------------------------------------------------------------- /src/test/resources/testPatterns.conf: -------------------------------------------------------------------------------- 1 | ExtractionPatterns = { 2 | patterns = [ 3 | { 4 | name = "result-percent", 5 | pattern = "CD {%|percent|per cent|pct}" 6 | }, 7 | { 8 | name = "treatments", 9 | pattern = "{were given|treated with|received|receiving} {CD|NN|JJ|IN}+" 10 | } 11 | ] 12 | } 13 | -------------------------------------------------------------------------------- /src/test/scala/org/allenai/ike/TestBlackLabSemantics.scala: -------------------------------------------------------------------------------- 1 | package org.allenai.ike 2 | 3 | import org.allenai.common.testkit.{ ScratchDirectory, UnitSpec } 4 | import org.allenai.ike.index.TestData 5 | 6 | class TestBlackLabSemantics extends UnitSpec with ScratchDirectory { 7 | TestData.createTestIndex(scratchDir) 8 | val searcher = TestData.testSearcher(scratchDir) 9 | def results(s: String): Iterator[BlackLabResult] = { 10 | val e = QExprParser.parse(s).get 11 | val q = BlackLabSemantics.blackLabQuery(e) 12 | val hits = searcher.find(q) 13 | BlackLabResult.fromHits(hits, "testCorpus") 14 | } 15 | def search(s: String): Set[String] = { 16 | for { 17 | result <- results(s) 18 | words = result.matchWords mkString (" ") 19 | } yield words 20 | }.toSet 21 | def searchGroups(s: String): Set[String] = { 22 | for { 23 | result <- results(s) 24 | (groupName, offsets) <- result.captureGroups 25 | data = result.wordData.slice(offsets.start, offsets.end) 26 | words = data map (_.word) mkString " " 27 | named = s"$groupName $words" 28 | } yield named 29 | }.toSet 30 | 31 | "BlackLabSemantics" should "handle single word queries" in { 32 | assert(search("like") == Set("like")) 33 | assert(search("garbage") == Set.empty) 34 | } 35 | 36 | it should "handle pos tags" in { 37 | assert(search("NNS") == Set("bananas")) 38 | assert(search("PRP") == Set("I", "It", "They")) 39 | } 40 | 41 | it should "handle multi-term queries" in { 42 | assert(search("I VBP") == Set("I like", "I hate")) 43 | assert(search("PRP VBP") == Set("I like", "I hate", "It tastes", "They taste")) 44 | } 45 | 46 | it should "handle wildcard queries" in { 47 | assert(search("I .") == Set("I 
like", "I hate")) 48 | } 49 | 50 | it should "handle disjunctive queries" in { 51 | assert(search("like|hate") == Set("like", "hate")) 52 | } 53 | 54 | it should "handle repetition queries" in { 55 | assert(search("RB* JJ") == Set("great", "not great")) 56 | assert(search("RB+ JJ") == Set("not great")) 57 | } 58 | 59 | it should "handle groups" in { 60 | assert(searchGroups("I (?VBP) DT* (?NN|NNS)") == 61 | Set("x like", "y mango", "x hate", "y bananas")) 62 | assert(searchGroups("I (?.*) (?NN|NNS)") == 63 | Set("x like", "y mango", "x hate those", "y bananas")) 64 | } 65 | 66 | it should "handle multiple grouped wildcards" in { 67 | val q = "(?{I, They, It} VBP) ." 68 | val expected = Set("x I like", "x I hate", "x They taste", "x It tastes") 69 | assert(searchGroups(q) == expected) 70 | } 71 | 72 | it should "handle all-wildcard queries" in { 73 | val q = "(?.) . . ." 74 | val expected = Set("x I", "x hate", "x They", "x taste", "x It") 75 | assert(searchGroups(q) == expected) 76 | } 77 | 78 | it should "handle disjunctions at the beginning" in { 79 | val q = "{I, They} (?VBP)" 80 | val expected = Set("x like", "x hate", "x taste") 81 | assert(searchGroups(q) == expected) 82 | } 83 | 84 | } 85 | -------------------------------------------------------------------------------- /src/test/scala/org/allenai/ike/TestQExprParser.scala: -------------------------------------------------------------------------------- 1 | package org.allenai.ike 2 | 3 | import org.allenai.common.testkit.{ ScratchDirectory, UnitSpec } 4 | 5 | class TestQExprParser extends UnitSpec with ScratchDirectory { 6 | 7 | val wc = QWildcard() 8 | // A bunch of QExpr shorthand functions 9 | // scalastyle:off 10 | def w(s: String) = QWord(s) 11 | def p(s: String) = QPos(s) 12 | def qs(exprs: QExpr*) = QSeq(exprs) 13 | def cap(expr: QExpr) = QUnnamed(expr) 14 | def cap(name: String, expr: QExpr) = QNamed(expr, name) 15 | def nocap(expr: QExpr) = QNonCap(expr) 16 | def or(exprs: QExpr*) = QDisj(exprs) 17 | def star(expr: QExpr) = QStar(expr) 18 | def rep(expr: QExpr, min: Int, max: Int) = QRepetition(expr, min, max) 19 | def g(words: Seq[String], pos: Int) = QGeneralizePhrase(words.map(QWord), pos) 20 | // scalastyle:on 21 | 22 | def parse(s: String): QExpr = QExprParser.parse(s).get 23 | 24 | "QExprParser" should "parse correctly" in { 25 | 26 | val q1 = "this is a test" 27 | val e1 = qs(w("this"), w("is"), w("a"), w("test")) 28 | assert(parse(q1) == e1) 29 | 30 | val q2 = "this is DT NN" 31 | val e2 = qs(w("this"), w("is"), p("DT"), p("NN")) 32 | assert(parse(q2) == e2) 33 | 34 | val q4 = "this is (a test)" 35 | val e4 = qs(w("this"), w("is"), cap(qs(w("a"), w("test")))) 36 | assert(parse(q4) == e4) 37 | 38 | val q5 = "this is (? a test)" 39 | val e5 = qs(w("this"), w("is"), cap("foo", qs(w("a"), w("test")))) 40 | assert(parse(q5) == e5) 41 | 42 | val q6 = "this is (?:a test)" 43 | val e6 = qs(w("this"), w("is"), nocap(qs(w("a"), w("test")))) 44 | assert(parse(q6) == e6) 45 | 46 | val q7 = "(?:this|that) is a test" 47 | val e7 = qs(nocap(or(w("this"), w("that"))), w("is"), w("a"), w("test")) 48 | assert(parse(q7) == e7) 49 | 50 | val q8 = ". 
is a test" 51 | val e8 = qs(wc, w("is"), w("a"), w("test")) 52 | assert(parse(q8) == e8) 53 | 54 | val q9 = ".* is a test" 55 | val e9 = qs(star(wc), w("is"), w("a"), w("test")) 56 | assert(parse(q9) == e9) 57 | 58 | val q10 = "{this, that} is a test" 59 | val e10 = qs(or(w("this"), w("that")), w("is"), w("a"), w("test")) 60 | assert(parse(q10) == e10) 61 | 62 | val q11 = "the thing[1,5] ran" 63 | val e11 = qs(w("the"), rep(w("thing"), 1, 5), w("ran")) 64 | assert(parse(q11) == e11) 65 | 66 | val q12 = "the {thing [1,-1], ran}" 67 | val e12 = qs(w("the"), or(rep(w("thing"), 1, -1), w("ran"))) 68 | assert(parse(q12) == e12) 69 | 70 | val q13 = "the \"fat cat\"~10 \"ran\"~1 {fast,quick~34}" 71 | val e13 = qs(w("the"), g(Seq("fat", "cat"), 10), g(Seq("ran"), 1), 72 | or(w("fast"), g(Seq("quick"), 34))) 73 | assert(parse(q13) == e13) 74 | 75 | val q14 = "the \"fat cat\"[1,2]" 76 | val e14 = qs(w("the"), rep(g(Seq("fat", "cat"), 0), 1, 2)) 77 | assert(parse(q14) == e14) 78 | } 79 | } 80 | -------------------------------------------------------------------------------- /src/test/scala/org/allenai/ike/TestQueryLanguage.scala: -------------------------------------------------------------------------------- 1 | package org.allenai.ike 2 | 3 | import org.allenai.common.testkit.UnitSpec 4 | 5 | class TestQueryLanguage extends UnitSpec { 6 | 7 | "getQueryLength" should "get correct length" in { 8 | assertResult((2, 2)) { 9 | val query = QSeq(Seq(QWord(""), QNamed(QPos(""), ""))) 10 | QueryLanguage.getQueryLength(query) 11 | } 12 | 13 | assertResult((1, -1)) { 14 | val query = QSeq(Seq(QStar(QWord("")), QNamed(QPos(""), ""))) 15 | QueryLanguage.getQueryLength(query) 16 | } 17 | 18 | val disjLength2 = QDisj(Seq(QSeq(Seq(QWord(""), QPos(""))), QSeq(Seq(QWord(""), QPos(""))))) 19 | val seqLength2 = QSeq(disjLength2.qexprs) 20 | 21 | assertResult((4, 4)) { 22 | val query = QSeq(Seq(disjLength2, disjLength2)) 23 | QueryLanguage.getQueryLength(query) 24 | } 25 | 26 | assertResult((12, 12)) { 27 | val q1 = QSeq(Seq(seqLength2, seqLength2, QWord(""), disjLength2, QWildcard())) 28 | QueryLanguage.getQueryLength(q1) 29 | } 30 | 31 | assertResult((4, -1)) { 32 | val q1 = QUnnamed(QSeq(Seq(disjLength2, QSeq(Seq(QPos(""), QPlus(QWord(""))))))) 33 | QueryLanguage.getQueryLength(q1) 34 | } 35 | 36 | assertResult((4, 4)) { 37 | val q1 = QueryLanguage.parse("(a b)[2,2]").get 38 | QueryLanguage.getQueryLength(q1) 39 | } 40 | 41 | assertResult((2, -1)) { 42 | val q1 = QueryLanguage.parse("(a (b c d)*)[2,2]").get 43 | QueryLanguage.getQueryLength(q1) 44 | } 45 | 46 | assertResult((2, 4)) { 47 | val q1 = QueryLanguage.parse("(a b)[1,2]").get 48 | QueryLanguage.getQueryLength(q1) 49 | } 50 | 51 | assertResult((1, 3)) { 52 | def w(): QWord = QWord("") 53 | val q1 = QSimilarPhrases(Seq(w(), w()), 2, Seq( 54 | SimilarPhrase(Seq(w(), w(), w()), 1), 55 | SimilarPhrase(Seq(w()), 1), 56 | SimilarPhrase(Seq(w(), w(), w(), w()), 1) 57 | )) 58 | QueryLanguage.getQueryLength(q1) 59 | } 60 | } 61 | 62 | "getQueryString" should "get correct string" in { 63 | def check(string: String) = { 64 | val qexpr = QueryLanguage.parse(string).get 65 | assert(QueryLanguage.getQueryString(qexpr) == string) 66 | } 67 | check("a b[1,2] (c d)* e") 68 | check("a {d,e}* e") 69 | check("((?:NP PP)*)") 70 | assert(QueryLanguage.getQueryString(QStar(QSeq(Seq(QWord("a"), QWord("b"))))) == "(?:a b)*") 71 | 72 | } 73 | } 74 | -------------------------------------------------------------------------------- 
/src/test/scala/org/allenai/ike/index/BlackLabExample.scala: -------------------------------------------------------------------------------- 1 | package org.allenai.ike.index 2 | 3 | import org.allenai.common.testkit.{ ScratchDirectory, UnitSpec } 4 | import org.allenai.ike.BlackLabResult 5 | 6 | import org.allenai.blacklab.index.Indexer 7 | import org.allenai.blacklab.queryParser.corpusql.CorpusQueryLanguageParser 8 | import org.allenai.blacklab.search.sequences.{ TextPatternRepetition, TextPatternSequence } 9 | import org.allenai.blacklab.search.{ Searcher, TextPatternCaptureGroup, TextPatternOr, TextPatternPrefix, TextPatternProperty, TextPatternTerm } 10 | 11 | class BlackLabExample extends UnitSpec with ScratchDirectory { 12 | 13 | val text = "A teacher is a person who teaches students ." 14 | val annotated = NlpAnnotate.annotate(text) 15 | val tokenSentences = for { 16 | sentence <- annotated 17 | indexableTokens = sentence.map { t => 18 | IndexableToken(t.token.string, t.token.postag, t.lemma, "") 19 | } 20 | } yield indexableTokens 21 | val doc = IndexableText(IdText("doc1", text), tokenSentences) 22 | 23 | println("Here is the document:") 24 | println(doc.idText.id) 25 | println(doc.idText.text) 26 | doc.sentences foreach println 27 | println 28 | 29 | val indexLocation = scratchDir 30 | val indexer = new Indexer(indexLocation, true, classOf[AnnotationIndexer]) 31 | val addToMyIndex = CreateIndex.addTo(indexer) _ 32 | addToMyIndex(doc) 33 | indexer.close 34 | 35 | val searcher = Searcher.open(indexLocation) 36 | 37 | val singularNoun = new TextPatternProperty("pos", new TextPatternTerm("NN")) 38 | val pluralNoun = new TextPatternProperty("pos", new TextPatternTerm("NNS")) 39 | val noun = new TextPatternOr(singularNoun, pluralNoun) 40 | val determiner = new TextPatternProperty("pos", new TextPatternTerm("DT")) 41 | val adjective = new TextPatternProperty("pos", new TextPatternTerm("JJ")) 42 | val who = new TextPatternTerm("who") 43 | val that = new TextPatternTerm("that") 44 | val which = new TextPatternTerm("which") 45 | val whWord = new TextPatternOr(who, that, which) 46 | val beWord = new TextPatternProperty("lemma", new TextPatternTerm("be")) 47 | val verb = new TextPatternProperty("pos", new TextPatternPrefix("V")) 48 | val optionalDeterminer = new TextPatternRepetition(determiner, 0, 1) 49 | val someAdjectives = new TextPatternRepetition(adjective, 0, -1) 50 | val atLeastOneNoun = new TextPatternRepetition(noun, 1, -1) 51 | val nounPhrase = new TextPatternSequence(optionalDeterminer, someAdjectives, atLeastOneNoun) 52 | 53 | val isaSeq = new TextPatternSequence(nounPhrase, beWord, nounPhrase) 54 | val defnSeq = new TextPatternSequence(verb, nounPhrase) 55 | 56 | val textPattern = new TextPatternSequence( 57 | new TextPatternCaptureGroup(isaSeq, "isa-part"), 58 | whWord, 59 | new TextPatternCaptureGroup(defnSeq, "defn-part") 60 | ) 61 | 62 | val limit = 1000 63 | val hits = searcher.find(textPattern).window(0, limit) 64 | val transformedHits = BlackLabResult.fromHits(hits, "testCorpus").toSeq 65 | 66 | for (hit <- transformedHits) { 67 | println("Here is the word data:") 68 | hit.wordData foreach println 69 | println 70 | println("It matched this subset of the word data:") 71 | hit.wordData.slice(hit.matchOffset.start, hit.matchOffset.end) foreach println 72 | println 73 | println("Here are the matching named capture groups:") 74 | for ((groupName, groupOffset) <- hit.captureGroups) { 75 | println(s"Inside capture group '${groupName}'") 76 | 
hit.wordData.slice(groupOffset.start, groupOffset.end) foreach println 77 | println 78 | } 79 | } 80 | 81 | val parsed = CorpusQueryLanguageParser.parse(""" [pos="NN"] """) 82 | 83 | } 84 | -------------------------------------------------------------------------------- /src/test/scala/org/allenai/ike/index/TestCreateIndex.scala: -------------------------------------------------------------------------------- 1 | package org.allenai.ike.index 2 | 3 | import org.allenai.common.testkit.{ ScratchDirectory, UnitSpec } 4 | 5 | import org.allenai.blacklab.queryParser.corpusql.CorpusQueryLanguageParser 6 | 7 | import scala.collection.JavaConverters._ 8 | 9 | class TestCreateIndex extends UnitSpec with ScratchDirectory { 10 | TestData.createTestIndex(scratchDir) 11 | val searcher = TestData.testSearcher(scratchDir) 12 | "createTestIndex" should "create the index" in { 13 | val reader = searcher.getIndexReader 14 | assert(reader.numDocs == TestData.indexableTexts.size) 15 | } 16 | it should "add the doc content" in { 17 | val i = CorpusQueryLanguageParser.parse(""" "I" [pos="VBP"] """) 18 | val hits = searcher.find(i) 19 | assert(hits.numberOfDocs == 2) 20 | assert(hits.iterator.asScala.toList.size == 2) 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /src/test/scala/org/allenai/ike/index/TestData.scala: -------------------------------------------------------------------------------- 1 | package org.allenai.ike.index 2 | 3 | import org.allenai.blacklab.index.Indexer 4 | import org.allenai.blacklab.search.Searcher 5 | 6 | import java.io.File 7 | 8 | object TestData { 9 | 10 | val lemmas = Map( 11 | "I" -> "i", 12 | "like" -> "like", 13 | "mango" -> "mango", 14 | "." -> ".", 15 | "It" -> "it", 16 | "tastes" -> "taste", 17 | "great" -> "great", 18 | "hate" -> "hate", 19 | "those" -> "this", 20 | "bananas" -> "banana", 21 | "They" -> "they", 22 | "taste" -> "taste", 23 | "not" -> "not" 24 | ) 25 | 26 | val posTags = Map( 27 | "I" -> "PRP", 28 | "like" -> "VBP", 29 | "mango" -> "NN", 30 | "." 
-> ".", 31 | "It" -> "PRP", 32 | "tastes" -> "VBP", 33 | "great" -> "JJ", 34 | "hate" -> "VBP", 35 | "those" -> "DT", 36 | "bananas" -> "NNS", 37 | "They" -> "PRP", 38 | "taste" -> "VBP", 39 | "not" -> "RB" 40 | ) 41 | 42 | val chunks = Map( 43 | "doc1" -> "BE-NP BE-VP BE-ADJP O".split(' '), 44 | "doc2" -> "BE-NP BE-VP BE-ADJP O".split(' '), 45 | "doc3" -> "BE-NP NE-VP B-NP E-NP O".split(' '), 46 | "doc4" -> "BE-NP BE-VP O BE-ADJP O".split(' ') 47 | ) 48 | 49 | val idTexts = Seq( 50 | IdText("doc1", "I like mango ."), 51 | IdText("doc2", "It tastes great ."), 52 | IdText("doc3", "I hate those bananas ."), 53 | IdText("doc4", "They taste not great .") 54 | ) 55 | 56 | val indexableTexts = idTexts map { idText => 57 | val words = idText.text.split(" ") 58 | val ps = words.map(posTags.getOrElse(_, "")) 59 | val ls = words.map(lemmas.getOrElse(_, "")) 60 | val ch = chunks(idText.id) 61 | val tokens = List(words, ps, ls, ch).transpose map { 62 | case List(w, p, l, c) => IndexableToken(w, p, l, c) 63 | } 64 | IndexableText(idText, List(tokens)) 65 | } 66 | 67 | def createTestIndex(path: File): Unit = { 68 | val indexer = new Indexer(path, true, classOf[AnnotationIndexer]) 69 | indexableTexts foreach CreateIndex.addTo(indexer) 70 | indexer.close 71 | } 72 | 73 | def testSearcher(path: File): Searcher = Searcher.open(path) 74 | 75 | } 76 | -------------------------------------------------------------------------------- /src/test/scala/org/allenai/ike/index/TestSearcher.scala: -------------------------------------------------------------------------------- 1 | package org.allenai.ike 2 | 3 | import org.allenai.common.testkit.{ ScratchDirectory, UnitSpec } 4 | import org.allenai.ike.index.TestData 5 | 6 | import org.allenai.blacklab.queryParser.corpusql.CorpusQueryLanguageParser 7 | 8 | class TestSearcher extends UnitSpec with ScratchDirectory { 9 | TestData.createTestIndex(scratchDir) 10 | val searcher = TestData.testSearcher(scratchDir) 11 | def search(s: String): Iterator[BlackLabResult] = { 12 | val query = CorpusQueryLanguageParser.parse(s) 13 | val hits = searcher.find(query) 14 | BlackLabResult.fromHits(hits, "testCorpus") 15 | } 16 | def groups(s: String): Iterator[String] = for { 17 | r <- search(s) 18 | (name, offset) <- r.captureGroups 19 | data = r.wordData.slice(offset.start, offset.end) 20 | words = data.map(_.word).mkString(" ") 21 | result = s"$name $words" 22 | } yield result 23 | "searcher" should "return the expected search results" in { 24 | val results = search(""" "I" [pos="VBP"] """) 25 | val resultStrings = results.map(r => r.matchWords.mkString(" ")).toSet 26 | assert(resultStrings == Set("I like", "I hate")) 27 | } 28 | it should "handle capture groups" in { 29 | val results = groups(""" "I" myGroup:[pos="VBP"] """) 30 | assert(results.toSet == Set("myGroup like", "myGroup hate")) 31 | } 32 | it should "handle multi-word capture groups" in { 33 | val results = groups(""" "I" myGroup:([pos="VBP"] [])""") 34 | assert(results.toSet == Set("myGroup like mango", "myGroup hate those")) 35 | } 36 | it should "handle multiple capture groups" in { 37 | val results = groups(""" "I" myGroup1:[pos="VBP"] myGroup2:[]""") 38 | val expected = Set("myGroup1 like", "myGroup1 hate", "myGroup2 mango", "myGroup2 those") 39 | assert(results.toSet == expected) 40 | } 41 | it should "handle repetition" in { 42 | val results = search(""" [pos="RB"]{0,1} [pos="JJ"] """) 43 | val resultStrings = results.map(r => r.matchWords.mkString(" ")).toSet 44 | assert(resultStrings == Set("great", "not 
great")) 45 | } 46 | it should "handle repitition and groupign" in { 47 | val results = groups(""" group1:([pos="RB"]{0,1} [pos="JJ"]) """) 48 | val expected = Set("group1 not great", "group1 great") 49 | assert(results.toSet == expected) 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /src/test/scala/org/allenai/ike/ml/SimilarPhrasesSearcherStub.scala: -------------------------------------------------------------------------------- 1 | package org.allenai.ike.ml 2 | 3 | import org.allenai.ike.{ SimilarPhrase, SimilarPhrasesSearcher } 4 | 5 | class SimilarPhrasesSearcherStub(phrases: Map[String, Seq[SimilarPhrase]] = Map()) 6 | extends SimilarPhrasesSearcher { 7 | override def getSimilarPhrases(phrase: String): Seq[SimilarPhrase] = { 8 | phrases.getOrElse(phrase, Seq()) 9 | } 10 | override def getSimilarPhrases(phraseSeq: Seq[String]): Seq[SimilarPhrase] = { 11 | Seq.empty 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /src/test/scala/org/allenai/ike/ml/TestQueryGeneralizer.scala: -------------------------------------------------------------------------------- 1 | package org.allenai.ike.ml 2 | 3 | import org.allenai.common.testkit.{ ScratchDirectory, UnitSpec } 4 | import org.allenai.ike._ 5 | import org.allenai.ike.index.TestData 6 | 7 | class TestQueryGeneralizer extends UnitSpec with ScratchDirectory { 8 | 9 | TestData.createTestIndex(scratchDir) 10 | val searcher = TestData.testSearcher(scratchDir) 11 | val searchers = Seq(searcher) 12 | val ss = new SimilarPhrasesSearcherStub(Map( 13 | "i" -> Seq( 14 | SimilarPhrase(Seq(QWord("It")), 0.8), 15 | SimilarPhrase(Seq(QWord("like")), 0.4) 16 | ) 17 | )) 18 | val qsimForI = QSimilarPhrases(Seq(QWord("i")), 2, ss.getSimilarPhrases("i")) 19 | 20 | it should "cover all PosTags" in { 21 | // This test will raise an error if there are 22 | val tagSet = QueryLanguage.parser.posTagSet.toSet - "FW" 23 | val generalizingTagset = QueryGeneralizer.posSets.reduce(_ ++ _) 24 | assert(tagSet == generalizingTagset) 25 | } 26 | 27 | it should "suggest correct generalizations" in { 28 | { 29 | val gens = QueryGeneralizer.queryGeneralizations(QPos("NN"), searchers, ss, 10) 30 | val qexprs = gens.asInstanceOf[GeneralizeToDisj].pos 31 | assert(qexprs.contains(QPos("NNS"))) 32 | assert(!qexprs.contains(QPos("VBG"))) 33 | assert(!qexprs.contains(QPos("NN"))) 34 | } 35 | { 36 | val gens = QueryGeneralizer.queryGeneralizations( 37 | QDisj(Seq(QPos("NN"), QPos("VBG"))), searchers, ss, 10 38 | ) 39 | val qexprs = gens.asInstanceOf[GeneralizeToDisj].pos 40 | assert(qexprs.contains(QPos("NNS"))) 41 | assert(qexprs.contains(QPos("VB"))) 42 | assert(!qexprs.contains(QPos("JJ"))) 43 | assert(!qexprs.contains(QPos("JJS"))) 44 | assert(!qexprs.contains(QPos("VBG"))) 45 | } 46 | { 47 | val gens = QueryGeneralizer.queryGeneralizations(QWord("i"), searchers, ss, 10) 48 | assertResult(gens)(GeneralizeToDisj(Seq(QPos("PRP")), Seq(qsimForI), true)) 49 | } 50 | { 51 | val testQuery = QDisj(Seq( 52 | QPos("NN"), QSimilarPhrases(Seq(QWord("i")), 1, ss.getSimilarPhrases("i")) 53 | )) 54 | val gens = QueryGeneralizer.queryGeneralizations(testQuery, searchers, ss, 10). 
55 | asInstanceOf[GeneralizeToDisj] 56 | assert(gens.pos.map(_.value).toSet == (QueryGeneralizer.posSets(1) - "NN" + "PRP")) 57 | assert(gens.phrase == Seq(qsimForI)) 58 | assert(!gens.fullyGeneralizes) 59 | } 60 | 61 | assertResult(GeneralizeToNone())(QueryGeneralizer.queryGeneralizations( 62 | QRepetition(QWildcard(), 1, 4), searchers, ss, 10 63 | )) 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /src/test/scala/org/allenai/ike/ml/TestTokenizedQuery.scala: -------------------------------------------------------------------------------- 1 | package org.allenai.ike.ml 2 | 3 | import org.allenai.common.testkit.{ ScratchDirectory, UnitSpec } 4 | import org.allenai.ike._ 5 | 6 | class TestTokenizedQuery extends UnitSpec with ScratchDirectory { 7 | 8 | // Shorthands 9 | def qs(seq: QExpr*): QueryTokenSequence = TokenSequence(seq) 10 | def qsc(name: String, seq: Seq[QExpr], isExplicit: Boolean = true): QueryTokenSequence = 11 | CapturedTokenSequence(seq, name, isExplicit) 12 | 13 | "convertQuery" should "correctly tokenize" in { 14 | { 15 | val captureSeq = Seq(QWord(""), QDisj(Seq(QWord(""), QPos("")))) 16 | val query = QSeq(Seq(QWord("1"), QWord("2"), QNamed(QSeq(captureSeq), "col1"))) 17 | val tokenized = TokenizedQuery.buildFromQuery(query, Seq("col1")) 18 | 19 | assertResult(qs(QWord("1"), QWord("2")))(tokenized.tokenSequences(0)) 20 | assertResult(qsc("col1", captureSeq))(tokenized.tokenSequences(1)) 21 | assertResult(query)(tokenized.getOriginalQuery) 22 | } 23 | { 24 | val query = QueryLanguage.parse("a NN+ (?<x> c) d*").get 25 | assertResult(query)(TokenizedQuery.buildFromQuery(query, Seq("x")).getOriginalQuery) 26 | } 27 | { 28 | val query = QueryLanguage.parse("a (?<y> {b, c} d) e f (g) (?<z> h)").get 29 | val tokenized = TokenizedQuery.buildFromQuery(query, Seq("y", "x", "z")) 30 | 31 | assertResult(qs(QWord("a")))(tokenized.tokenSequences(0)) 32 | assertResult(qsc( 33 | "y", 34 | Seq( 35 | QDisj(List(QWord("b"), QWord("c"))), 36 | QWord("d") 37 | ) 38 | ))(tokenized.tokenSequences(1)) 39 | assertResult(qs(QWord("e"), QWord("f")))(tokenized.tokenSequences(2)) 40 | assertResult(qsc("x", Seq(QWord("g")), false))(tokenized.tokenSequences(3)) 41 | assertResult(qsc("z", Seq(QWord("h"))))(tokenized.tokenSequences(4)) 42 | 43 | val seq = tokenized.getSeq 44 | val expectedSeq = Seq( 45 | QWord("a"), 46 | QDisj(List(QWord("b"), QWord("c"))), 47 | QWord("d"), 48 | QWord("e"), 49 | QWord("f"), 50 | QWord("g"), 51 | QWord("h") 52 | ) 53 | assertResult(expectedSeq.size)(seq.size) 54 | seq.zip(expectedSeq).foreach { 55 | case (actual, expected) => assertResult(expected)(actual) 56 | } 57 | } 58 | { 59 | // Make sure if the user writes an overly-complex sequential query we can recover the original 60 | val query = QueryLanguage.parse("{a b} (?: {{c d}} {e}) (?<z> {f g})").get 61 | val tokenized = TokenizedQuery.buildFromQuery(query, Seq("z")) 62 | 63 | val seq = tokenized.getSeq 64 | val expectedSeq = Seq( 65 | QWord("a"), 66 | QWord("b"), 67 | QWord("c"), 68 | QWord("d"), 69 | QWord("e"), 70 | QWord("f"), 71 | QWord("g") 72 | ) 73 | assertResult(expectedSeq.size)(seq.size) 74 | seq.zip(expectedSeq).foreach { 75 | case (actual, expected) => assertResult(expected)(actual) 76 | } 77 | } 78 | } 79 | 80 | it should "get data correctly" in { 81 | val query = QueryLanguage.parse("a (?<c1> b c) d e (?<c2>
f)").get 82 | val tokenized = TokenizedQuery.buildFromQuery(query, Seq("c1", "c2")) 83 | val expectedResults = Seq( 84 | QuerySlotData(Some(QWord("a")), QueryToken(1), false), 85 | QuerySlotData(Some(QWord("b")), QueryToken(2), true), 86 | QuerySlotData(Some(QWord("c")), QueryToken(3), true), 87 | QuerySlotData(Some(QWord("d")), QueryToken(4), false), 88 | QuerySlotData(Some(QWord("e")), QueryToken(5), false), 89 | QuerySlotData(Some(QWord("f")), QueryToken(6), true) 90 | ) 91 | assertResult(expectedResults)(tokenized.getAnnotatedSeq) 92 | } 93 | 94 | } 95 | -------------------------------------------------------------------------------- /src/test/scala/org/allenai/ike/ml/compoundop/TestOpConjunction.scala: -------------------------------------------------------------------------------- 1 | package org.allenai.ike.ml.compoundop 2 | 3 | import org.allenai.common.testkit.UnitSpec 4 | import org.allenai.ike.ml.queryop._ 5 | import org.allenai.ike.ml.{ Prefix, QueryToken } 6 | import org.allenai.ike.{ QPos, QWord } 7 | 8 | class TestOpConjunction extends UnitSpec { 9 | 10 | val prefix2 = EvaluatedOp.fromPairs( 11 | SetToken(Prefix(2), QWord("p2")), 12 | List((1, 1), (3, 0), (4, 1), (9, 1)) 13 | ) 14 | val replace3 = EvaluatedOp.fromList( 15 | SetToken(QueryToken(3), QPos("r3")), 16 | List(1, 2, 3, 4, 5) 17 | ) 18 | val setMin3 = EvaluatedOp.fromPairs( 19 | SetMin(3, 1), List((1, 1), (3, 0)) 20 | ) 21 | val setMax3 = EvaluatedOp.fromPairs( 22 | SetMax(3, 1), List((1, 1), (3, 0)) 23 | ) 24 | val setMinLarge3 = EvaluatedOp.fromPairs( 25 | SetMin(3, 2), List((1, 1), (3, 0)) 26 | ) 27 | 28 | val removeToken = EvaluatedOp.fromPairs(RemoveToken(1), List((1, 1), (5, 0))) 29 | 30 | "OpConjunction" should "calculate numEdits correctly" in { 31 | var op = OpConjunction(replace3).get.add(prefix2) 32 | assertResult(List((1, 1), (3, 0), (4, 1)))(op.numEdits.toSeq.sorted) 33 | 34 | assert(!op.canAdd(prefix2.op)) 35 | 36 | op = op.add(setMin3).add(setMax3) 37 | assertResult(List((1, 1), (3, 0)))(op.numEdits.toSeq.sorted) 38 | 39 | assert(!op.canAdd(setMax3.op)) 40 | assert(!op.canAdd(setMin3.op)) 41 | assert(!op.canAdd(setMinLarge3.op)) 42 | 43 | op = op.add(removeToken) 44 | assertResult(List((1, 2)))(op.numEdits.toSeq.sorted) 45 | 46 | assertResult(5)(op.size) 47 | assertResult(Set(prefix2.op, setMin3.op, setMax3.op, replace3.op, RemoveToken(1)))(op.ops) 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /src/test/scala/org/allenai/ike/ml/compoundop/TestOpConjunctionOfDisjunctions.scala: -------------------------------------------------------------------------------- 1 | package org.allenai.ike.ml.compoundop 2 | 3 | import org.allenai.common.testkit.UnitSpec 4 | import org.allenai.ike._ 5 | import org.allenai.ike.ml._ 6 | import org.allenai.ike.ml.queryop._ 7 | 8 | class TestOpConjunctionOfDisjunctions extends UnitSpec { 9 | 10 | val suffix1 = EvaluatedOp.fromPairs( 11 | SetToken(Suffix(1), QWord("s1")), 12 | List((1, 1), (2, 0), (3, 0), (4, 1), (5, 1), (6, 1)) 13 | ) 14 | val prefix21 = EvaluatedOp.fromPairs( 15 | SetToken(Prefix(2), QWord("p21")), 16 | List((1, 1), (2, 1), (8, 0)) 17 | ) 18 | val prefix22 = EvaluatedOp.fromPairs( 19 | SetToken(Prefix(2), QWord("p22")), 20 | List((1, 1), (3, 0), (4, 1), (9, 1)) 21 | ) 22 | val replace3 = EvaluatedOp.fromList( 23 | SetToken(QueryToken(3), QPos("r3")), 24 | List(1, 2, 3, 4, 5, 6, 7) 25 | ) 26 | val add3 = EvaluatedOp.fromPairs( 27 | AddToken(3, QWord("a3")), 28 | List((3, 1), (4, 0), (5, 0)) 29 | ) 30 | 31 | 
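// Editor's note (not in the original source): in these fixtures, EvaluatedOp.fromPairs
// pairs each matching hit index with the number of edits the op appears to need for that
// hit, while EvaluatedOp.fromList marks every listed hit as matching with zero edits.
// A hypothetical example, under that assumption:
//   EvaluatedOp.fromPairs(
//     AddToken(5, QWord("w")), // hypothetical op: add the word "w" at slot 5
//     List((7, 0), (8, 1)) // matches hit 7 with no edits and hit 8 with one edit
//   )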
val add2 = EvaluatedOp.fromPairs( 32 | AddToken(2, QWord("a3")), 33 | List((3, 1), (4, 0), (5, 0)) 34 | ) 35 | 36 | "OpConjunctionOfDisjunctions" should "calculate matches correctly" in { 37 | var op = OpConjunctionOfDisjunctions(suffix1).get.add(replace3) 38 | // Suffix1 AND replace3 39 | assertResult(List((1, 1), (2, 0), (3, 0), (4, 1), (5, 1), (6, 1)))(op.numEdits.toSeq.sorted) 40 | op = op.add(prefix21) 41 | // Suffix1 AND replace3 AND prefix21 42 | assertResult(List((1, 2), (2, 1)))(op.numEdits.toSeq.sorted) 43 | op = op.add(prefix22) 44 | // Suffix1 AND replace3 AND (prefix21 OR prefix22) 45 | assertResult(List((1, 2), (2, 1), (3, 0), (4, 2)))(op.numEdits.toSeq.sorted) 46 | op = op.add(add3) 47 | // Suffix1 AND (replace3 OR add3) AND (prefix21 OR prefix22) 48 | assertResult(List((1, 2), (2, 1), (3, 1), (4, 2)))(op.numEdits.toSeq.sorted) 49 | 50 | op = op.add(add2) 51 | // Suffix1 AND (replace3 OR add3) AND (prefix21 OR prefix22) AND (add2) 52 | assertResult(List((3, 2), (4, 2)))(op.numEdits.toSeq.sorted) 53 | 54 | assertResult(Set(suffix1.op, replace3.op, prefix21.op, prefix22.op, add3.op, add2.op))(op.ops) 55 | } 56 | 57 | val replace11 = EvaluatedOp.fromPairs( 58 | SetToken(QueryToken(1), QWord("r1")), 59 | List((1, 1), (2, 0), (4, 1), (5, 1), (6, 1)) 60 | ) 61 | val replace12 = EvaluatedOp.fromPairs( 62 | SetToken(QueryToken(1), QWord("r2")), 63 | List((3, 0)) 64 | ) 65 | val setMax1 = EvaluatedOp.fromPairs( 66 | SetMax(1, 1), List((3, 1)) 67 | ) 68 | 69 | val remove3 = EvaluatedOp.fromPairs( 70 | RemoveToken(3), List((1, 1), (2, 0), (3, 0), (4, 0), (5, 0), (6, 0), (7, 0)) 71 | ) 72 | val setMin2 = EvaluatedOp.fromPairs( 73 | SetMin(2, 1), List((1, 1), (2, 0), (3, 1)) 74 | ) 75 | val replace2 = EvaluatedOp.fromPairs( 76 | SetToken(QueryToken(2), QWord("r2")), List((2, 1), (3, 0)) 77 | ) 78 | 79 | "OpConjunctionOfDisjunctions" should "work with RemoveToken and ModifyParent token" in { 80 | var op = OpConjunctionOfDisjunctions(replace11).get.add(remove3) 81 | 82 | assertResult(List((1, 2), (2, 0), (4, 1), (5, 1), (6, 1)))(op.numEdits.toSeq.sorted) 83 | assert(!op.canAdd(replace3.op)) // We remove this slot, so we should not be able to add to it 84 | assert(!op.canAdd(add3.op)) 85 | assert(!op.canAdd(RemoveToken(1))) // This slot is set, we should not be able to remove it 86 | 87 | op = op.add(setMin2) 88 | assertResult(List((1, 3), (2, 0)))(op.numEdits.toSeq.sorted) 89 | assert(!op.canAdd(SetMin(2, 2))) 90 | assert(!op.canAdd(SetMax(2, 0))) 91 | 92 | op = op.add(replace2) 93 | assertResult(List((2, 1)))(op.numEdits.toSeq.sorted) 94 | 95 | // We can't add this op due to the ordering constraints 96 | assert(!op.canAdd(AddToken(QueryToken(2), QWord("")))) 97 | 98 | // (replace12 OR replace11) AND (setMin2 AND replace2) AND remove3 99 | op = op.add(replace12) 100 | assertResult(List((2, 1), (3, 1)))(op.numEdits.toSeq.sorted) 101 | 102 | // (setMax1 AND (replace12 OR replace11)) AND (setMin2 AND replace2) AND remove3 103 | op = op.add(setMax1) 104 | assertResult(List((3, 2)))(op.numEdits.toSeq.sorted) 105 | 106 | assertResult(Set(replace11.op, replace12.op, remove3.op, replace2.op, 107 | setMin2.op, setMax1.op))(op.ops) 108 | } 109 | } 110 | -------------------------------------------------------------------------------- /src/test/scala/org/allenai/ike/ml/queryop/TestGeneralizingOpGenerator.scala: -------------------------------------------------------------------------------- 1 | package org.allenai.ike.ml.queryop 2 | 3 | import org.allenai.common.testkit.{ ScratchDirectory, UnitSpec }
4 | import org.allenai.ike._ 5 | import org.allenai.ike.ml._ 6 | 7 | import scala.collection.immutable.IntMap 8 | 9 | class TestGeneralizingOpGenerator extends UnitSpec with ScratchDirectory { 10 | 11 | it should "Suggest correct POS operators" in { 12 | val query = QueryLanguage.parse("a (?<x>b c) d").get 13 | val tokenized = TokenizedQuery.buildFromQuery(query, Seq("x")) 14 | var generator = GeneralizingOpGenerator(true, true) 15 | val slot = QueryToken(4) 16 | val setPosNN = SetToken(slot, QPos("NN")) 17 | val setPosCC = SetToken(slot, QPos("CC")) 18 | val setPosVB = SetToken(slot, QPos("VB")) 19 | 20 | val labels = IndexedSeq(Label.Negative, Label.Positive, Label.Negative, Label.Positive, 21 | Label.Positive, Label.Positive, Label.Positive, Label.Positive).zipWithIndex. 22 | map(x => WeightedExample(x._1, x._2, 0, 0, 1)) 23 | 24 | def getWithGeneralization(gen: Generalization): Map[QueryOp, IntMap[Int]] = { 25 | val matches = QueryMatches(QuerySlotData( 26 | Some(QWord("d")), slot, true, Some(gen) 27 | ), Seq( 28 | QueryMatch(Seq(Token("a1", "CC")), false), 29 | QueryMatch(Seq(Token("d", "NN")), true), 30 | QueryMatch(Seq(Token("a4", "VB")), false), 31 | QueryMatch(Seq(Token("a1", "CC")), true), 32 | QueryMatch(Seq(Token("d", "MD")), true), 33 | QueryMatch(Seq(Token("a2", "MD")), false), 34 | QueryMatch(Seq(Token("c3", "MD")), false), 35 | QueryMatch(Seq(Token("a3", "MD")), false) 36 | )) 37 | generator.generate(matches, labels) 38 | } 39 | 40 | val m1 = getWithGeneralization(GeneralizeToDisj(Seq("CC", "NN", "VB").map(QPos), Seq(), true)) 41 | assertResult(IntMap(1 -> 0))(m1(setPosNN)) 42 | assertResult(IntMap(0 -> 1, 3 -> 0))(m1(setPosCC)) 43 | assertResult(IntMap(2 -> 1))(m1(setPosVB)) 44 | assertResult(3)(m1.size) 45 | 46 | val m2 = getWithGeneralization(GeneralizeToDisj(Seq(QPos("NN")), Seq(), true)) 47 | assertResult(IntMap(1 -> 0))(m2(setPosNN)) 48 | assertResult(1)(m2.size) 49 | 50 | val simPhrases = QSimilarPhrases(Seq(QWord("d")), 4, 51 | Seq(("a1", 0.9), ("a2", 0.8), ("a3", 0.7), ("a4", 0.6)).
52 | map(x => SimilarPhrase(Seq(QWord(x._1)), x._2))) 53 | val m3 = getWithGeneralization(GeneralizeToDisj(Seq(), Seq(simPhrases), true)) 54 | 55 | // Match a1 56 | assertResult(IntMap(0 -> 1, 1 -> 0, 3 -> 0, 4 -> 0))( 57 | m3(SetToken(slot, simPhrases.copy(pos = 1))) 58 | ) 59 | // match a1,a2,a3 60 | assertResult(IntMap(0 -> 1, 1 -> 0, 3 -> 0, 4 -> 0, 5 -> 1, 7 -> 1))( 61 | m3(SetToken(slot, simPhrases.copy(pos = 3))) 62 | ) 63 | // match a1,a2,a3,a4 64 | assertResult(IntMap(0 -> 1, 1 -> 0, 3 -> 0, 4 -> 0, 5 -> 1, 7 -> 1, 2 -> 1))( 65 | m3(SetToken(slot, simPhrases.copy(pos = 4))) 66 | ) 67 | 68 | // Note there will not be an op that matches only a1 and a2, because a2 and a3 add elements 69 | // with the same label to the possible hits, so they get merged together 70 | assertResult(3)(m3.size) 71 | 72 | generator = GeneralizingOpGenerator(true, true, 4) 73 | val m4 = getWithGeneralization(GeneralizeToDisj(Seq(), Seq(simPhrases), true)) 74 | assertResult(IntMap(0 -> 1, 1 -> 0, 3 -> 0, 4 -> 0, 5 -> 1, 7 -> 1))( 75 | m4(SetToken(slot, simPhrases.copy(pos = 3))) 76 | ) 77 | assertResult(1)(m4.size) 78 | } 79 | } 80 | -------------------------------------------------------------------------------- /src/test/scala/org/allenai/ike/ml/queryop/TestSimilarPhraseTracker.scala: -------------------------------------------------------------------------------- 1 | package org.allenai.ike.ml.queryop 2 | 3 | import org.allenai.common.testkit.UnitSpec 4 | import org.allenai.ike.{ QSimilarPhrases, QWord, SimilarPhrase } 5 | 6 | class TestSimilarPhraseTracker extends UnitSpec { 7 | 8 | def tos(str: String): IndexedSeq[String] = { 9 | str.split(" ").toIndexedSeq 10 | } 11 | "minSimForPhrases" should "test correctly" in { 12 | val simStrs = List( 13 | ("c d a", 0.97), 14 | ("b", 0.93), 15 | ("a b", 0.9), 16 | ("c", 0.85), 17 | ("c d", 0.8), 18 | ("a c", 0.7), 19 | ("d", 0.6), 20 | ("b c", 0.25), 21 | ("a e", 0.2), 22 | ("a", 0.1) 23 | ) 24 | val strRanks = ("e" :: simStrs.map(_._1)).zipWithIndex.toMap 25 | val simPhrases = simStrs.map { 26 | case (str, sim) => 27 | SimilarPhrase(str.split(" ").map(QWord), sim) 28 | }.toSeq 29 | val qSimilarPhrases = QSimilarPhrases(Seq(QWord("e")), simPhrases.size, simPhrases) 30 | val tracker = new SimilarPhraseMatchTracker(qSimilarPhrases) 31 | 32 | // Sanity check 33 | assertResult(strRanks("a"))(tracker.minSimForPhrases(tos("a"), 0, 5)) 34 | assertResult(0)(tracker.minSimForPhrases(tos("e"), 0, 5)) 35 | assertResult(0)(tracker.minSimForPhrases(IndexedSeq(), 0, 5)) 36 | 37 | // Find the split ("a b")("c"), which is better than ("a")("b")("c") 38 | assertResult(strRanks("c"))(tracker.minSimForPhrases(tos("a b c"), 0, 5)) 39 | 40 | // Find that the split ("c d a")("e") is best 41 | assertResult(strRanks("c d a"))(tracker.minSimForPhrases(tos("c d a e"), 0, -1)) 42 | 43 | // The minimum phrase count should stop us from using ("c d a")("e") 44 | assertResult(strRanks("a e"))(tracker.minSimForPhrases(tos("c d a e"), 3, -1)) 45 | 46 | // Find that the split ("b")("c")("c") is best 47 | assertResult(strRanks("c"))(tracker.minSimForPhrases(tos("b c c"), 0, 3)) 48 | 49 | // The maximum phrase count should force us to use ("b c")("c") 50 | assertResult(strRanks("b c"))(tracker.minSimForPhrases(tos("b c c"), 0, 2)) 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /src/test/scala/org/allenai/ike/ml/queryop/TestSpecifyOpGenerator.scala: -------------------------------------------------------------------------------- 1 | package org.allenai.ike.ml.queryop 2 | 3 | import org.allenai.common.testkit.UnitSpec 4 | import org.allenai.ike.ml._ 5 | import org.allenai.ike.{ QPos, QWord } 6 | 7 | import
scala.collection.immutable.IntMap 8 | 9 | class TestSpecifyOpGenerator extends UnitSpec { 10 | 11 | "getRepeatedOpMatch" should "Create correct repeated ops" in { 12 | val matches = QueryMatches(QuerySlotData( 13 | Some(QWord("d")), QueryToken(1), true 14 | ), Seq( 15 | QueryMatch(Seq(Token("b", "NN"), Token("b", "NN")), true), 16 | QueryMatch(Seq(Token("a", "NN")), true) 17 | )) 18 | 19 | val leafGen = QLeafGenerator(true, true) 20 | 21 | val rOps = SpecifyingOpGenerator.getRepeatedOpMatch(matches, leafGen) 22 | assertResult(IntMap(0 -> 0))(rOps(SetRepeatedToken(1, 1, QWord("b")))) 23 | assertResult(IntMap(1 -> 0))(rOps(SetRepeatedToken(1, 1, QWord("a")))) 24 | assertResult(IntMap(0 -> 0))(rOps(SetRepeatedToken(1, 2, QWord("b")))) 25 | assertResult(IntMap(0 -> 0, 1 -> 0))(rOps(SetRepeatedToken(1, 1, QPos("NN")))) 26 | assertResult(IntMap(0 -> 0))(rOps(SetRepeatedToken(1, 2, QPos("NN")))) 27 | assertResult(rOps.size)(5) 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /src/test/scala/org/allenai/ike/ml/subsample/DocFieldLengthGetterStub.scala: -------------------------------------------------------------------------------- 1 | package org.allenai.ike.ml.subsample 2 | 3 | import org.allenai.blacklab.search.lucene.DocFieldLengthGetter 4 | 5 | /** Stub for DocFieldLengthGetter for testing purposes 6 | */ 7 | class DocFieldLengthGetterStub(docLength: IndexedSeq[Int]) 8 | extends DocFieldLengthGetter(null, "test") { 9 | 10 | def this(docLength: Seq[Int]) = { 11 | this(docLength.toIndexedSeq) 12 | } 13 | 14 | override def getFieldLength(doc: Int): Int = { 15 | docLength(doc) 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /src/test/scala/org/allenai/ike/ml/subsample/SpansStub.scala: -------------------------------------------------------------------------------- 1 | package org.allenai.ike.ml.subsample 2 | 3 | import org.allenai.blacklab.search.Span 4 | import org.allenai.blacklab.search.lucene.{ BLSpans, HitQueryContext } 5 | 6 | import java.util 7 | 8 | object SpansStub { 9 | def apply(docs: Seq[Int], s: Seq[Int], e: Seq[Int]): SpansStub = 10 | new SpansStub(docs.toIndexedSeq, s.toIndexedSeq, e.toIndexedSeq, Seq(), IndexedSeq()) 11 | 12 | def apply(data: (Seq[Int], Seq[Int], Seq[Int])): SpansStub = apply(data._1, data._2, data._3) 13 | 14 | def apply(data: Seq[(Int, Int, Int)]): SpansStub = apply(data.unzip3) 15 | 16 | def apply(data: Seq[(Int, Int)], length: Int): SpansStub = 17 | apply(data.map((x => (x._1, x._2, x._2 + length)))) 18 | 19 | def withCaptures( 20 | data: Seq[(Int, Int, Int)], captures: Seq[Seq[Span]], names: Seq[String] 21 | ): SpansStub = { 22 | val (d, s, e) = data.unzip3 23 | new SpansStub(d.toIndexedSeq, s.toIndexedSeq, e.toIndexedSeq, names, captures.toIndexedSeq) 24 | } 25 | } 26 | /** Stub Spans class for testing 27 | */ 28 | class SpansStub( 29 | val docs: IndexedSeq[Int], 30 | val starts: IndexedSeq[Int], 31 | val ends: IndexedSeq[Int], 32 | val captureNames: Seq[String], 33 | val captures: IndexedSeq[Seq[Span]] 34 | ) extends BLSpans { 35 | 36 | private var current = -1 37 | private var captureNumbers = Seq[Int]() 38 | 39 | def expected(index: Int): (Int, Int, Int) = { 40 | (docs(index), starts(index), ends(index)) 41 | } 42 | 43 | override def hitsLength(): Int = { 44 | if (docs.size > 0) { 45 | val differences = starts.zip(ends).map { case (start, end) => end - start } 46 | if (differences.forall(_ == differences.head)) differences.head else -1 47 | } else { 48 | 
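// Editor's note: BLSpans.hitsLength is assumed here to return the common length of all
// hits, or -1 when the hits vary in length; the branch below covers the stub with no hits.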
// Force all length zero spans to be of length one for the moment 49 | 1 50 | } 51 | } 52 | 53 | override def setHitQueryContext(context: HitQueryContext): Unit = { 54 | captureNumbers = captureNames.map(context.registerCapturedGroup(_)) 55 | } 56 | 57 | override def passHitQueryContextToClauses(context: HitQueryContext): Unit = {} 58 | 59 | override def getCapturedGroups(capturedGroups: Array[Span]): Unit = { 60 | val onCaptures = captures(current) 61 | captureNumbers.zip(onCaptures).foreach { 62 | case (i, span) => capturedGroups.update(i, span) 63 | } 64 | } 65 | 66 | override def doc(): Int = docs(current) 67 | 68 | override def end(): Int = ends(current) 69 | 70 | override def start(): Int = starts(current) 71 | 72 | override def next(): Boolean = { 73 | // Deliberately allow this to get in a bad state if next() is called when the previous 74 | // call to next() was false, since (to my knowledge) this matches the Spans API 75 | current += 1 76 | current < docs.length 77 | } 78 | 79 | override def skipTo(target: Int): Boolean = { 80 | var more = true 81 | while (more && (current < 0 || doc() < target)) 82 | more = next() 83 | more 84 | } 85 | 86 | override def getPayload: util.Collection[Array[Byte]] = null 87 | 88 | override def isPayloadAvailable: Boolean = false 89 | } 90 | -------------------------------------------------------------------------------- /src/test/scala/org/allenai/ike/ml/subsample/TestMatchesSampler.scala: -------------------------------------------------------------------------------- 1 | package org.allenai.ike.ml.subsample 2 | 3 | import org.allenai.common.testkit.{ ScratchDirectory, UnitSpec } 4 | import org.allenai.ike._ 5 | import org.allenai.ike.index.TestData 6 | import org.allenai.ike.ml.TokenizedQuery 7 | 8 | import org.allenai.blacklab.search.Hits 9 | 10 | import scala.collection.JavaConverters._ 11 | 12 | class TestMatchesSampler extends UnitSpec with ScratchDirectory { 13 | 14 | TestData.createTestIndex(scratchDir) 15 | val searcher = TestData.testSearcher(scratchDir) 16 | 17 | def hitToAllCaptures(hits: Hits, groups: Seq[String]): Seq[Seq[String]] = { 18 | hits.asScala.map(hit => { 19 | val kwic = hits.getKwic(hit) 20 | val captures = groups.map(hits.getCapturedGroupMap(hit).get(_)) 21 | captures.map(span => { 22 | if (span == null) { 23 | null 24 | } else { 25 | val captureKwic = span.start - hit.start 26 | kwic.getMatch("word").subList( 27 | captureKwic, 28 | captureKwic + span.end - span.start 29 | ).asScala.mkString(" ") 30 | } 31 | }).toSeq 32 | }).toSeq 33 | } 34 | 35 | def buildTable(positive: Seq[String], negative: Seq[String]): Table = { 36 | Table("testTable", Seq("testCol"), 37 | positive.map(x => TableRow(Seq(TableValue(x.split(" ").map(QWord.apply))))), 38 | negative.map(x => TableRow(Seq(TableValue(x.split(" ").map(QWord.apply)))))) 39 | } 40 | 41 | "Matches Sampler" should "get named query correctly" in { 42 | val query = QueryLanguage.parse("(?<c1> a+) b (?<c2> c d) e*").get 43 | val tokenized = TokenizedQuery.buildFromQuery(query, Seq("c1", "c2")) 44 | val names = tokenized.getNames 45 | val expectedNamedQuery = QSeq(Seq( 46 | QNamed(QNamed(QPlus(QWord("a")), names(0)), "c1"), 47 | QWord("b"), 48 | QNamed(QSeq(Seq( 49 | QWord("c"), 50 | QWord("d") 51 | )), "c2"), 52 | QNamed(QStar(QWord("e")), names(4)) 53 | )) 54 | assertResult(expectedNamedQuery)(MatchesSampler.getNamedQuery(tokenized)) 55 | } 56 | 57 | it should "test correctly" in { 58 | val startingQuery = QueryLanguage.parse("(?<col1> {I, hate, it}) . " + 59 | "(?<col2>
{great, mango, bananas}) .").get 60 | val table = Table( 61 | "test", 62 | Seq("col1", "col2"), 63 | Seq( 64 | TableRow(Seq(TableValue(Seq(QWord("I"))), TableValue(Seq(QWord("mango"))))), 65 | TableRow(Seq(TableValue(Seq(QWord("hate"))), TableValue(Seq(QWord("those"))))) 66 | ), 67 | Seq( 68 | TableRow(Seq(TableValue(Seq(QWord("it"))), TableValue(Seq(QWord("great"))))), 69 | TableRow(Seq(TableValue(Seq(QWord("I"))), TableValue(Seq(QWord("bananas"))))) 70 | ) 71 | ) 72 | val tokenized = TokenizedQuery.buildFromQuery(startingQuery, Seq("col1", "col2")) 73 | 74 | val expectedResults = Seq( 75 | Seq("I", "mango"), 76 | Seq("It", "great") 77 | ) 78 | assertResult(expectedResults)(hitToAllCaptures(MatchesSampler().getLabelledSample( 79 | tokenized, searcher, table, Map(), Map(), 0, 0 80 | ), table.cols)) 81 | assertResult(expectedResults.drop(1))(hitToAllCaptures(MatchesSampler().getLabelledSample( 82 | tokenized, searcher, table, Map(), Map(), 1, 0 83 | ), table.cols)) 84 | } 85 | } 86 | -------------------------------------------------------------------------------- /src/test/scala/org/allenai/ike/ml/subsample/TestMinimumValidCaptures.scala: -------------------------------------------------------------------------------- 1 | package org.allenai.ike.ml.subsample 2 | 3 | import org.allenai.common.testkit.UnitSpec 4 | 5 | import org.allenai.blacklab.search.Span 6 | import org.allenai.blacklab.search.lucene.HitQueryContext 7 | 8 | class TestMinimumValidCaptures extends UnitSpec { 9 | def s(spans: Int*) = spans.map { x => new Span(x, x + x / Math.abs(x)) } 10 | 11 | it should "test correctly" in { 12 | val captureNames = Seq("c1", "c2", "c3", "c4") 13 | val stub = SpansStub.withCaptures(Seq( 14 | (0, 1, 3), 15 | (0, 2, 6), 16 | (0, 2, 7), 17 | (2, 2, 5), 18 | (10, 6, 10) 19 | ), Seq( 20 | s(1, 1, -1, 1), 21 | s(1, 1, 1, 1), 22 | s(-1, -1, -1, 1), 23 | s(-1, 1, -1, 1), 24 | s(1, 1, 1, 1) 25 | ), captureNames) 26 | val validated = new SpansMinimumValidCaptures( 27 | stub, 2, Seq("c1", "c2", "c3") 28 | ) 29 | val context = new HitQueryContext(validated) 30 | validated.setHitQueryContext(context) 31 | 32 | def testAtHit(at: Int): Unit = { 33 | assert(validated.next()) 34 | val expected = (stub.docs(at), stub.starts(at), stub.ends(at)) 35 | val actual = (validated.doc, validated.start, validated.end) 36 | assertResult(expected)(actual) 37 | val expectedC = captureNames.map { name => 38 | stub.captures(at)(context.getCapturedGroupNames.indexOf(name)) 39 | } 40 | 41 | val actualC = Array.fill[Span](context.numberOfCapturedGroups)(null) 42 | stub.getCapturedGroups(actualC) 43 | assertResult(expectedC)(actualC) 44 | } 45 | 46 | testAtHit(0) 47 | testAtHit(1) 48 | testAtHit(4) 49 | assert(!validated.next()) 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /src/test/scala/org/allenai/ike/ml/subsample/TestSpanQueryFilterByCaptureGroups.scala: -------------------------------------------------------------------------------- 1 | package org.allenai.ike.ml.subsample 2 | 3 | import org.allenai.common.testkit.{ ScratchDirectory, UnitSpec } 4 | import org.allenai.ike.index.TestData 5 | import org.allenai.ike.{ BlackLabSemantics, QueryLanguage } 6 | 7 | import org.apache.lucene.search.spans.SpanQuery 8 | 9 | import scala.collection.JavaConverters._ 10 | 11 | class TestSpanQueryFilterByCaptureGroups extends UnitSpec with ScratchDirectory { 12 | TestData.createTestIndex(scratchDir) 13 | val searcher = TestData.testSearcher(scratchDir) 14 | 15 |
"SpanQueryFilterByCaptureGroups" should "filter correctly" in { 16 | val startingQuery = QueryLanguage.parse("(?<c1> {like, mango, taste, I}) . " + 17 | "(?<c2> {mango, great})").get 18 | val startingSpanQuery = searcher.createSpanQuery(BlackLabSemantics.blackLabQuery(startingQuery)) 19 | val andWith = QueryLanguage.parse("(?<c1> {I, taste}) . (?<c2> {mango, great})").get 20 | val andWithSpanQuery = searcher.createSpanQuery(BlackLabSemantics.blackLabQuery(andWith)) 21 | 22 | def testQuery(query: SpanQuery, results: Seq[String]) = { 23 | val hits = searcher.find(query) 24 | assertResult(results)( 25 | hits.asScala.map(hit => hits.getKwic(hit).getMatch("word").asScala.mkString(" ")) 26 | ) 27 | } 28 | 29 | testQuery(new SpanQueryFilterByCaptureGroups(startingSpanQuery, andWithSpanQuery, 30 | Seq("c1", "c2")), Seq("I like mango", "taste not great")) 31 | 32 | testQuery(new SpanQueryFilterByCaptureGroups(startingSpanQuery, andWithSpanQuery, 33 | Seq("c1", "c2"), 1), Seq("taste not great")) 34 | 35 | testQuery(new SpanQueryFilterByCaptureGroups(startingSpanQuery, andWithSpanQuery, 36 | Seq("c1", "c2"), 0, 3), Seq("taste not great")) 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /src/test/scala/org/allenai/ike/ml/subsample/TestSpansTrackingDisjunction.scala: -------------------------------------------------------------------------------- 1 | package org.allenai.ike.ml.subsample 2 | 3 | import org.allenai.common.testkit.UnitSpec 4 | 5 | import org.allenai.blacklab.search.Span 6 | import org.allenai.blacklab.search.lucene.{ BLSpans, HitQueryContext } 7 | 8 | class TestSpansTrackingDisjunction extends UnitSpec { 9 | 10 | def assertHit(spans: BLSpans, doc: Int, start: Int, end: Int, didMatch: Boolean) = { 11 | assert(spans.next()) 12 | assert(spans.doc == doc) 13 | assert(spans.start == start) 14 | assert(spans.end == end) 15 | 16 | val captures = Array[Span](null) 17 | spans.getCapturedGroups(captures) 18 | if (didMatch) { 19 | assert(captures.head.start == start) 20 | assert(captures.head.end == end) 21 | } else { 22 | assert(captures.head.start == -start) 23 | assert(captures.head.end == -end) 24 | } 25 | } 26 | 27 | it should "test correctly" in { 28 | val first = SpansStub(Seq((0, 0, 1), (0, 5, 6), (1, 3, 5))) 29 | val t1 = SpansStub(Seq((0, 0, 1), (0, 0, 2), (1, 1, 3), (2, 3, 6))) 30 | val t2 = SpansStub(Seq((0, 5, 6), (2, 3, 5))) 31 | val spans = new SpansTrackingDisjunction(first, Seq(t1, t2), "c") 32 | spans.setHitQueryContext(new HitQueryContext(spans)) 33 | 34 | assertHit(spans, 0, 0, 1, true) 35 | assertHit(spans, 0, 0, 2, false) 36 | assertHit(spans, 0, 5, 6, true) 37 | assertHit(spans, 1, 1, 3, false) 38 | assertHit(spans, 1, 3, 5, true) 39 | assertHit(spans, 2, 3, 5, false) 40 | assertHit(spans, 2, 3, 6, false) 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /src/test/scala/org/allenai/ike/patterns/TestPatternUtilities.scala: -------------------------------------------------------------------------------- 1 | package org.allenai.ike.patterns 2 | 3 | import org.allenai.common.testkit.{ ScratchDirectory, UnitSpec } 4 | 5 | class TestPatternUtilities extends UnitSpec with ScratchDirectory { 6 | val namedPatterns = PatternUtilities.loadNamedPatterns("testPatterns.conf") 7 | 8 | "TestPatternUtilities" should "load up the correct test patterns" in { 9 | // sort to guarantee order on checks 10 | val namedPatternsSorted = namedPatterns.sortBy(_.name) 11 | assert(namedPatternsSorted.length == 2) 12 |
assert(namedPatternsSorted.head.name == "result-percent") 13 | assert(namedPatternsSorted.head.pattern == "CD {%|percent|per cent|pct}") 14 | assert(namedPatternsSorted.last.name == "treatments") 15 | assert(namedPatternsSorted.last.pattern == "{were given|treated with|received|receiving} {CD|NN|JJ|IN}+") 16 | } 17 | 18 | it should "then create the correct searchRequest objects per name" in { 19 | val searcherMap = PatternUtilities.createSearchers(namedPatterns) 20 | 21 | assert(searcherMap.size == 2) 22 | 23 | assert(searcherMap.contains("result-percent")) 24 | val searcherRes = searcherMap("result-percent") 25 | assert(searcherRes.query.isLeft) 26 | assert(searcherRes.query.left.get == "CD {%|percent|per cent|pct}") 27 | 28 | assert(searcherMap.contains("treatments")) 29 | val searcherRes2 = searcherMap("treatments") 30 | assert(searcherRes2.query.isLeft) 31 | assert(searcherRes2.query.left.get == "{were given|treated with|received|receiving} {CD|NN|JJ|IN}+") 32 | 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /version.sbt: -------------------------------------------------------------------------------- 1 | version in ThisBuild := "0.5-SNAPSHOT" -------------------------------------------------------------------------------- /webapp/.gitignore: -------------------------------------------------------------------------------- 1 | node_modules/ 2 | -------------------------------------------------------------------------------- /webapp/.jshintrc: -------------------------------------------------------------------------------- 1 | 2 | { 3 | "node": true, 4 | "browser": true, 5 | "esnext": true, 6 | "bitwise": true, 7 | "camelcase": true, 8 | "curly": true, 9 | "eqeqeq": true, 10 | "immed": true, 11 | "indent": 2, 12 | "latedef": false, 13 | "laxbreak": true, 14 | "newcap": true, 15 | "noarg": true, 16 | "quotmark": "single", 17 | "regexp": true, 18 | "undef": true, 19 | "unused": true, 20 | "strict": true, 21 | "trailing": true, 22 | "smarttabs": true, 23 | "expr": true, 24 | "globalstrict": true 25 | } -------------------------------------------------------------------------------- /webapp/app/assets/blank_user.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/ike/f3a7f85a50831b56cef04e599f70f75de6c655a3/webapp/app/assets/blank_user.png -------------------------------------------------------------------------------- /webapp/app/assets/glyphicons-halflings-regular.eot: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/ike/f3a7f85a50831b56cef04e599f70f75de6c655a3/webapp/app/assets/glyphicons-halflings-regular.eot -------------------------------------------------------------------------------- /webapp/app/assets/glyphicons-halflings-regular.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/ike/f3a7f85a50831b56cef04e599f70f75de6c655a3/webapp/app/assets/glyphicons-halflings-regular.ttf -------------------------------------------------------------------------------- /webapp/app/assets/glyphicons-halflings-regular.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/ike/f3a7f85a50831b56cef04e599f70f75de6c655a3/webapp/app/assets/glyphicons-halflings-regular.woff
-------------------------------------------------------------------------------- /webapp/app/assets/glyphicons-halflings-regular.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/ike/f3a7f85a50831b56cef04e599f70f75de6c655a3/webapp/app/assets/glyphicons-halflings-regular.woff2 -------------------------------------------------------------------------------- /webapp/app/assets/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/ike/f3a7f85a50831b56cef04e599f70f75de6c655a3/webapp/app/assets/logo.png -------------------------------------------------------------------------------- /webapp/app/css/_app.less: -------------------------------------------------------------------------------- 1 | .vslider { 2 | writing-mode: bt-lr; /* IE */ 3 | -webkit-appearance: slider-vertical; /* WebKit */ 4 | width: 8px; 5 | height: 75px; 6 | padding: 0 5px; 7 | } 8 | .panel-collapse.in { 9 | height: auto !important; 10 | } 11 | .rowKey { 12 | white-space: nowrap 13 | } 14 | .mainContent { 15 | padding: 20px; 16 | } 17 | .searchForm { 18 | margin-top: 10px; 19 | } 20 | .wordData { 21 | margin-right: 5px; 22 | } 23 | 24 | .dataTableContainer { 25 | background-color:white; 26 | border: 1px solid lightgray; 27 | width: 100%; 28 | border-bottom-left-radius: 4px; 29 | border-bottom-right-radius: 4px; 30 | border-top: 0px; 31 | padding: 15px; 32 | } 33 | 34 | .dataTable { 35 | border: 0px; 36 | width: 100%; 37 | } 38 | 39 | .dataTable td { 40 | padding: 5px; 41 | margin: 5px; 42 | text-align: center; 43 | } 44 | 45 | .dataTable th { 46 | padding: 5px; 47 | margin: 5px; 48 | text-align: center; 49 | } 50 | 51 | .dataTable input { 52 | text-align: center; 53 | } 54 | 55 | .dataTable tr td:first-child { 56 | width: 106px; 57 | white-space: nowrap; 58 | border-top: 0px; 59 | border-right: 0px; 60 | text-align: right; 61 | } 62 | 63 | .dictList { 64 | max-height: 300px; 65 | overflow-y: scroll; 66 | } 67 | 68 | .keyedBlackLabResults { 69 | max-height: 100px; 70 | overflow-y: scroll; 71 | } 72 | 73 | .leftContext { 74 | text-align: right; 75 | } 76 | .rightContext { 77 | text-align: left; 78 | } 79 | .hit { 80 | text-align: center; 81 | } 82 | .highlighted { 83 | color: blue; 84 | font-weight: bold; 85 | } 86 | 87 | .nav-tabs { 88 | padding: 0 20px; 89 | } 90 | 91 | .tab-pane > * { 92 | padding: 20px 0; 93 | } 94 | 95 | .modal-backdrop { 96 | height: 100%; 97 | width: 100%; 98 | } 99 | 100 | #suggestion-table { 101 | font-size: small; 102 | border-collapse: collapse; 103 | } 104 | 105 | .queryCell { 106 | white-space: nowrap; 107 | -webkit-touch-callout: none; 108 | -webkit-user-select: none; 109 | -khtml-user-select: none; 110 | -moz-user-select: none; 111 | -ms-user-select: none; 112 | user-select: none; 113 | cursor: pointer; 114 | } 115 | 116 | .queryHeader { 117 | white-space: nowrap; 118 | } 119 | 120 | .queryStat { 121 | text-align: center; 122 | } 123 | 124 | .order-by-dropdown { 125 | float: right; 126 | } 127 | 128 | .order-by-dropdown-entry { 129 | padding: 0; 130 | font-size: 10pt; 131 | } 132 | 133 | .nav-pills>li.active>a { 134 | &:hover,&:active,&:focus,& { 135 | button { 136 | color: #ffffff; 137 | } 138 | } 139 | } 140 | -------------------------------------------------------------------------------- /webapp/app/css/_corpora.less: -------------------------------------------------------------------------------- 1 | 2 | .corpora-modal-trigger { 3 | 
cursor: pointer; 4 | float: right; 5 | font-size: 13px; 6 | } 7 | 8 | .corpora { 9 | margin-bottom: 15px; 10 | padding-left: 10px; 11 | 12 | .form-group, 13 | .checkbox { 14 | margin-bottom: 2px; 15 | 16 | label { 17 | color: #337ab7; 18 | font-size: 16px; 19 | } 20 | } 21 | 22 | p { 23 | margin-left: 20px; 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /webapp/app/css/_header.less: -------------------------------------------------------------------------------- 1 | 2 | header { 3 | padding: 0 0 16px 0; 4 | 5 | .btn-group { 6 | margin-top: 12px; 7 | } 8 | 9 | .auth { 10 | padding: 15px 0 0; 11 | 12 | a.btn { 13 | font-size: .9em; 14 | } 15 | 16 | img { 17 | border-radius: 50%; 18 | height: 24px; 19 | margin-right: 10px; 20 | width: 24px; 21 | } 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /webapp/app/css/_tree.less: -------------------------------------------------------------------------------- 1 | /* from http://stackoverflow.com/questions/14954789/complex-heirarchical-tree-layout */ 2 | /*Now the CSS*/ 3 | * {margin: 0; padding: 0;} 4 | 5 | .queryViewer * { 6 | font-size: 8pt !important; 7 | } 8 | 9 | ul.tree { 10 | padding-top: 20px; position: relative; 11 | 12 | transition: all 0.5s; 13 | -webkit-transition: all 0.5s; 14 | -moz-transition: all 0.5s; 15 | } 16 | 17 | li.tree { 18 | float: left; text-align: center; 19 | list-style-type: none; 20 | position: relative; 21 | padding: 20px 5px 0 5px; 22 | 23 | transition: all 0.5s; 24 | -webkit-transition: all 0.5s; 25 | -moz-transition: all 0.5s; 26 | } 27 | 28 | /*We will use ::before and ::after to draw the connectors*/ 29 | 30 | li.tree::before, li.tree::after{ 31 | content: ''; 32 | position: absolute; top: 0; right: 50%; 33 | border-top: 1px solid #ccc; 34 | width: 50%; height: 20px; 35 | } 36 | li.tree::after{ 37 | right: auto; left: 50%; 38 | border-left: 1px solid #ccc; 39 | } 40 | 41 | /*We need to remove left-right connectors from elements without 42 | any siblings*/ 43 | li.tree:only-child::after, li.tree:only-child::before{ 44 | display: none; 45 | } 46 | 47 | /*Remove space from the top of single children*/ 48 | li.tree:only-child{ padding-top: 0;} 49 | 50 | /*Remove left connector from first child and 51 | right connector from last child*/ 52 | li.tree:first-child::before, li.tree:last-child::after{ 53 | border: 0 none; 54 | } 55 | /*Adding back the vertical connector to the last nodes*/ 56 | li.tree:last-child::before{ 57 | border-right: 1px solid #ccc; 58 | border-radius: 0 5px 0 0; 59 | -webkit-border-radius: 0 5px 0 0; 60 | -moz-border-radius: 0 5px 0 0; 61 | } 62 | li.tree:first-child::after{ 63 | border-radius: 5px 0 0 0; 64 | -webkit-border-radius: 5px 0 0 0; 65 | -moz-border-radius: 5px 0 0 0; 66 | } 67 | 68 | /*Time to add downward connectors from parents*/ 69 | ul.tree ul.tree::before{ 70 | content: ''; 71 | position: absolute; top: 0; left: 50%; 72 | border-left: 1px solid #ccc; 73 | width: 0; height: 20px; 74 | } 75 | /*Thats all. I hope you enjoyed it. 
76 | Thanks :)*/ 77 | -------------------------------------------------------------------------------- /webapp/app/css/main.less: -------------------------------------------------------------------------------- 1 | @import '../../node_modules/bootstrap/less/alerts.less'; 2 | @import '../../node_modules/bootstrap/less/badges.less'; 3 | @import '../../node_modules/bootstrap/less/bootstrap.less'; 4 | @import '../../node_modules/bootstrap/less/breadcrumbs.less'; 5 | @import '../../node_modules/bootstrap/less/button-groups.less'; 6 | @import '../../node_modules/bootstrap/less/buttons.less'; 7 | @import '../../node_modules/bootstrap/less/carousel.less'; 8 | @import '../../node_modules/bootstrap/less/close.less'; 9 | @import '../../node_modules/bootstrap/less/code.less'; 10 | @import '../../node_modules/bootstrap/less/component-animations.less'; 11 | @import '../../node_modules/bootstrap/less/dropdowns.less'; 12 | @import '../../node_modules/bootstrap/less/forms.less'; 13 | @import '../../node_modules/bootstrap/less/glyphicons.less'; 14 | @import '../../node_modules/bootstrap/less/grid.less'; 15 | @import '../../node_modules/bootstrap/less/input-groups.less'; 16 | @import '../../node_modules/bootstrap/less/jumbotron.less'; 17 | @import '../../node_modules/bootstrap/less/labels.less'; 18 | @import '../../node_modules/bootstrap/less/list-group.less'; 19 | @import '../../node_modules/bootstrap/less/media.less'; 20 | @import '../../node_modules/bootstrap/less/mixins.less'; 21 | @import '../../node_modules/bootstrap/less/modals.less'; 22 | @import '../../node_modules/bootstrap/less/navbar.less'; 23 | @import '../../node_modules/bootstrap/less/navs.less'; 24 | @import '../../node_modules/bootstrap/less/normalize.less'; 25 | @import '../../node_modules/bootstrap/less/pager.less'; 26 | @import '../../node_modules/bootstrap/less/pagination.less'; 27 | @import '../../node_modules/bootstrap/less/panels.less'; 28 | @import '../../node_modules/bootstrap/less/popovers.less'; 29 | @import '../../node_modules/bootstrap/less/print.less'; 30 | @import '../../node_modules/bootstrap/less/progress-bars.less'; 31 | @import '../../node_modules/bootstrap/less/responsive-embed.less'; 32 | @import '../../node_modules/bootstrap/less/responsive-utilities.less'; 33 | @import '../../node_modules/bootstrap/less/scaffolding.less'; 34 | @import '../../node_modules/bootstrap/less/tables.less'; 35 | @import '../../node_modules/bootstrap/less/theme.less'; 36 | @import '../../node_modules/bootstrap/less/thumbnails.less'; 37 | @import '../../node_modules/bootstrap/less/tooltip.less'; 38 | @import '../../node_modules/bootstrap/less/type.less'; 39 | @import '../../node_modules/bootstrap/less/utilities.less'; 40 | @import '../../node_modules/bootstrap/less/variables.less'; 41 | @import '../../node_modules/bootstrap/less/wells.less'; 42 | @import '../../node_modules/bootstrap/less/mixins/alerts.less'; 43 | @import '../../node_modules/bootstrap/less/mixins/background-variant.less'; 44 | @import '../../node_modules/bootstrap/less/mixins/border-radius.less'; 45 | @import '../../node_modules/bootstrap/less/mixins/buttons.less'; 46 | @import '../../node_modules/bootstrap/less/mixins/center-block.less'; 47 | @import '../../node_modules/bootstrap/less/mixins/clearfix.less'; 48 | @import '../../node_modules/bootstrap/less/mixins/forms.less'; 49 | @import '../../node_modules/bootstrap/less/mixins/gradients.less'; 50 | @import '../../node_modules/bootstrap/less/mixins/grid-framework.less'; 51 | @import 
'../../node_modules/bootstrap/less/mixins/grid.less'; 52 | @import '../../node_modules/bootstrap/less/mixins/hide-text.less'; 53 | @import '../../node_modules/bootstrap/less/mixins/image.less'; 54 | @import '../../node_modules/bootstrap/less/mixins/labels.less'; 55 | @import '../../node_modules/bootstrap/less/mixins/list-group.less'; 56 | @import '../../node_modules/bootstrap/less/mixins/nav-divider.less'; 57 | @import '../../node_modules/bootstrap/less/mixins/nav-vertical-align.less'; 58 | @import '../../node_modules/bootstrap/less/mixins/opacity.less'; 59 | @import '../../node_modules/bootstrap/less/mixins/pagination.less'; 60 | @import '../../node_modules/bootstrap/less/mixins/panels.less'; 61 | @import '../../node_modules/bootstrap/less/mixins/progress-bar.less'; 62 | @import '../../node_modules/bootstrap/less/mixins/reset-filter.less'; 63 | @import '../../node_modules/bootstrap/less/mixins/resize.less'; 64 | @import '../../node_modules/bootstrap/less/mixins/responsive-visibility.less'; 65 | @import '../../node_modules/bootstrap/less/mixins/size.less'; 66 | @import '../../node_modules/bootstrap/less/mixins/tab-focus.less'; 67 | @import '../../node_modules/bootstrap/less/mixins/table-row.less'; 68 | @import '../../node_modules/bootstrap/less/mixins/text-emphasis.less'; 69 | @import '../../node_modules/bootstrap/less/mixins/text-overflow.less'; 70 | @import '../../node_modules/bootstrap/less/mixins/vendor-prefixes.less'; 71 | @import '_app.less'; 72 | @import '_corpora.less'; 73 | @import '_header.less'; 74 | @import '_tree.less'; 75 | @icon-font-path: "/assets/"; 76 | -------------------------------------------------------------------------------- /webapp/app/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | IKE 9 | 10 | 11 | 12 | 13 | 58 | 59 | 60 | -------------------------------------------------------------------------------- /webapp/app/js/DictApp.js: -------------------------------------------------------------------------------- 1 | const React = require('react/addons'); 2 | const bs = require('react-bootstrap'); 3 | const SearchInterface = require('./components/search/SearchInterface.js'); 4 | const TablesInterface = require('./components/table/TablesInterface.js'); 5 | const TableManager = require('./managers/TableManager.js'); 6 | const PatternsInterface = require('./components/pattern/PatternsInterface.js'); 7 | const ConfigInterface = require('./components/config/ConfigInterface.js'); 8 | const HelpInterface = require('./components/help/HelpInterface.js'); 9 | const xhr = require('xhr'); 10 | const Header = require('./components/Header.js'); 11 | const AuthStore = require('./stores/AuthStore.js'); 12 | const CorporaStore = require('./stores/CorporaStore.js'); 13 | const assign = require('object-assign'); 14 | const TabbedArea = bs.TabbedArea; 15 | const TabPane = bs.TabPane; 16 | 17 | var DictApp = React.createClass({ 18 | mixins: [React.addons.LinkedStateMixin], 19 | 20 | componentWillUnmount() { 21 | AuthStore.removeChangeListener(this.onAuthChange); 22 | }, 23 | 24 | componentDidMount() { 25 | AuthStore.addChangeListener(this.onAuthChange); 26 | 27 | TableManager.addChangeListener(function(tables) { 28 | var target = this.linkState('target'); 29 | this.setState({tables: tables}); 30 | if(target.value == null && tables) { 31 | for(var tableName in tables) { 32 | if(tables.hasOwnProperty(tableName)) { 33 | target.requestChange(tableName); 34 | break; 35 | } 36 | } 37 | } 38 | }.bind(this)); 39 | 
TableManager.setUserEmail(AuthStore.getUserEmail()); 40 | }, 41 | 42 | getInitialState() { 43 | return { 44 | authenticated: AuthStore.authenticated(), 45 | config: { 46 | limit: 1000, 47 | evidenceLimit: 10, 48 | hideAdded: false, 49 | groupsPerPage: 25, 50 | ml: { 51 | disable: false, 52 | depth: 3, 53 | beamSize: 25, 54 | maxSampleSize: 8000, 55 | pWeight: 2.0, 56 | nWeight: -1.0, 57 | uWeight: -0.05, 58 | pWeightNarrow: 2.0, 59 | nWeightNarrow: -1.0, 60 | uWeightNarrow: -0.05 61 | } 62 | }, 63 | tables: [], 64 | target: null 65 | }; 66 | }, 67 | 68 | onAuthChange() { 69 | let newState = { authenticated: AuthStore.authenticated() }; 70 | if (!AuthStore.authenticated()) { 71 | assign(newState, { target: null }); 72 | } 73 | this.setState(newState); 74 | }, 75 | 76 | renderContent() { 77 | var target = this.linkState('target'); 78 | var patterns = this.linkState('patterns'); 79 | var config = this.linkState('config'); 80 | return 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | ; 97 | }, 98 | 99 | render() { 100 | var content = this.renderContent(); 101 | return
{content}
; 102 | } 103 | }); 104 | 105 | React.render(, document.body); 106 | -------------------------------------------------------------------------------- /webapp/app/js/components/Header.js: -------------------------------------------------------------------------------- 1 | "use strict"; 2 | 3 | const React = require('react/addons'); 4 | const bs = require('react-bootstrap'); 5 | const AppDispatcher = require('../dispatcher/AppDispatcher'); 6 | const AuthStore = require('../stores/AuthStore.js'); 7 | const AuthConstants = require('../constants/AuthConstants'); 8 | 9 | const Header = React.createClass({ 10 | 11 | propTypes: { 12 | authenticated: React.PropTypes.bool.isRequired 13 | }, 14 | 15 | signIn(e) { 16 | e.preventDefault(); 17 | AppDispatcher.dispatch({ 18 | actionType: AuthConstants.SIGN_IN 19 | }); 20 | }, 21 | 22 | signOut(e) { 23 | e.preventDefault(); 24 | AppDispatcher.dispatch({ 25 | actionType: AuthConstants.SIGN_OUT 26 | }); 27 | }, 28 | 29 | renderAuth() { 30 | if (this.props.authenticated) { 31 | var userEmail = AuthStore.getUserEmail(); 32 | var userImageUrl = AuthStore.getUserImageUrl(); 33 | return ( 34 |
39 | ) 40 | } else { 41 | return ( 42 |
43 | Sign In 44 |
45 | ) 46 | } 47 | }, 48 | 49 | render() { 50 | return (
51 | 52 | {this.renderAuth()} 53 | 54 | 55 |
); 56 | } 57 | }); 58 | 59 | module.exports = Header; 60 | -------------------------------------------------------------------------------- /webapp/app/js/components/corpora/Corpora.js: -------------------------------------------------------------------------------- 1 | var React = require('react'); 2 | var bs = require('react-bootstrap'); 3 | var Input = bs.Input; 4 | 5 | var Corpora = React.createClass({ 6 | propTypes: { 7 | corpora: React.PropTypes.array.isRequired, 8 | selectedCorpusNames: React.PropTypes.array.isRequired, 9 | toggleCorpora: React.PropTypes.func.isRequired 10 | }, 11 | render: function() { 12 | var self = this; 13 | return ( 14 |
15 | {this.props.corpora.map(function(corpus, i) { 16 | return ( 17 |
18 | = 0} 22 | onChange={self.props.toggleCorpora.bind(undefined, i)}> 23 | 24 |

{corpus.description}

25 |
); 26 | })} 27 |
28 | ); 29 | } 30 | }); 31 | 32 | module.exports = Corpora; 33 | -------------------------------------------------------------------------------- /webapp/app/js/components/corpora/CorpusSelector.js: -------------------------------------------------------------------------------- 1 | var React = require('react'); 2 | var bs = require('react-bootstrap'); 3 | var Modal = bs.Modal; 4 | var ModalTrigger = bs.ModalTrigger; 5 | var Glyphicon = bs.Glyphicon; 6 | var Corpora = require('../corpora/Corpora.js'); 7 | 8 | var CorpusSelector = React.createClass({ 9 | propTypes: { 10 | corpora: React.PropTypes.array.isRequired, 11 | selectedCorpusNames: React.PropTypes.array.isRequired, 12 | toggleCorpora: React.PropTypes.func.isRequired 13 | }, 14 | 15 | renderCorporaLabel: function() { 16 | // Get the number of selected corpora 17 | var corpora = this.props.corpora; 18 | var selectedCorpusNames = this.props.selectedCorpusNames; 19 | var corporaLabel = 'Searching '; 20 | if (selectedCorpusNames.length === corpora.length) { 21 | corporaLabel += ' All '; 22 | } 23 | corporaLabel += (selectedCorpusNames.length === 1) 24 | ? selectedCorpusNames + ' Corpus' 25 | : selectedCorpusNames.length + ' Corpora'; 26 | 27 | return {corporaLabel}; 28 | }, 29 | 30 | render: function() { 31 | // Bootstrap's Modal requires a function, so we give it one. 32 | var onRequestHide = function() {}; 33 | 34 | var overlay = 35 | 39 |
40 | 44 |
45 |
; 46 | return 47 | {this.renderCorporaLabel()} 48 | ; 49 | } 50 | }); 51 | 52 | module.exports = CorpusSelector; 53 | -------------------------------------------------------------------------------- /webapp/app/js/components/misc/DeleteButton.js: -------------------------------------------------------------------------------- 1 | var React = require('react/addons'); 2 | var bs = require('react-bootstrap'); 3 | var Button = bs.Button; 4 | var Glyphicon = bs.Glyphicon; 5 | 6 | var DeleteButton = React.createClass({ 7 | propTypes: { 8 | callback: React.PropTypes.func.isRequired, 9 | bsStyle: React.PropTypes.string 10 | }, 11 | 12 | render: function() { 13 | var callback = this.props.callback; 14 | 15 | var bsStyle = this.props.bsStyle; 16 | if(!bsStyle) 17 | bsStyle = "danger"; 18 | 19 | return ( 20 | 27 | ); 28 | } 29 | }); 30 | 31 | module.exports = DeleteButton; 32 | -------------------------------------------------------------------------------- /webapp/app/js/components/misc/EditableList.js: -------------------------------------------------------------------------------- 1 | const React = require('react/addons'); 2 | const bs = require('react-bootstrap'); 3 | const DeleteButton = require('./DeleteButton.js'); 4 | const Input = bs.Input; 5 | const ListGroup = bs.ListGroup; 6 | const ListGroupItem = bs.ListGroupItem; 7 | const Glyphicon = bs.Glyphicon; 8 | const Button = bs.Button; 9 | const Well = bs.Well; 10 | 11 | var EditableList = React.createClass({ 12 | getInitialState: function() { 13 | return {input: ""}; 14 | }, 15 | 16 | pressedEnterKey: function(e) { 17 | return e.which == 13; 18 | }, 19 | 20 | handleChange: function(e) { 21 | if (this.pressedEnterKey(e)) { 22 | this.add(); 23 | } else { 24 | this.setState({input: e.target.value}); 25 | } 26 | }, 27 | 28 | add: function() { 29 | var input = this.state.input; 30 | this.props.onAdd(input); 31 | this.setState({input: ''}); 32 | }, 33 | 34 | remove: function(i) { 35 | return function () { this.props.onRemove(i); }.bind(this); 36 | }, 37 | 38 | focus: function() { 39 | this.refs.inputBox.getDOMNode().childNodes[0].focus(); 40 | }, 41 | 42 | makeRow: function(value, i) { 43 | var button = ; 44 | var key = value + '.' + i; 45 | return {value} {button}; 46 | }, 47 | 48 | render: function() { 49 | var items = this.props.value; 50 | var input = this.state.input; 51 | const plusIcon = ; 54 | var inputBox = ; 63 | var groupItems = items.map(this.makeRow); 64 | var listGroup = groupItems.length > 0 ? 65 | {groupItems} : 66 | No columns defined yet.; 67 | 68 | return ( 69 |
70 | {listGroup} 71 | {inputBox} 72 |
73 | ); 74 | } 75 | 76 | }); 77 | module.exports = EditableList; 78 | -------------------------------------------------------------------------------- /webapp/app/js/components/misc/ProvenanceButton.js: -------------------------------------------------------------------------------- 1 | var React = require('react/addons'); 2 | var bs = require('react-bootstrap'); 3 | var Button = bs.Button; 4 | var Glyphicon = bs.Glyphicon; 5 | var Modal = bs.Modal; 6 | var ModalTrigger = bs.ModalTrigger; 7 | var TableManager = require('../../managers/TableManager.js'); 8 | 9 | var ProvenanceButton = React.createClass({ 10 | onRequestHide: function() {}, 11 | render: function() { 12 | var rowvalues = this.props.rowvalues.map(TableManager.valueString); 13 | 14 | var title; 15 | if (rowvalues.length == 1) 16 | title = "Provenance for " + rowvalues[0]; 17 | else 18 | title = "Provenance for (" + rowvalues.join(", ") + ")"; 19 | 20 | var provenance = this.props.provenance; 21 | if(provenance) { 22 | var query = provenance.query; 23 | 24 | var examples = []; 25 | if (provenance.context) { 26 | examples = provenance.context.map(function(c, i) { 27 | var matchOffset = [0, 0]; 28 | if(c.matchOffset) 29 | matchOffset = c.matchOffset; 30 | 31 | var tags = c.words.map(function(word, j) { 32 | if(j >= matchOffset[0] && j < matchOffset[1]) { 33 | return {word.word}; 34 | } else { 35 | return {word.word}; 36 | } 37 | }); 38 | if(c.corpus) 39 | tags.push(({c.corpus})); 40 | return

{tags}
; 41 | }); 42 | } 43 | 44 | var cellStyle = { "padding": "5px", "verticalAlign": "top" }; 45 | var overlay = 46 |
47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 |
Query: {query}
Examples: {examples}
55 |
; 56 | 57 | return 58 | 61 | ; 62 | } else { 63 | return 66 | } 67 | } 68 | }); 69 | module.exports = ProvenanceButton; 70 | -------------------------------------------------------------------------------- /webapp/app/js/components/pattern/PatternEditor.js: -------------------------------------------------------------------------------- 1 | var React = require('react/addons'); 2 | var bs = require('react-bootstrap'); 3 | var xhr = require('xhr'); 4 | var Button = bs.Button; 5 | var Row = bs.Row; 6 | var Col = bs.Col; 7 | var SearchInterface = require("../search/SearchInterface"); 8 | var PatternStore = require("../../stores/NamedPatternsStore"); 9 | 10 | var PatternEditor = React.createClass({ 11 | mixins: [React.addons.LinkedStateMixin], 12 | 13 | propTypes: { 14 | patternName: React.PropTypes.string.isRequired, 15 | config: React.PropTypes.object.isRequired, 16 | initialQuery: React.PropTypes.string 17 | }, 18 | 19 | getInitialState: function() { 20 | var query = this.props.initialQuery; 21 | if(!query) 22 | query = ""; 23 | return { 24 | query: query 25 | }; 26 | }, 27 | 28 | componentWillReceiveProps: function(newProps) { 29 | var query = newProps.initialQuery; 30 | if(!query) 31 | query = ""; 32 | this.setState({ 33 | query: query 34 | }); 35 | }, 36 | 37 | selectedCorpora: function() { 38 | return this.props.corpora.value.filter(function(corpus) { 39 | return corpus.selected; 40 | }); 41 | }, 42 | 43 | saveButtonClicked: function() { 44 | PatternStore.savePattern(this.props.patternName, this.state.query.trim()); 45 | }, 46 | 47 | render: function() { 48 | var saveAllowed = 49 | (!this.props.initialQuery || this.state.query.trim() !== this.props.initialQuery.trim()) && 50 | (this.state.query.trim() !== ""); 51 | var saveText = "Save as " + this.props.patternName; 52 | var saveButton = 53 | ; 54 | 55 | return ; 63 | } 64 | }); 65 | 66 | module.exports = PatternEditor; 67 | -------------------------------------------------------------------------------- /webapp/app/js/components/search/AddResultButton.js: -------------------------------------------------------------------------------- 1 | var React = require('react'); 2 | var bs = require('react-bootstrap'); 3 | var ButtonToolbar = bs.ButtonToolbar; 4 | var ButtonGroup = bs.ButtonGroup; 5 | var Button = bs.Button; 6 | var OverlayTrigger = bs.OverlayTrigger; 7 | var Tooltip = bs.Tooltip; 8 | var TableManager = require('../../managers/TableManager.js'); 9 | var Glyphicon = bs.Glyphicon; 10 | 11 | var AddResultButton = React.createClass({ 12 | getInitialState: function() { 13 | return { 14 | isPos: this.isPos(), 15 | isNeg: this.isNeg() 16 | }; 17 | }, 18 | 19 | tableDidUpdate: function() { 20 | this.setState(this.getInitialState()); 21 | }, 22 | 23 | componentWillMount: function() { 24 | TableManager.addChangeListener(this.tableDidUpdate); 25 | }, 26 | 27 | componentWillUnmount: function() { 28 | TableManager.removeChangeListener(this.tableDidUpdate); 29 | }, 30 | 31 | componentDidUpdate: function(prevProps, prevState) { 32 | if(prevProps.target.value !== this.props.target.value) 33 | this.setState(this.getInitialState()); 34 | }, 35 | 36 | row: function() { 37 | var group = this.props.group; 38 | var values = group.keys; 39 | 40 | var provenance = { 41 | "query": this.props.query, 42 | "context": group.results.map(function(resultObject) { 43 | var words = resultObject.result.wordData; 44 | var fragment = words.map(function(word) { return word.word; }).join(" "); 45 | var matchOffset = resultObject.result.matchOffset; 46 | var corpus = 
resultObject.result.corpusName; 47 | return { 48 | "fragment": fragment, 49 | "words": words, 50 | "matchOffset": matchOffset, 51 | "corpus": corpus 52 | }; 53 | }) 54 | }; 55 | 56 | var row = TableManager.stringsRow(values); 57 | row.provenance = provenance; 58 | return row; 59 | }, 60 | 61 | isType: function(type) { 62 | var target = this.props.target.value; 63 | return TableManager.hasRow(target, type, this.row()); 64 | }, 65 | 66 | isPos: function() { 67 | return this.isType('positive'); 68 | }, 69 | 70 | isNeg: function() { 71 | return this.isType('negative'); 72 | }, 73 | 74 | toggleType: function(type) { 75 | var target = this.props.target.value; 76 | TableManager.toggleRow(target, type, this.row()); 77 | }, 78 | 79 | togglePos: function() { 80 | this.toggleType('positive'); 81 | }, 82 | 83 | toggleNeg: function() { 84 | this.toggleType('negative'); 85 | }, 86 | 87 | posStyle: function() { 88 | return this.state.isPos ? 'primary' : 'default'; 89 | }, 90 | 91 | negStyle: function() { 92 | return this.state.isNeg ? 'warning' : 'default'; 93 | }, 94 | 95 | render: function() { 96 | var target = this.props.target.value; 97 | return ( 98 | 99 | 100 | Click to add this extraction to the target table as a positive example}> 102 | 105 | 106 | Click to add this extraction to the target table as a negative example}> 108 | 111 | 112 | 113 | 114 | ); 115 | } 116 | }); 117 | 118 | module.exports = AddResultButton; 119 | -------------------------------------------------------------------------------- /webapp/app/js/components/search/QueryViewer.js: -------------------------------------------------------------------------------- 1 | var React = require('react'); 2 | var bs = require('react-bootstrap'); 3 | var xhr = require('xhr'); 4 | var tree = require('./Tree.js'); 5 | var Tree = tree.Tree; 6 | var Node = tree.Node; 7 | var QExpr = require('./QExpr.js'); 8 | var Well = bs.Well; 9 | var Panel = bs.Panel; 10 | var Accordion = bs.Accordion; 11 | var Button = bs.Button; 12 | var QueryViewer = React.createClass({ 13 | render: function() { 14 | var rootState = this.props.rootState; 15 | var handleChange = this.props.handleChange; 16 | var config = this.props.config; 17 | var makeUri = this.props.makeUri; 18 | if (rootState == null || rootState.value == null) { 19 | return
; 20 | } else { 21 | return ( 22 | 23 |
24 | 25 | 26 | 32 | 33 | 34 | 35 |
36 | ); 37 | } 38 | } 39 | }); 40 | module.exports = QueryViewer; 41 | -------------------------------------------------------------------------------- /webapp/app/js/components/search/ResultContext.js: -------------------------------------------------------------------------------- 1 | var React = require('react'); 2 | var bs = require('react-bootstrap'); 3 | var ResultContext = React.createClass({ 4 | render: function() { 5 | var context = this.props.context; 6 | var words = context.result.wordData.map(function(w) { return w.word }); 7 | var spans = context.keys; 8 | var highlightedIndex = function(i) { 9 | return spans.some(function(span) { 10 | return span[0] <= i && i < span[1]; 11 | }); 12 | }; 13 | var highlighted = words.map(function(word, i) { 14 | if (highlightedIndex(i)) { 15 | return "" + word + "" 16 | } else { 17 | return word; 18 | } 19 | }); 20 | var innerHtml = highlighted.join(' '); 21 | 22 | return
; 23 | } 24 | }); 25 | module.exports = ResultContext; 26 | -------------------------------------------------------------------------------- /webapp/app/js/components/search/ResultContextSet.js: -------------------------------------------------------------------------------- 1 | var React = require('react'); 2 | var bs = require('react-bootstrap'); 3 | var ResultContext = require('./ResultContext.js'); 4 | var ResultContextSet = React.createClass({ 5 | render: function() { 6 | var group = this.props.group; 7 | var results = group.results; 8 | var contexts = results.map(function(context, i) { 9 | return
; 10 | }); 11 | return {contexts}
; 12 | } 13 | }); 14 | module.exports = ResultContextSet; 15 | -------------------------------------------------------------------------------- /webapp/app/js/components/search/ResultGroup.js: -------------------------------------------------------------------------------- 1 | var React = require('react'); 2 | var bs = require('react-bootstrap'); 3 | var AddResultButton = require('./AddResultButton.js'); 4 | var ResultContextSet = require('./ResultContextSet.js'); 5 | var ResultGroup = React.createClass({ 6 | addCol: function() { 7 | var target = this.props.target; 8 | var group = this.props.group; 9 | if (target && target.value) { 10 | return ; 11 | } else { 12 | return null; 13 | } 14 | }, 15 | keyCols: function() { 16 | var cols = this.props.cols; 17 | if (cols == null || cols.length == 0) { 18 | return null; 19 | } else { 20 | var values = this.props.group.keys; 21 | var makeCol = function(value, i) { 22 | return {value}; 23 | }; 24 | return values.map(makeCol); 25 | } 26 | }, 27 | render: function() { 28 | var group = this.props.group; 29 | var context = ; 30 | return ( 31 | 32 | {this.addCol()} 33 | {this.keyCols()} 34 | {group.size} 35 | {context} 36 | 37 | ); 38 | } 39 | }); 40 | module.exports = ResultGroup; 41 | -------------------------------------------------------------------------------- /webapp/app/js/components/search/SearchForm.js: -------------------------------------------------------------------------------- 1 | var React = require('react'); 2 | var bs = require('react-bootstrap'); 3 | var Row = bs.Row; 4 | var Col = bs.Col; 5 | var Input = bs.Input; 6 | var CorpusSelector = require('../corpora/CorpusSelector.js'); 7 | var TargetSelector = require('./TargetSelector.js'); 8 | var SuggestQueryButtonGroup = require('./SuggestQueryButtonGroup.js'); 9 | const AuthStore = require('../../stores/AuthStore.js'); 10 | 11 | var SearchForm = React.createClass({ 12 | propTypes: { 13 | config: React.PropTypes.object.isRequired, 14 | corpora: React.PropTypes.array.isRequired, 15 | selectedCorpusNames: React.PropTypes.object.isRequired, // This is a linkState. 16 | handleSubmit: React.PropTypes.func.isRequired, 17 | makeUri: React.PropTypes.func.isRequired, 18 | query: React.PropTypes.object.isRequired, 19 | target: React.PropTypes.object, 20 | buttonAfterQuery: React.PropTypes.element, 21 | showQuerySuggestions: React.PropTypes.bool 22 | }, 23 | 24 | showQuerySuggestions: function() { 25 | if(this.props.showQuerySuggestions === undefined) 26 | return !this.props.config.value.ml.disable; 27 | else 28 | return this.props.showQuerySuggestions; 29 | }, 30 | 31 | render: function() { 32 | var self = this; 33 | var config = this.props.config; 34 | var queryWidth = (this.showQuerySuggestions()) ? 8 : 10; 35 | queryWidth = (this.props.target) ? queryWidth : queryWidth + 2; 36 | var querySuggestions = this.showQuerySuggestions() ? 37 | 38 | 45 | : 46 | null; 47 | 48 | var toggleCorpora = function(corpusIndex) { 49 | var toggledCorpusName = self.props.corpora[corpusIndex].name; 50 | var selectedCorpusNames = self.props.selectedCorpusNames.value; 51 | var remove = selectedCorpusNames.indexOf(toggledCorpusName) >= 0; 52 | var newSelectedCorpusNames = []; 53 | selectedCorpusNames.forEach(function(corpusName) { 54 | if(!(remove && corpusName == toggledCorpusName)) 55 | newSelectedCorpusNames.push(corpusName); 56 | }); 57 | if(!remove) 58 | newSelectedCorpusNames.push(toggledCorpusName); 59 | self.props.selectedCorpusNames.requestChange(newSelectedCorpusNames); 60 | }; 61 | 62 | return ( 63 |
64 |
65 | 66 | {(this.props.target) ? : null} 67 | 68 | 72 | 79 | 80 | {querySuggestions} 81 | 82 |
83 |
84 | ); 85 | } 86 | }); 87 | module.exports = SearchForm; 88 | -------------------------------------------------------------------------------- /webapp/app/js/components/search/SuggestQueryButtonGroup.js: -------------------------------------------------------------------------------- 1 | var React = require('react'); 2 | var bs = require('react-bootstrap'); 3 | var SuggestQueryButton = require('./SuggestQueryButton.js'); 4 | var Label = bs.Label 5 | var Row = bs.Row; 6 | var Col = bs.Col; 7 | var ButtonToolbar = bs.ButtonToolbar; 8 | 9 | var SuggestQueryButtonGroup = React.createClass({ 10 | 11 | render: function() { 12 | var props = this.props; 13 | var config = props.config; 14 | var target = props.target; 15 | var query = props.query; 16 | var makeUri = props.makeUri; 17 | var submitQuery = props.submitQuery; 18 | var disabled = props.disabled; 19 | return ( 20 |
21 | 22 | 23 | 32 | 41 | 42 |
) 43 | } 44 | }); 45 | module.exports = SuggestQueryButtonGroup; -------------------------------------------------------------------------------- /webapp/app/js/components/search/TargetSelector.js: -------------------------------------------------------------------------------- 1 | var React = require('react'); 2 | var bs = require('react-bootstrap'); 3 | var TableManager = require('../../managers/TableManager.js'); 4 | var Input = bs.Input; 5 | 6 | var TargetSelector = React.createClass({ 7 | propTypes: { 8 | target: React.PropTypes.object.isRequired 9 | }, 10 | 11 | makeOption: function(name) { 12 | return ; 13 | }, 14 | 15 | render: function() { 16 | var target = this.props.target; 17 | var names = Object.keys(TableManager.getTables()); 18 | var label = "Target Table"; 19 | if (names.length > 0) { 20 | return ( 21 | 22 | {names.map(this.makeOption)} 23 | 24 | ); 25 | } else { 26 | return ( 27 | 28 | 29 | 30 | ); 31 | } 32 | } 33 | }); 34 | module.exports = TargetSelector; 35 | -------------------------------------------------------------------------------- /webapp/app/js/components/search/Tree.js: -------------------------------------------------------------------------------- 1 | var React = require('react'); 2 | var Tree = React.createClass({ 3 | render: function() { return
    {this.props.children}
; } 4 | }); 5 | var Node = React.createClass({ 6 | render: function() { return
{this.props.children}
; } 7 | }); 8 | module.exports = { 9 | Tree: Tree, 10 | Node: Node 11 | }; 12 | -------------------------------------------------------------------------------- /webapp/app/js/components/table/DeleteTableButton.js: -------------------------------------------------------------------------------- 1 | var React = require('react'); 2 | var bs = require('react-bootstrap'); 3 | var TableManager = require('../../managers/TableManager.js'); 4 | var DeleteButton = require('../misc/DeleteButton.js'); 5 | var Glyphicon = bs.Glyphicon; 6 | var DeleteTableButton = React.createClass({ 7 | render: function() { 8 | var table = this.props.table; 9 | var deleteTable = function(e) { 10 | TableManager.deleteTable(table.name); 11 | }; 12 | return ; 13 | } 14 | }); 15 | module.exports = DeleteTableButton; 16 | -------------------------------------------------------------------------------- /webapp/app/js/components/table/DownloadTableButton.js: -------------------------------------------------------------------------------- 1 | var React = require('react'); 2 | var bs = require('react-bootstrap'); 3 | var TableManager = require('../../managers/TableManager.js'); 4 | var ButtonGroup = bs.ButtonGroup; 5 | var Button = bs.Button; 6 | var Glyphicon = bs.Glyphicon; 7 | var DownloadTableButton = React.createClass({ 8 | render: function() { 9 | var table = this.props.table; 10 | var downloadDict = function(e) { 11 | e.stopPropagation(); 12 | var tsv = TableManager.table2csv(table); 13 | var blob = new Blob([tsv], {type: 'text/tsv'}); 14 | var a = document.createElement('a'); 15 | a.href = URL.createObjectURL(blob); 16 | a.download = table.name + ".dict.tsv"; 17 | document.body.appendChild(a); 18 | setTimeout(function() { 19 | a.click(); 20 | document.body.removeChild(a); 21 | }, 50); 22 | }; 23 | var icon = ; 24 | return ; 25 | } 26 | }); 27 | module.exports = DownloadTableButton; 28 | -------------------------------------------------------------------------------- /webapp/app/js/components/table/RowAdder.js: -------------------------------------------------------------------------------- 1 | var React = require('react'); 2 | var bs = require('react-bootstrap'); 3 | var Input = bs.Input; 4 | var RowAdder = React.createClass({ 5 | emptyValues: function() { 6 | return this.props.cols.map(function(col, i) { return ''; }); 7 | }, 8 | getInitialState: function() { 9 | return {values: this.emptyValues()}; 10 | }, 11 | handleSubmit: function(e) { 12 | e.preventDefault(); 13 | this.props.onSubmit(this.state.values); 14 | var firstField = this.refs.col0.getDOMNode(); 15 | this.setState({values: this.emptyValues()}, function() { 16 | firstField.focus(); 17 | }); 18 | }, 19 | handleChange: function(i) { 20 | return function(e) { 21 | if (e.which == 13) { 22 | this.handleSubmit(e); 23 | } else { 24 | var value = e.target.value; 25 | this.state.values[i] = value; 26 | this.setState({values: this.state.values}); 27 | } 28 | }.bind(this); 29 | }, 30 | columnInput: function(col, i) { 31 | var key = col + '.' 
+ i; 32 | var value = this.state.values[i]; 33 | var onChange = this.handleChange(i); 34 | var ph = "Add " + col; 35 | var ref = "col" + i; 36 | var input = ; 41 | return {input}; 42 | }, 43 | render: function() { 44 | var cols = this.props.cols; 45 | var inputs = cols.map(this.columnInput); 46 | var row = Add New Row:{inputs}; 47 | return row; 48 | } 49 | }); 50 | module.exports = RowAdder; 51 | -------------------------------------------------------------------------------- /webapp/app/js/components/table/SubTable.js: -------------------------------------------------------------------------------- 1 | var React = require('react'); 2 | var bs = require('react-bootstrap'); 3 | var BsTable = bs.Table; 4 | var TableRow = require('./TableRow.js'); 5 | var RowAdder = require('./RowAdder.js'); 6 | var TableManager = require('../../managers/TableManager.js'); 7 | var SubTable = React.createClass({ 8 | headerCell: function(col, i) { 9 | return {col}; 10 | }, 11 | thead: function() { 12 | var cols = this.props.table.cols; 13 | var cells = cols.map(this.headerCell); 14 | return {cells}; 15 | }, 16 | row: function(row, i) { 17 | var rowType = this.props.rowType; 18 | var table = this.props.table; 19 | var key = "row" + i; 20 | return ; 21 | }, 22 | tbody: function() { 23 | var rowType = this.props.rowType; 24 | var rows = this.props.table[rowType]; 25 | var rowComponents = rows.map(this.row); 26 | var rowAdder = this.rowAdder(); 27 | return ( 28 | 29 | {rowAdder} 30 | {rowComponents} 31 | 32 | ); 33 | }, 34 | invalidValue: function(valueString) { 35 | return !valueString || (valueString.trim() == ''); 36 | }, 37 | invalidRow: function(valueStrings) { 38 | return valueStrings.some(this.invalidValue); 39 | }, 40 | rowAdder: function() { 41 | var cols = this.props.table.cols; 42 | var tableName = this.props.table.name; 43 | var rowType = this.props.rowType; 44 | var add = function(valueStrings) { 45 | valueStrings = valueStrings.map(function(string) { 46 | return string.toLowerCase(); 47 | }); 48 | if (this.invalidRow(valueStrings)) { return; } 49 | var row = TableManager.stringsRow(valueStrings); 50 | TableManager.addRow(tableName, rowType, row); 51 | }.bind(this); 52 | return ; 53 | }, 54 | render: function() { 55 | var style = {borderTop: 0}; 56 | return ( 57 |
58 | 59 | {this.thead()} 60 | {this.tbody()} 61 | 62 |
63 | ); 64 | } 65 | }); 66 | module.exports = SubTable; 67 | -------------------------------------------------------------------------------- /webapp/app/js/components/table/Table.js: -------------------------------------------------------------------------------- 1 | var React = require('react'); 2 | var bs = require('react-bootstrap'); 3 | var SubTable = require('./SubTable.js'); 4 | var TabbedArea = bs.TabbedArea; 5 | var TabPane = bs.TabPane; 6 | var Table = React.createClass({ 7 | getInitialState: function() { 8 | return {tableTag: this.props.table.tag}; 9 | }, 10 | 11 | shouldComponentUpdate: function(nextProps, nextState) { 12 | return nextProps.table.tag !== this.state.tableTag; 13 | }, 14 | 15 | componentWillReceiveProps: function(nextProps) { 16 | this.setState({tableTag: nextProps.table.tag}); 17 | }, 18 | 19 | pane: function(rowType) { 20 | var table = this.props.table; 21 | var rows = table[rowType]; 22 | var cap = rowType.charAt(0).toUpperCase() + rowType.slice(1); 23 | var title = cap + ' (' + rows.length + ')'; 24 | return ( 25 | 26 | 27 | 28 | ); 29 | }, 30 | 31 | render: function() { 32 | var posPane = this.pane('positive'); 33 | var negPane = this.pane('negative'); 34 | return ( 35 | 36 | {posPane} 37 | {negPane} 38 | 39 | ); 40 | } 41 | }); 42 | module.exports = Table; 43 | -------------------------------------------------------------------------------- /webapp/app/js/components/table/TableAdder.js: -------------------------------------------------------------------------------- 1 | const React = require('react/addons'); 2 | const bs = require('react-bootstrap'); 3 | const Input = bs.Input; 4 | const Panel = bs.Panel; 5 | const Modal = bs.Modal; 6 | const Button = bs.Button; 7 | const Alert = bs.Alert; 8 | const EditableList = require('../misc/EditableList.js'); 9 | const TableManager = require('../../managers/TableManager.js'); 10 | 11 | const TableAdder = React.createClass({ 12 | componentDidMount: function() { 13 | var callback = function() { 14 | // Since this is a callback, the component could have been unmounted in the meantime.
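// (React's isMounted() returns false once the component has been unmounted, so the setState calls below are skipped in that case.)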
15 | if(this.isMounted()) { 16 | if (TableManager.userEmail()) { 17 | this.setState({error: null}); 18 | } else { 19 | this.setState({error: "You must be logged in to create tables."}); 20 | } 21 | } 22 | }.bind(this); 23 | 24 | TableManager.addChangeListener(callback); 25 | callback(); 26 | }, 27 | 28 | getInitialState: function() { 29 | return { 30 | name: '', 31 | cols: [], 32 | error: null 33 | }; 34 | }, 35 | 36 | validCol: function(col) { 37 | return col && col.trim() && this.state.cols.indexOf(col) < 0; 38 | }, 39 | 40 | addCol: function(value) { 41 | if (this.validCol(value)) { 42 | this.state.cols.push(value); 43 | this.setState({cols: this.state.cols}); 44 | } 45 | }, 46 | 47 | removeCol: function(i) { 48 | this.state.cols.splice(i, 1); 49 | this.setState({cols: this.state.cols}); 50 | }, 51 | 52 | handleNameChange: function(e) { 53 | this.setState({name: e.target.value}); 54 | }, 55 | 56 | handleSubmit: function(e) { 57 | e.preventDefault(); 58 | var table = { 59 | name: this.state.name, 60 | cols: this.state.cols, 61 | positive: [], 62 | negative: [] 63 | }; 64 | try { 65 | this.props.onSubmit(table); 66 | } catch(err) { 67 | alert(err); // TODO: replace with something pretty 68 | } 69 | this.setState({name: '', cols: []}); 70 | }, 71 | 72 | submitDisabled: function() { 73 | var name = this.state.name; 74 | var cols = this.state.cols; 75 | return name.trim() == '' || cols.length == 0 || this.state.error; 76 | }, 77 | 78 | nameInput: function() { 79 | var label = "Table Name"; 80 | var placeholder = "Enter Table Name"; 81 | var onChange = this.handleNameChange; 82 | var value = this.state.name; 83 | return ; 91 | }, 92 | 93 | columnInput: function() { 94 | var name = "Table Columns"; 95 | var label = ; 96 | var list = ; 102 | return
    {label} {list}
    ; 103 | }, 104 | 105 | submitButton: function() { 106 | return ; 112 | }, 113 | 114 | render: function() { 115 | var nameInput = this.nameInput(); 116 | var columnInput = this.columnInput(); 117 | var submitButton = this.submitButton(); 118 | 119 | var content = [nameInput, columnInput]; 120 | if(this.state.error) 121 | content.push({this.state.error}); 122 | content.push(submitButton); 123 | 124 | return
    {content}
    ; 125 | } 126 | }); 127 | 128 | module.exports = TableAdder; 129 | -------------------------------------------------------------------------------- /webapp/app/js/components/table/TableButtonToolbar.js: -------------------------------------------------------------------------------- 1 | var React = require('react'); 2 | var bs = require('react-bootstrap'); 3 | var ButtonToolbar = bs.ButtonToolbar; 4 | var ButtonGroup = bs.ButtonGroup; 5 | var DeleteTableButton = require('./DeleteTableButton.js'); 6 | var DownloadTableButton = require('./DownloadTableButton.js'); 7 | var TableButtonToolbar = React.createClass({ 8 | render: function() { 9 | var table = this.props.table; 10 | var deleteButton = ; 11 | var downloadButton = ; 12 | return ( 13 | 14 | 15 | {downloadButton} 16 | {deleteButton} 17 | 18 | 19 | ); 20 | } 21 | }); 22 | module.exports = TableButtonToolbar; 23 | -------------------------------------------------------------------------------- /webapp/app/js/components/table/TableRow.js: -------------------------------------------------------------------------------- 1 | var React = require('react'); 2 | var bs = require('react-bootstrap'); 3 | var TableManager = require('../../managers/TableManager.js'); 4 | var DeleteButton = require('../misc/DeleteButton.js'); 5 | var ProvenanceButton = require('../misc/ProvenanceButton.js'); 6 | var TableRow = React.createClass({ 7 | valueCell: function(value, i) { 8 | var valueString = TableManager.valueString(value); 9 | return {valueString}; 10 | }, 11 | deleteButton: function() { 12 | var table = this.props.table; 13 | var rowType = this.props.rowType; 14 | var row = this.props.row; 15 | var callback = function() { 16 | TableManager.deleteRow(table.name, rowType, row); 17 | }; 18 | return ; 19 | }, 20 | render: function() { 21 | var rowData = this.props.row; 22 | var values = rowData.values; 23 | var cells = values.map(this.valueCell); 24 | var provenance = ; 25 | return {this.deleteButton()}{cells}{provenance}; 26 | } 27 | }); 28 | module.exports = TableRow; 29 | -------------------------------------------------------------------------------- /webapp/app/js/components/table/TablesInterface.js: -------------------------------------------------------------------------------- 1 | var React = require('react'); 2 | var bs = require('react-bootstrap'); 3 | var Row = bs.Row; 4 | var Col = bs.Col; 5 | var Accordion = bs.Accordion; 6 | var Panel = bs.Panel; 7 | var Badge = bs.Badge; 8 | var TableAdder = require('./TableAdder.js'); 9 | var TableLoader = require('./TableLoader.js'); 10 | var Table = require('./Table.js'); 11 | var TableManager = require('../../managers/TableManager.js'); 12 | var TableButtonToolbar = require('./TableButtonToolbar.js'); 13 | const AuthStore = require('../../stores/AuthStore.js'); 14 | 15 | var TablesInterface = React.createClass({ 16 | 17 | // This mixin makes this interface require authentication 18 | mixins: [ AuthStore.Mixin ], 19 | 20 | tables: function() { 21 | var tables = TableManager.getTables(); 22 | var components = Object.keys(tables).map(function(name, i) { 23 | var table = tables[name]; 24 | var badge = {table.positive.length + " / " + table.negative.length}; 25 | var buttons = ; 26 | var header = {name}{badge}   {buttons}; 27 | return ( 28 | 29 | 30 | 31 | ); 32 | }.bind(this)); 33 | return {components}; 34 | }, 35 | 36 | addTable: function(table) { 37 | TableManager.createTable(table); 38 | this.props.target.requestChange(table.name); 39 | }, 40 | 41 | adder: function() { 42 | return ( 43 |
    44 | 45 | 46 | 47 | 48 | 49 | 50 |
51 | ); 52 | }, 53 | 54 | render: function() { 55 | return ( 56 | 57 | {this.adder()} 58 | {this.tables()} 59 | 60 | ); 61 | } 62 | }); 63 | module.exports = TablesInterface; 64 | -------------------------------------------------------------------------------- /webapp/app/js/constants/AuthConstants.js: -------------------------------------------------------------------------------- 1 | 2 | module.exports = { 3 | SIGN_IN: 'SIGN_IN', 4 | SIGN_OUT: 'SIGN_OUT' 5 | }; 6 | -------------------------------------------------------------------------------- /webapp/app/js/constants/CorporaConstants.js: -------------------------------------------------------------------------------- 1 | 2 | module.exports = { 3 | REFRESH: 'REFRESH' 4 | }; 5 | -------------------------------------------------------------------------------- /webapp/app/js/dispatcher/AppDispatcher.js: -------------------------------------------------------------------------------- 1 | 2 | var Dispatcher = require('flux').Dispatcher; 3 | 4 | module.exports = new Dispatcher(); -------------------------------------------------------------------------------- /webapp/app/js/stores/AuthStore.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | const React = require('react'); 4 | const AppDispatcher = require('../dispatcher/AppDispatcher'); 5 | const EventEmitter = require('events').EventEmitter; 6 | const AuthConstants = require('../constants/AuthConstants'); 7 | const assign = require('object-assign'); 8 | const CHANGE_EVENT = 'change'; 9 | 10 | const AuthStore = assign({}, EventEmitter.prototype, { 11 | 12 | /** 13 | * Save (or delete) the user's info via localStorage 14 | */ 15 | setAuthState(userEmail, userImageUrl) { 16 | if (userEmail) { 17 | localStorage.userEmail = userEmail; 18 | localStorage.userImageUrl = userImageUrl; 19 | } else { 20 | delete localStorage.userEmail; 21 | delete localStorage.userImageUrl; 22 | } 23 | this.emitChange(); 24 | }, 25 | 26 | /** 27 | * Sign in callback function for Google auth 28 | */ 29 | onSignIn(authResult) { 30 | if (authResult['status']['signed_in']) { 31 | gapi.client.load('plus','v1', function() { 32 | var request = gapi.client.plus.people.get({ userId: 'me' }); 33 | request.execute(function(resp) { 34 | var userEmail = resp.emails[0].value; 35 | AuthStore.setAuthState(userEmail, resp.image.url); 36 | }); 37 | }); 38 | } else { 39 | AuthStore.setAuthState(null); 40 | } 41 | }, 42 | 43 | /** 44 | * Sign in function for Google auth 45 | */ 46 | signIn() { 47 | var additionalParams = { 48 | scope: 'email', 49 | callback: this.onSignIn, 50 | cookiepolicy: 'single_host_origin', 51 | clientid: '793503486502-8q1pf7shj3jq7ak2q8ib1ca5hlufdfv7.apps.googleusercontent.com' 52 | }; 53 | gapi.auth.signIn(additionalParams); 54 | }, 55 | 56 | /** 57 | * Sign out 58 | */ 59 | signOut() { 60 | gapi.auth.signOut(); 61 | this.setAuthState(null); 62 | }, 63 | 64 | /** 65 | * Return a user's email address 66 | */ 67 | getUserEmail() { 68 | return localStorage['userEmail']; 69 | }, 70 | 71 | /** 72 | * Return a user's image url 73 | */ 74 | getUserImageUrl() { 75 | return localStorage['userImageUrl']; 76 | }, 77 | 78 | /** 79 | * Return whether the user is signed in 80 | */ 81 | authenticated() { 82 | return !!localStorage['userEmail']; 83 | }, 84 | 85 | emitChange() { 86 | this.emit(CHANGE_EVENT); 87 | }, 88 | 89 | addChangeListener: function(callback) { 90 | this.on(CHANGE_EVENT, callback); 91 | }, 92 | 93 | removeChangeListener: function(callback) { 94 |
this.removeListener(CHANGE_EVENT, callback); 95 | } 96 | }); 97 | 98 | // Register callback to handle all updates 99 | AppDispatcher.register(function(action) { 100 | if (action.actionType === AuthConstants.SIGN_IN) { 101 | AuthStore.signIn(); 102 | } else if (action.actionType === AuthConstants.SIGN_OUT) { 103 | AuthStore.signOut(); 104 | } 105 | }); 106 | 107 | module.exports = AuthStore; 108 | -------------------------------------------------------------------------------- /webapp/app/js/stores/CorporaStore.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | const React = require('react'); 4 | const AppDispatcher = require('../dispatcher/AppDispatcher'); 5 | const EventEmitter = require('events').EventEmitter; 6 | const CorporaConstants = require('../constants/CorporaConstants'); 7 | const xhr = require('xhr'); 8 | const assign = require('object-assign'); 9 | const CHANGE_EVENT = 'change'; 10 | 11 | const CorporaStore = assign({}, EventEmitter.prototype, { 12 | getCorpora() { 13 | if(localStorage.corpora) 14 | return JSON.parse(localStorage.corpora); 15 | else 16 | return []; 17 | }, 18 | 19 | getCorpusNames() { 20 | function isSelected(corpus) { 21 | return corpus.defaultSelected 22 | } 23 | return this.getCorpora().filter(isSelected).map(function(corpus) { 24 | return corpus.name; 25 | }); 26 | }, 27 | 28 | refresh() { 29 | var self = this; 30 | 31 | xhr({ 32 | uri: '/api/corpora', 33 | method: 'GET' 34 | }, function(err, response, body) { 35 | if(response.statusCode === 200) { 36 | var newCorpora = JSON.parse(body).map(function (corpus, i) { 37 | return { 38 | name: corpus.name, 39 | description: corpus.description, 40 | defaultSelected: corpus.defaultSelected 41 | } 42 | }); 43 | 44 | if(JSON.stringify(newCorpora) !== JSON.stringify(self.getCorpora())) { 45 | localStorage.corpora = JSON.stringify(newCorpora); 46 | self.emit(CHANGE_EVENT); 47 | } 48 | } else { 49 | console.warn("Updating corpora failed: " + body); 50 | } 51 | }); 52 | }, 53 | 54 | addChangeListener(callback) { 55 | this.on(CHANGE_EVENT, callback); 56 | }, 57 | 58 | removeChangeListener(callback) { 59 | this.removeListener(CHANGE_EVENT, callback); 60 | } 61 | }); 62 | 63 | AppDispatcher.register(function(action) { 64 | if (action.actionType === CorporaConstants.REFRESH) { 65 | CorporaStore.refresh(); 66 | } 67 | }); 68 | 69 | CorporaStore.refresh(); 70 | 71 | module.exports = CorporaStore; 72 | -------------------------------------------------------------------------------- /webapp/app/js/stores/NamedPatternsStore.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | const React = require('react'); 4 | const AppDispatcher = require('../dispatcher/AppDispatcher'); 5 | const EventEmitter = require('events').EventEmitter; 6 | const assign = require('object-assign'); 7 | const AuthStore = require('./AuthStore'); 8 | var xhr = require('xhr'); 9 | 10 | const CHANGE_EVENT = 'change'; 11 | 12 | var userEmail = null; 13 | var patterns = []; 14 | var error = null; 15 | 16 | const NamedPatternsStore = assign({}, EventEmitter.prototype, { 17 | setUserEmail(newUserEmail) { 18 | if(newUserEmail !== userEmail) { 19 | userEmail = newUserEmail; 20 | this.refreshPatternsFromServer(); 21 | } 22 | }, 23 | 24 | refreshPatternsFromServer() { 25 | patterns = []; 26 | error = null; 27 | 28 | var self = this; 29 | 30 | // Get the patterns from the server 31 | if(userEmail) { 32 | xhr({ 33 | uri: '/api/patterns/' + 
encodeURIComponent(userEmail), 34 | method: 'GET' 35 | }, function (err, resp, body) { 36 | if (resp.statusCode == 200) { 37 | // set patterns 38 | var patternsObjects = JSON.parse(body); 39 | var newPatterns = {}; 40 | patternsObjects.forEach(function (patternObject) { 41 | newPatterns[patternObject.name] = patternObject.pattern; 42 | }); 43 | 44 | patterns = newPatterns; 45 | error = null; 46 | } else { 47 | error = resp.body + " (" + resp.statusCode + ")"; 48 | } 49 | self.emitChange(); 50 | }); 51 | } 52 | 53 | this.emitChange(); 54 | }, 55 | 56 | savePattern(name, pattern) { 57 | if(userEmail) { 58 | patterns[name] = pattern; 59 | xhr({ 60 | uri: '/api/patterns/' + encodeURIComponent(userEmail) + '/' + name, 61 | method: 'PUT', 62 | body: pattern 63 | }, function(err, response, body) { 64 | if(response.statusCode !== 200) { 65 | console.log("Unexpected response saving a pattern: " + JSON.stringify(response)); 66 | } 67 | }); 68 | this.emitChange(); 69 | } 70 | }, 71 | 72 | deletePattern(name) { 73 | if(userEmail) { 74 | delete patterns[name]; 75 | xhr({ 76 | uri: '/api/patterns/' + encodeURIComponent(userEmail) + '/' + name, 77 | method: 'DELETE' 78 | }, function(err, response, body) { 79 | if(response.statusCode !== 200) { 80 | console.log("Unexpected response deleting a pattern: " + JSON.stringify(response)); 81 | } 82 | }); 83 | this.emitChange(); 84 | } 85 | }, 86 | 87 | getPatterns() { 88 | return patterns; 89 | }, 90 | 91 | getError() { 92 | return error; 93 | }, 94 | 95 | emitChange() { 96 | this.emit(CHANGE_EVENT); 97 | }, 98 | 99 | addChangeListener: function(callback) { 100 | this.on(CHANGE_EVENT, callback); 101 | }, 102 | 103 | removeChangeListener: function(callback) { 104 | this.removeListener(CHANGE_EVENT, callback); 105 | } 106 | }); 107 | 108 | // initialize NamedPatternStore 109 | NamedPatternsStore.setUserEmail(AuthStore.getUserEmail()); 110 | AuthStore.addChangeListener(function() { 111 | NamedPatternsStore.setUserEmail(AuthStore.getUserEmail()); 112 | }); 113 | 114 | module.exports = NamedPatternsStore; 115 | -------------------------------------------------------------------------------- /webapp/gulpfile.js: -------------------------------------------------------------------------------- 1 | 'use strict'; 2 | 3 | var syrup = require('syrup'); 4 | var gulp = require('gulp'); 5 | 6 | syrup.gulp.init(gulp, { compressJs: false, disableJsHint: true, detectGlobals: true }, undefined, 7 | { 8 | js: 'app/js/DictApp.js', 9 | less: 'app/css/*.less', 10 | allLess: 'app/css/**/*.less', 11 | build: '../public' 12 | } 13 | ); -------------------------------------------------------------------------------- /webapp/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "ike", 3 | "version": "1.0.0", 4 | "description": "build dicts", 5 | "main": "index.js", 6 | "scripts": { 7 | "test": "echo \"Error: no test specified\" && exit 0", 8 | "build": "gulp build", 9 | "watch": "gulp watch --color" 10 | }, 11 | "author": "tonyf", 12 | "license": "ISC", 13 | "devDependencies": { 14 | "babel": "4.3.0", 15 | "babel-core": "4.7.16", 16 | "babelify": "5.0.4", 17 | "bootstrap": "3.3.2", 18 | "flux": "2.0.1", 19 | "gulp": "3.9.1", 20 | "object-assign": "1.0.0", 21 | "react": "0.13.2", 22 | "react-bootstrap": "0.13.3", 23 | "react-router": "0.13.3", 24 | "react-loader": "1.4.0", 25 | "syrup": "0.1.4", 26 | "xhr": "2.0.1" 27 | } 28 | } 29 | --------------------------------------------------------------------------------
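A minimal usage sketch (not part of the repo; the require paths and logging are illustrative assumptions): the constants, dispatcher, and stores above follow the standard Flux wiring, so a view triggers a store by dispatching an action whose actionType matches a handler registered with AppDispatcher, and re-renders when the store emits its change event.

var AppDispatcher = require('./webapp/app/js/dispatcher/AppDispatcher');
var AuthConstants = require('./webapp/app/js/constants/AuthConstants');
var AuthStore = require('./webapp/app/js/stores/AuthStore');

// Views react to store change events rather than to return values.
AuthStore.addChangeListener(function() {
  console.log('Signed in as ' + AuthStore.getUserEmail());
});

// Routed by the AppDispatcher.register callback in AuthStore.js to AuthStore.signIn().
AppDispatcher.dispatch({ actionType: AuthConstants.SIGN_IN });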