├── .gitignore ├── LICENSE ├── README.md ├── assembly.sbt ├── build.sbt ├── data ├── dqa │ └── sample │ │ ├── diagram_features_synthetic.json │ │ ├── diagrams.json │ │ └── questions.json └── geoquery │ ├── all_folds.ccg │ ├── fold0.ccg │ ├── fold1.ccg │ ├── fold2.ccg │ ├── fold3.ccg │ ├── fold4.ccg │ ├── fold5.ccg │ ├── fold6.ccg │ ├── fold7.ccg │ ├── fold8.ccg │ ├── fold9.ccg │ ├── np_list.ccg │ └── test.ccg ├── docs └── Probabilistic Neural Programs.ipynb ├── experiments ├── dipart │ ├── README.md │ └── scripts │ │ ├── config.sh │ │ ├── preprocess │ │ ├── extract_tqa_diagrams.sh │ │ ├── generate_diagram_feats.py │ │ ├── ngrams.py │ │ ├── ngrams_to_features.py │ │ ├── preprocess_diagram_annotations.py │ │ ├── preprocess_diagram_annotations.sh │ │ ├── preprocess_ngrams.py │ │ ├── sample_pairs.py │ │ ├── tqa_diagrams_to_features.py │ │ └── tqa_to_features.py │ │ ├── run.sh │ │ ├── train_affine.sh │ │ ├── train_mn_lstm.sh │ │ ├── train_nearest_neighbor.sh │ │ ├── train_pointer_net.sh │ │ ├── train_ssmn.sh │ │ ├── train_ssmn_loglikelihood.sh │ │ ├── train_ssmn_unary.sh │ │ ├── train_structural_consistency.sh │ │ └── visualize │ │ ├── generate_heatmap.py │ │ ├── heatmap_data.py │ │ ├── visualize_global.sh │ │ └── visualize_loss.py ├── dqa │ └── scripts │ │ └── train.sh ├── geoquery │ └── scripts │ │ ├── example.sh │ │ ├── run_experiment.sh │ │ └── train_docker.sh └── pascal_parts │ └── scripts │ ├── config.sh │ ├── preprocess │ ├── extract_tqa_diagrams.sh │ ├── generate_diagram_feats.py │ ├── ngrams.py │ ├── ngrams_to_features.py │ ├── preprocess_diagram_annotations.py │ ├── preprocess_diagram_annotations.sh │ ├── preprocess_ngrams.py │ ├── preprocess_pascal.sh │ ├── sample_pairs.py │ ├── tqa_diagrams_to_features.py │ └── tqa_to_features.py │ ├── run.sh │ ├── train_affine.sh │ ├── train_mn.sh │ ├── train_mn_lstm.sh │ ├── train_mn_lstm_bso.sh │ ├── train_mn_lstm_dropout.sh │ ├── train_nearest_neighbor.sh │ ├── train_pointer_net.sh │ ├── train_ssmn.sh │ ├── train_ssmn_ablated.sh │ ├── train_ssmn_ablated_1iter.sh │ ├── train_ssmn_ablated_dropout.sh │ ├── train_ssmn_loglikelihood.sh │ ├── train_ssmn_lstmonly.sh │ ├── train_ssmn_pretrain.sh │ ├── train_ssmn_unary.sh │ ├── train_structural_consistency.sh │ └── visualize │ └── visualize_loss.py ├── lib └── jklol.jar ├── project ├── assembly.sbt └── plugins.sbt └── src ├── main ├── docker │ └── Dockerfile └── scala │ └── org │ └── allenai │ ├── dqa │ ├── labeling │ │ ├── AnswerSelector.scala │ │ ├── Diagram.scala │ │ ├── DiagramFeatures.scala │ │ ├── LabelingDqaCli.scala │ │ ├── LabelingExample.scala │ │ ├── LabelingExecutor.scala │ │ ├── LabelingP3Model.scala │ │ └── LabelingUtil.scala │ └── matching │ │ ├── MatchingExample.scala │ │ ├── MatchingModel.scala │ │ ├── TestMatchingCli.scala │ │ ├── TrainMatchingCli.scala │ │ └── VisualizeMatchingCli.scala │ └── pnp │ ├── BsoTrainer.scala │ ├── CompGraph.scala │ ├── Env.scala │ ├── ExecutionScore.scala │ ├── GlobalLoglikelihoodTrainer.scala │ ├── LoglikelihoodTrainer.scala │ ├── Pnp.scala │ ├── PnpContinuation.scala │ ├── PnpExample.scala │ ├── PnpInferenceContext.scala │ ├── PnpModel.scala │ ├── PnpSearchQueue.scala │ ├── PnpUtil.scala │ ├── examples │ ├── MultilayerPerceptron.scala │ └── Seq2Seq.scala │ ├── semparse │ ├── ActionSpace.scala │ ├── EntityLinking.scala │ ├── Scope.scala │ ├── SemanticParser.scala │ ├── SemanticParserState.scala │ ├── SemanticParserUtils.scala │ ├── Template.scala │ ├── TestSemanticParserCli.scala │ └── TrainSemanticParserCli.scala │ └── util │ └── Trie.scala └── test └── 
scala └── org └── allenai └── pnp ├── BsoTrainerSpec.scala ├── GlobalLoglikelihoodTrainerSpec.scala ├── LoglikelihoodTrainerSpec.scala ├── PnpSpec.scala ├── PnpUtilSpec.scala ├── SampleSpec.scala └── semparse └── SemanticParserSpec.scala /.gitignore: -------------------------------------------------------------------------------- 1 | *.class 2 | *.log 3 | 4 | # sbt specific 5 | .cache 6 | .history 7 | .lib/ 8 | dist/* 9 | target/ 10 | lib_managed/ 11 | src_managed/ 12 | project/boot/ 13 | project/plugins/project/ 14 | 15 | # Scala-IDE specific 16 | .scala_dependencies 17 | .worksheet 18 | .idea 19 | 20 | # Emacs temp files 21 | *~ 22 | 23 | # Notebooks 24 | docs/.ipynb_checkpoints 25 | /bin/ 26 | -------------------------------------------------------------------------------- /assembly.sbt: -------------------------------------------------------------------------------- 1 | import AssemblyKeys._ // put this at the top of the file 2 | 3 | assemblySettings 4 | 5 | test in assembly := {} 6 | -------------------------------------------------------------------------------- /build.sbt: -------------------------------------------------------------------------------- 1 | import org.allenai.plugins.DockerBuildPlugin 2 | 3 | organization := "org.allenai" 4 | 5 | name := "pnp" 6 | 7 | description := "Library for probabilistic neural programming" 8 | 9 | version := "0.1.2" 10 | 11 | scalaVersion := "2.11.8" 12 | 13 | libraryDependencies ++= Seq( 14 | "com.google.guava" % "guava" % "17.0", 15 | "com.fasterxml.jackson.core" % "jackson-databind" % "2.2.3", 16 | "com.fasterxml.jackson.core" % "jackson-core" % "2.2.3", 17 | "com.fasterxml.jackson.core" % "jackson-annotations" % "2.2.3", 18 | "net.sf.jopt-simple" % "jopt-simple" % "4.9", 19 | "org.scalatest" %% "scalatest" % "3.0.0" % "test", 20 | "io.spray" %% "spray-json" % "1.3.3" 21 | ) 22 | 23 | licenses += ("Apache-2.0", url("https://www.apache.org/licenses/LICENSE-2.0")) 24 | 25 | bintrayOrganization := Some("allenai") 26 | 27 | bintrayRepository := "private" 28 | 29 | fork := true 30 | 31 | // Docker configuration 32 | enablePlugins(DockerBuildPlugin) 33 | dockerImageBase := "allenai-docker-private-docker.bintray.io/java-dynet" 34 | dockerCopyMappings += ((file("lib"), "lib")) 35 | dockerCopyMappings += ((file("data"), "data")) 36 | dockerCopyMappings += ((file("experiments"), "experiments")) 37 | // mainClass := Some("org.allenai.pnp.semparse.SemanticParserCli") 38 | -------------------------------------------------------------------------------- /data/dqa/sample/diagram_features_synthetic.json: -------------------------------------------------------------------------------- 1 | 
{"points":[{"xy":[335,300],"vec":[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0]},{"xy":[1290,232],"vec":[0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0]},{"xy":[325,330],"vec":[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0]},{"xy":[90,65],"vec":[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0]},{"xy":[790,163],"vec":[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0]},{"xy":[1238,337],"vec":[0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0]},{"xy":[927,133],"vec":[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0]},{"xy":[1307,38],"vec":[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0]},{"xy":[450,362],"vec":[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0
.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0]},{"xy":[425,375],"vec":[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0]},{"xy":[1230,385],"vec":[0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0]},{"xy":[312,300],"vec":[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0]},{"xy":[82,63],"vec":[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0]},{"xy":[87,65],"vec":[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0]},{"xy":[438,362],"vec":[0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0]}],"imageId":"087-airplane_2_line_art.png"} 2 | -------------------------------------------------------------------------------- /data/dqa/sample/diagrams.json: -------------------------------------------------------------------------------- 1 | {"imageId":"087-airplane_2_line_art.png","height":425,"width":1500,"points":[{"textId":"C","xy":[87,65],"label":"cockpit"},{"textId":"B","xy":[312,300],"label":"engine"},{"textId":"E","xy":[438,362],"label":"flap"},{"textId":"D","xy":[1238,337],"label":"tail"},{"textId":"A","xy":[927,133],"label":"wing"}],"label":"airplane","id":"087-airplane_2_line_art.png_0"} 2 | -------------------------------------------------------------------------------- /data/dqa/sample/questions.json: 
-------------------------------------------------------------------------------- 1 | {"question" : "what part of this object generates lift ?", "answerOptions" : ["A", "B", "C", "D", "E"], "correctAnswer" : 0, "diagramId" : "087-airplane_2_line_art.png_0"} 2 | {"question" : "what does part C do ?", "answerOptions" : ["contain the pilot", "generate lift", "generate thrust", "control aviation"], "correctAnswer" : 0, "diagramId" : "087-airplane_2_line_art.png_0"} 3 | -------------------------------------------------------------------------------- /experiments/dipart/README.md: -------------------------------------------------------------------------------- 1 | # Structured Set Matching Networks for One-Shot Part Labeling 2 | 3 | This directory contains scripts for running the experiments from 4 | "Structured Set Matching Networks for One-Shot Part Labeling." (TODO: 5 | arXiv link) The corresponding Scala code is located in 6 | the `org.allenai.dqa.matching` package. 7 | 8 | ## Data Set 9 | 10 | TODO 11 | 12 | ## Running Experiments 13 | 14 | Once the data is downloaded and preprocessed, you can train and 15 | evaluate the SSMN model by running the following from the root `pnp` 16 | directory: 17 | 18 | ``` 19 | sbt assembly 20 | ./experiments/dipart/scripts/train_ssmn.sh 21 | ``` 22 | 23 | This script sends its output to the 24 | `experiments/dipart/output/.../ssmn` directory. After the script 25 | completes, this directory will contain several files: 26 | 27 | * `log.txt` shows the progress of model training and corresponding statistics. 28 | * `model.ser` is the serialized trained model. 29 | * `validation_error_log.txt` contains the validation error results. The end 30 | of this file gives the accuracy numbers reported in the paper. 31 | * `validation_error.json` is a JSON representation of the model's validation set predictions. 32 | 33 | The script will also automatically create an HTML visualization of the model's 34 | predictions in the `validation_error` subdirectory. To view it, simply 35 | open `index.html` in a web browser. The visualization includes 36 | per-category error rates, confusion matrices, and the predicted part 37 | labeling for each example. 38 | 39 | The `./experiments/dipart/scripts/` directory contains several other 40 | scripts for training the baselines from the paper. These scripts 41 | similarly send their output to `experiments/dipart/output/`. In some 42 | cases the files may be slightly different from those above. For 43 | example, the matching network (`train_mn_lstm.sh`) has two validation 44 | error logs, one that enforces the matching constraint at test time and 45 | one that doesn't.
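For example, to train the matching network baseline and inspect both of its validation error logs, run the commands below. The output path shown assumes the default `DATA_SPLIT` and `EXPERIMENT_NAME` settings in `experiments/dipart/scripts/config.sh`; adjust it if you change that configuration.

```
./experiments/dipart/scripts/train_mn_lstm.sh
tail experiments/dipart/output/unseen_category/dqa_310/pnp_update/matching_lstm2/validation_error_matching_log.txt
tail experiments/dipart/output/unseen_category/dqa_310/pnp_update/matching_lstm2/validation_error_independent_log.txt
```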
46 | -------------------------------------------------------------------------------- /experiments/dipart/scripts/config.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | SCRIPT_DIR="experiments/dipart/scripts/" 4 | DATA_DIR="data/dqa_parts_v1" 5 | DIAGRAMS="$DATA_DIR/diagrams.json" 6 | DIAGRAM_FEATURES="$DATA_DIR/diagram_features_xy.json" 7 | DATA_SPLIT="unseen_category" 8 | TRAIN_BEAM="5" 9 | TEST_BEAM="20" 10 | EPOCHS="1" 11 | TRAIN_OPTS="" 12 | TRAIN="$DATA_DIR/data_splits/$DATA_SPLIT/train.json" 13 | TEST="$DATA_DIR/data_splits/$DATA_SPLIT/validation.json" 14 | 15 | OUT_DIR="experiments/dipart/output/" 16 | EXPERIMENT_NAME="$DATA_SPLIT/dqa_310/pnp_update/" 17 | EXPERIMENT_DIR="$OUT_DIR/$EXPERIMENT_NAME/" 18 | 19 | # MATCHING_MODEL_DIR="$EXPERIMENT_DIR/matching_model.ser" 20 | # INDEPENDENT_MODEL="$EXPERIMENT_DIR/independent_model.ser" 21 | # BINARY_MATCHING_MODEL="$EXPERIMENT_DIR/binary_matching_model.ser" 22 | 23 | mkdir -p $EXPERIMENT_DIR 24 | -------------------------------------------------------------------------------- /experiments/dipart/scripts/preprocess/extract_tqa_diagrams.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | TQA_DIR=data/tqa/ 4 | TQA=$TQA_DIR/tqa_dataset_beta_v8.json 5 | IMAGE_DIR=$TQA_DIR/ 6 | TQA_DIAGRAMS=$TQA_DIR/tqa_diagrams.json 7 | 8 | cat $TQA | jq -c '.[] | .diagramAnnotations | to_entries | .[]' > $TQA_DIAGRAMS 9 | 10 | sips -g pixelHeight -g pixelWidth $IMAGE_DIR/**/*.png > $DIAGRAM_SIZE_OUTPUT 11 | -------------------------------------------------------------------------------- /experiments/dipart/scripts/preprocess/generate_diagram_feats.py: -------------------------------------------------------------------------------- 1 | #!/usr/local/bin/python3 2 | # Generate feature vectors for each diagram part 3 | 4 | import sys 5 | import json 6 | import random 7 | import pickle 8 | import gzip 9 | import numpy as np 10 | import re 11 | 12 | diagram_label_file = sys.argv[1] 13 | vgg_dir = sys.argv[2] 14 | matching_dir = sys.argv[3] 15 | out_file = sys.argv[4] 16 | 17 | def label_to_matching_vector(diagram_json, label): 18 | matching_vec = [] 19 | # img_id = re.sub("-([^0-9])", "_\g<1>", j["imageId"]) 20 | img_id = j["imageId"] 21 | matching_file = matching_dir + "/" + j["label"] + "/" + img_id + "_" + label + ".pklz" 22 | # print(matching_file) 23 | with open(matching_file, 'rb') as g: 24 | matching = pickle.loads(gzip.decompress(g.read())) 25 | 26 | if len(matching) == 1: 27 | # Choi's format 28 | matching_vec = matching[0] 29 | else: 30 | # Ani's format 31 | matching_vec = matching 32 | 33 | # print(matching_vec) 34 | 35 | return matching_vec 36 | 37 | ''' 38 | # One-hot at a label-specific index. 
39 | DIMS = 32 40 | vec = [0.0] * DIMS 41 | h = label.__hash__() % DIMS 42 | vec[h] = 1.0 43 | return np.array(vec) 44 | ''' 45 | 46 | def label_to_vgg_vector(diagram_json, label, scale): 47 | vgg_vec = [] 48 | vgg_file = vgg_dir + "/" + j["label"] + "/" + j["imageId"] + "_" + label + "_" + str(scale) + ".png.pkl" 49 | with open(vgg_file, 'rb') as g: 50 | vgg = pickle.loads(gzip.decompress(g.read())) 51 | vgg_vec = vgg[0] 52 | 53 | return vgg_vec 54 | 55 | def label_to_feature_vector(label, xy, width, height): 56 | DIMS = 2 57 | vec = [0.0] * DIMS 58 | 59 | # X/Y coordinates normalized by image size 60 | vec[0] = float(xy[0]) / width 61 | vec[1] = float(xy[1]) / height 62 | return np.array(vec) 63 | 64 | # Random with a high-scoring element in a label-specific index. 65 | ''' 66 | h = label.__hash__() % (DIMS / 2) 67 | vec[h] = 3.0 68 | for i in xrange(len(vec)): 69 | vec[i] += random.gauss(0.0, 1.0) 70 | return vec 71 | ''' 72 | 73 | # One-hot at a label-specific index. 74 | ''' 75 | h = label.__hash__() % DIMS 76 | vec[h] = 1.0 77 | return vec 78 | ''' 79 | 80 | # Random around a mean per label 81 | ''' 82 | for i in xrange(len(vec)): 83 | mean_random = random.Random() 84 | mean_random.seed(label.__hash__() * i) 85 | mean = mean_random.uniform(-1, 1) 86 | 87 | vec[i] = random.gauss(mean, 1.0) 88 | return vec 89 | ''' 90 | 91 | # Completely random 92 | ''' 93 | for i in xrange(len(vec)): 94 | vec[i] = random.gauss(0.0, 1.0) 95 | return vec 96 | ''' 97 | 98 | image_points = {} 99 | with open(diagram_label_file, 'r') as f: 100 | for line in f: 101 | j = json.loads(line) 102 | 103 | image_id = j["imageId"] 104 | width = j["width"] 105 | height = j["height"] 106 | 107 | if not image_id in image_points: 108 | image_points[image_id] = {} 109 | 110 | # print image_id 111 | for p in j["points"]: 112 | xy = tuple(p["xy"]) 113 | label = p["label"] 114 | xy_vec = label_to_feature_vector(label, xy, width, height) 115 | matching_vec = label_to_matching_vector(j, label) 116 | 117 | # Zeroed out to keep file size down. 118 | # vgg_vec_0 = label_to_vgg_vector(j, label, 0) 119 | # vgg_vec_1 = label_to_vgg_vector(j, label, 1) 120 | # vgg_vec_2 = label_to_vgg_vector(j, label, 2) 121 | vgg_vec_0 = np.array([0]) 122 | vgg_vec_1 = np.array([0]) 123 | vgg_vec_2 = np.array([0]) 124 | 125 | # print " ", xy, label 126 | # print " ", vec 127 | 128 | image_points[image_id][xy] = {"xy_vec" : xy_vec, "matching_vec" : matching_vec, "vgg_0_vec" : vgg_vec_0, 129 | "vgg_1_vec" : vgg_vec_1, "vgg_2_vec" : vgg_vec_2} 130 | 131 | # Convert dict format to something jsonable 132 | with open(out_file, 'w') as f: 133 | for image_id in image_points.keys(): 134 | point_vectors = [] 135 | for point in image_points[image_id]: 136 | point_dict = {} 137 | point_dict["xy"] = list(point) 138 | 139 | feature_names = image_points[image_id][point] 140 | for k in feature_names.keys(): 141 | point_dict[k] = feature_names[k].tolist() 142 | 143 | point_vectors.append(point_dict) 144 | 145 | image_json = {"imageId" : image_id, "points" : point_vectors} 146 | print(json.dumps(image_json), file=f) 147 | 148 | -------------------------------------------------------------------------------- /experiments/dipart/scripts/preprocess/ngrams.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import gzip 3 | import sys 4 | import re 5 | from math import sqrt,log 6 | 7 | class GoogleNgrams: 8 | 9 | ''' 10 | gzip_dir is the name of a directory containing the 11 | gzipped parts of the ngrams data. 
12 | ''' 13 | def __init__(self, gzip_dir): 14 | self.gzip_dir = gzip_dir 15 | 16 | # Index the start word of each file 17 | self.files = glob.glob(self.gzip_dir + '/*.gz') 18 | self.words = [] 19 | for f in self.files: 20 | start_word = self.get_start_word(f) 21 | self.words.append(start_word) 22 | 23 | def get_start_word(self, filename): 24 | with gzip.open(filename, 'r') as f: 25 | return f.readline().split('\t')[0] 26 | 27 | def find_files_with_word(self, query_word): 28 | files_to_search = [] 29 | for i in xrange(len(self.words)): 30 | if query_word >= self.words[i] and (i + 1 == len(self.words) or query_word <= self.words[i + 1]): 31 | files_to_search.append(self.files[i]) 32 | 33 | return files_to_search 34 | 35 | def run_query(self, query_word): 36 | filenames = self.find_files_with_word(query_word) 37 | results = {} 38 | for filename in filenames: 39 | q = query_word + '\t' 40 | with gzip.open(filename, 'r') as f: 41 | for line in f: 42 | if line.startswith(q): 43 | parts = line.split('\t') 44 | results[parts[1]] = int(parts[2]) 45 | 46 | return results 47 | -------------------------------------------------------------------------------- /experiments/dipart/scripts/preprocess/ngrams_to_features.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # Generate features for each part based on its name. 3 | 4 | import sys 5 | import ujson as json 6 | import re 7 | 8 | ngrams_file = sys.argv[1] 9 | # out_file = sys.argv[2] 10 | 11 | def counts_to_features(counts): 12 | prep_pattern = re.compile("([^ ]*)/[^ ]*/prep/.*") 13 | prep_counts = {} 14 | for (k, v) in counts.iteritems(): 15 | m = prep_pattern.search(k) 16 | if m is not None: 17 | prep = m.group(1) 18 | if prep not in prep_counts: 19 | prep_counts[prep] = 0 20 | prep_counts[prep] += v 21 | 22 | return prep_counts 23 | 24 | 25 | ngram_features = {} 26 | with open(ngrams_file, 'r') as f: 27 | for line in f: 28 | j = json.loads(line) 29 | features = counts_to_features(j["counts"]) 30 | 31 | if j["part1"] != j["part2"]: 32 | print j["part1"], j["part2"] 33 | 34 | for (k, v) in features.iteritems(): 35 | if "/CC/" in k: 36 | continue 37 | 38 | print " ", k, v 39 | 40 | ngram_features[(j["part1"], j["part2"])] = features 41 | 42 | all_features = set([]) 43 | for (k, counts) in ngram_features.iteritems(): 44 | all_features.update(counts.keys()) 45 | 46 | feature_indexes = dict([(f, i) for (i, f) in enumerate(all_features)]) 47 | 48 | -------------------------------------------------------------------------------- /experiments/dipart/scripts/preprocess/preprocess_diagram_annotations.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import sys 4 | import ujson as json 5 | import random 6 | import re 7 | 8 | diagram_file = sys.argv[1] 9 | diagram_size_file = sys.argv[2] 10 | out_file = sys.argv[3] 11 | text_labels = ["A", "B", "C", "D", "E", "F"] 12 | 13 | diagram_sizes = {} 14 | with open(diagram_size_file, 'r') as f: 15 | lines = f.readlines() 16 | 17 | for ind in xrange(len(lines) / 3): 18 | i = ind * 3 19 | idline = lines[i] 20 | id = idline[(idline.rfind('/') + 1):].strip() 21 | height = re.search("[0-9]+", lines[i + 1].strip()).group(0) 22 | width = re.search("[0-9]+", lines[i + 2].strip()).group(0) 23 | 24 | # print id, width, height 25 | diagram_sizes[id] = (int(width), int(height)) 26 | 27 | 28 | output = [] 29 | with open(diagram_file, 'r') as f: 30 | j = json.load(f) 31 | 32 | for t in j.iterkeys(): 33 | 
diagrams = j[t] 34 | for diagram_id in diagrams.iterkeys(): 35 | if not diagram_id in diagram_sizes: 36 | print "WARNING: could not find size for", diagram_id, "type:", t 37 | continue 38 | 39 | part_labels = diagrams[diagram_id] 40 | label_point_map = {} 41 | 42 | for label in part_labels: 43 | label_point_map[label] = part_labels[label] 44 | 45 | point_annotated_id = t + "/" + diagram_id 46 | 47 | labels = sorted(label_point_map.keys()) 48 | 49 | # shuffle the text labels for each index 50 | random.seed(t.__hash__()) 51 | shuffled_text_labels = [x for x in text_labels[:len(labels)]] 52 | random.shuffle(shuffled_text_labels) 53 | 54 | points = [{"label": k, "xy" : label_point_map[k], "textId" : shuffled_text_labels[i]} for (i,k) in enumerate(labels)] 55 | 56 | (width, height) = diagram_sizes[diagram_id] 57 | output.append( {"id" : point_annotated_id, "imageId" : diagram_id, "label" : t, "points" : points, "width" : width, "height" : height} ) 58 | 59 | with open(out_file, 'w') as f: 60 | for d in output: 61 | print >> f, json.dumps(d) 62 | -------------------------------------------------------------------------------- /experiments/dipart/scripts/preprocess/preprocess_diagram_annotations.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | DATA_DIR=data/dqa_parts_v1/ 4 | SCRIPT_DIR=experiments/dipart/scripts/preprocess/ 5 | RAW_ANNOTATIONS=$DATA_DIR/annotation.json 6 | IMAGE_DIR=$DATA_DIR/ 7 | # SYNTACTIC_NGRAMS=~/Desktop/syntactic_ngrams/ 8 | 9 | DIAGRAM_SIZE_OUTPUT=$DATA_DIR/diagram_sizes.txt 10 | OUTPUT=$DATA_DIR/diagrams.json 11 | # NGRAM_OUTPUT=$DATA_DIR/syntactic_ngrams.json 12 | VGG_DIR=$DATA_DIR/vgg_features/dqa_matching_final_complete_working_crop_feat_fc2/ 13 | MATCHING_DIR=$DATA_DIR/matchingnet_features/dqa_310/ 14 | FEATURE_OUTPUT=$DATA_DIR/diagram_features_xy.json 15 | 16 | UNSEEN_SAMPLE=$DATA_DIR/unseen_sample_trvats.json 17 | UNSEEN_S_DIR=$DATA_DIR/data_splits/unseen_sample 18 | UNSEEN_SAMPLE_TRAIN=$UNSEEN_S_DIR/train.json 19 | UNSEEN_SAMPLE_VAL=$UNSEEN_S_DIR/validation.json 20 | UNSEEN_SAMPLE_TEST=$UNSEEN_S_DIR/test.json 21 | 22 | UNSEEN_CATEGORY=$DATA_DIR/unseen_category_trvats.json 23 | UNSEEN_C_DIR=$DATA_DIR/data_splits/unseen_category 24 | UNSEEN_CATEGORY_TRAIN=$UNSEEN_C_DIR/train.json 25 | UNSEEN_CATEGORY_VAL=$UNSEEN_C_DIR/validation.json 26 | UNSEEN_CATEGORY_TEST=$UNSEEN_C_DIR/test.json 27 | 28 | sips -g pixelHeight -g pixelWidth $IMAGE_DIR/**/*.png > $DIAGRAM_SIZE_OUTPUT 29 | ./$SCRIPT_DIR/preprocess_diagram_annotations.py $RAW_ANNOTATIONS $DIAGRAM_SIZE_OUTPUT $OUTPUT 30 | ./$SCRIPT_DIR/generate_diagram_feats.py $OUTPUT $VGG_DIR $MATCHING_DIR $FEATURE_OUTPUT 31 | 32 | # Generate data splits. Note that the sampling is seeded so as to be repeatable 33 | # (as long as the number of samples doesn't change.) 
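# Arguments to sample_pairs.py (see that script): <split_json> <output_json> <split_key> <pairs_per_target> <diagrams_per_type>.
# Negative values mean no subsampling, i.e. keep every source/target pair within each category.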
34 | ./$SCRIPT_DIR/sample_pairs.py $UNSEEN_SAMPLE $UNSEEN_SAMPLE_TRAIN train -1 -1 35 | ./$SCRIPT_DIR/sample_pairs.py $UNSEEN_SAMPLE $UNSEEN_SAMPLE_VAL val -1 -1 36 | ./$SCRIPT_DIR/sample_pairs.py $UNSEEN_SAMPLE $UNSEEN_SAMPLE_TEST test -1 -1 37 | ./$SCRIPT_DIR/sample_pairs.py $UNSEEN_CATEGORY $UNSEEN_CATEGORY_TRAIN train -1 -1 38 | ./$SCRIPT_DIR/sample_pairs.py $UNSEEN_CATEGORY $UNSEEN_CATEGORY_VAL val -1 -1 39 | ./$SCRIPT_DIR/sample_pairs.py $UNSEEN_CATEGORY $UNSEEN_CATEGORY_TEST test -1 -1 40 | 41 | # Unseen sample splits for different numbers of training diagrams 42 | SPLITS=( 2 5 10 20 ) 43 | for i in ${SPLITS[@]}; do 44 | DIR=$DATA_DIR/data_splits/unseen_sample_$i 45 | mkdir -p $DIR 46 | TRAIN=$DATA_DIR/data_splits/unseen_sample_$i/train.json 47 | VAL=$DATA_DIR/data_splits/unseen_sample_$i/validation.json 48 | TEST=$DATA_DIR/data_splits/unseen_sample_$i/test.json 49 | 50 | python $SCRIPT_DIR/sample_pairs.py $UNSEEN_SAMPLE $TRAIN train 1 $i 51 | python $SCRIPT_DIR/sample_pairs.py $UNSEEN_SAMPLE $VAL val -1 -1 52 | python $SCRIPT_DIR/sample_pairs.py $UNSEEN_SAMPLE $TEST test -1 -1 53 | done 54 | -------------------------------------------------------------------------------- /experiments/dipart/scripts/preprocess/preprocess_ngrams.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # Generate features for each part based on its name. 3 | 4 | import sys 5 | import ujson as json 6 | import re 7 | from ngrams import GoogleNgrams 8 | 9 | diagram_file = sys.argv[1] 10 | ngrams_dir = sys.argv[2] 11 | out_file = sys.argv[3] 12 | 13 | ngrams = GoogleNgrams(ngrams_dir) 14 | 15 | def filter_dependencies(raw_counts, pattern): 16 | filtered_counts = {} 17 | p = re.compile(pattern) 18 | for (k, v) in raw_counts.iteritems(): 19 | parts = k.split() 20 | for part in parts: 21 | result = p.match(part) 22 | if result is not None: 23 | filtered_counts[k] = v 24 | break 25 | 26 | return filtered_counts 27 | 28 | type_parts = {} 29 | with open(diagram_file, 'r') as f: 30 | for line in f: 31 | j = json.loads(line) 32 | diagram_label = j["label"] 33 | 34 | if diagram_label not in type_parts: 35 | type_parts[diagram_label] = set([]) 36 | 37 | part_labels = [point["label"] for point in j["points"]] 38 | type_parts[diagram_label].update(part_labels) 39 | 40 | ''' 41 | for diagram_label in type_parts.iterkeys(): 42 | print diagram_label 43 | for part_label in type_parts[diagram_label]: 44 | print " ", part_label 45 | ''' 46 | 47 | # type_parts = {'tractor' : type_parts['tractor']} 48 | 49 | all_parts = set([]) 50 | for diagram_label in type_parts.iterkeys(): 51 | all_parts.update(type_parts[diagram_label]) 52 | 53 | print len(all_parts), "unique parts" 54 | 55 | part_vectors = {} 56 | for part in all_parts: 57 | query = part.split("_")[-1].strip().encode('ascii') 58 | print part, "->", query 59 | vector = ngrams.run_query(query) 60 | part_vectors[part] = vector 61 | 62 | with open(out_file, 'w') as f: 63 | for diagram_label in type_parts.iterkeys(): 64 | parts = type_parts[diagram_label] 65 | for p1 in parts: 66 | p1_vec = part_vectors[p1] 67 | for p2 in parts: 68 | p1_p2_counts = filter_dependencies(p1_vec, p2 + "/") 69 | print >> f, json.dumps( {"part1" : p1, "part2" : p2, "counts" : p1_p2_counts} ) 70 | -------------------------------------------------------------------------------- /experiments/dipart/scripts/preprocess/sample_pairs.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | 
import sys 4 | import json 5 | import random 6 | import re 7 | 8 | split_file = sys.argv[1] 9 | out_file = sys.argv[2] 10 | key = sys.argv[3] 11 | samples = int(sys.argv[4]) 12 | diagram_samples = int(sys.argv[5]) 13 | 14 | def sample_pairs(diagram_list, num_per_target, num_diagrams_per_type): 15 | typed_diagrams = [(d, d.split('/')[0]) for d in diagram_list] 16 | 17 | diagrams_by_type = {} 18 | for (d, t) in typed_diagrams: 19 | if not t in diagrams_by_type: 20 | diagrams_by_type[t] = set([]) 21 | diagrams_by_type[t].add(d) 22 | 23 | if num_diagrams_per_type >= 0: 24 | num_types_below_threshold = 0 25 | for t in diagrams_by_type.iterkeys(): 26 | ds = sorted(list(diagrams_by_type[t])) 27 | random.seed(t.__hash__()) 28 | random.shuffle(ds) 29 | 30 | if len(ds) < num_diagrams_per_type: 31 | num_types_below_threshold += 1 32 | 33 | diagrams_by_type[t] = set(ds[:num_diagrams_per_type]) 34 | 35 | print num_types_below_threshold, "/", len(diagrams_by_type), "types below threshold of", num_diagrams_per_type 36 | 37 | typed_diagrams = [] 38 | for t in diagrams_by_type.iterkeys(): 39 | for d in diagrams_by_type[t]: 40 | typed_diagrams.append((d, t)) 41 | 42 | pairs = [] 43 | for (d, t) in typed_diagrams: 44 | other_diagrams = list(diagrams_by_type[t] - set([d])) 45 | other_diagrams.sort() 46 | 47 | if num_per_target >= 0: 48 | random.seed(d.__hash__()) 49 | random.shuffle(other_diagrams) 50 | 51 | num = min(len(other_diagrams), num_per_target) 52 | for i in xrange(num): 53 | pairs.append({'src' : other_diagrams[i], 'target' : d}) 54 | else: 55 | for other_diagram in other_diagrams: 56 | pairs.append({'src' : other_diagram, 'target' : d}) 57 | 58 | return pairs 59 | 60 | j = None 61 | with open(split_file, 'r') as f: 62 | j = json.load(f) 63 | 64 | pairs = sample_pairs(j[key], samples, diagram_samples) 65 | 66 | with open(out_file, 'wb') as f: 67 | for pair in pairs: 68 | print >> f, json.dumps(pair) 69 | 70 | -------------------------------------------------------------------------------- /experiments/dipart/scripts/preprocess/tqa_diagrams_to_features.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # Generate features for each part based on its name 3 | # using positional information from the TQA dataset. 4 | 5 | import sys 6 | import ujson as json 7 | import re 8 | from collections import defaultdict 9 | 10 | tqa_diagrams_file = sys.argv[1] 11 | diagrams_file = sys.argv[2] 12 | 13 | # Get the parts of each kind of diagram. 
14 | type_parts = defaultdict(set) 15 | with open(diagrams_file, 'r') as f: 16 | for line in f: 17 | j = json.loads(line) 18 | diagram_label = j["label"] 19 | part_labels = [point["label"] for point in j["points"]] 20 | type_parts[diagram_label].update(part_labels) 21 | 22 | all_parts = set([]) 23 | part_part_map = defaultdict(set) 24 | part_counts = defaultdict(lambda: 0) 25 | for diagram_label in type_parts.iterkeys(): 26 | all_parts.update(type_parts[diagram_label]) 27 | 28 | for part in type_parts[diagram_label]: 29 | part_part_map[part].update(type_parts[diagram_label]) 30 | part_counts[part] += 1 31 | 32 | sorted_counts = sorted(part_counts.items(), key=lambda x: x[1], reverse=True) 33 | 34 | for (k,v) in sorted_counts: 35 | print k, v 36 | 37 | # Read token positions from TQA 38 | token_x = defaultdict(lambda: 0) 39 | token_y = defaultdict(lambda: 0) 40 | token_count = defaultdict(lambda: 0) 41 | with open(tqa_diagrams_file, 'r') as f: 42 | for line in f: 43 | j = json.loads(line) 44 | 45 | for ocr in j["value"]: 46 | rect = ocr["rectangle"] 47 | text = ocr["text"] 48 | 49 | x = None 50 | y = None 51 | if not isinstance(rect[0], list): 52 | x = rect[0] 53 | y = rect[1] 54 | else: 55 | x = (rect[0][0] + rect[1][0]) / 2 56 | y = (rect[0][1] + rect[1][1]) / 2 57 | 58 | tokens = text.split() 59 | for token in tokens: 60 | # print x, y, token 61 | 62 | token_x[token] += x 63 | token_y[token] += y 64 | token_count[token] += 1 65 | 66 | num_not_found = 0 67 | for part in all_parts: 68 | tokens = part.split("_") 69 | c = 0 70 | x = 0 71 | y = 0 72 | 73 | for token in tokens: 74 | c += token_count[token] 75 | x += token_x[token] 76 | y += token_y[token] 77 | 78 | if c == 0: 79 | print part, "n/a" 80 | num_not_found += 1 81 | else: 82 | nx = float(x) / c 83 | ny = float(y) / c 84 | print part, nx, ny, c 85 | 86 | 87 | print "not found: ", num_not_found, " / ", len(all_parts) 88 | -------------------------------------------------------------------------------- /experiments/dipart/scripts/preprocess/tqa_to_features.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # Generate features for each part based on its name 3 | # using positional information from the TQA dataset. 4 | 5 | import sys 6 | import ujson as json 7 | import re 8 | from collections import defaultdict 9 | 10 | diagrams_file = sys.argv[1] 11 | sample_file = sys.argv[2] 12 | 13 | sample_json = None 14 | with open(sample_file, 'r') as f: 15 | sample_json = json.load(f) 16 | 17 | diagram_to_fold = {} 18 | for (fold, diagrams) in sample_json.iteritems(): 19 | for d in diagrams: 20 | diagram_to_fold[d] = fold 21 | 22 | # Get the parts of each kind of diagram. 
23 | fold_parts = defaultdict(list) 24 | with open(diagrams_file, 'r') as f: 25 | for line in f: 26 | j = json.loads(line) 27 | fold = diagram_to_fold[j["id"]] 28 | part_labels = [point["label"] for point in j["points"]] 29 | fold_parts[fold].extend(part_labels) 30 | 31 | for (fold1, parts1) in fold_parts.iteritems(): 32 | p1s = set(parts1) 33 | for (fold2, parts2) in fold_parts.iteritems(): 34 | p2s = set(parts2) 35 | 36 | inter = p1s & p2s 37 | fold1pct = float(len(inter)) / len(p1s) 38 | 39 | print fold1, "/", fold2, fold1pct 40 | for part in inter: 41 | print " ", part 42 | -------------------------------------------------------------------------------- /experiments/dipart/scripts/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | CLASSPATH="target/scala-2.11/pnp-assembly-0.1.2.jar" 4 | echo $CLASSPATH 5 | java -Djava.library.path=lib -classpath $CLASSPATH $@ 6 | -------------------------------------------------------------------------------- /experiments/dipart/scripts/train_affine.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | source "experiments/dipart/scripts/config.sh" 4 | 5 | MY_NAME=affine_transform 6 | MY_DIR=$EXPERIMENT_DIR/$MY_NAME/ 7 | MY_MODEL=$MY_DIR/model.ser 8 | MY_FLAGS="--affineTransform" 9 | 10 | mkdir -p $MY_DIR 11 | 12 | echo "Training $MY_NAME model..." 13 | ./$SCRIPT_DIR/run.sh org.allenai.dqa.matching.TrainMatchingCli --beamSize $TRAIN_BEAM --epochs $EPOCHS --examples $TRAIN --diagrams $DIAGRAMS --diagramFeatures $DIAGRAM_FEATURES --modelOut $MY_MODEL $TRAIN_OPTS $MY_FLAGS > $MY_DIR/log.txt 14 | 15 | echo "Testing $MY_NAME model..." 16 | ./$SCRIPT_DIR/run.sh org.allenai.dqa.matching.TestMatchingCli --beamSize 120 --examples $TEST --diagrams $DIAGRAMS --diagramFeatures $DIAGRAM_FEATURES --model $MY_MODEL --lossJson $MY_DIR/validation_error.json > $MY_DIR/validation_error_log.txt 17 | 18 | # ./$SCRIPT_DIR/run.sh org.allenai.dqa.matching.TestMatchingCli --beamSize 120 --examples $TEST --diagrams $DIAGRAMS --diagramFeatures $DIAGRAM_FEATURES --model $MY_MODEL --lossJson $MY_DIR/test_error.json > $MY_DIR/test_error_log.txt 19 | 20 | # ./$SCRIPT_DIR/run.sh org.allenai.dqa.matching.TestMatchingCli --beamSize $TEST_BEAM --examples $TEST --diagrams $DIAGRAMS --diagramFeatures $DIAGRAM_FEATURES --model $MY_MODEL --lossJson $MY_DIR/test_error.json > $MY_DIR/test_error_log.txt 21 | 22 | # mkdir -p $MY_DIR/test_error/ 23 | # python $SCRIPT_DIR/visualize/visualize_loss.py $MY_DIR/test_error.json $MY_DIR/test_error/ 24 | # tar cf $MY_DIR/test_error.tar $MY_DIR/test_error/ 25 | # gzip -f $MY_DIR/test_error.tar 26 | 27 | # /$SCRIPT_DIR/run.sh org.allenai.dqa.matching.TestMatchingCli --beamSize $TEST_BEAM --examples $TRAIN --diagrams $DIAGRAMS --diagramFeatures $DIAGRAM_FEATURES --model $MY_MODEL --lossJson $MY_DIR/train_error.json > $MY_DIR/train_error_log.txt 28 | 29 | echo "Finished training $MY_NAME" 30 | -------------------------------------------------------------------------------- /experiments/dipart/scripts/train_mn_lstm.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | source "experiments/dipart/scripts/config.sh" 4 | 5 | MY_NAME=matching_lstm2 6 | MY_DIR=$EXPERIMENT_DIR/$MY_NAME/ 7 | MY_MODEL=$MY_DIR/model.ser 8 | MY_FLAGS="--lstmEncode --matchIndependent --loglikelihood" 9 | MY_EPOCHS=5 10 | 11 | mkdir -p $MY_DIR 12 | 13 | echo "Training $MY_NAME model..." 
14 | ./$SCRIPT_DIR/run.sh org.allenai.dqa.matching.TrainMatchingCli --beamSize $TRAIN_BEAM --epochs $MY_EPOCHS --examples $TRAIN --diagrams $DIAGRAMS --diagramFeatures $DIAGRAM_FEATURES --modelOut $MY_MODEL $TRAIN_OPTS $MY_FLAGS > $MY_DIR/log.txt 15 | 16 | echo "Testing $MY_NAME model..." 17 | ./$SCRIPT_DIR/run.sh org.allenai.dqa.matching.TestMatchingCli --beamSize $TEST_BEAM --examples $TEST --diagrams $DIAGRAMS --diagramFeatures $DIAGRAM_FEATURES --model $MY_MODEL --lossJson $MY_DIR/validation_error_independent.json > $MY_DIR/validation_error_independent_log.txt 18 | 19 | ./$SCRIPT_DIR/run.sh org.allenai.dqa.matching.TestMatchingCli --beamSize 120 --enforceMatching --globalNormalize --examples $TEST --diagrams $DIAGRAMS --diagramFeatures $DIAGRAM_FEATURES --model $MY_MODEL --lossJson $MY_DIR/validation_error_matching.json > $MY_DIR/validation_error_matching_log.txt 20 | 21 | mkdir -p $MY_DIR/validation_error_matching/ 22 | python $SCRIPT_DIR/visualize/visualize_loss.py $MY_DIR/validation_error_matching.json $MY_DIR/validation_error_matching/ 23 | tar cf $MY_DIR/validation_error_matching.tar $MY_DIR/validation_error_matching/ 24 | gzip -f $MY_DIR/validation_error_matching.tar 25 | 26 | ./$SCRIPT_DIR/run.sh org.allenai.dqa.matching.TestMatchingCli --beamSize $TEST_BEAM --examples $TRAIN --diagrams $DIAGRAMS --diagramFeatures $DIAGRAM_FEATURES --model $MY_MODEL --lossJson $MY_DIR/train_error_independent.json > $MY_DIR/train_error_independent_log.txt 27 | 28 | ./$SCRIPT_DIR/run.sh org.allenai.dqa.matching.TestMatchingCli --beamSize 120 --enforceMatching --globalNormalize --examples $TRAIN --diagrams $DIAGRAMS --diagramFeatures $DIAGRAM_FEATURES --model $MY_MODEL --lossJson $MY_DIR/train_error_matching.json > $MY_DIR/train_error_matching_log.txt 29 | 30 | mkdir -p $MY_DIR/train_error_matching/ 31 | python $SCRIPT_DIR/visualize/visualize_loss.py $MY_DIR/train_error_matching.json $MY_DIR/train_error_matching/ 32 | tar cf $MY_DIR/train_error_matching.tar $MY_DIR/train_error_matching/ 33 | gzip -f $MY_DIR/train_error_matching.tar 34 | 35 | echo "Finished training $MY_NAME" 36 | 37 | -------------------------------------------------------------------------------- /experiments/dipart/scripts/train_nearest_neighbor.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | source "experiments/dipart/scripts/config.sh" 4 | 5 | MY_NAME=nearest_neighbor 6 | MY_DIR=$EXPERIMENT_DIR/$MY_NAME/ 7 | MY_MODEL=$MY_DIR/model.ser 8 | MY_FLAGS="--nearestNeighbor --matchIndependent" 9 | 10 | mkdir -p $MY_DIR 11 | 12 | echo "Training $MY_NAME model..." 13 | ./$SCRIPT_DIR/run.sh org.allenai.dqa.matching.TrainMatchingCli --beamSize 1 --epochs 0 --examples $TRAIN --diagrams $DIAGRAMS --diagramFeatures $DIAGRAM_FEATURES --modelOut $MY_MODEL $MY_FLAGS > $MY_DIR/log.txt 14 | 15 | echo "Testing $MY_NAME model..." 
16 | ./$SCRIPT_DIR/run.sh org.allenai.dqa.matching.TestMatchingCli --beamSize $TEST_BEAM --examples $TEST --diagrams $DIAGRAMS --diagramFeatures $DIAGRAM_FEATURES --model $MY_MODEL --lossJson $MY_DIR/validation_error_independent.json > $MY_DIR/validation_error_independent_log.txt 17 | 18 | ./$SCRIPT_DIR/run.sh org.allenai.dqa.matching.TestMatchingCli --beamSize 120 --enforceMatching --globalNormalize --examples $TEST --diagrams $DIAGRAMS --diagramFeatures $DIAGRAM_FEATURES --model $MY_MODEL --lossJson $MY_DIR/validation_error_matching.json > $MY_DIR/validation_error_matching_log.txt 19 | 20 | # mkdir -p $MY_DIR/validation_error/ 21 | # python $SCRIPT_DIR/visualize/visualize_loss.py $MY_DIR/validation_error.json $MY_DIR/validation_error/ 22 | # tar cf $MY_DIR/validation_error.tar $MY_DIR/validation_error/ 23 | # gzip -f $MY_DIR/validation_error.tar 24 | 25 | # ./$SCRIPT_DIR/run.sh org.allenai.dqa.matching.TestMatchingCli --beamSize $TEST_BEAM --examples $TRAIN --diagrams $DIAGRAMS --diagramFeatures $DIAGRAM_FEATURES --model $MY_MODEL --lossJson $MY_DIR/train_error.json > $MY_DIR/train_error_log.txt 26 | 27 | # mkdir -p $MY_DIR/train_error/ 28 | # python $SCRIPT_DIR/visualize/visualize_loss.py $MY_DIR/train_error.json $MY_DIR/train_error/ 29 | # tar cf $MY_DIR/train_error.tar $MY_DIR/train_error/ 30 | # gzip -f $MY_DIR/train_error.tar 31 | 32 | echo "Finished training $MY_NAME" 33 | 34 | -------------------------------------------------------------------------------- /experiments/dipart/scripts/train_pointer_net.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | source "experiments/dipart/scripts/config.sh" 4 | 5 | MY_NAME=pointer_net 6 | MY_DIR=$EXPERIMENT_DIR/$MY_NAME/ 7 | MY_MODEL=$MY_DIR/model.ser 8 | MY_FLAGS="--pointerNet --loglikelihood" 9 | 10 | mkdir -p $MY_DIR 11 | 12 | echo "Training $MY_NAME model..." 13 | ./$SCRIPT_DIR/run.sh org.allenai.dqa.matching.TrainMatchingCli --beamSize $TRAIN_BEAM --epochs $EPOCHS --examples $TRAIN --diagrams $DIAGRAMS --diagramFeatures $DIAGRAM_FEATURES --modelOut $MY_MODEL $TRAIN_OPTS $MY_FLAGS > $MY_DIR/log.txt 14 | 15 | echo "Testing $MY_NAME model..."
16 | ./$SCRIPT_DIR/run.sh org.allenai.dqa.matching.TestMatchingCli --beamSize $TEST_BEAM --examples $TEST --diagrams $DIAGRAMS --diagramFeatures $DIAGRAM_FEATURES --model $MY_MODEL --lossJson $MY_DIR/validation_error.json > $MY_DIR/validation_error_log.txt 17 | 18 | # mkdir -p $MY_DIR/validation_error/ 19 | # python $SCRIPT_DIR/visualize/visualize_loss.py $MY_DIR/validation_error.json $MY_DIR/validation_error/ 20 | # tar cf $MY_DIR/validation_error.tar $MY_DIR/validation_error/ 21 | # gzip -f $MY_DIR/validation_error.tar 22 | 23 | ./$SCRIPT_DIR/run.sh org.allenai.dqa.matching.TestMatchingCli --beamSize $TEST_BEAM --examples $TRAIN --diagrams $DIAGRAMS --diagramFeatures $DIAGRAM_FEATURES --model $MY_MODEL --lossJson $MY_DIR/train_error.json > $MY_DIR/train_error_log.txt 24 | 25 | # mkdir -p $MY_DIR/train_error/ 26 | # python $SCRIPT_DIR/visualize/visualize_loss.py $MY_DIR/train_error.json $MY_DIR/train_error/ 27 | # tar cf $MY_DIR/train_error.tar $MY_DIR/train_error/ 28 | # gzip -f $MY_DIR/train_error.tar 29 | 30 | echo "Finished training $MY_NAME" 31 | 32 | -------------------------------------------------------------------------------- /experiments/dipart/scripts/train_ssmn.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | source "experiments/dipart/scripts/config.sh" 4 | 5 | MY_NAME=ssmn 6 | MY_DIR=$EXPERIMENT_DIR/$MY_NAME/ 7 | MY_MODEL=$MY_DIR/model.ser 8 | MY_FLAGS="--structuralFactor --partClassifier --relativeAppearance --lstmEncode" 9 | 10 | mkdir -p $MY_DIR 11 | 12 | echo "Training $MY_NAME model..." 13 | ./$SCRIPT_DIR/run.sh org.allenai.dqa.matching.TrainMatchingCli --beamSize $TRAIN_BEAM --epochs $EPOCHS --examples $TRAIN --diagrams $DIAGRAMS --diagramFeatures $DIAGRAM_FEATURES --modelOut $MY_MODEL $TRAIN_OPTS $MY_FLAGS > $MY_DIR/log.txt 14 | 15 | echo "Testing $MY_NAME model..." 16 | ./$SCRIPT_DIR/run.sh org.allenai.dqa.matching.TestMatchingCli --beamSize $TEST_BEAM --examples $TEST --diagrams $DIAGRAMS --diagramFeatures $DIAGRAM_FEATURES --model $MY_MODEL --lossJson $MY_DIR/validation_error.json > $MY_DIR/validation_error_log.txt 17 | 18 | # ./$SCRIPT_DIR/run.sh org.allenai.dqa.matching.TestMatchingCli --beamSize $TEST_BEAM --examples $TEST --diagrams $DIAGRAMS --diagramFeatures $DIAGRAM_FEATURES --model $MY_MODEL --lossJson $MY_DIR/test_error.json > $MY_DIR/test_error_log.txt 19 | 20 | mkdir -p $MY_DIR/validation_error/ 21 | python $SCRIPT_DIR/visualize_loss.py $MY_DIR/validation_error.json $MY_DIR/validation_error/ 22 | tar cf $MY_DIR/validation_error.tar $MY_DIR/validation_error/ 23 | gzip -f $MY_DIR/validation_error.tar 24 | 25 | # /$SCRIPT_DIR/run.sh org.allenai.dqa.matching.TestMatchingCli --beamSize $TEST_BEAM --examples $TRAIN --diagrams $DIAGRAMS --diagramFeatures $DIAGRAM_FEATURES --model $MY_MODEL --lossJson $MY_DIR/train_error.json > $MY_DIR/train_error_log.txt 26 | 27 | echo "Finished training $MY_NAME" 28 | -------------------------------------------------------------------------------- /experiments/dipart/scripts/train_ssmn_loglikelihood.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | source "experiments/dipart/scripts/config.sh" 4 | 5 | MY_NAME=ssmn_lstm_loglikelihood 6 | MY_DIR=$EXPERIMENT_DIR/$MY_NAME/ 7 | MY_MODEL=$MY_DIR/model.ser 8 | MY_FLAGS="--structuralFactor --partClassifier --relativeAppearance --lstmEncode --loglikelihood" 9 | 10 | mkdir -p $MY_DIR 11 | 12 | echo "Training $MY_NAME model..." 
13 | ./$SCRIPT_DIR/run.sh org.allenai.dqa.matching.TrainMatchingCli --beamSize $TRAIN_BEAM --epochs $EPOCHS --examples $TRAIN --diagrams $DIAGRAMS --diagramFeatures $DIAGRAM_FEATURES --modelOut $MY_MODEL $TRAIN_OPTS $MY_FLAGS > $MY_DIR/log.txt 14 | 15 | echo "Testing $MY_NAME model..." 16 | ./$SCRIPT_DIR/run.sh org.allenai.dqa.matching.TestMatchingCli --beamSize $TEST_BEAM --examples $TEST --diagrams $DIAGRAMS --diagramFeatures $DIAGRAM_FEATURES --model $MY_MODEL --lossJson $MY_DIR/validation_error.json > $MY_DIR/validation_error_log.txt 17 | 18 | # ./$SCRIPT_DIR/run.sh org.allenai.dqa.matching.TestMatchingCli --beamSize $TEST_BEAM --examples $TEST --diagrams $DIAGRAMS --diagramFeatures $DIAGRAM_FEATURES --model $MY_MODEL --lossJson $MY_DIR/test_error.json > $MY_DIR/test_error_log.txt 19 | 20 | # mkdir -p $MY_DIR/validation_error/ 21 | # python $SCRIPT_DIR/visualize/visualize_loss.py $MY_DIR/validation_error.json $MY_DIR/validation_error/ 22 | # tar cf $MY_DIR/validation_error.tar $MY_DIR/validation_error/ 23 | # gzip -f $MY_DIR/validation_error.tar 24 | 25 | # /$SCRIPT_DIR/run.sh org.allenai.dqa.matching.TestMatchingCli --beamSize $TEST_BEAM --examples $TRAIN --diagrams $DIAGRAMS --diagramFeatures $DIAGRAM_FEATURES --model $MY_MODEL --lossJson $MY_DIR/train_error.json > $MY_DIR/train_error_log.txt 26 | 27 | echo "Finished training $MY_NAME" 28 | -------------------------------------------------------------------------------- /experiments/dipart/scripts/train_ssmn_unary.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | source "experiments/dipart/scripts/config.sh" 4 | 5 | MY_NAME=ssmn_unary 6 | MY_DIR=$EXPERIMENT_DIR/$MY_NAME/ 7 | MY_MODEL=$MY_DIR/model.ser 8 | MY_FLAGS="--matchingNetwork --partClassifier" 9 | 10 | mkdir -p $MY_DIR 11 | 12 | echo "Training $MY_NAME model..." 13 | ./$SCRIPT_DIR/run.sh org.allenai.dqa.matching.TrainMatchingCli --beamSize $TRAIN_BEAM --epochs $EPOCHS --examples $TRAIN --diagrams $DIAGRAMS --diagramFeatures $DIAGRAM_FEATURES --modelOut $MY_MODEL $TRAIN_OPTS $MY_FLAGS > $MY_DIR/log.txt 14 | 15 | echo "Testing $MY_NAME model..." 
16 | ./$SCRIPT_DIR/run.sh org.allenai.dqa.matching.TestMatchingCli --beamSize $TEST_BEAM --examples $TEST --diagrams $DIAGRAMS --diagramFeatures $DIAGRAM_FEATURES --model $MY_MODEL --lossJson $MY_DIR/validation_error.json > $MY_DIR/validation_error_log.txt 17 | 18 | mkdir -p $MY_DIR/validation_error/ 19 | python $SCRIPT_DIR/visualize/visualize_loss.py $MY_DIR/validation_error.json $MY_DIR/validation_error/ 20 | tar cf $MY_DIR/validation_error.tar $MY_DIR/validation_error/ 21 | gzip -f $MY_DIR/validation_error.tar 22 | 23 | # ./$SCRIPT_DIR/run.sh org.allenai.dqa.matching.TestMatchingCli --beamSize $TEST_BEAM --examples $TRAIN --diagrams $DIAGRAMS --diagramFeatures $DIAGRAM_FEATURES --model $MY_MODEL --lossJson $MY_DIR/train_error.json > $MY_DIR/train_error_log.txt 24 | 25 | # mkdir -p $MY_DIR/train_error/ 26 | # python $SCRIPT_DIR/visualize/visualize_loss.py $MY_DIR/train_error.json $MY_DIR/train_error/ 27 | # tar cf $MY_DIR/train_error.tar $MY_DIR/train_error/ 28 | # gzip -f $MY_DIR/train_error.tar 29 | 30 | echo "Finished training $MY_NAME" 31 | 32 | -------------------------------------------------------------------------------- /experiments/dipart/scripts/train_structural_consistency.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | source "experiments/dipart/scripts/config.sh" 4 | 5 | MY_NAME=structural_consistency 6 | MY_DIR=$EXPERIMENT_DIR/$MY_NAME/ 7 | MY_MODEL=$MY_DIR/model.ser 8 | MY_FLAGS="--structuralFactor" 9 | MY_EPOCHS=1 10 | 11 | mkdir -p $MY_DIR 12 | 13 | echo "Training $MY_NAME model..." 14 | ./$SCRIPT_DIR/run.sh org.allenai.dqa.matching.TrainMatchingCli --beamSize $TRAIN_BEAM --epochs $MY_EPOCHS --examples $TRAIN --diagrams $DIAGRAMS --diagramFeatures $DIAGRAM_FEATURES --modelOut $MY_MODEL $TRAIN_OPTS $MY_FLAGS > $MY_DIR/log.txt 15 | 16 | echo "Testing $MY_NAME model..." 
17 | ./$SCRIPT_DIR/run.sh org.allenai.dqa.matching.TestMatchingCli --beamSize $TEST_BEAM --examples $TEST --diagrams $DIAGRAMS --diagramFeatures $DIAGRAM_FEATURES --model $MY_MODEL --lossJson $MY_DIR/validation_error.json > $MY_DIR/validation_error_log.txt 18 | 19 | # ./$SCRIPT_DIR/run.sh org.allenai.dqa.matching.TestMatchingCli --beamSize $TEST_BEAM --examples $TEST --diagrams $DIAGRAMS --diagramFeatures $DIAGRAM_FEATURES --model $MY_MODEL --lossJson $MY_DIR/test_error.json > $MY_DIR/test_error_log.txt 20 | 21 | mkdir -p $MY_DIR/validation_error/ 22 | python $SCRIPT_DIR/visualize_loss.py $MY_DIR/validation_error.json $MY_DIR/validation_error/ 23 | tar cf $MY_DIR/validation_error.tar $MY_DIR/validation_error/ 24 | gzip -f $MY_DIR/validation_error.tar 25 | 26 | # /$SCRIPT_DIR/run.sh org.allenai.dqa.matching.TestMatchingCli --beamSize $TEST_BEAM --examples $TRAIN --diagrams $DIAGRAMS --diagramFeatures $DIAGRAM_FEATURES --model $MY_MODEL --lossJson $MY_DIR/train_error.json > $MY_DIR/train_error_log.txt 27 | 28 | echo "Finished training $MY_NAME" 29 | -------------------------------------------------------------------------------- /experiments/dipart/scripts/visualize/generate_heatmap.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # Generate heatmap of points 3 | 4 | import numpy as np 5 | import seaborn as sns 6 | sns.set() 7 | import matplotlib.pyplot as plt 8 | 9 | from heatmap_data import * 10 | 11 | # image_name= 12 | # im = plt.imread(image_name); 13 | # implot = plt.imshow(im); 14 | 15 | # Load the example flights dataset and conver to long-form 16 | # flights_long = sns.load_dataset("flights") 17 | # flights = flights_long.pivot("month", "year", "passengers") 18 | 19 | 20 | def sample_kde_data(data): 21 | u = np.exp(data) 22 | z = np.sum(u) 23 | p = (u / z) * 1000 24 | 25 | xs = [] 26 | ys = [] 27 | for yind in xrange(len(p)): 28 | for xind in xrange(len(p[yind])): 29 | c = int(p[yind][xind]) 30 | xs += [xind] * c 31 | ys += [NUM_POINTS - yind] * c 32 | 33 | return (np.array(xs), np.array(ys)) 34 | 35 | 36 | NUM_POINTS=25 37 | def plot_kde(data, cmap): 38 | (xs, ys) = sample_kde_data(data) 39 | print len(xs) 40 | sns.kdeplot(xs, ys, cmap=cmap, shade=True, shade_lowest=False, clip=[[0,NUM_POINTS], [0, NUM_POINTS]], alpha=0.5) 41 | 42 | 43 | # img = plt.imread("data/dqa_parts_v1/fighter-jet/fighter-jet_0000.png") 44 | img = plt.imread("data/dqa_parts_v1/antelope/antelope_0000.png") 45 | 46 | fig, ax = plt.subplots() 47 | ax.imshow(img, extent=[0, NUM_POINTS, 0, NUM_POINTS]) 48 | 49 | plot_kde(neck_data3, "Blues") 50 | # plot_kde(leg_data2, "Reds") 51 | # plot_kde(tail_data2, "Greens") 52 | 53 | plt.axis('off') 54 | plt.show() 55 | 56 | # Draw a heatmap with the numeric values in each cell 57 | # sns.heatmap(data, cbar=False, cmap="coolwarm") 58 | # plt.show() 59 | 60 | -------------------------------------------------------------------------------- /experiments/dipart/scripts/visualize/visualize_global.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | source "experiments/dipart/scripts/config.sh" 4 | 5 | EXPERIMENT_NAME="$DATA_SPLIT/dqa_310/final1_laso/" 6 | EXPERIMENT_DIR="$OUT_DIR/$EXPERIMENT_NAME/" 7 | BINARY_MATCHING_MODEL="$EXPERIMENT_DIR/binary_matching_model.ser" 8 | 9 | SOURCE="antelope/antelope_0003.png" 10 | TARGET="antelope/antelope_0000.png" 11 | LABELS_TO_MATCH="horn,belly,tail,leg" 12 | SOURCE_QUERY="neck" 13 | 14 | ./experiments/dqa/scripts/run.sh 
org.allenai.dqa.matching.VisualizeMatchingCli --diagrams $DIAGRAMS --diagramFeatures $DIAGRAM_FEATURES --model $BINARY_MATCHING_MODEL --source $SOURCE --target $TARGET --labelsToMatch $LABELS_TO_MATCH --sourcePart $SOURCE_QUERY --numGrid 25 15 | 16 | -------------------------------------------------------------------------------- /experiments/dipart/scripts/visualize/visualize_loss.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # Generate an HTML file for visualizing the predictions of 3 | # a matching model. 4 | 5 | import sys 6 | import ujson as json 7 | import random 8 | 9 | loss_json_file = sys.argv[1] 10 | output_dir = sys.argv[2] 11 | 12 | image_dir ="file:///Users/jayantk/github/pnp/data/dqa_parts_v1/" 13 | 14 | html_header = ''' 15 | 16 | 17 | 30 | ''' 31 | html_footer = ''' 32 | 33 | ''' 34 | 35 | NUM_LABELS = 5 36 | IMG_WIDTH=448 37 | 38 | def image_id_to_path(imgid): 39 | t = imgid.split("_")[0] 40 | return image_dir + "/" + t + "/" + imgid 41 | 42 | def print_loss_html(j, outfile): 43 | html_format = ''' 44 | 45 | ''' 46 | args = {'srcpath' : image_id_to_path(j["sourceImgId"]), 47 | 'targetpath' : image_id_to_path(j["targetImgId"]), 48 | 'w' : IMG_WIDTH} 49 | 50 | target_to_source = {} 51 | for arr in j["matching"]: 52 | target_to_source[arr[0]] = arr[1] 53 | 54 | 55 | source_labels = j["sourceLabel"]["partLabels"] 56 | print >> outfile, '
' 57 | for part in j["sourceParts"]: 58 | label = source_labels[part["ind"]] 59 | x = IMG_WIDTH * part["coords"]["x"] / j["sourceDims"]["x"] 60 | y = IMG_WIDTH * part["coords"]["y"] / j["sourceDims"]["y"] 61 | print >> outfile, '

%s

' % (x, y, label) 62 | # Print source labels on target image 63 | # print >> outfile, '

%s

' % (x + IMG_WIDTH, y, label) 64 | 65 | target_labels = j["sourceLabel"]["partLabels"] 66 | for part in j["targetParts"]: 67 | x = IMG_WIDTH * part["coords"]["x"] / j["targetDims"]["x"] 68 | y = IMG_WIDTH * part["coords"]["y"] / j["targetDims"]["y"] 69 | target_ind = part["ind"] 70 | target_label = target_labels[target_ind] 71 | source_ind = target_to_source[target_ind] 72 | source_label = source_labels[source_ind] 73 | 74 | color = None 75 | text = None 76 | if source_label == target_label: 77 | color = "lightgreen" 78 | text = source_label 79 | else: 80 | color = "red" 81 | text = source_label 82 | 83 | print >> outfile, '

%s

' % (color, x + IMG_WIDTH, y, text) 84 | 85 | print >> outfile, html_format % args 86 | 87 | print >> outfile, "
" 88 | 89 | def compute_confusion_matrix(jsons): 90 | label_list = jsons[0]["sourceLabel"]["partLabels"] 91 | label_inds = dict([(y, x) for (x, y) in enumerate(jsons[0]["sourceLabel"]["partLabels"])]) 92 | num_labels = len(label_inds) 93 | mat = [[0 for i in xrange(num_labels)] for i in xrange(num_labels)] 94 | 95 | for j in jsons: 96 | source_labels = jsons[0]["sourceLabel"]["partLabels"] 97 | target_labels = jsons[0]["targetLabel"]["partLabels"] 98 | for arr in j["matching"]: 99 | target_ind = label_inds[target_labels[arr[0]]] 100 | source_ind = label_inds[source_labels[arr[1]]] 101 | 102 | mat[target_ind][source_ind] += 1 103 | 104 | return (mat, label_list) 105 | 106 | def generate_confusion_matrix_html(confusion_matrix, label_list, f): 107 | print >> f, "" 108 | print >> f, "" 109 | for j in xrange(len(confusion_matrix)): 110 | print >> f, "" 111 | print >> f, "" 112 | 113 | for i in xrange(len(confusion_matrix)): 114 | print >> f, "" 115 | print >> f, "" 116 | 117 | for j in xrange(len(confusion_matrix[i])): 118 | print >> f, "" 119 | 120 | accuracy = 100 * float(confusion_matrix[i][i]) / sum(confusion_matrix[i]) 121 | print >> f, "" % accuracy 122 | print >> f, "" 123 | print >> f, "
", label_list[j], "Accuracy:
", label_list[i], "", confusion_matrix[i][j], "%.1f%%
" 124 | 125 | 126 | def generate_label_html(label, losses, html_output_file): 127 | with open(html_output_file, 'w') as f: 128 | print >> f, html_header 129 | print >> f, "

" + label + "

" 130 | 131 | print >> f, "

Confusion Matrix

" 132 | print >> f, "

(Rows are the true target label, columns are the predicted labels. Accuracy is % of points with the row's target label that are predicted correctly.)

" 133 | (confusion_matrix, label_list) = compute_confusion_matrix(losses) 134 | generate_confusion_matrix_html(confusion_matrix, label_list, f) 135 | 136 | for j in losses: 137 | print_loss_html(j, f) 138 | print >> f, html_footer 139 | 140 | 141 | losses_by_label = {} 142 | with open(loss_json_file, 'r') as g: 143 | for line in g: 144 | j = json.loads(line) 145 | label_type = j["sourceImgId"].split("_")[0] 146 | if label_type not in losses_by_label: 147 | losses_by_label[label_type] = [] 148 | losses_by_label[label_type].append(j) 149 | 150 | 151 | for label in losses_by_label.iterkeys(): 152 | html_output_file = output_dir + "/" + label + ".html" 153 | generate_label_html(label, losses_by_label[label], html_output_file) 154 | 155 | label_accuracies = [] 156 | for label in losses_by_label.iterkeys(): 157 | losses = losses_by_label[label] 158 | (confusion_matrix, label_list) = compute_confusion_matrix(losses) 159 | num_correct = 0 160 | num_total = 0 161 | for i in xrange(len(confusion_matrix)): 162 | num_correct += confusion_matrix[i][i] 163 | num_total += sum(confusion_matrix[i]) 164 | 165 | accuracy = float(num_correct) / num_total 166 | label_accuracies.append((label, accuracy)) 167 | 168 | label_accuracies.sort(key=lambda x: x[1]) 169 | 170 | index_file = output_dir + "/index.html" 171 | with open(index_file, 'w') as f: 172 | print >> f, html_header 173 | for (label, acc) in label_accuracies: 174 | a = acc * 100 175 | num_examples = len(losses_by_label[label]) 176 | print >> f, '

%s (%.1f%%) (%d examples)

' % (label, label, acc * 100, num_examples) 177 | 178 | (confusion_matrix, label_list) = compute_confusion_matrix(losses_by_label[label]) 179 | generate_confusion_matrix_html(confusion_matrix, label_list, f) 180 | 181 | 182 | print >> f, html_footer 183 | -------------------------------------------------------------------------------- /experiments/dqa/scripts/train.sh: -------------------------------------------------------------------------------- 1 | 2 | TRAIN="data/dqa/sample/questions.json" 3 | DIAGRAMS="data/dqa/sample/diagrams.json" 4 | DIAGRAM_FEATURES="data/dqa/sample/diagram_features_synthetic.json" 5 | 6 | sbt "run-main org.allenai.dqa.labeling.LabelingDqaCli --trainingData $TRAIN --diagrams $DIAGRAMS --diagramFeatures $DIAGRAM_FEATURES" 7 | -------------------------------------------------------------------------------- /experiments/geoquery/scripts/example.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | # Run this script from the root directory to train and evaluate 4 | # a semantic parser on the GeoQuery data set: 5 | # ./experiments/geoquery/scripts/example.sh 6 | 7 | TRAIN="data/geoquery/all_folds.ccg" 8 | NP_LIST="data/geoquery/np_list.ccg" 9 | TEST="data/geoquery/test.ccg" 10 | 11 | MODEL_OUT="parser.ser" 12 | 13 | sbt "run-main org.allenai.pnp.semparse.TrainSemanticParserCli --trainingData $TRAIN --entityData $NP_LIST --modelOut $MODEL_OUT" 14 | sbt "run-main org.allenai.pnp.semparse.TestSemanticParserCli --testData $TEST --entityData $NP_LIST --model $MODEL_OUT" 15 | 16 | -------------------------------------------------------------------------------- /experiments/geoquery/scripts/run_experiment.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | TRAIN="data/geoquery/all_folds.ccg" 4 | NP_LIST="data/geoquery/np_list.ccg" 5 | TEST="data/geoquery/test.ccg" 6 | 7 | EXPERIMENT_NAME="pnp_update" 8 | OUT_DIR="experiments/geoquery/output/$EXPERIMENT_NAME/" 9 | MODEL_OUT="experiments/geoquery/output/$EXPERIMENT_NAME/parser.ser" 10 | TRAIN_LOG=$OUT_DIR/train_log.txt 11 | TEST_LOG=$OUT_DIR/test_log.txt 12 | 13 | mkdir -p $OUT_DIR 14 | 15 | sbt "run-main org.allenai.pnp.semparse.TrainSemanticParserCli --trainingData $TRAIN --entityData $NP_LIST --modelOut $MODEL_OUT" > $TRAIN_LOG 16 | sbt "run-main org.allenai.pnp.semparse.TestSemanticParserCli --testData $TEST --entityData $NP_LIST --model $MODEL_OUT" > $TEST_LOG 17 | 18 | -------------------------------------------------------------------------------- /experiments/geoquery/scripts/train_docker.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | # Train a single semantic parsing model on a docker host. 4 | # Run this command with many different configurations to 5 | # sweep parameters, etc. 
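# (Annotation, not part of the original script.) A sweep is driven from outside this
# script: each run hardcodes its own EXPERIMENT_NAME (set to "70" below), so every
# configuration gets its own directory under experiments/geoquery/output/. A minimal,
# hypothetical way to parameterize it would be to read the name from the first
# argument instead, e.g.
#   EXPERIMENT_NAME="${1:-70}"
# and then launch the script once per configuration:
#   for n in 70 71 72; do ./experiments/geoquery/scripts/train_docker.sh "$n"; done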
6 | 7 | TRAIN="data/geoquery/all_folds.ccg" 8 | NP_LIST="data/geoquery/np_list.ccg" 9 | TEST="data/geoquery/test.ccg" 10 | 11 | EXPERIMENT_NAME="70" 12 | OUT_DIR="experiments/geoquery/output/$EXPERIMENT_NAME/" 13 | LOG=$OUT_DIR/train_log.txt 14 | 15 | mkdir -p $OUT_DIR 16 | 17 | CLASSPATH=`find lib -name '*.jar' | tr "\\n" :` 18 | java -Djava.library.path=lib -classpath $CLASSPATH org.allenai.pnp.semparse.SemanticParserCli --trainingData $TRAIN --entityData $NP_LIST --testData $TEST > $LOG 19 | 20 | -------------------------------------------------------------------------------- /experiments/pascal_parts/scripts/config.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | SCRIPT_DIR="experiments/pascal_parts/scripts/" 4 | DATA_DIR="data/pascal_parts/" 5 | DIAGRAMS="$DATA_DIR/diagrams.json" 6 | DIAGRAM_FEATURES="$DATA_DIR/diagram_features_xy.json" 7 | DATA_SPLIT="unseen_category" 8 | TRAIN_BEAM="5" 9 | TEST_BEAM="20" 10 | EPOCHS="1" 11 | TRAIN_OPTS="" 12 | TRAIN="$DATA_DIR/data_splits_for_ssmn/train.json" 13 | TEST="$DATA_DIR/data_splits_for_ssmn/validation.json" 14 | 15 | OUT_DIR="experiments/pascal_parts/output/" 16 | EXPERIMENT_NAME="v1_normalized" 17 | EXPERIMENT_DIR="$OUT_DIR/$EXPERIMENT_NAME/" 18 | 19 | mkdir -p $EXPERIMENT_DIR 20 | -------------------------------------------------------------------------------- /experiments/pascal_parts/scripts/preprocess/extract_tqa_diagrams.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | TQA_DIR=data/tqa/ 4 | TQA=$TQA_DIR/tqa_dataset_beta_v8.json 5 | IMAGE_DIR=$TQA_DIR/ 6 | TQA_DIAGRAMS=$TQA_DIR/tqa_diagrams.json 7 | 8 | cat $TQA | jq -c '.[] | .diagramAnnotations | to_entries | .[]' > $TQA_DIAGRAMS 9 | 10 | sips -g pixelHeight -g pixelWidth $IMAGE_DIR/**/*.png > $DIAGRAM_SIZE_OUTPUT 11 | -------------------------------------------------------------------------------- /experiments/pascal_parts/scripts/preprocess/generate_diagram_feats.py: -------------------------------------------------------------------------------- 1 | #!/usr/local/bin/python3 2 | # Generate feature vectors for each diagram part 3 | 4 | import sys 5 | import json 6 | import random 7 | import pickle 8 | import gzip 9 | import numpy as np 10 | import re 11 | 12 | diagram_label_file = sys.argv[1] 13 | matching_dir = sys.argv[2] 14 | out_file = sys.argv[3] 15 | 16 | def label_to_matching_vector(diagram_json, label): 17 | matching_vec = [] 18 | # img_id = re.sub("-([^0-9])", "_\g<1>", j["imageId"]) 19 | img_id = j["imageId"] 20 | matching_file = matching_dir + "/" + j["label"] + "/" + img_id + "_" + label + ".pklz" 21 | # print(matching_file) 22 | with open(matching_file, 'rb') as g: 23 | matching = pickle.loads(gzip.decompress(g.read())) 24 | 25 | if len(matching) == 1: 26 | # Choi's format 27 | matching_vec = matching[0] 28 | else: 29 | # Ani's format 30 | matching_vec = matching 31 | 32 | # print(matching_vec) 33 | 34 | return matching_vec 35 | 36 | ''' 37 | # One-hot at a label-specific index. 
38 | DIMS = 32 39 | vec = [0.0] * DIMS 40 | h = label.__hash__() % DIMS 41 | vec[h] = 1.0 42 | return np.array(vec) 43 | ''' 44 | 45 | def label_to_vgg_vector(diagram_json, label, scale): 46 | vgg_vec = [] 47 | vgg_file = vgg_dir + "/" + j["label"] + "/" + j["imageId"] + "_" + label + "_" + str(scale) + ".png.pkl" 48 | with open(vgg_file, 'rb') as g: 49 | vgg = pickle.loads(gzip.decompress(g.read())) 50 | vgg_vec = vgg[0] 51 | 52 | return vgg_vec 53 | 54 | def label_to_feature_vector(label, xy, width, height): 55 | DIMS = 2 56 | vec = [0.0] * DIMS 57 | 58 | # X/Y coordinates normalized by image size 59 | vec[0] = float(xy[0]) / width 60 | vec[1] = float(xy[1]) / height 61 | return np.array(vec) 62 | 63 | # Random with a high-scoring element in a label-specific index. 64 | ''' 65 | h = label.__hash__() % (DIMS / 2) 66 | vec[h] = 3.0 67 | for i in xrange(len(vec)): 68 | vec[i] += random.gauss(0.0, 1.0) 69 | return vec 70 | ''' 71 | 72 | # One-hot at a label-specific index. 73 | ''' 74 | h = label.__hash__() % DIMS 75 | vec[h] = 1.0 76 | return vec 77 | ''' 78 | 79 | # Random around a mean per label 80 | ''' 81 | for i in xrange(len(vec)): 82 | mean_random = random.Random() 83 | mean_random.seed(label.__hash__() * i) 84 | mean = mean_random.uniform(-1, 1) 85 | 86 | vec[i] = random.gauss(mean, 1.0) 87 | return vec 88 | ''' 89 | 90 | # Completely random 91 | ''' 92 | for i in xrange(len(vec)): 93 | vec[i] = random.gauss(0.0, 1.0) 94 | return vec 95 | ''' 96 | 97 | image_points = {} 98 | with open(diagram_label_file, 'r') as f: 99 | for line in f: 100 | j = json.loads(line) 101 | 102 | image_id = j["imageId"] 103 | width = j["width"] 104 | height = j["height"] 105 | 106 | if not image_id in image_points: 107 | image_points[image_id] = {} 108 | 109 | # print image_id 110 | for p in j["points"]: 111 | xy = tuple(p["xy"]) 112 | label = p["label"] 113 | xy_vec = label_to_feature_vector(label, xy, width, height) 114 | matching_vec = label_to_matching_vector(j, label) 115 | 116 | # Zeroed out to keep file size down. 117 | # vgg_vec_0 = label_to_vgg_vector(j, label, 0) 118 | # vgg_vec_1 = label_to_vgg_vector(j, label, 1) 119 | # vgg_vec_2 = label_to_vgg_vector(j, label, 2) 120 | vgg_vec_0 = np.array([0]) 121 | vgg_vec_1 = np.array([0]) 122 | vgg_vec_2 = np.array([0]) 123 | 124 | # print " ", xy, label 125 | # print " ", vec 126 | 127 | image_points[image_id][xy] = {"xy_vec" : xy_vec, "matching_vec" : matching_vec, "vgg_0_vec" : vgg_vec_0, 128 | "vgg_1_vec" : vgg_vec_1, "vgg_2_vec" : vgg_vec_2} 129 | 130 | # Convert dict format to something jsonable 131 | with open(out_file, 'w') as f: 132 | for image_id in image_points.keys(): 133 | point_vectors = [] 134 | for point in image_points[image_id]: 135 | point_dict = {} 136 | point_dict["xy"] = list(point) 137 | 138 | feature_names = image_points[image_id][point] 139 | for k in feature_names.keys(): 140 | point_dict[k] = feature_names[k].tolist() 141 | 142 | point_vectors.append(point_dict) 143 | 144 | image_json = {"imageId" : image_id, "points" : point_vectors} 145 | print(json.dumps(image_json), file=f) 146 | 147 | -------------------------------------------------------------------------------- /experiments/pascal_parts/scripts/preprocess/ngrams.py: -------------------------------------------------------------------------------- 1 | import glob 2 | import gzip 3 | import sys 4 | import re 5 | from math import sqrt,log 6 | 7 | class GoogleNgrams: 8 | 9 | ''' 10 | gzip_dir is the name of a directory containing the 11 | gzipped parts of the ngrams data. 
12 | ''' 13 | def __init__(self, gzip_dir): 14 | self.gzip_dir = gzip_dir 15 | 16 | # Index the start word of each file 17 | self.files = glob.glob(self.gzip_dir + '/*.gz') 18 | self.words = [] 19 | for f in self.files: 20 | start_word = self.get_start_word(f) 21 | self.words.append(start_word) 22 | 23 | def get_start_word(self, filename): 24 | with gzip.open(filename, 'r') as f: 25 | return f.readline().split('\t')[0] 26 | 27 | def find_files_with_word(self, query_word): 28 | files_to_search = [] 29 | for i in xrange(len(self.words)): 30 | if query_word >= self.words[i] and (i + 1 == len(self.words) or query_word <= self.words[i + 1]): 31 | files_to_search.append(self.files[i]) 32 | 33 | return files_to_search 34 | 35 | def run_query(self, query_word): 36 | filenames = self.find_files_with_word(query_word) 37 | results = {} 38 | for filename in filenames: 39 | q = query_word + '\t' 40 | with gzip.open(filename, 'r') as f: 41 | for line in f: 42 | if line.startswith(q): 43 | parts = line.split('\t') 44 | results[parts[1]] = int(parts[2]) 45 | 46 | return results 47 | -------------------------------------------------------------------------------- /experiments/pascal_parts/scripts/preprocess/ngrams_to_features.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # Generate features for each part based on its name. 3 | 4 | import sys 5 | import ujson as json 6 | import re 7 | 8 | ngrams_file = sys.argv[1] 9 | # out_file = sys.argv[2] 10 | 11 | def counts_to_features(counts): 12 | prep_pattern = re.compile("([^ ]*)/[^ ]*/prep/.*") 13 | prep_counts = {} 14 | for (k, v) in counts.iteritems(): 15 | m = prep_pattern.search(k) 16 | if m is not None: 17 | prep = m.group(1) 18 | if prep not in prep_counts: 19 | prep_counts[prep] = 0 20 | prep_counts[prep] += v 21 | 22 | return prep_counts 23 | 24 | 25 | ngram_features = {} 26 | with open(ngrams_file, 'r') as f: 27 | for line in f: 28 | j = json.loads(line) 29 | features = counts_to_features(j["counts"]) 30 | 31 | if j["part1"] != j["part2"]: 32 | print j["part1"], j["part2"] 33 | 34 | for (k, v) in features.iteritems(): 35 | if "/CC/" in k: 36 | continue 37 | 38 | print " ", k, v 39 | 40 | ngram_features[(j["part1"], j["part2"])] = features 41 | 42 | all_features = set([]) 43 | for (k, counts) in ngram_features.iteritems(): 44 | all_features.update(counts.keys()) 45 | 46 | feature_indexes = dict([(f, i) for (i, f) in enumerate(all_features)]) 47 | 48 | -------------------------------------------------------------------------------- /experiments/pascal_parts/scripts/preprocess/preprocess_diagram_annotations.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import sys 4 | import ujson as json 5 | import random 6 | import re 7 | 8 | diagram_file = sys.argv[1] 9 | out_file = sys.argv[2] 10 | text_labels = ["A", "B", "C", "D", "E", "F"] 11 | 12 | output = [] 13 | with open(diagram_file, 'r') as f: 14 | j = json.load(f) 15 | 16 | for t in j.iterkeys(): 17 | diagrams = j[t] 18 | for diagram_id in diagrams.iterkeys(): 19 | part_labels = diagrams[diagram_id] 20 | label_point_map = {} 21 | 22 | for label in part_labels: 23 | label_point_map[label] = part_labels[label] 24 | 25 | point_annotated_id = t + "/" + diagram_id 26 | 27 | labels = sorted(label_point_map.keys()) 28 | 29 | # shuffle the text labels for each index 30 | random.seed(t.__hash__()) 31 | shuffled_text_labels = [x for x in text_labels[:len(labels)]] 32 | 
random.shuffle(shuffled_text_labels) 33 | 34 | points = [{"label": k, "xy" : label_point_map[k], "textId" : shuffled_text_labels[i]} for (i,k) in enumerate(labels)] 35 | 36 | width = 800 37 | height = 800 38 | output.append( {"id" : point_annotated_id, "imageId" : diagram_id, "label" : t, "points" : points, "width" : width, "height" : height} ) 39 | 40 | with open(out_file, 'w') as f: 41 | for d in output: 42 | print >> f, json.dumps(d) 43 | -------------------------------------------------------------------------------- /experiments/pascal_parts/scripts/preprocess/preprocess_diagram_annotations.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | DATA_DIR=data/pascal_parts/ 4 | SCRIPT_DIR=experiments/pascal_parts/scripts/preprocess/ 5 | RAW_ANNOTATIONS=$DATA_DIR/pascal_parts_for_matching/images/annotation_normalized.json 6 | # IMAGE_DIR=$DATA_DIR/pascal_parts_for_matching/images/ 7 | 8 | # DIAGRAM_SIZE_OUTPUT=$DATA_DIR/diagram_sizes.txt 9 | OUTPUT=$DATA_DIR/diagrams.json 10 | MATCHING_DIR=$DATA_DIR/pascal_parts_22/ 11 | FEATURE_OUTPUT=$DATA_DIR/diagram_features_xy.json 12 | 13 | # This command seems to work from the command line but not in the script ?? 14 | # echo $IMAGE_DIR/**/*.png | xargs sips -g pixelHeight -g pixelWidth > $DIAGRAM_SIZE_OUTPUT 15 | ./$SCRIPT_DIR/preprocess_diagram_annotations.py $RAW_ANNOTATIONS $OUTPUT 16 | ./$SCRIPT_DIR/generate_diagram_feats.py $OUTPUT $MATCHING_DIR $FEATURE_OUTPUT 17 | 18 | -------------------------------------------------------------------------------- /experiments/pascal_parts/scripts/preprocess/preprocess_ngrams.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # Generate features for each part based on its name. 
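# (Annotation, not part of the original script.) Expected invocation, inferred from
# the argument parsing below:
#   ./preprocess_ngrams.py <diagrams.json> <google_syntactic_ngrams_dir> <counts_out.json>
# For each pair of part labels that co-occur in a diagram category, the script queries
# the Google syntactic n-grams (via ngrams.GoogleNgrams) and writes the dependency
# counts linking the two names as one JSON object per line.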
3 | 4 | import sys 5 | import ujson as json 6 | import re 7 | from ngrams import GoogleNgrams 8 | 9 | diagram_file = sys.argv[1] 10 | ngrams_dir = sys.argv[2] 11 | out_file = sys.argv[3] 12 | 13 | ngrams = GoogleNgrams(ngrams_dir) 14 | 15 | def filter_dependencies(raw_counts, pattern): 16 | filtered_counts = {} 17 | p = re.compile(pattern) 18 | for (k, v) in raw_counts.iteritems(): 19 | parts = k.split() 20 | for part in parts: 21 | result = p.match(part) 22 | if result is not None: 23 | filtered_counts[k] = v 24 | break 25 | 26 | return filtered_counts 27 | 28 | type_parts = {} 29 | with open(diagram_file, 'r') as f: 30 | for line in f: 31 | j = json.loads(line) 32 | diagram_label = j["label"] 33 | 34 | if diagram_label not in type_parts: 35 | type_parts[diagram_label] = set([]) 36 | 37 | part_labels = [point["label"] for point in j["points"]] 38 | type_parts[diagram_label].update(part_labels) 39 | 40 | ''' 41 | for diagram_label in type_parts.iterkeys(): 42 | print diagram_label 43 | for part_label in type_parts[diagram_label]: 44 | print " ", part_label 45 | ''' 46 | 47 | # type_parts = {'tractor' : type_parts['tractor']} 48 | 49 | all_parts = set([]) 50 | for diagram_label in type_parts.iterkeys(): 51 | all_parts.update(type_parts[diagram_label]) 52 | 53 | print len(all_parts), "unique parts" 54 | 55 | part_vectors = {} 56 | for part in all_parts: 57 | query = part.split("_")[-1].strip().encode('ascii') 58 | print part, "->", query 59 | vector = ngrams.run_query(query) 60 | part_vectors[part] = vector 61 | 62 | with open(out_file, 'w') as f: 63 | for diagram_label in type_parts.iterkeys(): 64 | parts = type_parts[diagram_label] 65 | for p1 in parts: 66 | p1_vec = part_vectors[p1] 67 | for p2 in parts: 68 | p1_p2_counts = filter_dependencies(p1_vec, p2 + "/") 69 | print >> f, json.dumps( {"part1" : p1, "part2" : p2, "counts" : p1_p2_counts} ) 70 | -------------------------------------------------------------------------------- /experiments/pascal_parts/scripts/preprocess/preprocess_pascal.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | DATA_DIR=data/pascal_parts_matching/pascal_parts_for_matching/images_resize_crop/ 4 | SCRIPT_DIR=experiments/dipart/scripts/preprocess/ 5 | RAW_ANNOTATIONS=$DATA_DIR/annotation.json 6 | IMAGE_DIR=$DATA_DIR/ 7 | # SYNTACTIC_NGRAMS=~/Desktop/syntactic_ngrams/ 8 | 9 | DIAGRAM_SIZE_OUTPUT=$DATA_DIR/diagram_sizes.txt 10 | OUTPUT=$DATA_DIR/diagrams.json 11 | # NGRAM_OUTPUT=$DATA_DIR/syntactic_ngrams.json 12 | VGG_DIR=data/pascal_parts_matching/images_resize_crop_feat_fc2/ 13 | MATCHING_DIR=$DATA_DIR/matchingnet_features/dqa_310/ 14 | FEATURE_OUTPUT=$DATA_DIR/diagram_features_xy.json 15 | 16 | sips -g pixelHeight -g pixelWidth $IMAGE_DIR/**/*.png > $DIAGRAM_SIZE_OUTPUT 17 | ./$SCRIPT_DIR/preprocess_diagram_annotations.py $RAW_ANNOTATIONS $DIAGRAM_SIZE_OUTPUT $OUTPUT 18 | # ./$SCRIPT_DIR/generate_diagram_feats.py $OUTPUT $VGG_DIR $MATCHING_DIR $FEATURE_OUTPUT 19 | 20 | -------------------------------------------------------------------------------- /experiments/pascal_parts/scripts/preprocess/sample_pairs.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import sys 4 | import json 5 | import random 6 | import re 7 | 8 | split_file = sys.argv[1] 9 | out_file = sys.argv[2] 10 | key = sys.argv[3] 11 | samples = int(sys.argv[4]) 12 | diagram_samples = int(sys.argv[5]) 13 | 14 | def sample_pairs(diagram_list, num_per_target, 
num_diagrams_per_type): 15 | typed_diagrams = [(d, d.split('/')[0]) for d in diagram_list] 16 | 17 | diagrams_by_type = {} 18 | for (d, t) in typed_diagrams: 19 | if not t in diagrams_by_type: 20 | diagrams_by_type[t] = set([]) 21 | diagrams_by_type[t].add(d) 22 | 23 | if num_diagrams_per_type >= 0: 24 | num_types_below_threshold = 0 25 | for t in diagrams_by_type.iterkeys(): 26 | ds = sorted(list(diagrams_by_type[t])) 27 | random.seed(t.__hash__()) 28 | random.shuffle(ds) 29 | 30 | if len(ds) < num_diagrams_per_type: 31 | num_types_below_threshold += 1 32 | 33 | diagrams_by_type[t] = set(ds[:num_diagrams_per_type]) 34 | 35 | print num_types_below_threshold, "/", len(diagrams_by_type), "types below threshold of", num_diagrams_per_type 36 | 37 | typed_diagrams = [] 38 | for t in diagrams_by_type.iterkeys(): 39 | for d in diagrams_by_type[t]: 40 | typed_diagrams.append((d, t)) 41 | 42 | pairs = [] 43 | for (d, t) in typed_diagrams: 44 | other_diagrams = list(diagrams_by_type[t] - set([d])) 45 | other_diagrams.sort() 46 | 47 | if num_per_target >= 0: 48 | random.seed(d.__hash__()) 49 | random.shuffle(other_diagrams) 50 | 51 | num = min(len(other_diagrams), num_per_target) 52 | for i in xrange(num): 53 | pairs.append({'src' : other_diagrams[i], 'target' : d}) 54 | else: 55 | for other_diagram in other_diagrams: 56 | pairs.append({'src' : other_diagram, 'target' : d}) 57 | 58 | return pairs 59 | 60 | j = None 61 | with open(split_file, 'r') as f: 62 | j = json.load(f) 63 | 64 | pairs = sample_pairs(j[key], samples, diagram_samples) 65 | 66 | with open(out_file, 'wb') as f: 67 | for pair in pairs: 68 | print >> f, json.dumps(pair) 69 | 70 | -------------------------------------------------------------------------------- /experiments/pascal_parts/scripts/preprocess/tqa_diagrams_to_features.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # Generate features for each part based on its name 3 | # using positional information from the TQA dataset. 4 | 5 | import sys 6 | import ujson as json 7 | import re 8 | from collections import defaultdict 9 | 10 | tqa_diagrams_file = sys.argv[1] 11 | diagrams_file = sys.argv[2] 12 | 13 | # Get the parts of each kind of diagram. 
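# (Annotation, not part of the original script.) type_parts maps a diagram category to
# the set of part labels seen in its annotations, e.g. (illustrative values only)
# type_parts["antelope"] = {"horn", "leg", "tail", ...}.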
14 | type_parts = defaultdict(set) 15 | with open(diagrams_file, 'r') as f: 16 | for line in f: 17 | j = json.loads(line) 18 | diagram_label = j["label"] 19 | part_labels = [point["label"] for point in j["points"]] 20 | type_parts[diagram_label].update(part_labels) 21 | 22 | all_parts = set([]) 23 | part_part_map = defaultdict(set) 24 | part_counts = defaultdict(lambda: 0) 25 | for diagram_label in type_parts.iterkeys(): 26 | all_parts.update(type_parts[diagram_label]) 27 | 28 | for part in type_parts[diagram_label]: 29 | part_part_map[part].update(type_parts[diagram_label]) 30 | part_counts[part] += 1 31 | 32 | sorted_counts = sorted(part_counts.items(), key=lambda x: x[1], reverse=True) 33 | 34 | for (k,v) in sorted_counts: 35 | print k, v 36 | 37 | # Read token positions from TQA 38 | token_x = defaultdict(lambda: 0) 39 | token_y = defaultdict(lambda: 0) 40 | token_count = defaultdict(lambda: 0) 41 | with open(tqa_diagrams_file, 'r') as f: 42 | for line in f: 43 | j = json.loads(line) 44 | 45 | for ocr in j["value"]: 46 | rect = ocr["rectangle"] 47 | text = ocr["text"] 48 | 49 | x = None 50 | y = None 51 | if not isinstance(rect[0], list): 52 | x = rect[0] 53 | y = rect[1] 54 | else: 55 | x = (rect[0][0] + rect[1][0]) / 2 56 | y = (rect[0][1] + rect[1][1]) / 2 57 | 58 | tokens = text.split() 59 | for token in tokens: 60 | # print x, y, token 61 | 62 | token_x[token] += x 63 | token_y[token] += y 64 | token_count[token] += 1 65 | 66 | num_not_found = 0 67 | for part in all_parts: 68 | tokens = part.split("_") 69 | c = 0 70 | x = 0 71 | y = 0 72 | 73 | for token in tokens: 74 | c += token_count[token] 75 | x += token_x[token] 76 | y += token_y[token] 77 | 78 | if c == 0: 79 | print part, "n/a" 80 | num_not_found += 1 81 | else: 82 | nx = float(x) / c 83 | ny = float(y) / c 84 | print part, nx, ny, c 85 | 86 | 87 | print "not found: ", num_not_found, " / ", len(all_parts) 88 | -------------------------------------------------------------------------------- /experiments/pascal_parts/scripts/preprocess/tqa_to_features.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | # Generate features for each part based on its name 3 | # using positional information from the TQA dataset. 4 | 5 | import sys 6 | import ujson as json 7 | import re 8 | from collections import defaultdict 9 | 10 | diagrams_file = sys.argv[1] 11 | sample_file = sys.argv[2] 12 | 13 | sample_json = None 14 | with open(sample_file, 'r') as f: 15 | sample_json = json.load(f) 16 | 17 | diagram_to_fold = {} 18 | for (fold, diagrams) in sample_json.iteritems(): 19 | for d in diagrams: 20 | diagram_to_fold[d] = fold 21 | 22 | # Get the parts of each kind of diagram. 
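# (Annotation, not part of the original script.) fold_parts collects the part labels
# seen in each fold of the data split; the loop below then reports, for every pair of
# folds, what fraction of the first fold's labels also occur in the second -- a rough
# measure of part-vocabulary overlap between folds.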
23 | fold_parts = defaultdict(list) 24 | with open(diagrams_file, 'r') as f: 25 | for line in f: 26 | j = json.loads(line) 27 | fold = diagram_to_fold[j["id"]] 28 | part_labels = [point["label"] for point in j["points"]] 29 | fold_parts[fold].extend(part_labels) 30 | 31 | for (fold1, parts1) in fold_parts.iteritems(): 32 | p1s = set(parts1) 33 | for (fold2, parts2) in fold_parts.iteritems(): 34 | p2s = set(parts2) 35 | 36 | inter = p1s & p2s 37 | fold1pct = float(len(inter)) / len(p1s) 38 | 39 | print fold1, "/", fold2, fold1pct 40 | for part in inter: 41 | print " ", part 42 | -------------------------------------------------------------------------------- /experiments/pascal_parts/scripts/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | CLASSPATH="target/scala-2.11/pnp-assembly-0.1.2.jar" 4 | echo $CLASSPATH 5 | java -Djava.library.path=lib -classpath $CLASSPATH $@ 6 | -------------------------------------------------------------------------------- /experiments/pascal_parts/scripts/train_affine.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | source "experiments/pascal_parts/scripts/config.sh" 4 | 5 | MY_NAME=affine_transform 6 | MY_DIR=$EXPERIMENT_DIR/$MY_NAME/ 7 | MY_MODEL=$MY_DIR/model.ser 8 | MY_FLAGS="--affineTransform" 9 | 10 | mkdir -p $MY_DIR 11 | 12 | echo "Training $MY_NAME model..." 13 | ./$SCRIPT_DIR/run.sh org.allenai.dqa.matching.TrainMatchingCli --beamSize $TRAIN_BEAM --epochs $EPOCHS --examples $TRAIN --diagrams $DIAGRAMS --diagramFeatures $DIAGRAM_FEATURES --modelOut $MY_MODEL $TRAIN_OPTS $MY_FLAGS > $MY_DIR/log.txt 14 | 15 | echo "Testing $MY_NAME model..." 16 | ./$SCRIPT_DIR/run.sh org.allenai.dqa.matching.TestMatchingCli --beamSize 120 --examples $TEST --diagrams $DIAGRAMS --diagramFeatures $DIAGRAM_FEATURES --model $MY_MODEL --lossJson $MY_DIR/validation_error.json > $MY_DIR/validation_error_log.txt 17 | 18 | mkdir -p $MY_DIR/validation_error/ 19 | python $SCRIPT_DIR/visualize/visualize_loss.py $MY_DIR/validation_error.json $MY_DIR/validation_error/ 20 | tar cf $MY_DIR/validation_error.tar $MY_DIR/validation_error/ 21 | gzip -f $MY_DIR/validation_error.tar 22 | 23 | # ./$SCRIPT_DIR/run.sh org.allenai.dqa.matching.TestMatchingCli --beamSize $TEST_BEAM --examples $TRAIN --diagrams $DIAGRAMS --diagramFeatures $DIAGRAM_FEATURES --model $MY_MODEL --lossJson $MY_DIR/train_error.json > $MY_DIR/train_error_log.txt 24 | 25 | # mkdir -p $MY_DIR/train_error/ 26 | # python $SCRIPT_DIR/visualize/visualize_loss.py $MY_DIR/train_error.json $MY_DIR/train_error/ 27 | # tar cf $MY_DIR/train_error.tar $MY_DIR/train_error/ 28 | # gzip -f $MY_DIR/train_error.tar 29 | 30 | echo "Finished training $MY_NAME" 31 | -------------------------------------------------------------------------------- /experiments/pascal_parts/scripts/train_mn.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | source "experiments/pascal_parts/scripts/config.sh" 4 | 5 | MY_NAME=matching 6 | MY_DIR=$EXPERIMENT_DIR/$MY_NAME/ 7 | MY_MODEL=$MY_DIR/model.ser 8 | MY_FLAGS="--matchingNetwork --matchIndependent --loglikelihood" 9 | MY_EPOCHS=5 10 | 11 | mkdir -p $MY_DIR 12 | 13 | echo "Training $MY_NAME model..." 
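# (Annotation, not part of the original script.) The command below trains with the
# flags in MY_FLAGS above (--matchingNetwork --matchIndependent --loglikelihood) for
# $MY_EPOCHS epochs, serializing the model to $MY_MODEL and sending training output
# to log.txt.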
14 | ./$SCRIPT_DIR/run.sh org.allenai.dqa.matching.TrainMatchingCli --beamSize $TRAIN_BEAM --epochs $MY_EPOCHS --examples $TRAIN --diagrams $DIAGRAMS --diagramFeatures $DIAGRAM_FEATURES --modelOut $MY_MODEL $TRAIN_OPTS $MY_FLAGS > $MY_DIR/log.txt 15 | 16 | echo "Testing $MY_NAME model..." 17 | ./$SCRIPT_DIR/run.sh org.allenai.dqa.matching.TestMatchingCli --beamSize $TEST_BEAM --examples $TEST --diagrams $DIAGRAMS --diagramFeatures $DIAGRAM_FEATURES --model $MY_MODEL --lossJson $MY_DIR/validation_error_independent.json > $MY_DIR/validation_error_independent_log.txt 18 | 19 | ./$SCRIPT_DIR/run.sh org.allenai.dqa.matching.TestMatchingCli --beamSize 120 --enforceMatching --globalNormalize --examples $TEST --diagrams $DIAGRAMS --diagramFeatures $DIAGRAM_FEATURES --model $MY_MODEL --lossJson $MY_DIR/validation_error_matching.json > $MY_DIR/validation_error_matching_log.txt 20 | 21 | mkdir -p $MY_DIR/validation_error_matching/ 22 | python $SCRIPT_DIR/visualize/visualize_loss.py $MY_DIR/validation_error_matching.json $MY_DIR/validation_error_matching/ 23 | tar cf $MY_DIR/validation_error_matching.tar $MY_DIR/validation_error_matching/ 24 | gzip -f $MY_DIR/validation_error_matching.tar 25 | 26 | # ./$SCRIPT_DIR/run.sh org.allenai.dqa.matching.TestMatchingCli --beamSize $TEST_BEAM --examples $TRAIN --diagrams $DIAGRAMS --diagramFeatures $DIAGRAM_FEATURES --model $MY_MODEL --lossJson $MY_DIR/train_error.json > $MY_DIR/train_error_log.txt 27 | 28 | # mkdir -p $MY_DIR/train_error/ 29 | # python $SCRIPT_DIR/visualize/visualize_loss.py $MY_DIR/train_error.json $MY_DIR/train_error/ 30 | # tar cf $MY_DIR/train_error.tar $MY_DIR/train_error/ 31 | # gzip -f $MY_DIR/train_error.tar 32 | 33 | echo "Finished training $MY_NAME" 34 | 35 | -------------------------------------------------------------------------------- /experiments/pascal_parts/scripts/train_mn_lstm.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | source "experiments/pascal_parts/scripts/config.sh" 4 | 5 | MY_NAME=matching_lstm 6 | MY_DIR=$EXPERIMENT_DIR/$MY_NAME/ 7 | MY_MODEL=$MY_DIR/model.ser 8 | MY_FLAGS="--lstmEncode --matchIndependent --loglikelihood" 9 | MY_EPOCHS=5 10 | 11 | mkdir -p $MY_DIR 12 | 13 | echo "Training $MY_NAME model..." 14 | ./$SCRIPT_DIR/run.sh org.allenai.dqa.matching.TrainMatchingCli --beamSize $TRAIN_BEAM --epochs $MY_EPOCHS --examples $TRAIN --diagrams $DIAGRAMS --diagramFeatures $DIAGRAM_FEATURES --modelOut $MY_MODEL $TRAIN_OPTS $MY_FLAGS > $MY_DIR/log.txt 15 | 16 | echo "Testing $MY_NAME model..." 
17 | ./$SCRIPT_DIR/run.sh org.allenai.dqa.matching.TestMatchingCli --beamSize $TEST_BEAM --examples $TEST --diagrams $DIAGRAMS --diagramFeatures $DIAGRAM_FEATURES --model $MY_MODEL --lossJson $MY_DIR/validation_error_independent.json > $MY_DIR/validation_error_independent_log.txt 18 | 19 | ./$SCRIPT_DIR/run.sh org.allenai.dqa.matching.TestMatchingCli --beamSize 120 --enforceMatching --globalNormalize --examples $TEST --diagrams $DIAGRAMS --diagramFeatures $DIAGRAM_FEATURES --model $MY_MODEL --lossJson $MY_DIR/validation_error_matching.json > $MY_DIR/validation_error_matching_log.txt 20 | 21 | mkdir -p $MY_DIR/validation_error_matching/ 22 | python $SCRIPT_DIR/visualize/visualize_loss.py $MY_DIR/validation_error_matching.json $MY_DIR/validation_error_matching/ 23 | tar cf $MY_DIR/validation_error_matching.tar $MY_DIR/validation_error_matching/ 24 | gzip -f $MY_DIR/validation_error_matching.tar 25 | 26 | ./$SCRIPT_DIR/run.sh org.allenai.dqa.matching.TestMatchingCli --beamSize 120 --enforceMatching --globalNormalize --examples $TRAIN --diagrams $DIAGRAMS --diagramFeatures $DIAGRAM_FEATURES --model $MY_MODEL --lossJson $MY_DIR/train_error_matching.json > $MY_DIR/train_error_matching_log.txt 27 | 28 | mkdir -p $MY_DIR/train_error_matching/ 29 | python $SCRIPT_DIR/visualize/visualize_loss.py $MY_DIR/train_error_matching.json $MY_DIR/train_error_matching/ 30 | tar cf $MY_DIR/train_error_matching.tar $MY_DIR/train_error_matching/ 31 | gzip -f $MY_DIR/train_error_matching.tar 32 | 33 | echo "Finished training $MY_NAME" 34 | 35 | -------------------------------------------------------------------------------- /experiments/pascal_parts/scripts/train_mn_lstm_bso.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | source "experiments/pascal_parts/scripts/config.sh" 4 | 5 | MY_NAME=matching_lstm_bso 6 | MY_DIR=$EXPERIMENT_DIR/$MY_NAME/ 7 | MY_MODEL=$MY_DIR/model.ser 8 | MY_FLAGS="--lstmEncode --matchIndependent" 9 | MY_EPOCHS=5 10 | 11 | mkdir -p $MY_DIR 12 | 13 | echo "Training $MY_NAME model..." 14 | ./$SCRIPT_DIR/run.sh org.allenai.dqa.matching.TrainMatchingCli --beamSize $TRAIN_BEAM --epochs $MY_EPOCHS --examples $TRAIN --diagrams $DIAGRAMS --diagramFeatures $DIAGRAM_FEATURES --modelOut $MY_MODEL $TRAIN_OPTS $MY_FLAGS > $MY_DIR/log.txt 15 | 16 | echo "Testing $MY_NAME model..." 
17 | ./$SCRIPT_DIR/run.sh org.allenai.dqa.matching.TestMatchingCli --beamSize $TEST_BEAM --examples $TEST --diagrams $DIAGRAMS --diagramFeatures $DIAGRAM_FEATURES --model $MY_MODEL --lossJson $MY_DIR/validation_error_independent.json > $MY_DIR/validation_error_independent_log.txt 18 | 19 | ./$SCRIPT_DIR/run.sh org.allenai.dqa.matching.TestMatchingCli --beamSize 120 --enforceMatching --globalNormalize --examples $TEST --diagrams $DIAGRAMS --diagramFeatures $DIAGRAM_FEATURES --model $MY_MODEL --lossJson $MY_DIR/validation_error_matching.json > $MY_DIR/validation_error_matching_log.txt 20 | 21 | mkdir -p $MY_DIR/validation_error_matching/ 22 | python $SCRIPT_DIR/visualize/visualize_loss.py $MY_DIR/validation_error_matching.json $MY_DIR/validation_error_matching/ 23 | tar cf $MY_DIR/validation_error_matching.tar $MY_DIR/validation_error_matching/ 24 | gzip -f $MY_DIR/validation_error_matching.tar 25 | 26 | # ./$SCRIPT_DIR/run.sh org.allenai.dqa.matching.TestMatchingCli --beamSize $TEST_BEAM --examples $TRAIN --diagrams $DIAGRAMS --diagramFeatures $DIAGRAM_FEATURES --model $MY_MODEL --lossJson $MY_DIR/train_error.json > $MY_DIR/train_error_log.txt 27 | 28 | # mkdir -p $MY_DIR/train_error/ 29 | # python $SCRIPT_DIR/visualize/visualize_loss.py $MY_DIR/train_error.json $MY_DIR/train_error/ 30 | # tar cf $MY_DIR/train_error.tar $MY_DIR/train_error/ 31 | # gzip -f $MY_DIR/train_error.tar 32 | 33 | echo "Finished training $MY_NAME" 34 | 35 | -------------------------------------------------------------------------------- /experiments/pascal_parts/scripts/train_mn_lstm_dropout.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | source "experiments/pascal_parts/scripts/config.sh" 4 | 5 | MY_NAME=matching_lstm_dropout_10epoch 6 | MY_DIR=$EXPERIMENT_DIR/$MY_NAME/ 7 | MY_MODEL=$MY_DIR/model.ser 8 | MY_FLAGS="--lstmEncode --matchIndependent --loglikelihood --dropout 0.5" 9 | MY_EPOCHS=10 10 | 11 | mkdir -p $MY_DIR 12 | 13 | echo "Training $MY_NAME model..." 14 | ./$SCRIPT_DIR/run.sh org.allenai.dqa.matching.TrainMatchingCli --beamSize $TRAIN_BEAM --epochs $MY_EPOCHS --examples $TRAIN --diagrams $DIAGRAMS --diagramFeatures $DIAGRAM_FEATURES --modelOut $MY_MODEL $TRAIN_OPTS $MY_FLAGS > $MY_DIR/log.txt 15 | 16 | echo "Testing $MY_NAME model..." 
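# (Annotation, not part of the original script.) Evaluation below runs twice: once
# scoring each part independently (validation_error_independent.json) and once with
# --enforceMatching --globalNormalize to decode a full one-to-one matching
# (validation_error_matching.json); the matching decode is then repeated on the
# training set to produce train_error_matching.json.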
17 | ./$SCRIPT_DIR/run.sh org.allenai.dqa.matching.TestMatchingCli --beamSize $TEST_BEAM --examples $TEST --diagrams $DIAGRAMS --diagramFeatures $DIAGRAM_FEATURES --model $MY_MODEL --lossJson $MY_DIR/validation_error_independent.json > $MY_DIR/validation_error_independent_log.txt 18 | 19 | ./$SCRIPT_DIR/run.sh org.allenai.dqa.matching.TestMatchingCli --beamSize 120 --enforceMatching --globalNormalize --examples $TEST --diagrams $DIAGRAMS --diagramFeatures $DIAGRAM_FEATURES --model $MY_MODEL --lossJson $MY_DIR/validation_error_matching.json > $MY_DIR/validation_error_matching_log.txt 20 | 21 | mkdir -p $MY_DIR/validation_error_matching/ 22 | python $SCRIPT_DIR/visualize/visualize_loss.py $MY_DIR/validation_error_matching.json $MY_DIR/validation_error_matching/ 23 | tar cf $MY_DIR/validation_error_matching.tar $MY_DIR/validation_error_matching/ 24 | gzip -f $MY_DIR/validation_error_matching.tar 25 | 26 | ./$SCRIPT_DIR/run.sh org.allenai.dqa.matching.TestMatchingCli --beamSize 120 --enforceMatching --globalNormalize --examples $TRAIN --diagrams $DIAGRAMS --diagramFeatures $DIAGRAM_FEATURES --model $MY_MODEL --lossJson $MY_DIR/train_error_matching.json > $MY_DIR/train_error_matching_log.txt 27 | 28 | mkdir -p $MY_DIR/train_error_matching/ 29 | python $SCRIPT_DIR/visualize/visualize_loss.py $MY_DIR/train_error_matching.json $MY_DIR/train_error_matching/ 30 | tar cf $MY_DIR/train_error_matching.tar $MY_DIR/train_error_matching/ 31 | gzip -f $MY_DIR/train_error_matching.tar 32 | 33 | 34 | # mkdir -p $MY_DIR/train_error/ 35 | # python $SCRIPT_DIR/visualize/visualize_loss.py $MY_DIR/train_error.json $MY_DIR/train_error/ 36 | # tar cf $MY_DIR/train_error.tar $MY_DIR/train_error/ 37 | # gzip -f $MY_DIR/train_error.tar 38 | 39 | echo "Finished training $MY_NAME" 40 | 41 | -------------------------------------------------------------------------------- /experiments/pascal_parts/scripts/train_nearest_neighbor.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | source "experiments/pascal_parts/scripts/config.sh" 4 | 5 | MY_NAME=nearest_neighbor 6 | MY_DIR=$EXPERIMENT_DIR/$MY_NAME/ 7 | MY_MODEL=$MY_DIR/model.ser 8 | MY_FLAGS="--nearestNeighbor --matchIndependent" 9 | 10 | mkdir -p $MY_DIR 11 | 12 | echo "Training $MY_NAME model..." 13 | ./$SCRIPT_DIR/run.sh org.allenai.dqa.matching.TrainMatchingCli --beamSize 1 --epochs 0 --examples $TRAIN --diagrams $DIAGRAMS --diagramFeatures $DIAGRAM_FEATURES --modelOut $MY_MODEL $MY_FLAGS > $MY_DIR/log.txt 14 | 15 | echo "Testing $MY_NAME model..." 
16 | ./$SCRIPT_DIR/run.sh org.allenai.dqa.matching.TestMatchingCli --beamSize $TEST_BEAM --examples $TEST --diagrams $DIAGRAMS --diagramFeatures $DIAGRAM_FEATURES --model $MY_MODEL --lossJson $MY_DIR/validation_error_independent.json > $MY_DIR/validation_error_independent_log.txt 17 | 18 | ./$SCRIPT_DIR/run.sh org.allenai.dqa.matching.TestMatchingCli --beamSize 120 --enforceMatching --globalNormalize --examples $TEST --diagrams $DIAGRAMS --diagramFeatures $DIAGRAM_FEATURES --model $MY_MODEL --lossJson $MY_DIR/validation_error_matching.json > $MY_DIR/validation_error_matching_log.txt 19 | 20 | # mkdir -p $MY_DIR/validation_error/ 21 | # python $SCRIPT_DIR/visualize/visualize_loss.py $MY_DIR/validation_error.json $MY_DIR/validation_error/ 22 | # tar cf $MY_DIR/validation_error.tar $MY_DIR/validation_error/ 23 | # gzip -f $MY_DIR/validation_error.tar 24 | 25 | # ./$SCRIPT_DIR/run.sh org.allenai.dqa.matching.TestMatchingCli --beamSize $TEST_BEAM --examples $TRAIN --diagrams $DIAGRAMS --diagramFeatures $DIAGRAM_FEATURES --model $MY_MODEL --lossJson $MY_DIR/train_error.json > $MY_DIR/train_error_log.txt 26 | 27 | # mkdir -p $MY_DIR/train_error/ 28 | # python $SCRIPT_DIR/visualize/visualize_loss.py $MY_DIR/train_error.json $MY_DIR/train_error/ 29 | # tar cf $MY_DIR/train_error.tar $MY_DIR/train_error/ 30 | # gzip -f $MY_DIR/train_error.tar 31 | 32 | echo "Finished training $MY_NAME" 33 | 34 | -------------------------------------------------------------------------------- /experiments/pascal_parts/scripts/train_pointer_net.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | source "experiments/dqa/scripts/config.sh" 4 | 5 | MY_NAME=pointer_net 6 | MY_DIR=$EXPERIMENT_DIR/$MY_NAME/ 7 | MY_MODEL=$MY_DIR/model.ser 8 | MY_FLAGS="--pointerNet --loglikelihood" 9 | 10 | mkdir -p $MY_DIR 11 | 12 | echo "Training $MY_NAME model..." 13 | ./$SCRIPT_DIR/run.sh org.allenai.dqa.matching.TrainMatchingCli --beamSize $TRAIN_BEAM --epochs $EPOCHS --examples $TRAIN --diagrams $DIAGRAMS --diagramFeatures $DIAGRAM_FEATURES --modelOut $MY_MODEL $TRAIN_OPTS $MY_FLAGS > $MY_DIR/log.txt 14 | 15 | echo "Testing $MY_NAME model..." 
16 | ./$SCRIPT_DIR/run.sh org.allenai.dqa.matching.TestMatchingCli --beamSize $TEST_BEAM --examples $TEST --diagrams $DIAGRAMS --diagramFeatures $DIAGRAM_FEATURES --model $MY_MODEL --lossJson $MY_DIR/validation_error.json > $MY_DIR/validation_error_log.txt 17 | 18 | # mkdir -p $MY_DIR/validation_error/ 19 | # python $SCRIPT_DIR/visualize/visualize_loss.py $MY_DIR/validation_error.json $MY_DIR/validation_error/ 20 | # tar cf $MY_DIR/validation_error.tar $MY_DIR/validation_error/ 21 | # gzip -f $MY_DIR/validation_error.tar 22 | 23 | ./$SCRIPT_DIR/run.sh org.allenai.dqa.matching.TestMatchingCli --beamSize $TEST_BEAM --examples $TRAIN --diagrams $DIAGRAMS --diagramFeatures $DIAGRAM_FEATURES --model $MY_MODEL --lossJson $MY_DIR/train_error.json > $MY_DIR/train_error_log.txt 24 | 25 | # mkdir -p $MY_DIR/train_error/ 26 | # python $SCRIPT_DIR/visualize/visualize_loss.py $MY_DIR/train_error.json $MY_DIR/train_error/ 27 | # tar cf $MY_DIR/train_error.tar $MY_DIR/train_error/ 28 | # gzip -f $MY_DIR/train_error.tar 29 | 30 | echo "Finished training $MY_NAME" 31 | 32 | -------------------------------------------------------------------------------- /experiments/pascal_parts/scripts/train_ssmn.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | source "experiments/pascal_parts/scripts/config.sh" 4 | 5 | MY_NAME=ssmn_1iter_nopart 6 | MY_DIR=$EXPERIMENT_DIR/$MY_NAME/ 7 | MY_MODEL=$MY_DIR/model.ser 8 | MY_FLAGS="--structuralFactor --relativeAppearance --lstmEncode" 9 | MY_EPOCHS=1 10 | 11 | mkdir -p $MY_DIR 12 | 13 | echo "Training $MY_NAME model..." 14 | ./$SCRIPT_DIR/run.sh org.allenai.dqa.matching.TrainMatchingCli --beamSize $TRAIN_BEAM --epochs $MY_EPOCHS --examples $TRAIN --diagrams $DIAGRAMS --diagramFeatures $DIAGRAM_FEATURES --modelOut $MY_MODEL $TRAIN_OPTS $MY_FLAGS > $MY_DIR/log.txt 15 | 16 | echo "Testing $MY_NAME model..." 17 | ./$SCRIPT_DIR/run.sh org.allenai.dqa.matching.TestMatchingCli --beamSize $TEST_BEAM --examples $TEST --diagrams $DIAGRAMS --diagramFeatures $DIAGRAM_FEATURES --model $MY_MODEL --lossJson $MY_DIR/validation_error.json > $MY_DIR/validation_error_log.txt 18 | 19 | # ./$SCRIPT_DIR/run.sh org.allenai.dqa.matching.TestMatchingCli --beamSize $TEST_BEAM --examples $TEST --diagrams $DIAGRAMS --diagramFeatures $DIAGRAM_FEATURES --model $MY_MODEL --lossJson $MY_DIR/test_error.json > $MY_DIR/test_error_log.txt 20 | 21 | mkdir -p $MY_DIR/validation_error/ 22 | python $SCRIPT_DIR/visualize_loss.py $MY_DIR/validation_error.json $MY_DIR/validation_error/ 23 | tar cf $MY_DIR/validation_error.tar $MY_DIR/validation_error/ 24 | gzip -f $MY_DIR/validation_error.tar 25 | 26 | # /$SCRIPT_DIR/run.sh org.allenai.dqa.matching.TestMatchingCli --beamSize $TEST_BEAM --examples $TRAIN --diagrams $DIAGRAMS --diagramFeatures $DIAGRAM_FEATURES --model $MY_MODEL --lossJson $MY_DIR/train_error.json > $MY_DIR/train_error_log.txt 27 | 28 | echo "Finished training $MY_NAME" 29 | -------------------------------------------------------------------------------- /experiments/pascal_parts/scripts/train_ssmn_ablated.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | source "experiments/pascal_parts/scripts/config.sh" 4 | 5 | MY_NAME=ssmn_ablated_5epochs 6 | MY_DIR=$EXPERIMENT_DIR/$MY_NAME/ 7 | MY_MODEL=$MY_DIR/model.ser 8 | MY_FLAGS="--lstmEncode --structuralFactor" 9 | MY_EPOCHS=5 10 | 11 | mkdir -p $MY_DIR 12 | 13 | echo "Training $MY_NAME model..." 
14 | ./$SCRIPT_DIR/run.sh org.allenai.dqa.matching.TrainMatchingCli --beamSize $TRAIN_BEAM --epochs $MY_EPOCHS --examples $TRAIN --diagrams $DIAGRAMS --diagramFeatures $DIAGRAM_FEATURES --modelOut $MY_MODEL $TRAIN_OPTS $MY_FLAGS > $MY_DIR/log.txt 15 | 16 | echo "Testing $MY_NAME model..." 17 | ./$SCRIPT_DIR/run.sh org.allenai.dqa.matching.TestMatchingCli --beamSize $TEST_BEAM --examples $TEST --diagrams $DIAGRAMS --diagramFeatures $DIAGRAM_FEATURES --model $MY_MODEL --lossJson $MY_DIR/validation_error.json > $MY_DIR/validation_error_log.txt 18 | 19 | mkdir -p $MY_DIR/validation_error/ 20 | python $SCRIPT_DIR/visualize/visualize_loss.py $MY_DIR/validation_error.json $MY_DIR/validation_error/ 21 | tar cf $MY_DIR/validation_error.tar $MY_DIR/validation_error/ 22 | gzip -f $MY_DIR/validation_error.tar 23 | 24 | # ./$SCRIPT_DIR/run.sh org.allenai.dqa.matching.TestMatchingCli --beamSize $TEST_BEAM --examples $TRAIN --diagrams $DIAGRAMS --diagramFeatures $DIAGRAM_FEATURES --model $MY_MODEL --lossJson $MY_DIR/train_error.json > $MY_DIR/train_error_log.txt 25 | 26 | # mkdir -p $MY_DIR/train_error/ 27 | # python $SCRIPT_DIR/visualize/visualize_loss.py $MY_DIR/train_error.json $MY_DIR/train_error/ 28 | # tar cf $MY_DIR/train_error.tar $MY_DIR/train_error/ 29 | # gzip -f $MY_DIR/train_error.tar 30 | 31 | echo "Finished training $MY_NAME" 32 | 33 | -------------------------------------------------------------------------------- /experiments/pascal_parts/scripts/train_ssmn_ablated_1iter.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | source "experiments/pascal_parts/scripts/config.sh" 4 | 5 | MY_NAME=ssmn_ablated_1iter 6 | MY_DIR=$EXPERIMENT_DIR/$MY_NAME/ 7 | MY_MODEL=$MY_DIR/model.ser 8 | MY_FLAGS="--lstmEncode --structuralFactor" 9 | MY_EPOCHS=1 10 | 11 | mkdir -p $MY_DIR 12 | 13 | echo "Training $MY_NAME model..." 14 | ./$SCRIPT_DIR/run.sh org.allenai.dqa.matching.TrainMatchingCli --beamSize $TRAIN_BEAM --epochs $MY_EPOCHS --examples $TRAIN --diagrams $DIAGRAMS --diagramFeatures $DIAGRAM_FEATURES --modelOut $MY_MODEL $TRAIN_OPTS $MY_FLAGS > $MY_DIR/log.txt 15 | 16 | echo "Testing $MY_NAME model..." 
17 | ./$SCRIPT_DIR/run.sh org.allenai.dqa.matching.TestMatchingCli --beamSize $TEST_BEAM --examples $TEST --diagrams $DIAGRAMS --diagramFeatures $DIAGRAM_FEATURES --model $MY_MODEL --lossJson $MY_DIR/validation_error_independent.json > $MY_DIR/validation_error_independent_log.txt 18 | 19 | ./$SCRIPT_DIR/run.sh org.allenai.dqa.matching.TestMatchingCli --beamSize 120 --enforceMatching --globalNormalize --examples $TEST --diagrams $DIAGRAMS --diagramFeatures $DIAGRAM_FEATURES --model $MY_MODEL --lossJson $MY_DIR/validation_error_matching.json > $MY_DIR/validation_error_matching_log.txt 20 | 21 | mkdir -p $MY_DIR/validation_error_matching/ 22 | python $SCRIPT_DIR/visualize/visualize_loss.py $MY_DIR/validation_error_matching.json $MY_DIR/validation_error_matching/ 23 | tar cf $MY_DIR/validation_error_matching.tar $MY_DIR/validation_error_matching/ 24 | gzip -f $MY_DIR/validation_error_matching.tar 25 | 26 | # ./$SCRIPT_DIR/run.sh org.allenai.dqa.matching.TestMatchingCli --beamSize $TEST_BEAM --examples $TRAIN --diagrams $DIAGRAMS --diagramFeatures $DIAGRAM_FEATURES --model $MY_MODEL --lossJson $MY_DIR/train_error.json > $MY_DIR/train_error_log.txt 27 | 28 | # mkdir -p $MY_DIR/train_error/ 29 | # python $SCRIPT_DIR/visualize/visualize_loss.py $MY_DIR/train_error.json $MY_DIR/train_error/ 30 | # tar cf $MY_DIR/train_error.tar $MY_DIR/train_error/ 31 | # gzip -f $MY_DIR/train_error.tar 32 | 33 | echo "Finished training $MY_NAME" 34 | 35 | -------------------------------------------------------------------------------- /experiments/pascal_parts/scripts/train_ssmn_ablated_dropout.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | source "experiments/pascal_parts/scripts/config.sh" 4 | 5 | MY_NAME=ssmn_ablated_1iter_dropout 6 | MY_DIR=$EXPERIMENT_DIR/$MY_NAME/ 7 | MY_MODEL=$MY_DIR/model.ser 8 | MY_FLAGS="--lstmEncode --structuralFactor --dropout 0.5" 9 | MY_EPOCHS=1 10 | 11 | mkdir -p $MY_DIR 12 | 13 | echo "Training $MY_NAME model..." 14 | ./$SCRIPT_DIR/run.sh org.allenai.dqa.matching.TrainMatchingCli --beamSize $TRAIN_BEAM --epochs $MY_EPOCHS --examples $TRAIN --diagrams $DIAGRAMS --diagramFeatures $DIAGRAM_FEATURES --modelOut $MY_MODEL $TRAIN_OPTS $MY_FLAGS > $MY_DIR/log.txt 15 | 16 | echo "Testing $MY_NAME model..." 
17 | ./$SCRIPT_DIR/run.sh org.allenai.dqa.matching.TestMatchingCli --beamSize $TEST_BEAM --examples $TEST --diagrams $DIAGRAMS --diagramFeatures $DIAGRAM_FEATURES --model $MY_MODEL --lossJson $MY_DIR/validation_error_independent.json > $MY_DIR/validation_error_independent_log.txt 18 | 19 | mkdir -p $MY_DIR/validation_error_independent/ 20 | python $SCRIPT_DIR/visualize/visualize_loss.py $MY_DIR/validation_error_independent.json $MY_DIR/validation_error_independent/ 21 | tar cf $MY_DIR/validation_error_independent.tar $MY_DIR/validation_error_independent/ 22 | gzip -f $MY_DIR/validation_error_independent.tar 23 | 24 | ./$SCRIPT_DIR/run.sh org.allenai.dqa.matching.TestMatchingCli --beamSize $TEST_BEAM --examples $TRAIN --diagrams $DIAGRAMS --diagramFeatures $DIAGRAM_FEATURES --model $MY_MODEL --lossJson $MY_DIR/train_error.json > $MY_DIR/train_error_log.txt 25 | 26 | mkdir -p $MY_DIR/train_error/ 27 | python $SCRIPT_DIR/visualize/visualize_loss.py $MY_DIR/train_error.json $MY_DIR/train_error/ 28 | tar cf $MY_DIR/train_error.tar $MY_DIR/train_error/ 29 | gzip -f $MY_DIR/train_error.tar 30 | 31 | echo "Finished training $MY_NAME" 32 | 33 | -------------------------------------------------------------------------------- /experiments/pascal_parts/scripts/train_ssmn_loglikelihood.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | source "experiments/dipart/scripts/config.sh" 4 | 5 | MY_NAME=ssmn_lstm_loglikelihood 6 | MY_DIR=$EXPERIMENT_DIR/$MY_NAME/ 7 | MY_MODEL=$MY_DIR/model.ser 8 | MY_FLAGS="--structuralFactor --partClassifier --relativeAppearance --lstmEncode --loglikelihood" 9 | 10 | mkdir -p $MY_DIR 11 | 12 | echo "Training $MY_NAME model..." 13 | ./$SCRIPT_DIR/run.sh org.allenai.dqa.matching.TrainMatchingCli --beamSize $TRAIN_BEAM --epochs $EPOCHS --examples $TRAIN --diagrams $DIAGRAMS --diagramFeatures $DIAGRAM_FEATURES --modelOut $MY_MODEL $TRAIN_OPTS $MY_FLAGS > $MY_DIR/log.txt 14 | 15 | echo "Testing $MY_NAME model..." 
16 | ./$SCRIPT_DIR/run.sh org.allenai.dqa.matching.TestMatchingCli --beamSize $TEST_BEAM --examples $TEST --diagrams $DIAGRAMS --diagramFeatures $DIAGRAM_FEATURES --model $MY_MODEL --lossJson $MY_DIR/validation_error.json > $MY_DIR/validation_error_log.txt 17 | 18 | # ./$SCRIPT_DIR/run.sh org.allenai.dqa.matching.TestMatchingCli --beamSize $TEST_BEAM --examples $TEST --diagrams $DIAGRAMS --diagramFeatures $DIAGRAM_FEATURES --model $MY_MODEL --lossJson $MY_DIR/test_error.json > $MY_DIR/test_error_log.txt 19 | 20 | # mkdir -p $MY_DIR/validation_error/ 21 | # python $SCRIPT_DIR/visualize/visualize_loss.py $MY_DIR/validation_error.json $MY_DIR/validation_error/ 22 | # tar cf $MY_DIR/validation_error.tar $MY_DIR/validation_error/ 23 | # gzip -f $MY_DIR/validation_error.tar 24 | 25 | # /$SCRIPT_DIR/run.sh org.allenai.dqa.matching.TestMatchingCli --beamSize $TEST_BEAM --examples $TRAIN --diagrams $DIAGRAMS --diagramFeatures $DIAGRAM_FEATURES --model $MY_MODEL --lossJson $MY_DIR/train_error.json > $MY_DIR/train_error_log.txt 26 | 27 | echo "Finished training $MY_NAME" 28 | -------------------------------------------------------------------------------- /experiments/pascal_parts/scripts/train_ssmn_lstmonly.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | source "experiments/pascal_parts/scripts/config.sh" 4 | 5 | MY_NAME=ssmn_lstmonly 6 | MY_DIR=$EXPERIMENT_DIR/$MY_NAME/ 7 | MY_MODEL=$MY_DIR/model.ser 8 | MY_FLAGS="--lstmEncode" 9 | MY_EPOCHS=5 10 | 11 | mkdir -p $MY_DIR 12 | 13 | echo "Training $MY_NAME model..." 14 | ./$SCRIPT_DIR/run.sh org.allenai.dqa.matching.TrainMatchingCli --beamSize $TRAIN_BEAM --epochs $MY_EPOCHS --examples $TRAIN --diagrams $DIAGRAMS --diagramFeatures $DIAGRAM_FEATURES --modelOut $MY_MODEL $TRAIN_OPTS $MY_FLAGS > $MY_DIR/log.txt 15 | 16 | echo "Testing $MY_NAME model..." 
17 | ./$SCRIPT_DIR/run.sh org.allenai.dqa.matching.TestMatchingCli --beamSize $TEST_BEAM --examples $TEST --diagrams $DIAGRAMS --diagramFeatures $DIAGRAM_FEATURES --model $MY_MODEL --lossJson $MY_DIR/validation_error_independent.json > $MY_DIR/validation_error_independent_log.txt 18 | 19 | ./$SCRIPT_DIR/run.sh org.allenai.dqa.matching.TestMatchingCli --beamSize 120 --enforceMatching --globalNormalize --examples $TEST --diagrams $DIAGRAMS --diagramFeatures $DIAGRAM_FEATURES --model $MY_MODEL --lossJson $MY_DIR/validation_error_matching.json > $MY_DIR/validation_error_matching_log.txt 20 | 21 | mkdir -p $MY_DIR/validation_error_matching/ 22 | python $SCRIPT_DIR/visualize/visualize_loss.py $MY_DIR/validation_error_matching.json $MY_DIR/validation_error_matching/ 23 | tar cf $MY_DIR/validation_error_matching.tar $MY_DIR/validation_error_matching/ 24 | gzip -f $MY_DIR/validation_error_matching.tar 25 | 26 | # ./$SCRIPT_DIR/run.sh org.allenai.dqa.matching.TestMatchingCli --beamSize $TEST_BEAM --examples $TRAIN --diagrams $DIAGRAMS --diagramFeatures $DIAGRAM_FEATURES --model $MY_MODEL --lossJson $MY_DIR/train_error.json > $MY_DIR/train_error_log.txt 27 | 28 | # mkdir -p $MY_DIR/train_error/ 29 | # python $SCRIPT_DIR/visualize/visualize_loss.py $MY_DIR/train_error.json $MY_DIR/train_error/ 30 | # tar cf $MY_DIR/train_error.tar $MY_DIR/train_error/ 31 | # gzip -f $MY_DIR/train_error.tar 32 | 33 | echo "Finished training $MY_NAME" 34 | 35 | -------------------------------------------------------------------------------- /experiments/pascal_parts/scripts/train_ssmn_pretrain.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | source "experiments/pascal_parts/scripts/config.sh" 4 | 5 | MY_NAME=ssmn_pretrain 6 | MY_DIR=$EXPERIMENT_DIR/$MY_NAME/ 7 | MY_MODEL=$MY_DIR/model.ser 8 | MY_FLAGS="--structuralFactor --partClassifier --relativeAppearance --lstmEncode --pretrain" 9 | MY_EPOCHS=2 10 | 11 | mkdir -p $MY_DIR 12 | 13 | echo "Training $MY_NAME model..." 14 | ./$SCRIPT_DIR/run.sh org.allenai.dqa.matching.TrainMatchingCli --beamSize $TRAIN_BEAM --epochs $MY_EPOCHS --examples $TRAIN --diagrams $DIAGRAMS --diagramFeatures $DIAGRAM_FEATURES --modelOut $MY_MODEL $TRAIN_OPTS $MY_FLAGS > $MY_DIR/log.txt 15 | 16 | echo "Testing $MY_NAME model..." 
17 | ./$SCRIPT_DIR/run.sh org.allenai.dqa.matching.TestMatchingCli --beamSize $TEST_BEAM --examples $TEST --diagrams $DIAGRAMS --diagramFeatures $DIAGRAM_FEATURES --model $MY_MODEL --lossJson $MY_DIR/validation_error.json > $MY_DIR/validation_error_log.txt 18 | 19 | # ./$SCRIPT_DIR/run.sh org.allenai.dqa.matching.TestMatchingCli --beamSize $TEST_BEAM --examples $TEST --diagrams $DIAGRAMS --diagramFeatures $DIAGRAM_FEATURES --model $MY_MODEL --lossJson $MY_DIR/test_error.json > $MY_DIR/test_error_log.txt 20 | 21 | mkdir -p $MY_DIR/validation_error/ 22 | python $SCRIPT_DIR/visualize/visualize_loss.py $MY_DIR/validation_error.json $MY_DIR/validation_error/ 23 | tar cf $MY_DIR/validation_error.tar $MY_DIR/validation_error/ 24 | gzip -f $MY_DIR/validation_error.tar 25 | 26 | # ./$SCRIPT_DIR/run.sh org.allenai.dqa.matching.TestMatchingCli --beamSize $TEST_BEAM --examples $TRAIN --diagrams $DIAGRAMS --diagramFeatures $DIAGRAM_FEATURES --model $MY_MODEL --lossJson $MY_DIR/train_error.json > $MY_DIR/train_error_log.txt 27 | 28 | echo "Finished training $MY_NAME" 29 | -------------------------------------------------------------------------------- /experiments/pascal_parts/scripts/train_ssmn_unary.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | source "experiments/pascal_parts/scripts/config.sh" 4 | 5 | MY_NAME=ssmn_unary 6 | MY_DIR=$EXPERIMENT_DIR/$MY_NAME/ 7 | MY_MODEL=$MY_DIR/model.ser 8 | MY_FLAGS="--matchingNetwork --partClassifier" 9 | 10 | mkdir -p $MY_DIR 11 | 12 | echo "Training $MY_NAME model..." 13 | ./$SCRIPT_DIR/run.sh org.allenai.dqa.matching.TrainMatchingCli --beamSize $TRAIN_BEAM --epochs $EPOCHS --examples $TRAIN --diagrams $DIAGRAMS --diagramFeatures $DIAGRAM_FEATURES --modelOut $MY_MODEL $TRAIN_OPTS $MY_FLAGS > $MY_DIR/log.txt 14 | 15 | echo "Testing $MY_NAME model..." 16 | ./$SCRIPT_DIR/run.sh org.allenai.dqa.matching.TestMatchingCli --beamSize $TEST_BEAM --examples $TEST --diagrams $DIAGRAMS --diagramFeatures $DIAGRAM_FEATURES --model $MY_MODEL --lossJson $MY_DIR/validation_error.json > $MY_DIR/validation_error_log.txt 17 | 18 | mkdir -p $MY_DIR/validation_error/ 19 | python $SCRIPT_DIR/visualize/visualize_loss.py $MY_DIR/validation_error.json $MY_DIR/validation_error/ 20 | tar cf $MY_DIR/validation_error.tar $MY_DIR/validation_error/ 21 | gzip -f $MY_DIR/validation_error.tar 22 | 23 | # ./$SCRIPT_DIR/run.sh org.allenai.dqa.matching.TestMatchingCli --beamSize $TEST_BEAM --examples $TRAIN --diagrams $DIAGRAMS --diagramFeatures $DIAGRAM_FEATURES --model $MY_MODEL --lossJson $MY_DIR/train_error.json > $MY_DIR/train_error_log.txt 24 | 25 | # mkdir -p $MY_DIR/train_error/ 26 | # python $SCRIPT_DIR/visualize/visualize_loss.py $MY_DIR/train_error.json $MY_DIR/train_error/ 27 | # tar cf $MY_DIR/train_error.tar $MY_DIR/train_error/ 28 | # gzip -f $MY_DIR/train_error.tar 29 | 30 | echo "Finished training $MY_NAME" 31 | 32 | -------------------------------------------------------------------------------- /experiments/pascal_parts/scripts/train_structural_consistency.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | source "experiments/pascal_parts/scripts/config.sh" 4 | 5 | MY_NAME=structural_consistency 6 | MY_DIR=$EXPERIMENT_DIR/$MY_NAME/ 7 | MY_MODEL=$MY_DIR/model.ser 8 | MY_FLAGS="--structuralFactor" 9 | MY_EPOCHS=1 10 | 11 | mkdir -p $MY_DIR 12 | 13 | # echo "Training $MY_NAME model..." 
14 | # ./$SCRIPT_DIR/run.sh org.allenai.dqa.matching.TrainMatchingCli --beamSize $TRAIN_BEAM --epochs $MY_EPOCHS --examples $TRAIN --diagrams $DIAGRAMS --diagramFeatures $DIAGRAM_FEATURES --modelOut $MY_MODEL $TRAIN_OPTS $MY_FLAGS > $MY_DIR/log.txt 15 | 16 | # echo "Testing $MY_NAME model..." 17 | # ./$SCRIPT_DIR/run.sh org.allenai.dqa.matching.TestMatchingCli --beamSize $TEST_BEAM --examples $TEST --diagrams $DIAGRAMS --diagramFeatures $DIAGRAM_FEATURES --model $MY_MODEL --lossJson $MY_DIR/validation_error.json > $MY_DIR/validation_error_log.txt 18 | 19 | # ./$SCRIPT_DIR/run.sh org.allenai.dqa.matching.TestMatchingCli --beamSize $TEST_BEAM --examples $TEST --diagrams $DIAGRAMS --diagramFeatures $DIAGRAM_FEATURES --model $MY_MODEL --lossJson $MY_DIR/test_error.json > $MY_DIR/test_error_log.txt 20 | 21 | mkdir -p $MY_DIR/validation_error/ 22 | python $SCRIPT_DIR/visualize/visualize_loss.py $MY_DIR/validation_error.json $MY_DIR/validation_error/ 23 | tar cf $MY_DIR/validation_error.tar $MY_DIR/validation_error/ 24 | gzip -f $MY_DIR/validation_error.tar 25 | 26 | ./$SCRIPT_DIR/run.sh org.allenai.dqa.matching.TestMatchingCli --beamSize $TEST_BEAM --examples $TRAIN --diagrams $DIAGRAMS --diagramFeatures $DIAGRAM_FEATURES --model $MY_MODEL --lossJson $MY_DIR/train_error.json > $MY_DIR/train_error_log.txt 27 | 28 | mkdir -p $MY_DIR/train_error/ 29 | python $SCRIPT_DIR/visualize/visualize_loss.py $MY_DIR/train_error.json $MY_DIR/train_error/ 30 | tar cf $MY_DIR/train_error.tar $MY_DIR/train_error/ 31 | gzip -f $MY_DIR/train_error.tar 32 | 33 | echo "Finished training $MY_NAME" 34 | -------------------------------------------------------------------------------- /lib/jklol.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/allenai/pnp/d67bce256309855bdb5547d779c995e93bf70db5/lib/jklol.jar -------------------------------------------------------------------------------- /project/assembly.sbt: -------------------------------------------------------------------------------- 1 | addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.11.2") 2 | -------------------------------------------------------------------------------- /project/plugins.sbt: -------------------------------------------------------------------------------- 1 | addSbtPlugin("me.lessis" % "bintray-sbt" % "0.3.0") 2 | addSbtPlugin("org.allenai.plugins" % "allenai-sbt-plugins" % "1.4.8") 3 | -------------------------------------------------------------------------------- /src/main/docker/Dockerfile: -------------------------------------------------------------------------------- 1 | # AUTOGENERATED 2 | # Most lines in this file are derived from sbt settings. These settings are printed above the lines 3 | # they affect. 4 | # 5 | # IMPORTANT: If you wish to make edits to this file, make changes BELOW the line starting with 6 | # "#+#". Any updates to commands above this line should happen through sbt, and pushed to the 7 | # Dockerfile using the `generateDockerfile` task. 8 | 9 | # This image depends on the dependency image. 10 | # 11 | # The dependency image inherits from: 12 | # dockerImageBase := "allenai-docker-private-docker.bintray.io/java-dynet" 13 | FROM allenai-docker-private-docker.bintray.io/org.allenai/pnp-dependencies 14 | 15 | # The ports which are available to map in the image. 16 | # sbt setting: 17 | # dockerPorts := Seq[Int]() 18 | 19 | 20 | # The variable determining which typesafe config file to use. 
You can override this with the -e 21 | # flag: 22 | # docker run -e CONFIG_ENV=prod allenai-docker-private-docker.bintray.io/org.allenai/pnp 23 | # Note the default is "dev". 24 | ENV CONFIG_ENV ${CONFIG_ENV:-dev} 25 | 26 | # The arguments to send to the JVM. These can be overridden at runtime with the -e flag: 27 | # docker run -e JVM_ARGS="-Xms=1G -Xmx=1G" allenai-docker-private-docker.bintray.io/org.allenai/pnp 28 | # 29 | # sbt setting: 30 | # javaOptions := Seq("-Dlogback.appname=pnp") 31 | ENV JVM_ARGS ${JVM_ARGS:--Dlogback.appname=pnp} 32 | 33 | # The main class to execute when using the ENTRYPOINT command. You can override this at runtime with 34 | # the -e flag: 35 | # docker run -e JAVA_MAIN=org.allenai.HelloWorld allenai-docker-private-docker.bintray.io/org.allenai/pnp 36 | # sbt setting: 37 | # mainClass := None 38 | # (No mainClass set) 39 | 40 | # The default arguments to use for running the image. 41 | # See https://docs.docker.com/engine/reference/builder/#/understand-how-cmd-and-entrypoint-interact 42 | # for detailed information on CMD vs ENTRYPOINT. 43 | # sbt setting: 44 | # dockerMainArgs := Seq[String]() 45 | CMD [] 46 | 47 | # The script for this application to run. This can be overridden with the --entrypoint flag: 48 | # docker run --entrypoint /bin/bash allenai-docker-private-docker.bintray.io/org.allenai/pnp 49 | ENTRYPOINT ["bin/run-docker.sh"] 50 | 51 | # The directories in the staging directory which will be mapping into the Docker image. 52 | # dockerCopyMappings := Seq( 53 | # (file("src/main/resources"), "conf"), 54 | # (file("lib"), "lib"), 55 | # (file("data"), "data"), 56 | # (file("experiments"), "experiments") 57 | # ) 58 | COPY conf conf 59 | COPY lib lib 60 | COPY data data 61 | COPY experiments experiments 62 | 63 | # lib is always copied, since it has the built jars. 64 | COPY lib lib 65 | 66 | # Any additions to the file below this line will be retained when `generateDockerfile` is run. 67 | # Do not remove this line unless you want your changes overwritten! 68 | #+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+#+ 69 | 70 | # Copy dynet libraries to the lib folder. 
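# (The base java-dynet image is assumed to have built DyNet with its SWIG/Scala
# bindings under /dynet/build/swig; copying the *_scala.jar and native libdynet
# libraries into lib/ makes them available to the run script at container runtime.)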
71 | RUN cp /dynet/build/swig/*_scala.jar lib/ 72 | RUN cp /dynet/build/swig/libdynet* lib/ 73 | -------------------------------------------------------------------------------- /src/main/scala/org/allenai/dqa/labeling/AnswerSelector.scala: -------------------------------------------------------------------------------- 1 | package org.allenai.dqa.labeling 2 | 3 | import org.allenai.pnp.Pnp 4 | 5 | class AnswerSelector { 6 | 7 | def selectAnswer(denotation: AnyRef, answerOptions: AnswerOptions): Pnp[Int] = { 8 | if (denotation.isInstanceOf[Part]) { 9 | val part = denotation.asInstanceOf[Part] 10 | val index = answerOptions.matchTokens(part.id) 11 | if (index >= 0) { 12 | Pnp.value(index) 13 | } else { 14 | Pnp.fail 15 | } 16 | } else { 17 | // TODO 18 | Pnp.fail 19 | } 20 | } 21 | } -------------------------------------------------------------------------------- /src/main/scala/org/allenai/dqa/labeling/Diagram.scala: -------------------------------------------------------------------------------- 1 | package org.allenai.dqa.labeling 2 | 3 | import scala.io.Source 4 | 5 | import spray.json.DefaultJsonProtocol._ 6 | import spray.json.JsArray 7 | import spray.json.JsNumber 8 | import spray.json.JsObject 9 | import spray.json.deserializationError 10 | import spray.json.pimpString 11 | import scala.util.Random 12 | 13 | /** 14 | * A diagram marked with a collection of parts. Each 15 | * part has an x/y coordinate and a text label (e.g. "A") 16 | */ 17 | case class Diagram(id: String, imageId: String, width: Int, height: Int, 18 | parts: Vector[Part], features: DiagramFeatures) 19 | 20 | /** 21 | * A part of a diagram. 22 | */ 23 | case class Part(id: String, ind: Int, coords: Point) 24 | 25 | /** 26 | * An x/y point in a diagram. 27 | */ 28 | case class Point(x: Int, y: Int) 29 | 30 | /** 31 | * A label for a diagram. The label includes a type for 32 | * the entire diagram (e.g., "car") along with labels for 33 | * each part (e.g., "wheel"). The indexes of {@code partLabels} 34 | * correspond to {@code part.ind}. 
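 *
 * For example (illustrative values only):
 * {{{
 * DiagramLabel("car", Vector("wheel", "door", "headlight"))
 * }}}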
35 | */ 36 | case class DiagramLabel(diagramType: String, partLabels: Vector[String]) 37 | 38 | object Diagram { 39 | 40 | def fromJsonFile(filename: String, features: Map[String, DiagramFeatures] 41 | ): Array[(Diagram, DiagramLabel)] = { 42 | val lines = Source.fromFile(filename).getLines 43 | lines.map(fromJsonLine(_, features)).toArray 44 | } 45 | 46 | def fromJsonLine(line: String, features: Map[String, DiagramFeatures] 47 | ): (Diagram, DiagramLabel) = { 48 | val js = line.parseJson.asJsObject 49 | val diagramLabel = js.fields("label").convertTo[String] 50 | val diagramId = js.fields("id").convertTo[String] 51 | val imageId = js.fields("imageId").convertTo[String] 52 | val width = js.fields("width").convertTo[Int] 53 | val height = js.fields("height").convertTo[Int] 54 | 55 | // val pointJsons = Random.shuffle(js.fields("points").asInstanceOf[JsArray].elements) 56 | val pointJsons = js.fields("points").asInstanceOf[JsArray].elements 57 | 58 | val labeledParts = for { 59 | (pointJson, i) <- pointJsons.zipWithIndex 60 | p = pointJson.asJsObject 61 | id = p.fields("textId").convertTo[String] 62 | label = p.fields("label").convertTo[String] 63 | xy = p.fields("xy") match { 64 | case JsArray(Vector(JsNumber(x), JsNumber(y))) => Point(x.toInt, y.toInt) 65 | case _ => deserializationError("Array of x/y coordinates expected") 66 | } 67 | } yield { 68 | (Part(id, i, xy), label) 69 | } 70 | 71 | val f = features(imageId) 72 | 73 | (Diagram(diagramId, imageId, width, height, labeledParts.map(_._1), f), 74 | (DiagramLabel(diagramLabel, labeledParts.map(_._2)))) 75 | } 76 | } -------------------------------------------------------------------------------- /src/main/scala/org/allenai/dqa/labeling/DiagramFeatures.scala: -------------------------------------------------------------------------------- 1 | package org.allenai.dqa.labeling 2 | 3 | import scala.io.Source 4 | 5 | import edu.cmu.dynet._ 6 | import spray.json._ 7 | import spray.json.DefaultJsonProtocol._ 8 | 9 | /** 10 | * Features of points in a diagram. 
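 *
 * Each point maps to a [[PointFeatures]] bundle (xy location features, matching
 * features and several VGG layer activations). A typical lookup, assuming a
 * diagram and one of its parts are in scope:
 * {{{
 * val feats: PointFeatures = diagram.features.getFeatures(part)
 * val xyDim = feats.xy.size
 * }}}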
11 | */ 12 | case class DiagramFeatures(imageId: String, pointFeatures: Map[Point, PointFeatures]) { 13 | 14 | def getFeatures(part: Part): PointFeatures = { 15 | pointFeatures(part.coords) 16 | } 17 | 18 | def getFeatureMatrix(parts: Seq[Part]): Array[PointExpressions] = { 19 | val expressions = for { 20 | part <- parts 21 | } yield { 22 | val features = pointFeatures(part.coords) 23 | val xy = Expression.input(Dim(features.xy.size), features.xy) 24 | val matching = Expression.input(Dim(features.matching.size), features.matching) 25 | val vgg0 = Expression.input(Dim(features.vgg0.size), features.vgg0) 26 | val vgg1 = Expression.input(Dim(features.vgg1.size), features.vgg1) 27 | val vgg2 = Expression.input(Dim(features.vgg2.size), features.vgg2) 28 | val vggAll = Expression.input(Dim(features.vggAll.size), features.vggAll) 29 | PointExpressions(xy, matching, vgg0, vgg1, vgg2, vggAll) 30 | } 31 | expressions.toArray 32 | } 33 | } 34 | 35 | case class PointFeatures(xy: FloatVector, matching: FloatVector, 36 | vgg0: FloatVector, vgg1: FloatVector, vgg2: FloatVector, 37 | vggAll: FloatVector) 38 | case class PointExpressions(xy: Expression, matching: Expression, 39 | vgg0: Expression, vgg1: Expression, vgg2: Expression, 40 | vggAll: Expression) 41 | 42 | object DiagramFeatures { 43 | 44 | def fromJsonFile(filename: String): Array[DiagramFeatures] = { 45 | val lines = Source.fromFile(filename).getLines 46 | lines.map(fromJsonLine(_)).toArray 47 | } 48 | 49 | def fromJsonLine(line: String): DiagramFeatures = { 50 | val js = line.parseJson.asJsObject 51 | val imageId = js.fields("imageId").convertTo[String] 52 | 53 | val pointJsons = js.fields("points").asInstanceOf[JsArray] 54 | 55 | val pointFeatures = for { 56 | pointJson <- pointJsons.elements 57 | p = pointJson.asJsObject 58 | xy = p.fields("xy") match { 59 | case JsArray(Vector(JsNumber(x), JsNumber(y))) => Point(x.toInt, y.toInt) 60 | case _ => deserializationError("Array of x/y coordinates expected") 61 | } 62 | 63 | xyVec = new FloatVector(p.fields("xy_vec").asInstanceOf[JsArray].elements.map( 64 | x => x.convertTo[Float])) 65 | matchingVec = new FloatVector(p.fields("matching_vec").asInstanceOf[JsArray].elements.map( 66 | x => x.convertTo[Float])) 67 | vgg0Vec = new FloatVector(p.fields("vgg_0_vec").asInstanceOf[JsArray].elements.map( 68 | x => x.convertTo[Float])) 69 | vgg1Vec = new FloatVector(p.fields("vgg_1_vec").asInstanceOf[JsArray].elements.map( 70 | x => x.convertTo[Float])) 71 | vgg2Vec = new FloatVector(p.fields("vgg_2_vec").asInstanceOf[JsArray].elements.map( 72 | x => x.convertTo[Float])) 73 | vggAll = new FloatVector(vgg0Vec.toSeq ++ vgg1Vec.toSeq ++ vgg2Vec.toSeq) 74 | } yield { 75 | (xy, PointFeatures(xyVec, matchingVec, vgg0Vec, vgg1Vec, vgg2Vec, vggAll)) 76 | } 77 | 78 | DiagramFeatures(imageId, pointFeatures.toMap) 79 | } 80 | } -------------------------------------------------------------------------------- /src/main/scala/org/allenai/dqa/labeling/LabelingDqaCli.scala: -------------------------------------------------------------------------------- 1 | package org.allenai.dqa.labeling 2 | 3 | import scala.collection.JavaConverters._ 4 | import scala.collection.mutable.ListBuffer 5 | import com.jayantkrish.jklol.ccg.lambda.ExplicitTypeDeclaration 6 | import com.jayantkrish.jklol.ccg.lambda.ExpressionParser 7 | import com.jayantkrish.jklol.ccg.lambda2.ExpressionSimplifier 8 | import com.jayantkrish.jklol.ccg.lambda2.SimplificationComparator 9 | import com.jayantkrish.jklol.cli.AbstractCli 10 | import 
com.jayantkrish.jklol.util.IndexedList 11 | import edu.cmu.dynet._ 12 | import joptsimple.OptionParser 13 | import joptsimple.OptionSet 14 | import joptsimple.OptionSpec 15 | import org.allenai.pnp.semparse.SemanticParser 16 | import org.allenai.pnp._ 17 | 18 | import com.jayantkrish.jklol.training.DefaultLogFunction 19 | import org.allenai.pnp.semparse.ActionSpace 20 | 21 | import com.google.common.collect.HashMultimap 22 | 23 | class LabelingDqaCli extends AbstractCli { 24 | 25 | var diagramsOpt: OptionSpec[String] = null 26 | var diagramFeaturesOpt: OptionSpec[String] = null 27 | var trainingDataOpt: OptionSpec[String] = null 28 | 29 | override def initializeOptions(parser: OptionParser): Unit = { 30 | diagramsOpt = parser.accepts("diagrams").withRequiredArg().ofType(classOf[String]).required() 31 | diagramFeaturesOpt = parser.accepts("diagramFeatures").withRequiredArg().ofType(classOf[String]).required() 32 | trainingDataOpt = parser.accepts("trainingData").withRequiredArg().ofType(classOf[String]).withValuesSeparatedBy(',').required() 33 | } 34 | 35 | override def run(options: OptionSet): Unit = { 36 | Initialize.initialize() 37 | 38 | // Initialize expression processing for logical forms. 39 | val typeDeclaration = ExplicitTypeDeclaration.getDefault 40 | val simplifier = ExpressionSimplifier.lambdaCalculus() 41 | val comparator = new SimplificationComparator(simplifier) 42 | 43 | // Read and preprocess data 44 | val diagramFeatures = DiagramFeatures.fromJsonFile(options.valueOf(diagramFeaturesOpt)).map( 45 | x => (x.imageId, x)).toMap 46 | val diagramsAndLabels = Diagram.fromJsonFile(options.valueOf(diagramsOpt), diagramFeatures) 47 | val diagrams = diagramsAndLabels.map(_._1) 48 | val diagramLabels = diagramsAndLabels.map(_._2) 49 | val diagramMap = diagramsAndLabels.map(x => (x._1.id, x)).toMap 50 | // TODO: fix the feature dimensionality 51 | val partFeatureDim = diagramFeatures.head._2.pointFeatures.head._2.xy.size.toInt 52 | 53 | val trainingData = ListBuffer[LabelingExample]() 54 | for (filename <- options.valuesOf(trainingDataOpt).asScala) { 55 | trainingData ++= LabelingExample.fromJsonFile(filename, diagramMap) 56 | } 57 | 58 | println(trainingData.size + " training examples") 59 | val wordCounts = LabelingExample.getWordCounts(trainingData) 60 | 61 | // Vocab consists of all words that appear more than once in 62 | // the training data. 
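    // (getKeysAboveCountThreshold(1.9) presumably keeps words whose count
    // exceeds 1.9, i.e. words seen at least twice; all other tokens are
    // mapped to UNK during preprocessing.)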
63 | val vocab = IndexedList.create(wordCounts.getKeysAboveCountThreshold(1.9)) 64 | vocab.add(LabelingUtil.UNK) 65 | 66 | val trainPreprocessed = trainingData.map(_.preprocess(vocab)) 67 | 68 | // Configure executor for the labeling question domain theory 69 | 70 | /* 71 | println("diagramTypes: " + diagramTypes) 72 | println("diagramParts: " + diagramParts) 73 | println("typePartMap: " + typePartMap) 74 | */ 75 | val model = PnpModel.init(true) 76 | val executor = LabelingExecutor.fromLabels(diagramLabels, partFeatureDim, model) 77 | 78 | // Configure semantic parser 79 | val actionSpace: ActionSpace = ActionSpace.fromLfConstants(executor.bindings.keySet, 80 | typeDeclaration) 81 | println("parser root types: " + actionSpace.rootTypes) 82 | println("parser actions: ") 83 | for (t <- actionSpace.typeTemplateMap.keys) { 84 | println(t + " ->") 85 | for (template <- actionSpace.typeTemplateMap.get(t)) { 86 | println(" " + template) 87 | } 88 | } 89 | 90 | val parser = SemanticParser.create(actionSpace, vocab, model) 91 | val answerSelector = new AnswerSelector() 92 | val p3 = new LabelingP3Model(parser, executor, answerSelector) 93 | 94 | validateParser(trainPreprocessed, parser) 95 | train(trainPreprocessed, p3) 96 | test(trainPreprocessed, p3, model) 97 | } 98 | 99 | def validateParser(examples: Seq[PreprocessedLabelingExample], parser: SemanticParser): Unit = { 100 | for (ex <- examples) { 101 | ComputationGraph.renew() 102 | val lfDist = parser.generateExpression(ex.tokenIds, ex.entityLinking) 103 | val context = PnpInferenceContext.init(parser.model) 104 | val dist = lfDist.beamSearch(100, 100, Env.init, context) 105 | println(ex.ex.tokens.mkString(" ")) 106 | for (x <- dist.executions) { 107 | println(" " + x) 108 | } 109 | } 110 | } 111 | 112 | def train(examples: Seq[PreprocessedLabelingExample], p3: LabelingP3Model): PnpModel = { 113 | 114 | // TODO: figure out how to set this configuration in a more 115 | // reliable way. 
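    // (Assumption: a negative dropoutProb disables dropout in the parser here.)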
116 | p3.parser.dropoutProb = -1 117 | 118 | val pnpExamples = examples.map(p3.exampleToPnpExample(_)) 119 | 120 | // Train model 121 | val model = p3.getModel 122 | 123 | val sgd = new SimpleSGDTrainer(model.model, 0.1f, 0.01f) 124 | val trainer = new LoglikelihoodTrainer(50, 100, true, model, sgd, new DefaultLogFunction()) 125 | trainer.train(pnpExamples.toList) 126 | 127 | model 128 | } 129 | 130 | def test(examples: Seq[PreprocessedLabelingExample], p3: LabelingP3Model, 131 | model: PnpModel): Unit = { 132 | var numCorrect = 0 133 | for (ex <- examples) { 134 | ComputationGraph.renew() 135 | val pp = p3.exampleToPnpExample(ex).unconditional 136 | val context = PnpInferenceContext.init(model) 137 | val dist = pp.beamSearch(100, 100, Env.init, context) 138 | 139 | println(ex.ex.tokens.mkString(" ")) 140 | println(ex.ex.answerOptions) 141 | val marginals = dist.marginals 142 | for (x <- marginals.getSortedKeys.asScala) { 143 | println(" " + x + " " + marginals.getProbability(x)) 144 | } 145 | 146 | if (marginals.getSortedKeys.size > 0) { 147 | val bestAnswer = marginals.getSortedKeys.get(0) 148 | if (bestAnswer == ex.ex.correctAnswer) { 149 | numCorrect += 1 150 | } 151 | } 152 | } 153 | 154 | val accuracy = numCorrect.asInstanceOf[Double] / examples.length 155 | println("Accuracy: " + accuracy + " (" + numCorrect + " / " + examples.length + ")") 156 | } 157 | } 158 | 159 | 160 | object LabelingDqaCli { 161 | def main(args: Array[String]): Unit = { 162 | (new LabelingDqaCli()).run(args) 163 | } 164 | } 165 | 166 | -------------------------------------------------------------------------------- /src/main/scala/org/allenai/dqa/labeling/LabelingExample.scala: -------------------------------------------------------------------------------- 1 | package org.allenai.dqa.labeling 2 | 3 | import org.allenai.pnp.semparse.EntityLinking 4 | 5 | import com.jayantkrish.jklol.util.CountAccumulator 6 | import com.jayantkrish.jklol.util.IndexedList 7 | 8 | import spray.json._ 9 | import spray.json.DefaultJsonProtocol._ 10 | import scala.io.Source 11 | 12 | case class LabelingExample(val tokens: Array[String], 13 | val diagram: Diagram, val diagramLabel: DiagramLabel, 14 | val answerOptions: AnswerOptions, val correctAnswer: Int) { 15 | 16 | def preprocess(vocab: IndexedList[String]): PreprocessedLabelingExample = { 17 | val unkedTokens = tokens.map( 18 | x => if (vocab.contains(x)) { x } else { LabelingUtil.UNK }) 19 | val tokenIds = unkedTokens.map(x => vocab.getIndex(x)) 20 | 21 | // TODO: match ABCD labels. 22 | val entityLinking: EntityLinking = EntityLinking(List()) 23 | 24 | PreprocessedLabelingExample(tokenIds, unkedTokens, entityLinking, this) 25 | } 26 | } 27 | 28 | case class PreprocessedLabelingExample(val tokenIds: Array[Int], val unkedTokens: Array[String], 29 | val entityLinking: EntityLinking, val ex: LabelingExample) 30 | 31 | case class AnswerOptions(val optionTokens: Vector[Vector[String]]) { 32 | 33 | val length = optionTokens.length 34 | 35 | def matchTokens(s: String): Int = { 36 | // TODO: do this better. 
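    // Illustrative behavior (hypothetical values): for optionTokens
    // Vector(Vector("the", "wheel"), Vector("the", "door")) and s = "door",
    // the per-option counts of s are 0 and 1, so index 1 is returned;
    // if no option contains s at all, the result is -1.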
37 | val indexMatches = optionTokens.zipWithIndex.map(x => 38 | (x._2, x._1.filter(t => t.equals(s)).length)) 39 | 40 | val best = indexMatches.maxBy(x => x._2) 41 | 42 | if (best._2 > 0) { 43 | best._1 44 | } else { 45 | -1 46 | } 47 | } 48 | } 49 | 50 | object LabelingExample { 51 | 52 | def fromJsonFile(filename: String, diagramMap: Map[String, (Diagram, DiagramLabel)]): Array[LabelingExample] = { 53 | val examples = for { 54 | line <- Source.fromFile(filename).getLines 55 | } yield { 56 | fromJson(line, diagramMap) 57 | } 58 | 59 | examples.toArray 60 | } 61 | 62 | def fromJson(str: String, diagramMap: Map[String, (Diagram, DiagramLabel)]): LabelingExample = { 63 | val js = str.parseJson.asJsObject.fields 64 | val tokens = LabelingUtil.tokenize(js("question").asInstanceOf[JsString].value) 65 | val answerOptions = js("answerOptions").asInstanceOf[JsArray].elements.map(_.convertTo[String]) 66 | val correctAnswer = js("correctAnswer").convertTo[Int] 67 | val diagramId = js("diagramId").convertTo[String] 68 | 69 | val answerOptionTokens = answerOptions.map(_.split(" ").toVector).toVector 70 | 71 | val d = diagramMap(diagramId) 72 | LabelingExample(tokens, d._1, d._2, AnswerOptions(answerOptionTokens), 73 | correctAnswer) 74 | } 75 | 76 | def getWordCounts(examples: Seq[LabelingExample]): CountAccumulator[String] = { 77 | val acc = CountAccumulator.create[String] 78 | for (ex <- examples) { 79 | ex.tokens.map(x => acc.increment(x, 1.0)) 80 | } 81 | acc 82 | } 83 | } -------------------------------------------------------------------------------- /src/main/scala/org/allenai/dqa/labeling/LabelingP3Model.scala: -------------------------------------------------------------------------------- 1 | package org.allenai.dqa.labeling 2 | 3 | import org.allenai.pnp.semparse.SemanticParser 4 | import org.allenai.pnp.PnpExample 5 | import org.allenai.pnp.Pnp 6 | import org.allenai.pnp.Env 7 | import org.allenai.pnp.PnpModel 8 | 9 | class LabelingP3Model(val parser: SemanticParser, 10 | val executor: LabelingExecutor, val answerSelector: AnswerSelector) { 11 | 12 | def exampleToPnpExample(ex: PreprocessedLabelingExample): PnpExample[Int] = { 13 | val denotationDist = for { 14 | // TODO: stage beam search? 15 | lf <- parser.generateExpression(ex.tokenIds, ex.entityLinking) 16 | denotation <- executor.execute(lf, ex.ex.diagram) 17 | } yield { 18 | denotation 19 | } 20 | 21 | val unconditional = for { 22 | denotation <- denotationDist 23 | answer <- answerSelector.selectAnswer(denotation, ex.ex.answerOptions) 24 | } yield { 25 | answer 26 | } 27 | 28 | val conditional = for { 29 | denotation <- denotationDist 30 | // choose the answer and condition on getting the correct answer 31 | // in a single search step to reduce the possibility of search errors. 32 | correctAnswer <- (for { 33 | answer <- answerSelector.selectAnswer(denotation, ex.ex.answerOptions) 34 | _ <- Pnp.require(answer.equals(ex.ex.correctAnswer)) 35 | } yield { 36 | answer 37 | }).inOneStep() 38 | } yield { 39 | correctAnswer 40 | } 41 | 42 | val score = executor.labelToExecutionScore(ex.ex.diagramLabel) 43 | PnpExample(unconditional, conditional, Env.init, score) 44 | } 45 | 46 | def getModel: PnpModel = { 47 | // TODO: need to be able to append parameters from each model. 
48 | parser.model 49 | } 50 | } -------------------------------------------------------------------------------- /src/main/scala/org/allenai/dqa/labeling/LabelingUtil.scala: -------------------------------------------------------------------------------- 1 | package org.allenai.dqa.labeling 2 | 3 | object LabelingUtil { 4 | 5 | val UNK = "" 6 | 7 | def tokenize(language: String): Array[String] = { 8 | // The first set of characters are always mapped to their own 9 | // token. The second set gets a token containing any non-space 10 | // characters to the right. 11 | language.toLowerCase().replaceAll("([:&,?./\\(\\)-])", " $1 ") 12 | .replaceAll("(['])", " $1").split("[ ]+") 13 | } 14 | } -------------------------------------------------------------------------------- /src/main/scala/org/allenai/dqa/matching/MatchingExample.scala: -------------------------------------------------------------------------------- 1 | package org.allenai.dqa.matching 2 | 3 | import org.allenai.dqa.labeling.DiagramLabel 4 | import org.allenai.dqa.labeling.Diagram 5 | import com.google.common.base.Preconditions 6 | 7 | import spray.json._ 8 | import spray.json.DefaultJsonProtocol._ 9 | import scala.io.Source 10 | 11 | /** 12 | * Example for diagram part matching model. Each example 13 | * consists of a source diagram whose parts are to be 14 | * matched with those of the target diagram. The label 15 | * is the correct matching. sourceLabel and targetLabel are 16 | * included for evaluation purposes, but should not be 17 | * used in a matching model. They will be null for real 18 | * test examples. 19 | */ 20 | case class MatchingExample(source: Diagram, sourceLabel: DiagramLabel, 21 | target: Diagram, targetLabel: DiagramLabel, label: MatchingLabel) { 22 | 23 | } 24 | 25 | case class MatchingLabel(targetToSourcePartMap: Map[Int, Int]) { 26 | def getSourcePartInd(targetPartInd: Int): Int = { 27 | targetToSourcePartMap(targetPartInd) 28 | } 29 | } 30 | 31 | object MatchingExample { 32 | 33 | def fromJsonFile(filename: String, labeledDiagrams: Map[String, (Diagram, DiagramLabel)] 34 | ): Array[MatchingExample] = { 35 | val lines = Source.fromFile(filename).getLines 36 | lines.map(fromJsonLine(_, labeledDiagrams)).toArray 37 | } 38 | 39 | def fromJsonLine(line: String, labeledDiagrams: Map[String, (Diagram, DiagramLabel)] 40 | ): MatchingExample = { 41 | val js = line.parseJson.asJsObject 42 | val src = js.fields("src").convertTo[String] 43 | val target = js.fields("target").convertTo[String] 44 | 45 | val (srcDiagram, srcLabel) = labeledDiagrams(src) 46 | val (targetDiagram, targetLabel) = labeledDiagrams(target) 47 | fromDiagrams(srcDiagram, srcLabel, targetDiagram, targetLabel) 48 | } 49 | 50 | /** 51 | * Create a matching example from two diagrams by matching 52 | * their equivalently-labeled parts. 
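   *
   * For example (illustrative labels only): if the source parts are labeled
   * ("wheel", "door") and the target parts are labeled ("door", "wheel"), the
   * resulting MatchingLabel maps target part 1 -> source part 0 and
   * target part 0 -> source part 1.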
53 | */ 54 | def fromDiagrams(source: Diagram, sourceLabel: DiagramLabel, 55 | target: Diagram, targetLabel: DiagramLabel): MatchingExample = { 56 | 57 | val partMap = for { 58 | sourcePart <- source.parts 59 | } yield { 60 | val sourcePartLabel = sourceLabel.partLabels(sourcePart.ind) 61 | val targetInd = targetLabel.partLabels.indexOf(sourcePartLabel) 62 | 63 | Preconditions.checkState(targetInd != -1, "Could not find part label %s in list %s", 64 | sourcePartLabel, targetLabel.partLabels) 65 | 66 | (targetInd, sourcePart.ind) 67 | } 68 | 69 | val label = MatchingLabel(partMap.toMap) 70 | 71 | MatchingExample(source, sourceLabel, target, targetLabel, label) 72 | } 73 | } -------------------------------------------------------------------------------- /src/main/scala/org/allenai/dqa/matching/VisualizeMatchingCli.scala: -------------------------------------------------------------------------------- 1 | package org.allenai.dqa.matching 2 | 3 | import scala.collection.JavaConverters._ 4 | 5 | import org.allenai.dqa.labeling.Diagram 6 | import org.allenai.dqa.labeling.DiagramFeatures 7 | import org.allenai.dqa.labeling.Part 8 | import org.allenai.dqa.labeling.Point 9 | import org.allenai.dqa.labeling.PointFeatures 10 | import org.allenai.pnp.PnpModel 11 | 12 | import com.jayantkrish.jklol.cli.AbstractCli 13 | 14 | import edu.cmu.dynet._ 15 | import joptsimple.OptionParser 16 | import joptsimple.OptionSet 17 | import joptsimple.OptionSpec 18 | import scala.util.Random 19 | import org.allenai.pnp.Pnp 20 | import org.allenai.dqa.labeling.DiagramLabel 21 | 22 | class VisualizeMatchingCli extends AbstractCli { 23 | var diagramsOpt: OptionSpec[String] = null 24 | var diagramFeaturesOpt: OptionSpec[String] = null 25 | var modelOpt: OptionSpec[String] = null 26 | 27 | var sourceOpt: OptionSpec[String] = null 28 | var targetOpt: OptionSpec[String] = null 29 | var labelsToMatch: OptionSpec[String] = null 30 | var sourcePartOpt: OptionSpec[String] = null 31 | var numGridOpt: OptionSpec[Integer] = null 32 | 33 | override def initializeOptions(parser: OptionParser): Unit = { 34 | diagramsOpt = parser.accepts("diagrams").withRequiredArg().ofType(classOf[String]).required() 35 | diagramFeaturesOpt = parser.accepts("diagramFeatures").withRequiredArg().ofType(classOf[String]).required() 36 | modelOpt = parser.accepts("model").withRequiredArg().ofType(classOf[String]).required() 37 | 38 | sourceOpt = parser.accepts("source").withRequiredArg().ofType(classOf[String]).required() 39 | targetOpt = parser.accepts("target").withRequiredArg().ofType(classOf[String]).required() 40 | labelsToMatch = parser.accepts("labelsToMatch").withRequiredArg().ofType(classOf[String]) 41 | .withValuesSeparatedBy(",").required() 42 | sourcePartOpt = parser.accepts("sourcePart").withRequiredArg().ofType(classOf[String]).required() 43 | numGridOpt = parser.accepts("numGrid").withRequiredArg().ofType(classOf[Integer]).defaultsTo(10) 44 | } 45 | 46 | override def run(options: OptionSet): Unit = { 47 | Initialize.initialize() 48 | 49 | // Read and preprocess data 50 | val diagramFeatures = DiagramFeatures.fromJsonFile(options.valueOf(diagramFeaturesOpt)).map( 51 | x => (x.imageId, x)).toMap 52 | val diagramsAndLabels = Diagram.fromJsonFile(options.valueOf(diagramsOpt), diagramFeatures) 53 | val diagramsMap = diagramsAndLabels.map(x => (x._1.id, x)).toMap 54 | 55 | // Read model 56 | val loader = new ModelLoader(options.valueOf(modelOpt)) 57 | val model = PnpModel.load(loader) 58 | val matchingModel = MatchingModel.load(loader, model) 59 
| loader.done() 60 | 61 | val (source, sourceLabel) = diagramsMap(options.valueOf(sourceOpt)) 62 | val (target, targetLabel) = diagramsMap(options.valueOf(targetOpt)) 63 | 64 | val matching = for { 65 | label <- options.valuesOf(labelsToMatch).asScala 66 | } yield { 67 | val targetInd = targetLabel.partLabels.indexOf(label) 68 | val sourceInd = sourceLabel.partLabels.indexOf(label) 69 | (target.parts(targetInd), source.parts(sourceInd)) 70 | } 71 | 72 | val numGrid = options.valueOf(numGridOpt) 73 | val sourcePart = source.parts(sourceLabel.partLabels.indexOf(options.valueOf(sourcePartOpt))) 74 | val scores = getGlobalScores(matching, source, sourceLabel, target, sourcePart, 75 | matchingModel, numGrid) 76 | 77 | val sortedScores = scores.toList.sortBy(p => (p._1.y, p._1.x)) 78 | val matrix = (for { 79 | i <- 0 until numGrid 80 | } yield { 81 | sortedScores.slice(i * numGrid, (i + 1) * numGrid).map(_._2).toArray 82 | }).toArray 83 | 84 | println("[") 85 | println(matrix.map(row => "[" + row.map(_.formatted("%02.3f")).mkString(", ") + "]").mkString(",\n")) 86 | println("]") 87 | } 88 | 89 | def augmentDiagramParts(diagram: Diagram, numGridPoints: Int): Diagram = { 90 | // Extend the target diagram with many Parts in a grid. 91 | val newParts = for { 92 | i <- 0 until numGridPoints 93 | j <- 0 until numGridPoints 94 | } yield { 95 | val x = ((j + 0.5) * (diagram.width.toFloat / numGridPoints)).toInt 96 | val y = ((i + 0.5) * (diagram.height.toFloat / numGridPoints)).toInt 97 | val point = Point(x, y) 98 | 99 | val partInd = diagram.parts.length + (i * numGridPoints) + j 100 | Part("n/a", partInd, point) 101 | } 102 | 103 | val features = diagram.features.pointFeatures(diagram.parts(0).coords) 104 | val newFeatures = newParts.map{ part => 105 | val point = part.coords 106 | val normX = point.x.toFloat / diagram.width 107 | val normY = point.y.toFloat / diagram.height 108 | 109 | val xyFeatures = new FloatVector(List(normX, normY)) 110 | val matchingFeatures = new FloatVector(List.fill(features.matching.length)(0f)) 111 | val vgg0 = new FloatVector(List.fill(features.vgg0.length)(0.0f)) 112 | val vgg1 = new FloatVector(List.fill(features.vgg1.length)(0.0f)) 113 | val vgg2 = new FloatVector(List.fill(features.vgg2.length)(0.0f)) 114 | val vggAll = new FloatVector(List.fill(features.vggAll.length)(0.0f)) 115 | val pointFeatures = PointFeatures(xyFeatures, matchingFeatures, vgg0, vgg1, vgg2, vggAll) 116 | 117 | (point, pointFeatures) 118 | }.toMap 119 | 120 | val newDiagramFeatures = DiagramFeatures(diagram.features.imageId, 121 | diagram.features.pointFeatures ++ newFeatures) 122 | 123 | Diagram(diagram.id, diagram.imageId, diagram.width, diagram.height, 124 | diagram.parts ++ newParts, newDiagramFeatures) 125 | } 126 | 127 | def getGlobalScores(matching: Seq[(Part, Part)], source: Diagram, sourceLabel: DiagramLabel, 128 | target: Diagram, sourcePart: Part, model: MatchingModel, numGridPoints: Int): Map[Point, Float] = { 129 | val augmentedTarget = augmentDiagramParts(target, numGridPoints) 130 | val gridParts = augmentedTarget.parts.drop(target.parts.length) 131 | 132 | val computationGraph = ComputationGraph.renew() 133 | val cg = model.model.getComputationGraph() 134 | val preprocessing = model.preprocess(source, sourceLabel, augmentedTarget, 135 | augmentedTarget.parts, cg) 136 | 137 | val matchingList = matching.toList 138 | val matchingScore = model.getNnGlobalScore(matchingList, cg, preprocessing) 139 | 140 | val partScoreMap = gridParts.map { 141 | part => 142 | val candidateMatching = 
(part, sourcePart) :: matchingList 143 | val candidateScore = model.getNnGlobalScore(candidateMatching, cg, preprocessing) 144 | 145 | val scoreDelta = ComputationGraph.incrementalForward(candidateScore - matchingScore).toFloat 146 | (part.coords, scoreDelta) 147 | } 148 | 149 | partScoreMap.toMap 150 | } 151 | } 152 | 153 | object VisualizeMatchingCli { 154 | def main(args: Array[String]): Unit = { 155 | (new VisualizeMatchingCli()).run(args) 156 | } 157 | } 158 | -------------------------------------------------------------------------------- /src/main/scala/org/allenai/pnp/CompGraph.scala: -------------------------------------------------------------------------------- 1 | package org.allenai.pnp 2 | 3 | import com.jayantkrish.jklol.util.IndexedList 4 | 5 | import edu.cmu.dynet._ 6 | 7 | /** Computation graph of a neural network. 8 | */ 9 | class CompGraph(val model: Model, 10 | val paramNames: Map[String, Parameter], val lookupParamNames: Map[String, LookupParameter], 11 | val locallyNormalized: Boolean) { 12 | 13 | def getParameter(name: String): Parameter = { 14 | paramNames(name) 15 | } 16 | 17 | def getLookupParameter(name: String): LookupParameter = { 18 | lookupParamNames(name) 19 | } 20 | } 21 | 22 | object CompGraph { 23 | def empty(model: Model): CompGraph = { 24 | new CompGraph(model, Map(), Map(), false) 25 | } 26 | } -------------------------------------------------------------------------------- /src/main/scala/org/allenai/pnp/Env.scala: -------------------------------------------------------------------------------- 1 | package org.allenai.pnp 2 | 3 | import com.jayantkrish.jklol.util.IndexedList 4 | import com.jayantkrish.jklol.training.LogFunction 5 | import com.jayantkrish.jklol.training.NullLogFunction 6 | import edu.cmu.dynet._ 7 | 8 | /** Mutable global state of a neural probabilistic program 9 | * execution. Env also tracks the chosen values for any 10 | * nondeterministic choices whose values depended on 11 | * computation graph nodes. These values are necessary 12 | * to compute gradients with respect to the neural network 13 | * parameters. 14 | * 15 | * Env is immutable. 16 | */ 17 | class Env(val labels: List[Int], val labelNodeIds: List[Expression], 18 | varnames: IndexedList[String], vars: Array[Any]) { 19 | 20 | /** Get the value of the named variable as an instance 21 | * of type A. 22 | */ 23 | def getVar[A](name: String): A = { 24 | vars(varnames.getIndex(name)).asInstanceOf[A] 25 | } 26 | 27 | def getVar[A](name: String, default: A): A = { 28 | if (varnames.contains(name)) { 29 | getVar(name) 30 | } else { 31 | default 32 | } 33 | } 34 | 35 | def getVar[A](nameInt: Int): A = { 36 | vars(nameInt).asInstanceOf[A] 37 | } 38 | 39 | def getVar[A](nameInt: Int, default: A): A = { 40 | if (nameInt < vars.length) { 41 | getVar(nameInt) 42 | } else { 43 | default 44 | } 45 | } 46 | 47 | /** Get a new environment with the named variable 48 | * set to value. 
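   *
   * Env is immutable, so the receiver is left unchanged. An illustrative
   * sketch (variable name chosen arbitrarily):
   * {{{
   * val env2 = env.setVar("counter", 1)
   * env2.getVar[Int]("counter")   // 1
   * env.isVarBound("counter")     // false, if env did not already bind it
   * }}}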
49 | */ 50 | def setVar(name: String, value: Any): Env = { 51 | val nextVarNames = if (varnames.contains(name)) { 52 | varnames 53 | } else { 54 | val i = IndexedList.create(varnames) 55 | i.add(name) 56 | i 57 | } 58 | 59 | val nextVars = Array.ofDim[Any](nextVarNames.size) 60 | Array.copy(vars, 0, nextVars, 0, vars.size) 61 | val index = nextVarNames.getIndex(name) 62 | nextVars(index) = value 63 | 64 | new Env(labels, labelNodeIds, nextVarNames, nextVars) 65 | } 66 | 67 | def setVar(nameInt: Int, value: Any): Env = { 68 | val nextVars = Array.ofDim[Any](vars.size) 69 | Array.copy(vars, 0, nextVars, 0, vars.size) 70 | nextVars(nameInt) = value 71 | 72 | new Env(labels, labelNodeIds, varnames, nextVars) 73 | } 74 | 75 | def isVarBound(name: String): Boolean = { 76 | varnames.contains(name) 77 | } 78 | 79 | /** Attaches a label to a node of the computation graph in this 80 | * execution. 81 | */ 82 | def addLabel(param: Expression, index: Int): Env = { 83 | new Env(index :: labels, param :: labelNodeIds, varnames, vars) 84 | } 85 | 86 | /** Get a scalar-valued expression that evaluates to the 87 | * score of the execution that this env is part of. If 88 | * normalize is false, this score is computed by summing 89 | * the scores associated with choice. If normalize is true, 90 | * the score is computed by summing the negative log-softmax 91 | * scores of each choice. 92 | */ 93 | def getScore(normalize: Boolean): Expression = { 94 | var exScore = Expression.input(0) 95 | for ((expr, labelInd) <- labelNodeIds.zip(labels)) { 96 | val decisionScore = if (normalize) { 97 | Expression.pickNegLogSoftmax(expr, labelInd) 98 | } else { 99 | Expression.pick(expr, labelInd) 100 | } 101 | exScore = exScore + decisionScore 102 | } 103 | exScore 104 | } 105 | } 106 | 107 | object Env { 108 | def init: Env = { 109 | new Env(List.empty, List.empty, IndexedList.create(), Array()) 110 | } 111 | } -------------------------------------------------------------------------------- /src/main/scala/org/allenai/pnp/ExecutionScore.scala: -------------------------------------------------------------------------------- 1 | package org.allenai.pnp 2 | 3 | object ExecutionScore { 4 | 5 | /** 6 | * ExecutionScore is a function from a tag 7 | * (i.e., a name for a choice point), a choice, 8 | * and an env to a score for the choice. 
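   *
   * For example, a score that only permits a particular choice at a
   * hypothetical tag (both values are illustrative):
   * {{{
   * val oracle: ExecutionScore = (tag, choice, env) =>
   *   if (tag == "label" && choice != "wheel") Double.NegativeInfinity else 0.0
   * }}}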
9 | */ 10 | type ExecutionScore = (Any, Any, Env) => Double 11 | 12 | val zero = new ExecutionScore() { 13 | def apply(tag: Any, choice: Any, env: Env): Double = { 14 | 0.0 15 | } 16 | } 17 | 18 | def fromFilter(keepState: Env => Boolean): ExecutionScore = { 19 | new ExecutionScore() { 20 | def apply(tag: Any, choice: Any, env: Env): Double = { 21 | if (keepState(env)) { 22 | 0.0 23 | } else { 24 | Double.NegativeInfinity 25 | } 26 | } 27 | } 28 | } 29 | } -------------------------------------------------------------------------------- /src/main/scala/org/allenai/pnp/GlobalLoglikelihoodTrainer.scala: -------------------------------------------------------------------------------- 1 | package org.allenai.pnp 2 | 3 | import com.google.common.base.Preconditions 4 | import com.jayantkrish.jklol.training.LogFunction 5 | 6 | import edu.cmu.dynet._ 7 | import scala.util.Random 8 | 9 | class GlobalLoglikelihoodTrainer(val epochs: Int, val beamSize: Int, 10 | val maxSearchSteps: Int, val model: PnpModel, val trainer: Trainer, 11 | val logFn: LogFunction) { 12 | 13 | def train[A](examples: Seq[PnpExample[A]]): Unit = { 14 | for (i <- 0 until epochs) { 15 | var loss = 0.0 16 | var searchErrors = 0 17 | logFn.notifyIterationStart(i) 18 | for (example <- Random.shuffle(examples)) { 19 | ComputationGraph.renew() 20 | 21 | val env = example.env 22 | val context = PnpInferenceContext.init(model).setLog(logFn) 23 | 24 | // Compute the distribution over correct executions. 25 | logFn.startTimer("pp_loglikelihood/conditional") 26 | val conditional = example.conditional.beamSearch(beamSize, maxSearchSteps, 27 | env, context.addExecutionScore(example.conditionalExecutionScore)) 28 | val conditionalPartitionFunction = conditional.partitionFunction 29 | logFn.stopTimer("pp_loglikelihood/conditional") 30 | 31 | // TODO: handle search errors 32 | 33 | // Compute the unconditional distribution over 34 | // all executions. 
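        // The loss computed below is the beam-approximated negative
        // log-likelihood of a globally normalized model: logsumexp of the
        // unconditional execution scores minus logsumexp of the conditional
        // (correct) execution scores, both restricted to the beam.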
35 | logFn.startTimer("pp_loglikelihood/unconditional") 36 | val unconditional = example.unconditional.beamSearch(beamSize, maxSearchSteps, env, context) 37 | val unconditionalPartitionFunction = unconditional.partitionFunction 38 | logFn.stopTimer("pp_loglikelihood/unconditional") 39 | 40 | val conditionalLogSumProb = marginalsToLogProbExpression(conditional) 41 | val unconditionalLogSumProb = marginalsToLogProbExpression(unconditional) 42 | 43 | if (conditionalLogSumProb.isDefined && unconditionalLogSumProb.isDefined) { 44 | val lossExpr = unconditionalLogSumProb.get - conditionalLogSumProb.get 45 | 46 | loss += ComputationGraph.incrementalForward(lossExpr).toFloat 47 | ComputationGraph.backward(lossExpr) 48 | trainer.update(1.0f) 49 | } else { 50 | searchErrors += 1 51 | } 52 | } 53 | logFn.logStatistic(i, "search errors", searchErrors) 54 | // println(i + " loss: " + loss) 55 | trainer.updateEpoch() 56 | } 57 | } 58 | 59 | private def marginalsToLogProbExpression[A](marginals: PnpBeamMarginals[A]): Option[Expression] = { 60 | val exScores = marginals.executions.map(_.env.getScore(false)) 61 | 62 | if (exScores.length == 0) { 63 | None 64 | } else if (exScores.length == 1) { 65 | Some(exScores(0)) 66 | } else { 67 | Some(Expression.logSumExp(new ExpressionVector(exScores))) 68 | } 69 | } 70 | } -------------------------------------------------------------------------------- /src/main/scala/org/allenai/pnp/LoglikelihoodTrainer.scala: -------------------------------------------------------------------------------- 1 | package org.allenai.pnp 2 | 3 | import scala.collection.JavaConverters._ 4 | import scala.collection.mutable.ListBuffer 5 | 6 | import com.google.common.base.Preconditions 7 | import com.jayantkrish.jklol.training.LogFunction 8 | 9 | import edu.cmu.dynet._ 10 | import scala.util.Random 11 | 12 | class LoglikelihoodTrainer(val epochs: Int, val beamSize: Int, val sumMultipleExecutions: Boolean, 13 | val model: PnpModel, val trainer: Trainer, val log: LogFunction) { 14 | 15 | Preconditions.checkArgument(model.locallyNormalized == true) 16 | 17 | def train[A](examples: Seq[PnpExample[A]]): Unit = { 18 | for (i <- 0 until epochs) { 19 | var loss = 0.0 20 | var searchErrors = 0 21 | log.notifyIterationStart(i) 22 | 23 | for (example <- Random.shuffle(examples)) { 24 | ComputationGraph.renew() 25 | 26 | val env = example.env 27 | val context = PnpInferenceContext.init(model).setLog(log) 28 | 29 | // Compute the distribution over correct executions. 30 | log.startTimer("pp_loglikelihood/forward") 31 | val conditional = example.conditional.beamSearch(beamSize, -1, 32 | env, context.addExecutionScore(example.conditionalExecutionScore)) 33 | log.stopTimer("pp_loglikelihood/forward") 34 | 35 | log.startTimer("pp_loglikelihood/build_loss") 36 | val exLosses = conditional.executions.map(_.env.getScore(true)) 37 | 38 | val lossExpr = if (exLosses.length == 0) { 39 | Preconditions.checkState(sumMultipleExecutions, 40 | "Found %s conditional executions (expected exactly 1) for example: %s", 41 | conditional.executions.size.asInstanceOf[AnyRef], example) 42 | 43 | null 44 | } else if (exLosses.length == 1) { 45 | exLosses(0) 46 | } else { 47 | // This flag is used to ensure that training with a 48 | // single label per example doesn't work "by accident" 49 | // with an execution score that permits multiple labels. 
50 | Preconditions.checkState(sumMultipleExecutions, 51 | "Found %s conditional executions (expected exactly 1) for example: %s", 52 | conditional.executions.size.asInstanceOf[AnyRef], example) 53 | 54 | Expression.logSumExp(new ExpressionVector(exLosses)) 55 | } 56 | log.stopTimer("pp_loglikelihood/build_loss") 57 | 58 | if (lossExpr != null) { 59 | log.startTimer("pp_loglikelihood/eval_loss") 60 | loss += ComputationGraph.incrementalForward(lossExpr).toFloat 61 | log.stopTimer("pp_loglikelihood/eval_loss") 62 | 63 | // cg.print_graphviz() 64 | log.startTimer("pp_loglikelihood/backward") 65 | ComputationGraph.backward(lossExpr) 66 | trainer.update(1.0f) 67 | log.stopTimer("pp_loglikelihood/backward") 68 | } else { 69 | searchErrors += 1 70 | } 71 | } 72 | 73 | trainer.updateEpoch() 74 | 75 | log.logStatistic(i, "loss", loss) 76 | log.logStatistic(i, "search errors", searchErrors) 77 | log.notifyIterationEnd(i) 78 | } 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /src/main/scala/org/allenai/pnp/PnpContinuation.scala: -------------------------------------------------------------------------------- 1 | package org.allenai.pnp 2 | 3 | sealed trait PnpContinuation[A, B] { 4 | def prepend[D](g: D => Pnp[A]): PnpContinuation[D, B] 5 | 6 | def append[D](g: B => Pnp[D]): PnpContinuation[A, D] = { 7 | append(PnpContinuationFunction(g)) 8 | } 9 | def append[D](g: PnpContinuation[B, D]): PnpContinuation[A, D] 10 | 11 | def searchStep(arg: A, env: Env, logProb: Double, context: PnpInferenceContext, queue: PnpSearchQueue[B], 12 | finished: PnpSearchQueue[B]): Unit 13 | 14 | def sampleStep(arg: A, env: Env, logProb: Double, context: PnpInferenceContext, queue: PnpSearchQueue[B], 15 | finished: PnpSearchQueue[B]): Unit 16 | } 17 | 18 | case class PnpContinuationFunction[A, B](val f: A => Pnp[B]) extends PnpContinuation[A, B] { 19 | val endContinuation = new PnpEndContinuation[B] 20 | 21 | override def prepend[D](g: D => Pnp[A]): PnpContinuation[D, B] = { 22 | PnpContinuationChain(g, this) 23 | } 24 | 25 | override def append[D](g: PnpContinuation[B, D]): PnpContinuation[A, D] = { 26 | PnpContinuationChain(f, g) 27 | } 28 | 29 | override def searchStep(arg: A, env: Env, logProb: Double, context: PnpInferenceContext, queue: PnpSearchQueue[B], 30 | finished: PnpSearchQueue[B]): Unit = { 31 | f(arg).searchStep(env, logProb, context, endContinuation, queue, finished) 32 | } 33 | 34 | override def sampleStep(arg: A, env: Env, logProb: Double, context: PnpInferenceContext, queue: PnpSearchQueue[B], 35 | finished: PnpSearchQueue[B]): Unit = { 36 | f(arg).sampleStep(env, logProb, context, endContinuation, queue, finished) 37 | } 38 | } 39 | 40 | case class PnpContinuationChain[A, B, C](val f: A => Pnp[B], val cont: PnpContinuation[B, C]) 41 | extends PnpContinuation[A, C] { 42 | 43 | override def prepend[D](g: D => Pnp[A]): PnpContinuation[D, C] = { 44 | PnpContinuationChain(g, this) 45 | } 46 | 47 | override def append[D](g: PnpContinuation[C, D]): PnpContinuation[A, D] = { 48 | PnpContinuationChain(f, cont.append(g)) 49 | } 50 | 51 | override def searchStep(arg: A, env: Env, logProb: Double, context: PnpInferenceContext, queue: PnpSearchQueue[C], 52 | finished: PnpSearchQueue[C]): Unit = { 53 | f(arg).searchStep(env, logProb, context, cont, queue, finished) 54 | } 55 | 56 | override def sampleStep(arg: A, env: Env, logProb: Double, context: PnpInferenceContext, queue: PnpSearchQueue[C], 57 | finished: PnpSearchQueue[C]): Unit = { 58 | f(arg).sampleStep(env, 
logProb, context, cont, queue, finished) 59 | } 60 | } 61 | 62 | case class PnpEndContinuation[A]() extends PnpContinuation[A, A] { 63 | override def prepend[D](g: D => Pnp[A]): PnpContinuation[D, A] = { 64 | PnpContinuationChain(g, this) 65 | } 66 | 67 | override def append[D](g: PnpContinuation[A, D]): PnpContinuation[A, D] = { 68 | if (g.isInstanceOf[PnpEndContinuation[A]]) { 69 | return this.asInstanceOf[PnpContinuation[A,D]] 70 | } else { 71 | throw new UnsupportedOperationException("Cannot append to the end continuation") 72 | } 73 | } 74 | 75 | override def searchStep(arg: A, env: Env, logProb: Double, context: PnpInferenceContext, queue: PnpSearchQueue[A], 76 | finished: PnpSearchQueue[A]): Unit = { 77 | finished.offer(Pnp.value(arg), env, logProb, context, null, null) 78 | } 79 | 80 | override def sampleStep(arg: A, env: Env, logProb: Double, context: PnpInferenceContext, queue: PnpSearchQueue[A], 81 | finished: PnpSearchQueue[A]): Unit = { 82 | finished.offer(Pnp.value(arg), env, logProb, context, null, null) 83 | } 84 | } -------------------------------------------------------------------------------- /src/main/scala/org/allenai/pnp/PnpExample.scala: -------------------------------------------------------------------------------- 1 | package org.allenai.pnp 2 | 3 | import ExecutionScore.ExecutionScore 4 | 5 | /** A training example for neural probabilistic programs. An example 6 | * consists of a conditional and an unconditional program, and an 7 | * environment in which these programs execute. An additional 8 | * filter on environments may be provided to further restrict the set 9 | * of conditional executions during inference. 10 | */ 11 | case class PnpExample[A](unconditional: Pnp[A], conditional: Pnp[A], 12 | env: Env, conditionalExecutionScore: ExecutionScore) { 13 | } 14 | 15 | object PnpExample { 16 | def fromDistributions[A](unconditional: Pnp[A], conditional: Pnp[A]) = { 17 | PnpExample[A](unconditional, conditional, Env.init, ExecutionScore.zero) 18 | } 19 | } -------------------------------------------------------------------------------- /src/main/scala/org/allenai/pnp/PnpInferenceContext.scala: -------------------------------------------------------------------------------- 1 | package org.allenai.pnp 2 | 3 | import com.jayantkrish.jklol.training.{LogFunction, NullLogFunction} 4 | import org.allenai.pnp.ExecutionScore.ExecutionScore 5 | 6 | class PnpInferenceContext( 7 | cg: CompGraph = null, 8 | val log: LogFunction = new NullLogFunction(), 9 | activeScores: Set[ExecutionScore] = Set.empty) { 10 | 11 | def compGraph: CompGraph = { 12 | assert (cg != null) 13 | cg 14 | } 15 | 16 | def addExecutionScore(es: ExecutionScore) = new PnpInferenceContext(cg, log, activeScores + es) 17 | def removeExecutionScore(es: ExecutionScore) = new PnpInferenceContext(cg, log, activeScores - es) 18 | 19 | def computeScore(tag: Any, choice: Any, env: Env): Double = 20 | activeScores.map(_(tag, choice, env)).sum 21 | 22 | 23 | def setLog(newLog: LogFunction): PnpInferenceContext = { 24 | new PnpInferenceContext(cg, newLog, activeScores) 25 | } 26 | } 27 | 28 | object PnpInferenceContext { 29 | def init: PnpInferenceContext = new PnpInferenceContext() 30 | def init(cg: CompGraph): PnpInferenceContext = new PnpInferenceContext(cg) 31 | def init(model: PnpModel): PnpInferenceContext = init(model.getComputationGraph()) 32 | } -------------------------------------------------------------------------------- /src/main/scala/org/allenai/pnp/PnpModel.scala: 
-------------------------------------------------------------------------------- 1 | package org.allenai.pnp 2 | 3 | import com.jayantkrish.jklol.util.IndexedList 4 | 5 | import edu.cmu.dynet._ 6 | import scala.collection.mutable.MapBuilder 7 | import scala.collection.mutable.ListBuffer 8 | 9 | /** A neural probabilistic program model consisting 10 | * of a collection of named Tensor parameters. These 11 | * parameters are used to initialize the computation 12 | * graph of a program during inference. 13 | */ 14 | class PnpModel(var names: Map[String, Parameter], var lookupNames: Map[String, LookupParameter], 15 | val model: Model, var locallyNormalized: Boolean) { 16 | 17 | def addParameter(name: String, dim: Dim): Parameter = { 18 | val param = model.addParameters(dim) 19 | names += (name -> param) 20 | param 21 | } 22 | 23 | def addParameter(name: String, dim: Dim, init: ParameterInit): Parameter = { 24 | val param = model.addParameters(dim, init) 25 | names += (name -> param) 26 | param 27 | } 28 | 29 | def addLookupParameter(name: String, lookupNum: Long, dim: Dim): LookupParameter = { 30 | val param = model.addLookupParameters(lookupNum, dim) 31 | lookupNames += (name -> param) 32 | param 33 | } 34 | 35 | def addLookupParameter(name: String, lookupNum: Long, dim: Dim, 36 | init: ParameterInit): LookupParameter = { 37 | val param = model.addLookupParameters(lookupNum, dim, init) 38 | lookupNames += (name -> param) 39 | param 40 | } 41 | 42 | def getParameter(name: String): Parameter = { 43 | names(name) 44 | } 45 | 46 | def getLookupParameter(name: String): LookupParameter = { 47 | lookupNames(name) 48 | } 49 | 50 | def getComputationGraph(): CompGraph = { 51 | new CompGraph(model, names, lookupNames, locallyNormalized) 52 | } 53 | 54 | def save(saver: ModelSaver): Unit = { 55 | saver.addModel(model) 56 | saver.addBoolean(locallyNormalized) 57 | 58 | saver.addInt(names.size) 59 | for ((k, v) <- names) { 60 | saver.addObject(k) 61 | saver.addParameter(v) 62 | } 63 | 64 | saver.addInt(lookupNames.size) 65 | for ((k, v) <- lookupNames) { 66 | saver.addObject(k) 67 | saver.addLookupParameter(v) 68 | } 69 | } 70 | } 71 | 72 | object PnpModel { 73 | def init(locallyNormalized: Boolean): PnpModel = { 74 | new PnpModel(Map(), Map(), new Model, locallyNormalized) 75 | } 76 | 77 | def load(loader: ModelLoader): PnpModel = { 78 | val model = loader.loadModel() 79 | val locallyNormalized = loader.loadBoolean() 80 | 81 | val numParams = loader.loadInt() 82 | val params = ListBuffer[(String, Parameter)]() 83 | for (i <- 0 until numParams) { 84 | val name = loader.loadObject(classOf[String]) 85 | val param = loader.loadParameter() 86 | params += ((name, param)) 87 | } 88 | 89 | val numLookups = loader.loadInt() 90 | val lookups = ListBuffer[(String, LookupParameter)]() 91 | for (i <- 0 until numLookups) { 92 | val name = loader.loadObject(classOf[String]) 93 | val param = loader.loadLookupParameter() 94 | lookups += ((name, param)) 95 | } 96 | 97 | new PnpModel(params.toMap, lookups.toMap, model, locallyNormalized) 98 | } 99 | } -------------------------------------------------------------------------------- /src/main/scala/org/allenai/pnp/PnpSearchQueue.scala: -------------------------------------------------------------------------------- 1 | package org.allenai.pnp 2 | 3 | import com.jayantkrish.jklol.training.LogFunction 4 | import com.jayantkrish.jklol.util.KbestQueue 5 | import ExecutionScore.ExecutionScore 6 | 7 | trait PnpSearchQueue[A] { 8 | def offer(value: Pnp[A], env: Env, logProb: 
Double, context: PnpInferenceContext, tag: Any, choice: Any): Unit 9 | } 10 | 11 | class BeamPnpSearchQueue[A](size: Int) extends PnpSearchQueue[A] { 12 | 13 | val queue = new KbestQueue(size, Array.empty[SearchState[A]]) 14 | 15 | override def offer(value: Pnp[A], env: Env, logProb: Double, 16 | context: PnpInferenceContext, tag: Any, choice: Any): Unit = { 17 | val stateLogProb = context.computeScore(tag, choice, env) + logProb 18 | if (stateLogProb > Double.NegativeInfinity) { 19 | queue.offer(SearchState(value, env, stateLogProb, tag, choice), stateLogProb) 20 | } 21 | } 22 | } 23 | 24 | class EnumeratePnpSearchQueue[A] ( 25 | val finished: PnpSearchQueue[A] 26 | ) extends PnpSearchQueue[A] { 27 | val endContinuation = new PnpEndContinuation[A] 28 | 29 | override def offer(value: Pnp[A], env: Env, logProb: Double, 30 | context: PnpInferenceContext, tag: Any, choice: Any): Unit = { 31 | val stateLogProb = context.computeScore(tag, choice, env) + logProb 32 | if (stateLogProb > Double.NegativeInfinity) { 33 | value.searchStep(env, stateLogProb, context, endContinuation, this, finished) 34 | } 35 | } 36 | } 37 | 38 | class ContinuationPnpSearchQueue[A, B] ( 39 | val queue: PnpSearchQueue[B], 40 | val cont: PnpContinuation[A,B] 41 | ) extends PnpSearchQueue[A] { 42 | 43 | override def offer(value: Pnp[A], env: Env, logProb: Double, context: PnpInferenceContext, 44 | tag: Any, choice: Any): Unit = { 45 | queue.offer(BindPnp(value, cont), env, logProb, context, tag, choice) 46 | } 47 | } 48 | 49 | case class SearchState[A](val value: Pnp[A], val env: Env, val logProb: Double, val tag: Any, val choice: Any) { 50 | } 51 | -------------------------------------------------------------------------------- /src/main/scala/org/allenai/pnp/PnpUtil.scala: -------------------------------------------------------------------------------- 1 | package org.allenai.pnp 2 | 3 | import com.jayantkrish.jklol.ccg.lambda2.StaticAnalysis 4 | import com.jayantkrish.jklol.ccg.lambda2.Expression2 5 | 6 | import scala.collection.JavaConverters._ 7 | import com.google.common.base.Preconditions 8 | 9 | /** Utilities for converting logical forms represented using 10 | * {@code Expression2} to neural probabilistic programs. 11 | */ 12 | object PnpUtil { 13 | 14 | /** Convert {@code lf} to a neural probabilistic program. 15 | * {@code bindings} represents the environment in which 16 | * {@code lf} is evaluated; it maps names in {@code lf} to 17 | * their corresponding probabilistic program values. This 18 | * function fails if a name in {@code lf} is not contained 19 | * in {@code bindings}. 20 | * 21 | * Non-function values in bindings can be of any type. 22 | * Functions must have type Vector[AnyRef] => Pnp[AnyRef]. 23 | * The wrap functions below can be used to conveniently 24 | * convert existing functions to this type. 25 | */ 26 | def lfToPnp(lf: Expression2, bindings: Map[String, AnyRef]): Pnp[AnyRef] = { 27 | if (lf.isConstant()) { 28 | if (lf.isStringValue()) { 29 | Pnp.value(lf.getStringValue) 30 | } else { 31 | // Look up the constant's value in bindings. 32 | val valueOption = bindings.get(lf.getConstant) 33 | Preconditions.checkState(valueOption.isDefined, "Unbound variable: %s", lf.getConstant) 34 | 35 | val value = valueOption.get 36 | if (value.isInstanceOf[Pnp[_]]) { 37 | value.asInstanceOf[Pnp[AnyRef]] 38 | } else { 39 | // Wrap non-Pnp values to guarantee that every 40 | // expression evaluates to a Pnp[AnyRef]. 
41 | Pnp.value(value) 42 | } 43 | } 44 | } else if (StaticAnalysis.isLambda(lf)) { 45 | // Create a Scala function representing the lambda. 46 | val args = StaticAnalysis.getLambdaArguments(lf).asScala 47 | val body = StaticAnalysis.getLambdaBody(lf) 48 | 49 | def lambdaValue(argList: Vector[AnyRef]): Pnp[AnyRef] = { 50 | val newBindings = bindings ++ args.zip(argList) 51 | lfToPnp(body, newBindings) 52 | } 53 | Pnp.value(lambdaValue _) 54 | } else { 55 | // Function application. 56 | // Generate the distributions over values for the function 57 | // and each of its arguments. 58 | val subexpressionValues = lf.getSubexpressions.asScala.map(x => lfToPnp(x, bindings)) 59 | val subexpressionListPnp = subexpressionValues.foldLeft(Pnp.value(Vector[AnyRef]()))( 60 | (vecPnp, valPnp) => for { 61 | x <- vecPnp 62 | y <- valPnp 63 | } yield { 64 | x :+ y 65 | } 66 | ) 67 | 68 | // Apply each possible function to its arguments. 69 | for { 70 | valueList <- subexpressionListPnp 71 | args = valueList.slice(1, valueList.size) 72 | numArgs = args.size 73 | func = valueList(0) 74 | 75 | value <- func.asInstanceOf[AnyRef => Pnp[AnyRef]].apply(args) 76 | } yield { 77 | value 78 | } 79 | } 80 | } 81 | 82 | def wrap[A, P](f: A => Pnp[P]): (Vector[AnyRef] => Pnp[P]) = { 83 | x: Vector[AnyRef] => 84 | { 85 | Preconditions.checkArgument( 86 | x.size == 1, 87 | "Wrong number of arguments. Expected 1 got %s", x.size.asInstanceOf[AnyRef] 88 | ) 89 | f(x(0).asInstanceOf[A]) 90 | } 91 | } 92 | 93 | def wrap[A, B, P](f: (A, B) => Pnp[P]): (Vector[AnyRef] => Pnp[P]) = { 94 | x: Vector[AnyRef] => 95 | { 96 | Preconditions.checkArgument( 97 | x.size == 2, 98 | "Wrong number of arguments. Expected 2 got %s", x.size.asInstanceOf[AnyRef] 99 | ) 100 | f(x(0).asInstanceOf[A], x(1).asInstanceOf[B]) 101 | } 102 | } 103 | 104 | def wrap2[A, P](f: A => P): (Vector[AnyRef] => Pnp[P]) = { 105 | x: Vector[AnyRef] => 106 | { 107 | Preconditions.checkArgument( 108 | x.size == 1, 109 | "Wrong number of arguments. Expected 1 got %s", x.size.asInstanceOf[AnyRef] 110 | ) 111 | Pnp.value(f(x(0).asInstanceOf[A])) 112 | } 113 | } 114 | 115 | def wrap2[A, B, P](f: (A, B) => P): (Vector[AnyRef] => Pnp[P]) = { 116 | x: Vector[AnyRef] => 117 | { 118 | Preconditions.checkArgument( 119 | x.size == 2, 120 | "Wrong number of arguments. Expected 2 got %s", x.size.asInstanceOf[AnyRef] 121 | ) 122 | Pnp.value(f(x(0).asInstanceOf[A], x(1).asInstanceOf[B])) 123 | } 124 | } 125 | 126 | def filter[A](f: AnyRef => Pnp[Boolean], elts: List[A]): Pnp[List[A]] = { 127 | elts.foldRight(Pnp.value(List[A]()))( 128 | (elt, list) => for { 129 | t <- f(Vector(elt)) 130 | l <- list 131 | } yield { 132 | if (t) { 133 | elt :: l 134 | } else { 135 | l 136 | } 137 | } 138 | ) 139 | } 140 | 141 | // TODO: make this work for any seq type. 
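  // Hedged illustration, not part of the original API: a small example of using
  // filter with a probabilistic predicate. Each element is kept with probability
  // p and dropped with probability 1 - p, so inference enumerates weighted
  // subsets of the input (the behavior PnpUtilSpec exercises through lfToPnp).
  // The method name is hypothetical.
  def filterExample(probs: List[Double]): Pnp[List[Double]] = {
    // filter passes each element to the predicate wrapped in a Vector[AnyRef],
    // matching the calling convention lfToPnp uses for wrapped functions.
    val keep = (arg: AnyRef) => {
      val p = arg.asInstanceOf[Vector[AnyRef]](0).asInstanceOf[Double]
      Pnp.chooseMap(Seq((true, p), (false, 1.0 - p)))
    }
    filter(keep, probs)
  }
  // For example, filterExample(List(0.6, 0.55)).beamSearch(100) should produce
  // four executions with probabilities 0.33, 0.27, 0.22, and 0.18.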
142 | def map[A,B](f: A => Pnp[B], elts: List[A]): Pnp[List[B]] = { 143 | elts.foldRight(Pnp.value(List[B]()))( 144 | (elt, list) => for { 145 | mapped <- f(elt) 146 | l <- list 147 | } yield { 148 | mapped :: l 149 | } 150 | ) 151 | } 152 | } -------------------------------------------------------------------------------- /src/main/scala/org/allenai/pnp/examples/MultilayerPerceptron.scala: -------------------------------------------------------------------------------- 1 | package org.allenai.pnp.examples 2 | 3 | import scala.collection.JavaConverters._ 4 | import scala.collection.mutable.ListBuffer 5 | import org.allenai.pnp._ 6 | import org.allenai.pnp.Pnp._ 7 | 8 | import com.google.common.base.Preconditions 9 | import com.jayantkrish.jklol.util.IndexedList 10 | import edu.cmu.dynet._ 11 | import com.jayantkrish.jklol.training.NullLogFunction 12 | 13 | class MultilayerPerceptron { 14 | 15 | import MultilayerPerceptron._ 16 | 17 | } 18 | 19 | object MultilayerPerceptron { 20 | 21 | val FEATURE_VECTOR_DIM = 3 22 | val HIDDEN_DIM = 50 23 | val LABEL_DIM = 10 24 | 25 | def mlp(x: FloatVector): Pnp[Boolean] = { 26 | for { 27 | weights1 <- param("layer1Weights") 28 | bias1 <- param("layer1Bias") 29 | weights2 <- param("layer2Weights") 30 | 31 | inputExpression = Expression.input(Dim(FEATURE_VECTOR_DIM), x) 32 | scores = weights2 * Expression.tanh((weights1 * inputExpression) + bias1) 33 | 34 | y <- choose(Array(true, false), scores) 35 | } yield { 36 | y 37 | } 38 | } 39 | 40 | def labelNn(left: Boolean, right: Boolean, cg: CompGraph): Expression = { 41 | val leftParam = cg.getLookupParameter("left") 42 | val rightParam = cg.getLookupParameter("right") 43 | val leftVec = Expression.lookup(leftParam, if (left) { 0 } else { 1 }) 44 | val rightVec = Expression.lookup(rightParam, if (right) { 0 } else { 1 }) 45 | 46 | Expression.dotProduct(leftVec, rightVec) 47 | } 48 | 49 | def sequenceTag(xs: Seq[FloatVector]): Pnp[List[Boolean]] = { 50 | xs.foldLeft(Pnp.value(List[Boolean]()))((x, y) => for { 51 | cur <- mlp(y) 52 | rest <- x 53 | 54 | cg <- Pnp.computationGraph() 55 | _ <- if (rest.length > 0) { 56 | score(labelNn(cur, rest.head, cg)) 57 | } else { 58 | value(()) 59 | } 60 | } yield { 61 | cur :: rest 62 | }) 63 | } 64 | 65 | def main(args: Array[String]): Unit = { 66 | // Initialize dynet 67 | Initialize.initialize() 68 | 69 | val model = PnpModel.init(true) 70 | model.addParameter("layer1Weights", Dim(HIDDEN_DIM, FEATURE_VECTOR_DIM)) 71 | model.addParameter("layer1Bias", Dim(HIDDEN_DIM)) 72 | model.addParameter("layer2Weights", Dim(2, HIDDEN_DIM)) 73 | 74 | val featureVector = new FloatVector(Seq(1.0f, 2f, 3f)) 75 | val dist = mlp(featureVector) 76 | val marginals = dist.beamSearch(2, model) 77 | 78 | for (x <- marginals.executions) { 79 | println(x) 80 | } 81 | 82 | val featureVectors = Seq(featureVector, featureVector, featureVector) 83 | 84 | model.locallyNormalized = false 85 | model.addLookupParameter("left", 2, Dim(LABEL_DIM)) 86 | model.addLookupParameter("right", 2, Dim(LABEL_DIM)) 87 | val dist2 = sequenceTag(featureVectors) 88 | val marginals2 = dist2.beamSearch(5, model) 89 | for (x <- marginals2.executions) { 90 | println(x) 91 | } 92 | 93 | val flip: Pnp[Boolean] = choose(Array(true, false), Array(0.5, 0.5)) 94 | val twoFlips: Pnp[Boolean] = for { 95 | x <- flip 96 | y <- flip 97 | } yield { 98 | x && y 99 | } 100 | val marginals3 = twoFlips.beamSearch(5) 101 | println(marginals3.marginals().getProbabilityMap) 102 | } 103 | } 
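// Hedged addendum, not part of the original file: a minimal sketch of saving the
// model built above and reading it back with PnpModel.save/load (defined in
// PnpModel.scala). The ModelSaver constructor, its done() method, and the file
// path are assumptions made by analogy with the ModelLoader usage in
// TestSemanticParserCli; the object name is hypothetical.
object MultilayerPerceptronSerializationExample {
  import MultilayerPerceptron._

  def main(args: Array[String]): Unit = {
    Initialize.initialize()

    val model = PnpModel.init(true)
    model.addParameter("layer1Weights", Dim(HIDDEN_DIM, FEATURE_VECTOR_DIM))
    model.addParameter("layer1Bias", Dim(HIDDEN_DIM))
    model.addParameter("layer2Weights", Dim(2, HIDDEN_DIM))

    // Write the DyNet model and the name -> parameter maps.
    val saver = new ModelSaver("mlp.model")
    model.save(saver)
    saver.done()

    // Read it back; parameters are recovered under the same names.
    val loader = new ModelLoader("mlp.model")
    val loaded = PnpModel.load(loader)
    loader.done()

    // The reloaded model can be used for inference exactly like the original.
    val marginals = mlp(new FloatVector(Seq(1.0f, 2f, 3f))).beamSearch(2, loaded)
    for (x <- marginals.executions) {
      println(x)
    }
  }
}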
-------------------------------------------------------------------------------- /src/main/scala/org/allenai/pnp/semparse/EntityLinking.scala: -------------------------------------------------------------------------------- 1 | package org.allenai.pnp.semparse 2 | 3 | import scala.collection.mutable.MultiMap 4 | 5 | import com.jayantkrish.jklol.ccg.lambda.Type 6 | import com.jayantkrish.jklol.ccg.lambda2.Expression2 7 | import scala.collection.mutable.ListBuffer 8 | 9 | case class EntityLinking(matches: List[(Span, Entity, List[Int], Double)]) { 10 | val entities = matches.map(_._2).toSet.toList 11 | 12 | val entityMatches = SemanticParser.seqToMultimap( 13 | matches.map(x => (x._2, (x._1, x._3, x._4))).toSeq) 14 | val bestEntityMatches = entityMatches.map(x => (x._1, x._2.maxBy(_._3))) 15 | val bestEntityMatchesList = bestEntityMatches.map(x => (x._2._1, x._1, x._2._2, x._2._3)).toList 16 | 17 | def getEntitiesWithType(t: Type): List[Entity] = { 18 | entities.filter(_.t.equals(t)) 19 | } 20 | } 21 | 22 | case class Entity(val expr: Expression2, val t: Type, 23 | val template: Template, val names: List[List[Int]]) { 24 | } 25 | 26 | class EntityDict(val map: MultiMap[List[Int], Entity]) { 27 | 28 | def lookup(tokenIds: List[Int]): Set[(Entity, List[Int], Double)] = { 29 | if (map.contains(tokenIds)) { 30 | map(tokenIds).map(x => (x, tokenIds, tokenIds.length.asInstanceOf[Double])).toSet 31 | } else { 32 | Set() 33 | } 34 | } 35 | 36 | def link(tokenIds: List[Int]): EntityLinking = { 37 | // This is a naive way to match entity names against the 38 | // question text, but it's probably fast enough for the moment. 39 | val builder = ListBuffer[(Span, Entity, List[Int], Double)]() 40 | for (i <- 0 until tokenIds.length) { 41 | for (j <- (i + 1) to tokenIds.length) { 42 | val entities = lookup(tokenIds.slice(i, j)) 43 | for (entity <- entities) { 44 | builder += ((Span(i, j), entity._1, entity._2, entity._3)) 45 | } 46 | } 47 | } 48 | new EntityLinking(builder.toList) 49 | } 50 | } 51 | 52 | case class Span(val start: Int, val end: Int) -------------------------------------------------------------------------------- /src/main/scala/org/allenai/pnp/semparse/Scope.scala: -------------------------------------------------------------------------------- 1 | package org.allenai.pnp.semparse 2 | 3 | import com.jayantkrish.jklol.ccg.lambda2.Expression2 4 | import com.jayantkrish.jklol.ccg.lambda.Type 5 | 6 | /** A list of typed variables that are in scope. These 7 | * variables are bound in lambda expressions that contain 8 | * the current expression. 9 | */ 10 | case class Scope(val vars: List[(Expression2, Type)]) { 11 | 12 | def getVariableExpressions(t: Type): List[Expression2] = { 13 | vars.filter(_._2.equals(t)).map(_._1) 14 | } 15 | 16 | def getVariableTemplates(t: Type): List[Template] = { 17 | getVariableExpressions(t).map(x => ConstantTemplate(t, x)) 18 | } 19 | 20 | /** Extend this scope with additional variables with the 21 | * corresponding types. 
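   * Returns the extended scope together with the names of the newly
   * created variables; fresh variables are named positionally, e.g.
   * "$0" for the first variable bound in an otherwise empty scope.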
22 | */ 23 | def extend(types: List[Type]): (Scope, List[String]) = { 24 | var varNames = List[String]() 25 | var nextVars = vars 26 | for (t <- types) { 27 | val varName = "$" + nextVars.size 28 | varNames = varName :: varNames 29 | nextVars = (Expression2.constant(varName), t) :: nextVars 30 | } 31 | 32 | val nextScope = new Scope(nextVars) 33 | (nextScope, varNames) 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /src/main/scala/org/allenai/pnp/semparse/SemanticParserState.scala: -------------------------------------------------------------------------------- 1 | package org.allenai.pnp.semparse 2 | 3 | import java.util.Arrays 4 | 5 | import com.google.common.base.Preconditions 6 | import com.jayantkrish.jklol.ccg.lambda.Type 7 | import com.jayantkrish.jklol.ccg.lambda2.Expression2 8 | import edu.cmu.dynet.Expression 9 | 10 | /** State of the semantic parser during expression generation. 11 | * Each hole generated during parsing is assigned a unique id. 12 | * When a template is applied to the hole, the corresponding 13 | * partial expression is mapped to the hole id. Unfilled holes 14 | * are stored in a list that tracks which portions of the 15 | * expression have yet to be generated. 16 | */ 17 | case class SemanticParserState(val parts: Map[Int, ExpressionPart], 18 | val unfilledHoleIds: List[Hole], val nextId: Int, 19 | val numActions: Int, val rootType: Type, 20 | val templates: List[Template], val attentions: List[Expression]) { 21 | 22 | def decodeExpression(partId: Int): Expression2 = { 23 | val part = parts(partId) 24 | 25 | var expr = part.expr 26 | for (i <- 1 to part.holes.length) { 27 | val ind = part.holes.length - i 28 | var subexpr = decodeExpression(part.holeIds(ind)) 29 | expr = expr.substitute(part.holes(ind), subexpr) 30 | } 31 | 32 | expr 33 | } 34 | 35 | def decodeExpression: Expression2 = { 36 | Preconditions.checkState(unfilledHoleIds.length == 0) 37 | decodeExpression(0) 38 | } 39 | 40 | def addAttention(e: Expression): SemanticParserState = { 41 | SemanticParserState(parts, unfilledHoleIds, nextId, numActions, 42 | rootType, templates, e :: attentions) 43 | } 44 | 45 | def getTemplates: Array[Template] = { 46 | templates.reverse.toArray 47 | } 48 | 49 | def getAttentions: Array[Expression] = { 50 | attentions.reverse.toArray 51 | } 52 | 53 | def nextHole(): Option[Hole] = { 54 | if (unfilledHoleIds.size > 0) { 55 | Some(unfilledHoleIds(0)) 56 | } else { 57 | None 58 | } 59 | } 60 | 61 | def fill(hole: Hole, part: ExpressionPart, newHoles: List[Hole], template: Template): SemanticParserState = { 62 | Preconditions.checkArgument(unfilledHoleIds(0).id == hole.id) 63 | val partTuple = (hole.id, part) 64 | 65 | val unfilledHoles = if (hole.repeated) { 66 | unfilledHoleIds 67 | } else { 68 | unfilledHoleIds.drop(1) 69 | } 70 | val nextHoles = newHoles ++ unfilledHoles 71 | 72 | SemanticParserState(parts + partTuple, nextHoles, nextId + newHoles.length, 73 | numActions + 1, rootType, template :: templates, attentions) 74 | } 75 | 76 | def drop(hole: Hole, template: Template): SemanticParserState = { 77 | Preconditions.checkArgument(unfilledHoleIds(0).id == hole.id) 78 | SemanticParserState(parts, unfilledHoleIds.drop(1), nextId, 79 | numActions + 1, rootType, template :: templates, attentions) 80 | } 81 | 82 | def hasRootType: Boolean = { 83 | rootType != null 84 | } 85 | 86 | def addRootType(rootType: Type): SemanticParserState = { 87 | Preconditions.checkState(unfilledHoleIds.length == 0 && numActions == 0, 88 | 
"The root type can only be added at the beginning of parsing".asInstanceOf[AnyRef]) 89 | 90 | val scope = Scope(List.empty) 91 | SemanticParserState(parts, List(Hole(0, rootType, scope, false)), 1, 0, rootType, List(), List()) 92 | } 93 | } 94 | 95 | object SemanticParserState { 96 | 97 | /** The start state of a semantic parser. The expected 98 | * use of this state is to call addRootType, followed by 99 | * applying a sequence of templates. 100 | */ 101 | def start(): SemanticParserState = { 102 | SemanticParserState(Map.empty, List(), 1, 0, null, List(), List()) 103 | } 104 | } 105 | 106 | case class ExpressionPart(val expr: Expression2, 107 | val holes: Array[Int], val holeIds: Array[Int]) { 108 | Preconditions.checkArgument(holes.length == holeIds.length) 109 | 110 | override def toString: String = { 111 | "ExpressionPart(" + expr + ", " + Arrays.toString(holes) + ", " + Arrays.toString(holeIds) 112 | } 113 | } 114 | 115 | case class Hole(id: Int, t: Type, scope: Scope, repeated: Boolean) -------------------------------------------------------------------------------- /src/main/scala/org/allenai/pnp/semparse/SemanticParserUtils.scala: -------------------------------------------------------------------------------- 1 | package org.allenai.pnp.semparse 2 | 3 | import scala.collection.JavaConverters._ 4 | import scala.collection.mutable.ListBuffer 5 | import scala.collection.mutable.{Map => MutableMap} 6 | import org.allenai.pnp.{Env, Pnp, PnpInferenceContext} 7 | 8 | import com.jayantkrish.jklol.ccg.CcgExample 9 | import com.jayantkrish.jklol.ccg.lambda.Type 10 | import com.jayantkrish.jklol.ccg.lambda.TypeDeclaration 11 | import com.jayantkrish.jklol.ccg.lambda2.StaticAnalysis 12 | import com.jayantkrish.jklol.training.NullLogFunction 13 | import com.jayantkrish.jklol.util.CountAccumulator 14 | import edu.cmu.dynet._ 15 | import com.jayantkrish.jklol.ccg.lambda2.Expression2 16 | 17 | object SemanticParserUtils { 18 | 19 | val DYNET_PARAMS = Map( 20 | "dynet-mem" -> "1024" 21 | ) 22 | 23 | /** 24 | * Count the number of occurrences of each word type 25 | * in a collection of examples. 26 | */ 27 | def getWordCounts(examples: Seq[CcgExample]): CountAccumulator[String] = { 28 | val acc = CountAccumulator.create[String] 29 | for (ex <- examples) { 30 | ex.getSentence.getWords.asScala.map(x => acc.increment(x, 1.0)) 31 | } 32 | acc 33 | } 34 | 35 | /** 36 | * Checks that {@code lf} is well-typed using {@code typeDeclaration}. 37 | */ 38 | def validateTypes(lf: Expression2, typeDeclaration: TypeDeclaration): Boolean = { 39 | val typeInference = StaticAnalysis.typeInference(lf, TypeDeclaration.TOP, typeDeclaration) 40 | 41 | val constraints = typeInference.getSolvedConstraints 42 | val typeMap = typeInference.getExpressionTypes.asScala 43 | 44 | if (!constraints.isSolvable) { 45 | // Type inference generated unsolvable type constraints. 46 | println(lf) 47 | println(typeInference.getConstraints) 48 | println(typeInference.getSolvedConstraints) 49 | 50 | for (i <- 0 until lf.size()) { 51 | if (typeMap.contains(i)) { 52 | val t = typeMap(i) 53 | println(" " + i + " " + t + " " + lf.getSubexpression(i)) 54 | } 55 | } 56 | 57 | false 58 | } else { 59 | // Check that every subexpression is assigned a fully-instantiated 60 | // type (i.e., no type variables), and that no types are 61 | // TOP or BOTTOM. 
62 | val goodTypes = for { 63 | i <- 0 until lf.size() 64 | if typeMap.contains(i) 65 | } yield { 66 | val t = typeMap(i) 67 | val goodType = !isBadType(t) 68 | if (!goodType) { 69 | println(lf) 70 | println(" B " + i + " " + t + " " + lf.getSubexpression(i)) 71 | } 72 | goodType 73 | } 74 | 75 | goodTypes.fold(true)(_ && _) 76 | } 77 | } 78 | 79 | private def isBadType(t: Type): Boolean = { 80 | if (t.isAtomic) { 81 | if (t.hasTypeVariables || t.equals(TypeDeclaration.TOP) || t.equals(TypeDeclaration.BOTTOM)) { 82 | true 83 | } else { 84 | false 85 | } 86 | } else { 87 | return isBadType(t.getArgumentType) || isBadType(t.getReturnType) 88 | } 89 | } 90 | 91 | /** Verify that the parser can generate the logical form 92 | * in each training example when the search is constrained 93 | * by the execution oracle. 94 | */ 95 | def validateActionSpace(examples: Seq[CcgExample], parser: SemanticParser, 96 | typeDeclaration: TypeDeclaration): Unit = { 97 | println("") 98 | var maxParts = 0 99 | var numFailed = 0 100 | val usedRules = ListBuffer[(Type, Template)]() 101 | for (e <- examples) { 102 | val sent = e.getSentence 103 | val tokenIds = sent.getAnnotation("tokenIds").asInstanceOf[Array[Int]] 104 | val entityLinking = sent.getAnnotation("entityLinking").asInstanceOf[EntityLinking] 105 | 106 | val oracleOpt = parser.getLabelScore(e.getLogicalForm, entityLinking, typeDeclaration) 107 | 108 | if (oracleOpt.isDefined) { 109 | val oracle = oracleOpt.get 110 | ComputationGraph.renew() 111 | val dist = parser.parse(tokenIds, entityLinking) 112 | val context = PnpInferenceContext.init(parser.model).addExecutionScore(oracle) 113 | val results = dist.beamSearch(1, 50, Env.init, context) 114 | if (results.executions.size != 1) { 115 | println("ERROR: " + e + " " + results) 116 | println(" " + e.getSentence.getWords) 117 | println(" " + e.getLogicalForm) 118 | println(" " + e.getSentence.getAnnotation("entityLinking")) 119 | 120 | numFailed += 1 121 | } else { 122 | val numParts = results.executions(0).value.parts.size 123 | maxParts = Math.max(numParts, maxParts) 124 | if (results.executions.length > 1) { 125 | println("MULTIPLE: " + results.executions.length + " " + e) 126 | println(" " + e.getSentence.getWords) 127 | println(" " + e.getLogicalForm) 128 | println(" " + e.getSentence.getAnnotation("entityLinking")) 129 | } else { 130 | // println("OK : " + numParts + " " + " " 131 | } 132 | } 133 | 134 | // Accumulate the rules used in each example 135 | usedRules ++= oracle.holeTypes.zip(oracle.templates) 136 | 137 | // Print out the rules used to generate this logical form. 
138 | /* 139 | println(e.getLogicalForm) 140 | for (t <- oracle.templates) { 141 | println(" " + t) 142 | } 143 | */ 144 | 145 | } else { 146 | println("ORACLE: " + e) 147 | println(" " + e.getSentence.getWords) 148 | println(" " + e.getLogicalForm) 149 | println(" " + e.getSentence.getAnnotation("entityLinking")) 150 | 151 | numFailed += 1 152 | } 153 | } 154 | println("max parts: " + maxParts) 155 | println("decoding failures: " + numFailed) 156 | 157 | val holeTypes = usedRules.map(_._1).toSet 158 | val countMap = MutableMap[Type, CountAccumulator[Template]]() 159 | for (t <- holeTypes) { 160 | countMap(t) = CountAccumulator.create() 161 | } 162 | 163 | for ((t, template) <- usedRules) { 164 | countMap(t).increment(template, 1.0) 165 | } 166 | 167 | for (t <- holeTypes) { 168 | println(t) 169 | val counts = countMap(t) 170 | for (template <- counts.getSortedKeys.asScala) { 171 | val count = counts.getCount(template) 172 | println(" " + count + " " + template) 173 | } 174 | } 175 | } 176 | } -------------------------------------------------------------------------------- /src/main/scala/org/allenai/pnp/semparse/Template.scala: -------------------------------------------------------------------------------- 1 | package org.allenai.pnp.semparse 2 | 3 | import scala.collection.JavaConverters._ 4 | import scala.collection.mutable.ListBuffer 5 | 6 | import com.jayantkrish.jklol.ccg.lambda.Type 7 | import com.jayantkrish.jklol.ccg.lambda2.Expression2 8 | import com.jayantkrish.jklol.ccg.lambda2.StaticAnalysis 9 | 10 | /** Template represents an action that expands a type 11 | * to an expression. The expression itself may contain 12 | * typed "holes" to be filled by future templates. 13 | * Applying a template to a semantic parser state 14 | * expands the current hole in the state with the template, 15 | * returning the next state. 16 | */ 17 | sealed trait Template { 18 | val root: Type 19 | val holeIndexes: Array[Int] 20 | 21 | /** Update state by applying this template to expand the 22 | * current hole. 23 | */ 24 | def apply(state: SemanticParserState): SemanticParserState 25 | 26 | /** Returns true if the subexpression at expIndex of exp could 27 | * be generated by using this template. 
28 | */ 29 | def matches(expIndex: Int, exp: Expression2, typeMap: Map[Integer, Type]): Boolean 30 | } 31 | 32 | /** A function application template that rewrites a type 33 | * as a function type applied to one or more argument types, 34 | * e.g., t -> ( ) 35 | */ 36 | case class ApplicationTemplate(val root: Type, val expr: Expression2, 37 | val holes: List[(Int, Type, Boolean)]) extends Template { 38 | 39 | val holeIndexes = holes.map(_._1).toArray 40 | val holeTypes = holes.map(_._2).toArray 41 | 42 | override def apply(state: SemanticParserState): SemanticParserState = { 43 | val holeIds = ListBuffer.empty[Int] 44 | for (i <- state.nextId until (state.nextId + holeIndexes.length)) { 45 | holeIds += i 46 | } 47 | 48 | val filled = state.nextHole().get 49 | val holeScope = filled.scope 50 | val part = ExpressionPart(expr, holeIndexes.toArray, holeIds.toArray) 51 | val newHoles = holeIds.zip(holes).map(x => Hole(x._1, x._2._2, holeScope, x._2._3)) 52 | 53 | state.fill(filled, part, newHoles.toList, this) 54 | } 55 | 56 | override def matches(expIndex: Int, exp: Expression2, typeMap: Map[Integer, Type]): Boolean = { 57 | val subexp = exp.getSubexpression(expIndex) 58 | if (!subexp.isConstant) { 59 | val childIndexes = exp.getChildIndexes(expIndex).toList 60 | val exprChildIndexes = expr.getChildIndexes(0).toList 61 | if (childIndexes.length == exprChildIndexes.length) { 62 | for (i <- 0 until childIndexes.length) { 63 | val child = childIndexes(i) 64 | val exprChild = exprChildIndexes(i) 65 | 66 | if (holeIndexes.contains(exprChild)) { 67 | if (!typeMap(child).equals(holeTypes(holeIndexes.indexOf(exprChild)))) { 68 | return false 69 | } 70 | } else { 71 | if (!exp.getSubexpression(child).equals(expr.getSubexpression(exprChild))) { 72 | return false 73 | } 74 | } 75 | } 76 | true 77 | } else { 78 | false 79 | } 80 | } else { 81 | false 82 | } 83 | } 84 | 85 | override def toString(): String = { 86 | root + " -> " + expr 87 | } 88 | } 89 | 90 | object ApplicationTemplate { 91 | def fromTypes(root: Type, holeTypes: List[(Type, Boolean)]): ApplicationTemplate = { 92 | val varNames: ListBuffer[Expression2] = ListBuffer.empty 93 | val holesBuffer: ListBuffer[Int] = ListBuffer.empty 94 | for (i <- 1 to holeTypes.length) { 95 | varNames += Expression2.constant(holeTypes(i - 1)._1.toString) 96 | holesBuffer += i 97 | } 98 | 99 | val expr = Expression2.nested(varNames.toList.asJava) 100 | val holeIndexes = holesBuffer.toList 101 | 102 | val holes = holeIndexes.zip(holeTypes).map(x => (x._1, x._2._1, x._2._2)) 103 | 104 | ApplicationTemplate(root, expr, holes) 105 | } 106 | } 107 | 108 | /** A template generating a constant, e.g., argmax:<,t>. 109 | * This template is the base case of expression generation as 110 | * it has no holes. 111 | */ 112 | case class ConstantTemplate(val root: Type, val expr: Expression2) extends Template { 113 | val holeIndexes = Array[Int]() 114 | 115 | override def apply(state: SemanticParserState): SemanticParserState = { 116 | val filled = state.nextHole().get 117 | val part = ExpressionPart(expr, Array.empty[Int], Array.empty[Int]) 118 | state.fill(filled, part, List(), this) 119 | } 120 | 121 | override def matches(expIndex: Int, exp: Expression2, typeMap: Map[Integer, Type]): Boolean = { 122 | exp.getSubexpression(expIndex).equals(expr) 123 | } 124 | 125 | override def toString(): String = { 126 | root + " -> " + expr 127 | } 128 | } 129 | 130 | /** A template that generates a lambda expression. 
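 * Applying it fills the current hole with a lambda expression whose
 * arguments are fresh variables added to the scope, and opens a single
 * new hole for the lambda body.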
131 | */ 132 | case class LambdaTemplate(val root: Type, val args: List[Type], val body: Type) extends Template { 133 | val holeIndexes = Array[Int](3 + args.length) 134 | 135 | override def apply(state: SemanticParserState): SemanticParserState = { 136 | val filled = state.nextHole().get 137 | val currentScope = filled.scope 138 | val (nextScope, varNames) = currentScope.extend(args) 139 | 140 | val expr = Expression2.lambda(varNames.asJava, Expression2.constant("TEMP")) 141 | 142 | val hole = StaticAnalysis.getLambdaBodyIndex(expr, 0) 143 | val holeId = state.nextId 144 | 145 | val part = ExpressionPart(expr, Array(hole), Array(holeId)) 146 | 147 | state.fill(filled, part, List(Hole(holeId, body, nextScope, false)), this) 148 | } 149 | 150 | override def matches(expIndex: Int, exp: Expression2, typeMap: Map[Integer, Type]): Boolean = { 151 | if (StaticAnalysis.isLambda(exp, expIndex)) { 152 | val subexpArgIndexes = StaticAnalysis.getLambdaArgumentIndexes(exp, expIndex).toList 153 | val subexpBodyIndex = StaticAnalysis.getLambdaBodyIndex(exp, expIndex) 154 | 155 | subexpArgIndexes.map(typeMap(_)).equals(args) && typeMap(subexpBodyIndex).equals(body) 156 | } else { 157 | false 158 | } 159 | } 160 | 161 | override def toString(): String = { 162 | root + " -> (lambda (" + args.zipWithIndex.map(x => "$" + x._2 + ":" + x._1).mkString(" ") + 163 | ") " + body + ")" 164 | } 165 | } 166 | 167 | case class DropTemplate(val root: Type) extends Template { 168 | val holeIndexes = Array[Int]() 169 | 170 | override def apply(state: SemanticParserState): SemanticParserState = { 171 | val filled = state.nextHole.get 172 | state.drop(filled, this) 173 | } 174 | 175 | override def matches(expIndex: Int, exp: Expression2, typeMap: Map[Integer, Type]): Boolean = { 176 | // TODO 177 | return false 178 | } 179 | 180 | override def toString(): String = { 181 | root + "-> " 182 | } 183 | } -------------------------------------------------------------------------------- /src/main/scala/org/allenai/pnp/semparse/TestSemanticParserCli.scala: -------------------------------------------------------------------------------- 1 | package org.allenai.pnp.semparse 2 | 3 | import scala.collection.JavaConverters._ 4 | import scala.collection.mutable.ListBuffer 5 | import org.allenai.pnp.{Env, PnpInferenceContext, PnpModel} 6 | 7 | import com.jayantkrish.jklol.ccg.CcgExample 8 | import com.jayantkrish.jklol.ccg.cli.TrainSemanticParser 9 | import com.jayantkrish.jklol.ccg.lambda.TypeDeclaration 10 | import com.jayantkrish.jklol.ccg.lambda2.ExpressionComparator 11 | import com.jayantkrish.jklol.ccg.lambda2.ExpressionSimplifier 12 | import com.jayantkrish.jklol.ccg.lambda2.SimplificationComparator 13 | import com.jayantkrish.jklol.cli.AbstractCli 14 | import com.jayantkrish.jklol.experiments.geoquery.GeoqueryUtil 15 | import com.jayantkrish.jklol.training.NullLogFunction 16 | import edu.cmu.dynet._ 17 | import joptsimple.OptionParser 18 | import joptsimple.OptionSet 19 | import joptsimple.OptionSpec 20 | 21 | 22 | class TestSemanticParserCli extends AbstractCli() { 23 | 24 | var entityDataOpt: OptionSpec[String] = null 25 | var testDataOpt: OptionSpec[String] = null 26 | var modelOpt: OptionSpec[String] = null 27 | 28 | override def initializeOptions(parser: OptionParser): Unit = { 29 | entityDataOpt = parser.accepts("entityData").withRequiredArg().ofType(classOf[String]).withValuesSeparatedBy(',').required() 30 | testDataOpt = 
parser.accepts("testData").withRequiredArg().ofType(classOf[String]).withValuesSeparatedBy(',').required() 31 | modelOpt = parser.accepts("model").withRequiredArg().ofType(classOf[String]).required() 32 | } 33 | 34 | override def run(options: OptionSet): Unit = { 35 | Initialize.initialize(SemanticParserUtils.DYNET_PARAMS) 36 | 37 | // Initialize expression processing for Geoquery logical forms. 38 | val typeDeclaration = GeoqueryUtil.getSimpleTypeDeclaration() 39 | val simplifier = GeoqueryUtil.getExpressionSimplifier 40 | val comparator = new SimplificationComparator(simplifier) 41 | 42 | val entityData = ListBuffer[CcgExample]() 43 | for (filename <- options.valuesOf(entityDataOpt).asScala) { 44 | entityData ++= TrainSemanticParser.readCcgExamples(filename).asScala 45 | } 46 | 47 | val testData = ListBuffer[CcgExample]() 48 | if (options.has(testDataOpt)) { 49 | for (filename <- options.valuesOf(testDataOpt).asScala) { 50 | testData ++= TrainSemanticParser.readCcgExamples(filename).asScala 51 | } 52 | } 53 | println(testData.size + " test examples") 54 | 55 | val loader = new ModelLoader(options.valueOf(modelOpt)) 56 | val model = PnpModel.load(loader) 57 | val parser = SemanticParser.load(loader, model) 58 | loader.done() 59 | 60 | val vocab = parser.vocab 61 | 62 | val entityDict = TrainSemanticParserCli.buildEntityDictionary(entityData, 63 | vocab, typeDeclaration) 64 | 65 | val testPreprocessed = testData.map(x => 66 | TrainSemanticParserCli.preprocessExample(x, simplifier, vocab, entityDict)) 67 | 68 | println("*** Running Evaluation ***") 69 | val results = test(testPreprocessed, parser, typeDeclaration, simplifier, comparator) 70 | } 71 | 72 | /** Evaluate the test accuracy of parser on examples. Logical 73 | * forms are compared for equality using comparator. 
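   * Reports exact-match accuracy of the highest-scoring parse as well as
   * an oracle accuracy over the top ten parses on the beam.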
74 | */ 75 | def test(examples: Seq[CcgExample], parser: SemanticParser, 76 | typeDeclaration: TypeDeclaration, simplifier: ExpressionSimplifier, 77 | comparator: ExpressionComparator): SemanticParserLoss = { 78 | println("") 79 | var numCorrect = 0 80 | var numCorrectAt10 = 0 81 | for (e <- examples) { 82 | ComputationGraph.renew() 83 | val context = PnpInferenceContext.init(parser.model) 84 | 85 | println(e.getSentence.getWords.asScala.mkString(" ")) 86 | println(e.getSentence.getAnnotation("originalTokens").asInstanceOf[List[String]].mkString(" ")) 87 | println("expected: " + e.getLogicalForm) 88 | 89 | val sent = e.getSentence 90 | val dist = parser.parse( 91 | sent.getAnnotation("tokenIds").asInstanceOf[Array[Int]], 92 | sent.getAnnotation("entityLinking").asInstanceOf[EntityLinking]) 93 | val results = dist.beamSearch(5, 75, Env.init, context) 94 | 95 | val beam = results.executions.slice(0, 10) 96 | val correct = beam.map { x => 97 | val simplified = simplifier.apply(x.value.decodeExpression) 98 | if (comparator.equals(e.getLogicalForm, simplified)) { 99 | println("* " + x.logProb.formatted("%02.3f") + " " + simplified) 100 | true 101 | } else { 102 | println(" " + x.logProb.formatted("%02.3f") + " " + simplified) 103 | false 104 | } 105 | } 106 | 107 | if (correct.length > 0 && correct(0)) { 108 | numCorrect += 1 109 | } 110 | if (correct.fold(false)(_ || _)) { 111 | numCorrectAt10 += 1 112 | } 113 | 114 | // Print the attentions of the best predicted derivation 115 | if (beam.length > 0) { 116 | val state = beam(0).value 117 | val templates = state.getTemplates 118 | val attentions = state.getAttentions 119 | val tokens = e.getSentence.getWords.asScala.toArray 120 | for (i <- 0 until templates.length) { 121 | val floatVector = ComputationGraph.getValue(attentions(i)).toVector 122 | val values = for { 123 | j <- 0 until floatVector.size 124 | } yield { 125 | floatVector(j) 126 | } 127 | 128 | val maxIndex = values.zipWithIndex.max._2 129 | 130 | val tokenStrings = for { 131 | j <- 0 until values.length 132 | } yield { 133 | val color = if (j == maxIndex) { 134 | Console.RED 135 | } else if (values(j) > 0.1) { 136 | Console.YELLOW 137 | } else { 138 | Console.RESET 139 | } 140 | 141 | color + tokens(j) + Console.RESET 142 | } 143 | println(" " + tokenStrings.mkString(" ") + " " + templates(i)) 144 | } 145 | } 146 | } 147 | 148 | val loss = SemanticParserLoss(numCorrect, numCorrectAt10, examples.length) 149 | println(loss) 150 | loss 151 | } 152 | } 153 | 154 | case class SemanticParserLoss(numCorrect: Int, oracleNumCorrect: Int, numExamples: Int) { 155 | val accuracy: Double = numCorrect.asInstanceOf[Double] / numExamples 156 | val oracleAccuracy: Double = oracleNumCorrect.asInstanceOf[Double] / numExamples 157 | 158 | override def toString(): String = { 159 | "accuracy: " + accuracy + " " + numCorrect + " / " + numExamples + "\n" + 160 | "oracle : " + oracleAccuracy + " " + oracleNumCorrect + " / " + numExamples 161 | } 162 | } 163 | 164 | object TestSemanticParserCli { 165 | def main(args: Array[String]): Unit = { 166 | (new TestSemanticParserCli()).run(args) 167 | } 168 | } -------------------------------------------------------------------------------- /src/main/scala/org/allenai/pnp/util/Trie.scala: -------------------------------------------------------------------------------- 1 | package org.allenai.pnp.util 2 | 3 | import scala.collection.mutable.{ Map => MutableMap, Set => MutableSet } 4 | 5 | class Trie[T] { 6 | 7 | val next = MutableMap[Int, MutableMap[T, Int]]() 8 | val 
startNodeId = 0 9 | var numNodes = 0 10 | val inTrie = MutableSet[Int]() 11 | 12 | /** 13 | * Insert a key into this trie. 14 | */ 15 | def insert(key: Seq[T]): Unit = { 16 | if (!next.contains(startNodeId)) { 17 | next.put(startNodeId, MutableMap[T, Int]()) 18 | numNodes += 1 19 | } 20 | 21 | insertHelper(key, startNodeId) 22 | } 23 | 24 | private def insertHelper(key: Seq[T], currentNodeId: Int): Unit = { 25 | if (key.size == 0) { 26 | inTrie.add(currentNodeId) 27 | } else { 28 | if (!next(currentNodeId).contains(key.head)) { 29 | val nextNodeId = numNodes 30 | numNodes += 1 31 | next.put(nextNodeId, MutableMap[T, Int]()) 32 | next(currentNodeId).put(key.head, nextNodeId) 33 | } 34 | 35 | val nextNodeId = next(currentNodeId)(key.head) 36 | insertHelper(key.tail, nextNodeId) 37 | } 38 | } 39 | 40 | /** 41 | * Lookup a key prefix in this trie. If the trie contains 42 | * a key with that prefix, returns the id of the trie node 43 | * corresponding to that prefix. 44 | */ 45 | def lookup(keyPrefix: Seq[T]): Option[Int] = { 46 | if (numNodes > 0) { 47 | lookup(keyPrefix, startNodeId) 48 | } else { 49 | None 50 | } 51 | } 52 | 53 | def lookup(keyPrefix: Seq[T], currentNodeId: Int): Option[Int] = { 54 | if (keyPrefix.size == 0) { 55 | Some(currentNodeId) 56 | } else { 57 | val nextEdges = next(currentNodeId) 58 | val nextNodeId = nextEdges.get(keyPrefix.head) 59 | if (nextNodeId.isDefined) { 60 | lookup(keyPrefix.tail, nextNodeId.get) 61 | } else { 62 | None 63 | } 64 | } 65 | } 66 | 67 | /** 68 | * Gets the map to next trie nodes for a current node. 69 | */ 70 | def getNextMap(nodeId: Int): MutableMap[T, Int] = { 71 | next(nodeId) 72 | } 73 | } -------------------------------------------------------------------------------- /src/test/scala/org/allenai/pnp/BsoTrainerSpec.scala: -------------------------------------------------------------------------------- 1 | package org.allenai.pnp 2 | 3 | import scala.collection.JavaConverters._ 4 | 5 | import org.allenai.pnp.examples.Seq2Seq 6 | import org.scalatest.FlatSpec 7 | import org.scalatest.Matchers 8 | 9 | import com.jayantkrish.jklol.training.NullLogFunction 10 | import com.jayantkrish.jklol.util.IndexedList 11 | 12 | import edu.cmu.dynet._ 13 | import com.jayantkrish.jklol.training.DefaultLogFunction 14 | 15 | class BsoTrainerSpec extends FlatSpec with Matchers { 16 | 17 | Initialize.initialize() 18 | 19 | val TOLERANCE = 0.01 20 | 21 | val rawData = Array(("hola", "hi "), 22 | ("como estas", "how are you ")) 23 | 24 | val data = rawData.map(x => (x._1.split(" ").toList, x._2.split(" ").toList)) 25 | 26 | val sourceVocab = IndexedList.create(data.flatMap(_._1).toSet.asJava) 27 | val targetVocab = IndexedList.create(data.flatMap(_._2).toSet.asJava) 28 | val endTokenIndex = targetVocab.getIndex("") 29 | 30 | val indexedData = for { 31 | d <- data 32 | } yield { 33 | val sourceIndexes = d._1.map(x => sourceVocab.getIndex(x)).toArray 34 | val targetIndexes = d._2.map(x => targetVocab.getIndex(x)).toArray 35 | (sourceIndexes, targetIndexes) 36 | } 37 | 38 | def getSeq2Seq(): Seq2Seq[String, String] = { 39 | val model = PnpModel.init(false) 40 | Seq2Seq.create(sourceVocab, targetVocab, endTokenIndex, model) 41 | } 42 | 43 | def runTest(seq2seq: Seq2Seq[String, String], input: String, expected: String): Unit = { 44 | val inputIndexes = input.split(" ").map(x => sourceVocab.getIndex(x)).toArray 45 | val expectedIndexes = expected.split(" ").map(x => targetVocab.getIndex(x)).toArray 46 | val unconditional = seq2seq.applyEncoded(inputIndexes) 47 | 48 | 
ComputationGraph.renew() 49 | val context = PnpInferenceContext.init(seq2seq.model) 50 | 51 | val marginals = unconditional.beamSearch(10, 10, Env.init, context) 52 | 53 | marginals.executions.size should be > 0 54 | /* 55 | for (x <- marginals.executions) { 56 | println(x.logProb + " " + x.value.map(i => targetVocab.get(i)).mkString(" ")) 57 | } 58 | */ 59 | 60 | marginals.executions(0).value.toList should be (expectedIndexes.toList) 61 | } 62 | 63 | 64 | "BsoTrainerSpec" should "train seq2seq models" in { 65 | val seq2seq = getSeq2Seq() 66 | val model = seq2seq.model 67 | 68 | val examples = for { 69 | d <- indexedData 70 | } yield { 71 | val unconditional = seq2seq.applyEncoded(d._1) 72 | val oracle = seq2seq.getMarginCostEncoded(d._2) 73 | PnpExample(unconditional, unconditional, Env.init, oracle) 74 | } 75 | 76 | val sgd = new SimpleSGDTrainer(model.model, 0.1f, 0.01f) 77 | val trainer = new BsoTrainer(50, 2, 10, model, sgd, new NullLogFunction()) 78 | trainer.train(examples) 79 | 80 | for (d <- rawData) { 81 | runTest(seq2seq, d._1, d._2) 82 | } 83 | } 84 | } 85 | 86 | object BsoTrainerSpec { 87 | 88 | 89 | 90 | } -------------------------------------------------------------------------------- /src/test/scala/org/allenai/pnp/GlobalLoglikelihoodTrainerSpec.scala: -------------------------------------------------------------------------------- 1 | package org.allenai.pnp 2 | 3 | import org.allenai.pnp.ExecutionScore.ExecutionScore 4 | import org.scalatest.FlatSpec 5 | import org.scalatest.Matchers 6 | 7 | import com.jayantkrish.jklol.training.NullLogFunction 8 | 9 | import edu.cmu.dynet._ 10 | import com.jayantkrish.jklol.util.IndexedList 11 | import com.jayantkrish.jklol.training.NullLogFunction 12 | 13 | class GlobalLoglikelihoodTrainerSpec extends FlatSpec with Matchers { 14 | 15 | Initialize.initialize() 16 | 17 | val TOLERANCE = 0.01 18 | 19 | 20 | 21 | "GlobalLoglikelihoodTrainer" should "train" in { 22 | val vocab = Array(0,1,2) 23 | 24 | val model = PnpModel.init(false) 25 | val startParam = model.addParameter("start", Dim(vocab.length)) 26 | val transitionParam = model.addParameter("transition", Dim(vocab.length * vocab.length)) 27 | 28 | def lm(k: Int): Pnp[Array[Int]] = { 29 | if (k == 1) { 30 | for { 31 | params <- Pnp.param("start") 32 | choice <- Pnp.choose(vocab, params, k - 1) 33 | } yield { 34 | Array(choice) 35 | } 36 | } else { 37 | for { 38 | rest <- lm(k - 1) 39 | previous = rest.last 40 | transition <- Pnp.param("transition") 41 | params = Expression.pickrange( 42 | transition, previous * vocab.length, (previous + 1) * vocab.length) 43 | choice <- Pnp.choose(vocab, params, k - 1) 44 | } yield { 45 | rest ++ Array(choice) 46 | } 47 | } 48 | } 49 | 50 | def makeOracle(label: Array[Int]): ExecutionScore = { 51 | new ExecutionScore() { 52 | def apply(tag: Any, choice: Any, env: Env): Double = { 53 | if (tag != null && tag.isInstanceOf[Int]) { 54 | val tagInt = tag.asInstanceOf[Int] 55 | if (tagInt >= 0 && tagInt < label.length) { 56 | if (choice == label(tagInt)) { 57 | 0.0 58 | } else { 59 | Double.NegativeInfinity 60 | } 61 | } else { 62 | Double.NegativeInfinity 63 | } 64 | } else { 65 | 0.0 66 | } 67 | } 68 | } 69 | } 70 | 71 | 72 | val examples = List( 73 | PnpExample(lm(3), lm(3), Env.init, makeOracle(Array(0,1,0))), 74 | PnpExample(lm(3), lm(3), Env.init, makeOracle(Array(0,1,2))) 75 | ) 76 | 77 | val sgd = new SimpleSGDTrainer(model.model, 0.1f, 0.1f) 78 | val trainer = new GlobalLoglikelihoodTrainer(1000, 100, -1, model, sgd, new NullLogFunction()) 79 | // 
val trainer = new BsoTrainer(100, 1, -1, model, sgd, new NullLogFunction()) 80 | 81 | trainer.train(examples) 82 | } 83 | } 84 | -------------------------------------------------------------------------------- /src/test/scala/org/allenai/pnp/PnpUtilSpec.scala: -------------------------------------------------------------------------------- 1 | package org.allenai.pnp 2 | 3 | import scala.collection.JavaConverters._ 4 | 5 | import org.scalatest._ 6 | import org.scalatest.Matchers 7 | 8 | import com.jayantkrish.jklol.ccg.lambda.ExpressionParser 9 | 10 | class PnpUtilSpec extends FlatSpec with Matchers { 11 | 12 | val TOLERANCE = 0.0001 13 | val parser = ExpressionParser.expression2 14 | 15 | def flip(p: Double): Pnp[Boolean] = { 16 | Pnp.chooseMap(Seq((true, p), (false, 1.0 - p))) 17 | } 18 | 19 | val bindings = Map[String, AnyRef]( 20 | "true" -> true.asInstanceOf[AnyRef], 21 | "false" -> false.asInstanceOf[AnyRef], 22 | "coin" -> Pnp.chooseMap(Seq((true, 0.6), (false, 0.4))), 23 | "flipProb" -> 0.6.asInstanceOf[AnyRef], 24 | "flipProb2" -> 0.55.asInstanceOf[AnyRef], 25 | "flip" -> PnpUtil.wrap(flip _), 26 | "filter" -> PnpUtil.wrap(PnpUtil.filter _), 27 | "list" -> { x: Vector[AnyRef] => Pnp.value(x.toList) }, 28 | "concat" -> PnpUtil.wrap2({ (x: String, y: String) => x ++ y }) 29 | ) 30 | 31 | def runTest[A](exprString: String, expected: Seq[(A, Double)]): Unit = { 32 | val expr = parser.parse(exprString) 33 | val pp = PnpUtil.lfToPnp(expr, bindings) 34 | 35 | val values = pp.beamSearch(100).executions.map(x => (x.value, x.prob)) 36 | 37 | for ((value, expected) <- values.zip(expected)) { 38 | value._1 should be(expected._1) 39 | value._2 should be(expected._2 +- TOLERANCE) 40 | } 41 | } 42 | 43 | "PpUtil" should "correctly interpret constants" in { 44 | runTest("coin", Seq((true, 0.6), (false, 0.4))) 45 | } 46 | 47 | it should "correctly interpret string constants" in { 48 | runTest("\"foo\"", Seq(("foo", 1.0))) 49 | } 50 | 51 | it should "correctly interpret applications" in { 52 | runTest("(flip flipProb)", Seq((true, 0.6), (false, 0.4))) 53 | } 54 | 55 | it should "correctly interpret applications (2)" in { 56 | runTest("(list flipProb)", Seq((List(0.6), 1.0))) 57 | } 58 | 59 | it should "correctly interpret applications (3)" in { 60 | runTest("(concat \"foo\" \"bar\")", Seq(("foobar", 1.0))) 61 | } 62 | 63 | it should "correctly interpret filters" in { 64 | runTest( 65 | "(filter (lambda (x) (flip x)) (list flipProb flipProb2))", 66 | Seq((List(0.6, 0.55), 0.6 * 0.55), (List(0.6), 0.6 * 0.45), 67 | (List(0.55), 0.4 * 0.55), (List(), 0.4 * 0.45)) 68 | ) 69 | } 70 | } -------------------------------------------------------------------------------- /src/test/scala/org/allenai/pnp/SampleSpec.scala: -------------------------------------------------------------------------------- 1 | package org.allenai.pnp 2 | 3 | import edu.cmu.dynet._ 4 | import org.scalatest._ 5 | import org.allenai.pnp.ExecutionScore.ExecutionScore 6 | 7 | class SampleSpec extends FlatSpec with Matchers { 8 | 9 | "Pnp" should "sample unconditionally" in { 10 | 11 | val flip = Pnp.choose(Seq("a", "b"), Seq(0.25d, 0.75d)) 12 | 13 | val samples = for (i <- 1 to 10000) yield flip.sample() 14 | 15 | val numA = samples.map(_.value).filter(_ == "a").size 16 | val numB = samples.map(_.value).filter(_ == "b").size 17 | 18 | numA + numB shouldBe 10000 19 | 20 | numA should be > 2000 21 | numA should be < 3000 22 | 23 | numB should be > 7000 24 | numB should be < 8000 25 | } 26 | 27 | it should "take scores into account" in { 28 
| val flip = Pnp.chooseTag(Seq("a", "b"), "choice") 29 | 30 | val score = new ExecutionScore() { 31 | def apply(tag: Any, choice: Any, env: Env): Double = { 32 | if (tag == "choice" && choice == "a") 1.0 else 0.0 33 | } 34 | } 35 | 36 | val env = Env.init 37 | val context = PnpInferenceContext.init.addExecutionScore(score) 38 | val samples = for (i <- 1 to 10000) yield flip.sample(env=env, context=context) 39 | 40 | // This is how the probabilities should work out. 41 | val aProb = math.E / (1 + math.E) 42 | val bProb = 1 / (1 + math.E) 43 | 44 | val numA = samples.map(_.value).filter(_ == "a").size 45 | val numB = samples.map(_.value).filter(_ == "b").size 46 | 47 | numA + numB shouldBe 10000 48 | 49 | numA.toDouble / 10000 shouldBe aProb +- 0.05 50 | numB.toDouble / 10000 shouldBe bProb +- 0.05 51 | } 52 | 53 | it should "take scores into account when sampling from expressions" in { 54 | Initialize.initialize() 55 | ComputationGraph.renew() 56 | 57 | val weightVector = new FloatVector(Seq(0f, 0f)) 58 | val weights = Expression.input(Dim(2), weightVector) 59 | val flip = Pnp.choose(Array("a", "b"), weights, "choice") 60 | 61 | val score = new ExecutionScore() { 62 | def apply(tag: Any, choice: Any, env: Env): Double = { 63 | if (tag == "choice" && choice == "a") 1.0 else 0.0 64 | } 65 | } 66 | 67 | val model = new Model() 68 | val cg = CompGraph.empty(model) 69 | val context = PnpInferenceContext.init(cg).addExecutionScore(score) 70 | val env = Env.init 71 | 72 | val samples = for (i <- 1 to 10000) yield flip.sample(env=env, context=context) 73 | 74 | // This is how the probabilities should work out. 75 | val aProb = math.E / (1 + math.E) 76 | val bProb = 1 / (1 + math.E) 77 | 78 | val numA = samples.map(_.value).filter(_ == "a").size 79 | val numB = samples.map(_.value).filter(_ == "b").size 80 | 81 | numA + numB shouldBe 10000 82 | 83 | numA.toDouble / 10000 shouldBe aProb +- 0.05 84 | numB.toDouble / 10000 shouldBe bProb +- 0.05 85 | } 86 | 87 | it should "sample multistep" in { 88 | 89 | val flip = for { 90 | first <- Pnp.choose(Seq(true, false), Seq(0.2d, 0.8d)) 91 | second <- if (first) { 92 | Pnp.value(1) 93 | } else { 94 | Pnp.choose(Seq(3, 4), Seq(0.4d, 0.6d)) 95 | } 96 | third <- if (second == 4) { 97 | Pnp.choose(Seq(4, 5), Seq(0.1d, 0.9d)) 98 | } else { 99 | Pnp.value(second) 100 | } 101 | } yield third 102 | 103 | 104 | val samples = for (i <- 1 to 10000) yield flip.sample() 105 | 106 | val counts = samples.map(_.value).groupBy(identity).mapValues(_.size) 107 | counts.keySet shouldBe Set(1, 3, 4, 5) 108 | counts.values.sum shouldBe 10000 109 | 110 | // 1 = 10000 * .2 = 2000 111 | counts(1) should be > 1800 112 | counts(1) should be < 2200 113 | 114 | // 3 = 10000 * .8 * .4 = 3200 115 | counts(3) should be > 3000 116 | counts(3) should be < 3400 117 | 118 | // 4 = 10000 * .8 * .6 * .1 = 480 119 | counts(4) should be > 400 120 | counts(4) should be < 560 121 | 122 | // 5 = 10000 * .8 * .6 * .9 = 4320 123 | counts(5) should be > 4120 124 | counts(5) should be < 4520 125 | } 126 | 127 | type RandomVariable[T] = () => T 128 | class Distribution(rv: RandomVariable[Float]) extends Pnp[Float] { 129 | override def searchStep[C](env: Env, logProb: Double, context: PnpInferenceContext, 130 | continuation: PnpContinuation[Float, C], queue: PnpSearchQueue[C], finished: PnpSearchQueue[C]): Unit = ??? 131 | 132 | /** Implements a single step of forward sampling. 
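      * A value is drawn from the wrapped random variable and passed directly
      * to the continuation; the log probability is left unchanged.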
133 | */ 134 | override def sampleStep[C](env: Env, logProb: Double, context: PnpInferenceContext, 135 | continuation: PnpContinuation[Float, C], queue: PnpSearchQueue[C], finished: PnpSearchQueue[C]): Unit = { 136 | continuation.sampleStep(rv(), env, logProb, context, queue, finished) 137 | } 138 | } 139 | 140 | def uniform(lo: Float, hi: Float): Pnp[Float] = new Distribution( 141 | () => scala.util.Random.nextFloat * (hi - lo) + lo) 142 | 143 | it should "deal with 'continuous' variables" in { 144 | val dist = uniform(0f, 1f) 145 | 146 | val samples = for (i <- 1 to 10000) yield dist.sample().value 147 | 148 | samples.max should be <= 1f 149 | samples.min should be >= 0f 150 | 151 | val deciles = samples.groupBy(v => math.floor(10f * v)).mapValues(_.size) 152 | 153 | deciles.keySet shouldBe (0 until 10).toSet 154 | for (i <- 0 until 10) { 155 | deciles(i) should be > 900 156 | deciles(i) should be < 1100 157 | } 158 | } 159 | } 160 | -------------------------------------------------------------------------------- /src/test/scala/org/allenai/pnp/semparse/SemanticParserSpec.scala: -------------------------------------------------------------------------------- 1 | package org.allenai.pnp.semparse 2 | 3 | import scala.collection.JavaConverters._ 4 | import org.allenai.pnp.{Env, Pnp, PnpInferenceContext, PnpModel} 5 | 6 | import org.scalatest.FlatSpec 7 | import org.scalatest.Matchers 8 | import com.jayantkrish.jklol.ccg.lambda.ExplicitTypeDeclaration 9 | import com.jayantkrish.jklol.ccg.lambda.ExpressionParser 10 | import com.jayantkrish.jklol.training.NullLogFunction 11 | import com.jayantkrish.jklol.util.IndexedList 12 | import edu.cmu.dynet._ 13 | 14 | class SemanticParserSpec extends FlatSpec with Matchers { 15 | 16 | Initialize.initialize() 17 | 18 | val dataStrings = List( 19 | ("state", "state:"), 20 | ("city", "city:"), 21 | ("biggest city", "(argmax:<,e> city:)"), 22 | ("texas", "texas:e"), 23 | ("major city", "(lambda ($0) (and: (city: $0) (major: $0)))") 24 | ) 25 | 26 | val exprParser = ExpressionParser.expression2() 27 | val typeDeclaration = ExplicitTypeDeclaration.getDefault() 28 | 29 | val data = dataStrings.map(x => (x._1.split(" "), exprParser.parse(x._2))) 30 | 31 | val lexicon = ActionSpace.fromExpressions(data.map(_._2), typeDeclaration, true) 32 | val vocab = IndexedList.create[String] 33 | for (d <- data) { 34 | vocab.addAll(d._1.toList.asJava) 35 | } 36 | val model = PnpModel.init(true) 37 | val parser = SemanticParser.create(lexicon, vocab, model) 38 | 39 | "SemanticParser" should "generate application templates" in { 40 | println(lexicon.typeTemplateMap) 41 | } 42 | 43 | it should "decode expressions to template sequences" in { 44 | val e = exprParser.parse( 45 | "(argmax:<,e> (lambda ($0) (and: (city: $0) (major: $0))))") 46 | // This method will throw an error if it can't decode the expression properly. 
47 | val templates = parser.generateActionSequence(e, EntityLinking(List()), typeDeclaration) 48 | } 49 | 50 | it should "condition on expressions" in { 51 | val label = exprParser.parse("(lambda ($0) (and: (city: $0) (major: $0)))") 52 | val entityLinking = EntityLinking(List()) 53 | val oracle = parser.getLabelScore(label, entityLinking, typeDeclaration).get 54 | val exprs = parser.generateExpression(Array("major", "city").map(vocab.getIndex(_)), 55 | entityLinking) 56 | 57 | ComputationGraph.renew() 58 | val context = PnpInferenceContext.init(model).addExecutionScore(oracle) 59 | 60 | val results = exprs.beamSearch(1, -1, Env.init, context).executions 61 | results.length should be(1) 62 | results(0).value should equal(label) 63 | } 64 | 65 | it should "condition on multiple expressions" in { 66 | val label1 = exprParser.parse("(lambda ($0) (and: (city: $0) (major: $0)))") 67 | val label2 = exprParser.parse("(lambda ($0) (state: $0))") 68 | val labels = Set(label1, label2) 69 | val entityLinking = EntityLinking(List()) 70 | val oracle = parser.getMultiLabelScore(labels, entityLinking, typeDeclaration).get 71 | 72 | val exprs = parser.generateExpression(Array("major", "city").map(vocab.getIndex(_)), 73 | entityLinking) 74 | 75 | ComputationGraph.renew() 76 | val context = PnpInferenceContext.init(model).addExecutionScore(oracle) 77 | 78 | val results = exprs.beamSearch(2, -1, Env.init, context).executions 79 | results.length should be(2) 80 | results.map(_.value).toSet should equal(labels) 81 | } 82 | } 83 | --------------------------------------------------------------------------------