├── jitpack.yml ├── examples ├── maxent-weights.txt ├── tests │ ├── vinkenTAG.irtb │ └── asbestosTAG.irtb ├── test.rdg ├── pcfg-training.txt ├── jlm.irtg ├── lcfrs.irtg ├── wide-cfg.irtg ├── ghkm.irtg ├── pcfg-annotated-training.txt ├── mbot.irtg ├── ptb_ctf.txt ├── cfg.irtg ├── altoDemoStringToGraph.irtg ├── chinese.scfg ├── elephant-weighted.irtg ├── maxent-cfg.irtg ├── fcfg.irtg ├── elephant.irtg ├── chasing.tag ├── chinese.irtg ├── reg.irtg ├── ftag.irtg ├── apposition.irtg ├── scfg.irtg ├── cohn-lapata.irtg ├── nesson-shieber.irtg ├── testString5sub1_3sources.irtg ├── english.tag ├── tiny-tag.irtg ├── reg-fromthehat.irtg ├── session.scala └── atomicSGraphTest.irtg ├── img ├── irtg-grammar-gui.png ├── tulipac-chasing.png ├── wiki │ ├── decompose.png │ ├── rdg-parse.png │ ├── rdg-shell.png │ ├── gui-file-menu.png │ ├── parse-window.png │ ├── grammar-window.png │ └── language-window.png └── the-boy-wants-to-go-gui.png ├── src ├── test │ ├── resources │ │ ├── g10.irtb │ │ ├── examples │ │ │ ├── wide-cfg.irtg │ │ │ ├── ghkm.irtg │ │ │ ├── chinese.irtg │ │ │ ├── cohn-lapata.irtg │ │ │ └── nesson-shieber.irtg │ │ ├── 45.tag │ │ ├── 46.tag │ │ └── unary.irtg │ └── groovy │ │ └── de │ │ └── up │ │ └── ling │ │ └── irtg │ │ ├── util │ │ ├── BuildPropertiesTest.groovy │ │ └── LogSpaceOperationsTest.java │ │ ├── automata │ │ ├── UniversalAutomatonTest.groovy │ │ ├── DeterminizerTest.groovy │ │ └── SingletonAutomatonTest.groovy │ │ ├── codec │ │ ├── TreeAutomatonInputCodecTest.groovy │ │ ├── TiburonTreeAutomatonInputCodecTest.groovy │ │ ├── NltkPcfgInputCodecTest.groovy │ │ ├── TreeYieldOutputCodecTest.groovy │ │ └── SynchronousCfgInputCodecTest.groovy │ │ ├── random_automata │ │ └── RandomTreeAutomatonTest.java │ │ ├── corpus │ │ └── OnTheFlyChartsTest.groovy │ │ └── algebra │ │ └── FeatureStructureAlgebraTest.groovy └── main │ ├── resources │ ├── images │ │ ├── open.png │ │ └── open-16x16.png │ ├── build.properties │ └── META-INF │ │ └── services │ │ ├── 
de.up.ling.irtg.binarization.RegularSeed │ │ ├── de.up.ling.irtg.codec.OutputCodec │ │ ├── de.up.ling.irtg.algebra.Algebra │ │ └── de.up.ling.irtg.codec.InputCodec │ ├── java │ └── de │ │ ├── up │ │ └── ling │ │ │ └── irtg │ │ │ ├── io │ │ │ ├── package-info.java │ │ │ ├── StringCodec.java │ │ │ ├── NumberCodec.java │ │ │ ├── UtfStringCodec.java │ │ │ └── HttpCache.java │ │ │ ├── corpus │ │ │ ├── package-info.java │ │ │ ├── CorpusReadingException.java │ │ │ ├── ByteArrayInputStreamSupplier.java │ │ │ ├── ChartAttacher.java │ │ │ └── FileInputStreamSupplier.java │ │ │ ├── script │ │ │ ├── package-info.java │ │ │ ├── CorpusParser.java │ │ │ ├── TestAutomataMemorySize2.java │ │ │ └── CreateRandomAutomata.java │ │ │ ├── util │ │ │ ├── package-info.java │ │ │ ├── ValueAndTimeConsumer.java │ │ │ ├── IntForEach.java │ │ │ ├── ProgressBarWorker.java │ │ │ ├── ProgressListener.java │ │ │ ├── IntDeque.java │ │ │ ├── MapFactory.java │ │ │ ├── MutableBoolean.java │ │ │ ├── MutableDouble.java │ │ │ ├── DebuggingWriter.java │ │ │ ├── LambdaStopwatch.java │ │ │ ├── BuildProperties.java │ │ │ ├── Lazy.java │ │ │ ├── ObjectWithStringCode.java │ │ │ ├── ArrayListRangeIterable.java │ │ │ ├── IntAgenda.java │ │ │ ├── ForeachArrayTuple.java │ │ │ └── ConsoleProgressBar.java │ │ │ ├── hom │ │ │ └── package-info.java │ │ │ ├── automata │ │ │ ├── package-info.java │ │ │ ├── pruning │ │ │ │ ├── package-info.java │ │ │ │ ├── FOM.java │ │ │ │ ├── RulePairConsumer.java │ │ │ │ ├── MultiFOM.java │ │ │ │ ├── PruningPolicy.java │ │ │ │ └── RulePair.java │ │ │ ├── coarse_to_fine │ │ │ │ ├── package-info.java │ │ │ │ ├── RulePairList.java │ │ │ │ ├── FineToCoarseMapping.java │ │ │ │ └── RrtSummary.java │ │ │ ├── index │ │ │ │ └── package-info.java │ │ │ ├── language_iteration │ │ │ │ ├── package-info.java │ │ │ │ ├── ItemEvaluator.java │ │ │ │ ├── RuleRefiner.java │ │ │ │ ├── IdentityRuleRefiner.java │ │ │ │ ├── EvaluatedItemComparator.java │ │ │ │ ├── TreeCombiningItemEvaluator.java │ │ │ │ └── 
EvaluatedItem.java │ │ │ ├── RuleEvaluator.java │ │ │ ├── RuleEvaluatorTopDown.java │ │ │ ├── condensed │ │ │ │ ├── package-info.java │ │ │ │ └── ConcatenatedIterable.java │ │ │ ├── Intersectable.java │ │ │ ├── LeafToStateSubstitution.java │ │ │ ├── SkipFailRulesFilter.java │ │ │ ├── AbstractRule.java │ │ │ ├── WeightedTree.java │ │ │ ├── UniversalAutomaton.java │ │ │ └── EdgeEvaluator.java │ │ │ ├── maxent │ │ │ ├── package-info.java │ │ │ ├── RuleNameFeature.java │ │ │ └── ChildOfFeature.java │ │ │ ├── algebra │ │ │ ├── package-info.java │ │ │ ├── graph │ │ │ │ ├── package-info.java │ │ │ │ └── GraphEdgeFactory.java │ │ │ ├── NullFilterAlgebra.java │ │ │ ├── RightMbotAlgebra.java │ │ │ ├── LcfrsAlgebra.java │ │ │ ├── BinarizingTagTreeAlgebra.java │ │ │ ├── BinarizingTreeAlgebra.java │ │ │ ├── ParserException.java │ │ │ ├── BinarizingTreeWithAritiesAlgebra.java │ │ │ └── BinarizingTagTreeWithAritiesAlgebra.java │ │ │ ├── main │ │ │ └── package-info.java │ │ │ ├── codec │ │ │ ├── irtg │ │ │ │ └── package-info.java │ │ │ ├── ptb_tree │ │ │ │ └── package-info.java │ │ │ ├── tag │ │ │ │ ├── package-info.java │ │ │ │ ├── ElementaryTreeType.java │ │ │ │ ├── NodeAnnotation.java │ │ │ │ └── NodeType.java │ │ │ ├── template_irtg │ │ │ │ └── package-info.java │ │ │ ├── treeautomaton │ │ │ │ └── package-info.java │ │ │ ├── pcfg_as_irtg │ │ │ │ └── package-info.java │ │ │ ├── bolinas_hrg │ │ │ │ └── package-info.java │ │ │ ├── bottomup_treeautomaton │ │ │ │ └── package-info.java │ │ │ ├── isiamr │ │ │ │ └── package-info.java │ │ │ ├── tiburon_treeautomaton │ │ │ │ └── package-info.java │ │ │ ├── package-info.java │ │ │ ├── ThrowingErrorListener.java │ │ │ ├── CogsOutputCodec.java │ │ │ ├── ToStringOutputCodec.java │ │ │ ├── PtbTreeOutputCodec.java │ │ │ ├── CodecParseException.java │ │ │ ├── NoSignatureBinaryIrtgOutputCodec.java │ │ │ ├── TreeYieldOutputCodec.java │ │ │ ├── CodecMetadata.java │ │ │ ├── SgraphAmrWithSourcesOutputCodec.java │ │ │ ├── ListOutputCodec.java │ │ │ 
├── IsiAmrInputCodec.java │ │ │ ├── TikzQtreeOutputCodec.java │ │ │ └── AlgebraStringRepresentationOutputCodec.java │ │ │ ├── laboratory │ │ │ ├── package-info.java │ │ │ ├── OperationAnnotation.java │ │ │ ├── VariableNotDefinedException.java │ │ │ ├── TaskCache.java │ │ │ ├── AltoLabHttpCache.java │ │ │ ├── AdditionalDataCache.java │ │ │ └── UnparsedTask.java │ │ │ ├── semiring │ │ │ ├── package-info.java │ │ │ ├── AndOrSemiring.java │ │ │ ├── Semiring.java │ │ │ ├── LongArithmeticSemiring.java │ │ │ ├── DoubleArithmeticSemiring.java │ │ │ ├── AdditiveViterbiSemiring.java │ │ │ └── ViterbiWithBackpointerSemiring.java │ │ │ ├── learning_rates │ │ │ ├── package-info.java │ │ │ └── LearningRate.java │ │ │ ├── sampling │ │ │ ├── package-info.java │ │ │ └── rule_weighting │ │ │ │ ├── package-info.java │ │ │ │ └── AutomatonWeighted.java │ │ │ ├── random_automata │ │ │ └── package-info.java │ │ │ ├── binarization │ │ │ ├── package-info.java │ │ │ ├── BinaryRuleFactory.java │ │ │ └── GensymBinaryRuleFactory.java │ │ │ ├── siblingfinder │ │ │ └── package-info.java │ │ │ ├── gui │ │ │ ├── package-info.java │ │ │ ├── TreeAutomatonAnnotator.java │ │ │ ├── JDerivationDisplayable.java │ │ │ ├── IrtgTreeAutomatonAnnotator.java │ │ │ ├── JDerivationTree.form │ │ │ └── JInterpretationsPanel.form │ │ │ ├── signature │ │ │ ├── package-info.java │ │ │ ├── IdentitySignatureMapper.java │ │ │ └── IntSetInterner.java │ │ │ └── package-info.java │ │ └── saar │ │ └── coli │ │ └── featstruct │ │ └── FsParsingException.java │ ├── antlr │ └── de │ │ ├── up │ │ └── ling │ │ │ └── irtg │ │ │ ├── codec │ │ │ ├── ptb_tree │ │ │ │ └── PtbTree.g4 │ │ │ ├── rdg │ │ │ │ └── Rdg.g4 │ │ │ ├── pcfg_as_irtg │ │ │ │ └── PcfgAsIrtg.g4 │ │ │ ├── tiburon_treeautomaton │ │ │ │ └── TiburonTreeAutomaton.g4 │ │ │ ├── scfg │ │ │ │ └── SynchronousContextFreeGrammar.g4 │ │ │ ├── bottomup_treeautomaton │ │ │ │ └── BottomUpTreeAutomaton.g4 │ │ │ ├── irtg │ │ │ │ └── Irtg.g4 │ │ │ └── tulipac │ │ │ │ └── Tulipac.g4 │ 
│ │ └── laboratory │ │ │ └── DottedCommand.g4 │ │ └── saar │ │ └── coli │ │ └── featstruct │ │ └── FeatStruct.g4 │ └── javacc │ └── de │ └── up │ └── ling │ └── irtg │ └── algebra │ └── SetParser.jj ├── gradle └── wrapper │ ├── gradle-wrapper.jar │ └── gradle-wrapper.properties ├── scripts ├── alto ├── ape ├── alab ├── grammar-preprocessor │ └── preprocess.py ├── acc └── agc ├── settings.gradle ├── .settings └── org.eclipse.m2e.core.prefs ├── .gitignore ├── .github └── workflows │ └── build.yml ├── .travis.yml ├── init.scala └── license-info.md /jitpack.yml: -------------------------------------------------------------------------------- 1 | jdk: 2 | - openjdk11 3 | -------------------------------------------------------------------------------- /examples/maxent-weights.txt: -------------------------------------------------------------------------------- 1 | f1 = 0.2 2 | f2 = 0.8 3 | -------------------------------------------------------------------------------- /img/irtg-grammar-gui.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coli-saar/alto/HEAD/img/irtg-grammar-gui.png -------------------------------------------------------------------------------- /img/tulipac-chasing.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coli-saar/alto/HEAD/img/tulipac-chasing.png -------------------------------------------------------------------------------- /img/wiki/decompose.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coli-saar/alto/HEAD/img/wiki/decompose.png -------------------------------------------------------------------------------- /img/wiki/rdg-parse.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coli-saar/alto/HEAD/img/wiki/rdg-parse.png 
-------------------------------------------------------------------------------- /img/wiki/rdg-shell.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coli-saar/alto/HEAD/img/wiki/rdg-shell.png -------------------------------------------------------------------------------- /img/wiki/gui-file-menu.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coli-saar/alto/HEAD/img/wiki/gui-file-menu.png -------------------------------------------------------------------------------- /img/wiki/parse-window.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coli-saar/alto/HEAD/img/wiki/parse-window.png -------------------------------------------------------------------------------- /examples/tests/vinkenTAG.irtb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coli-saar/alto/HEAD/examples/tests/vinkenTAG.irtb -------------------------------------------------------------------------------- /img/wiki/grammar-window.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coli-saar/alto/HEAD/img/wiki/grammar-window.png -------------------------------------------------------------------------------- /img/wiki/language-window.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coli-saar/alto/HEAD/img/wiki/language-window.png -------------------------------------------------------------------------------- /src/test/resources/g10.irtb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coli-saar/alto/HEAD/src/test/resources/g10.irtb -------------------------------------------------------------------------------- 
/examples/tests/asbestosTAG.irtb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coli-saar/alto/HEAD/examples/tests/asbestosTAG.irtb -------------------------------------------------------------------------------- /img/the-boy-wants-to-go-gui.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coli-saar/alto/HEAD/img/the-boy-wants-to-go-gui.png -------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coli-saar/alto/HEAD/gradle/wrapper/gradle-wrapper.jar -------------------------------------------------------------------------------- /src/main/resources/images/open.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coli-saar/alto/HEAD/src/main/resources/images/open.png -------------------------------------------------------------------------------- /scripts/alto: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | 3 | # alto = Alto GUI 4 | 5 | # Runs the Alto GUI. 6 | 7 | java -Xmx8G -jar $ALTO_JAR 8 | -------------------------------------------------------------------------------- /settings.gradle: -------------------------------------------------------------------------------- 1 | /* 2 | * This file was generated by the Gradle 'init' task. 
3 | */ 4 | 5 | rootProject.name = 'alto' 6 | -------------------------------------------------------------------------------- /src/main/resources/build.properties: -------------------------------------------------------------------------------- 1 | version=${project.version} 2 | scm-revision=${buildNumber} 3 | compile-time=${timestamp} 4 | -------------------------------------------------------------------------------- /src/main/resources/images/open-16x16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coli-saar/alto/HEAD/src/main/resources/images/open-16x16.png -------------------------------------------------------------------------------- /.settings/org.eclipse.m2e.core.prefs: -------------------------------------------------------------------------------- 1 | activeProfiles= 2 | eclipse.preferences.version=1 3 | resolveWorkspaceProjects=true 4 | version=1 5 | -------------------------------------------------------------------------------- /examples/test.rdg: -------------------------------------------------------------------------------- 1 | S -> (VP) 2 | VP -> (NP, NP) 3 | NP -> 4 | NP -> (Det) 5 | Det -> -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/io/package-info.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Classes used for HTTP communication. 3 | */ 4 | package de.up.ling.irtg.io; -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/corpus/package-info.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Classes for working with corpora.
3 | */ 4 | package de.up.ling.irtg.corpus; -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/script/package-info.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Various useful scripts. 3 | */ 4 | package de.up.ling.irtg.script; 5 | 6 | -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/util/package-info.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Various utilities. 3 | * 4 | */ 5 | package de.up.ling.irtg.util; 6 | 7 | -------------------------------------------------------------------------------- /scripts/ape: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | 3 | # ape = Alto Parsing Evaluator 4 | 5 | java -Xmx8G -cp $ALTO_JAR de.up.ling.irtg.script.ParsingEvaluator "$@" 6 | -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/hom/package-info.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Classes for working with tree homomorphisms. 3 | */ 4 | package de.up.ling.irtg.hom; -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/automata/package-info.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Classes for tree automata of various types. 3 | */ 4 | package de.up.ling.irtg.automata; -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/maxent/package-info.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Classes for working with maximum-entropy IRTGs.
3 | */ 4 | package de.up.ling.irtg.maxent; -------------------------------------------------------------------------------- /scripts/alab: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | 3 | # alab = Alto Lab Task Runner 4 | 5 | java -Xmx8G -cp $ALTO_JAR de.up.ling.irtg.laboratory.CommandLineInterface "$@" 6 | 7 | -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/algebra/package-info.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Classes representing a number of useful algebras. 3 | */ 4 | package de.up.ling.irtg.algebra; -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/main/package-info.java: -------------------------------------------------------------------------------- 1 | /** 2 | * This package contains the main class which runs the Alto GUI. 3 | */ 4 | package de.up.ling.irtg.main; -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/codec/irtg/package-info.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Contains the IRTG input codec, and supporting classes. 3 | */ 4 | package de.up.ling.irtg.codec.irtg; 5 | -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/algebra/graph/package-info.java: -------------------------------------------------------------------------------- 1 | /** 2 | * The algebra of s-graphs, together with supporting classes. 
3 | */ 4 | package de.up.ling.irtg.algebra.graph; -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/laboratory/package-info.java: -------------------------------------------------------------------------------- 1 | /** 2 | * This package contains the core classes used to run Alto Lab tasks. 3 | */ 4 | package de.up.ling.irtg.laboratory; -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/semiring/package-info.java: -------------------------------------------------------------------------------- 1 | /** 2 | * A number of semirings used in computation over tree automata. 3 | */ 4 | package de.up.ling.irtg.semiring; 5 | 6 | -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/learning_rates/package-info.java: -------------------------------------------------------------------------------- 1 | /** 2 | * This package includes basic learning rates used for SGD algorithms. 3 | */ 4 | package de.up.ling.irtg.learning_rates; -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/sampling/package-info.java: -------------------------------------------------------------------------------- 1 | /** 2 | * This package contains tools to implement approximate inference via sampling. 3 | */ 4 | package de.up.ling.irtg.sampling; -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/automata/pruning/package-info.java: -------------------------------------------------------------------------------- 1 | /** 2 | * This package implements beam search in the construction of a parse chart. 
3 | */ 4 | package de.up.ling.irtg.automata.pruning; -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/codec/ptb_tree/package-info.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Contains the input codec for Penn Treebank trees, and supporting classes. 3 | */ 4 | package de.up.ling.irtg.codec.ptb_tree; 5 | -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/codec/tag/package-info.java: -------------------------------------------------------------------------------- 1 | /** 2 | * This package contains a codec for reading in the grammar format used by Chen and Rambow. 3 | */ 4 | package de.up.ling.irtg.codec.tag; -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/codec/template_irtg/package-info.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Contains the template IRTG input codec, and supporting classes. 3 | */ 4 | package de.up.ling.irtg.codec.template_irtg; 5 | -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/codec/treeautomaton/package-info.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Contains the tree automaton input codec, and supporting classes. 3 | */ 4 | package de.up.ling.irtg.codec.treeautomaton; 5 | -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/codec/pcfg_as_irtg/package-info.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Contains the input codec for PCFG as IRTG, as well as supporting classes. 
3 | */ 4 | package de.up.ling.irtg.codec.pcfg_as_irtg; 5 | -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/sampling/rule_weighting/package-info.java: -------------------------------------------------------------------------------- 1 | /** 2 | * This package contains automata with adaptable sampling heuristics. 3 | */ 4 | package de.up.ling.irtg.sampling.rule_weighting; -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/codec/bolinas_hrg/package-info.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Support classes for the {@link de.up.ling.irtg.codec.BolinasHrgInputCodec}. 3 | */ 4 | package de.up.ling.irtg.codec.bolinas_hrg; 5 | 6 | -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/random_automata/package-info.java: -------------------------------------------------------------------------------- 1 | /** 2 | * This package contains code to generate random tree automata for evaluation 3 | * purposes. 4 | */ 5 | package de.up.ling.irtg.random_automata; -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/binarization/package-info.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Classes for binarizing IRTGs. 3 | * 4 | * The core class for binarizing grammars is the BkvBinarizer. 
5 | */ 6 | package de.up.ling.irtg.binarization; -------------------------------------------------------------------------------- /src/main/resources/META-INF/services/de.up.ling.irtg.binarization.RegularSeed: -------------------------------------------------------------------------------- 1 | de.up.ling.irtg.binarization.StringAlgebraSeed 2 | de.up.ling.irtg.binarization.IdentitySeed 3 | de.up.ling.irtg.binarization.BinarizingAlgebraSeed -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/codec/bottomup_treeautomaton/package-info.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Contains the bottom-up tree automaton input codec, and supporting classes. 3 | */ 4 | package de.up.ling.irtg.codec.bottomup_treeautomaton; 5 | -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/siblingfinder/package-info.java: -------------------------------------------------------------------------------- 1 | /** 2 | * This package contains classes for sibling-finder-based intersection, which is used to 3 | * speed up intersection. 4 | */ 5 | package de.up.ling.irtg.siblingfinder; -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/gui/package-info.java: -------------------------------------------------------------------------------- 1 | /** 2 | * A graphical user interface (GUI) for Alto. 3 | * 4 | * @see de.up.ling.irtg.automata.condensed.CondensedTreeAutomaton 5 | * 6 | */ 7 | package de.up.ling.irtg.gui; 8 | -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/codec/isiamr/package-info.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Contains the import codec for graphs in the AMR bank format (see amr.isi.edu), 3 | * as well as supporting classes.
4 | */ 5 | package de.up.ling.irtg.codec.isiamr; 6 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | *# 3 | *.iml 4 | /.classpath 5 | /.project 6 | /target/ 7 | /bin/ 8 | /out/ 9 | /.idea/ 10 | /.settings/ 11 | .DS_Store 12 | ^src/main/java/de/up/ling/irtg/script/ParseCorpus\.java$ 13 | /build/ 14 | /.gradle/ 15 | /.nb-gradle/ -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/codec/tiburon_treeautomaton/package-info.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Contains the input codec for tiburon style tree automata, as well as 3 | * supporting classes. 4 | */ 5 | package de.up.ling.irtg.codec.tiburon_treeautomaton; 6 | -------------------------------------------------------------------------------- /gradle/wrapper/gradle-wrapper.properties: -------------------------------------------------------------------------------- 1 | distributionBase=GRADLE_USER_HOME 2 | distributionPath=wrapper/dists 3 | distributionUrl=https\://services.gradle.org/distributions/gradle-7.0-bin.zip 4 | zipStoreBase=GRADLE_USER_HOME 5 | zipStorePath=wrapper/dists 6 | -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/automata/coarse_to_fine/package-info.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Classes used to implement coarse-to-fine parsing. 3 | * 4 | * The main class of this package is CoarseToFineParser. 
5 | */ 6 | package de.up.ling.irtg.automata.coarse_to_fine; -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/automata/index/package-info.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Index data structures for tree automata. 3 | * 4 | * These data structures are used to quickly look up rules within a tree 5 | * automaton. 6 | * 7 | */ 8 | package de.up.ling.irtg.automata.index; -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/automata/language_iteration/package-info.java: -------------------------------------------------------------------------------- 1 | /** 2 | * This package implements Alto's functionality for iterating over the 3 | * language of a tree automaton in best first order. 4 | */ 5 | package de.up.ling.irtg.automata.language_iteration; -------------------------------------------------------------------------------- /examples/pcfg-training.txt: -------------------------------------------------------------------------------- 1 | # IRTG unannotated corpus file, v1.0 2 | # 3 | # interpretation i: de.up.ling.irtg.algebra.StringAlgebra 4 | 5 | john watches the woman with the telescope 6 | john watches the telescope with the telescope 7 | john watches the telescope with the woman 8 | -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/signature/package-info.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Signatures and other classes for mapping between human-readable 3 | * representations of states and symbols and internal numeric representations. 
4 | * 5 | */ 6 | package de.up.ling.irtg.signature; 7 | 8 | -------------------------------------------------------------------------------- /examples/jlm.irtg: -------------------------------------------------------------------------------- 1 | 2 | interpretation i: de.up.ling.irtg.algebra.StringAlgebra 3 | 4 | 5 | S! -> r1(NP,VP) 6 | [i] *(?1,?2) 7 | 8 | VP -> r2(V,NP) 9 | [i] *(?1,?2) 10 | 11 | NP -> r3 12 | [i] john 13 | 14 | V -> r4 15 | [i] loves 16 | 17 | NP -> r5 18 | [i] mary 19 | 20 | -------------------------------------------------------------------------------- /examples/lcfrs.irtg: -------------------------------------------------------------------------------- 1 | interpretation i: de.up.ling.irtg.algebra.LcfrsAlgebra 2 | 3 | 4 | S! -> loves(NP, NP, VPa) 5 | [i] *(proj_1(?1), *(proj_1(?3), *(loves, *(proj_1(?2), proj_2(?3))))) 6 | 7 | NP -> john 8 | [i] john 9 | 10 | NP -> mary 11 | [i] mary 12 | 13 | VPa -> pass 14 | [i] **(sometimes, passionately) 15 | 16 | -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/package-info.java: -------------------------------------------------------------------------------- 1 | /** 2 | * This package contains the core classes for Alto, which represent Interpreted 3 | * Regular Tree Grammars (IRTGs). 4 | * 5 | * This includes a representation for an interpretation and a template version 6 | * of IRTGs used in learning feature-based models. 7 | */ 8 | package de.up.ling.irtg; -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/automata/RuleEvaluator.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this template, choose Tools | Templates 3 | * and open the template in the editor.
4 | */ 5 | 6 | package de.up.ling.irtg.automata; 7 | 8 | /** 9 | * 10 | * @author koller 11 | */ 12 | public interface RuleEvaluator { 13 | E evaluateRule(Rule rule); 14 | } 15 | -------------------------------------------------------------------------------- /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: build 2 | 3 | on: 4 | push: 5 | 6 | jobs: 7 | build: 8 | runs-on: ubuntu-latest 9 | steps: 10 | - uses: actions/checkout@v1 11 | - name: Set up JDK 1.11 12 | uses: actions/setup-java@v1 13 | with: 14 | java-version: 1.11 15 | - name: build 16 | run: ./gradlew build 17 | -------------------------------------------------------------------------------- /src/main/antlr/de/up/ling/irtg/codec/ptb_tree/PtbTree.g4: -------------------------------------------------------------------------------- 1 | grammar PtbTree; 2 | 3 | @header{ 4 | package de.up.ling.irtg.codec.ptb_tree; 5 | } 6 | 7 | BKOPEN: '('; 8 | BKCLOSE: ')'; 9 | NAME: (~ [() \n\t\r])+; 10 | 11 | WS: [ \n\t\r]+ -> skip; 12 | 13 | corpus: (BKOPEN (tree) BKCLOSE)*; 14 | 15 | tree: (BKOPEN NAME tree+ BKCLOSE) 16 | | NAME; 17 | 18 | -------------------------------------------------------------------------------- /examples/wide-cfg.irtg: -------------------------------------------------------------------------------- 1 | interpretation left: de.up.ling.irtg.algebra.WideStringAlgebra 2 | interpretation right: de.up.ling.irtg.algebra.WideStringAlgebra 3 | 4 | A! 
-> r1(B,C,D) [2] 5 | [left] conc3(?1, ?2, ?3) 6 | [right] conc4(?3, a, ?1, ?2) 7 | 8 | B -> r2 9 | [left] b 10 | [right] b 11 | 12 | C -> r3 13 | [left] c 14 | [right] c 15 | 16 | D -> r4 17 | [left] d 18 | [right] d 19 | -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/automata/RuleEvaluatorTopDown.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this template, choose Tools | Templates 3 | * and open the template in the editor. 4 | */ 5 | package de.up.ling.irtg.automata; 6 | 7 | /** 8 | * 9 | * @author koller 10 | */ 11 | public interface RuleEvaluatorTopDown { 12 | E initialValue(); 13 | E evaluateRule(Rule rule, int i); 14 | } 15 | -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/automata/condensed/package-info.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Classes for "condensed" tree automata. A rule of a 3 | * condensed tree automaton can represent an entire set 4 | * of ordinary tree automaton rules, which are identical 5 | * except for the terminal symbol. 6 | * 7 | * @see CondensedTreeAutomaton 8 | * 9 | */ 10 | package de.up.ling.irtg.automata.condensed; -------------------------------------------------------------------------------- /src/test/resources/examples/wide-cfg.irtg: -------------------------------------------------------------------------------- 1 | interpretation left: de.up.ling.irtg.algebra.WideStringAlgebra 2 | interpretation right: de.up.ling.irtg.algebra.WideStringAlgebra 3 | 4 | A! 
-> r1(B,C,D) [2] 5 | [left] conc3(?1, ?2, ?3) 6 | [right] conc4(?3, a, ?1, ?2) 7 | 8 | B -> r2 9 | [left] b 10 | [right] b 11 | 12 | C -> r3 13 | [left] c 14 | [right] c 15 | 16 | D -> r4 17 | [left] d 18 | [right] d 19 | -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/automata/language_iteration/ItemEvaluator.java: -------------------------------------------------------------------------------- 1 | package de.up.ling.irtg.automata.language_iteration; 2 | 3 | import de.up.ling.irtg.automata.Rule; 4 | import java.util.List; 5 | 6 | /** 7 | * 8 | * @author koller 9 | */ 10 | public interface ItemEvaluator { 11 | EvaluatedItem evaluate(Rule refinedRule, List children, UnevaluatedItem unevaluatedItem); 12 | } 13 | -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/codec/tag/ElementaryTreeType.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this license header, choose License Headers in Project Properties. 3 | * To change this template file, choose Tools | Templates 4 | * and open the template in the editor. 5 | */ 6 | 7 | package de.up.ling.irtg.codec.tag; 8 | 9 | /** 10 | * 11 | * @author koller 12 | */ 13 | public enum ElementaryTreeType { 14 | INITIAL, AUXILIARY 15 | } 16 | -------------------------------------------------------------------------------- /examples/ghkm.irtg: -------------------------------------------------------------------------------- 1 | /* Example tree-to-string transducer from Galley et al. 04 */ 2 | 3 | interpretation french: de.up.ling.irtg.algebra.StringAlgebra 4 | interpretation english: de.up.ling.irtg.algebra.TreeAlgebra 5 | 6 | S! 
-> r1(NP,VB) 7 | [french] *(*(*(?1, ne), ?2), pas) 8 | [english] s(?1, vp(aux(does), rb(not), ?2)) 9 | 10 | NP -> r2 11 | [french] il 12 | [english] np(prp(he)) 13 | 14 | VB -> r3 15 | [french] va 16 | [english] vb(go) 17 | 18 | -------------------------------------------------------------------------------- /examples/pcfg-annotated-training.txt: -------------------------------------------------------------------------------- 1 | # IRTG annotated corpus file, v1.0 2 | # 3 | # interpretation i: de.up.ling.irtg.algebra.StringAlgebra 4 | 5 | john watches the woman with the telescope 6 | r1(r7,r5( r4(r8, r2(r9,r10)), r6(r12, r2(r9,r11)))) 7 | john watches the telescope with the telescope 8 | r1(r7,r5( r4(r8, r2(r9,r11)), r6(r12, r2(r9,r11)))) 9 | john watches the telescope with the woman 10 | r1(r7,r5( r4(r8, r2(r9,r11)), r6(r12, r2(r9,r10)))) 11 | -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/codec/tag/NodeAnnotation.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this license header, choose License Headers in Project Properties. 3 | * To change this template file, choose Tools | Templates 4 | * and open the template in the editor. 5 | */ 6 | package de.up.ling.irtg.codec.tag; 7 | 8 | /** 9 | * 10 | * @author koller 11 | */ 12 | public enum NodeAnnotation { 13 | NONE, 14 | NO_ADJUNCTION, 15 | OBLIGATORY_ADJUNCTION 16 | } 17 | -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/util/ValueAndTimeConsumer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this license header, choose License Headers in Project Properties. 3 | * To change this template file, choose Tools | Templates 4 | * and open the template in the editor. 
5 | */ 6 | 7 | package de.up.ling.irtg.util; 8 | 9 | /** 10 | * 11 | * @author koller 12 | */ 13 | @FunctionalInterface 14 | public interface ValueAndTimeConsumer { 15 | void accept(E result, long time); 16 | } 17 | -------------------------------------------------------------------------------- /src/test/resources/examples/ghkm.irtg: -------------------------------------------------------------------------------- 1 | /* Example tree-to-string transducer from Galley et al. 04 */ 2 | 3 | interpretation french: de.up.ling.irtg.algebra.StringAlgebra 4 | interpretation english: de.up.ling.irtg.algebra.TreeAlgebra 5 | 6 | S! -> r1(NP,VB) 7 | [french] *(*(*(?1, ne), ?2), pas) 8 | [english] s(?1, vp(aux(does), rb(not), ?2)) 9 | 10 | NP -> r2 11 | [french] il 12 | [english] np(prp(he)) 13 | 14 | VB -> r3 15 | [french] va 16 | [english] vb(go) 17 | 18 | -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/util/IntForEach.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this license header, choose License Headers in Project Properties. 3 | * To change this template file, choose Tools | Templates 4 | * and open the template in the editor. 5 | */ 6 | 7 | package de.up.ling.irtg.util; 8 | 9 | import java.util.function.IntConsumer; 10 | 11 | /** 12 | * 13 | * @author koller 14 | */ 15 | public interface IntForEach { 16 | void forEach(IntConsumer consumer); 17 | } 18 | -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/algebra/NullFilterAlgebra.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this license header, choose License Headers in Project Properties. 3 | * To change this template file, choose Tools | Templates 4 | * and open the template in the editor. 
5 | */ 6 | package de.up.ling.irtg.algebra; 7 | 8 | import de.up.ling.irtg.automata.TreeAutomaton; 9 | 10 | /** 11 | * 12 | * @author koller 13 | */ 14 | public interface NullFilterAlgebra { 15 | TreeAutomaton nullFilter(); 16 | } 17 | -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/util/ProgressBarWorker.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this license header, choose License Headers in Project Properties. 3 | * To change this template file, choose Tools | Templates 4 | * and open the template in the editor. 5 | */ 6 | 7 | package de.up.ling.irtg.util; 8 | 9 | /** 10 | * 11 | * @author koller 12 | */ 13 | @FunctionalInterface 14 | public interface ProgressBarWorker { 15 | E compute(ProgressListener dialog) throws Exception; 16 | } 17 | -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/util/ProgressListener.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this license header, choose License Headers in Project Properties. 3 | * To change this template file, choose Tools | Templates 4 | * and open the template in the editor. 5 | */ 6 | 7 | package de.up.ling.irtg.util; 8 | 9 | /** 10 | * 11 | * @author koller 12 | */ 13 | @FunctionalInterface 14 | public interface ProgressListener { 15 | void accept(int currentValue, int maxValue, String string); 16 | } 17 | -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/automata/Intersectable.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this license header, choose License Headers in Project Properties. 3 | * To change this template file, choose Tools | Templates 4 | * and open the template in the editor. 
5 | */ 6 | package de.up.ling.irtg.automata; 7 | 8 | /** 9 | * Dummy interface that both TreeAutomaton and SiblingFinderInvhom implement. 10 | * @author Jonas 11 | */ 12 | public interface Intersectable { 13 | 14 | } 15 | -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/util/IntDeque.java: -------------------------------------------------------------------------------- 1 | package de.up.ling.irtg.util; 2 | 3 | import java.util.Deque; 4 | 5 | import it.unimi.dsi.fastutil.ints.IntIterable; 6 | 7 | public interface IntDeque extends Deque, IntIterable { 8 | 9 | void addFirst(int x); 10 | boolean offerFirst(int x); 11 | 12 | int removeFirstInt(); 13 | int getFirstInt(); 14 | 15 | 16 | void addLast(int x); 17 | boolean offerLast(int x); 18 | 19 | int removeLastInt(); 20 | int getLastInt(); 21 | } 22 | -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/automata/LeafToStateSubstitution.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this template, choose Tools | Templates 3 | * and open the template in the editor. 
4 | */ 5 | 6 | package de.up.ling.irtg.automata; 7 | 8 | /** 9 | * 10 | * @author koller 11 | */ 12 | public class LeafToStateSubstitution { 13 | public boolean isSubstituted(Label x) { 14 | return false; 15 | } 16 | 17 | public State substitute(Label x) { 18 | return null; 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /scripts/grammar-preprocessor/preprocess.py: -------------------------------------------------------------------------------- 1 | from jinja2 import * 2 | import sys 3 | 4 | class _Counter(object): 5 | def __init__(self, start_value=1): 6 | self.value=start_value 7 | 8 | def current(self): 9 | return self.value 10 | 11 | def next(self): 12 | v=self.value 13 | self.value+=1 14 | return v 15 | 16 | env = Environment(loader=FileSystemLoader(".")) 17 | env.globals['counter'] = _Counter 18 | 19 | template = env.get_template(sys.argv[1]) 20 | print(template.render()) 21 | 22 | 23 | -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/gui/TreeAutomatonAnnotator.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this template, choose Tools | Templates 3 | * and open the template in the editor. 4 | */ 5 | package de.up.ling.irtg.gui; 6 | 7 | import de.up.ling.irtg.automata.Rule; 8 | import java.util.Collection; 9 | 10 | /** 11 | * 12 | * @author koller 13 | */ 14 | public interface TreeAutomatonAnnotator { 15 | Collection getAnnotationIdentifiers(); 16 | String getAnnotation(Rule rule, String annotationIdentifier); 17 | } 18 | -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/io/StringCodec.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this license header, choose License Headers in Project Properties. 
3 | * To change this template file, choose Tools | Templates 4 | * and open the template in the editor. 5 | */ 6 | package de.up.ling.irtg.io; 7 | 8 | import java.io.IOException; 9 | 10 | /** 11 | * 12 | * @author koller 13 | */ 14 | public interface StringCodec { 15 | long writeString(String s) throws IOException; 16 | String readString() throws IOException; 17 | } 18 | -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/util/MapFactory.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this license header, choose License Headers in Project Properties. 3 | * To change this template file, choose Tools | Templates 4 | * and open the template in the editor. 5 | */ 6 | package de.up.ling.irtg.util; 7 | 8 | import it.unimi.dsi.fastutil.ints.Int2ObjectMap; 9 | import java.io.Serializable; 10 | 11 | /** 12 | * 13 | * @author koller 14 | */ 15 | public interface MapFactory extends Serializable { 16 | Int2ObjectMap createMap(int depth); 17 | } 18 | -------------------------------------------------------------------------------- /examples/mbot.irtg: -------------------------------------------------------------------------------- 1 | 2 | interpretation left: de.up.ling.irtg.algebra.TreeAlgebra 3 | interpretation right: de.up.ling.irtg.algebra.RightMbotAlgebra 4 | 5 | S! 
-> r1(NP, VP) 6 | [left] s(?1, ?2) 7 | [right] s(proj_1(?1), proj_1(?2)) 8 | 9 | NP -> r2 10 | [left] np(john) 11 | [right] np(hans) 12 | 13 | NP -> r3 14 | [left] np(mary) 15 | [right] np(maria) 16 | 17 | VP -> r4(V, NP) 18 | [left] vp(?1, ?2) 19 | [right] vp(proj_1(?1), proj_1(?2), proj_2(?1)) 20 | 21 | V -> r5 22 | [left] v(saw) 23 | [right] **(aux(hat), part(gesehen)) 24 | 25 | -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/automata/language_iteration/RuleRefiner.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this license header, choose License Headers in Project Properties. 3 | * To change this template file, choose Tools | Templates 4 | * and open the template in the editor. 5 | */ 6 | 7 | package de.up.ling.irtg.automata.language_iteration; 8 | 9 | import de.up.ling.irtg.automata.Rule; 10 | import java.util.List; 11 | 12 | /** 13 | * 14 | * @author koller 15 | */ 16 | public interface RuleRefiner { 17 | List refine(Rule rule); 18 | } 19 | -------------------------------------------------------------------------------- /scripts/acc: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | 3 | # acc = Alto Codec Converter 4 | 5 | # Converts a corpus from one codec to another. The input file is assumed to 6 | # represent a list of instances, one per line, using some input codec; blank lines are allowed. 7 | # This tool will iterate over these instances and convert each into a string, using 8 | # given output codecs, which it writes into an output file. Each instance may 9 | # be converted by multiple output codecs or just a single one. 
10 | 11 | java -cp $ALTO_JAR de.up.ling.irtg.script.CodecConverter "$@" 12 | -------------------------------------------------------------------------------- /src/main/resources/META-INF/services/de.up.ling.irtg.codec.OutputCodec: -------------------------------------------------------------------------------- 1 | de.up.ling.irtg.codec.ToStringOutputCodec 2 | de.up.ling.irtg.codec.TikzQtreeOutputCodec 3 | de.up.ling.irtg.codec.TikzSgraphOutputCodec 4 | de.up.ling.irtg.codec.GraphVizDotOutputCodec 5 | de.up.ling.irtg.codec.BolinasGraphOutputCodec 6 | de.up.ling.irtg.codec.SgraphAmrOutputCodec 7 | de.up.ling.irtg.codec.SgraphAmrWithSourcesOutputCodec 8 | de.up.ling.irtg.codec.PtbTreeOutputCodec 9 | de.up.ling.irtg.codec.TreeYieldOutputCodec 10 | de.up.ling.irtg.codec.BinaryIrtgOutputCodec 11 | de.up.ling.irtg.codec.CogsOutputCodec -------------------------------------------------------------------------------- /examples/ptb_ctf.txt: -------------------------------------------------------------------------------- 1 | ___( 2 | TOP(TOP(TOP(TOP))), 3 | 4 | P( 5 | HP( 6 | S_( 7 | S, 8 | VP, 9 | SQ, 10 | SBAR, 11 | SBARQ, 12 | SINV 13 | ), 14 | N_( 15 | NP, 16 | NAC, 17 | NX, 18 | LST, 19 | X, 20 | UCP, 21 | FRAG 22 | ) 23 | ), 24 | MP( 25 | A_( 26 | ADJP, 27 | QP, 28 | CONJP, 29 | ADVP, 30 | INTJ, 31 | PRN, 32 | PRT 33 | ), 34 | P_( 35 | PP, 36 | RRC, 37 | WHADJP, 38 | WHADVP, 39 | WHNP, 40 | WHPP 41 | ) 42 | ) 43 | ) 44 | ) 45 | -------------------------------------------------------------------------------- /src/main/antlr/de/up/ling/irtg/codec/rdg/Rdg.g4: -------------------------------------------------------------------------------- 1 | grammar Rdg; 2 | 3 | @header{ 4 | package de.up.ling.irtg.codec.rdg; 5 | } 6 | 7 | LABEL: '<' (~[>])+ '>'; 8 | NAME: [a-zA-Z_*$@+] ([a-zA-Z0-9_*$@+/.-]*); 9 | 10 | COLON: ':'; 11 | OPEN_ABK: '<'; 12 | CLOSE_ABK: '>'; 13 | OPEN_BK: '('; 14 | CLOSE_BK: ')'; 15 | ARROW: '->'; 16 | 17 | 18 | WS: [ \n\t\r]+ -> skip; 19 | 20 | COMMENT 21 | 
: ( '//' ~[\r\n]* '\r'? '\n' 22 | | '/*' .*? '*/' 23 | ) -> skip 24 | ; 25 | 26 | rdg : rdg_rule+; 27 | 28 | rdg_rule: NAME ARROW LABEL state_list; 29 | state_list: ('(' (NAME ',')* NAME ')')?; 30 | 31 | -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/automata/pruning/FOM.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this license header, choose License Headers in Project Properties. 3 | * To change this template file, choose Tools | Templates 4 | * and open the template in the editor. 5 | */ 6 | 7 | package de.up.ling.irtg.automata.pruning; 8 | 9 | import de.up.ling.irtg.automata.Rule; 10 | import de.up.ling.irtg.automata.condensed.CondensedRule; 11 | 12 | /** 13 | * 14 | * @author koller 15 | */ 16 | public interface FOM { 17 | double evaluate(Rule left, CondensedRule right); 18 | double evaluateStates(int leftState, int rightState); 19 | } 20 | -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/automata/pruning/RulePairConsumer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this license header, choose License Headers in Project Properties. 3 | * To change this template file, choose Tools | Templates 4 | * and open the template in the editor. 
5 | */ 6 | 7 | package de.up.ling.irtg.automata.pruning; 8 | 9 | import de.up.ling.irtg.automata.Rule; 10 | import de.up.ling.irtg.automata.condensed.CondensedRule; 11 | 12 | /** 13 | * 14 | * @author koller 15 | */ 16 | @FunctionalInterface 17 | public interface RulePairConsumer { 18 | void accept(Rule left, CondensedRule right, double value); 19 | } 20 | -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/codec/package-info.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Classes for reading and writing IRTGs, tree automata, 3 | * and other objects.

4 | * 5 | * Note that the only classes in this package that are meant for 6 | * public consumption are {@link InputCodec}, {@link OutputCodec}, 7 | * and their subclasses. All others are automatically produced by 8 | * parser generators, and are not intended to be used directly.

9 | * 10 | * See the Alto 11 | * wiki for an overview of the available codecs. 12 | * 13 | */ 14 | package de.up.ling.irtg.codec; -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/automata/pruning/MultiFOM.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this license header, choose License Headers in Project Properties. 3 | * To change this template file, choose Tools | Templates 4 | * and open the template in the editor. 5 | */ 6 | package de.up.ling.irtg.automata.pruning; 7 | 8 | import de.up.ling.irtg.automata.Rule; 9 | import de.up.ling.irtg.automata.condensed.CondensedRule; 10 | 11 | /** 12 | * 13 | * @author koller 14 | */ 15 | public interface MultiFOM { 16 | double evaluate(Rule left, CondensedRule right); 17 | double evaluateStates(int leftState, int[] rightStates); 18 | } 19 | -------------------------------------------------------------------------------- /examples/cfg.irtg: -------------------------------------------------------------------------------- 1 | interpretation i: de.up.ling.irtg.algebra.StringAlgebra 2 | 3 | 4 | 5 | 6 | S! 
-> r1(NP,VP) 7 | [i] *(?1,?2) 8 | 9 | 10 | VP -> r4(V,NP) 11 | [i] *(?1,?2) 12 | 13 | 14 | VP -> r5(VP,PP) 15 | [i] *(?1,?2) 16 | 17 | 18 | PP -> r6(P,NP) 19 | [i] *(?1,?2) 20 | 21 | 22 | NP -> r7 23 | [i] john 24 | 25 | 26 | NP -> r2(Det,N) 27 | [i] *(?1,?2) 28 | 29 | 30 | V -> r8 31 | [i] watches 32 | 33 | 34 | Det -> r9 35 | [i] the 36 | 37 | 38 | N -> r10 39 | [i] woman 40 | 41 | 42 | N -> r11 43 | [i] telescope 44 | 45 | N -> r3(N,PP) 46 | [i] *(?1,?2) 47 | 48 | P -> r12 49 | [i] with 50 | 51 | 52 | 53 | -------------------------------------------------------------------------------- /examples/altoDemoStringToGraph.irtg: -------------------------------------------------------------------------------- 1 | /* Example for the alto-demonstration paper*/ 2 | interpretation english: de.up.ling.irtg.algebra.StringAlgebra 3 | interpretation semantic: de.up.ling.irtg.algebra.graph.GraphAlgebra 4 | 5 | NE -> r2 [0.6] 6 | [english] John 7 | [semantic] '(u / John)' 8 | 9 | S! -> r1(NE,NE,NP) [1.0] 10 | [english] *(?1,*(tells,*(?2,?3))) 11 | [semantic] merge(merge(merge('(u / tell :ARG0 (s) :ARG1 (o1) :ARG2 (o2))',?3),r_root_obj1(?2)),r_root_subj(?1)) 12 | 13 | NE -> r3 [0.4] 14 | [english] Mary 15 | [semantic] '(u / Mary)' 16 | 17 | NP -> r4 [1.0] 18 | [english] *(the,news) 19 | [semantic] '(u / news)' 20 | -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/automata/SkipFailRulesFilter.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this template, choose Tools | Templates 3 | * and open the template in the editor. 
4 | */ 5 | package de.up.ling.irtg.automata; 6 | 7 | import com.google.common.base.Predicate; 8 | 9 | /** 10 | * 11 | * @author koller 12 | */ 13 | public class SkipFailRulesFilter implements Predicate { 14 | private TreeAutomaton auto; 15 | 16 | public SkipFailRulesFilter(TreeAutomaton auto) { 17 | this.auto = auto; 18 | } 19 | 20 | @Override 21 | public boolean apply(Rule t) { 22 | return ! InverseHomAutomaton.isFailedRule(t, auto); 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/corpus/CorpusReadingException.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this template, choose Tools | Templates 3 | * and open the template in the editor. 4 | */ 5 | package de.up.ling.irtg.corpus; 6 | 7 | /** 8 | * An exception that indicates that something went wrong 9 | * when reading a corpus from a reader. 10 | * 11 | * @author koller 12 | */ 13 | public class CorpusReadingException extends Exception { 14 | public CorpusReadingException(String message) { 15 | super(message); 16 | } 17 | 18 | public CorpusReadingException(String message, Throwable cause) { 19 | super(message, cause); 20 | } 21 | 22 | 23 | } 24 | -------------------------------------------------------------------------------- /src/main/resources/META-INF/services/de.up.ling.irtg.algebra.Algebra: -------------------------------------------------------------------------------- 1 | de.up.ling.irtg.algebra.BinarizingTreeAlgebra 2 | de.up.ling.irtg.algebra.BinarizingTreeWithAritiesAlgebra 3 | de.up.ling.irtg.algebra.SetAlgebra 4 | de.up.ling.irtg.algebra.StringAlgebra 5 | de.up.ling.irtg.algebra.TagStringAlgebra 6 | de.up.ling.irtg.algebra.TagTreeAlgebra 7 | de.up.ling.irtg.algebra.TreeAlgebra 8 | de.up.ling.irtg.algebra.TreeWithAritiesAlgebra 9 | de.up.ling.irtg.algebra.WideStringAlgebra 10 | de.up.ling.irtg.algebra.graph.GraphAlgebra 11 | 
de.up.ling.irtg.algebra.graph.ApplyModifyGraphAlgebra 12 | de.up.ling.irtg.algebra.BinarizingTagTreeAlgebra 13 | de.up.ling.irtg.algebra.RdgStringAlgebra -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/util/MutableBoolean.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this license header, choose License Headers in Project Properties. 3 | * To change this template file, choose Tools | Templates 4 | * and open the template in the editor. 5 | */ 6 | 7 | package de.up.ling.irtg.util; 8 | 9 | /** 10 | * 11 | * @author koller 12 | */ 13 | public class MutableBoolean { 14 | private boolean val = false; 15 | 16 | public MutableBoolean(boolean b) { 17 | val = b; 18 | } 19 | 20 | public void setValue(boolean b) { 21 | val = b; 22 | } 23 | 24 | public boolean booleanValue() { 25 | return val; 26 | } 27 | 28 | } 29 | -------------------------------------------------------------------------------- /examples/chinese.scfg: -------------------------------------------------------------------------------- 1 | /* Grammar for Chinese-English SCFG example from Chiang 2007 */ 2 | 3 | /* example sentences are: 4 | 5 | 30 duonianlai de youhao hezuo 6 | friendly cooperation over the past 30 years 7 | 8 | In Alto, parse the Chinese sentence on the "left" interpretation 9 | or the English sentence on the "right" interpretation. 
10 | 11 | */ 12 | 13 | 14 | S 15 | 16 | 17 | S -> X 18 | S -> X 19 | 20 | X -> X de X 21 | X -> X[2] X[1] 22 | 23 | X -> X X 24 | X -> X[1] X[2] 25 | 26 | X -> X duonianlai 27 | X -> over the past X years 28 | 29 | X -> 30 30 | X -> 30 31 | 32 | X -> youhao 33 | X -> friendly 34 | 35 | X -> hezuo 36 | X -> cooperation 37 | 38 | 39 | -------------------------------------------------------------------------------- /examples/elephant-weighted.irtg: -------------------------------------------------------------------------------- 1 | interpretation i: de.up.ling.irtg.algebra.StringAlgebra 2 | 3 | S! -> r1(NP,VP) [1.0] 4 | [i] *(?1,?2) 5 | 6 | NP -> r2 [0.3] 7 | [i] john 8 | 9 | NP -> r3 [0.2] 10 | [i] *(an, elephant) 11 | 12 | NP -> r4 [0.2] 13 | [i] *(his, pyjamas) 14 | 15 | NP -> r5(NP,PP) [0.3] 16 | [i] *(?1, ?2) 17 | 18 | VP -> r6(V,NP) [0.5] 19 | [i] *(?1, ?2) 20 | 21 | VP -> r7(VP,PP) [0.5] 22 | [i] *(?1, ?2) 23 | 24 | V -> r8 [1.0] 25 | [i] shot 26 | 27 | PP -> r9(P,NP) [1.0] 28 | [i] *(?1,?2) 29 | 30 | P -> r10 [1.0] 31 | [i] in 32 | 33 | -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/codec/tag/NodeType.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this license header, choose License Headers in Project Properties. 3 | * To change this template file, choose Tools | Templates 4 | * and open the template in the editor. 
5 | */ 6 | 7 | package de.up.ling.irtg.codec.tag; 8 | 9 | /** 10 | * 11 | * @author koller 12 | */ 13 | public enum NodeType { 14 | DEFAULT(""), SUBSTITUTION("!"), FOOT("*"), HEAD("<>"), SECONDARY_LEX("[]"); 15 | 16 | public String mark(String x) { 17 | return x + marker; 18 | } 19 | 20 | private final String marker; 21 | 22 | private NodeType(String m) { 23 | this.marker = m; 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/algebra/graph/GraphEdgeFactory.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this template, choose Tools | Templates 3 | * and open the template in the editor. 4 | */ 5 | package de.up.ling.irtg.algebra.graph; 6 | 7 | import org.jgrapht.EdgeFactory; 8 | 9 | /** 10 | * An edge factory. Given two {@code GraphNode} objects {@code v} and {@code v1}, the 11 | * factory simply returns the edge from {@code v} to {@code v1}. 12 | * @author koller 13 | */ 14 | public class GraphEdgeFactory implements EdgeFactory { 15 | 16 | @Override 17 | public GraphEdge createEdge(GraphNode v, GraphNode v1) { 18 | return new GraphEdge(v, v1); 19 | } 20 | 21 | } 22 | -------------------------------------------------------------------------------- /examples/maxent-cfg.irtg: -------------------------------------------------------------------------------- 1 | interpretation i: de.up.ling.irtg.algebra.StringAlgebra 2 | 3 | feature f1: de.up.ling.irtg.maxent.ChildOfFeature('VP','PP') 4 | feature f2: de.up.ling.irtg.maxent.ChildOfFeature('N','PP') 5 | 6 | S! 
-> r1(NP,VP) 7 | [i] *(?1,?2) 8 | 9 | VP -> r4(V,NP) 10 | [i] *(?1,?2) 11 | 12 | VP -> r5(VP,PP) 13 | [i] *(?1,?2) 14 | 15 | PP -> r6(P,NP) 16 | [i] *(?1,?2) 17 | 18 | NP -> r7 19 | [i] john 20 | 21 | NP -> r2(Det,N) 22 | [i] *(?1,?2) 23 | 24 | V -> r8 25 | [i] watches 26 | 27 | Det -> r9 28 | [i] the 29 | 30 | N -> r10 31 | [i] woman 32 | 33 | N -> r11 34 | [i] telescope 35 | 36 | N -> r3(N,PP) 37 | [i] *(?1,?2) 38 | 39 | P -> r12 40 | [i] with 41 | 42 | 43 | -------------------------------------------------------------------------------- /examples/fcfg.irtg: -------------------------------------------------------------------------------- 1 | interpretation string: de.up.ling.irtg.algebra.StringAlgebra 2 | interpretation tree: de.up.ling.irtg.algebra.TreeWithAritiesAlgebra 3 | interpretation ft: de.up.ling.irtg.algebra.FeatureStructureAlgebra 4 | 5 | 6 | 7 | S! -> r1(NP, VP) 8 | [string] *(?1, ?2) 9 | [tree] S_2(?1, ?2) 10 | [ft] unify(unify("[num: #1, c1: [num: #1], c2: [num: #1]]", emb_c1(?1)), emb_c2(?2)) 11 | 12 | NP -> r2 13 | [string] john 14 | [tree] NP_1(john_0) 15 | [ft] "[num: sg]" 16 | 17 | NP -> r3 18 | [string] children 19 | [tree] NP_1(children_0) 20 | [ft] "[num: pl]" 21 | 22 | VP -> r4 23 | [string] sleeps 24 | [tree] VP_1(sleeps_0) 25 | [ft] "[num: sg]" 26 | 27 | VP -> r5 28 | [string] sleep 29 | [tree] VP_1(sleep_0) 30 | [ft] "[num: pl]" 31 | 32 | -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/corpus/ByteArrayInputStreamSupplier.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this template, choose Tools | Templates 3 | * and open the template in the editor. 
4 | */ 5 | package de.up.ling.irtg.corpus; 6 | 7 | import com.google.common.base.Supplier; 8 | import java.io.ByteArrayInputStream; 9 | import java.io.InputStream; 10 | 11 | /** 12 | * 13 | * @author koller 14 | */ 15 | public class ByteArrayInputStreamSupplier implements Supplier { 16 | private byte[] byteArray; 17 | 18 | public ByteArrayInputStreamSupplier(byte[] byteArray) { 19 | this.byteArray = byteArray; 20 | } 21 | 22 | public InputStream get() { 23 | return new ByteArrayInputStream(byteArray); 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/automata/language_iteration/IdentityRuleRefiner.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this license header, choose License Headers in Project Properties. 3 | * To change this template file, choose Tools | Templates 4 | * and open the template in the editor. 5 | */ 6 | 7 | package de.up.ling.irtg.automata.language_iteration; 8 | 9 | import de.up.ling.irtg.automata.Rule; 10 | import java.util.ArrayList; 11 | import java.util.List; 12 | 13 | /** 14 | * 15 | * @author koller 16 | */ 17 | public class IdentityRuleRefiner implements RuleRefiner { 18 | @Override 19 | public List refine(Rule rule) { 20 | List ret = new ArrayList<>(); 21 | ret.add(rule); 22 | return ret; 23 | } 24 | 25 | } 26 | -------------------------------------------------------------------------------- /src/main/resources/META-INF/services/de.up.ling.irtg.codec.InputCodec: -------------------------------------------------------------------------------- 1 | de.up.ling.irtg.codec.IrtgInputCodec 2 | de.up.ling.irtg.codec.PcfgIrtgInputCodec 3 | de.up.ling.irtg.codec.SynchronousCfgInputCodec 4 | de.up.ling.irtg.codec.TreeAutomatonInputCodec 5 | de.up.ling.irtg.codec.BottomUpTreeAutomatonInputCodec 6 | de.up.ling.irtg.codec.TiburonTreeAutomatonInputCodec 7 | de.up.ling.irtg.codec.TemplateIrtgInputCodec 8 | 
de.up.ling.irtg.codec.BolinasHrgInputCodec 9 | de.up.ling.irtg.codec.IsiAmrInputCodec 10 | de.up.ling.irtg.codec.PtbTreeInputCodec 11 | de.up.ling.irtg.codec.JoshuaInputCodec 12 | de.up.ling.irtg.codec.BinaryIrtgInputCodec 13 | de.up.ling.irtg.codec.TulipacInputCodec 14 | de.up.ling.irtg.codec.RdgInputCodec 15 | de.up.ling.irtg.codec.NltkPcfgInputCodec -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/algebra/RightMbotAlgebra.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this license header, choose License Headers in Project Properties. 3 | * To change this template file, choose Tools | Templates 4 | * and open the template in the editor. 5 | */ 6 | 7 | package de.up.ling.irtg.algebra; 8 | 9 | import de.up.ling.tree.Tree; 10 | 11 | /** 12 | * An experimental algebra for MBOTs. 13 | * 14 | * This algebra represents instances of the TupleAlgebra, which are defined 15 | * over tuples of trees with string labels. Expect bugs when using this. 16 | * 17 | * @author koller 18 | */ 19 | class RightMbotAlgebra extends TupleAlgebra> { 20 | public RightMbotAlgebra() { 21 | super(new TreeAlgebra()); 22 | } 23 | 24 | } 25 | -------------------------------------------------------------------------------- /examples/elephant.irtg: -------------------------------------------------------------------------------- 1 | interpretation i: de.up.ling.irtg.algebra.StringAlgebra 2 | 3 | S! 
-> r1(NP,VP) 4 | [i] *(?1,?2) 5 | 6 | NP -> r2 7 | [i] john 8 | 9 | NP -> r3 10 | [i] *(an, elephant) 11 | 12 | NP -> r4 13 | [i] *(his, pyjamas) 14 | 15 | NP -> r5(NP,PP) 16 | [i] *(?1, ?2) 17 | 18 | VP -> r6(V,NP) 19 | [i] *(?1, ?2) 20 | 21 | VP -> r7(VP,PP) 22 | [i] *(?1, ?2) 23 | 24 | V -> r8 25 | [i] shot 26 | 27 | PP -> r9(P,NP) 28 | [i] *(?1,?2) 29 | 30 | P -> r10 31 | [i] in 32 | 33 | NP -> r11 34 | [i] mary 35 | 36 | V -> r12 37 | [i] watched 38 | 39 | NP -> r13 40 | [i] *(a, man) 41 | 42 | P -> r14 43 | [i] with 44 | 45 | NP -> r15 46 | [i] *(a, telescope) 47 | 48 | NP -> r16 49 | [i] sue 50 | 51 | NP -> r17 52 | [i] *(a, hat) 53 | 54 | -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/algebra/LcfrsAlgebra.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this license header, choose License Headers in Project Properties. 3 | * To change this template file, choose Tools | Templates 4 | * and open the template in the editor. 5 | */ 6 | 7 | package de.up.ling.irtg.algebra; 8 | 9 | import java.util.List; 10 | 11 | /** 12 | * An experimental algebra for LCFRS. 13 | * 14 | * Expect bugs when using this. This is an instantiation of the TupleAlgebra 15 | * for lists of strings. 16 | * 17 | * @author koller 18 | */ 19 | class LcfrsAlgebra extends TupleAlgebra> { 20 | 21 | /** 22 | * Create a new instance with its own signature. 23 | */ 24 | public LcfrsAlgebra() { 25 | super(new StringAlgebra()); 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/semiring/AndOrSemiring.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this template, choose Tools | Templates 3 | * and open the template in the editor. 
4 | */ 5 | 6 | package de.up.ling.irtg.semiring; 7 | 8 | /** 9 | * 10 | * @author koller 11 | */ 12 | public class AndOrSemiring implements Semiring { 13 | 14 | public static AndOrSemiring INSTANCE = new AndOrSemiring(); 15 | 16 | public Boolean add(Boolean x, Boolean y) { 17 | return x || y; 18 | } 19 | 20 | public Boolean multiply(Boolean x, Boolean y) { 21 | return x && y; 22 | } 23 | 24 | public Boolean infinity() { 25 | return true; 26 | } 27 | 28 | public Boolean zero() { 29 | return false; 30 | } 31 | 32 | public Boolean one() { 33 | return true; 34 | } 35 | 36 | } 37 | -------------------------------------------------------------------------------- /src/main/antlr/de/up/ling/irtg/laboratory/DottedCommand.g4: -------------------------------------------------------------------------------- 1 | grammar DottedCommand; 2 | 3 | @header{ 4 | package de.up.ling.irtg.laboratory; 5 | } 6 | 7 | TOKEN: [A-Za-z$<#_][A-Za-z0-9_$#<>]*; 8 | NUMBERTOKEN: [0-9][.0-9]*; 9 | 10 | INTERPLEFT: '['; 11 | INTERPRIGHT: ']'; 12 | BKOPEN: '('; 13 | BKCLOSE: ')'; 14 | DOT: '.'; 15 | COMMA: ',' 16 | | ', ' 17 | ; 18 | 19 | expr: dottedCommand BKOPEN argument BKCLOSE 20 | | TOKEN BKOPEN argument BKCLOSE 21 | | dottedExpr 22 | | TOKEN 23 | | NUMBERTOKEN 24 | ; 25 | 26 | argument: expr COMMA argument 27 | | expr 28 | ; 29 | 30 | dottedCommand: dottedExpr DOT TOKEN; 31 | 32 | dottedExpr: dottedExpr DOT TOKEN 33 | | dottedExpr DOT interpretation 34 | | TOKEN 35 | ; 36 | 37 | interpretation: INTERPLEFT TOKEN INTERPRIGHT; -------------------------------------------------------------------------------- /examples/chasing.tag: -------------------------------------------------------------------------------- 1 | tree trans: 2 | S { 3 | NP![case=nom][] 4 | VP { 5 | V+ 6 | NP![case=acc][] 7 | } 8 | } 9 | 10 | 11 | tree intrans: 12 | S { 13 | NP![case=nom][] 14 | V+ 15 | } 16 | 17 | 18 | tree np_n: 19 | NP[][case=?case] { 20 | Det! 
[case=?case][] 21 | N+ [case=?case][] 22 | } 23 | 24 | tree aux_adj: 25 | N [][case=?case] { 26 | Adj+ [case=?case][] 27 | N* [case=?case][] 28 | } 29 | 30 | 31 | tree det: 32 | Det+ 33 | 34 | 35 | word 'jagt': trans 36 | 37 | word 'hund': np_n[case=nom] 38 | word 'hund': np_n[case=acc] 39 | 40 | word 'hase': np_n[case=nom] 41 | word 'hasen': np_n[case=acc] 42 | 43 | word 'der': det[case=nom] 44 | word 'den': det[case=acc] 45 | 46 | lemma 'schnell': aux_adj { 47 | word 'schnelle': [case=nom] 48 | word 'schnellen': [case=acc] 49 | } 50 | -------------------------------------------------------------------------------- /examples/chinese.irtg: -------------------------------------------------------------------------------- 1 | /* Grammar for Chinese-English SCFG example from Chiang 2007 */ 2 | 3 | /* example sentences are: 4 | 5 | 30 duonianlai de youhao hezuo 6 | friendly cooperation over the past 30 years 7 | 8 | */ 9 | 10 | interpretation english: de.up.ling.irtg.algebra.StringAlgebra 11 | interpretation chinese: de.up.ling.irtg.algebra.StringAlgebra 12 | 13 | S! 
-> r1(X) 14 | [chinese] ?1 15 | [english] ?1 16 | 17 | X -> r2(X,X) 18 | [chinese] *(*(?1, de), ?2) 19 | [english] *(?2, ?1) 20 | 21 | X -> r3(X,X) 22 | [chinese] *(?1, ?2) 23 | [english] *(?1, ?2) 24 | 25 | X -> r4(X) 26 | [chinese] *(?1, duonianlai) 27 | [english] *(*(*(*(over,the),past), ?1), years) 28 | 29 | X -> r5 30 | [chinese] '30' 31 | [english] '30' 32 | 33 | X -> r6 34 | [chinese] youhao 35 | [english] friendly 36 | 37 | X -> r7 38 | [chinese] hezuo 39 | [english] cooperation 40 | 41 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: java 2 | deploy: 3 | provider: releases 4 | api_key: 5 | secure: aD1Rnd8IHrON874vUmeuY3T3shXmdF7IvE6Pz6KQ0A2YWh18FApuQCKmY81NmgXWem4YIuSZ9HR6NJpVHlasCFM8uBeF08DtSUGXOJMGfavO8mu5ZYzLW4XUoevDN3umG3ATv3GHm9lbmZpU/D4uTb8SgcLAr9whSQphXtzzcfGrACHTxcuGCW8BpCO79t+TJx3rWarxH/cY0v6CQAAJzXUZqNgHsgwoZ0ED6XF7cJ+bARjyqWQ839NP1VdM728CnntQ0I+g5gSv1mKmKNaGkuOanEivyhe7kClCXwtamrPoLxVspJf5TqOzoyDO639dE4Pag/kkF/NsLtMBhCRfSNDJbY5R8fdRqDldhnRXNimysYcvLSozlkFp8mE95LzWr5FkrfE1p4DVVic3umu5+9MnoPFW+SpaH00th6I2AtWbBVR6VNFewHMnC8z4nFNBZJyri8gvt+F5cDgGDNpOQb7V97xFIC43P6wUKKwZr+zR4/zGB3U7UdBOapc0Kz8wNKI0233X5F5rHISdJ05biMfwOqKWY7xcvNkt74O9BeCv1E7f6DVZTqtYMWgUYNfM4ex5svs6fxKE36bjHdr4ZCu7cjJBXTk5ggfCuPuCCX5YcVGAWudzx2HNpTIF7hMqZHt9ijm8lZE2M6AQaQ78L2/Y/7mk4k/V0IE2AEowekU= 6 | skip_cleanup: true 7 | file_glob: true 8 | file: build/libs/alto-*-all.jar 9 | on: 10 | tags: true 11 | -------------------------------------------------------------------------------- /src/main/antlr/de/up/ling/irtg/codec/pcfg_as_irtg/PcfgAsIrtg.g4: -------------------------------------------------------------------------------- 1 | grammar PcfgAsIrtg; 2 | 3 | @header{ 4 | package de.up.ling.irtg.codec.pcfg_as_irtg; 5 | } 6 | 7 | ARROW : '->'; 8 | NUMBER_IN_BRACKETS: '[' [0-9.] 
([0-9.eE-]*) ']'; 9 | 10 | //QUOTED_NAME: ['] (~['])* [']; 11 | //DOUBLE_QUOTED_NAME: ["] (~["])* ["]; 12 | 13 | // all non-whitespace strings that are not -> or [...] are symbols 14 | NAME: ~( '[' | '\t' | ' ' | '\r' | '\n'| '\u000C' ) ((~( '\t' | ' ' | '\r' | '\n'| '\u000C' ))*); 15 | 16 | 17 | WS: [ \n\t\r]+ -> skip; 18 | 19 | COMMENT 20 | : ( '//' ~[\r\n]* '\r'? '\n' 21 | | '/*' .*? '*/' 22 | ) -> skip 23 | ; 24 | 25 | 26 | pcfg: startsymbol pcfg_rule+; 27 | 28 | startsymbol: name; 29 | 30 | pcfg_rule: name ARROW name+ NUMBER_IN_BRACKETS?; 31 | 32 | 33 | name : NAME #RAW ; //| QUOTED_NAME #QUOTED | DOUBLE_QUOTED_NAME #QUOTED ; -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/semiring/Semiring.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this template, choose Tools | Templates 3 | * and open the template in the editor. 4 | */ 5 | 6 | package de.up.ling.irtg.semiring; 7 | 8 | /** 9 | * A Semiring defines add, multiply and zero. 10 | * add is commutative and zero + x = x 11 | * multiply with zero is always zero. 12 | * 13 | * A Semiring does not have state. Implementing the Semiring interface 14 | * means that the same semiring object can be safely shared between different 15 | * computations. 16 | * 17 | * The semirings in alto provide a constant INSTANCE that holds a default instance 18 | * to be used instead of creating a new instance. 
19 | */ 20 | public interface Semiring { 21 | E add(E x, E y); 22 | E multiply(E x, E y); 23 | E zero(); // a value such that zero + x = x for all x 24 | E one(); 25 | } 26 | -------------------------------------------------------------------------------- /src/test/resources/examples/chinese.irtg: -------------------------------------------------------------------------------- 1 | /* Grammar for Chinese-English SCFG example from Chiang 2007 */ 2 | 3 | /* example sentences are: 4 | 5 | 30 duonianlai de youhao hezuo 6 | friendly cooperation over the past 30 years 7 | 8 | */ 9 | 10 | interpretation english: de.up.ling.irtg.algebra.StringAlgebra 11 | interpretation chinese: de.up.ling.irtg.algebra.StringAlgebra 12 | 13 | S! -> r1(X) 14 | [chinese] ?1 15 | [english] ?1 16 | 17 | X -> r2(X,X) 18 | [chinese] *(*(?1, de), ?2) 19 | [english] *(?2, ?1) 20 | 21 | X -> r3(X,X) 22 | [chinese] *(?1, ?2) 23 | [english] *(?1, ?2) 24 | 25 | X -> r4(X) 26 | [chinese] *(?1, duonianlai) 27 | [english] *(*(*(*(over,the),past), ?1), years) 28 | 29 | X -> r5 30 | [chinese] '30' 31 | [english] '30' 32 | 33 | X -> r6 34 | [chinese] youhao 35 | [english] friendly 36 | 37 | X -> r7 38 | [chinese] hezuo 39 | [english] cooperation 40 | 41 | -------------------------------------------------------------------------------- /src/main/antlr/de/up/ling/irtg/codec/tiburon_treeautomaton/TiburonTreeAutomaton.g4: -------------------------------------------------------------------------------- 1 | grammar TiburonTreeAutomaton; 2 | 3 | @header{ 4 | package de.up.ling.irtg.codec.tiburon_treeautomaton; 5 | } 6 | 7 | ARROW : '->'; 8 | OPBK : '('; 9 | CLBK : ')'; 10 | HASH: '#'; 11 | 12 | 13 | DOUBLE_QUOTED_NAME: ["] (~["])* ["]; 14 | QUOTED_NAME: ['] (~['])* [']; 15 | NAME: ~('#' | '(' | ')' | '"' | '\'' | '\t' | ' ' | '\r' | '\n'| '\u000C' ) ((~( '(' | ')' | '\t' | ' ' | '\r' | '\n'| '\u000C' ))*); 16 | 17 | WS: [ \n\t\r]+ -> skip; 18 | 19 | 20 | COMMENT: ( '%' ~[\r\n]* '\r'? 
'\n' ) -> skip; 21 | 22 | 23 | fta : state+ auto_rule+; 24 | 25 | 26 | auto_rule : state '->' name state_list weight?; 27 | state_list : ('(' state+ ')')?; 28 | 29 | weight : '#' name; 30 | 31 | 32 | name : NAME #RAW | QUOTED_NAME #QUOTED | DOUBLE_QUOTED_NAME #QUOTED ; 33 | state : name; 34 | 35 | -------------------------------------------------------------------------------- /src/main/antlr/de/saar/coli/featstruct/FeatStruct.g4: -------------------------------------------------------------------------------- 1 | grammar FeatStruct; 2 | 3 | @header{ 4 | package de.saar.coli.featstruct; 5 | } 6 | 7 | 8 | 9 | NAME: [a-zA-Z_*$@+] ([a-zA-Z0-9_<>*$@+/.-]*); 10 | QUOTED_NAME: ['] (~['])* [']; 11 | DOUBLE_QUOTED_NAME: ["] (~["])* ["]; 12 | INDEX : [#] [a-zA-Z0-9_-]*; 13 | INT: [0-9]+; 14 | 15 | OP_SQBK: '['; 16 | CL_SQBK: ']'; 17 | COLON: ':'; 18 | COMMA: ','; 19 | 20 | 21 | WS: [ \n\t\r]+ -> skip; 22 | 23 | COMMENT 24 | : ( '//' ~[\r\n]* '\r'? '\n' 25 | | '/*' .*? '*/' 26 | ) -> skip 27 | ; 28 | 29 | 30 | featstruct: index | (index? (avm|primitive)); 31 | 32 | avm: OP_SQBK (avpair COMMA)* avpair? CL_SQBK; 33 | 34 | avpair: name COLON featstruct; 35 | 36 | primitive: name | number; 37 | 38 | name : NAME #RAW | QUOTED_NAME #QUOTED | DOUBLE_QUOTED_NAME #DQUOTED ; 39 | 40 | index : INDEX; 41 | 42 | number: INT; -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/gui/JDerivationDisplayable.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this template, choose Tools | Templates 3 | * and open the template in the editor. 
4 | */ 5 | package de.up.ling.irtg.gui; 6 | 7 | import de.up.ling.irtg.TreeWithInterpretations; 8 | import de.up.ling.tree.Tree; 9 | import java.awt.Color; 10 | import javax.swing.JPanel; 11 | 12 | /** 13 | * 14 | * @author koller 15 | */ 16 | abstract public class JDerivationDisplayable extends JPanel { 17 | abstract public void setDerivationTree(TreeWithInterpretations twi); 18 | // abstract public void setDerivationTree(Tree derivationTree); 19 | 20 | // abstract public void refreshMarkers(Map, Color> markedNodesInDerivationTree); 21 | 22 | abstract public void mark(Tree nodeInDerivationTree, Color markupColor); 23 | 24 | abstract public void unmark(Tree nodeInDerivationTree); 25 | } 26 | -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/util/MutableDouble.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this license header, choose License Headers in Project Properties. 3 | * To change this template file, choose Tools | Templates 4 | * and open the template in the editor. 
5 | */ 6 | package de.up.ling.irtg.util; 7 | 8 | /** 9 | * 10 | * @author koller 11 | */ 12 | public class MutableDouble { 13 | private double value; 14 | 15 | public MutableDouble() { 16 | } 17 | 18 | public MutableDouble(double value) { 19 | this.value = value; 20 | } 21 | 22 | public double getValue() { 23 | return value; 24 | } 25 | 26 | public void setValue(double value) { 27 | this.value = value; 28 | } 29 | 30 | public void multiplyBy(double value) { 31 | this.value *= value; 32 | } 33 | 34 | public void add(double value){ 35 | this.value += value; 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/automata/pruning/PruningPolicy.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this license header, choose License Headers in Project Properties. 3 | * To change this template file, choose Tools | Templates 4 | * and open the template in the editor. 5 | */ 6 | 7 | package de.up.ling.irtg.automata.pruning; 8 | 9 | import de.up.ling.irtg.automata.Rule; 10 | import de.up.ling.irtg.automata.condensed.CondensedRule; 11 | import de.up.ling.irtg.laboratory.OperationAnnotation; 12 | 13 | /** 14 | * 15 | * @author koller 16 | */ 17 | public interface PruningPolicy { 18 | void foreachPrunedRulePair(int rightParent, RulePairConsumer consumer); 19 | void collect(int rightParent, Rule left, CondensedRule right); 20 | 21 | @OperationAnnotation(code = "numIteratedRules") 22 | long numIteratedRules(); 23 | 24 | @OperationAnnotation(code = "numCollectedRules") 25 | long numCollectedRules(); 26 | } 27 | -------------------------------------------------------------------------------- /src/test/groovy/de/up/ling/irtg/util/BuildPropertiesTest.groovy: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this license header, choose License Headers in Project Properties. 
3 | * To change this template file, choose Tools | Templates 4 | * and open the template in the editor. 5 | */ 6 | 7 | package de.up.ling.irtg.util 8 | 9 | import org.junit.* 10 | import static org.junit.Assert.* 11 | import static org.hamcrest.CoreMatchers.*; 12 | 13 | /** 14 | * 15 | * @author koller 16 | */ 17 | class BuildPropertiesTest { 18 | @Test 19 | public void testVersion() { 20 | assertThat(BuildProperties.getVersion(), is(not(null))) 21 | assertThat(BuildProperties.getVersion(), is(not("(undefined)"))) 22 | } 23 | 24 | @Test 25 | public void testBuild() { 26 | assertThat(BuildProperties.getBuild(), is(not(null))) 27 | assertThat(BuildProperties.getBuild(), is(not("(undefined)"))) 28 | } 29 | } 30 | 31 | -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/corpus/ChartAttacher.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this license header, choose License Headers in Project Properties. 3 | * To change this template file, choose Tools | Templates 4 | * and open the template in the editor. 5 | */ 6 | package de.up.ling.irtg.corpus; 7 | 8 | import java.util.Iterator; 9 | 10 | /** 11 | * This interface provides an attach() method that takes an iterator over 12 | * instances and returns an iterator over copies of these instances that have 13 | * a chart attached to them. 14 | * 15 | * @author christoph_teichmann 16 | */ 17 | public interface ChartAttacher { 18 | 19 | /** 20 | * This method must accept an instance iterator and return another instance 21 | * iterator which only returns instances that are copies of the original with 22 | * charts attached. 
23 | * 24 | */ 25 | Iterator attach(Iterator source); 26 | } 27 | -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/laboratory/OperationAnnotation.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this license header, choose License Headers in Project Properties. 3 | * To change this template file, choose Tools | Templates 4 | * and open the template in the editor. 5 | */ 6 | package de.up.ling.irtg.laboratory; 7 | 8 | import java.lang.annotation.Retention; 9 | import static java.lang.annotation.RetentionPolicy.RUNTIME; 10 | 11 | /** 12 | * Annotate a method with this, to make it available in Alto Lab. The String 13 | * {@code code} will be the function name in Alto Lab tasks. Codes for 14 | * static methods must be globally unique, codes for non-static methods must be 15 | * unique in a class and all super + sub-classes. If a super-method is annotated, 16 | * the code carries over. 
17 | * @author groschwitz 18 | */ 19 | @Retention(value = RUNTIME) 20 | public @interface OperationAnnotation { 21 | String code(); 22 | 23 | } 24 | -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/codec/ThrowingErrorListener.java: -------------------------------------------------------------------------------- 1 | package de.up.ling.irtg.codec; 2 | 3 | import org.antlr.v4.runtime.BaseErrorListener; 4 | import org.antlr.v4.runtime.RecognitionException; 5 | import org.antlr.v4.runtime.Recognizer; 6 | 7 | /** 8 | * CC-by-sa 4.0 Mouagip (if this snippet is copyrightable at all) 9 | * https://stackoverflow.com/questions/18132078/handling-errors-in-antlr4 10 | */ 11 | 12 | public class ThrowingErrorListener extends BaseErrorListener { 13 | 14 | public static final ThrowingErrorListener INSTANCE = new ThrowingErrorListener(); 15 | 16 | @Override 17 | public void syntaxError(Recognizer recognizer, Object offendingSymbol, 18 | int line, int charPositionInLine, String msg, RecognitionException e) 19 | throws CodecParseException { 20 | throw new CodecParseException("line " + line + ":" + charPositionInLine + " " + msg); 21 | } 22 | } -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/io/NumberCodec.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this license header, choose License Headers in Project Properties. 3 | * To change this template file, choose Tools | Templates 4 | * and open the template in the editor. 
5 | */ 6 | package de.up.ling.irtg.io; 7 | 8 | import java.io.IOException; 9 | 10 | /** 11 | * 12 | * @author koller 13 | */ 14 | public interface NumberCodec { 15 | int readInt() throws IOException; 16 | int readSignedInt() throws IOException; 17 | long readLong() throws IOException; 18 | long readSignedLong() throws IOException; 19 | double readDouble() throws IOException; 20 | 21 | long writeInt(int value) throws IOException; 22 | long writeSignedInt(int value) throws IOException; 23 | long writeLong(long value) throws IOException; 24 | long writeSignedLong(long value) throws IOException; 25 | long writeDouble(double value) throws IOException; 26 | } 27 | -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/automata/pruning/RulePair.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this license header, choose License Headers in Project Properties. 3 | * To change this template file, choose Tools | Templates 4 | * and open the template in the editor. 
5 | */ 6 | 7 | package de.up.ling.irtg.automata.pruning; 8 | 9 | import de.up.ling.irtg.automata.Rule; 10 | import de.up.ling.irtg.automata.condensed.CondensedRule; 11 | 12 | /** 13 | * 14 | * @author koller 15 | */ 16 | class RulePair implements Comparable { 17 | public Rule left; 18 | public CondensedRule right; 19 | public double value; 20 | 21 | public RulePair(Rule left, CondensedRule right, double value) { 22 | this.left = left; 23 | this.right = right; 24 | this.value = value; 25 | } 26 | 27 | @Override 28 | public int compareTo(RulePair o) { 29 | return -Double.compare(value, o.value); 30 | } 31 | 32 | } 33 | -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/codec/CogsOutputCodec.java: -------------------------------------------------------------------------------- 1 | package de.up.ling.irtg.codec; 2 | 3 | import de.saar.basic.Pair; 4 | import de.saar.coli.algebra.OrderedFeatureTreeAlgebra; 5 | 6 | import java.io.*; 7 | import java.util.ArrayList; 8 | import java.util.List; 9 | 10 | /** 11 | * An output codec for printing OrderedFeatureTrees in the variable-free COGS format. 
12 | * 13 | */ 14 | @CodecMetadata(name = "cogs", description = "Encodes an OrderedFeatureTree in the COGS format", type = OrderedFeatureTreeAlgebra.OrderedFeatureTree.class) 15 | public class CogsOutputCodec extends OutputCodec { 16 | @Override 17 | public void write(OrderedFeatureTreeAlgebra.OrderedFeatureTree ft, OutputStream ostream) throws IOException, UnsupportedOperationException { 18 | Writer w = new BufferedWriter(new OutputStreamWriter(ostream)); 19 | w.write(ft.toString(true)); 20 | w.flush(); 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/semiring/LongArithmeticSemiring.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this template, choose Tools | Templates 3 | * and open the template in the editor. 4 | */ 5 | package de.up.ling.irtg.semiring; 6 | 7 | /** 8 | * 9 | * @author koller 10 | */ 11 | public class LongArithmeticSemiring implements Semiring { 12 | 13 | public static LongArithmeticSemiring INSTANCE = new LongArithmeticSemiring(); 14 | 15 | public Long add(Long x, Long y) { 16 | if (x.equals(infinity()) || y.equals(infinity())) { 17 | return infinity(); 18 | } else { 19 | return x + y; 20 | } 21 | } 22 | 23 | public Long multiply(Long x, Long y) { 24 | return x * y; 25 | } 26 | 27 | public Long infinity() { 28 | return Long.MAX_VALUE; 29 | } 30 | 31 | public Long zero() { 32 | return 0L; 33 | } 34 | 35 | public Long one() { 36 | return 1L; 37 | } 38 | } 39 | -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/automata/AbstractRule.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this license header, choose License Headers in Project Properties. 3 | * To change this template file, choose Tools | Templates 4 | * and open the template in the editor. 
5 | */ 6 | package de.up.ling.irtg.automata; 7 | 8 | /** 9 | * 10 | * @author christoph_teichmann 11 | */ 12 | public interface AbstractRule { 13 | 14 | int getArity(); 15 | 16 | int[] getChildren(); 17 | 18 | /** 19 | * Retrieves the auxiliary information from this rule. 20 | * 21 | * @see #setExtra(java.lang.Object) 22 | */ 23 | Object getExtra(); 24 | 25 | int getParent(); 26 | 27 | double getWeight(); 28 | 29 | boolean isLoop(); 30 | 31 | /** 32 | * Stores auxiliary information within this rule. Do not use this unless you 33 | * know what you're doing. 34 | * 35 | */ 36 | void setExtra(Object extra); 37 | 38 | void setWeight(double weight); 39 | } 40 | -------------------------------------------------------------------------------- /src/main/java/de/saar/coli/featstruct/FsParsingException.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this license header, choose License Headers in Project Properties. 3 | * To change this template file, choose Tools | Templates 4 | * and open the template in the editor. 
5 | */ 6 | package de.saar.coli.featstruct; 7 | 8 | /** 9 | * 10 | * @author koller 11 | */ 12 | public class FsParsingException extends Exception { 13 | public FsParsingException() { 14 | } 15 | 16 | public FsParsingException(String message) { 17 | super(message); 18 | } 19 | 20 | public FsParsingException(String message, Throwable cause) { 21 | super(message, cause); 22 | } 23 | 24 | public FsParsingException(Throwable cause) { 25 | super(cause); 26 | } 27 | 28 | public FsParsingException(String message, Throwable cause, boolean enableSuppression, boolean writableStackTrace) { 29 | super(message, cause, enableSuppression, writableStackTrace); 30 | } 31 | 32 | } 33 | -------------------------------------------------------------------------------- /examples/reg.irtg: -------------------------------------------------------------------------------- 1 | /* 2 | Demo grammar for RE generation using the set algebra. 3 | 4 | The set algebra only works if you specify a model over which it should interpret 5 | the atomic predicate symbols. You can do this by passing a JSON representation 6 | of the model as an option. Try translating {e} and {r1} into strings using the 7 | following model representation: 8 | 9 | {"sleep": [["e", "r1"]], "rabbit": [["r1"], ["r2"]], "white": [["r1"], ["b"]], "in": [["r1","h"], ["f","h2"]], "hat": [["h"], ["h2"]] } 10 | 11 | */ 12 | 13 | interpretation sem: de.up.ling.irtg.algebra.SetAlgebra 14 | interpretation string: de.up.ling.irtg.algebra.StringAlgebra 15 | 16 | S! -> a_sleeps_r1(N) 17 | [sem] project_1(intersect_2(sleep, uniq_r1(?1))) 18 | [string] *(?1, sleeps) 19 | 20 | N! 
-> a_rabbit_r2(Adj_N) 21 | [sem] member_r2(intersect_1(rabbit, ?1)) 22 | [string] *(the, *(?1, rabbit)) 23 | 24 | Adj_N -> b_white 25 | [sem] white 26 | [string] white 27 | 28 | Adj_N -> b_nop 29 | [sem] T 30 | [string] '' 31 | 32 | -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/automata/coarse_to_fine/RulePairList.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this license header, choose License Headers in Project Properties. 3 | * To change this template file, choose Tools | Templates 4 | * and open the template in the editor. 5 | */ 6 | package de.up.ling.irtg.automata.coarse_to_fine; 7 | 8 | import it.unimi.dsi.fastutil.ints.IntList; 9 | import java.util.List; 10 | 11 | /** 12 | * 13 | * @author koller 14 | */ 15 | public class RulePairList { 16 | private List grammarNodes; 17 | private IntList invhomStates; 18 | 19 | public RulePairList(List grammarNodes, IntList invhomStates) { 20 | this.grammarNodes = grammarNodes; 21 | this.invhomStates = invhomStates; 22 | } 23 | 24 | public List getGrammarNodes() { 25 | return grammarNodes; 26 | } 27 | 28 | public IntList getInvhomStates() { 29 | return invhomStates; 30 | } 31 | 32 | 33 | } 34 | -------------------------------------------------------------------------------- /src/main/antlr/de/up/ling/irtg/codec/scfg/SynchronousContextFreeGrammar.g4: -------------------------------------------------------------------------------- 1 | grammar SynchronousContextFreeGrammar; 2 | 3 | @header{ 4 | package de.up.ling.irtg.codec.scfg; 5 | } 6 | 7 | ARROW : '->'; 8 | NUMBER_IN_BRACKETS: '[' ([0-9]+) ']'; 9 | 10 | //QUOTED_NAME: ['] (~['])* [']; 11 | //DOUBLE_QUOTED_NAME: ["] (~["])* ["]; 12 | 13 | // all non-whitespace strings that are not -> or [...] 
are symbols 14 | NAME: ~( '[' | '\t' | ' ' | '\r' | '\n'| '\u000C' ) ((~( '\t' | ' ' | '\r' | '\n'| '\u000C' | '[' ))*); 15 | 16 | 17 | WS: [ \n\t\r]+ -> skip; 18 | 19 | COMMENT 20 | : ( '//' ~[\r\n]* '\r'? '\n' 21 | | '/*' .*? '*/' 22 | ) -> skip 23 | ; 24 | 25 | 26 | scfg: startsymbol rulepair+; 27 | 28 | startsymbol: name; 29 | 30 | rulepair: cfg_rule cfg_rule; 31 | cfg_rule: name ARROW name_with_optional_bracket+ NUMBER_IN_BRACKETS?; 32 | 33 | name : NAME #RAW ; //| QUOTED_NAME #QUOTED | DOUBLE_QUOTED_NAME #QUOTED ; 34 | name_with_optional_bracket : name (NUMBER_IN_BRACKETS)?; 35 | -------------------------------------------------------------------------------- /src/main/antlr/de/up/ling/irtg/codec/bottomup_treeautomaton/BottomUpTreeAutomaton.g4: -------------------------------------------------------------------------------- 1 | grammar BottomUpTreeAutomaton; 2 | 3 | @header{ 4 | package de.up.ling.irtg.codec.bottomup_treeautomaton; 5 | } 6 | 7 | ARROW : '->'; 8 | OPBK : '('; 9 | CLBK : ')'; 10 | COMMA: ','; 11 | 12 | ANGLE_IDENTIFIER: '<' (~[>])* '>'; 13 | DOUBLE_QUOTED_NAME: ["] (~["])* ["]; 14 | QUOTED_NAME: ['] (~['])* [']; 15 | NAME: ~('<' | '(' | ')' | '"' | '\'' | ','| '\t' | ' ' | '\r' | '\n'| '\u000C' ) ((~( '(' | ')' | ',' | '\t' | ' ' | '\r' | '\n'| '\u000C' ))*); 16 | 17 | WS: [ \n\t\r]+ -> skip; 18 | 19 | COMMENT 20 | : ( '#' ~[\r\n]* '\r'? 
'\n' 21 | ) -> skip 22 | ; 23 | 24 | 25 | 26 | 27 | fta : state+ auto_rule+; 28 | 29 | 30 | 31 | auto_rule : name state_list ARROW state weight?; 32 | state_list : ('(' (state ',')* state ')')?; 33 | 34 | weight: ANGLE_IDENTIFIER; 35 | 36 | name : NAME #RAW | DOUBLE_QUOTED_NAME #QUOTED | QUOTED_NAME #QUOTED | ANGLE_IDENTIFIER #ANGLE; 37 | state : name; 38 | 39 | -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/semiring/DoubleArithmeticSemiring.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this template, choose Tools | Templates 3 | * and open the template in the editor. 4 | */ 5 | package de.up.ling.irtg.semiring; 6 | 7 | /** 8 | * 9 | * @author koller 10 | */ 11 | public class DoubleArithmeticSemiring implements Semiring { 12 | 13 | public static DoubleArithmeticSemiring INSTANCE = new DoubleArithmeticSemiring(); 14 | 15 | public Double add(Double x, Double y) { 16 | if (x.equals(infinity()) || y.equals(infinity())) { 17 | return infinity(); 18 | } else { 19 | return x + y; 20 | } 21 | } 22 | 23 | public Double multiply(Double x, Double y) { 24 | return x * y; 25 | } 26 | 27 | public Double infinity() { 28 | return Double.POSITIVE_INFINITY; 29 | } 30 | 31 | public Double zero() { 32 | return 0.0; 33 | } 34 | 35 | public Double one() { 36 | return 1.0; 37 | } 38 | 39 | } 40 | -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/codec/ToStringOutputCodec.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this license header, choose License Headers in Project Properties. 3 | * To change this template file, choose Tools | Templates 4 | * and open the template in the editor. 
5 | */ 6 | 7 | package de.up.ling.irtg.codec; 8 | 9 | import java.io.OutputStream; 10 | import java.io.OutputStreamWriter; 11 | import java.io.PrintWriter; 12 | 13 | /** 14 | * An output codec that encodes arbitrary objects by simply 15 | * calling their {@link Object#toString() } method. 16 | * 17 | * @author koller 18 | */ 19 | @CodecMetadata(name = "toString", description = "encodes an object using its toString method", type = Object.class, displayInPopup = false) 20 | public class ToStringOutputCodec extends OutputCodec { 21 | @Override 22 | public void write(Object object, OutputStream ostream) { 23 | PrintWriter w = new PrintWriter(new OutputStreamWriter(ostream)); 24 | w.write(object.toString()); 25 | w.flush(); 26 | } 27 | 28 | } 29 | -------------------------------------------------------------------------------- /examples/ftag.irtg: -------------------------------------------------------------------------------- 1 | interpretation tree: de.up.ling.irtg.algebra.TagTreeAlgebra 2 | interpretation string: de.up.ling.irtg.algebra.TagStringAlgebra 3 | interpretation ft: de.up.ling.irtg.algebra.FeatureStructureAlgebra 4 | 5 | 6 | 7 | S_S! 
-> loves(NP_S, NP_S, VP_A) 8 | [string] *CONC11*(?1, *WRAP21*(?3, *CONC11*(loves, ?2))) 9 | [tree] S2(?1, @(?3, VP2(V0(loves), ?2))) 10 | [ft] unify(unify(unify(unify("[root: [], c1: [num: #1], anch: [num: #1 sg], c22: [num: #2], t2: #3, b2: #3]", 11 | emb_c1(proj_root(?1))), 12 | emb_c22(proj_root(?2))), 13 | emb_t2(proj_root(?3))), 14 | emb_b2(proj_foot(?3))) 15 | 16 | VP_A -> nop 17 | [string] *EE* 18 | [tree] * 19 | [ft] "[root: #1, foot: #1]" 20 | 21 | NP_S -> john 22 | [string] john 23 | [tree] NP0(john) 24 | [ft] "[root: [num: sg]]" 25 | 26 | NP_S -> john_pl 27 | [string] john 28 | [tree] NP0(johnnn) 29 | [ft] "[root: [num: pl]]" 30 | 31 | 32 | NP_S -> mary 33 | [string] mary 34 | [tree] NP0(mary) 35 | [ft] "[root: [num: sg]]" 36 | 37 | -------------------------------------------------------------------------------- /scripts/agc: -------------------------------------------------------------------------------- 1 | #! /bin/bash 2 | 3 | # agc = Alto Grammar Converter 4 | 5 | # Converts a grammar from one format to another. 6 | # 7 | # The input is read from a file if one is specified on the 8 | # command-line, or otherwise from stdin. The input codec is determined, 9 | # in this order, (a) from the --input-codec command-line argument 10 | # if one is given, or (b) from the filename, if one is given and its 11 | # extension can be resolved to an input codec. If no input codec can be 12 | # determined, the script aborts. 13 | # 14 | # The output is written to a file if one is specified with the 15 | # --output-file command-line argument, or otherwise to stdout. 16 | # The output codec is determined, in this order, (a) from the --output-codec 17 | # command-line argument if one is given; (b) from the output file name 18 | # if one was specified and its extension can be resolved to an output codec; 19 | # (c) the {@link ToStringOutputCodec} if no other output codec can be 20 | # determined. 
21 | 22 | java -cp $ALTO_JAR de.up.ling.irtg.script.GrammarConverter "$@" 23 | -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/corpus/FileInputStreamSupplier.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this template, choose Tools | Templates 3 | * and open the template in the editor. 4 | */ 5 | package de.up.ling.irtg.corpus; 6 | 7 | import com.google.common.base.Supplier; 8 | import java.io.File; 9 | import java.io.FileInputStream; 10 | import java.io.FileNotFoundException; 11 | import java.io.InputStream; 12 | import java.util.logging.Level; 13 | import java.util.logging.Logger; 14 | 15 | /** 16 | * 17 | * @author koller 18 | */ 19 | public class FileInputStreamSupplier implements Supplier { 20 | private File file; 21 | 22 | public FileInputStreamSupplier(File file) { 23 | this.file = file; 24 | } 25 | 26 | public InputStream get() { 27 | try { 28 | return new FileInputStream(file); 29 | } catch (FileNotFoundException ex) { 30 | Logger.getLogger(FileInputStreamSupplier.class.getName()).log(Level.SEVERE, null, ex); 31 | return null; 32 | } 33 | } 34 | 35 | } 36 | -------------------------------------------------------------------------------- /init.scala: -------------------------------------------------------------------------------- 1 | // Start the IRTG tool as follows: 2 | // 3 | // scala -cp irtg-1.0.jar -Yrepl-sync -i init.scala 4 | // 5 | // If you are using Scala 2.9 or lower, delete the indicated line 6 | // below first. 
7 | 8 | import de.up.ling.irtg._ 9 | import de.up.ling.irtg.algebra._ 10 | import de.up.ling.irtg.automata._ 11 | import de.up.ling.irtg.hom._ 12 | import de.up.ling.irtg.signature._ 13 | import de.up.ling.irtg.corpus._ 14 | import de.up.ling.irtg.codec._ 15 | import de.up.ling.tree._ 16 | import de.saar.basic._ 17 | import de.up.ling.irtg.util.TestingTools._ 18 | 19 | 20 | import java.io._ 21 | import collection.JavaConverters._ 22 | 23 | // comment this for Scala 2.9 or lower 24 | import scala.language.implicitConversions; 25 | 26 | import ScalaShell._ 27 | 28 | 29 | implicit def intmap2integermap(map:scala.collection.immutable.Map[String,Int]) = map.asJava.asInstanceOf[java.util.Map[String,java.lang.Integer]] 30 | implicit def stringmap2java(map:scala.collection.immutable.Map[String,String]) = map.asJava 31 | implicit def string2tree(s:String) = pt(s) 32 | -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/codec/PtbTreeOutputCodec.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this license header, choose License Headers in Project Properties. 3 | * To change this template file, choose Tools | Templates 4 | * and open the template in the editor. 
5 | */ 6 | 7 | package de.up.ling.irtg.codec; 8 | 9 | import de.up.ling.tree.Tree; 10 | 11 | import java.io.OutputStream; 12 | import java.io.OutputStreamWriter; 13 | import java.io.PrintWriter; 14 | 15 | /** 16 | * 17 | * @author koller 18 | */ 19 | @CodecMetadata(name = "ptb-out", description = "encodes a tree as a PTB-style Lisp string", type = Tree.class) 20 | public class PtbTreeOutputCodec extends OutputCodec { 21 | @Override 22 | public void write(Tree tree, OutputStream ostream) throws UnsupportedOperationException { 23 | if( hasTrueOption("top")) { 24 | tree = Tree.create("TOP", tree); 25 | } 26 | 27 | PrintWriter w = new PrintWriter(new OutputStreamWriter(ostream)); 28 | w.write(tree.toLispString()); 29 | w.flush(); 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/automata/coarse_to_fine/FineToCoarseMapping.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this license header, choose License Headers in Project Properties. 3 | * To change this template file, choose Tools | Templates 4 | * and open the template in the editor. 5 | */ 6 | package de.up.ling.irtg.automata.coarse_to_fine; 7 | 8 | /** 9 | * Represents a mapping from fine to coarse nonterminals. 10 | * 11 | * The number of levels is assumed to include the finest, i.e. final level. 12 | * Nonterminals on all levels need to be unique. 13 | * 14 | * @author koller 15 | */ 16 | public interface FineToCoarseMapping { 17 | 18 | /** 19 | * Maps a nonterminal to its coarser version or itself if there is no coarser 20 | * version. 21 | * 22 | */ 23 | String coarsify(String symbol); 24 | 25 | /** 26 | * Returns the number of coarse to fine levels. 27 | * 28 | * This must include a level for the finest resolution, i.e. the original 29 | * nonterminals. 
30 | * 31 | */ 32 | int numLevels(); 33 | } 34 | -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/gui/IrtgTreeAutomatonAnnotator.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this template, choose Tools | Templates 3 | * and open the template in the editor. 4 | */ 5 | package de.up.ling.irtg.gui; 6 | 7 | import de.up.ling.irtg.InterpretedTreeAutomaton; 8 | import de.up.ling.irtg.automata.Rule; 9 | import de.up.ling.irtg.hom.Homomorphism; 10 | import java.util.Collection; 11 | 12 | /** 13 | * 14 | * @author koller 15 | */ 16 | public class IrtgTreeAutomatonAnnotator implements TreeAutomatonAnnotator { 17 | public InterpretedTreeAutomaton irtg; 18 | 19 | public IrtgTreeAutomatonAnnotator(InterpretedTreeAutomaton irtg) { 20 | this.irtg = irtg; 21 | } 22 | 23 | public Collection getAnnotationIdentifiers() { 24 | return irtg.getInterpretations().keySet(); 25 | } 26 | 27 | public String getAnnotation(Rule rule, String annotationIdentifier) { 28 | Homomorphism hom = irtg.getInterpretation(annotationIdentifier).getHomomorphism(); 29 | return hom.rhsAsString(hom.get(rule.getLabel())); 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/util/DebuggingWriter.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this license header, choose License Headers in Project Properties. 3 | * To change this template file, choose Tools | Templates 4 | * and open the template in the editor. 
5 | */ 6 | 7 | package de.up.ling.irtg.util; 8 | 9 | import java.util.function.Supplier; 10 | 11 | /** 12 | * 13 | * @author koller 14 | */ 15 | public class DebuggingWriter { 16 | private boolean enabled = false; 17 | 18 | public void withDebug(Runnable todo) { 19 | setEnabled(true); 20 | 21 | try { 22 | todo.run(); 23 | } finally { 24 | setEnabled(false); 25 | } 26 | } 27 | 28 | public boolean isEnabled() { 29 | return enabled; 30 | } 31 | 32 | public void setEnabled(boolean enabled) { 33 | this.enabled = enabled; 34 | } 35 | 36 | public void D(int depth, Supplier s) { 37 | if (enabled) { 38 | System.err.println(Util.repeat(" ", depth) + s.get()); 39 | } 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /examples/apposition.irtg: -------------------------------------------------------------------------------- 1 | /* 2 | D T , resident engineer for VDOT , 3 | */ 4 | 5 | 6 | interpretation string: de.up.ling.irtg.algebra.StringAlgebra 7 | interpretation graph: de.up.ling.irtg.algebra.graph.GraphAlgebra 8 | 9 | NP! 
-> r1(NP, NP) 10 | [string] *(*(*(?1, ","), ?2), ",") 11 | [graph] merge(?1, ?2) 12 | 13 | NP -> r2(NP, PP) 14 | [string] *(?1, ?2) 15 | [graph] merge(?1, ?2) 16 | 17 | NP -> r3(NN, NN) 18 | [string] *(?1, ?2) 19 | [graph] merge(?1, ?2) 20 | 21 | PP -> r4(NP) 22 | [string] *(for, ?1) 23 | [graph] f_npobj(merge("(a :poss (b))", r_npobj(?1))) 24 | /* f_npobj(merge("(a / work-for :ARG0 (b) :ARG1 (c))", r_npobj(?1))) */ 25 | 26 | NP -> r5 27 | [string] *("D", "T") 28 | [graph] "(a / person :name (b / name :op1 (c / D) :op2 (d / T)))" 29 | 30 | NP -> r6 31 | [string] VDOT 32 | [graph] "(a / gov-org :name (b / name :op1 (c / VDOT)))" 33 | 34 | NN -> r7 35 | [string] resident 36 | [graph] "(a / resident :ARG0 (b))" 37 | 38 | NN -> r8 39 | [string] engineer 40 | [graph] "(a / engineer :ARG0 (b))" 41 | -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/codec/CodecParseException.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this license header, choose License Headers in Project Properties. 3 | * To change this template file, choose Tools | Templates 4 | * and open the template in the editor. 5 | */ 6 | 7 | package de.up.ling.irtg.codec; 8 | 9 | /** 10 | * An exception that occurred while parsing an input representation. 
11 | * 12 | * @author koller 13 | */ 14 | public class CodecParseException extends RuntimeException { 15 | 16 | public CodecParseException() { 17 | } 18 | 19 | public CodecParseException(String message) { 20 | super(message); 21 | } 22 | 23 | public CodecParseException(String message, Throwable cause) { 24 | super(message, cause); 25 | } 26 | 27 | public CodecParseException(Throwable cause) { 28 | super(cause); 29 | } 30 | 31 | public CodecParseException(String message, Throwable cause, boolean enableSuppression, boolean writableStackTrace) { 32 | super(message, cause, enableSuppression, writableStackTrace); 33 | } 34 | 35 | } 36 | -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/laboratory/VariableNotDefinedException.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this license header, choose License Headers in Project Properties. 3 | * To change this template file, choose Tools | Templates 4 | * and open the template in the editor. 5 | */ 6 | package de.up.ling.irtg.laboratory; 7 | 8 | /** 9 | * Thrown when a variable in a task is not given in the execution of said task. 
10 | * @author Jonas 11 | * @see 12 | * bitbucket.org/tclup/alto/wiki/AltoLab 13 | */ 14 | public class VariableNotDefinedException extends Exception { 15 | 16 | public VariableNotDefinedException(String variableName, String line) { 17 | super("Variable " +variableName + " in line " + line + " is undefined in this program and was not found in the variable remapper"); 18 | } 19 | 20 | public VariableNotDefinedException(String variableName) { 21 | super("Variable " +variableName + " is undefined in this program and was not found in the variable remapper"); 22 | } 23 | 24 | } 25 | -------------------------------------------------------------------------------- /src/test/resources/45.tag: -------------------------------------------------------------------------------- 1 | // This grammar has undefined tree families. 2 | // It reproduces issue #45 (or rather, a version of it). 3 | 4 | 5 | //family alphanx0Vnx1: { alphanx0Vnx1_nn, alphanx0Vnx1_wn, alphanx0Vnx1_nw } 6 | 7 | //family alphanx0Vnx1: { alphanx0Vnx1_nn, alphanx0Vnx1_wn } 8 | 9 | tree alphanx0Vnx1_nn: 10 | S[][inv=no] { 11 | NP! [wh=no] 12 | VP [][] { 13 | V+ [fin=yes] 14 | NP! [wh=no, case=acc] 15 | } 16 | } 17 | 18 | 19 | tree alphanx0Vnx1_wn: 20 | S[] { 21 | NP! [wh=yes, case=nom] 22 | S { 23 | VP [][] { 24 | V+ [fin=yes] 25 | NP! [wh=no, case=acc] 26 | } 27 | } 28 | } 29 | 30 | tree alphanx0Vnx1_nw: 31 | S[] { 32 | NP! [wh=yes, case=acc] 33 | S[inv=yes][inv=no] { 34 | NP! 
[wh=no] 35 | V+ [fin=no] 36 | } 37 | } 38 | 39 | 40 | word 'likes': [fin=yes] 41 | word 'like': [fin=no] 42 | 43 | 44 | 45 | tree aux: 46 | S[][inv=yes] { 47 | Aux+ 48 | S* [inv=no] 49 | } 50 | 51 | word 'does': aux 52 | 53 | -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/algebra/BinarizingTagTreeAlgebra.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this license header, choose License Headers in Project Properties. 3 | * To change this template file, choose Tools | Templates 4 | * and open the template in the editor. 5 | */ 6 | 7 | package de.up.ling.irtg.algebra; 8 | 9 | import de.up.ling.tree.Tree; 10 | import de.up.ling.tree.TreePanel; 11 | import javax.swing.JComponent; 12 | 13 | /** 14 | * A {@link BinarizingAlgebra} whose underlying algebra is a {@link TagTreeAlgebra}. 15 | * 16 | * This will behave exactly as a BinarizingAlgebra with the underlying algebra being 17 | * a TagTreeAlgebra. 18 | * 19 | * @author koller 20 | */ 21 | public class BinarizingTagTreeAlgebra extends BinarizingAlgebra> { 22 | 23 | /** 24 | * Creates a new instance with a new signature. 25 | */ 26 | public BinarizingTagTreeAlgebra() { 27 | super(new TagTreeAlgebra()); 28 | } 29 | 30 | @Override 31 | public JComponent visualize(Tree object) { 32 | return new TreePanel(object); 33 | } 34 | 35 | 36 | } 37 | -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/semiring/AdditiveViterbiSemiring.java: -------------------------------------------------------------------------------- 1 | package de.up.ling.irtg.semiring; 2 | 3 | import de.saar.basic.Pair; 4 | import de.up.ling.irtg.automata.Rule; 5 | 6 | /** 7 | * Author: Arne Köhn 8 | * A ViterbiWithBackpointerSemiring, but multiply is addition, i.e. 
a (max, add) instead of (max, multiply) semiring 9 | */ 10 | public class AdditiveViterbiSemiring extends ViterbiWithBackpointerSemiring { 11 | 12 | public static final AdditiveViterbiSemiring INSTANCE = new AdditiveViterbiSemiring(); 13 | 14 | @Override 15 | public Pair multiply(Pair x, Pair y) { 16 | if (x == ONE_PAIR) { 17 | return y; 18 | } 19 | if (y == ONE_PAIR) { 20 | return x; 21 | } 22 | if (x.left == ZERO || y.left == ZERO) { 23 | // ensure that zero * x = x * zero = zero; 24 | // otherwise could get zero * zero = +Infinity 25 | return new Pair(ZERO, x.right); 26 | } else { 27 | return new Pair<>(x.left + y.left, x.right); 28 | } 29 | } 30 | } 31 | -------------------------------------------------------------------------------- /examples/scfg.irtg: -------------------------------------------------------------------------------- 1 | 2 | interpretation english: de.up.ling.irtg.algebra.StringAlgebra 3 | interpretation german: de.up.ling.irtg.algebra.StringAlgebra 4 | 5 | 6 | S! -> r1(NP,VP) 7 | [english] *(?1,?2) 8 | [german] *(?1,?2) 9 | 10 | 11 | NP -> r2(Det,N) 12 | [english] *(?1,?2) 13 | [german] *(?1,?2) 14 | 15 | N -> r3(N,PP) 16 | [english] *(?1,?2) 17 | [german] *(?1,?2) 18 | 19 | VP -> r4(V,NP) 20 | [english] *(?1,?2) 21 | [german] *(?1,?2) 22 | 23 | VP -> r5(VP,PP) 24 | [english] *(?1,?2) 25 | [german] *(?1,?2) 26 | 27 | PP -> r6(P,NP) 28 | [english] *(?1,?2) 29 | [german] *(?1,?2) 30 | 31 | NP -> r7 32 | [english] john 33 | [german] hans 34 | 35 | V -> r8 36 | [english] watches 37 | [german] betrachtet 38 | 39 | Det -> r9 40 | [english] the 41 | [german] die 42 | 43 | Det -> r9b 44 | [english] the 45 | [german] dem 46 | 47 | N -> r10 48 | [english] woman 49 | [german] frau 50 | 51 | N -> r11 52 | [english] telescope 53 | [german] fernrohr 54 | 55 | P -> r12 56 | [english] with 57 | [german] mit 58 | 59 | 60 | 61 | -------------------------------------------------------------------------------- /src/test/resources/46.tag: 
-------------------------------------------------------------------------------- 1 | tree subj: 2 | S[] { 3 | N+ 4 | VP! @NA 5 | } 6 | 7 | word 'mer': subj 8 | 9 | tree copy_a: 10 | VP @NA { 11 | A+ 12 | VP { 13 | A_leaf! 14 | } 15 | } 16 | 17 | word 'a': copy_a 18 | 19 | tree just_a: 20 | A_leaf+ 21 | 22 | word 'a': just_a 23 | 24 | tree copy_b: 25 | VP @NA { 26 | B+ 27 | VP { 28 | B_leaf! 29 | } 30 | } 31 | 32 | word 'b': copy_b 33 | 34 | tree just_b: 35 | B_leaf+ 36 | 37 | word 'b': just_b 38 | 39 | tree copy_c: 40 | VP @NA { 41 | C+ 42 | VP { 43 | C_leaf! 44 | } 45 | } 46 | 47 | word 'c': copy_c 48 | 49 | tree just_c: 50 | C_leaf+ 51 | 52 | word 'c': just_c 53 | 54 | tree aux_a: 55 | VP @NA { 56 | A+ 57 | VP { 58 | VP* 59 | VP @NA { 60 | A_leaf! 61 | } 62 | } 63 | } 64 | 65 | word 'a': aux_a 66 | 67 | tree aux_b: 68 | VP @NA { 69 | B+ 70 | VP { 71 | VP* 72 | VP @NA { 73 | B_leaf! 74 | } 75 | } 76 | } 77 | 78 | word 'b': aux_b 79 | 80 | tree aux_c: 81 | VP @NA { 82 | C+ 83 | VP { 84 | VP* 85 | VP @NA { 86 | C_leaf! 87 | } 88 | } 89 | } 90 | 91 | word 'c': aux_c 92 | 93 | 94 | 95 | -------------------------------------------------------------------------------- /examples/cohn-lapata.irtg: -------------------------------------------------------------------------------- 1 | /* Sentence compression grammar for example from Cohn & Lapata 09 */ 2 | 3 | /* example tree from Cohn & Lapata is: 4 | 5 | s3(sbar(whnp2(rb(exactly),wp(what)),s2(np(nns(records)),vp2(vbd(made),np(prp(it))))), 6 | cc(and), sbar(whnp1(wp(which)), s2(np(nns(ones)), vp2(vbp(are),vp1(vbn(involved)))))) 7 | */ 8 | 9 | interpretation long: de.up.ling.irtg.algebra.TreeAlgebra 10 | interpretation compressed: de.up.ling.irtg.algebra.TreeAlgebra 11 | 12 | S! 
-> r1(WHNP, S, Sbar) 13 | [long] s3(sbar(?1, ?2), cc(and), ?3) 14 | [compressed] s2(?1, s2(?2, ?3)) 15 | 16 | WHNP -> r2(WP) 17 | [long] whnp2(rb(exactly), ?1) 18 | [compressed] whnp1(?1) 19 | 20 | S -> r3 21 | [long] s2(np(nns(records)), vp2(vbd(made),np(prp(it)))) 22 | [compressed] np(nns(records)) 23 | 24 | Sbar -> r4(VP) 25 | [long] sbar(whnp1(wp(which)), s2(np(nns(ones)),?1)) 26 | [compressed] ?1 27 | 28 | WP -> r5 29 | [long] wp(what) 30 | [compressed] wp(what) 31 | 32 | VP -> r6 33 | [long] vp2(vbp(are), vp1(vbn(involved))) 34 | [compressed] vp2(vbp(are), vp1(vbn(involved))) 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/learning_rates/LearningRate.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this license header, choose License Headers in Project Properties. 3 | * To change this template file, choose Tools | Templates 4 | * and open the template in the editor. 5 | */ 6 | package de.up.ling.irtg.learning_rates; 7 | 8 | /** 9 | * This interface represents a learning rate for Gradient Descent Methods. 10 | * 11 | * The interface divides parameters into groups to allow for easier addressing. 12 | * If this is not needed, then all parameters should simply be in group 0. 13 | * The learning rate can be based on the gradient for a given parameter. 14 | * 15 | * @author teichmann 16 | */ 17 | public interface LearningRate { 18 | /** 19 | * Returns the learning rate for the parameter in the given group with the 20 | * given number. 21 | * 22 | * The gradient passed should be the gradient in the current iterate. 23 | * 24 | */ 25 | double getLearningRate(int group, int number, double gradient); 26 | 27 | /** 28 | * Resets the learning rate sequence to its initial state. 
29 | */ 30 | void reset(); 31 | } 32 | -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/util/LambdaStopwatch.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this license header, choose License Headers in Project Properties. 3 | * To change this template file, choose Tools | Templates 4 | * and open the template in the editor. 5 | */ 6 | package de.up.ling.irtg.util; 7 | 8 | import static de.up.ling.irtg.util.Util.cputime; 9 | import static de.up.ling.irtg.util.Util.formatTime; 10 | import java.io.PrintStream; 11 | import java.util.function.Supplier; 12 | 13 | /** 14 | * 15 | * @author koller 16 | */ 17 | public class LambdaStopwatch { 18 | private PrintStream ps; 19 | 20 | public LambdaStopwatch(PrintStream ps) { 21 | this.ps = ps; 22 | } 23 | 24 | public E t(String description, Supplier fn) { 25 | if (ps == null) { 26 | return fn.get(); 27 | } else { 28 | long start = cputime(); 29 | E val = fn.get(); 30 | long end = cputime(); 31 | 32 | if (description != null) { 33 | ps.println(description + ": " + formatTime(end - start)); 34 | } 35 | 36 | return val; 37 | } 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /license-info.md: -------------------------------------------------------------------------------- 1 | # License information 2 | 3 | Alto is published under the Apache License 2.0. 4 | 5 | # Dependencies and sources 6 | 7 | 1. This project uses a code snippet from the [Spring Framework](http://projects.spring.io/spring-framework/). Specifically, the method 8 | org.springframework.core.annotation.AnnotationUtils.findAnnotation(Method, Class) has been 9 | copied and adapted into this project's de.up.ling.irtg.laboratory.Program.findAnnotation( 10 | Method, Class) method. Further, this project relies on the org.springframework.spring-jdbc 11 | package as a dependency. 
12 | This is in accordance with the Apache License 2.0 (see the included file APACHE-LICENSE-2.0.txt) under which the Spring Framework is published. 13 | 2. This project uses [Stanford CoreNLP](https://stanfordnlp.github.io/CoreNLP) as a dependency, which is published under the [GNU GPL v3+ license](https://www.gnu.org/licenses/gpl.html). 14 | 3. This project uses the [MIT Java Wordnet Interface](https://projects.csail.mit.edu/jwi), published under a [license](https://projects.csail.mit.edu/jwi/license.html) identical 15 | to the Creative Commons Attribution Version 4.0 International Public License. -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/automata/language_iteration/EvaluatedItemComparator.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this license header, choose License Headers in Project Properties. 3 | * To change this template file, choose Tools | Templates 4 | * and open the template in the editor. 5 | */ 6 | package de.up.ling.irtg.automata.language_iteration; 7 | 8 | import java.util.Comparator; 9 | 10 | /** 11 | * 12 | * @author koller 13 | */ 14 | public class EvaluatedItemComparator implements Comparator { 15 | public static EvaluatedItemComparator INSTANCE = new EvaluatedItemComparator(); 16 | 17 | @Override 18 | public int compare(EvaluatedItem w1, EvaluatedItem w2) { 19 | // streams that can't deliver values right now are dispreferred (= get minimum weight) 20 | double weight1 = (w1 == null) ? Double.NEGATIVE_INFINITY : w1.getItemWeight(); 21 | double weight2 = (w2 == null) ? Double.NEGATIVE_INFINITY : w2.getItemWeight(); 22 | 23 | // sort descending, i.e. 
streams with high weights go at the beginning of the list 24 | return Double.compare(weight2, weight1); 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /src/test/resources/examples/cohn-lapata.irtg: -------------------------------------------------------------------------------- 1 | /* Sentence compression grammar for example from Cohn & Lapata 09 */ 2 | 3 | /* example tree from Cohn & Lapata is: 4 | 5 | s3(sbar(whnp2(rb(exactly),wp(what)),s2(np(nns(records)),vp2(vbd(made),np(prp(it))))), 6 | cc(and), sbar(whnp1(wp(which)), s2(np(nns(ones)), vp2(vbp(are),vp1(vbn(involved)))))) 7 | */ 8 | 9 | interpretation long: de.up.ling.irtg.algebra.TreeAlgebra 10 | interpretation compressed: de.up.ling.irtg.algebra.TreeAlgebra 11 | 12 | S! -> r1(WHNP, S, Sbar) 13 | [long] s3(sbar(?1, ?2), cc(and), ?3) 14 | [compressed] s2(?1, s2(?2, ?3)) 15 | 16 | WHNP -> r2(WP) 17 | [long] whnp2(rb(exactly), ?1) 18 | [compressed] whnp1(?1) 19 | 20 | S -> r3 21 | [long] s2(np(nns(records)), vp2(vbd(made),np(prp(it)))) 22 | [compressed] np(nns(records)) 23 | 24 | Sbar -> r4(VP) 25 | [long] sbar(whnp1(wp(which)), s2(np(nns(ones)),?1)) 26 | [compressed] ?1 27 | 28 | WP -> r5 29 | [long] wp(what) 30 | [compressed] wp(what) 31 | 32 | VP -> r6 33 | [long] vp2(vbp(are), vp1(vbn(involved))) 34 | [compressed] vp2(vbp(are), vp1(vbn(involved))) 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/automata/language_iteration/TreeCombiningItemEvaluator.java: -------------------------------------------------------------------------------- 1 | package de.up.ling.irtg.automata.language_iteration; 2 | 3 | import de.up.ling.irtg.automata.Rule; 4 | import de.up.ling.irtg.automata.WeightedTree; 5 | import de.up.ling.tree.Tree; 6 | import java.util.ArrayList; 7 | import java.util.List; 8 | 9 | /** 10 | * 11 | * @author koller 12 | */ 13 | public class 
TreeCombiningItemEvaluator implements ItemEvaluator { 14 | @Override 15 | public EvaluatedItem evaluate(Rule refinedRule, List children, UnevaluatedItem unevaluatedItem) { 16 | double weight = 1; 17 | List> childTrees = new ArrayList<>(); 18 | 19 | for( EvaluatedItem ch : children ) { 20 | weight *= ch.getWeightedTree().getWeight(); 21 | childTrees.add(ch.getWeightedTree().getTree()); 22 | } 23 | 24 | double itemWeight = weight * refinedRule.getWeight(); 25 | WeightedTree wtree = new WeightedTree(Tree.create(refinedRule.getLabel(), childTrees), itemWeight); 26 | 27 | return new EvaluatedItem(unevaluatedItem, wtree, itemWeight); 28 | } 29 | 30 | } 31 | -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/codec/NoSignatureBinaryIrtgOutputCodec.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this license header, choose License Headers in Project Properties. 3 | * To change this template file, choose Tools | Templates 4 | * and open the template in the editor. 5 | */ 6 | package de.up.ling.irtg.codec; 7 | 8 | import de.up.ling.irtg.io.NumberCodec; 9 | import de.up.ling.irtg.io.StringCodec; 10 | import de.up.ling.irtg.signature.Signature; 11 | import java.io.IOException; 12 | 13 | /** 14 | * Use this with NoSignatureBinaryIrtgInputCodec, together with a reference IRTG that contains 15 | * the signatures that are not written here (note that the mapping from IDs to symbols 16 | * in the reference IRTG must be identical to the one in the IRTG written here). 
17 | * @author Jonas 18 | */ 19 | public class NoSignatureBinaryIrtgOutputCodec extends BinaryIrtgOutputCodec { 20 | 21 | @Override 22 | protected long writeSignature(Signature sig, NumberCodec nc, StringCodec sc) throws IOException { 23 | long bytes = 0; 24 | 25 | bytes += nc.writeInt(0); 26 | 27 | return bytes; 28 | } 29 | 30 | 31 | 32 | 33 | } 34 | -------------------------------------------------------------------------------- /examples/nesson-shieber.irtg: -------------------------------------------------------------------------------- 1 | /* STAG grammar for semantic construction. The example is 2 | taken from Nesson & Shieber 06. 3 | 4 | You can parse the string "john apparently likes mary" 5 | or the derived syntax tree "s(np(john), vp(adv(apparently), 6 | vp(v(likes), np(mary))))" and decode them into a derived 7 | semantic tree. You can also generate a syntax tree or a string from 8 | the semantic tree 9 | "t(t_t(apparently),t(e_t(likes,e(mary)),e(john)))". 10 | */ 11 | 12 | 13 | interpretation string: de.up.ling.irtg.algebra.TagStringAlgebra 14 | interpretation syntax: de.up.ling.irtg.algebra.TagTreeAlgebra 15 | interpretation semantics: de.up.ling.irtg.algebra.TagTreeAlgebra 16 | 17 | S! 
-> a1(NP, NP, VPa) 18 | [string] *CONC11*(?1, *WRAP21*(?3, *CONC11*(likes, ?2))) 19 | [syntax] s(?1, @(?3, vp(v(likes), ?2))) 20 | [semantics] @(?3, t(e_t(likes, ?2), ?1)) 21 | 22 | NP -> a2 23 | [string] john 24 | [syntax] np(john) 25 | [semantics] e(john) 26 | 27 | NP -> a3 28 | [string] mary 29 | [syntax] np(mary) 30 | [semantics] e(mary) 31 | 32 | VPa -> b4 33 | [string] *CONC12*(apparently, *EE*) 34 | [syntax] vp(adv(apparently), *) 35 | [semantics] t(t_t(apparently), *) 36 | -------------------------------------------------------------------------------- /src/test/groovy/de/up/ling/irtg/automata/UniversalAutomatonTest.groovy: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this template, choose Tools | Templates 3 | * and open the template in the editor. 4 | */ 5 | 6 | package de.up.ling.irtg.automata 7 | 8 | import static de.up.ling.irtg.util.TestingTools.*; 9 | import org.junit.* 10 | import java.util.* 11 | import java.io.* 12 | import static org.junit.Assert.* 13 | import de.up.ling.tree.Tree 14 | 15 | 16 | /** 17 | * 18 | * @author koller 19 | */ 20 | class UniversalAutomatonTest { 21 | @Test 22 | public void testAcceptance() { 23 | UniversalAutomaton auto = new UniversalAutomaton(sig(["f":1, "g":2, "a":0])); 24 | 25 | assert auto.accepts(pt("a")) 26 | assert auto.accepts(pt("f(a)")) 27 | assert auto.accepts(pt("g(a, f(a))")) 28 | } 29 | 30 | @Test 31 | public void testRuleTopDown() { 32 | UniversalAutomaton auto = new UniversalAutomaton(sig(["f":1, "a":0])); 33 | Iterator it = auto.languageIterator(); 34 | 35 | assertEquals(it.next(), pt("a")); 36 | assertEquals(it.next(), pt("f(a)")); 37 | assertEquals(it.next(), pt("f(f(a))")); 38 | } 39 | 40 | } 41 | 42 | -------------------------------------------------------------------------------- /src/test/resources/examples/nesson-shieber.irtg: -------------------------------------------------------------------------------- 1 | /* STAG grammar for semantic 
construction. The example is 2 | taken from Nesson & Shieber 06. 3 | 4 | You can parse the string "john apparently likes mary" 5 | or the derived syntax tree "s(np(john), vp(adv(apparently), 6 | vp(v(likes), np(mary))))" and decode them into a derived 7 | semantic tree. You can also generate a syntax tree or a string from 8 | the semantic tree 9 | "t(t_t(apparently),t(e_t(likes,e(mary)),e(john)))". 10 | */ 11 | 12 | 13 | interpretation string: de.up.ling.irtg.algebra.TagStringAlgebra 14 | interpretation syntax: de.up.ling.irtg.algebra.TagTreeAlgebra 15 | interpretation semantics: de.up.ling.irtg.algebra.TagTreeAlgebra 16 | 17 | S! -> a1(NP, NP, VPa) 18 | [string] *CONC11*(?1, *WRAP21*(?3, *CONC11*(likes, ?2))) 19 | [syntax] s(?1, @(?3, vp(v(likes), ?2))) 20 | [semantics] @(?3, t(e_t(likes, ?2), ?1)) 21 | 22 | NP -> a2 23 | [string] john 24 | [syntax] np(john) 25 | [semantics] e(john) 26 | 27 | NP -> a3 28 | [string] mary 29 | [syntax] np(mary) 30 | [semantics] e(mary) 31 | 32 | VPa -> b4 33 | [string] *CONC12*(apparently, *EE*) 34 | [syntax] vp(adv(apparently), *) 35 | [semantics] t(t_t(apparently), *) 36 | -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/algebra/BinarizingTreeAlgebra.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this template, choose Tools | Templates 3 | * and open the template in the editor. 4 | */ 5 | package de.up.ling.irtg.algebra; 6 | 7 | import de.up.ling.tree.Tree; 8 | import de.up.ling.tree.TreePanel; 9 | import javax.swing.JComponent; 10 | 11 | /** 12 | * A {@link BinarizingAlgebra} that interprets values 13 | * over a {@link TreeAlgebra}. 14 | * 15 | * This is a BinarizingAlgebra where the underlying algebra is a TreeAlgebra. 16 | * 17 | * @author koller 18 | */ 19 | public class BinarizingTreeAlgebra extends BinarizingAlgebra> { 20 | 21 | /** 22 | * Creates a new instance with its own signature. 
/**
 * Exposes the build metadata stored in the {@code build.properties} classpath
 * resource: the value of the {@code version} key and of the
 * {@code scm-revision} key.
 *
 * If the resource is missing or cannot be read, both values fall back to
 * {@code "(undefined)"}.
 *
 * @author koller
 */
public class BuildProperties {
    private static final String IMPL_VERSION = "version";
    private static final String IMPL_BUILD = "scm-revision";
    private static final String UNDEFINED = "(undefined)";

    private static final Properties props = loadProperties();

    /**
     * Loads {@code build.properties} from the classpath, substituting the
     * fallback values when the resource is absent or unreadable.
     */
    private static Properties loadProperties() {
        Properties loaded = new Properties();

        // getResourceAsStream returns null (it does not throw) when the resource
        // does not exist; passing that null to Properties.load would raise a
        // NullPointerException and abort class initialization.
        try (java.io.InputStream in
                = BuildProperties.class.getClassLoader().getResourceAsStream("build.properties")) {
            if (in == null) {
                throw new IOException("build.properties not found on the classpath");
            }
            loaded.load(in);
        } catch (IOException ex) {
            loaded.put(IMPL_VERSION, UNDEFINED);
            loaded.put(IMPL_BUILD, UNDEFINED);
        }

        return loaded;
    }

    /**
     * Returns the value of the {@code version} property.
     *
     * @return the version string, {@code "(undefined)"} if the properties could
     *         not be loaded, or {@code null} if the file lacks the key
     */
    public static String getVersion() {
        return (String) props.get(IMPL_VERSION);
    }

    /**
     * Returns the value of the {@code scm-revision} property.
     *
     * @return the revision string, {@code "(undefined)"} if the properties could
     *         not be loaded, or {@code null} if the file lacks the key
     */
    public static String getBuild() {
        return (String) props.get(IMPL_BUILD);
    }

}
Headers in Project Properties. 3 | * To change this template file, choose Tools | Templates 4 | * and open the template in the editor. 5 | */ 6 | package de.up.ling.irtg.codec; 7 | 8 | import de.up.ling.tree.Tree; 9 | 10 | import java.io.OutputStream; 11 | import java.io.OutputStreamWriter; 12 | import java.io.PrintWriter; 13 | 14 | /** 15 | * An output codec that converts a tree to its yield string. 16 | * The yield string consists of the leaf labels of the tree, from 17 | * left to right, separated by single spaces. Thus, if the tree 18 | * is f(a, g(b,c)), then its encoding with this codec is "a b c". 19 | * 20 | * @author koller 21 | */ 22 | @CodecMetadata(name = "tree-yield", description = "Converts a tree to its yield string", type = Tree.class) 23 | public class TreeYieldOutputCodec extends OutputCodec { 24 | @Override 25 | public void write(Tree tree, OutputStream ostream) throws UnsupportedOperationException { 26 | PrintWriter w = new PrintWriter(new OutputStreamWriter(ostream)); 27 | w.write(String.join(" ", tree.getLeafLabels())); 28 | w.flush(); 29 | } 30 | 31 | } 32 | -------------------------------------------------------------------------------- /src/test/groovy/de/up/ling/irtg/codec/TreeAutomatonInputCodecTest.groovy: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this license header, choose License Headers in Project Properties. 3 | * To change this template file, choose Tools | Templates 4 | * and open the template in the editor. 
/**
 * Tests that {@link TreeAutomatonInputCodec} reads a textual tree automaton
 * and produces an automaton with the expected language.
 *
 * @author koller
 */
class TreeAutomatonInputCodecTest {
    // Codec under test; shared by all test methods.
    InputCodec codec = new TreeAutomatonInputCodec();

    /**
     * Reads a five-rule automaton with final state S and checks that its
     * language is exactly { f(c,b), f(g(d),b) }.
     */
    @Test
    public void testFta() {
        String str = '''S! -> f(A,B)\n\
B -> b\n\
A -> c\n\
A -> g(D)\n\
D -> d
\n\
''';

        TreeAutomaton fta = codec.read(str);
        assertEquals(new HashSet([pt("f(c,b)"), pt("f(g(d),b)")]), fta.language())
    }
}
/**
 * An annotation type for adding metadata to a codec class.
 * Each codec class must be annotated with CodecMetadata in order
 * to be registered with the CodecManager. Use this
 * annotation to specify a name and extension for the codec.
 * You may optionally annotate a codec as "experimental=true" to
 * mark it as experimental. (From Utool.)
 *
 * The annotation is retained at runtime and may only be placed on types.
 *
 * @author Alexander Koller
 *
 */
@Retention(RetentionPolicy.RUNTIME)
@Target(ElementType.TYPE)
public @interface CodecMetadata {
    /** The name of the codec (required). */
    String name();

    /** A human-readable description of the codec (required). */
    String description();

    /** The filename extension associated with the codec; defaults to the empty string. */
    String extension() default "";

    /** The class of the objects the codec handles (required), e.g. {@code Tree.class}. */
    Class type();

    /**
     * Defaults to true.
     * NOTE(review): presumably controls whether the codec is offered in GUI
     * popup menus -- confirm against the code that reads this annotation.
     */
    boolean displayInPopup() default true;

    /** Marks the codec as experimental; defaults to false. */
    boolean experimental() default false;
}

15 | * 16 | * (u_1 / boy :ARG0-of (u_2<root> / want :ARG1 (u_3 / go :ARG0 u_1))) 17 | *

18 | * 19 | * The codec annotates each source node with its source names. In the example, 20 | * the node named u_2 is identified as a root-source.

21 | * 22 | * See the documentation of {@link SgraphAmrOutputCodec} for details 23 | * on this codec. 24 | * 25 | * @author koller 26 | */ 27 | @CodecMetadata(name = "amr-sgraph-src", description = "ISI-style AMR (with sources)", type = SGraph.class) 28 | public class SgraphAmrWithSourcesOutputCodec extends SgraphAmrOutputCodec { 29 | 30 | public SgraphAmrWithSourcesOutputCodec() { 31 | printSources = true; 32 | } 33 | 34 | } 35 | -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/algebra/ParserException.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this template, choose Tools | Templates 3 | * and open the template in the editor. 4 | */ 5 | package de.up.ling.irtg.algebra; 6 | 7 | /** 8 | * An exception that indicates that a string representation 9 | * could not successfully be resolved into an object of the 10 | * algebra. 11 | * 12 | * @author koller 13 | */ 14 | public class ParserException extends Exception { 15 | 16 | /** 17 | * Creates an instance from the given Throwable by calling the superconstructor with it. 18 | * 19 | */ 20 | public ParserException(Throwable thrwbl) { 21 | super(thrwbl); 22 | } 23 | 24 | /** 25 | * Creates an instance from the given Throwable and String by calling the superconstructor with it. 26 | * 27 | */ 28 | public ParserException(String string, Throwable thrwbl) { 29 | super(string, thrwbl); 30 | } 31 | 32 | /** 33 | * Creates an instance from the given String by calling the superconstructor with it. 34 | * 35 | */ 36 | public ParserException(String string) { 37 | super(string); 38 | } 39 | 40 | /** 41 | * Creates an instance using the default superconstructor. 
/**
 * A lazily computed, cached value. The value is obtained from the supplier
 * on the first call to {@link #getValue()} and reused until
 * {@link #setDirty()} marks it as stale, after which the next
 * {@link #getValue()} recomputes it.
 *
 * The cached value is transient; after deserialization the holder is dirty
 * again and recomputes on demand.
 *
 * @param <E> the type of the cached value
 * @author koller
 */
public class Lazy<E> implements Serializable {
    private transient E value = null;
    private boolean dirty = true;
    private final Supplier<E> supplier;

    /**
     * Creates a holder whose value is produced by {@code supplier} on demand.
     *
     * @param supplier computes the value each time it needs refreshing
     */
    public Lazy(Supplier<E> supplier) {
        this.supplier = supplier;
    }

    /**
     * Returns the cached value, computing it first if the holder is dirty.
     *
     * @return the current value as last produced by the supplier
     */
    public E getValue() {
        if (!dirty) {
            return value;
        }

        // Clear the flag only after the supplier returned normally, so a
        // failing supplier leaves the holder dirty.
        value = supplier.get();
        dirty = false;
        return value;
    }

    /**
     * Invalidates the cached value; the next {@link #getValue()} recomputes it.
     */
    public void setDirty() {
        dirty = true;
    }

    // Deserialization hook: the transient value is not restored, so force a
    // recomputation on first access.
    private void readObject(java.io.ObjectInputStream stream) throws IOException, ClassNotFoundException {
        stream.defaultReadObject();
        dirty = true;
    }
}
5 | */ 6 | 7 | package de.up.ling.irtg.codec; 8 | 9 | import de.saar.basic.StringTools; 10 | import de.up.ling.irtg.util.Util; 11 | 12 | import java.io.OutputStream; 13 | import java.io.OutputStreamWriter; 14 | import java.io.PrintWriter; 15 | import java.util.List; 16 | 17 | /** 18 | * An output codec that takes a list of objects, transforms them into 19 | * strings, and concatenates them with spaces.

20 | * 21 | * This codec is deprecated and not registered in the output codec service. 22 | * Use {@link AlgebraStringRepresentationOutputCodec} instead. 23 | * 24 | * @author koller 25 | */ 26 | @Deprecated 27 | @CodecMetadata(name = "list", description = "space-separated (e.g. words)", type = List.class) 28 | public class ListOutputCodec extends OutputCodec { 29 | @Override 30 | public void write(List list, OutputStream ostream) throws UnsupportedOperationException { 31 | PrintWriter w = new PrintWriter(new OutputStreamWriter(ostream)); 32 | w.write(StringTools.join(Util.mapToList(list, x -> x.toString()), " ")); 33 | w.flush(); 34 | } 35 | 36 | } 37 | -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/script/CorpusParser.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this template, choose Tools | Templates 3 | * and open the template in the editor. 4 | */ 5 | package de.up.ling.irtg.script; 6 | 7 | import de.up.ling.irtg.InterpretedTreeAutomaton; 8 | import de.up.ling.irtg.codec.CodecParseException; 9 | import de.up.ling.irtg.corpus.Charts; 10 | import de.up.ling.irtg.corpus.Corpus; 11 | import de.up.ling.irtg.corpus.CorpusReadingException; 12 | import java.io.FileInputStream; 13 | import java.io.FileOutputStream; 14 | import java.io.FileReader; 15 | import java.io.IOException; 16 | import java.io.OutputStream; 17 | 18 | /** 19 | * This reads an (annotated or unannotated) corpus, parses all inputs, 20 | * and saves the parse charts into a separate file.

21 | * 22 | * Usage: java CorpusParser <IRTG> <corpus> <chart file> 23 | * 24 | * @author koller 25 | */ 26 | public class CorpusParser { 27 | public static void main(String[] args) throws IOException, CorpusReadingException, CodecParseException { 28 | InterpretedTreeAutomaton irtg = InterpretedTreeAutomaton.read(new FileInputStream(args[0])); 29 | OutputStream ostream = new FileOutputStream(args[2]); 30 | Corpus corpus = Corpus.readCorpus(new FileReader(args[1]), irtg); 31 | Charts.computeCharts(corpus, irtg, ostream); 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /examples/testString5sub1_3sources.irtg: -------------------------------------------------------------------------------- 1 | interpretation graph: de.up.ling.irtg.algebra.graph.GraphAlgebra 2 | 3 | S! -> m( X, X) 4 | [graph] merge(?1, ?2) 5 | 6 | X -> f0(X) 7 | [graph] f_0(?1) 8 | 9 | X -> r01(X) 10 | [graph] r_0_1(?1) 11 | 12 | X -> r02(X) 13 | [graph] r_0_2(?1) 14 | 15 | X -> f1(X) 16 | [graph] f_1(?1) 17 | 18 | X -> r10(X) 19 | [graph] r_1_0(?1) 20 | 21 | X -> r12(X) 22 | [graph] r_1_2(?1) 23 | 24 | X -> f2(X) 25 | [graph] f_2(?1) 26 | 27 | X -> r20(X) 28 | [graph] r_2_0(?1) 29 | 30 | X -> r21(X) 31 | [graph] r_2_1(?1) 32 | 33 | X -> bCONST 34 | [graph] "(b<0> / boy)" 35 | 36 | X -> bel1CONST 37 | [graph] "(bel1<0> / believe)" 38 | 39 | X -> wCONST 40 | [graph] "(w<0> / want)" 41 | 42 | X -> bel2CONST 43 | [graph] "(bel2<0> / believe)" 44 | 45 | X -> bel1ARG0bCONST 46 | [graph] "(bel1<0> :ARG0 (b<1>))" 47 | 48 | X -> bel1ARG0bCONST2 49 | [graph] "(bel1<1> :ARG0 (b<0>))" 50 | 51 | X -> bel1ARG1wCONST 52 | [graph] "(bel1<0> :ARG1 (w<1>))" 53 | 54 | X -> bel1ARG1wCONST2 55 | [graph] "(bel1<1> :ARG1 (w<0>))" 56 | 57 | X -> wARG1bel2CONST 58 | [graph] "(w<0> :ARG1 (bel2<1>))" 59 | 60 | X -> wARG1bel2CONST2 61 | [graph] "(w<1> :ARG1 (bel2<0>))" 62 | 63 | X -> bel2ARG0bCONST 64 | [graph] "(bel2<0> :ARG0 (b<1>))" 65 | 66 | X -> bel2ARG0bCONST2 67 | 
[graph] "(bel2<1> :ARG0 (b<0>))" 68 | 69 | X -> m( X, X) 70 | [graph] merge(?1, ?2) 71 | 72 | -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/signature/IdentitySignatureMapper.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this license header, choose License Headers in Project Properties. 3 | * To change this template file, choose Tools | Templates 4 | * and open the template in the editor. 5 | */ 6 | package de.up.ling.irtg.signature; 7 | 8 | /** 9 | * This is a special mapper for a single interner, which always maps a symbol 10 | * to itself. 11 | * 12 | * @author koller 13 | */ 14 | public class IdentitySignatureMapper extends SignatureMapper { 15 | 16 | /** 17 | * Creates a new instance for the given interner. 18 | */ 19 | public IdentitySignatureMapper(Interner interner) { 20 | forward = null; 21 | backward = null; 22 | input = interner; 23 | output = interner; 24 | } 25 | 26 | // public IdentitySignatureMapper(Signature signature) { 27 | // this(signature.getInterner()); 28 | // } 29 | 30 | @Override 31 | public void recompute() { 32 | // NOP 33 | } 34 | 35 | @Override 36 | public int remapBackward(int symbolID) { 37 | return symbolID; 38 | } 39 | 40 | @Override 41 | public int remapForward(int symbolID) { 42 | return symbolID; 43 | } 44 | 45 | @Override 46 | public String toString() { 47 | return "identity mapping for " + input.toString(); 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/io/UtfStringCodec.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this license header, choose License Headers in Project Properties. 3 | * To change this template file, choose Tools | Templates 4 | * and open the template in the editor. 
5 | */ 6 | package de.up.ling.irtg.io; 7 | 8 | import java.io.ByteArrayOutputStream; 9 | import java.io.IOException; 10 | import java.io.ObjectInputStream; 11 | import java.io.ObjectOutputStream; 12 | 13 | /** 14 | * 15 | * @author koller 16 | */ 17 | public class UtfStringCodec implements StringCodec { 18 | 19 | private ObjectInputStream ois; 20 | private ObjectOutputStream oos; 21 | 22 | public UtfStringCodec(ObjectInputStream ois) { 23 | this.ois = ois; 24 | oos = null; 25 | } 26 | 27 | public UtfStringCodec(ObjectOutputStream oos) { 28 | this.oos = oos; 29 | ois = null; 30 | } 31 | 32 | @Override 33 | public long writeString(String s) throws IOException { 34 | ByteArrayOutputStream baos = new ByteArrayOutputStream(); 35 | ObjectOutputStream obaos = new ObjectOutputStream(baos); 36 | obaos.writeUTF(s); 37 | obaos.close(); 38 | 39 | oos.writeUTF(s); 40 | 41 | return baos.toByteArray().length; 42 | } 43 | 44 | @Override 45 | public String readString() throws IOException { 46 | return ois.readUTF(); 47 | } 48 | 49 | } 50 | -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/laboratory/TaskCache.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this license header, choose License Headers in Project Properties. 3 | * To change this template file, choose Tools | Templates 4 | * and open the template in the editor. 
5 | */ 6 | 7 | package de.up.ling.irtg.laboratory; 8 | 9 | import com.fasterxml.jackson.databind.ObjectMapper; 10 | import java.io.IOException; 11 | import java.io.InputStream; 12 | import java.io.OutputStream; 13 | import java.net.URI; 14 | import java.nio.file.Path; 15 | 16 | /** 17 | * 18 | * @author koller 19 | */ 20 | class TaskCache extends AltoLabHttpCache { 21 | private ObjectMapper om = new ObjectMapper(); 22 | 23 | public TaskCache(Path baseDir, URI baseURL, AltoLabHttpClient labClient) { 24 | super(baseDir, baseURL, labClient); 25 | } 26 | 27 | @Override 28 | protected String makeCacheFilename(String identifier) { 29 | return String.format("tasks/%s", identifier); 30 | } 31 | 32 | @Override 33 | protected UnparsedTask readFromStream(String identifier, InputStream is, boolean remote) throws IOException { 34 | return om.readValue(is, UnparsedTask.class); 35 | } 36 | 37 | @Override 38 | protected void writeToStream(String identifier, UnparsedTask value, OutputStream os) throws IOException { 39 | om.writeValue(os, value); 40 | } 41 | 42 | } 43 | -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/io/HttpCache.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this license header, choose License Headers in Project Properties. 3 | * To change this template file, choose Tools | Templates 4 | * and open the template in the editor. 
5 | */ 6 | 7 | package de.up.ling.irtg.io; 8 | 9 | import de.up.ling.irtg.util.Logging; 10 | import java.io.IOException; 11 | import java.net.MalformedURLException; 12 | import java.net.URI; 13 | import java.net.URL; 14 | import java.nio.file.Path; 15 | 16 | /** 17 | * 18 | * @author koller 19 | */ 20 | public abstract class HttpCache extends Cache { 21 | private final URI baseURL; 22 | 23 | public HttpCache(Path baseDir, URI baseURL) { 24 | super(baseDir); 25 | this.baseURL = baseURL; 26 | } 27 | 28 | @Override 29 | protected E loadFromRemote(String identifier) throws IOException, ValueReadingException { 30 | URI uri = makeURI(identifier); 31 | 32 | try { 33 | URL url = uri.toURL(); 34 | return readFromStream(identifier, url.openStream(), true); 35 | } catch (MalformedURLException ex) { 36 | Logging.get().warning("Malformed URL in HttpCache#loadFromRemote: " + uri); 37 | return null; 38 | } 39 | } 40 | 41 | protected URI makeURI(String identifier) { 42 | return baseURL.resolve(identifier); 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/maxent/RuleNameFeature.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this template, choose Tools | Templates 3 | * and open the template in the editor. 4 | */ 5 | package de.up.ling.irtg.maxent; 6 | 7 | import de.up.ling.irtg.automata.Rule; 8 | import de.up.ling.irtg.automata.TreeAutomaton; 9 | import java.util.Map; 10 | 11 | /** 12 | * A feature that returns 1 iff the rule name matches 13 | * the name that is passed to the constructor. The feature 14 | * function returns 0 otherwise. 
/**
 * Base class for objects that are identified by a string code. Two objects
 * are equal iff they have exactly the same runtime class and equal codes;
 * the hash code is the hash of the code.
 *
 * @author Jonas
 */
public abstract class ObjectWithStringCode {

    /**
     * Returns the code that identifies this object.
     */
    abstract public String getCode();

    /**
     * Compares by runtime class and code.
     */
    @Override
    public boolean equals(Object obj) {
        if (obj != null && obj.getClass().equals(getClass())) {
            String otherCode = ((ObjectWithStringCode) obj).getCode();
            return getCode().equals(otherCode);
        }
        return false;
    }

    @Override
    public int hashCode() {
        String code = getCode();
        return code.hashCode();
    }

    /**
     * Searches {@code iterable} for an element with the given code.
     *
     * @param iterable the candidates to search; all of them are inspected
     * @param code     the code to look for
     * @return the last element whose code equals {@code code}, or
     *         {@code null} if there is none
     */
    public static <T extends ObjectWithStringCode> T getObjectWithCode(Iterable<T> iterable, String code) {
        T match = null;
        java.util.Iterator<T> it = iterable.iterator();
        while (it.hasNext()) {
            T candidate = it.next();
            // No early exit: a later element with the same code wins.
            if (candidate.getCode().equals(code)) {
                match = candidate;
            }
        }
        return match;
    }

}
/**
 * An {@link Iterable} view of the elements of a list at positions
 * {@code start} (inclusive) to {@code end} (exclusive). The list is not
 * copied; elements are fetched by index on each access.
 *
 * @param <E> the element type
 * @author koller
 */
public class ArrayListRangeIterable<E> implements Iterable<E> {
    private final List<E> values;
    private final int start, end;

    /**
     * @param values the backing list
     * @param start  index of the first element of the range (inclusive)
     * @param end    index just past the last element of the range (exclusive)
     */
    public ArrayListRangeIterable(List<E> values, int start, int end) {
        this.values = values;
        this.start = start;
        this.end = end;
    }

    /**
     * Returns an iterator over the elements at positions start..end-1.
     */
    @Override
    public Iterator<E> iterator() {
        return new Iterator<E>() {
            private int pos = start;

            @Override
            public boolean hasNext() {
                return pos < end;
            }

            @Override
            public E next() {
                // Honor the Iterator contract. Without this check, a call past
                // the end of the range would silently return list elements
                // that lie outside the range.
                if (pos >= end) {
                    throw new java.util.NoSuchElementException();
                }
                return values.get(pos++);
            }
        };
    }

    /**
     * Applies {@code action} to each element of the range, in index order.
     */
    @Override
    public void forEach(Consumer<? super E> action) {
        for (int pos = start; pos < end; pos++) {
            action.accept(values.get(pos));
        }
    }
}
3 | * To change this template file, choose Tools | Templates 4 | * and open the template in the editor. 5 | */ 6 | package de.up.ling.irtg.random_automata; 7 | 8 | import de.up.ling.irtg.automata.TreeAutomaton; 9 | import org.junit.Before; 10 | import org.junit.Test; 11 | import static org.junit.Assert.*; 12 | 13 | /** 14 | * 15 | * @author teichmann 16 | */ 17 | public class RandomTreeAutomatonTest { 18 | 19 | /** 20 | * 21 | */ 22 | private RandomTreeAutomaton rta; 23 | 24 | @Before 25 | public void setUp() { 26 | rta = new RandomTreeAutomaton(9248973479L, 0.5); 27 | } 28 | 29 | /** 30 | * Test of getRandomAutomaton method, of class RandomTreeAutomaton. 31 | */ 32 | @Test 33 | public void testGetRandomAutomaton() { 34 | for (int i = 0; i < 5; ++i) { 35 | TreeAutomaton ta = rta.getRandomAutomaton(50); 36 | 37 | assertTrue(ta.getReachableStates().containsAll(ta.getAllStates())); 38 | assertTrue(ta.isBottomUpDeterministic()); 39 | assertTrue(ta.getAllStates().size() > 1000); 40 | 41 | assertTrue(ta.languageIterator().hasNext()); 42 | 43 | assertEquals(ta.viterbi().getLeafLabels().size(),50); 44 | } 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /src/test/groovy/de/up/ling/irtg/codec/TiburonTreeAutomatonInputCodecTest.groovy: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this license header, choose License Headers in Project Properties. 3 | * To change this template file, choose Tools | Templates 4 | * and open the template in the editor. 
/**
 * Tests that {@link TiburonTreeAutomatonInputCodec} reads a weighted regular
 * tree grammar in Tiburon syntax.
 *
 * @author koller
 */
class TiburonTreeAutomatonInputCodecTest {
    // Codec under test; shared by all test methods.
    InputCodec codec = new TiburonTreeAutomatonInputCodec();

    /**
     * Reads the grammar in {@code wrtg2} and checks that the resulting
     * automaton accepts two sample trees.
     */
    @Test
    public void testWrtg2() {
        TreeAutomaton auto = codec.read(wrtg2);
        assert auto.accepts(pt("S(John, likes, candy)"))
        assert auto.accepts(pt("S(Stacy, hates, candy)"))
    }

    // Test grammar: start state q, rules with optional weights after '#'.
    private final static String wrtg2 = '''\n\
%% Filename wrtg2 %%\n\
q\n\
q -> S(subj vb obj) # 0.8\n\
q -> S(subj hates obj) # 0.2\n\
subj -> John # 0.7\n\
subj -> Stacy # 0.4\n\
obj -> candy\n\
vb -> likes # 0.4\n\
vb -> hates # 0.6
''';
}
12 | * 13 | * @author koller 14 | */ 15 | public class WeightedTree implements Comparable { 16 | private Tree tree; 17 | private double weight; 18 | 19 | public WeightedTree(Tree tree, double weight) { 20 | this.tree = tree; 21 | this.weight = weight; 22 | } 23 | 24 | public Tree getTree() { 25 | return tree; 26 | } 27 | 28 | public double getWeight() { 29 | return weight; 30 | } 31 | 32 | @Override 33 | public int compareTo(WeightedTree o) { 34 | return Double.compare(weight, o.weight); 35 | } 36 | 37 | @Override 38 | public String toString() { 39 | return tree.toString() + ":" + weight; 40 | } 41 | 42 | public String toString(Signature sig) { 43 | return sig.resolve(getTree()) + ":" + getWeight(); 44 | } 45 | 46 | public static String formatWeightedTree(WeightedTree wt, Signature sig) { 47 | if (wt == null) { 48 | return ""; 49 | } else { 50 | return sig.resolve(wt.getTree()) + ":" + wt.getWeight(); 51 | } 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /examples/english.tag: -------------------------------------------------------------------------------- 1 | 2 | family alphanx0Vnx1: { alphanx0Vnx1_nn, alphanx0Vnx1_wn, alphanx0Vnx1_nw } 3 | 4 | tree alphanx0Vnx1_nn: 5 | S[][inv=no] { 6 | NP! [wh=no] 7 | VP [][] { 8 | V+ [fin=yes] 9 | NP! [wh=no, case=acc] 10 | } 11 | } 12 | 13 | 14 | tree alphanx0Vnx1_wn: 15 | S[] { 16 | NP! [wh=yes, case=nom] 17 | S { 18 | VP [][] { 19 | V+ [fin=yes] 20 | NP! [wh=no, case=acc] 21 | } 22 | } 23 | } 24 | 25 | tree alphanx0Vnx1_nw: 26 | S[] { 27 | NP! [wh=yes, case=acc] 28 | S[inv=yes][inv=no] { 29 | NP! 
[wh=no] 30 | V+ [fin=no] 31 | } 32 | } 33 | 34 | 35 | word 'likes': [fin=yes] 36 | word 'like': [fin=no] 37 | 38 | 39 | 40 | tree aux: 41 | S[][inv=yes] { 42 | Aux+ 43 | S* [inv=no] 44 | } 45 | 46 | word 'does': aux 47 | 48 | 49 | 50 | ///////////////////////////////////////////////////////////////////////////////// 51 | 52 | 53 | tree pn: 54 | NP[][wh=no] { 55 | PN+ 56 | } 57 | 58 | tree np: 59 | NP[][wh=no, case=?c] { 60 | Det! 61 | N+ [case=?c] 62 | } 63 | 64 | tree det: 65 | Det+ 66 | 67 | tree whnp: 68 | NP[][wh=yes, case=?c] { 69 | WH+ [case=?c] 70 | } 71 | 72 | 73 | word 'john': pn 74 | word 'mary': pn 75 | 76 | word 'book': np 77 | 78 | word 'every': det 79 | 80 | word 'whom': whnp[case=acc] 81 | word 'who': whnp[case=nom] 82 | 83 | -------------------------------------------------------------------------------- /src/test/resources/unary.irtg: -------------------------------------------------------------------------------- 1 | // Input file for CondensedIntersectionAutomatonTest. 2 | // This is a small part of the binarized grammar for PTB Section 00. 3 | // It contains a chain rule VP -> VBD, which translated into 4 | // a rule of the form q -> f(q), which can throw off the intersection algorithm. 5 | 6 | interpretation string: de.up.ling.irtg.algebra.StringAlgebra 7 | interpretation tree: de.up.ling.irtg.algebra.BinarizingTreeWithAritiesAlgebra 8 | 9 | S! 
-> r28_br18139(NP-SBJ, q18140) [0.1562209842154132] 10 | [string] *(?1,?2) 11 | [tree] S_3('_@_'(?1,?2)) 12 | 13 | q18140 -> r28_br18138(VP, '.') [1.0] 14 | [string] *(?1,?2) 15 | [tree] '_@_'(?1,?2) 16 | 17 | NP-SBJ -> r1280_br9876(NN, q9877) [2.6232948583420777E-4] 18 | [string] *(?1,?2) 19 | [tree] NP-SBJ_3('_@_'(?1,?2)) 20 | 21 | q9877 -> r1280_br9875(CC, NN) [1.0] 22 | [string] *(?1,?2) 23 | [tree] '_@_'(?1,?2) 24 | 25 | NN -> r1278_br288 [1.6310552927744252E-4] 26 | [string] Champagne 27 | [tree] NN_1(Champagne_0) 28 | 29 | CC -> r48_br252 [0.6886120996441281] 30 | [string] and 31 | [tree] CC_1(and_0) 32 | 33 | NN -> r1279_br6324 [1.6310552927744252E-4] 34 | [string] dessert 35 | [tree] NN_1(dessert_0) 36 | 37 | VP -> r969_br14525(VBD) [0.0015326738191444892] 38 | [string] ?1 39 | [tree] VP_1(?1) 40 | 41 | VBD -> r1281_br8789 [0.002635046113306983] 42 | [string] followed 43 | [tree] VBD_1(followed_0) 44 | 45 | '.' -> r27_br2559 [0.9890453834115805] 46 | [string] '.' 47 | [tree] '._1'('._0') 48 | 49 | -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/codec/IsiAmrInputCodec.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this license header, choose License Headers in Project Properties. 3 | * To change this template file, choose Tools | Templates 4 | * and open the template in the editor. 5 | */ 6 | 7 | package de.up.ling.irtg.codec; 8 | 9 | import de.up.ling.irtg.algebra.graph.SGraph; 10 | import de.up.ling.irtg.codec.CodecMetadata; 11 | import de.up.ling.irtg.codec.CodecParseException; 12 | import de.up.ling.irtg.codec.InputCodec; 13 | import de.up.ling.irtg.codec.isiamr.IsiAmrParser; 14 | import de.up.ling.irtg.codec.isiamr.ParseException; 15 | import java.io.IOException; 16 | import java.io.InputStream; 17 | import java.io.InputStreamReader; 18 | 19 | /** 20 | * Reads an AMR in the style of the ISI AMR-banks. 
An example for 21 | * an AMR in this format is: 22 | * 23 | * (b / blink-01
24 | * :ARG0 (i / i)
25 | * :ARG1 (e / eye
26 | * :part-of i)
27 | * :manner (h / hard))
28 | * 29 | * 30 | * @author koller 31 | */ 32 | @CodecMetadata(name = "isi-amr", description = "ISI-style AMRs", type = SGraph.class) 33 | public class IsiAmrInputCodec extends InputCodec { 34 | @Override 35 | public SGraph read(InputStream is) throws CodecParseException, IOException { 36 | try { 37 | return IsiAmrParser.parse(new InputStreamReader(is)); 38 | } catch (ParseException ex) { 39 | throw new CodecParseException(ex); 40 | } 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /examples/tiny-tag.irtg: -------------------------------------------------------------------------------- 1 | /* A tiny TAG grammar, automatically converted from a small fragment 2 | of XTAG. 3 | 4 | The grammar can parse "the businessman sleeps" and "the happy 5 | businessman sleeps", or decode them into derived trees. 6 | */ 7 | 8 | 9 | interpretation tree: de.up.ling.irtg.algebra.TagTreeAlgebra 10 | interpretation string: de.up.ling.irtg.algebra.TagStringAlgebra 11 | 12 | S_S! 
-> inx0V-sleep(NP_S, V_A, VP_A, S_A) [1.0] 13 | [tree] @(?4, S2(?1, @(?3, VP1(@(?2, V0(sleeps)))))) 14 | [string] *WRAP21*(?4, *CONC11*(?1, *WRAP21*(?3, *WRAP21*(?2, sleeps)))) 15 | 16 | NP_S -> iNXN-businessman(N_A, NP_A) [1.0] 17 | [tree] @(?2, NP1(@(?1, N0(businessman)))) 18 | [string] *WRAP21*(?2, *WRAP21*(?1, businessman)) 19 | 20 | NP_A -> aDnx-the(D_A, NP_A) [1.0] 21 | [tree] @(?2, NP2(@(?1, D0(the)), *)) 22 | [string] *WRAP22*(?2, *CONC12*(*WRAP21*(?1, the), *EE*)) 23 | 24 | N_A -> aAn-happy(A_A, N_A) [1.0] 25 | [tree] @(?2, N2(@(?1, A0(happy)), *)) 26 | [string] *WRAP22*(?2, *CONC12*(*WRAP21*(?1, happy), *EE*)) 27 | 28 | NP_A -> *NOP* [1.0] 29 | [tree] * 30 | [string] *EE* 31 | 32 | N_A -> *NOP* [1.0] 33 | [tree] * 34 | [string] *EE* 35 | 36 | VP_A -> *NOP* [1.0] 37 | [tree] * 38 | [string] *EE* 39 | 40 | V_A -> *NOP* [1.0] 41 | [tree] * 42 | [string] *EE* 43 | 44 | S_A -> *NOP* [1.0] 45 | [tree] * 46 | [string] *EE* 47 | 48 | D_A -> *NOP* [1.0] 49 | [tree] * 50 | [string] *EE* 51 | 52 | A_A -> *NOP* [1.0] 53 | [tree] * 54 | [string] *EE* 55 | 56 | 57 | 58 | -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/util/IntAgenda.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this license header, choose License Headers in Project Properties. 3 | * To change this template file, choose Tools | Templates 4 | * and open the template in the editor. 5 | */ 6 | package de.up.ling.irtg.util; 7 | 8 | import it.unimi.dsi.fastutil.ints.IntArrayFIFOQueue; 9 | import it.unimi.dsi.fastutil.ints.IntIterable; 10 | import it.unimi.dsi.fastutil.ints.IntOpenHashSet; 11 | import it.unimi.dsi.fastutil.ints.IntSet; 12 | 13 | /** 14 | * An agenda of ints. Values are held in a FIFO queue, 15 | * and it is ensured that no value is ever added twice. 
16 | * 17 | * @author koller 18 | */ 19 | public class IntAgenda { 20 | private IntArrayFIFOQueue agenda; 21 | private IntSet seenEntries; 22 | 23 | public IntAgenda() { 24 | agenda = new IntArrayFIFOQueue(); 25 | seenEntries = new IntOpenHashSet(); 26 | } 27 | 28 | public IntAgenda(IntIterable initialEntries) { 29 | this(); 30 | enqueueAll(initialEntries); 31 | } 32 | 33 | public void enqueue(int entry) { 34 | if (seenEntries.add(entry)) { 35 | agenda.enqueue(entry); 36 | } 37 | } 38 | 39 | public void enqueueAll(IntIterable entries) { 40 | FastutilUtils.forEach(entries, this::enqueue); 41 | } 42 | 43 | public int pop() { 44 | return agenda.dequeueInt(); 45 | } 46 | 47 | public boolean isEmpty() { 48 | return agenda.isEmpty(); 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/laboratory/AltoLabHttpCache.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this license header, choose License Headers in Project Properties. 3 | * To change this template file, choose Tools | Templates 4 | * and open the template in the editor. 
5 | */ 6 | package de.up.ling.irtg.laboratory; 7 | 8 | import de.up.ling.irtg.io.Cache; 9 | import java.io.ByteArrayInputStream; 10 | import java.io.IOException; 11 | import java.net.URI; 12 | import java.nio.charset.StandardCharsets; 13 | import java.nio.file.Path; 14 | 15 | /** 16 | * 17 | * @author koller 18 | */ 19 | abstract class AltoLabHttpCache extends Cache { 20 | private final AltoLabHttpClient labClient; 21 | private final URI baseURL; 22 | 23 | public AltoLabHttpCache(Path baseDir, URI baseURL, AltoLabHttpClient labClient) { 24 | super(baseDir); 25 | this.labClient = labClient; 26 | this.baseURL = baseURL; 27 | } 28 | 29 | @Override 30 | protected E loadFromRemote(String identifier) throws ValueReadingException, IOException { 31 | if( labClient == null ) { 32 | return null; 33 | } else { 34 | URI uri = makeURI(identifier); 35 | String response = labClient.get(uri.toString()); 36 | return readFromStream(identifier, new ByteArrayInputStream(response.getBytes(StandardCharsets.UTF_8)), true); 37 | } 38 | } 39 | 40 | protected URI makeURI(String identifier) { 41 | return baseURL.resolve(identifier); 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/signature/IntSetInterner.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this template, choose Tools | Templates 3 | * and open the template in the editor. 4 | */ 5 | package de.up.ling.irtg.signature; 6 | 7 | import it.unimi.dsi.fastutil.ints.IntSet; 8 | 9 | /** 10 | * A specialized interner for IntSets used for example to keep track of sets 11 | * of symbol ids. 12 | * 13 | * This adds a special method used to extend a set which has been interned. 14 | * 15 | * @author gontrum 16 | */ 17 | public class IntSetInterner extends Interner{ 18 | 19 | /** 20 | * This method adds the given newValue to the set which is associated with 21 | * the given index. 
22 | * 23 | * The method returns true if such a set exists and false otherwise. In the 24 | * latter case nothing is changed within the interner. 25 | * 26 | */ 27 | public boolean addValueToSetByID(int index, int newValue) { 28 | IntSet toChange = resolveId(index); 29 | 30 | if (toChange != null) { 31 | // remove set from the map where it is the key 32 | objectToInt.removeInt(toChange); 33 | 34 | // change the set by adding the new value 35 | toChange.add(newValue); 36 | 37 | // Put it back in the map that it was removed from 38 | 39 | objectToInt.put(toChange, index); 40 | } else return false; 41 | return true; 42 | } 43 | 44 | } 45 | -------------------------------------------------------------------------------- /src/test/groovy/de/up/ling/irtg/corpus/OnTheFlyChartsTest.groovy: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this license header, choose License Headers in Project Properties. 3 | * To change this template file, choose Tools | Templates 4 | * and open the template in the editor. 
/**
 * Checks that charts attached to a corpus through {@code OnTheFlyCharts}
 * equal the charts obtained by parsing each instance directly.
 *
 * @author christoph_teichmann
 */
class OnTheFlyChartsTest {

    /**
     * Attaches on-the-fly charts to the unannotated corpus from CorpusTest and
     * compares every instance's chart with a freshly parsed one.
     */
    @Test
    public void computeOnTheFlyTest()
    {
        // this is basically the same test as CorpusTest.testComputeCharts(), but with OnTheFlyCharts instead of Charts
        InterpretedTreeAutomaton irtg = pi(CorpusTest.CFG_STR);
        Corpus corpus = Corpus.readCorpus(new StringReader(CorpusTest.UNANNOTATED_CORPUS), irtg);

        ChartAttacher it = new OnTheFlyCharts(irtg);

        corpus.attachCharts(it);

        // count the instances while comparing, so a silently empty corpus is detected below
        int count = 0;
        for( Instance inst : corpus ) {
            assert irtg.parseInputObjects(inst.getInputObjects()).equals(inst.getChart()) : "chart test failed for " + count;
            count++;
        }

        // the test expects exactly three instances in the corpus
        assert count == 3;
    }
}
5 | */ 6 | 7 | package de.up.ling.irtg.laboratory; 8 | 9 | import de.saar.basic.StringTools; 10 | import java.io.IOException; 11 | import java.io.InputStream; 12 | import java.io.InputStreamReader; 13 | import java.io.OutputStream; 14 | import java.io.OutputStreamWriter; 15 | import java.io.Writer; 16 | import java.net.URI; 17 | import java.nio.file.Path; 18 | 19 | /** 20 | * 21 | * @author koller 22 | */ 23 | public class AdditionalDataCache extends AltoLabHttpCache { 24 | public AdditionalDataCache(Path baseDir, URI baseURL, AltoLabHttpClient labClient) { 25 | super(baseDir, baseURL, labClient); 26 | } 27 | 28 | @Override 29 | protected String makeCacheFilename(String identifier) { 30 | return String.format("additional_data/%s", identifier); 31 | } 32 | 33 | @Override 34 | protected String readFromStream(String identifier, InputStream is, boolean remote) throws IOException { 35 | String ret = StringTools.slurp(new InputStreamReader(is)); 36 | return ret; 37 | } 38 | 39 | @Override 40 | protected void writeToStream(String identifier, String value, OutputStream os) throws IOException { 41 | Writer w = new OutputStreamWriter(os); 42 | w.write(value); 43 | w.flush(); 44 | } 45 | 46 | } 47 | -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/laboratory/UnparsedTask.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this license header, choose License Headers in Project Properties. 3 | * To change this template file, choose Tools | Templates 4 | * and open the template in the editor. 
/**
 * Plain data holder for a task description whose grammar and corpus are given
 * as integer ids and whose tree is given as a string.
 *
 * <p>The fields are public and mutable; presumably they are filled in by a
 * JSON mapper from a record of the shape shown below -- TODO confirm.
 *
 * @author koller
 */
class UnparsedTask {
    // v = {"id":task.id, "name":task.name, "grammar":task.grammar_id, "corpus":task.corpus_id, "tree":task.tree, "warmup":task.warmup, "iterations":task.iterations}
    public int id;
    public String name;
    public int grammar;
    public int corpus;
    public String tree;
    public int warmup;
    public int iterations;

    /** @return the value of the {@code id} field */
    public int getId() {
        return id;
    }

    /** @return the value of the {@code name} field */
    public String getName() {
        return name;
    }

    /** @return the value of the {@code grammar} field */
    public int getGrammar() {
        return grammar;
    }

    /** @return the value of the {@code corpus} field */
    public int getCorpus() {
        return corpus;
    }

    /** @return the value of the {@code tree} field */
    public String getTree() {
        return tree;
    }

    /** @return the value of the {@code warmup} field */
    public int getWarmup() {
        return warmup;
    }

    /** @return the value of the {@code iterations} field */
    public int getIterations() {
        return iterations;
    }

    /**
     * @return all fields in declaration order, labelled with this class's name
     */
    @Override
    public String toString() {
        // The label used to read "JacksonTask", which is not the name of this class.
        return "UnparsedTask{" + "id=" + id + ", name=" + name + ", grammar=" + grammar
                + ", corpus=" + corpus + ", tree=" + tree + ", warmup=" + warmup
                + ", iterations=" + iterations + '}';
    }

}
    /**
     * Test of the add method of class LogSpaceOperations: for several pairs of
     * values, add(log a, log b) must agree with log(a + b) up to the given
     * tolerance.
     */
    @Test
    public void testAdd() {
        // NOTE(review): the computed value is passed in assertEquals' "expected"
        // position and the reference value in "actual"; the comparison itself is
        // symmetric, only the failure message would read the wrong way round.

        // Double.NEGATIVE_INFINITY is log(0), so adding it must leave the other operand unchanged
        assertEquals(LogSpaceOperations.add(Double.NEGATIVE_INFINITY, Math.log(0.1)),Math.log(0.1),0.00000001);
        assertEquals(LogSpaceOperations.add(Double.NEGATIVE_INFINITY, Math.log(0.001)),Math.log(0.001),0.00000001);

        // two very small values
        assertEquals(LogSpaceOperations.add(Math.log(0.00000003),
                Math.log(0.0000000001)),Math.log(0.00000003+0.0000000001),0.00000000000001);

        // values greater than one
        assertEquals(LogSpaceOperations.add(Math.log(3),
                Math.log(1)),Math.log(3+1),0.0000001);

        assertEquals(LogSpaceOperations.add(Math.log(600),
                Math.log(50)),Math.log(600+50),0.0000001);

        // operands of very different magnitude
        assertEquals(LogSpaceOperations.add(Math.log(1000000),
                Math.log(0.0000001)),Math.log(1000000+0.0000001),0.0000001);

        // 0.0 is log(1), so the result must be log(1 + exp(-4))
        assertEquals(LogSpaceOperations.add(0.0, -4),Math.log1p(Math.exp(-4)),0.0000001);
    }
/**
 * Enumerates the Cartesian product of an array of iterables.
 *
 * <p>For iterables {@code I0, ..., In-1}, {@link #foreach(Consumer)} calls the
 * consumer once for every combination {@code [v0, ..., vn-1]} with
 * {@code vi} taken from {@code Ii}. The last position varies fastest.
 *
 * <p>The array passed to the consumer is allocated once per
 * {@code foreach} call and reused for every combination, so consumers must
 * copy it if they want to keep the values.
 *
 * <p>The component type of that array is the runtime class of the first
 * element of the first iterable. Storing an element that is not an instance of
 * that class therefore fails with an {@link ArrayStoreException}.
 *
 * @param <T> the element type of the iterables
 * @author koller
 */
public class ForeachArrayTuple<T> {
    private final Iterable<T>[] arrayTuple;
    private final Class<T> clazz;
    private final boolean empty;

    /**
     * Prepares the enumeration.
     *
     * <p>If the array has length zero or its first iterable has no elements, the
     * instance is marked as empty and {@link #foreach(Consumer)} does nothing.
     *
     * @param arrayTuple one iterable per tuple position
     */
    @SuppressWarnings("unchecked") // getClass() of a T instance is a Class<? extends T>
    public ForeachArrayTuple(Iterable<T>[] arrayTuple) {
        this.arrayTuple = arrayTuple;

        if (arrayTuple.length > 0 && arrayTuple[0].iterator().hasNext()) {
            empty = false;
            // A sample element is needed because T is erased at runtime.
            clazz = (Class<T>) arrayTuple[0].iterator().next().getClass();
        } else {
            empty = true;
            clazz = null;
        }
    }

    /**
     * Calls the consumer for every combination of elements.
     *
     * @param fn receives the current combination; the array is reused between calls
     */
    @SuppressWarnings("unchecked") // Array.newInstance(clazz, n) creates a T[]
    public void foreach(Consumer<T[]> fn) {
        if (!empty) {
            T[] values = (T[]) Array.newInstance(clazz, arrayTuple.length);
            foreach(fn, values, 0);
        }
    }

    /**
     * Fills positions {@code depth} and above recursively and invokes the
     * consumer once all positions are filled.
     */
    private void foreach(Consumer<T[]> fn, T[] values, int depth) {
        if (depth == values.length) {
            fn.accept(values);
        } else {
            for (T value : arrayTuple[depth]) {
                values[depth] = value;
                foreach(fn, values, depth + 1);
            }
        }
    }

}
/**
 * Strategy for creating the automaton rules of a binarized IRTG, one rule per
 * node of the common variable tree.
 *
 * @author koller
 */
public interface BinaryRuleFactory {
    /**
     * Generates an automaton rule for a single node of the common variable tree.
     *
     * @param nodeInVartree the node in the variable tree at which we are generating a rule
     * @param binarizedChildStates the states that were generated for the children
     * @param pathToNode the path from the root to this node in the variable tree (in a suitable format for {@link Tree#select(java.lang.String, int) })
     * @param originalRule the rule in the original, unbinarized IRTG
     * @param vartree the variable tree for which we are generating rules
     * @param originalIrtg the original, unbinarized IRTG
     * @param binarizedIrtg the binarized IRTG whose rules we are currently creating
     * @return the created binarized rule
     */
    Rule generateBinarizedRule(Tree nodeInVartree, List binarizedChildStates, String pathToNode, Rule originalRule, Tree vartree, InterpretedTreeAutomaton originalIrtg, InterpretedTreeAutomaton binarizedIrtg);
}
/**
 * Test for TreeYieldOutputCodec.
 *
 * @author koller
 */
class TreeYieldOutputCodecTest {
    /**
     * The tree f(a,g(b,c)) must be rendered as its leaf labels from left to
     * right, separated by single spaces.
     */
    @Test
    public void testTreeYield() {
        assertEquals("a b c", new TreeYieldOutputCodec().asString(pt("f(a,g(b,c))")))
    }
}
4 | */ 5 | package de.up.ling.irtg.automata; 6 | 7 | import de.up.ling.irtg.signature.Signature; 8 | import java.util.HashSet; 9 | import java.util.Set; 10 | 11 | /** 12 | * A tree automaton that accepts all terms over the given signature. 13 | * 14 | * @author koller 15 | */ 16 | public class UniversalAutomaton extends TreeAutomaton { 17 | public static final String STATE = "q"; 18 | private int stateId; 19 | 20 | public UniversalAutomaton(Signature signature) { 21 | super(signature); 22 | 23 | stateId = addState(STATE); 24 | finalStates.add(stateId); 25 | } 26 | 27 | @Override 28 | public Set getRulesBottomUp(int label, int[] childStates) { 29 | Set ret = new HashSet<>(); 30 | ret.add(createRule(stateId, label, childStates, 1)); 31 | return ret; 32 | } 33 | 34 | @Override 35 | public Set getRulesTopDown(int label, int parentState) { 36 | Set ret = new HashSet<>(); 37 | int[] childStates = new int[signature.getArity(label)]; 38 | 39 | for( int i = 0; i < signature.getArity(label); i++ ) { 40 | childStates[i] = stateId; 41 | } 42 | 43 | ret.add(createRule(stateId, label, childStates, 1)); 44 | 45 | return ret; 46 | } 47 | 48 | @Override 49 | public boolean isBottomUpDeterministic() { 50 | return true; 51 | } 52 | 53 | } 54 | -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/util/ConsoleProgressBar.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this license header, choose License Headers in Project Properties. 3 | * To change this template file, choose Tools | Templates 4 | * and open the template in the editor. 
5 | */ 6 | package de.up.ling.irtg.util; 7 | 8 | import com.google.common.base.Strings; 9 | import java.io.PrintStream; 10 | 11 | /** 12 | * 13 | * @author koller 14 | */ 15 | public class ConsoleProgressBar { 16 | 17 | private int barWidth; 18 | private PrintStream strm; 19 | private int previousPosbar = -1; 20 | private String previousString = null; 21 | 22 | public ConsoleProgressBar(int barWidth, PrintStream strm) { 23 | this.barWidth = barWidth - 2; // leave room for [ ] 24 | this.strm = strm; 25 | } 26 | 27 | public void update(long current, long max, String str) { 28 | int posbar = (int) ((barWidth * current) / max); 29 | 30 | if (posbar != previousPosbar || !str.equals(previousString)) { 31 | previousPosbar = posbar; 32 | 33 | StringBuffer buf = new StringBuffer("\r["); 34 | buf.append(Strings.repeat("=", posbar)); 35 | buf.append(Strings.repeat("-", barWidth - posbar)); 36 | buf.append("] "); 37 | buf.append(str); 38 | strm.print(buf); 39 | } 40 | } 41 | 42 | public void finish() { 43 | strm.println(); 44 | } 45 | 46 | public ProgressListener createListener() { 47 | return (current, max, str) -> { 48 | ConsoleProgressBar.this.update(current, max, str); 49 | }; 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /src/main/antlr/de/up/ling/irtg/codec/irtg/Irtg.g4: -------------------------------------------------------------------------------- 1 | grammar Irtg; 2 | 3 | @header{ 4 | package de.up.ling.irtg.codec.irtg; 5 | } 6 | 7 | INTERPRETATION : 'interpretation'; 8 | FEATURE : 'feature'; 9 | 10 | NAME: [a-zA-Z_*$@+] ([a-zA-Z0-9_<>*$@+/.-]*); 11 | QUOTED_NAME: ['] (~['])* [']; 12 | DOUBLE_QUOTED_NAME: ["] (~["])* ["]; 13 | VARIABLE : [?] [a-zA-Z0-9_-]*; 14 | NUMBER : '-'? [0-9.] ([0-9.eE-]*); 15 | 16 | FIN_MARK : '!' 
| '\u00b0'; 17 | ARROW : '->'; 18 | OPEN_BK : '('; 19 | CLOSE_BK : ')'; 20 | OPEN_SQBK: '['; 21 | CLOSE_SQBK: ']'; 22 | COMMA : ','; 23 | COLON : ':'; 24 | 25 | WS: [ \n\t\r]+ -> skip; 26 | 27 | COMMENT 28 | : ( '//' ~[\r\n]* '\r'? '\n' 29 | | '/*' .*? '*/' 30 | ) -> skip 31 | ; 32 | 33 | //Two intended top-level rules: 34 | // 1. an interpreted regular tree grammar 35 | irtg : interpretation_decl+ feature_decl* irtg_rule+; 36 | // 2. a tree automaton 37 | fta : auto_rule+; 38 | 39 | interpretation_decl: INTERPRETATION name ':' name; 40 | 41 | feature_decl: FEATURE name ':' name state_list #CONSTRUCTOR_FEATURE 42 | | FEATURE name ':' name ':' ':' name state_list #STATIC_FEATURE; 43 | 44 | irtg_rule : auto_rule hom_rule*; 45 | 46 | auto_rule : state '->' name state_list weight?; 47 | state_list : ('(' (state ',')* state ')')?; 48 | 49 | hom_rule : '[' name ']' term; 50 | term : name ('(' (term ',')* term ')')? #CONSTANT_TERM 51 | | variable #VARIABLE_TERM; 52 | 53 | weight : '[' NUMBER ']'; 54 | 55 | 56 | name : NAME #RAW | QUOTED_NAME #QUOTED | DOUBLE_QUOTED_NAME #QUOTED ; 57 | state : name FIN_MARK?; 58 | variable : VARIABLE; -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/codec/TikzQtreeOutputCodec.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this license header, choose License Headers in Project Properties. 3 | * To change this template file, choose Tools | Templates 4 | * and open the template in the editor. 5 | */ 6 | 7 | package de.up.ling.irtg.codec; 8 | 9 | import de.up.ling.tree.Tree; 10 | 11 | import java.io.OutputStream; 12 | import java.io.OutputStreamWriter; 13 | import java.io.PrintWriter; 14 | 15 | /** 16 | * An output codec that encodes Tree objects as Latex code, 17 | * using the the tikz-qtree 18 | * package. You can copy and paste this code into your Latex 19 | * document and have it typeset. 
20 | * 21 | * @author koller 22 | */ 23 | @CodecMetadata(name = "tikz-qtree", description = "encodes a tree as LaTeX code using the tikz-qtree package", type = Tree.class) 24 | public class TikzQtreeOutputCodec extends OutputCodec { 25 | @Override 26 | public void write(Tree tree, OutputStream ostream) { 27 | PrintWriter w = new PrintWriter(new OutputStreamWriter(ostream)); 28 | w.print("\\Tree"); 29 | write(tree, "", w); 30 | w.flush(); 31 | } 32 | 33 | private void write(Tree tree, String prefix, PrintWriter w) { 34 | if( tree.getChildren().isEmpty() ) { 35 | w.println(prefix + tree.getLabel().toString()); 36 | } else { 37 | w.println(prefix + "[." + tree.getLabel().toString()); 38 | tree.getChildren().forEach( child -> { 39 | write((Tree) child, prefix + " ", w); 40 | }); 41 | w.println(prefix + "]"); 42 | } 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /src/test/groovy/de/up/ling/irtg/algebra/FeatureStructureAlgebraTest.groovy: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this license header, choose License Headers in Project Properties. 3 | * To change this template file, choose Tools | Templates 4 | * and open the template in the editor. 
5 | */ 6 | 7 | package de.up.ling.irtg.algebra 8 | 9 | 10 | import org.junit.Test 11 | import java.util.* 12 | import java.io.* 13 | import com.google.common.collect.Iterators 14 | import de.up.ling.irtg.automata.* 15 | import static org.junit.Assert.* 16 | import de.saar.chorus.term.parser.*; 17 | import de.saar.coli.featstruct.FeatureStructure 18 | import de.up.ling.tree.*; 19 | import de.up.ling.irtg.algebra.*; 20 | import de.up.ling.irtg.algebra.graph.SGraph 21 | import de.up.ling.irtg.hom.*; 22 | import de.up.ling.irtg.corpus.* 23 | import static de.up.ling.irtg.util.TestingTools.*; 24 | 25 | import static org.hamcrest.MatcherAssert.assertThat; 26 | import static org.hamcrest.CoreMatchers.*; 27 | 28 | 29 | 30 | /** 31 | * 32 | * @author koller 33 | */ 34 | class FeatureStructureAlgebraTest { 35 | @Test 36 | public void testProj() { 37 | FeatureStructureAlgebra alg = new FeatureStructureAlgebra() 38 | FeatureStructure fs = alg.evaluate(pt("proj_root('[root: [num: sg]]')")) 39 | 40 | assertThat(fs, is(FeatureStructure.parse("[num: sg]"))) 41 | } 42 | 43 | @Test 44 | public void testIssue46() { 45 | FeatureStructureAlgebra alg = new FeatureStructureAlgebra() 46 | FeatureStructure fs = alg.evaluate(pt("unify(emb_n2(proj_root('[n1b: #1 [], n1t: #1, root: #1]')),'[]')")) 47 | assertThat(fs, is(FeatureStructure.parse("[n2: []]"))) 48 | } 49 | } 50 | 51 | -------------------------------------------------------------------------------- /examples/reg-fromthehat.irtg: -------------------------------------------------------------------------------- 1 | /* 2 | Demo grammar for RE generation using the set algebra. 3 | 4 | The set algebra only works if you specify a model over which it should interpret 5 | the atomic predicate symbols. You can do this by passing a JSON representation 6 | of the model as an option. 
Try translating {e} and {r1} into strings using the 7 | following model representation: 8 | 9 | {"sleep": [["e", "r1"]], "takefrom": [["e2", "r1", "h"]], "rabbit": [["r1"], ["r2"]], "white": [["r1"], ["b"]], "brown": [["r2"]], "in": [["r1","h"], ["f","h2"]], "hat": [["h"], ["h2"]] } 10 | 11 | */ 12 | 13 | interpretation sem: de.up.ling.irtg.algebra.SetAlgebra 14 | interpretation string: de.up.ling.irtg.algebra.StringAlgebra 15 | 16 | S_e! -> a_sleeps_e_r1(N_r1) 17 | [sem] project_1(intersect_2(sleep, uniq_r1(?1))) 18 | [string] *(?1, sleeps) 19 | 20 | N_r1! -> a_rabbit(Adj_N_r1) 21 | [sem] intersect_1(rabbit, ?1) 22 | [string] *(the, *(?1, rabbit)) 23 | 24 | Adj_N_r1 -> b_white [0.3] 25 | [sem] white 26 | [string] white 27 | 28 | Adj_N_r1 -> b_nop [0.7] 29 | [sem] T 30 | [string] '' 31 | 32 | S_e2! -> takefrom_e2_r1_h(N_r1, N_h) 33 | [string] *(*(take, ?1), *(from, ?2)) 34 | [sem] project_1(intersect_3(intersect_2(takefrom, uniq_r1(intersect_1(?1, project_1(intersect_2(in, ?2))))), uniq_h(intersect_1(?2, project_2(intersect_1(in, ?1)))))) 35 | 36 | /* 37 | // ref of "the rabbit": uniq_r1(intersect_1(?1, project_1(intersect_2(in, ?2)))) 38 | // ref of "the hat": uniq_h(intersect_1(?2, project_2(intersect_1(in, ?1)))) 39 | */ 40 | 41 | N_h -> a_hat 42 | [sem] hat 43 | [string] *(the, hat) 44 | 45 | N_h -> thing_h 46 | [sem] T 47 | [string] *(the, thing) 48 | 49 | N_r1 -> thing_r1 50 | [sem] T 51 | [string] *(the, thing) 52 | -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/sampling/rule_weighting/AutomatonWeighted.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this license header, choose License Headers in Project Properties. 3 | * To change this template file, choose Tools | Templates 4 | * and open the template in the editor. 
5 | */ 6 | package de.up.ling.irtg.sampling.rule_weighting; 7 | 8 | import de.up.ling.irtg.automata.Rule; 9 | import de.up.ling.irtg.automata.TreeAutomaton; 10 | import de.up.ling.irtg.learning_rates.LearningRate; 11 | import de.up.ling.tree.Tree; 12 | import java.util.List; 13 | 14 | /** 15 | * This class simply assigns each tree the weight of the multiplication of its 16 | * rule weights. 17 | * 18 | * The underlying automaton is not changed in any way during sampling. 19 | * 20 | * @author teichmann 21 | */ 22 | public class AutomatonWeighted extends RegularizedKLRuleWeighting { 23 | /** 24 | * Creates a new instance that weights trees according to the rule weights 25 | * from the given automaton and uses the automaton as the basis for sampling. 26 | * 27 | * The adaption is implemented by the parent class. 28 | * 29 | */ 30 | public AutomatonWeighted(TreeAutomaton basis, int regularizationExponent, double regularizationDivisor, LearningRate rate) { 31 | super(basis, regularizationExponent, regularizationDivisor, rate); 32 | } 33 | 34 | @Override 35 | public double getLogTargetProbability(Tree sample) { 36 | double total = 0.0; 37 | List> list = sample.getAllNodes(); 38 | 39 | for(int i=0;i { 12 | private String parentLabel; 13 | private String childLabel; 14 | 15 | public ChildOfFeature(String parentLabel, String childLabel) { 16 | this.parentLabel = parentLabel; 17 | this.childLabel = childLabel; 18 | } 19 | 20 | public String getParentLabel() { 21 | return parentLabel; 22 | } 23 | 24 | public String getChildLabel() { 25 | return childLabel; 26 | } 27 | 28 | @Override 29 | public Double evaluate(Rule rule, TreeAutomaton automaton, MaximumEntropyIrtg irtg, Map inputs){ 30 | String pLabel = getLabelFor(automaton.getStateForId(rule.getParent())); 31 | 32 | if (pLabel.equals(parentLabel)) { 33 | for (int child : rule.getChildren()) { 34 | String cLabel = getLabelFor(automaton.getStateForId(child)); 35 | if (cLabel.equals(childLabel)) { 36 | return 1.0; 37 | } 38 | 
} 39 | } 40 | return 0.0; 41 | } 42 | 43 | @Override 44 | public String toString() { 45 | StringBuilder ret = new StringBuilder(); 46 | ret.append(super.toString()); 47 | ret.append("("); 48 | ret.append(masking(parentLabel)); 49 | ret.append(","); 50 | ret.append(masking(childLabel)); 51 | ret.append(")"); 52 | return ret.toString(); 53 | } 54 | 55 | } 56 | -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/gui/JDerivationTree.form: -------------------------------------------------------------------------------- 1 | 2 | 3 |

4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/automata/EdgeEvaluator.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this license header, choose License Headers in Project Properties. 3 | * To change this template file, choose Tools | Templates 4 | * and open the template in the editor. 5 | */ 6 | 7 | package de.up.ling.irtg.automata; 8 | 9 | /** 10 | * This class is used to predict the outside score of a state. 11 | * 12 | * This is used in the agenda based intersection algorithms which attempt to find 13 | * the best tree based on an A* approach. 14 | * 15 | * @author koller 16 | */ 17 | public interface EdgeEvaluator { 18 | /** 19 | * This method is called each time a rule is added to the output 20 | * automaton. Its standard implementation does nothing, but 21 | * subclasses might implement some other behavior, e.g. to keep 22 | * track of inside probabilities of states. 23 | * 24 | */ 25 | default void ruleAdded(Rule rule) { 26 | 27 | } 28 | 29 | /** 30 | * This method is called each time the intersection algorithm 31 | * discovers a new state to put on the agenda. This is a state 32 | * of the intersection automaton, and thus it represents a pair 33 | * (p,q) of states from the original left and right automata. 34 | * The evaluate method is supposed to return a numeric "evaluation" 35 | * of the new state, so it can be sorted into the right place in 36 | * the agenda (which is a priority queue, sorted by descending 37 | * evaluation values). You can use {@link IntersectionAutomaton#getLeftState(int) } 38 | * and {@link IntersectionAutomaton#getRightState(int) } with the 39 | * "auto" argument to obtain p and q. 
40 | * 41 | */ 42 | double evaluate(int outputState, IntersectionAutomaton auto); 43 | } 44 | -------------------------------------------------------------------------------- /src/test/groovy/de/up/ling/irtg/codec/SynchronousCfgInputCodecTest.groovy: -------------------------------------------------------------------------------- 1 | package de.up.ling.irtg.codec 2 | 3 | import de.up.ling.irtg.Interpretation 4 | import de.up.ling.irtg.InterpretedTreeAutomaton 5 | import org.junit.* 6 | import java.util.* 7 | import java.io.* 8 | import de.up.ling.irtg.automata.* 9 | import de.up.ling.irtg.automata.TreeAutomaton 10 | import static org.junit.Assert.* 11 | import de.saar.chorus.term.parser.*; 12 | import de.up.ling.tree.*; 13 | import de.up.ling.irtg.algebra.*; 14 | import de.up.ling.irtg.hom.*; 15 | import de.up.ling.irtg.algebra.graph.*; 16 | import static de.up.ling.irtg.util.TestingTools.*; 17 | 18 | 19 | class SynchronousCfgInputCodecTest { 20 | @Test 21 | public void testScfg() { 22 | InterpretedTreeAutomaton irtg = new SynchronousCfgInputCodec().read(VALID_GRAMMAR); 23 | TreeAutomaton chart = irtg.parse(["left": "30 duonianlai de youhao hezuo"]); 24 | assert chart.accepts(pt("r1(r2(r4(r5),r3(r6,r7)))")); 25 | } 26 | 27 | @Test 28 | public void testDecode() { 29 | InterpretedTreeAutomaton irtg = new SynchronousCfgInputCodec().read(VALID_GRAMMAR); 30 | Interpretation rightInterp = irtg.getInterpretation("right"); 31 | Tree gold = pt("r1(r2(r4(r5),r3(r6,r7)))"); 32 | 33 | assertEquals("friendly cooperation over the past 30 years", rightInterp.getAlgebra().representAsString(rightInterp.interpret(gold))); 34 | } 35 | 36 | private static final String VALID_GRAMMAR = """ 37 | S 38 | 39 | S -> X 40 | S -> X 41 | 42 | X -> X de X 43 | X -> X[2] X[1] 44 | 45 | X -> X X 46 | X -> X[1] X[2] 47 | 48 | X -> X duonianlai 49 | X -> over the past X years 50 | 51 | X -> 30 52 | X -> 30 53 | 54 | X -> youhao 55 | X -> friendly 56 | 57 | X -> hezuo 58 | X -> cooperation 59 | """; 
60 | } 61 | -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/codec/AlgebraStringRepresentationOutputCodec.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this license header, choose License Headers in Project Properties. 3 | * To change this template file, choose Tools | Templates 4 | * and open the template in the editor. 5 | */ 6 | 7 | package de.up.ling.irtg.codec; 8 | 9 | import de.up.ling.irtg.algebra.Algebra; 10 | import de.up.ling.irtg.algebra.StringAlgebra; 11 | 12 | import java.io.OutputStream; 13 | import java.io.OutputStreamWriter; 14 | import java.io.PrintWriter; 15 | 16 | /** 17 | * A codec that uses an algebra's {@link Algebra#representAsString(java.lang.Object) } method 18 | * to encode an object as a string. For most algebras, this has the same behavior as the 19 | * {@link ToStringOutputCodec}; but a few algebras (e.g. {@link StringAlgebra}) override 20 | * this method.

21 | * 22 | * Because the codec requires an algebra object to be instantiated, we do not add it 23 | * to the list of registered output codecs, and it will not be returned by 24 | * {@link OutputCodec#getAllOutputCodecs() } and related methods. 25 | * 26 | * @author koller 27 | */ 28 | @CodecMetadata(name = "text", description = "encodes an object using its algebra's default method", type = Object.class, displayInPopup = false) 29 | public class AlgebraStringRepresentationOutputCodec extends OutputCodec { 30 | private Algebra algebra; 31 | 32 | public AlgebraStringRepresentationOutputCodec(Algebra algebra) { 33 | this.algebra = algebra; 34 | } 35 | 36 | @Override 37 | public void write(E object, OutputStream ostream) throws UnsupportedOperationException { 38 | PrintWriter w = new PrintWriter(new OutputStreamWriter(ostream)); 39 | w.write(algebra.representAsString(object)); 40 | w.flush(); 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /examples/session.scala: -------------------------------------------------------------------------------- 1 | scala -J-server -cp target/irtg-1.1-SNAPSHOT-jar-with-dependencies.jar -Yrepl-sync -i init.scala 2 | 3 | 4 | 5 | // parsing and drawing trees 6 | 7 | val t = pt("f(a,g(c))") 8 | t.draw 9 | 10 | 11 | // loading and intersecting automata; language of automata 12 | 13 | val auto = loadAutomaton("examples/test.auto") 14 | val auto2 = loadAutomaton("examples/test2.auto") 15 | 16 | val intersection = auto.intersect(auto2) 17 | 18 | intersection.language 19 | 20 | intersection.language.iterator.next.draw 21 | 22 | 23 | // homomorphisms 24 | 25 | val s = sig(Map("F" -> 2, "A" -> 0)) 26 | val h = hom(Map("A" -> "h(a)", "F" -> "f(f(?1,b),?2)"), s) 27 | h.apply(pt("F(A,A)")) 28 | 29 | 30 | // inverse homomorphism of automata 31 | 32 | val original = loadAutomaton("examples/test3.auto") 33 | original.language 34 | 35 | val preimage = original.inverseHomomorphism(h) 36 | 
preimage.language 37 | 38 | 39 | // IRTGs 40 | 41 | val irtg = loadIrtg("examples/scfg.irtg") 42 | val chart = irtg.parse("german" >> "hans betrachtet die frau mit dem fernrohr") 43 | 44 | chart.viterbi 45 | chart.language 46 | 47 | irtg.decode("english", "german" >> "hans betrachtet die frau mit dem fernrohr") 48 | 49 | 50 | 51 | // algebras 52 | 53 | val alg = new StringAlgebra() 54 | alg.decompose(alg.parseString("a b c")) 55 | 56 | 57 | 58 | // ML training 59 | 60 | val irtg = loadIrtg("examples/cfg.irtg") 61 | val corpus = irtg.readCorpus(file("examples/pcfg-annotated-training.txt")) 62 | irtg.trainML(corpus) 63 | 64 | 65 | // EM training 66 | 67 | val irtg = loadIrtg("examples/cfg.irtg") 68 | val corpus = irtg.readCorpus(file("examples/pcfg-training.txt")) 69 | 70 | Charts.computeCharts(corpus, irtg, fostream("charts.zip")) 71 | corpus.attachCharts(new Charts(new FileInputStreamSupplier(new File("charts.zip")))) 72 | 73 | irtg.trainEM(corpus) 74 | 75 | -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/automata/language_iteration/EvaluatedItem.java: -------------------------------------------------------------------------------- 1 | package de.up.ling.irtg.automata.language_iteration; 2 | 3 | import de.up.ling.irtg.automata.TreeAutomaton; 4 | import de.up.ling.irtg.automata.WeightedTree; 5 | 6 | /** 7 | * An evaluated item, consisting of a weighted tree and the original 8 | * unevaluated item from which it was created. 
9 | */ 10 | public class EvaluatedItem implements Comparable { 11 | private UnevaluatedItem item; // unevaluated item from which it was produced 12 | private WeightedTree weightedTree; // tree it represents, with weight of that tree 13 | private double itemWeight; // weight to be used in ordering the priority queue -- need not be the same as weight of the tree 14 | 15 | public EvaluatedItem(UnevaluatedItem item, WeightedTree wtree, double itemWeight) { 16 | this.item = item; 17 | weightedTree = wtree; 18 | this.itemWeight = itemWeight; 19 | } 20 | 21 | public UnevaluatedItem getItem() { 22 | return item; 23 | } 24 | 25 | public WeightedTree getWeightedTree() { 26 | return weightedTree; 27 | } 28 | 29 | public double getItemWeight() { 30 | return itemWeight; 31 | } 32 | 33 | @Override 34 | public int compareTo(EvaluatedItem o) { 35 | // evalItem1 < evalItem2 if the tree in evalItem1 has a HIGHER weight than the tree in evalItem2 36 | return Double.compare(o.itemWeight, itemWeight); 37 | } 38 | 39 | @Override 40 | public String toString() { 41 | return "[" + weightedTree + ", iw=" + itemWeight + " (from " + item.toString() + ")]"; 42 | } 43 | 44 | public String toString(TreeAutomaton auto) { 45 | return "[" + weightedTree.toString(auto.getSignature()) + ", iw=" + itemWeight + " (from " + item.toString() + ")]"; 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/gui/JInterpretationsPanel.form: -------------------------------------------------------------------------------- 1 | 2 | 3 |

4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 |
36 | -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/script/TestAutomataMemorySize2.java: -------------------------------------------------------------------------------- 1 | package de.up.ling.irtg.script; 2 | 3 | import de.up.ling.irtg.algebra.StringAlgebra; 4 | import de.up.ling.irtg.automata.ConcreteTreeAutomaton; 5 | import de.up.ling.irtg.automata.Rule; 6 | import de.up.ling.irtg.automata.TreeAutomaton; 7 | import de.up.ling.irtg.codec.BinaryIrtgInputCodec; 8 | import de.up.ling.irtg.codec.BinaryIrtgOutputCodec; 9 | import de.up.ling.irtg.codec.TreeAutomatonInputCodec; 10 | import it.unimi.dsi.fastutil.ints.Int2ObjectMap; 11 | 12 | import java.io.FileInputStream; 13 | import java.io.IOException; 14 | import java.util.ArrayList; 15 | import java.util.Arrays; 16 | import java.util.List; 17 | import java.util.Scanner; 18 | import java.util.stream.Collectors; 19 | 20 | import static java.lang.Thread.sleep; 21 | 22 | public class TestAutomataMemorySize2 { 23 | 24 | public static void main(String[] args) throws IOException, InterruptedException { 25 | 26 | BinaryIrtgInputCodec binaryCodecIn = new BinaryIrtgInputCodec(); 27 | Scanner scanner = new Scanner(System. 
in); 28 | 29 | TreeAutomaton auto = binaryCodecIn.read(new FileInputStream("test.irtb")).getAutomaton(); 30 | 31 | System.out.println("waiting for input, getRuleSet is next"); 32 | System.out.println(scanner.nextLine()); 33 | 34 | auto.getRuleSet(); 35 | 36 | System.out.println("waiting for input, inside is next"); 37 | System.out.println(scanner.nextLine()); 38 | 39 | Int2ObjectMap insides = auto.logInside(); 40 | 41 | System.out.println("waiting for input, outside is next"); 42 | System.out.println(scanner.nextLine()); 43 | 44 | auto.logOutside(insides); 45 | 46 | System.out.println("waiting for input to finish program"); 47 | System.out.println(scanner.nextLine()); 48 | 49 | } 50 | 51 | 52 | } 53 | -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/automata/coarse_to_fine/RrtSummary.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this license header, choose License Headers in Project Properties. 3 | * To change this template file, choose Tools | Templates 4 | * and open the template in the editor. 
5 | */ 6 | package de.up.ling.irtg.automata.coarse_to_fine; 7 | 8 | import java.util.Arrays; 9 | 10 | /** 11 | * 12 | * @author koller 13 | */ 14 | class RrtSummary { 15 | private int coarseParent; 16 | private int[] coarseChildren; 17 | private int termId; 18 | 19 | public RrtSummary(int coarseParent, int termId, int[] coarseChildren) { 20 | this.coarseParent = coarseParent; 21 | this.termId = termId; 22 | this.coarseChildren = coarseChildren; 23 | } 24 | 25 | public int getCoarseParent() { 26 | return coarseParent; 27 | } 28 | 29 | public int[] getCoarseChildren() { 30 | return coarseChildren; 31 | } 32 | 33 | @Override 34 | public int hashCode() { 35 | int hash = 7; 36 | hash = 13 * hash + this.coarseParent; 37 | hash = 13 * hash + Arrays.hashCode(this.coarseChildren); 38 | hash = 13 * hash + this.termId; 39 | return hash; 40 | } 41 | 42 | @Override 43 | public boolean equals(Object obj) { 44 | if (this == obj) { 45 | return true; 46 | } 47 | if (obj == null) { 48 | return false; 49 | } 50 | if (getClass() != obj.getClass()) { 51 | return false; 52 | } 53 | final RrtSummary other = (RrtSummary) obj; 54 | if (this.coarseParent != other.coarseParent) { 55 | return false; 56 | } 57 | if (this.termId != other.termId) { 58 | return false; 59 | } 60 | return Arrays.equals(this.coarseChildren, other.coarseChildren); 61 | } 62 | 63 | 64 | } 65 | -------------------------------------------------------------------------------- /src/test/groovy/de/up/ling/irtg/automata/DeterminizerTest.groovy: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this license header, choose License Headers in Project Properties. 3 | * To change this template file, choose Tools | Templates 4 | * and open the template in the editor. 
5 | */ 6 | 7 | package de.up.ling.irtg.automata 8 | 9 | 10 | import org.junit.* 11 | import java.util.* 12 | import java.io.* 13 | import static org.junit.Assert.* 14 | import de.saar.basic.Pair 15 | import de.up.ling.irtg.automata.Rule 16 | import de.up.ling.irtg.automata.condensed.CondensedTreeAutomatonParser 17 | import de.up.ling.irtg.automata.* 18 | import de.up.ling.irtg.signature.Signature 19 | import static de.up.ling.irtg.util.TestingTools.*; 20 | 21 | 22 | /** 23 | * 24 | * @author koller 25 | */ 26 | class DeterminizerTest { 27 | @Test 28 | public void testDeterminizeMatcher() { 29 | TreeAutomaton match = pa(MATCH1) 30 | TreeAutomaton det = match.determinize() 31 | 32 | assert det.isBottomUpDeterministic() 33 | 34 | assert det.accepts(pt("f(g(a,b),a)")) 35 | assert det.accepts(pt("f(g(a,f(b,a)),g(a,b))")) 36 | assert det.accepts(pt("g(a,f(g(a,b),a))")) 37 | assert ! det.accepts(pt("f(g(b,a),a)")) 38 | assert ! det.accepts(pt("a")) 39 | } 40 | 41 | private static final String MATCH1 = ''' 42 | q1! -> f(q0, q1) 43 | q1 -> f(q1, q0) 44 | q1 -> g(q0, q1) 45 | q1 -> g(q1, q0) 46 | 47 | q0 -> f(q0, q0) 48 | q0 -> g(q0, q0) 49 | q0 -> a 50 | q0 -> b 51 | 52 | q1! -> f(q0, t_1/) 53 | q1 -> f(t_1/, q0) 54 | q1 -> g(q0, t_1/) 55 | q1 -> g(t_1/, q0) 56 | 57 | t_1/ ! -> f(t_1/1, t_1/2) 58 | t_1/1 -> g(t_1/11, t_1/12) 59 | t_1/11 -> a 60 | 61 | t_1/12 -> f(q0, q0) 62 | t_1/12 -> g(q0, q0) 63 | t_1/12 -> a 64 | t_1/12 -> b 65 | 66 | t_1/2 -> f(q0, q0) 67 | t_1/2 -> g(q0, q0) 68 | t_1/2 -> a 69 | t_1/2 -> b 70 | 71 | '''; 72 | } 73 | 74 | -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/automata/condensed/ConcatenatedIterable.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this template, choose Tools | Templates 3 | * and open the template in the editor. 
4 | */ 5 | package de.up.ling.irtg.automata.condensed; 6 | 7 | import java.util.Collections; 8 | import java.util.Iterator; 9 | 10 | /** 11 | * 12 | * @author koller 13 | * @param 14 | */ 15 | public class ConcatenatedIterable implements Iterable { 16 | private final Iterable> iterables; 17 | 18 | public ConcatenatedIterable(Iterable> its) { 19 | this.iterables = its; 20 | } 21 | 22 | @Override 23 | public Iterator iterator() { 24 | return new ConcatenatedIterator(); 25 | } 26 | 27 | private class ConcatenatedIterator implements Iterator { 28 | private final Iterator> iterators; 29 | private Iterator currentIterator; 30 | 31 | public ConcatenatedIterator() { 32 | iterators = iterables.iterator(); 33 | currentIterator = Collections.emptyIterator(); 34 | } 35 | 36 | @Override 37 | public boolean hasNext() { 38 | while( ! currentIterator.hasNext() ) { 39 | if( iterators.hasNext() ) { 40 | currentIterator = iterators.next().iterator(); 41 | } else { 42 | return false; 43 | } 44 | } 45 | 46 | return true; 47 | } 48 | 49 | @Override 50 | public E next() { 51 | return currentIterator.next(); 52 | } 53 | 54 | @Override 55 | public void remove() { 56 | throw new UnsupportedOperationException("Not supported yet."); //To change body of generated methods, choose Tools | Templates. 
57 | } 58 | } 59 | 60 | 61 | 62 | } 63 | -------------------------------------------------------------------------------- /src/main/javacc/de/up/ling/irtg/algebra/SetParser.jj: -------------------------------------------------------------------------------- 1 | /** 2 | * JavaCC file 3 | */ 4 | 5 | 6 | options { 7 | JDK_VERSION = "1.5"; 8 | STATIC = false; 9 | LOOKAHEAD=2; // speed up parser by removing this and fixing choice conflict in ctt() differently 10 | FORCE_LA_CHECK=true; 11 | SUPPORT_CLASS_VISIBILITY_PUBLIC=false; 12 | } 13 | PARSER_BEGIN(SetParser) 14 | package de.up.ling.irtg.algebra; 15 | 16 | import java.io.ByteArrayInputStream; 17 | import java.util.ArrayList; 18 | import java.util.HashMap; 19 | import java.util.*; 20 | import java.io.*; 21 | 22 | /** 23 | * Parser for sets of strings and sets of tuples of strings, 24 | * e.g. "{a, b, c}" or "{(a,b), (c,d)}". 25 | * Sets of strings are represented as sets of one-tuples of strings. 26 | * I.e. {a} and {(a)} represent the same structure. 
27 | * @author Alexander Koller 28 | */ 29 | public class SetParser { 30 | private static SetParser parser = new SetParser(new ByteArrayInputStream(new byte[]{})); 31 | private static Set> ret = null; 32 | 33 | public static Set> parse(Reader reader) throws ParseException{ 34 | parser.ReInit(reader); 35 | ret = new HashSet>(); 36 | 37 | parser.set(); 38 | return ret; 39 | } 40 | } 41 | PARSER_END(SetParser) 42 | 43 | SKIP : { " " | "\t" } 44 | 45 | TOKEN : 46 | { 47 | < ELEMENT: (["a"-"z","A"-"Z","_", "'", "<", ">", "+", "-", "0"-"9", "*", "."] )+ > | 48 | 49 | } 50 | 51 | void set() : 52 | { 53 | List x; 54 | } 55 | { 56 | "{" x = tuple() { ret.add(x); } ("," x = tuple() { ret.add(x); })* "}" 57 | } 58 | 59 | List tuple() : 60 | { 61 | List ret = new ArrayList(); 62 | String x; 63 | } 64 | { 65 | (x=.image { ret.add(x); } | 66 | "(" x = .image { ret.add(x); } ("," x = .image { ret.add(x); })* ")") 67 | { return ret; } 68 | } 69 | 70 | -------------------------------------------------------------------------------- /examples/atomicSGraphTest.irtg: -------------------------------------------------------------------------------- 1 | /* A synchronous grammar that translates between strings and 2 | semantic representations, represented as graphs. 3 | 4 | Try parsing this sentence: "the boy wants to go" 5 | 6 | And parsing this graph: 7 | (w / want :ARG0 (b / boy) :ARG1 (g / go :ARG0 b)) 8 | 9 | 10 | The grammar can translate the sentence "the boy wants the girl to believe that 11 | the boy likes the girl", which is modeled after Chiang et al., ACL 2012, Figure 2. 12 | A crucial difference is that our grammar does not attempt to map the pronouns 13 | in Chiang et al.'s example to the same nodes as "the boy" and "the girl". 
14 | 15 | For the reverse direction, try parsing the following graph: 16 | (w_5 / want :ARG0 (subj_6 / boy) :ARG1 (vcomp_7 / believe :ARG0 (obj_8 / girl) :ARG1 (xcomp_6_3 / like :ARG0 (subj_5_2_4 / boy) :ARG1 (obj_6_3_5 / girl)))) 17 | 18 | */ 19 | 20 | 21 | interpretation graph: de.up.ling.irtg.algebra.graph.GraphAlgebra 22 | 23 | S! -> f0(X) 24 | [graph] f_0(?1) 25 | 26 | S! -> f1(X) 27 | [graph] f_1(?1) 28 | 29 | S! -> m(Y, Y) 30 | [graph] merge(?1, ?2) 31 | 32 | S! -> m(X, Y) 33 | [graph] merge(?1, ?2) 34 | 35 | S! -> m(Y, X) 36 | [graph] merge(?1, ?2) 37 | 38 | S!-> m(X, X) 39 | [graph] merge(?1, ?2) 40 | 41 | X -> m(Y, Y) 42 | [graph] merge(?1, ?2) 43 | 44 | X -> m(X, Y) 45 | [graph] merge(?1, ?2) 46 | 47 | X -> m(Y, X) 48 | [graph] merge(?1, ?2) 49 | 50 | X -> m(X, X) 51 | [graph] merge(?1, ?2) 52 | 53 | X -> f0(X) 54 | [graph] f_0(?1) 55 | 56 | X -> f1(X) 57 | [graph] f_1(?1) 58 | 59 | X -> f0(Y) 60 | [graph] f_0(?1) 61 | 62 | X -> f1(Y) 63 | [graph] f_1(?1) 64 | 65 | Y -> r01(X) 66 | [graph] r_0_1(?1) 67 | 68 | Y -> r10(X) 69 | [graph] r_1_0(?1) 70 | 71 | X -> aaa 72 | [graph] "(x<0> / gamma)" 73 | 74 | X -> edge 75 | [graph] "(x<0> :alpha (y<1>))" 76 | 77 | X -> edge2 78 | [graph] "(x<1> :alpha (y<0>))" 79 | 80 | X -> bbb 81 | [graph] "(y<0> / beta)" 82 | -------------------------------------------------------------------------------- /src/main/java/de/up/ling/irtg/script/CreateRandomAutomata.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this license header, choose License Headers in Project Properties. 3 | * To change this template file, choose Tools | Templates 4 | * and open the template in the editor. 
5 | */ 6 | package de.up.ling.irtg.script; 7 | 8 | import de.up.ling.irtg.random_automata.RandomTreeAutomaton; 9 | import java.io.BufferedWriter; 10 | import java.io.File; 11 | import java.io.FileInputStream; 12 | import java.io.FileWriter; 13 | import java.io.IOException; 14 | import java.io.InputStream; 15 | import java.util.Properties; 16 | 17 | /** 18 | * 19 | * @author teichmann 20 | */ 21 | public class CreateRandomAutomata { 22 | /** 23 | * 24 | * @throws java.io.IOException 25 | */ 26 | public static void main(String... args) throws IOException { 27 | InputStream in = new FileInputStream(args[0]); 28 | Properties props = new Properties(); 29 | props.load(in); 30 | 31 | String folder = props.getProperty("folder"); 32 | String fileNamePrefix = props.getProperty("fileNamePrefix"); 33 | String size = props.getProperty("size"); 34 | String amount = props.getProperty("toGenerate"); 35 | String seed = props.getProperty("seed"); 36 | String alpha = props.getProperty("alpha"); 37 | 38 | File f = new File(folder); 39 | f.mkdirs(); 40 | 41 | int number = Integer.parseInt(amount); 42 | int n = Integer.parseInt(size); 43 | 44 | RandomTreeAutomaton rta = new RandomTreeAutomaton(Long.parseLong(seed), Double.parseDouble(alpha)); 45 | 46 | for(int i=0;i nodeInVartree, List binarizedChildStates, String pathToNode, Rule originalRule, Tree vartree, InterpretedTreeAutomaton originalIrtg, InterpretedTreeAutomaton binarizedIrtg) { 25 | ConcreteTreeAutomaton binarizedRtg = (ConcreteTreeAutomaton) binarizedIrtg.getAutomaton(); 26 | String oldRuleParent = originalIrtg.getAutomaton().getStateForId(originalRule.getParent()); 27 | String parent; 28 | 29 | double weight; 30 | 31 | if (nodeInVartree == vartree) { 32 | parent = oldRuleParent; 33 | weight = originalRule.getWeight(); 34 | } else { 35 | parent = gensym("q"); 36 | weight = 1; 37 | } 38 | 39 | Rule newRule = binarizedRtg.createRule(parent, nodeInVartree.getLabel(), binarizedChildStates, weight); 40 | return newRule; 41 | } 42 
| 43 | private String gensym(String prefix) { 44 | return prefix + (nextGensym++); 45 | } 46 | 47 | public static Function createFactoryFactory() { 48 | return irtg -> new GensymBinaryRuleFactory(); 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /src/main/antlr/de/up/ling/irtg/codec/tulipac/Tulipac.g4: -------------------------------------------------------------------------------- 1 | grammar Tulipac; 2 | 3 | @header{ 4 | package de.up.ling.irtg.codec.tulipac; 5 | } 6 | 7 | 8 | // keywords 9 | TREE: 'tree'; 10 | FAMILY: 'family'; 11 | WORD: 'word'; 12 | LEMMA: 'lemma'; 13 | INCLUDE: '#include'; 14 | 15 | 16 | NAME: [a-zA-Z_] ([a-zA-Z0-9_]*); 17 | QUOTED_NAME: ['] (~['])* [']; 18 | DOUBLE_QUOTED_NAME: ["] (~["])* ["]; 19 | FAMILY_NAME: [<] (~[>])* [>]; 20 | ANNOTATION: [@] ([a-zA-Z0-9_]+); 21 | VARIABLE: [?] ([a-zA-Z0-9_]+); 22 | 23 | SUBSTITUTION_MARKER: '!'; 24 | FOOT_MARKER: '*'; 25 | ANCHOR_MARKER: '+'; 26 | 27 | COLON: ':'; 28 | OP_CBK: '{'; 29 | CL_CBK: '}'; 30 | OP_SBK: '['; 31 | CL_SBK: ']'; 32 | COMMA: ','; 33 | EQ: '='; 34 | 35 | 36 | WS: [ \n\t\r]+ -> skip; 37 | 38 | COMMENT 39 | : ( '//' ~[\r\n]* '\r'? '\n' 40 | | '/*' .*? '*/' 41 | ) -> skip 42 | ; 43 | 44 | 45 | 46 | grmr: (tr | family | wordByItself | lemma | include)+ EOF; 47 | 48 | 49 | 50 | /**** trees ****/ 51 | 52 | tr: TREE identifier COLON node; 53 | 54 | node: identifier marker? annotation? fs? fs? (OP_CBK node+ CL_CBK)?; 55 | 56 | fs: OP_SBK (ft COMMA)* ft? CL_SBK; 57 | 58 | ft: identifier EQ (identifier|variable); 59 | 60 | 61 | 62 | /**** tree families ****/ 63 | 64 | family: FAMILY identifier COLON OP_CBK (identifier COMMA)* identifier CL_CBK; 65 | 66 | 67 | 68 | /**** words ****/ 69 | 70 | wordByItself: WORD identifier COLON (identifier|familyIdentifier) fs?; 71 | 72 | wordInLemma: WORD identifier (COLON fs)?; 73 | 74 | lemma: LEMMA identifier COLON (identifier|familyIdentifier) fs? 
OP_CBK wordInLemma+ CL_CBK;



/**** #include ****/
include: INCLUDE identifier;




identifier: NAME                #RAW
          | QUOTED_NAME         #QUOTED
          | DOUBLE_QUOTED_NAME  #DQUOTED
          ;

familyIdentifier: FAMILY_NAME;

marker: SUBSTITUTION_MARKER  #SUBST
      | FOOT_MARKER          #FOOT
      | ANCHOR_MARKER        #ANCHOR
      ;

annotation: ANNOTATION;

variable: VARIABLE;
--------------------------------------------------------------------------------
/src/main/java/de/up/ling/irtg/semiring/ViterbiWithBackpointerSemiring.java:
--------------------------------------------------------------------------------
/*
 * To change this template, choose Tools | Templates
 * and open the template in the editor.
 */
package de.up.ling.irtg.semiring;

import de.saar.basic.Pair;
import de.up.ling.irtg.automata.Rule;

/**
 * Viterbi with multiplications. Stores backpointer to best rule.
 * <p>
 * Values are pairs whose left component is a score and whose right component
 * is the rule backpointer. Addition picks the pair with the larger score,
 * multiplication multiplies the scores and keeps the backpointer of the
 * first argument.
 *
 * @author koller
 */
public class ViterbiWithBackpointerSemiring implements Semiring<Pair<Double, Rule>> {

    public static final ViterbiWithBackpointerSemiring INSTANCE = new ViterbiWithBackpointerSemiring();

    /** Score of the zero element. */
    protected static final double ZERO = Double.NEGATIVE_INFINITY;

    /** Zero element: score {@link #ZERO}, no backpointer. */
    protected static final Pair<Double, Rule> ZERO_PAIR = new Pair<>(ZERO, null);

    /** One element: score 1.0, no backpointer. */
    protected static final Pair<Double, Rule> ONE_PAIR = new Pair<>(1.0, null);

    /**
     * Maximum of the two values.
     *
     * @param x first value
     * @param y second value
     * @return {@code x} if its score is strictly greater than that of
     *         {@code y}; otherwise {@code y} (in particular on ties)
     */
    public Pair<Double, Rule> add(Pair<Double, Rule> x, Pair<Double, Rule> y) {
        if (x.left > y.left) {
            return x;
        } else {
            return y;
        }
    }

    /**
     * Multiply. Rule backpointer is passed on from first argument.
     * <p>
     * If either argument is the {@link #ONE_PAIR} object itself (compared by
     * reference), the other argument is returned unchanged.
     *
     * @param x first factor; supplies the backpointer of the result
     * @param y second factor
     * @return the product as described above
     */
    @Override
    public Pair<Double, Rule> multiply(Pair<Double, Rule> x, Pair<Double, Rule> y) {
        if (x == ONE_PAIR) {
            return y;
        }
        if (y == ONE_PAIR) {
            return x;
        }
        if (x.left == ZERO || y.left == ZERO) {
            // ensure that zero * x = x * zero = zero;
            // otherwise could get zero * zero = +Infinity
            return new Pair<>(ZERO, x.right);
        } else {
            return new Pair<>(x.left * y.left, x.right);
        }
    }

    /**
     * @return the shared zero element {@link #ZERO_PAIR}
     */
    @Override
    public Pair<Double, Rule> zero() {
        return ZERO_PAIR;
    }

    /**
     * @return the shared one element {@link #ONE_PAIR}
     */
    @Override
    public Pair<Double, Rule> one() {
        return ONE_PAIR;
    }
}

--------------------------------------------------------------------------------