├── .editorconfig ├── .github └── workflows │ └── tester.yaml ├── .gitignore ├── .perltidyrc ├── CONTRIBUTING.md ├── Dockerfile ├── README.md ├── bin ├── crstatus.pl ├── czeng-analysis ├── derimor ├── fl2treex ├── t │ ├── TestsCommon.pm │ ├── eval_block.t │ ├── fs.t │ ├── parallel.t │ ├── qparallel.t │ ├── scenarios │ │ ├── print3.scen │ │ ├── print4.scen │ │ └── scen_in_scen.scen │ ├── space_in_block_arg.t │ ├── survive.t │ └── treexrun.t ├── treex ├── treex-mtmworker.pl ├── treex-server ├── treex-socket-server.pl └── ttred ├── cpanfile ├── lib ├── Bash │ └── Completion │ │ └── Plugins │ │ └── Treex.pm ├── Treex.pm └── Treex │ ├── Block │ ├── A2A │ │ ├── AddDirectObjectMarkers.pm │ │ ├── AddEnhancedUD.pm │ │ ├── AddPluralMarkers.pm │ │ ├── AddTranslationsFromFile.pm │ │ ├── BackupTree.pm │ │ ├── CS │ │ │ ├── DetectClauses.pm │ │ │ ├── DoubleNegation.pm │ │ │ ├── FixAgreement.pm │ │ │ ├── FixAuxT.pm │ │ │ ├── FixAuxVChildren.pm │ │ │ ├── FixBy.pm │ │ │ ├── FixCasing.pm │ │ │ ├── FixFirstWordCapitalization.pm │ │ │ ├── FixGenitive.pm │ │ │ ├── FixNounAdjectiveAgreement.pm │ │ │ ├── FixNounClusters.pm │ │ │ ├── FixNounNounAgreement.pm │ │ │ ├── FixNounNumber.pm │ │ │ ├── FixOf.pm │ │ │ ├── FixP4.pm │ │ │ ├── FixPOS.pm │ │ │ ├── FixPassive.pm │ │ │ ├── FixPassiveAuxBeAgreement.pm │ │ │ ├── FixPnom.pm │ │ │ ├── FixPrepositionNounAgreement.pm │ │ │ ├── FixPrepositionWithoutChildren.pm │ │ │ ├── FixPrepositionalCase.pm │ │ │ ├── FixPresentContinuous.pm │ │ │ ├── FixSubject.pm │ │ │ ├── FixSubjectPastParticipleAgreement.pm │ │ │ ├── FixSubjectPredicateAgreement.pm │ │ │ ├── FixTemplate.pm │ │ │ ├── FixVerbAuxBeAgreement.pm │ │ │ ├── FixVerbByEnSubject.pm │ │ │ ├── ReadClauses.pm │ │ │ ├── RemoveFeaturesFromLemmas.pm │ │ │ ├── TruncateLemma.pm │ │ │ ├── VocalizePrepos.pm │ │ │ ├── VocalizePreposPlain.pm │ │ │ └── WorsenWordForms.pm │ │ ├── ConvertTags.pm │ │ ├── CopyAlignments.pm │ │ ├── CopyAtree.pm │ │ ├── CopyBasicToEnhancedUD.pm │ │ ├── CopyNodesFromAlignment.pm │ │ 
├── CopySurfaceFromAlignment.pm │ │ ├── CorefClusters.pm │ │ ├── CorefDestroyWild.pm │ │ ├── CorefMentionHeads.pm │ │ ├── CorefMentions.pm │ │ ├── CorefToMisc.pm │ │ ├── DE │ │ │ └── CoNLL2Iset.pm │ │ ├── DeleteAfunCoordWithoutMembers.pm │ │ ├── Deprel2Afun.pm │ │ ├── EN │ │ │ ├── EnhanceInterset.pm │ │ │ ├── RehangModalVerbs.pm │ │ │ ├── RehangPPAttachment.pm │ │ │ └── Retokenize.pm │ │ ├── EU │ │ │ ├── FixAspect.pm │ │ │ ├── FixDefIndef.pm │ │ │ ├── FixMoveRoot.pm │ │ │ └── FixTest.pm │ │ ├── EnsembleTree.pm │ │ ├── FillCoNLLAttributes.pm │ │ ├── FilterBundlesByTreeSize.pm │ │ ├── FixCoordinatedAuxCP.pm │ │ ├── FlattenAtree.pm │ │ ├── FlowNetwork.pm │ │ ├── GRC │ │ │ └── AgdtFixAfun.pm │ │ ├── GuessIsMember.pm │ │ ├── HI │ │ │ └── Lemmatize.pm │ │ ├── MaxCycles.pm │ │ ├── NL │ │ │ └── EnhanceInterset.pm │ │ ├── NodeIdFromCoNLLFeats.pm │ │ ├── OracleTree.pm │ │ ├── ProjectCase.pm │ │ ├── ProjectTreeThroughAlignment.pm │ │ ├── RemoveDuplicateNodes.pm │ │ ├── RemoveEmptyNodes.pm │ │ ├── RemoveEmptySentences.pm │ │ ├── RemoveUnannotatedSentences.pm │ │ ├── RemoveUnusedEmptyNodes.pm │ │ ├── ReorderByLemmas.pm │ │ ├── ReorderHeadFinal.pm │ │ ├── ReorderPrepositions.pm │ │ ├── ReorderSVO2SOV.pm │ │ ├── Retokenize.pm │ │ ├── SVMTree.pm │ │ ├── SetAfunCoordAboveMembers.pm │ │ ├── SetClauseDepth.pm │ │ ├── SetCoordConjunction.pm │ │ ├── SetSharedModifier.pm │ │ ├── TA │ │ │ ├── FixAlignments.pm │ │ │ └── FixProjectedEdges.pm │ │ ├── TrainingData.pm │ │ ├── Transform │ │ │ ├── AllPunctBelowTechRoot.pm │ │ │ ├── BaseTransformer.pm │ │ │ ├── ComplexVerb.pm │ │ │ ├── ComplexVerbRootFirst.pm │ │ │ ├── ComplexVerbRootLast.pm │ │ │ ├── CoordStyle.pm │ │ │ ├── FirstNameUpward.pm │ │ │ ├── InvPrepositionDownward.pm │ │ │ ├── InvSubordConjDownward.pm │ │ │ ├── PrepositionDownward.pm │ │ │ ├── PunctBelowPrevNode.pm │ │ │ ├── SharedModifBelowNearestMember.pm │ │ │ ├── SubordConjDownward.pm │ │ │ └── t │ │ │ │ └── transformer_subscription.t │ │ ├── Translate.pm │ │ └── 
TranslateWithPreprocessing.pm │ ├── A2N │ │ ├── BaseNER.pm │ │ ├── CS │ │ │ ├── FixNERforIT.pm │ │ │ ├── NameTag.pm │ │ │ ├── NormalizeNames.pm │ │ │ ├── SimpleRuleNER.pm │ │ │ ├── SvmNer.pm │ │ │ ├── SysNERV.pm │ │ │ └── t │ │ │ │ ├── sysnerv_load.t │ │ │ │ └── sysnerv_run_reco.t │ │ ├── DE │ │ │ └── NameTag.pm │ │ ├── EN │ │ │ ├── DistinguishPersonalNames.pm │ │ │ ├── NameTag.pm │ │ │ ├── StanfordNER2008.pm │ │ │ ├── StanfordNER2015.pm │ │ │ ├── StanfordNamedEntities.pm │ │ │ └── t │ │ │ │ ├── stanford2008.t │ │ │ │ └── stanford2015.t │ │ ├── Encode2LemmaTag.pm │ │ ├── EncodeBIO.pm │ │ ├── FixMissingLinks.pm │ │ ├── MergeBBNIntoStanford.pm │ │ ├── NL │ │ │ └── AlpinoSimpleNER.pm │ │ ├── NameTag.pm │ │ ├── NestEntities.pm │ │ ├── RU │ │ │ └── NameTag.pm │ │ ├── SimpleNER.pm │ │ └── VI │ │ │ └── RecognizeFrequentPersonalNames.pm │ ├── A2P │ │ ├── NL │ │ │ └── ParseAlpino.pm │ │ ├── ParseCharniak.pm │ │ └── ParseStanford.pm │ ├── A2T │ │ ├── AddPersPronSb.pm │ │ ├── BG │ │ │ ├── MarkEdgesToCollapse.pm │ │ │ ├── SetCoapFunctors.pm │ │ │ └── SetGrammatemesFromAux.pm │ │ ├── BuildTtree.pm │ │ ├── CS │ │ │ ├── AddCor.pm │ │ │ ├── AddPersPron.pm │ │ │ ├── AddPersPronSb.pm │ │ │ ├── AddRcp.pm │ │ │ ├── DeleteExtraCoref.pm │ │ │ ├── FixCoord.pm │ │ │ ├── FixNonthirdPersSubj.pm │ │ │ ├── FixNumerals.pm │ │ │ ├── FixTlemmas.pm │ │ │ ├── MarkClauseHeads.pm │ │ │ ├── MarkEdgesToCollapse.pm │ │ │ ├── MarkReflexivePassiveGen.pm │ │ │ ├── MarkReflpronCoref.pm │ │ │ ├── MarkRelClauseCoref.pm │ │ │ ├── MarkRelClauseHeads.pm │ │ │ ├── SetCoapFunctors.pm │ │ │ ├── SetDiathesis.pm │ │ │ ├── SetFormeme.pm │ │ │ ├── SetFormeme │ │ │ │ └── NodeInfo.pm │ │ │ ├── SetFunctors.pm │ │ │ ├── SetFunctorsVW.pm │ │ │ ├── SetGrammatemes.pm │ │ │ ├── SetMissingFunctors.pm │ │ │ ├── SetValencyFrameRef.pm │ │ │ ├── SetValencyFrameRef2.pm │ │ │ ├── SetValencyFrameRefVW.pm │ │ │ ├── TBLa2tPhaseFd.pm │ │ │ └── conversion_rules.txt │ │ ├── DE │ │ │ └── SetCoapFunctors.pm │ │ ├── 
DeleteChildlessPunctuation.pm │ │ ├── DisambiguateGrammatemes.pm │ │ ├── DisambiguateGrammatemesFull.pm │ │ ├── EN │ │ │ ├── AddCorAct.pm │ │ │ ├── FindTextCoref.pm │ │ │ ├── FindTextCorefML.pm │ │ │ ├── FixAdjNattrN.pm │ │ │ ├── FixEitherOr.pm │ │ │ ├── FixHowPlusAdjective.pm │ │ │ ├── FixImperatives.pm │ │ │ ├── FixRelClauseNoRelPron.pm │ │ │ ├── FixTlemmas.pm │ │ │ ├── MarkClauseHeads.pm │ │ │ ├── MarkDspRoot.pm │ │ │ ├── MarkEdgesToCollapse.pm │ │ │ ├── MarkEdgesToCollapseNeg.pm │ │ │ ├── MarkInfin.pm │ │ │ ├── MarkPassives.pm │ │ │ ├── MarkReferentialIt.pm │ │ │ ├── MarkReflpronCoref.pm │ │ │ ├── MarkRelClauseCoref.pm │ │ │ ├── MarkRelClauseHeads.pm │ │ │ ├── MoveAuxFromCoordToMembers.pm │ │ │ ├── RehangSharedAttr.pm │ │ │ ├── SetCoapFunctors.pm │ │ │ ├── SetFormeme.pm │ │ │ ├── SetFormemeInterset.pm │ │ │ ├── SetFunctors.pm │ │ │ ├── SetFunctors2.pm │ │ │ ├── SetFunctorsVW.pm │ │ │ ├── SetGenderOfPerson.pm │ │ │ ├── SetGrammatemes.pm │ │ │ ├── SetIsNameOfPerson.pm │ │ │ ├── SetMissingFunctors.pm │ │ │ ├── SetNodetype.pm │ │ │ ├── SetPersonGender.pm │ │ │ ├── SetSentmod.pm │ │ │ ├── SetTense.pm │ │ │ ├── SetValencyFrameRef.pm │ │ │ ├── SetValencyFrameRef2.pm │ │ │ ├── SetValencyFrameRefVW.pm │ │ │ └── SetVoice.pm │ │ ├── ES │ │ │ ├── FixReflexiveVerbs.pm │ │ │ ├── FixTlemmas.pm │ │ │ ├── MarkEdgesToCollapse.pm │ │ │ ├── SetCoapFunctors.pm │ │ │ ├── SetFormeme.pm │ │ │ ├── SetGrammatemes.pm │ │ │ ├── SetGrammatemesFromAux.pm │ │ │ └── SetSentmod.pm │ │ ├── EU │ │ │ ├── MarkEdgesToCollapse.pm │ │ │ ├── SetCoapFunctors.pm │ │ │ ├── SetFormeme.pm │ │ │ ├── SetGrammatemes.pm │ │ │ └── SetSentmod.pm │ │ ├── FixAtomicNodes.pm │ │ ├── FixIsMember.pm │ │ ├── GRC │ │ │ ├── MarkEdgesToCollapse.pm │ │ │ └── SetCoapFunctors.pm │ │ ├── HideParentheses.pm │ │ ├── JA │ │ │ ├── FixTlemmas.pm │ │ │ ├── MarkEdgesToCollapseNeg.pm │ │ │ ├── MarkPassives.pm │ │ │ ├── SetFormeme.pm │ │ │ ├── SetGrammatemes.pm │ │ │ └── SetTense.pm │ │ ├── LA │ │ │ ├── AddPersPron.pm │ │ │ ├── 
FixTlemmas.pm │ │ │ ├── MarkClauseHeads.pm │ │ │ ├── MarkEdgesToCollapse.pm │ │ │ ├── MarkRelClauseCoref.pm │ │ │ ├── MarkRelClauseHeads.pm │ │ │ ├── SetCoapFunctors.pm │ │ │ ├── SetFunctors.pm │ │ │ ├── SetGrammatemes.pm │ │ │ ├── SetPronouns.pm │ │ │ └── TopicFocusArticulation.pm │ │ ├── MarkClauseHeads.pm │ │ ├── MarkEdgesToCollapse.pm │ │ ├── MarkParentheses.pm │ │ ├── MarkReflpronCoref.pm │ │ ├── MarkRelClauseCoref.pm │ │ ├── MarkRelClauseHeads.pm │ │ ├── MinimizeGrammatemes.pm │ │ ├── MoveAuxFromCoordToMembers.pm │ │ ├── NL │ │ │ ├── FixMultiwordSurnames.pm │ │ │ ├── FixTlemmas.pm │ │ │ ├── MarkEdgesToCollapse.pm │ │ │ ├── SetCoapFunctors.pm │ │ │ ├── SetFormeme.pm │ │ │ ├── SetFunctors.pm │ │ │ ├── SetGrammatemes.pm │ │ │ ├── SetGrammatemesFromAux.pm │ │ │ └── SetSentmod.pm │ │ ├── PL │ │ │ └── SetCoapFunctors.pm │ │ ├── PT │ │ │ ├── FixFormeme.pm │ │ │ ├── FixImperatives.pm │ │ │ ├── FixPersPron.pm │ │ │ ├── MarkEdgesToCollapse.pm │ │ │ ├── SetCoapFunctors.pm │ │ │ ├── SetGrammatemes.pm │ │ │ └── SetGrammatemesFromAux.pm │ │ ├── ProjectGazeteerInfo.pm │ │ ├── ProjectSelectedWild.pm │ │ ├── RU │ │ │ └── SetCoapFunctors.pm │ │ ├── RehangUnaryCoordConj.pm │ │ ├── SK │ │ │ ├── AddPersPron.pm │ │ │ ├── FixNumerals.pm │ │ │ ├── FixTlemmas.pm │ │ │ ├── MarkEdgesToCollapse.pm │ │ │ ├── MarkRelClauseCoref.pm │ │ │ ├── SetCoapFunctors.pm │ │ │ ├── SetDiathesis.pm │ │ │ ├── SetFormeme.pm │ │ │ ├── SetFormeme │ │ │ │ └── NodeInfo.pm │ │ │ ├── SetFunctors.pm │ │ │ └── SetPhrasalFunctors.pm │ │ ├── SRLParser.pm │ │ ├── SetCoapFunctors.pm │ │ ├── SetDocOrds.pm │ │ ├── SetFormeme.pm │ │ ├── SetFunctorsMLProcess.pm │ │ ├── SetFunctorsRules.pm │ │ ├── SetFunctorsVW.pm │ │ ├── SetGrammatemes.pm │ │ ├── SetGrammatemesFromAux.pm │ │ ├── SetGrammatemesFromAuxForPT.pm │ │ ├── SetIsMember.pm │ │ ├── SetNodetype.pm │ │ ├── SetSentmod.pm │ │ ├── SetValencyFrameRef.pm │ │ ├── SetValencyFrameRef2.pm │ │ └── SetValencyFrameRefVW.pm │ ├── A2W │ │ ├── CS │ │ │ ├── ApplySubstitutions.pm │ 
│ │ ├── AsciiQuotes.pm │ │ │ ├── ConcatenateTokens.pm │ │ │ ├── Detokenize.pm │ │ │ ├── DetokenizeDashes.pm │ │ │ ├── DetokenizeUsingRules.pm │ │ │ └── RemoveRepeatedTokens.pm │ │ ├── CapitalizeSentStart.pm │ │ ├── ConcatenateTokens.pm │ │ ├── Detokenize.pm │ │ ├── EN │ │ │ ├── ConcatenateTokens.pm │ │ │ ├── DeleteTracesFromAtree.pm │ │ │ ├── DeleteTracesFromSentence.pm │ │ │ ├── DirtyTricks.pm │ │ │ ├── FixCapitalization.pm │ │ │ └── Tidy.pm │ │ ├── ES │ │ │ └── ConcatenateTokens.pm │ │ ├── EU │ │ │ └── ConcatenateTokens.pm │ │ ├── NL │ │ │ ├── DetokenizeSentence.pm │ │ │ └── GenerateSentenceAlpino.pm │ │ ├── NormalizePunctuationForWMT.pm │ │ ├── PT │ │ │ ├── ConcatenateTokens.pm │ │ │ └── DirtyTricks.pm │ │ ├── ShowGazetteerItems.pm │ │ └── ShowIT.pm │ ├── Align │ │ ├── A │ │ │ ├── AlignMGiza.pm │ │ │ ├── CollectLinks.pm │ │ │ ├── FilterTreesByAlignment.pm │ │ │ ├── InsertAlignmentFromFile.pm │ │ │ ├── InsertBerkeleyAlignment.pm │ │ │ ├── MonolingualGreedy.pm │ │ │ ├── RemoveDuplicateLinks.pm │ │ │ ├── Retokenize.pm │ │ │ └── TrainAndAlignMGiza.pm │ │ ├── AddMissingLinks.pm │ │ ├── AddTransitiveLinks.pm │ │ ├── AlignForward.pm │ │ ├── AlignSameSentence.pm │ │ ├── Annot │ │ │ ├── Load.pm │ │ │ ├── Print.pm │ │ │ └── Summary.pm │ │ ├── FilterAlignment.pm │ │ ├── MarkConsistentTreelets.pm │ │ ├── ProjectAlignment.pm │ │ ├── PruneAlignmentForProjection.pm │ │ ├── RemoveAlignments.pm │ │ ├── ReverseAlignment.pm │ │ └── T │ │ │ ├── AlignCzechPersprons.pm │ │ │ ├── AlignGeneratedNodes.pm │ │ │ ├── Compare.pm │ │ │ ├── CopyAlignmentFromAlayer.pm │ │ │ ├── Eval.pm │ │ │ ├── Greedy1To1Alignment.pm │ │ │ ├── PCEDTAlignment.pm │ │ │ └── Supervised │ │ │ ├── Base.pm │ │ │ ├── PrintData.pm │ │ │ └── Resolver.pm │ ├── Coref │ │ ├── CS │ │ │ ├── All │ │ │ │ └── Resolve.pm │ │ │ ├── Cor │ │ │ │ ├── Base.pm │ │ │ │ ├── PrintData.pm │ │ │ │ └── Resolve.pm │ │ │ ├── DemonPron │ │ │ │ ├── Base.pm │ │ │ │ ├── PrintData.pm │ │ │ │ └── Resolve.pm │ │ │ ├── PersPron │ │ │ │ ├── Base.pm │ 
│ │ │ ├── PrintData.pm │ │ │ │ └── Resolve.pm │ │ │ ├── ReflPron │ │ │ │ ├── Base.pm │ │ │ │ ├── PrintData.pm │ │ │ │ └── Resolve.pm │ │ │ └── RelPron │ │ │ │ ├── Base.pm │ │ │ │ ├── PrintData.pm │ │ │ │ └── Resolve.pm │ │ ├── EN │ │ │ ├── Cor │ │ │ │ ├── Base.pm │ │ │ │ ├── PrintData.pm │ │ │ │ └── Resolve.pm │ │ │ ├── PersPron │ │ │ │ ├── Base.pm │ │ │ │ ├── PrintData.pm │ │ │ │ └── Resolve.pm │ │ │ ├── ReflPron │ │ │ │ ├── Base.pm │ │ │ │ ├── PrintData.pm │ │ │ │ └── Resolve.pm │ │ │ ├── RelPron │ │ │ │ ├── Base.pm │ │ │ │ ├── PrintData.pm │ │ │ │ └── Resolve.pm │ │ │ ├── ResolveBART2.pm │ │ │ └── ResolveStanfordCoreNLP.pm │ │ ├── EntityEvent │ │ │ ├── CS │ │ │ │ └── DemonPron │ │ │ │ │ ├── Base.pm │ │ │ │ │ ├── PrintData.pm │ │ │ │ │ └── Resolve.pm │ │ │ ├── Eval.pm │ │ │ ├── IndicateForCoref.pm │ │ │ ├── PrintData.pm │ │ │ └── Resolve.pm │ │ ├── MarkMentionsForScorer.pm │ │ ├── PrepareSpecializedEval.pm │ │ ├── PrettyPrint.pm │ │ ├── PrettyPrint │ │ │ ├── LabelKey.pm │ │ │ └── LabelSys.pm │ │ ├── PrintData.pm │ │ ├── Project │ │ │ └── MentionsFromAtoTlayer.pm │ │ ├── ProjectCorefEntities.pm │ │ ├── ProjectLinks.pm │ │ ├── RearrangeLinks.pm │ │ ├── RemoveLinks.pm │ │ ├── Resolve.pm │ │ ├── ResolveFromRawText.pm │ │ ├── STATE_OF_PROGRESS.txt │ │ ├── SimpleEval.pm │ │ ├── SupervisedBase.pm │ │ └── Write │ │ │ └── SentencesWithMentions.pm │ ├── Depfix │ │ ├── CS2EN │ │ │ ├── Fix.pm │ │ │ ├── FixGenitive.pm │ │ │ └── FixSVO.pm │ │ ├── CollectEdits.pm │ │ ├── CollectMonolingual.pm │ │ ├── EN2CS │ │ │ ├── CollectEdits.pm │ │ │ ├── MLFix.pm │ │ │ ├── MLFix_cas.pm │ │ │ ├── MLFix_gnc.pm │ │ │ └── MLFix_nc.pm │ │ ├── Fix.pm │ │ ├── MLFix.pm │ │ ├── README │ │ └── sample_config.yaml │ ├── Discourse │ │ ├── CS │ │ │ ├── EvaldEvaluateWeka.pm │ │ │ ├── EvaldExtractFeaturesWeka.pm │ │ │ ├── MarkDiscourse.pm │ │ │ └── MarkTFA.pm │ │ └── EVALD │ │ │ ├── Base.pm │ │ │ ├── PrintData.pm │ │ │ └── Resolve.pm │ ├── Eval │ │ ├── AER.pm │ │ ├── AddPersPronIt.pm │ │ ├── 
AddPersPronSb.pm │ │ ├── AtreeHighlightEdges.pm │ │ ├── AtreeUAS.pm │ │ ├── AtreeUASWithConfInterval.pm │ │ ├── AtreeUAStat.pm │ │ ├── BiEdgeScore.pm │ │ ├── BitextCorefStats.pm │ │ ├── Coref.pm │ │ ├── CorefSegm.pm │ │ ├── CorefStats.pm │ │ ├── EvalClauses.pm │ │ ├── InterAnnotatorAgreement.pm │ │ ├── Nonproj.pm │ │ ├── ReferentialIt.pm │ │ ├── SentencesWithIncompleteMorphology.pm │ │ ├── Ttrees.pm │ │ └── Wc.pm │ ├── Filter │ │ ├── CNK │ │ │ ├── DocIds.pm │ │ │ └── PunctuationOnly.pm │ │ ├── CzEng │ │ │ ├── AcademicTitle.pm │ │ │ ├── AlignmentCummulation.pm │ │ │ ├── AlignmentScore.pm │ │ │ ├── Classifier.pm │ │ │ ├── Common.pm │ │ │ ├── DecisionTree.pm │ │ │ ├── DictionaryRatio.pm │ │ │ ├── DifferentNumberOfTokens.pm │ │ │ ├── Eval.pm │ │ │ ├── GutenbergHeader.pm │ │ │ ├── HeadSwapRatio.pm │ │ │ ├── IdenticalSentences.pm │ │ │ ├── InconsistentGrade.pm │ │ │ ├── InconsistentNumber.pm │ │ │ ├── InconsistentNumeral.pm │ │ │ ├── InconsistentTense.pm │ │ │ ├── InterleavingSpaces.pm │ │ │ ├── LetterCount.pm │ │ │ ├── LongSentence.pm │ │ │ ├── LongWord.pm │ │ │ ├── MarkBadPairs.pm │ │ │ ├── MaxEnt.pm │ │ │ ├── MicrosoftLinesWithFilenames.pm │ │ │ ├── NaiveBayes.pm │ │ │ ├── NoWordInLanguage.pm │ │ │ ├── NonASCIICharacter.pm │ │ │ ├── POSRatio.pm │ │ │ ├── Predict.pm │ │ │ ├── RemoveBadPairs.pm │ │ │ ├── RemoveLinksToDeletedBundles.pm │ │ │ ├── ReorderingQuantity.pm │ │ │ ├── RepeatedCharacter.pm │ │ │ ├── Score.pm │ │ │ ├── SpecialCharRatio.pm │ │ │ ├── SuspiciousCharacter.pm │ │ │ ├── Train.pm │ │ │ └── UnrecognizedTagRatio.pm │ │ ├── Generic │ │ │ ├── AlignmentCummulation.pm │ │ │ ├── AlignmentScore.pm │ │ │ ├── Classifier.pm │ │ │ ├── Common.pm │ │ │ ├── DecisionTree.pm │ │ │ ├── DifferentNumberOfTokens.pm │ │ │ ├── Eval.pm │ │ │ ├── HeadSwapRatio.pm │ │ │ ├── IdenticalSentences.pm │ │ │ ├── LetterCount.pm │ │ │ ├── LongSentence.pm │ │ │ ├── LongWord.pm │ │ │ ├── MarkBadPairs.pm │ │ │ ├── MaxEnt.pm │ │ │ ├── NaiveBayes.pm │ │ │ ├── Predict.pm │ │ │ ├── 
RemoveBadPairs.pm │ │ │ ├── RemoveLinksToDeletedBundles.pm │ │ │ ├── ReorderingQuantity.pm │ │ │ ├── RepeatedCharacter.pm │ │ │ ├── Score.pm │ │ │ ├── SpecialCharRatio.pm │ │ │ ├── SuspiciousCharacter.pm │ │ │ └── Train.pm │ │ ├── HindenCorp │ │ │ ├── InconsistentNumber.pm │ │ │ ├── NoEnglishWord.pm │ │ │ ├── NonASCIICharacter.pm │ │ │ ├── POSRatio.pm │ │ │ └── UnrecognizedTagRatio.pm │ │ ├── Node.pm │ │ ├── NthSentence.pm │ │ ├── RemoveEmptySentences.pm │ │ ├── SDP2015Trees.pm │ │ └── SentenceNumber.pm │ ├── Gazetteer │ │ └── ExtractEntityVectors.pm │ ├── HamleDT │ │ ├── AR │ │ │ ├── FixUD.pm │ │ │ ├── Harmonize.pm │ │ │ └── TestFileNames.pm │ │ ├── BG │ │ │ ├── FixUD.pm │ │ │ └── Harmonize.pm │ │ ├── BN │ │ │ └── Harmonize.pm │ │ ├── Base.pm │ │ ├── CA │ │ │ ├── FixUD.pm │ │ │ └── Harmonize.pm │ │ ├── CS │ │ │ ├── CollectSentenceText.pm │ │ │ ├── FixUD.pm │ │ │ ├── Harmonize.pm │ │ │ ├── HarmonizeFicTree.pm │ │ │ ├── HarmonizePDT.pm │ │ │ ├── HarmonizePDTC.pm │ │ │ ├── SetBundleId.pm │ │ │ ├── SplitFusedWords.pm │ │ │ └── ToDoPunctuation.pm │ │ ├── DA │ │ │ ├── FixUD.pm │ │ │ └── Harmonize.pm │ │ ├── DE │ │ │ ├── FixUD.pm │ │ │ └── Harmonize.pm │ │ ├── EL │ │ │ ├── FixUD.pm │ │ │ └── Harmonize.pm │ │ ├── EN │ │ │ ├── Harmonize.pm │ │ │ └── HarmonizePCEDT.pm │ │ ├── ES │ │ │ ├── FixUD.pm │ │ │ └── Harmonize.pm │ │ ├── ET │ │ │ ├── FixUD.pm │ │ │ └── Harmonize.pm │ │ ├── EU │ │ │ ├── FixUD.pm │ │ │ └── Harmonize.pm │ │ ├── FA │ │ │ ├── FixUD.pm │ │ │ └── Harmonize.pm │ │ ├── FI │ │ │ ├── FixUD.pm │ │ │ └── Harmonize.pm │ │ ├── GRC │ │ │ └── Harmonize.pm │ │ ├── HE │ │ │ └── Harmonize.pm │ │ ├── HI │ │ │ ├── FixPUD.pm │ │ │ ├── FixUD.pm │ │ │ └── Harmonize.pm │ │ ├── HR │ │ │ ├── FixUD.pm │ │ │ └── Harmonize.pm │ │ ├── HU │ │ │ ├── FixUD.pm │ │ │ └── Harmonize.pm │ │ ├── Harmonize.pm │ │ ├── HarmonizeAnCora.pm │ │ ├── HarmonizePDT.pm │ │ ├── HarmonizePDTIT.pm │ │ ├── HarmonizePerseus.pm │ │ ├── HarmonizePerseusIT.pm │ │ ├── ID │ │ │ └── FixUD.pm │ │ ├── IS │ │ │ └── 
Harmonize.pm │ │ ├── IT │ │ │ ├── FixUD.pm │ │ │ └── Harmonize.pm │ │ ├── JA │ │ │ ├── FixUD.pm │ │ │ ├── GoogleToUdep.pm │ │ │ └── Harmonize.pm │ │ ├── KO │ │ │ ├── FixUD.pm │ │ │ └── GoogleToUdep.pm │ │ ├── LA │ │ │ ├── Harmonize.pm │ │ │ ├── HarmonizeIT.pm │ │ │ ├── List_absolute_adverbs_ITTB.txt │ │ │ ├── List_all_lemmas_ITTB.txt │ │ │ └── List_comparative_adverbs_ITTB.txt │ │ ├── LT │ │ │ ├── FixUD.pm │ │ │ └── Harmonize.pm │ │ ├── LV │ │ │ └── FixUD.pm │ │ ├── MT │ │ │ └── Harmonize.pm │ │ ├── NL │ │ │ └── Harmonize.pm │ │ ├── OrigFileSentToComment.pm │ │ ├── PL │ │ │ ├── FixUD.pm │ │ │ ├── Harmonize.pm │ │ │ └── SplitFusedWords.pm │ │ ├── PT │ │ │ ├── FixUD.pm │ │ │ ├── GoogleToUdep.pm │ │ │ ├── Harmonize.pm │ │ │ ├── HarmonizeCintil.pm │ │ │ ├── HarmonizeCintilUSD.pm │ │ │ └── SplitFusedWords.pm │ │ ├── PragueDeprelsToUD.pm │ │ ├── Punctuation.pm │ │ ├── RO │ │ │ ├── FixUD.pm │ │ │ └── Harmonize.pm │ │ ├── RU │ │ │ ├── FixUD.pm │ │ │ └── Harmonize.pm │ │ ├── SA │ │ │ └── FixUD.pm │ │ ├── SK │ │ │ ├── FixUD.pm │ │ │ ├── Harmonize.pm │ │ │ └── SplitFusedWords.pm │ │ ├── SL │ │ │ └── Harmonize.pm │ │ ├── SR │ │ │ └── FixUD.pm │ │ ├── SV │ │ │ └── Harmonize.pm │ │ ├── SetConllTags.pm │ │ ├── SetDeprel.pm │ │ ├── SetMiscNEFromPropn.pm │ │ ├── SplitFusedWords.pm │ │ ├── SplitMWUnderscore.pm │ │ ├── TA │ │ │ ├── FixUD.pm │ │ │ ├── Harmonize.pm │ │ │ └── ReadDetokenizedSentences.pm │ │ ├── TE │ │ │ └── Harmonize.pm │ │ ├── TR │ │ │ └── Harmonize.pm │ │ ├── Test │ │ │ ├── AuxGIsPunctuation.pm │ │ │ ├── AuxKAtEnd.pm │ │ │ ├── AuxKUnderRoot.pm │ │ │ ├── AuxVNotOnTop.pm │ │ │ ├── AuxXIsComma.pm │ │ │ ├── CoApAboveEveryMember.pm │ │ │ ├── CoordStyle.pm │ │ │ ├── DeprelKnown.pm │ │ │ ├── FinalPunctuation.pm │ │ │ ├── LeafAux.pm │ │ │ ├── MaxOneSubject.pm │ │ │ ├── MemberInEveryCoAp.pm │ │ │ ├── NoNewNonProj.pm │ │ │ ├── NonParentAuxS.pm │ │ │ ├── NonemptyAttr.pm │ │ │ ├── NounGovernsDet.pm │ │ │ ├── PrepIsAuxP.pm │ │ │ ├── PunctUnderCoord.pm │ │ │ ├── SubjectUnderVerb.pm 
│ │ │ └── UD │ │ │ │ ├── Adpositions.pm │ │ │ │ ├── AdverbIsNotNmod.pm │ │ │ │ ├── AuxChain.pm │ │ │ │ ├── AuxpassImpliesSubjpass.pm │ │ │ │ ├── CcDependsOnFollowingConjunct.pm │ │ │ │ ├── CcIsConjunction.pm │ │ │ │ ├── CcIsLeaf.pm │ │ │ │ ├── CompoundPrepositions.pm │ │ │ │ ├── CopulaIsAux.pm │ │ │ │ ├── Determiners.pm │ │ │ │ ├── FiniteVerbWithGender.pm │ │ │ │ ├── FutureIsNotXcomp.pm │ │ │ │ ├── LeftToRightRelations.pm │ │ │ │ ├── MWTCapitalization.pm │ │ │ │ ├── MaxOneDirectObject.pm │ │ │ │ ├── MaxOneSubject.pm │ │ │ │ ├── MweDoesNotCombineWithName.pm │ │ │ │ ├── Orphan.pm │ │ │ │ ├── PronounIsNotMark.pm │ │ │ │ ├── Punctuation.pm │ │ │ │ ├── Reflexives.pm │ │ │ │ ├── Root.pm │ │ │ │ ├── SingleRoot.pm │ │ │ │ ├── Subjunctions.pm │ │ │ │ ├── ToDoBug.pm │ │ │ │ ├── UnconvertedDependencies.pm │ │ │ │ ├── UnderscoreInForm.pm │ │ │ │ ├── XcompHasNoSubject.pm │ │ │ │ └── ZZZSearch.pm │ │ ├── Transform │ │ │ ├── AllPunctBelowTechRoot.pm │ │ │ ├── BaseTransformer.pm │ │ │ ├── ComplexVerb.pm │ │ │ ├── ComplexVerbRootFirst.pm │ │ │ ├── ComplexVerbRootLast.pm │ │ │ ├── CoordStyle.pm │ │ │ ├── FirstNameUpward.pm │ │ │ ├── InvPrepositionDownward.pm │ │ │ ├── InvSubordConjDownward.pm │ │ │ ├── MarkPunct.pm │ │ │ ├── PrepositionDownward.pm │ │ │ ├── PrepositionDownwardSimple.pm │ │ │ ├── PrepositionUpwardSimple.pm │ │ │ ├── PunctBelowPrevNode.pm │ │ │ ├── SharedModifBelowNearestMember.pm │ │ │ ├── StanfordCopulas.pm │ │ │ ├── StanfordObjects.pm │ │ │ ├── StanfordPunct.pm │ │ │ ├── StanfordTypes.pm │ │ │ ├── SubordConjDownward.pm │ │ │ └── t │ │ │ │ └── transformer_subscription.t │ │ ├── UD1To2.pm │ │ ├── UG │ │ │ └── FixUD.pm │ │ ├── UK │ │ │ └── FixUD.pm │ │ ├── UR │ │ │ └── FixUD.pm │ │ ├── Udep.pm │ │ ├── UdepIT.pm │ │ ├── UdepToPrague.pm │ │ ├── Util │ │ │ ├── CorrectDependencyInconsistencies.pm │ │ │ ├── CorrectPOSInconsistencies.pm │ │ │ ├── ExtractAfuns.pm │ │ │ ├── ExtractDependencyBigrams.pm │ │ │ ├── ExtractSurfaceNGrams.pm │ │ │ ├── ExtractTrees.pm │ │ │ └── 
PrintRoots.pm │ │ ├── VI │ │ │ └── FixUD.pm │ │ └── ZH │ │ │ ├── FixPUD.pm │ │ │ └── FixUD.pm │ ├── Import │ │ └── Sentences.pm │ ├── MLFix │ │ ├── CS │ │ │ ├── Oracle.pm │ │ │ └── ScikitLearn.pm │ │ ├── CollectEdits.pm │ │ ├── DE │ │ │ ├── Oracle.pm │ │ │ └── ScikitLearn.pm │ │ ├── Fix.pm │ │ ├── MLFix.pm │ │ ├── Mark2Fix.pm │ │ ├── MarkByOracle.pm │ │ ├── MarkByScikitLearn.pm │ │ ├── Oracle.pm │ │ └── ScikitLearn.pm │ ├── Misc │ │ ├── AbstractDialogueSlots.pm │ │ ├── AddZonesFromFile.pm │ │ ├── Anonymize │ │ │ └── CS │ │ │ │ ├── InsertAnonymizedTokensIntoOrigText.pm │ │ │ │ └── ReplaceNEsWithRandomChoice.pm │ │ ├── CopenhagenDT │ │ │ ├── BuildTreesFromOffsetIndices.pm │ │ │ ├── CreateBundlePerSentenceTuple.pm │ │ │ ├── DeleteFirstBundle.pm │ │ │ ├── FixLonelyNodes.pm │ │ │ ├── FlattenUnannotatedTrees.pm │ │ │ ├── ImportSentSegmFromExternalFiles.pm │ │ │ ├── MoveDanishTreesToSeparateBundles.pm │ │ │ ├── MoveSLTreesToSeparateBundles.pm │ │ │ ├── MoveTLTreesToSLCounterpartByAlignment.pm │ │ │ ├── MoveTreesToDanishCounterpartByAlignment.pm │ │ │ ├── MoveTreesToDanishCounterpartBySizeSimilarity.pm │ │ │ ├── MoveTreesToDanishCounterpartIfSameNumber.pm │ │ │ ├── PreannotateSyntax.pm │ │ │ ├── PrintAlignedSentences.pm │ │ │ ├── PrintDependentNeighbors.pm │ │ │ ├── PrintExtraction4Miriam.pm │ │ │ ├── ReconstructAlignmentLinks.pm │ │ │ ├── SearchDemo.pm │ │ │ ├── SearchSwitched.pm │ │ │ └── SentSegmByTagS.pm │ │ ├── CopyAtreeAttr.pm │ │ ├── Crash.pm │ │ ├── CreateWordToSentenceIndex.pm │ │ ├── DeabstractDialogueSlots.pm │ │ ├── DeleteCoordNodes.pm │ │ ├── EncodeGrammatemes.pm │ │ ├── FindSentences.pm │ │ ├── FixMissingZones.pm │ │ ├── FixNonstdAttrs.pm │ │ ├── GenerateWordformsFromJSON.pm │ │ ├── GroupBundles.pm │ │ ├── ImportATreesFromFile.pm │ │ ├── JoinBundles.pm │ │ ├── MoveNodesAfterResegment.pm │ │ ├── ProjectAndConcatAttribs.pm │ │ ├── RandomCoNLL.pm │ │ ├── ReplacePersonalNamesCS.pm │ │ ├── RestoreCoordNodes.pm │ │ ├── SampleWithoutReplacement.pm │ │ ├── Sleep.pm │ 
│ ├── TagToMorphcat.pm │ │ ├── Translog │ │ │ ├── BuildTreesFromOffsetIndices.pm │ │ │ ├── ConllDeprelToCdtDeprel.pm │ │ │ ├── MergeSentencesByAlignment.pm │ │ │ ├── MoveAlignedTargetNodes.pm │ │ │ ├── PdtStyleToCdtStyle.pm │ │ │ ├── ProjectEdgesByAlignment.pm │ │ │ ├── RemakeWildZones.pm │ │ │ ├── SegmentSentences.pm │ │ │ └── Treex2Alignment.pm │ │ ├── TreeDiffAnalysis.pm │ │ └── YALI.pm │ ├── N2N │ │ └── ProjectTreeThroughTranslation.pm │ ├── P2A │ │ ├── EN │ │ │ └── FixCoord.pm │ │ ├── NL │ │ │ └── Alpino.pm │ │ ├── Pennconverter.pm │ │ ├── StanfordConverter.pm │ │ └── TigerET.pm │ ├── Print │ │ ├── Accuracy.pm │ │ ├── AddOna.pm │ │ ├── AdjectivesWithInfinitive.pm │ │ ├── AlignedFrames.pm │ │ ├── AlignedTtrees.pm │ │ ├── AlignmentStatistics.pm │ │ ├── AtreeStats.pm │ │ ├── AtreeTransformationStats.pm │ │ ├── AttributeArrays.pm │ │ ├── Bleu.pm │ │ ├── BranchingFreq.pm │ │ ├── CS │ │ │ ├── UnsupPronCorefData.pm │ │ │ └── UnsupRelatCorefData.pm │ │ ├── ClauseDepth.pm │ │ ├── Clauses.pm │ │ ├── CoApAfunStats.pm │ │ ├── CoNLLFromPDTStyle.pm │ │ ├── CoordStats.pm │ │ ├── CorefSegmentsData.pm │ │ ├── CorefSentences.pm │ │ ├── Curriculum.pm │ │ ├── CzEngBlockIDs.pm │ │ ├── Debug.pm │ │ ├── Debug │ │ │ ├── DocumentTextHead.pm │ │ │ └── IsReferential.pm │ │ ├── DeprelStats.pm │ │ ├── DeprelStats1.pm │ │ ├── EdgeProbs.pm │ │ ├── Entropy.pm │ │ ├── EvalAlignedAtrees.pm │ │ ├── FormemeSemposMismatch.pm │ │ ├── Frames.pm │ │ ├── Garbage.pm │ │ ├── GrammatemesForTgen.pm │ │ ├── IntersetDriverStub.pm │ │ ├── ItTranslData.pm │ │ ├── ListMostDifferentTrees.pm │ │ ├── ListNonProjTrees.pm │ │ ├── MutualInformation.pm │ │ ├── MweStats.pm │ │ ├── NodeBleu.pm │ │ ├── Overall.pm │ │ ├── ParentChildStats.pm │ │ ├── ReferentialItData.pm │ │ ├── SRLLexRf.pm │ │ ├── SRLParserFeaturePrinter.pm │ │ ├── SemanticFactorsForMoses.pm │ │ ├── SentencesWithValencyFrames.pm │ │ ├── TagChanges.pm │ │ ├── TagStats.pm │ │ ├── TaggedTokensWithLemma.pm │ │ ├── TestFileNames.pm │ │ ├── TnT.pm │ │ ├── 
TokenStats.pm │ │ ├── TranslationOptions.pm │ │ ├── TranslationResume.pm │ │ ├── VWForDefiniteness.pm │ │ ├── VWForFunctors.pm │ │ ├── VWForValencyFrames.pm │ │ ├── VWVectors.pm │ │ ├── ValencyFramesForKira.pm │ │ ├── ValencyFramesForMoses.pm │ │ ├── VectorsForTM.pm │ │ ├── VectorsForTreeKenLM.pm │ │ ├── VectorsForTreeLM.pm │ │ └── WordOrderStats.pm │ ├── Project │ │ ├── Attributes.pm │ │ ├── Coreference.pm │ │ └── Tree.pm │ ├── Read │ │ ├── AlignedCoNLL.pm │ │ ├── AlignedSentences.pm │ │ ├── AlksnisPML.pm │ │ ├── Alpino.pm │ │ ├── Amr.pm │ │ ├── AttributeSentences.pm │ │ ├── BaseAlignedReader.pm │ │ ├── BaseAlignedTextReader.pm │ │ ├── BaseCoNLLReader.pm │ │ ├── BasePMLReader.pm │ │ ├── BaseReader.pm │ │ ├── BaseSplitterRole.pm │ │ ├── BaseTextReader.pm │ │ ├── BundleIds.pm │ │ ├── BundleWildAttribute.pm │ │ ├── CETLEF.pm │ │ ├── CSTS.pm │ │ ├── CdtPack.pm │ │ ├── CdtTag.pm │ │ ├── CnecXML.pm │ │ ├── CoNLL2003.pm │ │ ├── CoNLL2009.pm │ │ ├── CoNLL2012.pm │ │ ├── CoNLLU.pm │ │ ├── CoNLLX.pm │ │ ├── CoNLLXfp.pm │ │ ├── ConsumerReader.pm │ │ ├── CzengPlaintextReader.pm │ │ ├── DGA.pm │ │ ├── Deps.pm │ │ ├── Giza.pm │ │ ├── HTML.pm │ │ ├── Hali.pm │ │ ├── HaliBreaking.pm │ │ ├── MosesTrace.pm │ │ ├── Other_PML_schemas │ │ │ ├── AlksnisSchema-1.3.pml │ │ │ ├── AlksnisSchema-3.0.pml │ │ │ ├── antisDplus_schema.pml │ │ │ └── quz_schema.xml │ │ ├── PADT.pm │ │ ├── PADT_schema │ │ │ ├── deeper.schema.xml │ │ │ ├── elixir.schema.xml │ │ │ ├── morpho.schema.xml │ │ │ ├── syntax.schema.xml │ │ │ └── words.schema.xml │ │ ├── PCEDT.pm │ │ ├── PDT.pm │ │ ├── PDT_schema │ │ │ ├── adata.rng │ │ │ ├── adata_25_schema.xml │ │ │ ├── adata_30_schema.xml │ │ │ ├── adata_35_schema.xml │ │ │ ├── adata_c_schema.xml │ │ │ ├── adata_schema.xml │ │ │ ├── mdata.rng │ │ │ ├── mdata_25_schema.xml │ │ │ ├── mdata_30_schema.xml │ │ │ ├── mdata_35_schema.xml │ │ │ ├── mdata_c_schema.xml │ │ │ ├── mdata_schema.xml │ │ │ ├── pml_common.rng │ │ │ ├── pml_schema.rng │ │ │ ├── tdata.rng │ │ │ ├── 
tdata_25_schema.xml │ │ │ ├── tdata_30_schema.xml │ │ │ ├── tdata_35_schema.xml │ │ │ ├── tdata_c2_schema.xml │ │ │ ├── tdata_c_schema.xml │ │ │ ├── tdata_schema.xml │ │ │ ├── wdata.rng │ │ │ ├── wdata_25_schema.xml │ │ │ ├── wdata_30_schema.xml │ │ │ ├── wdata_35_schema.xml │ │ │ ├── wdata_c_schema.xml │ │ │ └── wdata_schema.xml │ │ ├── PEDT.pm │ │ ├── PEDT_schema │ │ │ ├── adata_eng_schema.xml │ │ │ └── tdata_eng_schema.xml │ │ ├── PennMrg.pm │ │ ├── PennPos.pm │ │ ├── ProducerReader.pm │ │ ├── QuechuaPML.pm │ │ ├── Sentences.pm │ │ ├── SentencesTSV.pm │ │ ├── Shakti.pm │ │ ├── Syntagrus.pm │ │ ├── TEI.pm │ │ ├── TMT.pm │ │ ├── TMT_schema │ │ │ └── tmt_schema.xml │ │ ├── Text.pm │ │ ├── Tiger.pm │ │ ├── Treex.pm │ │ ├── Valesco.pm │ │ ├── Vertical.pm │ │ ├── WikiDump.pm │ │ ├── WordAlignmentXML.pm │ │ ├── YAML.pm │ │ └── t │ │ │ ├── aligned_sentences.t │ │ │ ├── base.t │ │ │ ├── base_aligned.t │ │ │ ├── base_aligned_text.t │ │ │ ├── cdt-test-0005-da.tag │ │ │ ├── cdt-test-0005-es-lotte.tag │ │ │ ├── cdt-test-0005-it-lisa.tag │ │ │ ├── cdt_tag.t │ │ │ ├── gzip.t │ │ │ ├── pcedt.t │ │ │ ├── word_alignment_xml.t │ │ │ └── word_alignment_xml_sample.wa │ ├── Sample │ │ ├── Base.pm │ │ └── Trees.pm │ ├── Segment │ │ ├── EstimateInterlinkCounts.pm │ │ ├── GreedyRegSuggestBreaks.pm │ │ ├── NaiveSuggestBreaks.pm │ │ ├── OptimalSuggestBreaks.pm │ │ ├── RandomSuggestBreaks.pm │ │ ├── RandomizedSuggestBreaks.pm │ │ ├── SetBlockIdsAtRandom.pm │ │ ├── SetInterlinkCounts.pm │ │ └── SuggestSegmentBreaks.pm │ ├── SemevalABSA │ │ ├── Adverb.pm │ │ ├── AnnotateWithRules.pm │ │ ├── BaseRule.pm │ │ ├── But.pm │ │ ├── Coord.pm │ │ ├── FirstNounAboveSubjAdj.pm │ │ ├── KnownAspect.pm │ │ ├── MarkCategories.pm │ │ ├── MarkSentiment.pm │ │ ├── MoveABSAFromWild.pm │ │ ├── MoveABSAToWild.pm │ │ ├── MoveABSAToWildCandidates.pm │ │ ├── SubjectOfSubjectivePat.pm │ │ ├── VerbActants.pm │ │ ├── VerbonominalPatientNoun.pm │ │ └── VerbonominalSubjectSubjAdj.pm │ ├── T2A │ │ ├── 
AddAppositionPunct.pm │ │ ├── AddArticles.pm │ │ ├── AddAuxVerbModalTense.pm │ │ ├── AddCoordPunct.pm │ │ ├── AddInfinitiveParticles.pm │ │ ├── AddInterleavedFormemeNodes.pm │ │ ├── AddNegationParticle.pm │ │ ├── AddParentheses.pm │ │ ├── AddPrepos.pm │ │ ├── AddSentFinalPunct.pm │ │ ├── AddSentmodPunct.pm │ │ ├── AddSubconjs.pm │ │ ├── AddSubordClausePunct.pm │ │ ├── AmodCoordEnhancedUD.pm │ │ ├── AnalyticalReorder.pm │ │ ├── BG │ │ │ ├── AddAuxVerbModalTense.pm │ │ │ ├── AddAuxVerbs.pm │ │ │ └── MoveDefiniteness.pm │ │ ├── CS │ │ │ ├── AddAppositionPunct.pm │ │ │ ├── AddAuxVerbCompoundFuture.pm │ │ │ ├── AddAuxVerbCompoundPassive.pm │ │ │ ├── AddAuxVerbCompoundPast.pm │ │ │ ├── AddAuxVerbConditional.pm │ │ │ ├── AddAuxVerbModal.pm │ │ │ ├── AddClausalExpletivePronouns.pm │ │ │ ├── AddCoordPunct.pm │ │ │ ├── AddPrepos.pm │ │ │ ├── AddReflexParticles.pm │ │ │ ├── AddSentFinalPunct.pm │ │ │ ├── AddSubconjs.pm │ │ │ ├── AddSubordClausePunct.pm │ │ │ ├── CapitalizeNamedEntitiesAfterTransfer.pm │ │ │ ├── CapitalizeSentStart.pm │ │ │ ├── CheckCommas.pm │ │ │ ├── ChooseMlemmaForPersPron.pm │ │ │ ├── CopyTtree.pm │ │ │ ├── DeleteEmptyNouns.pm │ │ │ ├── DeleteSuperfluousAuxCP.pm │ │ │ ├── DistinguishHomonymousMlemmas.pm │ │ │ ├── DropSubjPersProns.pm │ │ │ ├── FixPossessiveAdjs.pm │ │ │ ├── GenerateCompoundVerbforms.pm │ │ │ ├── GenerateWordforms.pm │ │ │ ├── ImposeAttrAgr.pm │ │ │ ├── ImposeComplAgr.pm │ │ │ ├── ImposePronZAgr.pm │ │ │ ├── ImposeRelPronAgr.pm │ │ │ ├── ImposeSubjpredAgr.pm │ │ │ ├── InitMorphcat.pm │ │ │ ├── MarkClauseHeads.pm │ │ │ ├── MarkLexVerbChildren.pm │ │ │ ├── MarkSubject.pm │ │ │ ├── MoveCliticsToWackernagel.pm │ │ │ ├── MoveQuotes.pm │ │ │ ├── ResolveVerbs.pm │ │ │ ├── ReverseNumberNounDependency.pm │ │ │ ├── SetFormemes.pm │ │ │ ├── TLemmas.xfst │ │ │ ├── TransformTLemmas.pm │ │ │ └── VocalizePrepos.pm │ │ ├── CapitalizeDirectSpeech.pm │ │ ├── CapitalizeSentStart.pm │ │ ├── CopyFunctorsToMisc.pm │ │ ├── CopyTtree.pm │ │ ├── 
DeleteGeneratedNodes.pm │ │ ├── DeleteSuperfluousAuxCP.pm │ │ ├── DropPersPronSb.pm │ │ ├── DropPersPronSbImper.pm │ │ ├── EN │ │ │ ├── AddAdjAdvGradation.pm │ │ │ ├── AddAdjAdvNegation.pm │ │ │ ├── AddAppositionPunct.pm │ │ │ ├── AddArticles.pm │ │ │ ├── AddAuxVerbCompoundPassive.pm │ │ │ ├── AddAuxVerbInter.pm │ │ │ ├── AddAuxVerbModalTense.pm │ │ │ ├── AddAuxVerbThereIs.pm │ │ │ ├── AddCoordPunct.pm │ │ │ ├── AddExistentialThere.pm │ │ │ ├── AddInfinitiveParticles.pm │ │ │ ├── AddIntroPunct.pm │ │ │ ├── AddPhrasalPunct.pm │ │ │ ├── AddPhrasalVerbParticles.pm │ │ │ ├── AddPossessiveMarkers.pm │ │ │ ├── AddPrepos.pm │ │ │ ├── AddSubconjs.pm │ │ │ ├── AddSubordClausePunct.pm │ │ │ ├── AddVerbNegation.pm │ │ │ ├── CapitalizeSentStart.pm │ │ │ ├── DeleteYouInImperatives.pm │ │ │ ├── FixFlectErrors.pm │ │ │ ├── FixLemmas.pm │ │ │ ├── FixThereIs.pm │ │ │ ├── GenerateWordforms.pm │ │ │ ├── GenerateWordformsMorphodita.pm │ │ │ ├── ImposeSubjpredAgr.pm │ │ │ ├── IndefArticlePhonetics.pm │ │ │ ├── InitMorphcat.pm │ │ │ ├── MarkSubject.pm │ │ │ ├── MoveRhematizers.pm │ │ │ ├── SbAuxvReorder.pm │ │ │ ├── WordOrder.pm │ │ │ └── WordOrderTools.pm │ │ ├── ES │ │ │ ├── AddArticles.pm │ │ │ ├── AddAuxVerbCompoundPassive.pm │ │ │ ├── AddAuxVerbModalTense.pm │ │ │ ├── AddAuxVerbTense.pm │ │ │ ├── AddComparatives.pm │ │ │ ├── AddPrepos.pm │ │ │ ├── AddReflexive.pm │ │ │ ├── AddSentFinalPunct.pm │ │ │ ├── AddSentmodPunct.pm │ │ │ ├── AddSubconjs.pm │ │ │ ├── AddSubordClausePunct.pm │ │ │ ├── DeleteSuperfluousAuxCP.pm │ │ │ ├── FixAttributeOrder.pm │ │ │ ├── GenerateWordforms.pm │ │ │ ├── GenerateWordformsPerl.pm │ │ │ ├── InitMorphcat.pm │ │ │ └── MoveRhematizers.pm │ │ ├── EU │ │ │ ├── AddArticles.pm │ │ │ ├── AddAuxVerbModal.pm │ │ │ ├── AddAuxVerbModalTense.pm │ │ │ ├── AddAuxVerbTense.pm │ │ │ ├── AddNegationParticle.pm │ │ │ ├── AddPrepos.pm │ │ │ ├── AddSentFinalPunct.pm │ │ │ ├── AddSubconjs.pm │ │ │ ├── AddSubordClausePunct.pm │ │ │ ├── DropPersPron.pm │ │ │ ├── 
FixGramCases.pm │ │ │ ├── FixNegativeVerbOrder.pm │ │ │ ├── FixOrder.pm │ │ │ ├── FixTransitiveAgreement.pm │ │ │ ├── GenerateGazeteerItems.pm │ │ │ ├── GenerateWordforms.pm │ │ │ ├── ImposeSubjObjpredAgr.pm │ │ │ └── MarkSubject.pm │ │ ├── FixNounGender.pm │ │ ├── GenerateA2TRefs.pm │ │ ├── GenerateEmptyNodes.pm │ │ ├── GenerateWordforms.pm │ │ ├── ImposeAttrAgr.pm │ │ ├── ImposeSubjpredAgr.pm │ │ ├── InitMorphcat.pm │ │ ├── InitMorphcatPT.pm │ │ ├── MarkSubject.pm │ │ ├── MorphcatToPdtTagRegexp.pm │ │ ├── NL │ │ │ ├── AddArticles.pm │ │ │ ├── AddAuxVerbCompoundPassive.pm │ │ │ ├── AddAuxVerbModalTense.pm │ │ │ ├── AddFormalSubject.pm │ │ │ ├── AddInfinitiveParticles.pm │ │ │ ├── AddNegationParticle.pm │ │ │ ├── AddPrepos.pm │ │ │ ├── AddReflexParticles.pm │ │ │ ├── AddSeparableVerbPrefixes.pm │ │ │ ├── AddSubconjs.pm │ │ │ ├── Alpino │ │ │ │ ├── ADTTreeViterbi.pm │ │ │ │ ├── AddCoindexSubjects.pm │ │ │ │ ├── CoindexNodes.pm │ │ │ │ ├── FixAuxVerbs.pm │ │ │ │ ├── FixCompoundNouns.pm │ │ │ │ ├── FixFormalSubjects.pm │ │ │ │ ├── FixInfinitiveParticles.pm │ │ │ │ ├── FixMWUs.pm │ │ │ │ ├── FixNamedEntities.pm │ │ │ │ ├── FixPrec.pm │ │ │ │ ├── FixQuestionsAndRelClauses.pm │ │ │ │ ├── MWUs.pm │ │ │ │ ├── MarkStype.pm │ │ │ │ └── SetAdtRel.pm │ │ │ ├── CopyTtree.pm │ │ │ ├── FixLemmas.pm │ │ │ ├── FixMultiwordSurnames.pm │ │ │ ├── FixPronominalAdverbs.pm │ │ │ ├── GenerateWordforms.pm │ │ │ ├── GenerateWordformsAlpino.pm │ │ │ ├── HideVerbPrefixes.pm │ │ │ ├── InitMorphcat.pm │ │ │ ├── MoveFiniteVerbs.pm │ │ │ ├── MoveVerbsToClauseEnd.pm │ │ │ └── RestoreVerbPrefixes.pm │ │ ├── PT │ │ │ ├── AddArticles.pm │ │ │ ├── AddAuxVerbCompoundPassive.pm │ │ │ ├── AddAuxVerbModalTense.pm │ │ │ ├── AddComparatives.pm │ │ │ ├── AddConditional.pm │ │ │ ├── AddGender.pm │ │ │ ├── AddPrepos.pm │ │ │ ├── AddVerbNegation.pm │ │ │ ├── CliticExceptions.pm │ │ │ ├── DropSubjPersProns.pm │ │ │ ├── FixPossessivePronouns.pm │ │ │ ├── GeneratePronouns.pm │ │ │ ├── GenerateWordforms.pm │ │ │ 
├── GenerateWordformsPerl.pm │ │ │ ├── ImposeFormeme.pm │ │ │ ├── ImposeLemma.pm │ │ │ ├── InitMorphcat.pm │ │ │ ├── MarkSubject.pm │ │ │ ├── MoveRhematizers.pm │ │ │ ├── PrepositionContraction.pm │ │ │ └── SecondPersonPoliteness.pm │ │ ├── ProjectClauseNumber.pm │ │ ├── RU │ │ │ ├── AddAuxVerbConditional.pm │ │ │ ├── AddAuxVerbModal.pm │ │ │ ├── AddNegation.pm │ │ │ ├── ChooseMlemmaForPersPron.pm │ │ │ ├── DropCopula.pm │ │ │ ├── GenerateWordforms.pm │ │ │ └── InitMorphcat.pm │ │ ├── RenamePunctuationTLemmas.pm │ │ └── SentenceNegationToVerb.pm │ ├── T2P │ │ └── CopyTtree.pm │ ├── T2T │ │ ├── AssignDefaultGrammatemes.pm │ │ ├── AssignWordnetHyperchain.pm │ │ ├── CS2CS │ │ │ ├── AddFrequentPrepositions.pm │ │ │ ├── Deepfix.pm │ │ │ ├── DropSubjPersProns.pm │ │ │ ├── FixGrammatemes.pm │ │ │ ├── FixInfrequentFormemes.pm │ │ │ ├── FixInfrequentNouns.pm │ │ │ ├── FixInfrequentPrepositions.pm │ │ │ ├── FixNegation.pm │ │ │ ├── FixTense.pm │ │ │ ├── FormemeTLemmaAgreement.pm │ │ │ ├── MarkClauseHeads.pm │ │ │ ├── MarkNegationCueAndScope.pm │ │ │ ├── ParaphraseSimple.pm │ │ │ ├── PrecomputeNodeInfo.pm │ │ │ ├── ProjectChangedToA.pm │ │ │ └── RemoveInfrequentPrepositions.pm │ │ ├── CS2EN │ │ │ ├── AddDefiniteness.pm │ │ │ ├── DeleteSuperfluousNodes.pm │ │ │ ├── FixDoubleNegative.pm │ │ │ ├── FixForeignNames.pm │ │ │ ├── FixGrammatemesAfterTransfer.pm │ │ │ ├── RearrangeNounCompounds.pm │ │ │ ├── RemoveInfinitiveSubjects.pm │ │ │ ├── RemovePerspronGender.pm │ │ │ ├── ReplaceSomeWithIndefArticle.pm │ │ │ ├── TrFAddVariants.pm │ │ │ ├── TrFAddVariantsInterpol.pm │ │ │ ├── TrFTryRules.pm │ │ │ ├── TrLAddVariants.pm │ │ │ ├── TrLAddVariantsInterpol.pm │ │ │ ├── TrLFPhrases.pm │ │ │ ├── TrLFixTMErrors.pm │ │ │ └── TrLTryRules.pm │ │ ├── CS2RU │ │ │ ├── FixDateTime.pm │ │ │ ├── FixValency.pm │ │ │ ├── RuleBasedFormemes.pm │ │ │ ├── TrLAddVariants.pm │ │ │ └── TrLTryRules.pm │ │ ├── CopyCorefFromAlignment.pm │ │ ├── CopyFunctorsFromAlignment.pm │ │ ├── CopyTtree.pm │ │ ├── 
CopyValencyFramesFromAlignment.pm │ │ ├── CutVariants.pm │ │ ├── EN2CS │ │ │ ├── AddNounGender.pm │ │ │ ├── AddPersPronBelowVfin.pm │ │ │ ├── AddRelpronBelowRc.pm │ │ │ ├── AddVerbAspect.pm │ │ │ ├── ChangeCorToPersPron.pm │ │ │ ├── CutVariants.pm │ │ │ ├── DeletePossPronBeforeVlastni.pm │ │ │ ├── DeleteSuperfluousTnodes.pm │ │ │ ├── FindGramCorefForReflPron.pm │ │ │ ├── FixAdjComplAgreement.pm │ │ │ ├── FixDateTime.pm │ │ │ ├── FixGrammatemesAfterTransfer.pm │ │ │ ├── FixMoney.pm │ │ │ ├── FixNegation.pm │ │ │ ├── FixTransferChoices.pm │ │ │ ├── MarkNewRelClauses.pm │ │ │ ├── MoveAdjsBeforeNouns.pm │ │ │ ├── MoveDicendiCloserToDsp.pm │ │ │ ├── MoveEnoughBeforeAdj.pm │ │ │ ├── MoveGenitivesRight.pm │ │ │ ├── MoveJesteBeforeVerb.pm │ │ │ ├── MoveNounAttrAfterNouns.pm │ │ │ ├── MovePersPronNextToVerb.pm │ │ │ ├── MoveRelClauseRight.pm │ │ │ ├── NeutPersPronGenderFromAntec.pm │ │ │ ├── OverridePpWithPhraseTr.pm │ │ │ ├── PrunePersonalNameVariants.pm │ │ │ ├── PruneVariants.pm │ │ │ ├── RemoveUnpassivizableVariants.pm │ │ │ ├── ReplaceVerbWithAdj.pm │ │ │ ├── TrFAddVariants.pm │ │ │ ├── TrFAddVariantsInterpol.pm │ │ │ ├── TrFRerank.pm │ │ │ ├── TrFRerank2.pm │ │ │ ├── TrFTryRules.pm │ │ │ ├── TrLAddVariants.pm │ │ │ ├── TrLAddVariantsBackoff.pm │ │ │ ├── TrLAddVariantsInterpol.pm │ │ │ ├── TrLAddVariantsVW.pm │ │ │ ├── TrLAddVariantsVW2.pm │ │ │ ├── TrLAddVariants_coref.pm │ │ │ ├── TrLFCompounds.pm │ │ │ ├── TrLFJointStatic.pm │ │ │ ├── TrLFNumeralsByRules.pm │ │ │ ├── TrLFPhrases.pm │ │ │ ├── TrLFTreeViterbi.pm │ │ │ ├── TrLFemaleSurnames.pm │ │ │ ├── TrLFilterAspect.pm │ │ │ ├── TrLHackNNP.pm │ │ │ ├── TrLNumbers.pm │ │ │ ├── TrLPersPronIt.pm │ │ │ ├── TrLPersPronRefl.pm │ │ │ ├── TrLTryRules.pm │ │ │ ├── TrL_ITdomain.pm │ │ │ ├── TransformPassiveConstructions.pm │ │ │ ├── TurnTextCorefToGramCoref.pm │ │ │ └── ValencyRelatedRules.pm │ │ ├── EN2EN │ │ │ └── TrLFTreeViterbi.pm │ │ ├── EN2ES │ │ │ ├── AddNounGender.pm │ │ │ ├── FixDefinitiveness.pm │ │ │ ├── 
FixThereIs.pm │ │ │ └── TrLTryRules.pm │ │ ├── EN2EU │ │ │ ├── FixDefinitiveness.pm │ │ │ ├── FixPresentContinuous.pm │ │ │ ├── FixThereIs.pm │ │ │ ├── FixYouPl.pm │ │ │ ├── RemoveRelPron.pm │ │ │ ├── TrLTryRules.pm │ │ │ └── TranslateRelPron.pm │ │ ├── EN2NL │ │ │ ├── AddNounGender.pm │ │ │ ├── FixCompounds.pm │ │ │ ├── TrLFFixTMErrors.pm │ │ │ └── TrLFPhrases.pm │ │ ├── EN2PT │ │ │ ├── AddRelpronBelowRc.pm │ │ │ ├── FixPersPron.pm │ │ │ ├── FixPunctuation.pm │ │ │ ├── FixThereIs.pm │ │ │ ├── MoveAdjsAfterNouns.pm │ │ │ ├── Noun1Noun2_To_Noun2DeNoun1.pm │ │ │ ├── TrGazeteerItems.pm │ │ │ ├── TrL_ITdomain.pm │ │ │ └── TurnVerbLemmaToAdjectives.pm │ │ ├── FixFormemeWrtNodetype.pm │ │ ├── FixGrammatemesAfterTransfer.pm │ │ ├── FixPunctFormemes.pm │ │ ├── FormemeTLemmaAgreement.pm │ │ ├── JA2CS │ │ │ ├── MoveVerbs.pm │ │ │ ├── TrFAddVariants.pm │ │ │ └── TrLAddVariants.pm │ │ ├── JA2EN │ │ │ ├── TrFAddVariants.pm │ │ │ └── TrLAddVariants.pm │ │ ├── PT2EN │ │ │ ├── FixThereIs.pm │ │ │ ├── FixValency.pm │ │ │ ├── MoveAdjsBeforeNouns.pm │ │ │ ├── RestoreUrl.pm │ │ │ └── TrGazeteerItems.pm │ │ ├── ParaphraseSimple.pm │ │ ├── PosFromTLemma.pm │ │ ├── PosToTLemma.pm │ │ ├── ProjectChangedToA.pm │ │ ├── ProjectSelectedWild.pm │ │ ├── ProjectTreeThroughAlignment.pm │ │ ├── RecoverUnknownLemmas.pm │ │ ├── RehangToEffParents.pm │ │ ├── RehangToOrigParents.pm │ │ ├── SelectCompatibleTlemmaFormeme.pm │ │ ├── SetAClauseNumber.pm │ │ ├── SetClauseNumber.pm │ │ ├── SetDefinitenessVW.pm │ │ ├── TbxParser.pm │ │ ├── TrAddVariantsRole.pm │ │ ├── TrBaseAddVariantsInterpol.pm │ │ ├── TrFAddVariants.pm │ │ ├── TrFAddVariantsInterpol.pm │ │ ├── TrGazeteerItems.pm │ │ ├── TrLAddVariants.pm │ │ ├── TrLAddVariantsInterpol.pm │ │ ├── TrLApplyTbxDictionary.pm │ │ └── TrUseMemcachedModel.pm │ ├── T2TAMR │ │ ├── AddNegNodes.pm │ │ ├── ApplyRules.pm │ │ ├── CopyTtree.pm │ │ ├── CreateTAMRfromT.pm │ │ ├── DeleteMarkedNodes.pm │ │ ├── FixCoreference.pm │ │ ├── FixNamedEntities.pm │ │ ├── 
FunctorsToAMRLabels.pm │ │ ├── MarkRules.pm │ │ ├── ReadRules.pm │ │ └── RulesSuggestion.pm │ ├── T2U │ │ ├── AdjustStructure.pm │ │ ├── BuildUtree.pm │ │ ├── CS │ │ │ ├── AdjustStructure.pm │ │ │ ├── BuildUtree.pm │ │ │ └── ConvertCoreference.pm │ │ ├── ConvertCoreference.pm │ │ ├── LA │ │ │ ├── AdjustStructure.pm │ │ │ ├── BuildUtree.pm │ │ │ └── ConvertCoreference.pm │ │ └── t │ │ │ └── adjust_coap.t │ ├── Test │ │ ├── BaseTester.pm │ │ ├── FieldCanHaveWideChars.pm │ │ ├── ParameterCanHaveWideChars.pm │ │ ├── Phrase.pm │ │ └── ValidReferences.pm │ ├── Treelets │ │ ├── AddTwonodeScores.pm │ │ ├── ExtractEdgeTreelets.pm │ │ ├── ExtractStrictTreelets.pm │ │ ├── ExtractVW.pm │ │ ├── ExtractVW2.pm │ │ ├── SrcFeatures.pm │ │ ├── SrcFeatures2.pm │ │ ├── TrEasyFirst.pm │ │ ├── TrEasyFirstChain.pm │ │ ├── TrEasyFirstLM.pm │ │ ├── TrEasyFirstSplit.pm │ │ ├── TrInterpol.pm │ │ ├── TrOneNode.pm │ │ ├── TrOneNodeNeedsCopyTtree.pm │ │ └── TrVW.pm │ ├── Tutorial │ │ ├── MarkHeads.pm │ │ ├── P2A.pm │ │ ├── PrintDefiniteDescriptions.pm │ │ ├── ReorderSVO2SOV.pm │ │ ├── Solution │ │ │ ├── MarkHeads.pm │ │ │ ├── P2A.pm │ │ │ ├── PrintDefiniteDescriptions.pm │ │ │ ├── ReorderSVO2SOV.pm │ │ │ └── StemTamil.pm │ │ └── StemTamil.pm │ ├── Util │ │ ├── DefinedAttr.pm │ │ ├── Eval.pm │ │ ├── Find.pm │ │ ├── FixInvalidIDs.pm │ │ ├── FixPMLStructure.pm │ │ ├── PMLTQ.pm │ │ ├── PMLTQMark.pm │ │ └── SetGlobal.pm │ ├── W2A │ │ ├── AnalysisWithAlignedTrees.pm │ │ ├── AppendSynsetIdToLemmas.pm │ │ ├── BaseChunkParser.pm │ │ ├── CS │ │ │ ├── FixAtreeAfterMcD.pm │ │ │ ├── FixGuessedLemmas.pm │ │ │ ├── FixIsMember.pm │ │ │ ├── FixMorphoErrors.pm │ │ │ ├── FixPrepositionalCase.pm │ │ │ ├── FixReflexivePronouns.pm │ │ │ ├── FixReflexiveTantum.pm │ │ │ ├── LabelMIRA.pm │ │ │ ├── LabelMSTAdapted.pm │ │ │ ├── ParseMST.pm │ │ │ ├── ParseMSTAdapted.pm │ │ │ ├── ParseMSTperl.pm │ │ │ ├── ParseRules.pm │ │ │ ├── Segment.pm │ │ │ ├── TagFeaturama.pm │ │ │ ├── TagMorce.pm │ │ │ ├── TagMorphoDiTa.pm │ │ │ └── 
Tokenize.pm │ │ ├── ConvertTags.pm │ │ ├── CopyAttribute.pm │ │ ├── CopyTagsFromFile.pm │ │ ├── DE │ │ │ ├── FixPronouns.pm │ │ │ ├── LemmatizeMate.pm │ │ │ ├── ParseMate.pm │ │ │ ├── TagStanford.pm │ │ │ └── Tokenize.pm │ │ ├── DefaultDepRel.pm │ │ ├── Delexicalize.pm │ │ ├── EN │ │ │ ├── FixAtree.pm │ │ │ ├── FixConjThat.pm │ │ │ ├── FixControlVerbs.pm │ │ │ ├── FixDicendiVerbs.pm │ │ │ ├── FixIsMember.pm │ │ │ ├── FixMultiwordPrepAndConj.pm │ │ │ ├── FixNominalGroups.pm │ │ │ ├── FixSharedSubjects.pm │ │ │ ├── FixTags.pm │ │ │ ├── FixTagsAfterParse.pm │ │ │ ├── FixTagsImperatives.pm │ │ │ ├── FixTagsQuotes.pm │ │ │ ├── FixTokenization.pm │ │ │ ├── GazeteerMatch.pm │ │ │ ├── HideIT.pm │ │ │ ├── LabelMIRA.pm │ │ │ ├── Lemmatize.pm │ │ │ ├── MarkCheckCommas.pm │ │ │ ├── NormalizeForms.pm │ │ │ ├── ParseCharniak.pm │ │ │ ├── ParseFanse.pm │ │ │ ├── ParseMST.pm │ │ │ ├── ParseMSTperl.pm │ │ │ ├── ParseMalt.pm │ │ │ ├── ParseZpar.pm │ │ │ ├── PreferImperatives.pm │ │ │ ├── QtHackTags.pm │ │ │ ├── QuotesStyle.pm │ │ │ ├── RehangConllToPdtStyle.pm │ │ │ ├── RehangStanfordCoordToPdtStyle.pm │ │ │ ├── Segment.pm │ │ │ ├── SetAfun.pm │ │ │ ├── SetAfunAfterMcD.pm │ │ │ ├── SetAfunAuxCPCoord.pm │ │ │ ├── SetIsMemberFromDeprel.pm │ │ │ ├── TagFeaturama.pm │ │ │ ├── TagLinguaEn.pm │ │ │ ├── TagMorce.pm │ │ │ ├── TagMorphoDiTa.pm │ │ │ ├── TagStanford.pm │ │ │ ├── Tokenize.pm │ │ │ └── t │ │ │ │ └── lingua_en.t │ │ ├── ES │ │ │ ├── FixMultiwordPrepAndConj.pm │ │ │ ├── FixTagAndParse.pm │ │ │ ├── TagAndParse.pm │ │ │ └── Tokenize.pm │ │ ├── EU │ │ │ ├── FixModalVerbs.pm │ │ │ ├── FixMultiwordPrepAndConj.pm │ │ │ ├── FixTagAndParse.pm │ │ │ ├── TagAndParse.pm │ │ │ ├── Tokenize.pm │ │ │ └── TokenizeAndParse.pm │ │ ├── EscapeMoses.pm │ │ ├── FR │ │ │ ├── TagMElt.pm │ │ │ ├── TagStanford.pm │ │ │ └── Tokenize.pm │ │ ├── FixAuxLeaves.pm │ │ ├── FixNonleafAuxC.pm │ │ ├── FixQuotes.pm │ │ ├── FromWSD.pm │ │ ├── GazeteerMatch.pm │ │ ├── HideGazeteerItems.pm │ │ ├── HideIT.pm │ │ ├── JA 
│ │ │ ├── FixCopulas.pm │ │ │ ├── FixInterpunction.pm │ │ │ ├── FixPeriod.pm │ │ │ ├── FixTagsUD.pm │ │ │ ├── ParseCabocha.pm │ │ │ ├── ParseJDEPP.pm │ │ │ ├── RehangAuxVerbs.pm │ │ │ ├── RehangConjunctions.pm │ │ │ ├── RehangCoordinations.pm │ │ │ ├── RehangCopulas.pm │ │ │ ├── RehangNouns.pm │ │ │ ├── RehangParticleChildren.pm │ │ │ ├── RehangParticles.pm │ │ │ ├── RomanizeTags.pm │ │ │ ├── SetAfun.pm │ │ │ ├── SetAfunParticles.pm │ │ │ ├── TagMeCab.pm │ │ │ └── t │ │ │ │ ├── parse_jdepp.t │ │ │ │ └── tag_mecab.t │ │ ├── LA │ │ │ ├── ParsingLatin.pm │ │ │ ├── Segment.pm │ │ │ ├── TagTreeTaggerIT.pm │ │ │ └── Tokenize.pm │ │ ├── LabelMIRA.pm │ │ ├── MarkChunks.pm │ │ ├── NL │ │ │ └── Tokenize.pm │ │ ├── NormalizeForms.pm │ │ ├── PT │ │ │ ├── ConcatenateTokens.pm │ │ │ ├── FixAfuns.pm │ │ │ ├── FixTags.pm │ │ │ ├── GazeteerMatch.pm │ │ │ ├── LXSuite.pm │ │ │ ├── Parse.pm │ │ │ └── Tokenize.pm │ │ ├── ParseLeftBranching.pm │ │ ├── ParseMST.pm │ │ ├── ParseMSTperl.pm │ │ ├── ParseMalt.pm │ │ ├── ParseRandom.pm │ │ ├── ParseRightBranching.pm │ │ ├── ParseUniversal.pm │ │ ├── RU │ │ │ ├── FixPronouns.pm │ │ │ ├── ParseMalt.pm │ │ │ ├── Segment.pm │ │ │ └── Tokenize.pm │ │ ├── ReplaceLemmasWithSynsetId.pm │ │ ├── ResegmentSentences.pm │ │ ├── Restuff.pm │ │ ├── RunDocWSD.pm │ │ ├── Segment.pm │ │ ├── SegmentOnNewlines.pm │ │ ├── TA │ │ │ ├── FixAmbiguousTags.pm │ │ │ ├── FixLemmas.pm │ │ │ ├── FixTags.pm │ │ │ ├── Lemmatization.pm │ │ │ ├── RuleBasedParser.pm │ │ │ ├── RuleBasedTagger.pm │ │ │ ├── Segment.pm │ │ │ ├── SetAfun.pm │ │ │ └── Tokenize.pm │ │ ├── Tag.pm │ │ ├── TagHunPoS.pm │ │ ├── TagMorphoDiTa.pm │ │ ├── TagStanford.pm │ │ ├── TagTnT.pm │ │ ├── TagTreeTagger.pm │ │ ├── ToWSD.pm │ │ ├── Tokenize.pm │ │ ├── TokenizeMorphoDiTa.pm │ │ ├── TokenizeMoses.pm │ │ ├── TokenizeOnWhitespace.pm │ │ ├── TruecaseMoses.pm │ │ ├── UDPipe.pm │ │ ├── WSD.pm │ │ └── t │ │ │ ├── resegment_sentences.t │ │ │ ├── segment_on_nl.t │ │ │ ├── tokenize.t │ │ │ └── 
tokenize_on_whitespace.t │ ├── W2W │ │ ├── AddNodeIdPrefix.pm │ │ ├── CopySentence.pm │ │ ├── Deromajize.pm │ │ ├── EstimateNoSpaceAfter.pm │ │ ├── GoogleTranslate.pm │ │ ├── InferNoSpaceAfterFromText.pm │ │ ├── MT │ │ │ └── Gloss.pm │ │ ├── NormalizeCzechSentence.pm │ │ ├── NormalizeEnglishSentence.pm │ │ ├── NormalizeJapaneseSentence.pm │ │ ├── ProjectTokenization.pm │ │ ├── RemoveLeadingTrailingSpaces.pm │ │ ├── ResegmentSentencesOnX.pm │ │ ├── RestoreNoSpaceAfter.pm │ │ ├── TA │ │ │ ├── CollapseAgglutination.pm │ │ │ └── Transliteration.pm │ │ ├── Translate.pm │ │ └── Translit.pm │ └── Write │ │ ├── ADTXML.pm │ │ ├── Alignments.pm │ │ ├── Amr.pm │ │ ├── AmrAligned.pm │ │ ├── AmrAlignedCrossLang.pm │ │ ├── AmrForTreeSurgeon.pm │ │ ├── Arff.pm │ │ ├── ArffWriting.pm │ │ ├── AttributeParameterized.pm │ │ ├── AttributeSentences.pm │ │ ├── AttributeSentencesAligned.pm │ │ ├── AttributeStats.pm │ │ ├── BaseTextWriter.pm │ │ ├── BaseWriter.pm │ │ ├── BundleIds.pm │ │ ├── BundleWildAttributeDump.pm │ │ ├── CdtTag.pm │ │ ├── CoNLL2003.pm │ │ ├── CoNLL2009.pm │ │ ├── CoNLLU.pm │ │ ├── CoNLLX.pm │ │ ├── ConllLike.pm │ │ ├── EVALD.pm │ │ ├── FS.pm │ │ ├── Factored.pm │ │ ├── LayerAttributes │ │ ├── AlignedFormWithoutPrefix.pm │ │ ├── AlignedTreeDistances.pm │ │ ├── AttributeModifier.pm │ │ ├── BracketedVerbform.pm │ │ ├── CoNLLMorphCat.pm │ │ ├── CoNLLUfeats.pm │ │ ├── CoNLLUmisc.pm │ │ ├── CzechCoarseTag.pm │ │ ├── CzechMLemmaTrunc.pm │ │ ├── CzechMorphCat.pm │ │ ├── Determiner.pm │ │ ├── Distance.pm │ │ ├── FirstChar.pm │ │ ├── FunctorsFromVallex.pm │ │ ├── IsActant.pm │ │ ├── IsModal.pm │ │ ├── IsValency.pm │ │ ├── LemmaFormDiff.pm │ │ ├── LemmaFormDist.pm │ │ ├── Matching.pm │ │ ├── MatchingOrEmpty.pm │ │ ├── Missing.pm │ │ ├── NegationCueAndScope.pm │ │ ├── NumberOfEngVallexFrames.pm │ │ ├── NumberOfVallexFrames.pm │ │ ├── Order.pm │ │ ├── SemposTrunc.pm │ │ ├── SplitFormeme.pm │ │ ├── SubjectPerson.pm │ │ ├── Suffixes.pm │ │ ├── SyntheticFuture.pm │ │ ├── 
TLemmaSempos.pm │ │ ├── TagWords.pm │ │ ├── TreeDistance.pm │ │ └── string_distances.py │ │ ├── LayerParameterized.pm │ │ ├── LemmatizedBitexts.pm │ │ ├── MST.pm │ │ ├── Manatee.pm │ │ ├── ManateeU.pm │ │ ├── MosesTree.pm │ │ ├── MrpJSON.pm │ │ ├── NAF.pm │ │ ├── NERHighlightWriter.pm │ │ ├── NERHtmlWriter.pm │ │ ├── Negations.pm │ │ ├── PCEDTAlignment.pm │ │ ├── PDT.pm │ │ ├── PEDT.pm │ │ ├── ParallelSentences.pm │ │ ├── PennMrg.pm │ │ ├── SDP2014.pm │ │ ├── SDP2015.pm │ │ ├── SemEval2010.pm │ │ ├── Senseval2.pm │ │ ├── Sentences.pm │ │ ├── SentencesTSV.pm │ │ ├── SgmMTEval.pm │ │ ├── Stanford.pm │ │ ├── Text.pm │ │ ├── TextModeTrees.pm │ │ ├── ToBundleAttr.pm │ │ ├── TreesTXT.pm │ │ ├── Treex.pm │ │ ├── UMR.pm │ │ ├── ViewJSON.pm │ │ └── YAML.pm │ ├── CS.pm │ ├── Core.pm │ ├── Core │ ├── Block.pm │ ├── Bundle.pm │ ├── BundleZone.pm │ ├── CacheBlock.pm │ ├── Common.pm │ ├── Config.pm │ ├── DocZone.pm │ ├── Document.pm │ ├── DocumentReader.pm │ ├── DocumentReader │ │ ├── Base.pm │ │ └── ZoneReader.pm │ ├── Entity.pm │ ├── EntityMention.pm │ ├── EntitySet.pm │ ├── Files.pm │ ├── Loader.pm │ ├── Log.pm │ ├── Node.pm │ ├── Node │ │ ├── A.pm │ │ ├── Aligned.pm │ │ ├── EffectiveRelations.pm │ │ ├── InClause.pm │ │ ├── Interset.pm │ │ ├── N.pm │ │ ├── Ordered.pm │ │ ├── P.pm │ │ ├── T.pm │ │ ├── U.pm │ │ └── t │ │ │ ├── a_effective_rel.t │ │ │ ├── alignment.t │ │ │ ├── ordered.t │ │ │ ├── p_penn_treebank_string_format.t │ │ │ ├── t_eff_dive.t │ │ │ ├── t_eff_dive2.t │ │ │ └── t_effective_rel.t │ ├── Parallel │ │ ├── Head.pm │ │ └── Node.pm │ ├── Phrase.pm │ ├── Phrase │ │ ├── BaseNTerm.pm │ │ ├── Builder.pm │ │ ├── Coordination.pm │ │ ├── NTerm.pm │ │ ├── PP.pm │ │ └── Term.pm │ ├── RememberArgs.pm │ ├── Resource.pm │ ├── Run.pm │ ├── Scenario.pm │ ├── ScenarioParser.pm │ ├── ScenarioParser.rdg │ ├── TredView.pm │ ├── TredView │ │ ├── AnnotationCommand.pm │ │ ├── BackendStorable.pm │ │ ├── Colors.pm │ │ ├── Common.pm │ │ ├── Labels.pm │ │ ├── LineStyles.pm │ │ ├── 
Styles.pm │ │ ├── TreeLayout.pm │ │ └── Vallex.pm │ ├── Types.pm │ ├── WildAttr.pm │ ├── Zone.pm │ ├── compile_grammar.pl │ ├── share │ │ └── tred_extension │ │ │ ├── extensions.lst │ │ │ └── treex │ │ │ ├── contrib │ │ │ └── treex │ │ │ │ ├── .gitignore │ │ │ │ ├── Treex_mode.inc │ │ │ │ └── contrib.mac │ │ │ ├── package.xml │ │ │ ├── resources │ │ │ ├── pmlbackend_conf.xml │ │ │ ├── treex_schema.xml │ │ │ ├── treex_subschema_a_layer.xml │ │ │ ├── treex_subschema_bbn.xml │ │ │ ├── treex_subschema_interset.xml │ │ │ ├── treex_subschema_langcodes.xml │ │ │ ├── treex_subschema_n_layer.xml │ │ │ ├── treex_subschema_p_layer.xml │ │ │ ├── treex_subschema_t_layer.xml │ │ │ ├── treex_subschema_u_layer.xml │ │ │ └── treex_subschema_w_layer.xml │ │ │ └── stylesheets │ │ │ └── Treex_stylesheet │ └── t │ │ ├── attribute.t │ │ ├── backrefs.t │ │ ├── block.t │ │ ├── bundle.t │ │ ├── bundle_zones.t │ │ ├── common.t │ │ ├── config.t │ │ ├── document.t │ │ ├── document_zones.t │ │ ├── dump_scen.t │ │ ├── error.scen │ │ ├── files.t │ │ ├── following_node.t │ │ ├── grammar.t │ │ ├── log.t │ │ ├── n_tree.t │ │ ├── node.t │ │ ├── phrase.t │ │ ├── remove_bundle.t │ │ ├── resource.t │ │ ├── run.t │ │ ├── runall.sh │ │ ├── scenario.t │ │ ├── scenario_parser.t │ │ ├── storable.t │ │ ├── test.scen │ │ ├── trees.t │ │ ├── wildattr.t │ │ └── writers.t │ ├── EN.pm │ ├── JA.pm │ ├── Manual │ └── FAQ.pod │ ├── Scen │ ├── Analysis │ │ ├── BG.pm │ │ ├── CS.pm │ │ ├── DE.pm │ │ ├── EN.pm │ │ ├── ES.pm │ │ ├── EU.pm │ │ ├── JA.pm │ │ ├── LA.pm │ │ ├── NL.pm │ │ ├── PL.pm │ │ ├── PT.pm │ │ └── RU.pm │ ├── CS2EN.pm │ ├── Coref.pm │ ├── CzEng16.pm │ ├── CzEng2CoNLLU.pm │ ├── EN2CS.pm │ ├── EN2ES.pm │ ├── EN2EU.pm │ ├── EN2NL.pm │ ├── EN2PT.pm │ ├── EN_Moses_postprocess.pm │ ├── EN_Moses_preprocess.pm │ ├── ES2EN.pm │ ├── EU2EN.pm │ ├── MLFix │ │ ├── Analysis_1.pm │ │ ├── Analysis_2.pm │ │ ├── Fix.pm │ │ ├── FixPrepare.pm │ │ ├── NER.pm │ │ ├── RunMGiza.pm │ │ ├── WriteSentences.pm │ │ └── 
WriteTriparallel.pm │ ├── NL2EN.pm │ ├── PT2EN.pm │ ├── Synthesis │ │ ├── BG.pm │ │ ├── CS.pm │ │ ├── EN.pm │ │ ├── ES.pm │ │ ├── EU.pm │ │ ├── NL.pm │ │ └── PT.pm │ └── Transfer │ │ ├── CS2EN.pm │ │ ├── EN2CS.pm │ │ ├── EN2ES.pm │ │ ├── EN2EU.pm │ │ ├── EN2NL.pm │ │ ├── EN2PT.pm │ │ ├── ES2EN.pm │ │ ├── EU2EN.pm │ │ ├── NL2EN.pm │ │ └── PT2EN.pm │ ├── Service │ ├── Client.pm │ ├── EventEmitter.pm │ ├── MDP.pm │ ├── Pool.pm │ ├── Role.pm │ ├── Router.pm │ ├── Worker.pm │ └── t │ │ ├── eventemitter.t │ │ ├── fixtures │ │ ├── cs_sample.txt │ │ ├── en_sample.txt │ │ └── hi_example.txt │ │ ├── lib │ │ ├── Test │ │ │ └── TreexTool.pm │ │ └── Treex │ │ │ ├── Block │ │ │ └── W2W │ │ │ │ └── AddPrefix.pm │ │ │ └── Tool │ │ │ └── Prefixer.pm │ │ ├── mst_parser_cs.t │ │ ├── mst_parser_en.t │ │ ├── pool.t │ │ ├── role.t │ │ ├── router.t │ │ └── worker.t │ ├── Tool │ ├── ATreeTransformer │ │ ├── BaseTransformer.pm │ │ ├── CoApStyle.pm │ │ ├── ComplexVerb.pm │ │ ├── DepReverser.pm │ │ └── t │ │ │ ├── coordchain.t │ │ │ └── reverser.t │ ├── Algorithm │ │ ├── TreeUtils.pm │ │ ├── TreeViterbi.pm │ │ ├── TreeViterbiState.pm │ │ └── t │ │ │ └── tree_utils.t │ ├── Align │ │ ├── Annot │ │ │ └── Util.pm │ │ ├── Features.pm │ │ ├── MonolingualGreedy.pm │ │ ├── Robust │ │ │ ├── CS │ │ │ │ └── RelPron.pm │ │ │ ├── Common.pm │ │ │ └── EN │ │ │ │ └── PersPron.pm │ │ ├── Utils.pm │ │ └── t │ │ │ └── monolingual_greedy.t │ ├── Alpino │ │ ├── Generator.pm │ │ ├── Parser.pm │ │ └── Run.pm │ ├── Clustering │ │ └── GoogleNGrams.pm │ ├── Compress │ │ └── Index.pm │ ├── Context │ │ ├── Sentences.pm │ │ └── t │ │ │ └── sents.t │ ├── CopenhagenDT │ │ └── XmlizeTagFormat.pm │ ├── CorefSegments │ │ ├── CS │ │ │ └── Features.pm │ │ ├── Features.pm │ │ └── InterSentLinks.pm │ ├── Coreference │ │ ├── AnteCandsGetter.pm │ │ ├── BaseCorefFeatures.pm │ │ ├── CS │ │ │ ├── PronAnaphFilter.pm │ │ │ ├── PronCorefFeatures.pm │ │ │ └── RelPronAnaphFilter.pm │ │ ├── CombinedDistrModel.pm │ │ ├── 
ContentWordFilter.pm │ │ ├── CorefFeatures.pm │ │ ├── DistrModelComponent.pm │ │ ├── DistrModelComponent │ │ │ ├── CandOrd.pm │ │ │ ├── Gender.pm │ │ │ ├── Number.pm │ │ │ ├── ParentLemma.pm │ │ │ └── SentDist.pm │ │ ├── EN │ │ │ ├── PronAnaphFilter.pm │ │ │ └── PronCorefFeatures.pm │ │ ├── Features │ │ │ ├── Aligned.pm │ │ │ ├── AllMonolingual.pm │ │ │ ├── CS │ │ │ │ ├── AllMonolingual.pm │ │ │ │ └── ReflPron.pm │ │ │ ├── Container.pm │ │ │ ├── Coreference.pm │ │ │ ├── Distance.pm │ │ │ ├── EN │ │ │ │ └── AllMonolingual.pm │ │ │ ├── Morphological.pm │ │ │ ├── ReflPron.pm │ │ │ ├── RelPron.pm │ │ │ ├── TectoSyntax.pm │ │ │ └── remove_PronCoref.pm │ │ ├── NADA.pm │ │ ├── NodeFilter.pm │ │ ├── NodeFilter │ │ │ ├── Coord.pm │ │ │ ├── DemonPron.pm │ │ │ ├── Noun.pm │ │ │ ├── PersPron.pm │ │ │ ├── RelPron.pm │ │ │ ├── Utils.pm │ │ │ └── Verb.pm │ │ ├── PerceptronRanker.pm │ │ ├── ProbDistrRanker.pm │ │ ├── PronCorefFeatures.pm │ │ ├── RuleBasedRanker.pm │ │ ├── SynonTranslDictModel.pm │ │ ├── Utils.pm │ │ ├── ValueTransformer.pm │ │ └── t │ │ │ └── ante_cands_getter.t │ ├── Depfix │ │ ├── Base.pm │ │ ├── CS │ │ │ ├── DepfixBase.pm │ │ │ ├── DiacriticsStripper.pm │ │ │ ├── FixLogger.pm │ │ │ ├── FormGenerator.pm │ │ │ ├── FormemeSplitter.pm │ │ │ ├── NodeInfoGetter.pm │ │ │ ├── NumberSwitcher.pm │ │ │ ├── PairGetter.pm │ │ │ ├── SimpleTranslator.pm │ │ │ └── TagHandler.pm │ │ ├── DecisionTreesModel.pm │ │ ├── EN │ │ │ └── NodeInfoGetter.pm │ │ ├── FixLogger.pm │ │ ├── FormGenerator.pm │ │ ├── MaxEntModel.pm │ │ ├── Model.pm │ │ ├── NaiveBayesModel.pm │ │ ├── NodeInfoGetter.pm │ │ └── OldDecisionTreesModel.pm │ ├── Discourse │ │ └── EVALD │ │ │ └── Features.pm │ ├── ElixirFM.pm │ ├── ElixirFM │ │ └── Exec.pm │ ├── EnglishMorpho │ │ ├── Analysis.pm │ │ ├── Lemmatizer.pm │ │ ├── Makefile │ │ ├── exceptions │ │ │ ├── README │ │ │ ├── adj_adv.pl │ │ │ ├── contractions.pl │ │ │ ├── errors.pl │ │ │ ├── false_negation.pl │ │ │ ├── nouns_invariant_ending_with_s.pl │ │ │ ├── 
nouns_invariant_not_ending_with_s.list │ │ │ ├── nouns_invariant_polemic.list │ │ │ ├── nouns_irregular.pl │ │ │ ├── nouns_latin.pl │ │ │ ├── nouns_plural_es.pl │ │ │ ├── nouns_plural_s.pl │ │ │ ├── verbs_cked.pl │ │ │ ├── verbs_doubling.pl │ │ │ ├── verbs_ending_with_e.pl │ │ │ ├── verbs_irregular.pl │ │ │ ├── verbs_not_ending_with_e.pl │ │ │ └── verbs_other.pl │ │ ├── negation │ │ └── t │ │ │ ├── contractions.txt │ │ │ └── morpho.t │ ├── Eval │ │ └── Bleu.pm │ ├── FSM │ │ └── Foma.pm │ ├── FeatureExtract.pm │ ├── Flect │ │ ├── Base.pm │ │ ├── Classif.pm │ │ ├── FlectBlock.pm │ │ ├── FlectClassifBlock.pm │ │ ├── test.pl │ │ └── test.py │ ├── FormsGenerator │ │ ├── TA.pm │ │ ├── t │ │ │ └── test_clitics.t │ │ └── test.pl │ ├── Gazetteer │ │ ├── Engine.pm │ │ ├── Features.pm │ │ └── RuleBasedScorer.pm │ ├── GoogleTranslate │ │ ├── APIv1.pm │ │ ├── APIv2.pm │ │ ├── batch.pl │ │ ├── batch_1by1.pl │ │ ├── simple.pl │ │ └── t │ │ │ ├── APIv2.t │ │ │ └── texts.txt │ ├── IO │ │ ├── Arff.pm │ │ └── t │ │ │ ├── arff.t │ │ │ └── test.arff │ ├── IR │ │ ├── ESA.pm │ │ ├── esa_script.pl │ │ └── t │ │ │ └── esa.t │ ├── IXAPipe │ │ ├── ES │ │ │ ├── TagAndParse.pm │ │ │ └── t │ │ │ │ └── ixapipe_tag_and_parse.t │ │ └── EU │ │ │ ├── TokenizeAndParse.pm │ │ │ └── t │ │ │ └── ixapipe_tag_and_parse.t │ ├── Interset │ │ ├── Driver.pm │ │ ├── Example │ │ │ └── Simple.pm │ │ ├── SimpleDriver.pm │ │ └── t │ │ │ └── example_simple.t │ ├── LM │ │ ├── FormInfo.pm │ │ ├── Lemma.pm │ │ ├── MorphoLM.pm │ │ ├── TreeLM.pm │ │ └── t │ │ │ ├── interactive_testTreeLM.pl │ │ │ ├── test_MorphoLM.pl │ │ │ ├── test_TreeLM.pl │ │ │ └── test_en_TreeLM.pl │ ├── LXSuite.pm │ ├── Lexicon │ │ ├── CS.pm │ │ ├── CS │ │ │ ├── AdjectivalComplements.pm │ │ │ ├── Adverbia.pm │ │ │ ├── Aspect.pm │ │ │ ├── NamedEntityLabels.pm │ │ │ ├── Numerals.pm │ │ │ ├── PersonalRoles.pm │ │ │ ├── Prefixes.pm │ │ │ └── Reflexivity.pm │ │ ├── DerivDict │ │ │ ├── Dictionary.pm │ │ │ ├── Lexeme.pm │ │ │ └── t │ │ │ │ └── 
deriv_dict_general.t │ │ ├── Derivations │ │ │ ├── CS.pm │ │ │ └── test_cs.pl │ │ ├── EN.pm │ │ ├── EN │ │ │ ├── Countability.pm │ │ │ ├── First_names.pm │ │ │ ├── Hypernyms.pm │ │ │ └── PersonalRoles.pm │ │ ├── Generation │ │ │ ├── CS.pm │ │ │ ├── ES.pm │ │ │ ├── ES_Morphology.pm │ │ │ ├── PT.pm │ │ │ ├── RU.pm │ │ │ └── t │ │ │ │ ├── cs.t │ │ │ │ ├── es.t │ │ │ │ ├── pt.t │ │ │ │ └── ru.t │ │ ├── JA.pm │ │ ├── NL │ │ │ ├── ErgativeVerbs.pm │ │ │ ├── Pronouns.pm │ │ │ ├── VerbformOrder.pm │ │ │ ├── alpino_extract_gender.pl │ │ │ └── alpino_extract_mwus.pl │ │ ├── UniversalTagset.pm │ │ └── t │ │ │ └── cs_lemma.t │ ├── ML │ │ ├── Categorizer │ │ │ └── Categorizer.pm │ │ ├── Classifier.pm │ │ ├── Classifier │ │ │ ├── Linear.pm │ │ │ └── RuleBased.pm │ │ ├── Clustering │ │ │ ├── C_Cluster.pm │ │ │ └── t │ │ │ │ └── c_cluster.t │ │ ├── Factory.pm │ │ ├── Learner.pm │ │ ├── LinearRegression.pm │ │ ├── LinearRegression │ │ │ ├── Model.pm │ │ │ └── Util.pm │ │ ├── MLProcess.pm │ │ ├── MLProcessBlock.pm │ │ ├── MLProcessBlockPiped.pm │ │ ├── MLProcessPiped.pm │ │ ├── MaxEnt │ │ │ ├── Learner.pm │ │ │ └── Model.pm │ │ ├── NormalizeProb.pm │ │ ├── Ranker.pm │ │ ├── Ranker │ │ │ └── Features.pm │ │ ├── ScikitLearn │ │ │ ├── Classifier.pm │ │ │ └── Model.pm │ │ ├── TabSpace │ │ │ └── Util.pm │ │ ├── VowpalWabbit │ │ │ ├── Classifier.pm │ │ │ ├── CsoaaLdfClassifier.pm │ │ │ ├── Learner.pm │ │ │ ├── Model.pm │ │ │ ├── Ranker.pm │ │ │ └── Util.pm │ │ ├── Weka │ │ │ └── Util.pm │ │ └── t │ │ │ ├── lin_regres.t │ │ │ └── vw.t │ ├── MLFix │ │ ├── .ScikitLearn.pm.swp │ │ ├── Base.pm │ │ ├── CS │ │ │ ├── FormGenerator.pm │ │ │ └── NumberSwitcher.pm │ │ ├── DE │ │ │ ├── FormGenerator.pm │ │ │ └── NumberSwitcher.pm │ │ ├── FixLogger.pm │ │ ├── FormGenerator.pm │ │ ├── Model.pm │ │ ├── NodeInfoGetter.pm │ │ └── ScikitLearn.pm │ ├── Mate │ │ └── Run.pm │ ├── Memcached │ │ ├── Memcached.pm │ │ ├── memcached.pl │ │ └── t │ │ │ ├── MemcachedTest.pm │ │ │ ├── check-lemmas.pl │ │ │ ├── 
extract-lemmas.pl │ │ │ ├── test-concurrent-access.t │ │ │ └── test-model-loading.t │ ├── Moses.pm │ ├── NER │ │ ├── NameTag.pm │ │ ├── Role.pm │ │ ├── Stanford.pm │ │ └── t │ │ │ ├── nametag_cs.t │ │ │ ├── nametag_en.t │ │ │ ├── stanford2008.t │ │ │ └── stanford2015.t │ ├── NamedEnt │ │ ├── Features.pl │ │ ├── Features │ │ │ ├── Common.pm │ │ │ ├── Containers.pm │ │ │ ├── Context.pm │ │ │ ├── Oneword.pm │ │ │ ├── Threeword.pm │ │ │ └── Twoword.pm │ │ ├── README │ │ ├── SVMTools.pm │ │ ├── SVMtuning_accuracy_based_results.txt │ │ ├── TestSVM.pl │ │ ├── TrainSVM.pl │ │ ├── TuneSVM.map.pl │ │ ├── TuneSVM.reduce.pl │ │ ├── other_models │ │ │ ├── TestMaxEnt.pl │ │ │ ├── TestNaiveBayes.pl │ │ │ ├── TestSVM.pl │ │ │ ├── TrainMaxEnt.pl │ │ │ ├── TrainNaiveBayes.pl │ │ │ └── TrainSVM_standalone.pl │ │ └── tuneWrapper.sh │ ├── Orthography │ │ └── TA.pm │ ├── PMLTQ │ │ └── Query.pm │ ├── Parallel │ │ ├── MessageBoard.pm │ │ └── t │ │ │ ├── msg_board.t │ │ │ └── synchronize.t │ ├── Parser │ │ ├── Cabocha.pm │ │ ├── Charniak │ │ │ ├── Charniak.pm │ │ │ ├── Node.pm │ │ │ └── t │ │ │ │ └── charniak.t │ │ ├── Ensemble │ │ │ └── Ensemble.pm │ │ ├── Fanse.pm │ │ ├── JDEPP.pm │ │ ├── LXParser.pm │ │ ├── MST.pm │ │ ├── MST │ │ │ └── Czech.pm │ │ ├── MSTperl.pm │ │ ├── MSTperl │ │ │ ├── Config.pm │ │ │ ├── Edge.pm │ │ │ ├── FeaturesControl.pm │ │ │ ├── Labeller.pm │ │ │ ├── ModelAdditional.pm │ │ │ ├── ModelBase.pm │ │ │ ├── ModelLabelling.pm │ │ │ ├── ModelUnlabelled.pm │ │ │ ├── MultiHeteroModelParser.pm │ │ │ ├── MultiModelParser.pm │ │ │ ├── Node.pm │ │ │ ├── ParsedSentencesCombiner.pm │ │ │ ├── Parser.pm │ │ │ ├── ParserCombiner.pm │ │ │ ├── Reader.pm │ │ │ ├── RootNode.pm │ │ │ ├── Sentence.pm │ │ │ ├── TrainerBase.pm │ │ │ ├── TrainerLabelling.pm │ │ │ ├── TrainerUnlabelled.pm │ │ │ ├── Writer.pm │ │ │ ├── samples │ │ │ │ ├── labeller_test.sh │ │ │ │ ├── labeller_train.sh │ │ │ │ ├── sample.config │ │ │ │ ├── sample_test.sh │ │ │ │ ├── sample_test.tsv │ │ │ │ ├── 
sample_train.sh │ │ │ │ ├── sample_train.tsv │ │ │ │ ├── test_labeller_tsv.pl │ │ │ │ ├── test_tsv.pl │ │ │ │ ├── train_labeller_tsv.pl │ │ │ │ ├── train_tsv.pl │ │ │ │ ├── treex_input.txt │ │ │ │ └── treex_parse.scen │ │ │ ├── scripts │ │ │ │ ├── TagMorceEnglishCoNLL.pl │ │ │ │ ├── compare_lines.pl │ │ │ │ ├── conll2inline.pl │ │ │ │ ├── inline2conll.pl │ │ │ │ ├── inline_sentences_reorder.pl │ │ │ │ ├── labelled_parse_test.sh │ │ │ │ ├── labeller_test.sh │ │ │ │ ├── labeller_train_and_test.sh │ │ │ │ ├── make_czech_tags.pl │ │ │ │ ├── pcedt2conll.sh │ │ │ │ ├── pcedt2conll_tag_and_parse_en.sh │ │ │ │ ├── pcedt2conll_tag_and_parse_en_worsen_cs.sh │ │ │ │ ├── pcedt2conll_td.sh │ │ │ │ ├── pdtT2conll.sh │ │ │ │ ├── simple_lemmas.pl │ │ │ │ ├── split_afun_ismember.sh │ │ │ │ ├── test_conll.pl │ │ │ │ ├── test_conll_multimodel.pl │ │ │ │ ├── test_conll_multimodel_weighted.pl │ │ │ │ ├── test_conll_multimodel_weighted_f.pl │ │ │ │ ├── test_conll_multimodel_weighted_f_multiconf.pl │ │ │ │ ├── test_conll_multimodel_weighted_f_norm.pl │ │ │ │ ├── test_conll_multimodel_weighted_f_norm_printout.pl │ │ │ │ ├── test_conll_multimodel_weighted_f_printout.pl │ │ │ │ ├── test_conll_multimodel_weighted_norm.pl │ │ │ │ ├── test_conll_multiplefiles.pl │ │ │ │ ├── test_conll_multiplefiles_printout.pl │ │ │ │ ├── test_conll_parsecomb.pl │ │ │ │ ├── test_conll_parsecomb_weighted.pl │ │ │ │ ├── test_conll_treecomb_weighted.pl │ │ │ │ ├── test_conll_treecomb_weighted_f.pl │ │ │ │ ├── test_conll_treecomb_weighted_f_multiconf.pl │ │ │ │ ├── test_conll_treecomb_weighted_f_printout.pl │ │ │ │ ├── test_labeller_tsv.pl │ │ │ │ ├── test_parse_and_label.pl │ │ │ │ ├── test_rur_conll.pl │ │ │ │ ├── train_conll.pl │ │ │ │ ├── train_labeller_tsv.pl │ │ │ │ ├── unlabelled_test.sh │ │ │ │ ├── unlabelled_test_rur.sh │ │ │ │ ├── unlabelled_train_and_test.sh │ │ │ │ └── worsen_pcedt.sh │ │ │ └── t │ │ │ │ ├── sample.config │ │ │ │ ├── sample_test.tsv │ │ │ │ ├── sample_train.tsv │ │ │ │ └── 
train_and_test.t │ │ ├── Malt.pm │ │ ├── ParsingLatin.pm │ │ ├── RUR │ │ │ ├── Config.pm │ │ │ ├── Edge.pm │ │ │ ├── FeaturesControl.pm │ │ │ ├── Labeller.pm │ │ │ ├── ModelAdditional.pm │ │ │ ├── ModelBase.pm │ │ │ ├── ModelLabelling.pm │ │ │ ├── ModelUnlabelled.pm │ │ │ ├── Node.pm │ │ │ ├── Parser.pm │ │ │ ├── RURParser.pm │ │ │ ├── Reader.pm │ │ │ ├── RootNode.pm │ │ │ ├── Sentence.pm │ │ │ ├── TrainerBase.pm │ │ │ ├── TrainerLabelling.pm │ │ │ ├── TrainerUnlabelled.pm │ │ │ ├── Writer.pm │ │ │ ├── samples │ │ │ │ ├── labeller_test.sh │ │ │ │ ├── labeller_train.sh │ │ │ │ ├── sample.config │ │ │ │ ├── sample_test.sh │ │ │ │ ├── sample_test.tsv │ │ │ │ ├── sample_train.sh │ │ │ │ ├── sample_train.tsv │ │ │ │ ├── test_labeller_tsv.pl │ │ │ │ ├── test_tsv.pl │ │ │ │ ├── train_labeller_tsv.pl │ │ │ │ ├── train_tsv.pl │ │ │ │ ├── treex_input.txt │ │ │ │ └── treex_parse.scen │ │ │ └── t │ │ │ │ ├── sample.config │ │ │ │ ├── sample_test.tsv │ │ │ │ ├── sample_train.tsv │ │ │ │ ├── test_rur.t │ │ │ │ └── train_and_test_rur.t │ │ ├── Role.pm │ │ ├── Simple │ │ │ ├── FR.pm │ │ │ └── XY.pm │ │ ├── Zpar.pm │ │ └── t │ │ │ ├── fanse.t │ │ │ ├── jdepp.t │ │ │ ├── malt.t │ │ │ └── zpar.t │ ├── Phrase2Dep │ │ ├── Pennconverter.pm │ │ ├── StanfordConverter.pm │ │ └── t │ │ │ └── pennconverter.t │ ├── PhraseBuilder │ │ ├── AlpinoToPrague.pm │ │ ├── BasePhraseBuilder.pm │ │ ├── MoscowToPrague.pm │ │ ├── Prague.pm │ │ ├── PragueToUD.pm │ │ ├── StanfordToPrague.pm │ │ ├── StanfordToUD.pm │ │ ├── ToPrague.pm │ │ ├── ToUD.pm │ │ └── UDToPrague.pm │ ├── PhraseParser │ │ ├── Charniak.pm │ │ ├── Common.pm │ │ ├── Stanford.pm │ │ └── t │ │ │ ├── charniak.t │ │ │ └── stanford.t │ ├── Probe.pm │ ├── ProcessUtils.pm │ ├── Python │ │ ├── RunFunc.pm │ │ └── execute.py │ ├── ReferentialIt │ │ ├── Features.pm │ │ └── Utils.pm │ ├── SRLParser │ │ ├── FeatureExtractor.pm │ │ ├── LPInference.pm │ │ ├── PredicateIdentifier.pm │ │ ├── submit_training_to_maxent.sh │ │ └── train.sh │ ├── SandhiHandler 
│ │ ├── TA.pm │ │ └── t │ │ │ └── tamil_spelling.t │ ├── Segment │ │ ├── CS │ │ │ └── RuleBased.pm │ │ ├── EN │ │ │ └── RuleBased.pm │ │ ├── ES │ │ │ └── RuleBased.pm │ │ ├── EU │ │ │ └── RuleBased.pm │ │ ├── LA │ │ │ └── RuleBased.pm │ │ ├── NL │ │ │ └── RuleBased.pm │ │ ├── PT │ │ │ └── RuleBased.pm │ │ ├── RU │ │ │ └── RuleBased.pm │ │ ├── RuleBased.pm │ │ ├── TA │ │ │ └── RuleBased.pm │ │ └── t │ │ │ └── rule_based.t │ ├── Stemmer │ │ └── TA │ │ │ ├── CorpusSuffixSplitter.pl │ │ │ ├── Simple.pm │ │ │ ├── SuffixSplitter.pm │ │ │ ├── sample.txt │ │ │ ├── test.pl │ │ │ └── test1.pl │ ├── Storage │ │ └── Storable.pm │ ├── Tagger │ │ ├── Featurama.pm │ │ ├── Featurama │ │ │ ├── CS.pm │ │ │ ├── Dummy.pm │ │ │ ├── EN.pm │ │ │ └── t │ │ │ │ ├── featurama_cs.t │ │ │ │ └── featurama_en.t │ │ ├── HunPoS.pm │ │ ├── LXTagger.pm │ │ ├── MElt.pm │ │ ├── MeCab.pm │ │ ├── MorphoDiTa.pm │ │ ├── Role.pm │ │ ├── Simple │ │ │ ├── FR.pm │ │ │ └── XY.pm │ │ ├── Stanford.pm │ │ ├── TnT.pm │ │ ├── TreeTagger.pm │ │ └── t │ │ │ ├── featurama.t │ │ │ ├── mecab.t │ │ │ ├── morphodita.t │ │ │ └── tree_tagger.t │ ├── TranslationModel │ │ ├── Chain.pm │ │ ├── Combined │ │ │ ├── Backoff.pm │ │ │ └── Interpolated.pm │ │ ├── Common.pm │ │ ├── Derivative │ │ │ ├── CS2RU │ │ │ │ ├── ReflexiveSja.pm │ │ │ │ └── Transliterate.pm │ │ │ ├── Common.pm │ │ │ └── EN2CS │ │ │ │ ├── Deadjectival_adverbs.pm │ │ │ │ ├── Deverbal_adjectives.pm │ │ │ │ ├── Hyphen_compounds.pm │ │ │ │ ├── Nouns_to_adjectives.pm │ │ │ │ ├── Numbers.pm │ │ │ │ ├── Prefixes.pm │ │ │ │ ├── Suffixes.pm │ │ │ │ ├── Transliterate.pm │ │ │ │ ├── Verbs_to_nouns.pm │ │ │ │ └── _readme.txt │ │ ├── Factory.pm │ │ ├── Features │ │ │ ├── EN_coref.pm │ │ │ ├── It.pm │ │ │ └── Standard.pm │ │ ├── Learner.pm │ │ ├── ML │ │ │ ├── Learner.pm │ │ │ └── Model.pm │ │ ├── MaxEnt │ │ │ └── FeatureExt │ │ │ │ └── EN2CS.pm │ │ ├── Memcached │ │ │ └── Model.pm │ │ ├── Model.pm │ │ ├── NaiveBayes │ │ │ ├── FeatureExt │ │ │ │ └── EN2CS.pm │ │ │ ├── 
Learner.pm │ │ │ ├── Model.pm │ │ │ └── test.pl │ │ ├── Rulebased │ │ │ └── Model.pm │ │ ├── Static │ │ │ ├── Model.pm │ │ │ ├── RelFreq │ │ │ │ ├── Learner.pm │ │ │ │ └── Learner_new.pm │ │ │ ├── Universal.pm │ │ │ └── Variant.pm │ │ ├── TwoNode.pm │ │ └── t │ │ │ ├── learner.t │ │ │ ├── maxent_learner_new.t │ │ │ ├── static_learner.t │ │ │ ├── static_learner_new.t │ │ │ └── vw_learner.t │ ├── Transliteration │ │ ├── DowngradeUTF8forISO2.pm │ │ ├── TA.pm │ │ └── t │ │ │ └── check_utf8_latin_conversion.t │ ├── Triggers │ │ ├── FeatureFilter.pm │ │ └── Features.pm │ ├── UDPipe.pm │ ├── UMR │ │ ├── CS │ │ │ └── GrammatemeSetter.pm │ │ ├── Common.pm │ │ ├── GrammatemeSetter.pm │ │ ├── LA │ │ │ └── GrammatemeSetter.pm │ │ ├── PDTV2PB.pm │ │ └── t │ │ │ ├── pdt2pb-w.csv │ │ │ ├── pdt2pb.csv │ │ │ ├── pdtv2pb.t │ │ │ └── vallex.xml │ ├── Vallex │ │ ├── FrameElement.pm │ │ ├── ValencyFrame.pm │ │ └── t │ │ │ ├── print_all_forms.pl │ │ │ ├── print_all_frames.pl │ │ │ └── valency_frame.t │ ├── Word2vec │ │ ├── readbin │ │ ├── readbin.c │ │ └── txt2vw.pl │ ├── Wordnet │ │ └── SimpleQuery.pm │ └── t │ │ └── udpipe.t │ ├── Tutorial.pod │ ├── Tutorial │ ├── Config.pod │ ├── FirstSteps.pod │ ├── Install.pod │ ├── ReadersAndWriters.pod │ ├── Scen.pod │ ├── WritingNewReaders.pod │ ├── generate_html.sh │ └── treexpod.css │ ├── Unilang.pm │ └── t │ ├── Makefile │ ├── cycle_detection_restore.pl │ ├── cycle_detection_test.pl │ ├── en_analysis.scen │ ├── sample-cs.txt │ └── sample-en.txt ├── packaging ├── Makefile ├── README.md ├── Treex-CS │ ├── Changes.template │ ├── Makefile │ └── dist.ini.template ├── Treex-Core │ ├── Changes.template │ ├── Makefile │ ├── compile_grammar.pl │ ├── dist.ini.template │ └── postprocess_POD.pl ├── Treex-Doc │ ├── Changes.template │ ├── Makefile │ ├── dist.ini.template │ └── t │ │ └── doctest.t ├── Treex-EN │ ├── Changes.template │ ├── Makefile │ └── dist.ini.template ├── Treex-JA │ ├── Changes.template │ ├── Makefile │ ├── dist.ini.template │ ├── share 
│ │ └── examples │ │ │ ├── sample-ja.txt │ │ │ └── sample.scen │ └── weaver.ini.template ├── Treex-Parser-MSTperl │ ├── Changes.template │ ├── Makefile │ └── dist.ini.template ├── Treex-Unilang │ ├── Changes.template │ ├── Makefile │ └── dist.ini.template ├── common.mk ├── perlcritic.rc └── test_treex_installation_by_cpanm.sh └── training └── treelm ├── Makefile ├── README ├── create_ids.pl ├── create_models.pl ├── en ├── czeng │ └── Makefile ├── stackoverflow │ ├── Makefile │ └── clean_stackexchange.pl ├── superuser │ ├── Makefile │ └── clean_stackexchange.pl ├── ubuntu-dialogue │ └── Makefile ├── wmt15-newscrawl14v2 │ └── Makefile └── wmt15-newsdiscuss │ └── Makefile └── print_plsgz.pl /.editorconfig: -------------------------------------------------------------------------------- 1 | # EditorConfig helps developers define and maintain consistent 2 | # coding styles between different editors and IDEs 3 | # editorconfig.org 4 | 5 | root = true 6 | 7 | 8 | [*] 9 | 10 | # Change these settings to your own preference 11 | indent_style = space 12 | indent_size = 4 13 | 14 | # We recommend you to keep these unchanged 15 | end_of_line = lf 16 | charset = utf-8 17 | trim_trailing_whitespace = true 18 | insert_final_newline = true 19 | 20 | [*.md] 21 | trim_trailing_whitespace = false 22 | 23 | [Makefile] 24 | indent_style = tab 25 | indent_size = 8 26 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /devel 2 | [0-9][0-9][0-9]-cluster-run-* 3 | *.pyc 4 | /lib/Treex/Tool/Flect/flect/ 5 | .*.swp 6 | *.lnk 7 | -------------------------------------------------------------------------------- /.perltidyrc: -------------------------------------------------------------------------------- 1 | # Use Conway's PBP recommendations 2 | --perl-best-practices 3 | # (which means -l=78 -i=4 -ci=4 -st -se -vt=2 -cti=0 -pt=1 -bt=1 -sbt=1 -bbt=1 -nsfs -nolq 4 | # 
-wbb="% + - * / x != == >= <= =~ !~ < > | & = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x=" 5 | # ) 6 | 7 | # with the exception of: 8 | 9 | --maximum-line-length=0 10 | # I think it is better to break lines manually (and yes, ideally to max 78 characters), 11 | # When there are too many indent levels (nested blocks) 12 | # it is better to refactor rather than auto-break lines. 13 | 14 | --no-delete-old-newlines 15 | # Sometimes I consider some statements (e.g. list expression) nicer to read when divided on more lines. 16 | 17 | --no-outdent-labels 18 | # In PBP there is no outdenting, but Conway forgot to add it to his perltidyrc 19 | 20 | # From shell you can use the settings above with 21 | # perltidy -pbp -l=0 -ndnl -nola -------------------------------------------------------------------------------- /bin/derimor: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | use strict; 4 | use warnings; 5 | 6 | use Treex::Tool::DerivMorpho::Scenario; 7 | my $scenario_string = join ' ',@ARGV; 8 | print "scenario: $scenario_string\n"; 9 | my $scenario = Treex::Tool::DerivMorpho::Scenario->new({from_string => $scenario_string}); 10 | $scenario->apply_to_dictionary(undef); 11 | 12 | -------------------------------------------------------------------------------- /bin/t/TestsCommon.pm: -------------------------------------------------------------------------------- 1 | package TestsCommon; 2 | 3 | use File::Basename; 4 | 5 | chdir(dirname(__FILE__)); 6 | 7 | my $act_dir = dirname(__FILE__); 8 | my $pwd = `pwd`; 9 | my $treex_file = "./../treex"; 10 | if ( ! -f $treex_file ) { 11 | $treex_file = "./../bin/treex"; 12 | } 13 | 14 | if ( ! -f $treex_file ) { 15 | my $msg = "DIR: $act_dir; PWD: $pwd; TREEX: $treex_file"; 16 | die($msg); 17 | 18 | } 19 | 20 | our $TREEX_FILE = $treex_file; 21 | our $TREEX_CMD = $^X . " " . 
$TREEX_FILE; 22 | 1; -------------------------------------------------------------------------------- /bin/t/eval_block.t: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | use strict; 4 | use warnings; 5 | 6 | use Treex::Core::Run q(treex); 7 | 8 | use Test::More tests => 1; 9 | use Test::Output; 10 | 11 | foreach my $i (1..3) { 12 | my $doc = Treex::Core::Document->new(); 13 | $doc->save("dummy$i.treex"); 14 | } 15 | 16 | my $cmdline_arguments = "-q Util::Eval document='print 1' -- !dummy?.treex"; 17 | stdout_is( sub { treex $cmdline_arguments },'111',"checking Util::Eval: treex $cmdline_arguments"); 18 | 19 | 20 | unlink glob "dummy*"; 21 | -------------------------------------------------------------------------------- /bin/t/scenarios/print3.scen: -------------------------------------------------------------------------------- 1 | # Scenarios can contain comments 2 | Util::Eval document='print 3;' # and end-of-line comments 3 | -------------------------------------------------------------------------------- /bin/t/scenarios/print4.scen: -------------------------------------------------------------------------------- 1 | Util::Eval document='print 4;' 2 | -------------------------------------------------------------------------------- /bin/t/scenarios/scen_in_scen.scen: -------------------------------------------------------------------------------- 1 | print4.scen 2 | -------------------------------------------------------------------------------- /bin/treex: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | use strict; 3 | use warnings; 4 | 5 | use Treex::Core::Run q(treex); 6 | treex (\@ARGV); 7 | 8 | 9 | __END__ 10 | 11 | =encoding utf-8 12 | 13 | =head1 NAME 14 | 15 | treex - bash front-end for Treex::Core::Run 16 | 17 | =head1 DESCRIPTION 18 | 19 | This is a thin front-end for calling 20 | functionality of Treex::Core::Run from 21 | 
the bash command line. See Treex::Core::Run 22 | for the description of arguments. 23 | 24 | =head1 AUTHOR 25 | 26 | Zdeněk Žabokrtský 27 | 28 | 29 | =head1 COPYRIGHT AND LICENSE 30 | 31 | Copyright © 2011 by Institute of Formal and Applied Linguistics, Charles University in Prague 32 | 33 | This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself. 34 | -------------------------------------------------------------------------------- /lib/Treex/Block/A2A/CS/VocalizePrepos.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::A2A::CS::VocalizePrepos; 2 | use utf8; 3 | use Moose; 4 | use Treex::Core::Common; 5 | extends 'Treex::Block::T2A::CS::VocalizePrepos'; 6 | 7 | override 'is_prep' => sub { 8 | my ($self, $anode) = @_; 9 | 10 | return $anode->tag =~ /^R/; 11 | }; 12 | 13 | 1; 14 | 15 | =head1 NAME 16 | 17 | Treex::Block::A2A::CS::VocalizePrepos 18 | 19 | =head1 DESCRIPTION 20 | 21 | An a-layer version of L<Treex::Block::T2A::CS::VocalizePrepos>. 22 | 23 | =head1 AUTHOR 24 | 25 | Rudolf Rosa 26 | 27 | =head1 COPYRIGHT AND LICENSE 28 | 29 | Copyright © 2015 by Institute of Formal and Applied Linguistics, 30 | Charles University in Prague 31 | 32 | This module is free software; you can redistribute it and/or modify it 33 | under the same terms as Perl itself. 34 | -------------------------------------------------------------------------------- /lib/Treex/Block/A2A/CS/VocalizePreposPlain.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::A2A::CS::VocalizePreposPlain; 2 | use utf8; 3 | use Moose; 4 | use Treex::Core::Common; 5 | extends 'Treex::Block::A2A::CS::VocalizePrepos'; 6 | 7 | 1; 8 | 9 | =head1 DESCRIPTION 10 | 11 | Deprecated, use L<Treex::Block::A2A::CS::VocalizePrepos>. 
12 | 13 | -------------------------------------------------------------------------------- /lib/Treex/Block/A2A/EU/FixAspect.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::A2A::EU::FixAspect; 2 | 3 | use Moose; 4 | use Treex::Core::Common; 5 | use utf8; 6 | extends 'Treex::Core::Block'; 7 | # Rewrites the form 'detektatu' to 'detektatzen' on the a-node aligned (type 'orig') with the t-node's lexical a-node. 8 | sub process_tnode { 9 | my ($self, $tnode) = @_; 10 | my $lex_anode = $tnode->get_lex_anode() or return; # no lexical a-node, nothing to fix 11 | my ($anode) = $lex_anode->get_aligned_nodes_of_type('orig'); 12 | return if !defined $anode; # no aligned node 13 | if (($anode->form // '') eq 'detektatu') { 14 | $anode->set_form('detektatzen'); 15 | } 16 | 17 | return; 18 | } 19 | 1; -------------------------------------------------------------------------------- /lib/Treex/Block/A2A/EU/FixDefIndef.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::A2A::EU::FixDefIndef; 2 | 3 | use Moose; 4 | use Treex::Core::Common; 5 | use utf8; 6 | extends 'Treex::Core::Block'; 7 | # When the aligned 'int' node has a child with lemma 'a', reset the form to the lemma and add the word 'bat' right after the node. 8 | sub process_anode { 9 | my ($self, $anode) = @_; 10 | 11 | 12 | my ($align) = $anode->get_aligned_nodes_of_type('int'); 13 | my @childs; 14 | if (defined $align) { 15 | @childs = $align->get_children(); 16 | if (grep {($_->lemma // '') eq 'a'} @childs) { 17 | $anode->set_form($anode->lemma); 18 | my $child=$anode->create_child({form=>'bat', lemma=>'bat'}); 19 | $child->shift_after_node($anode); 20 | } 21 | } 22 | 23 | return; 24 | } 25 | 1; -------------------------------------------------------------------------------- /lib/Treex/Block/A2A/EU/FixMoveRoot.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::A2A::EU::FixMoveRoot; 2 | 3 | use Moose; 4 | use Treex::Core::Common; 5 | use utf8; 6 | extends 'Treex::Core::Block'; 7 | # Repositions a node whose parent is the root and whose CoNLL POS is "ADT" relative to its descendants. 8 | sub process_anode { 9 | my ($self, $anode) = @_; 10 | 11 | if ($anode->parent->is_root && $anode->conll_pos eq "ADT") { 12 | my @descend = $anode->get_descendants(); 13 | my @verb = (grep {$_->conll_pos eq "ADI"} @descend); 14 | 15 | if 
(@verb) { 16 | $anode->shift_after_subtree($verb[-1], {without_children=>1}); 17 | } 18 | elsif (@descend) { 19 | $anode->shift_before_subtree($descend[0], {without_children=>1}); 20 | } 21 | } 22 | 23 | return; 24 | } 25 | -------------------------------------------------------------------------------- /lib/Treex/Block/A2A/FlattenAtree.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::A2A::FlattenAtree; 2 | use Moose; 3 | use Treex::Core::Common; 4 | extends 'Treex::Core::Block'; 5 | 6 | sub process_atree { 7 | my ($self, $root) = @_; 8 | my @nodes = $root->get_descendants({ordered=>1}); 9 | foreach my $n (@nodes) { 10 | $n->set_parent($root); 11 | } 12 | } 13 | 14 | 1; 15 | 16 | __END__ -------------------------------------------------------------------------------- /lib/Treex/Block/A2A/RemoveEmptySentences.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::A2A::RemoveEmptySentences; 2 | use Moose; 3 | use Treex::Core::Common; 4 | extends 'Treex::Core::Block'; 5 | 6 | sub process_bundle { 7 | my ( $self, $bundle ) = @_; 8 | my @zones = $bundle->get_all_zones(); 9 | my $delete_zone = 0; 10 | foreach my $z (@zones) { 11 | my $sentence = $z->sentence; 12 | chomp $sentence; 13 | $sentence =~ s/\s+/ /g; 14 | $sentence =~ s/(^\s+|\s+$)//; 15 | if ($sentence =~ /^$/) { 16 | $delete_zone = 1; 17 | last; 18 | } 19 | } 20 | if ($delete_zone) { 21 | $bundle->remove(); 22 | } 23 | } 24 | 25 | 1; 26 | 27 | __END__ -------------------------------------------------------------------------------- /lib/Treex/Block/A2A/SetClauseDepth.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::A2A::SetClauseDepth; 2 | use Moose; 3 | use Treex::Core::Common; 4 | extends 'Treex::Core::Block'; 5 | 6 | sub process_anode { 7 | my ( $self, $anode ) = @_; 8 | my $n = $anode; 9 | my %seen; 10 | while (!$n->is_root){ 11 | 
$seen{$n->clause_number}++ if $n->clause_number; 12 | $n = $n->get_parent(); 13 | } 14 | $anode->wild->{clause_depth} = scalar keys %seen; 15 | return; 16 | } 17 | 18 | 1; 19 | 20 | __END__ 21 | 22 | =encoding utf-8 23 | 24 | =head1 NAME 25 | 26 | Treex::Block::A2A::SetClauseDepth 27 | 28 | =head1 DESCRIPTION 29 | 30 | 31 | =head1 AUTHOR 32 | 33 | Martin Popel 34 | 35 | =head1 COPYRIGHT AND LICENSE 36 | 37 | Copyright © 2012 by Institute of Formal and Applied Linguistics, Charles University in Prague 38 | 39 | This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself. 40 | -------------------------------------------------------------------------------- /lib/Treex/Block/A2A/Transform/ComplexVerbRootFirst.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::A2A::Transform::ComplexVerbRootFirst; 2 | use Moose; 3 | extends 'Treex::Block::A2A::Transform::BaseTransformer'; 4 | use Treex::Tool::ATreeTransformer::ComplexVerb; 5 | 6 | sub BUILD { 7 | my ($self) = @_; 8 | $self->set_transformer( 9 | Treex::Tool::ATreeTransformer::ComplexVerb->new( 10 | { 11 | subscription => $self->subscription, 12 | new_root => 'first', 13 | } 14 | ) 15 | ) 16 | } 17 | 18 | 1; 19 | 20 | -------------------------------------------------------------------------------- /lib/Treex/Block/A2A/Transform/ComplexVerbRootLast.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::A2A::Transform::ComplexVerbRootLast; 2 | use Moose; 3 | extends 'Treex::Block::A2A::Transform::BaseTransformer'; 4 | use Treex::Tool::ATreeTransformer::ComplexVerb; 5 | 6 | sub BUILD { 7 | my ($self) = @_; 8 | $self->set_transformer( 9 | Treex::Tool::ATreeTransformer::ComplexVerb->new( 10 | { 11 | subscription => $self->subscription, 12 | new_root => 'last', 13 | } 14 | ) 15 | ) 16 | } 17 | 18 | 1; 19 | 20 | 
-------------------------------------------------------------------------------- /lib/Treex/Block/A2N/CS/t/sysnerv_load.t: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | BEGIN { 3 | unless ( $ENV{AUTHOR_TESTING} ) { 4 | require Test::More; 5 | Test::More::plan( skip_all => 'these tests requires AUTHOR_TESTING' ); 6 | } 7 | } 8 | 9 | use strict; 10 | use warnings; 11 | 12 | use Test::More; 13 | use Data::Dumper; 14 | 15 | BEGIN { use_ok ('Treex::Block::A2N::CS::SysNERV') }; 16 | 17 | my $block = Treex::Block::A2N::CS::SysNERV->new; 18 | 19 | isa_ok( $block, 'Treex::Block::A2N::CS::SysNERV' ); 20 | 21 | 22 | done_testing(); 23 | -------------------------------------------------------------------------------- /lib/Treex/Block/A2N/EN/t/stanford2008.t: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | use utf8; 3 | use strict; 4 | use warnings; 5 | 6 | use Test::More tests => 2; 7 | 8 | use_ok("Treex::Block::A2N::EN::StanfordNER2008"); 9 | # Run the block through the treex command line and compare the entities it prints. 10 | my $in = "Peter and Paul love Stanford"; 11 | my $expect = 'p_Peter'. 'p_Paul' . 'i_Stanford'; 12 | my $scen = q{A2N::EN::StanfordNER2008 Util::Eval nnode='print $.ne_type.$.normalized_name'}; 13 | open my $OUT, '-|', "echo $in | treex -q -Len -t $scen" or die "Cannot run treex: $!"; 14 | my $got = <$OUT>; 15 | is($got, $expect, 'sample sentence A2N::EN::StanfordNER2008'); 16 | -------------------------------------------------------------------------------- /lib/Treex/Block/A2N/EN/t/stanford2015.t: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | use utf8; 3 | use strict; 4 | use warnings; 5 | 6 | use Test::More tests => 2; 7 | 8 | use_ok("Treex::Block::A2N::EN::StanfordNER2015"); 9 | 10 | my $in = "Peter and Paul love Stanford"; 11 | my $expect = 'p_Peter'. 'p_Paul' . 
'i_Stanford'; 12 | my $scen = q{A2N::EN::StanfordNER2015 Util::Eval nnode='print $.ne_type.$.normalized_name'}; 13 | open my $OUT, "echo $in | treex -q -Len -t $scen |"; 14 | my $got = <$OUT>; 15 | is($got, $expect, 'sample sentence A2N::EN::StanfordNER2015'); 16 | -------------------------------------------------------------------------------- /lib/Treex/Block/A2T/CS/DeleteExtraCoref.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::A2T::CS::DeleteExtraCoref; 2 | use Moose; 3 | use Treex::Core::Common; 4 | extends 'Treex::Core::Block'; 5 | 6 | # Delete generated #PersPron in constructions like "zdá se, že" 7 | 8 | sub process_tnode { 9 | my ( $self, $tnode ) = @_; 10 | return if $tnode->formeme ne 'drop'; 11 | return if !$tnode->get_coref_text_nodes(); 12 | my $verb = $tnode->get_parent(); 13 | # this kind of error is common only in present tense 14 | return if ($verb->gram_tense || '') ne 'sim'; 15 | return if $verb->t_lemma !~ /_se$/; 16 | if (any {$_->formeme =~ /^v:že/} $verb->get_children()){ 17 | $tnode->remove(); 18 | } 19 | return; 20 | } 21 | 22 | 1; 23 | 24 | -------------------------------------------------------------------------------- /lib/Treex/Block/A2T/PT/FixFormeme.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::A2T::PT::FixFormeme; 2 | use Moose; 3 | use Treex::Core::Common; 4 | extends 'Treex::Core::Block'; 5 | 6 | sub process_tnode { 7 | my ( $self, $tnode ) = @_; 8 | if ($tnode->formeme eq "adj:attr") { 9 | if ($tnode->parent->formeme =~ /n:/) { 10 | if ($tnode->precedes($tnode->parent)) { 11 | $tnode->set_formeme("adj:prenom"); 12 | } else { 13 | $tnode->set_formeme("adj:postnom"); 14 | } 15 | } 16 | } 17 | } 18 | 19 | 1; 20 | 21 | __END__ 22 | 23 | =encoding utf-8 24 | 25 | =head1 NAME 26 | 27 | Treex::Block::A2T::PT::FixFormeme 28 | 29 | 30 | 
-------------------------------------------------------------------------------- /lib/Treex/Block/A2T/ProjectSelectedWild.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::A2T::ProjectSelectedWild; 2 | use Moose; 3 | use Treex::Core::Common; 4 | 5 | extends 'Treex::Core::Block'; 6 | 7 | sub process_tnode { 8 | my ($self, $tnode) = @_; 9 | my @anodes = ($tnode->get_lex_anode, $tnode->get_aux_anodes); 10 | 11 | my ($anode) = grep {defined $_->wild->{check_comma_after}} @anodes; 12 | $tnode->wild->{check_comma_after} = $anode->wild->{check_comma_after} if (defined $anode); 13 | } 14 | 15 | 1; 16 | -------------------------------------------------------------------------------- /lib/Treex/Block/A2T/SK/SetCoapFunctors.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::A2T::SK::SetCoapFunctors; 2 | use Moose; 3 | use Treex::Core::Common; 4 | extends 'Treex::Core::Block'; 5 | 6 | sub process_tnode { 7 | 8 | my ( $self, $t_node ) = @_; 9 | my $functor; 10 | my $a_node = $t_node->get_lex_anode(); 11 | my $afun = $a_node ? 
$a_node->afun : ''; 12 | 13 | if ( $t_node->t_lemma =~ /^(a|i|aj|ani)$/ ) { 14 | $functor = "CONJ"; 15 | } 16 | elsif ( $t_node->t_lemma =~ /^(alebo|či)$/ ) { 17 | $functor = "DISJ"; 18 | } 19 | elsif ( $t_node->t_lemma =~ /^(ale|no)$/ ) { 20 | $functor = "ADVS"; 21 | } 22 | elsif ( $afun eq 'Coord' ) { 23 | $functor = 'CONJ'; 24 | } 25 | elsif ( $afun eq 'Apos' ) { 26 | $functor = 'APPS'; 27 | } 28 | 29 | if ( defined $functor ) { 30 | $t_node->set_functor($functor); 31 | } 32 | return; 33 | } 34 | 35 | 1; 36 | 37 | 38 | -------------------------------------------------------------------------------- /lib/Treex/Block/A2T/SetIsMember.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::A2T::SetIsMember; 2 | use Moose; 3 | use Treex::Core::Common; 4 | extends 'Treex::Core::Block'; 5 | 6 | sub process_tnode { 7 | my ( $self, $t_node ) = @_; 8 | if ( any { $_->is_member } $t_node->get_anodes() ) { 9 | $t_node->set_is_member(1); 10 | } 11 | return 1; 12 | } 13 | 14 | sub is_some_anode_member { 15 | my ($t_node) = @_; 16 | return ; 17 | } 18 | 19 | 1; 20 | 21 | =over 22 | 23 | =item Treex::Block::A2T::SetIsMember 24 | 25 | Coordination members on the t-layer should have the attribute C<is_member>. 26 | This attribute is filled according to the same attribute on the a-layer. 27 | 28 | =back 29 | 30 | =cut 31 | 32 | # Copyright 2009 Martin Popel 33 | # This file is distributed under the GNU General Public License v2. See $TMT_ROOT/README. 
34 | -------------------------------------------------------------------------------- /lib/Treex/Block/A2W/CS/AsciiQuotes.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::A2W::CS::AsciiQuotes; 2 | use utf8; 3 | use Moose; 4 | use Treex::Core::Common; 5 | extends 'Treex::Core::Block'; 6 | 7 | sub process_zone { 8 | my ( $self, $zone ) = @_; 9 | my $sentence = $zone->sentence; 10 | $sentence =~ tr/„“/""/; 11 | $zone->set_sentence($sentence); 12 | return; 13 | } 14 | 15 | 1; 16 | 17 | =over 18 | 19 | =item Treex::Block::A2W::CS::AsciiQuotes 20 | 21 | Correct Czech quotation marks („ and “) are changed to incorrect ASCII ("). 22 | This hack is useful for BLEU comparisons 23 | (when ASCII quotes are used in reference translations). 24 | 25 | =back 26 | 27 | =cut 28 | 29 | # Copyright 2009 Martin Popel 30 | # This file is distributed under the GNU General Public License v2. See $TMT_ROOT/README. 31 | -------------------------------------------------------------------------------- /lib/Treex/Block/A2W/CS/Detokenize.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::A2W::CS::Detokenize; 2 | use Moose; 3 | use Treex::Core::Common; 4 | extends 'Treex::Core::Block'; 5 | 6 | sub process_zone { 7 | my ( $self, $zone ) = @_; 8 | my $a_root = $zone->get_atree(); 9 | my $sentence = ''; 10 | foreach my $a_node ( $a_root->get_descendants( { ordered => 1 } ) ) { 11 | $sentence .= $a_node->form; 12 | $sentence .= ' ' if !$a_node->no_space_after; 13 | } 14 | $zone->set_sentence($sentence); 15 | } 16 | 17 | 1; 18 | 19 | =over 20 | 21 | =item Treex::Block::A2W::CS::Detokenize 22 | 23 | This block detokenizes the Czech target analytical tree using the 'no_space_after' attributes and writes down the target sentence. 24 | 25 | =back 26 | 27 | =cut 28 | 29 | # Copyright 2011 David Marecek 30 | 31 | # This file is distributed under the GNU General Public License v2. 
See $TMT_ROOT/README. 32 | -------------------------------------------------------------------------------- /lib/Treex/Block/A2W/ConcatenateTokens.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::A2W::ConcatenateTokens; 2 | use utf8; 3 | use Moose; 4 | use Treex::Core::Common; 5 | extends 'Treex::Core::Block'; 6 | 7 | sub process_zone { 8 | my ( $self, $zone ) = @_; 9 | my $a_root = $zone->get_atree(); 10 | my $sentence = join ' ', grep { !/#[A-Z]/ } map { $_->form } $a_root->get_descendants( { ordered => 1 } ); 11 | $zone->set_sentence($sentence); 12 | return; 13 | } 14 | 15 | 1; 16 | 17 | =over 18 | 19 | =item Treex::Block::A2W::ConcatenateTokens 20 | 21 | Creates the target sentence string simply by concatenation of word forms 22 | joined by spaces. You must apply detokenization after this block 23 | to delete spaces before/after punctuation etc. 24 | 25 | 26 | =back 27 | 28 | =cut 29 | 30 | # Copyright 2011 Martin Popel 31 | # This file is distributed under the GNU GPL v2 or later. See $TMT_ROOT/README. 32 | -------------------------------------------------------------------------------- /lib/Treex/Block/A2W/EN/DirtyTricks.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::A2W::EN::DirtyTricks; 2 | use utf8; 3 | use Moose; 4 | use Treex::Core::Common; 5 | extends 'Treex::Core::Block'; 6 | 7 | sub process_zone { 8 | my ( $self, $zone ) = @_; 9 | my $sentence = $zone->sentence; 10 | # Turn `` and '' into typographic quotes and collapse a doubled "I I" into a single "I". 11 | $sentence =~ s/``\s*/“/g; 12 | $sentence =~ s/\s*''/”/g; 13 | $sentence =~ s/( |^)I\s+I( |$)/${1}I$2/g; 14 | 15 | $zone->set_sentence($sentence); 16 | return; 17 | } 18 | 19 | 1; 20 | 21 | __END__ 22 | 23 | =encoding utf-8 24 | 25 | =head1 NAME 26 | 27 | Treex::Block::A2W::EN::DirtyTricks 28 | 29 | =head1 DESCRIPTION 30 | 31 | This is the place for temporary regex-based hacks. 
32 | -------------------------------------------------------------------------------- /lib/Treex/Block/A2W/EN/FixCapitalization.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::A2W::EN::FixCapitalization; 2 | 3 | use utf8; 4 | use Moose; 5 | use Treex::Core::Common; 6 | 7 | extends 'Treex::Core::Block'; 8 | # Words whose first letter must be upper-cased; under /x every alternative has to be separated by an explicit "|". 9 | my $ALL_CAPS = qr{ 10 | German|English|Turkish|French|Czech|Slovak|Spanish|Portuguese|Portugese|American|Mexican| 11 | Italian|Greek|Serbian|Russian|Chinese|Indian 12 | }xi; 13 | 14 | # Upper-cases the first letter of a token whose whole form equals one of the listed words (case-insensitively). 15 | sub process_anode { 16 | my ($self, $a_node) = @_; 17 | my $form = $a_node->form // ''; 18 | 19 | if ($form =~ /^($ALL_CAPS)$/i ){ 20 | $a_node->set_form(uc(substr($form, 0, 1)) . substr($form, 1)); 21 | } 22 | return; 23 | } 24 | 25 | 1; -------------------------------------------------------------------------------- /lib/Treex/Block/A2W/EN/Tidy.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::A2W::EN::Tidy; 2 | use Moose; 3 | use Treex::Core::Common; 4 | extends 'Treex::Core::Block'; 5 | 6 | has domain => ( 7 | is => 'ro', 8 | isa => enum( [qw(general IT)] ), 9 | default => 'general', 10 | documentation => 'domain of the input texts', 11 | ); 12 | 13 | sub process_zone { 14 | my ( $self, $zone ) = @_; 15 | 16 | my $sent = $zone->sentence; 17 | 18 | $sent =~ s/,+/,/g; 19 | $sent =~ s/,\././g; 20 | $sent =~ s/,?",/,"/g; 21 | $sent =~ s/,":/":/g; 22 | $sent =~ s/:,/:/g; 23 | $sent =~ s/(,")+/,"/g; 24 | 25 | $sent =~ s/([0-9]+),([0-9]*[1-9]+[0-9]*)/$1.$2/g; 26 | 27 | $sent =~ s/[“”]/"/g if $self->domain eq 'IT'; 28 | 29 | $zone->set_sentence($sent); 30 | return; 31 | } 32 | 33 | 1; 34 | 35 | -------------------------------------------------------------------------------- /lib/Treex/Block/A2W/PT/DirtyTricks.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::A2W::PT::DirtyTricks; 2 | use utf8; 3 | use Moose; 
4 | use Treex::Core::Common; 5 | extends 'Treex::Core::Block'; 6 | 7 | sub process_zone { 8 | my ( $self, $zone ) = @_; 9 | my $sentence = $zone->sentence; 10 | 11 | $sentence =~ s/``\s*/“/g; # Isto ainda é necessario? 12 | $sentence =~ s/\s*''/”/g; 13 | 14 | $sentence =~ s/“//g; 15 | $sentence =~ s/”//g; 16 | 17 | $zone->set_sentence($sentence); 18 | return; 19 | } 20 | 21 | 1; 22 | 23 | __END__ 24 | 25 | =encoding utf-8 26 | 27 | =head1 NAME 28 | 29 | Treex::Block::A2W::PT::DirtyTricks 30 | 31 | =head1 DESCRIPTION 32 | 33 | This is the place for temporary regex-based hacks. 34 | -------------------------------------------------------------------------------- /lib/Treex/Block/Coref/CS/Cor/PrintData.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::Coref::CS::Cor::PrintData; 2 | use Moose; 3 | use Treex::Core::Common; 4 | extends 'Treex::Block::Coref::PrintData'; 5 | with 'Treex::Block::Coref::CS::Cor::Base'; 6 | 7 | 1; 8 | #TODO add documentation 9 | -------------------------------------------------------------------------------- /lib/Treex/Block/Coref/CS/DemonPron/PrintData.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::Coref::CS::DemonPron::PrintData; 2 | use Moose; 3 | use Treex::Core::Common; 4 | use List::MoreUtils qw/none/; 5 | extends 'Treex::Block::Coref::PrintData'; 6 | with 'Treex::Block::Coref::CS::DemonPron::Base'; 7 | 8 | override 'losses_for_special_classes' => sub { 9 | my ($self, $anaph, @ante_cands) = @_; 10 | my @losses = (); 11 | my $coref_spec = $anaph->wild->{gold_coref_special} // ""; 12 | unshift @losses, ( $coref_spec =~ /e/ ? 0 : 1 ); 13 | unshift @losses, ( $coref_spec =~ /s/ ? 0 : 1 ); 14 | unshift @losses, ( (!@ante_cands && none {$_ == 0} @losses) ? 
0 : 1 ); 15 | return @losses; 16 | }; 17 | 18 | 1; 19 | #TODO add documentation 20 | -------------------------------------------------------------------------------- /lib/Treex/Block/Coref/CS/PersPron/PrintData.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::Coref::CS::PersPron::PrintData; 2 | use Moose; 3 | use Treex::Core::Common; 4 | extends 'Treex::Block::Coref::PrintData'; 5 | with 'Treex::Block::Coref::CS::PersPron::Base'; 6 | 7 | 1; 8 | #TODO add documentation 9 | -------------------------------------------------------------------------------- /lib/Treex/Block/Coref/CS/ReflPron/PrintData.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::Coref::CS::ReflPron::PrintData; 2 | use Moose; 3 | use Treex::Core::Common; 4 | extends 'Treex::Block::Coref::PrintData'; 5 | with 'Treex::Block::Coref::CS::ReflPron::Base'; 6 | 7 | 1; 8 | #TODO add documentation 9 | -------------------------------------------------------------------------------- /lib/Treex/Block/Coref/CS/RelPron/PrintData.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::Coref::CS::RelPron::PrintData; 2 | use Moose; 3 | use Treex::Core::Common; 4 | extends 'Treex::Block::Coref::PrintData'; 5 | with 'Treex::Block::Coref::CS::RelPron::Base'; 6 | 7 | 1; 8 | #TODO add documentation 9 | -------------------------------------------------------------------------------- /lib/Treex/Block/Coref/EN/Cor/PrintData.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::Coref::EN::Cor::PrintData; 2 | use Moose; 3 | use Treex::Core::Common; 4 | extends 'Treex::Block::Coref::PrintData'; 5 | with 'Treex::Block::Coref::EN::Cor::Base'; 6 | 7 | 1; 8 | #TODO add documentation 9 | -------------------------------------------------------------------------------- 
/lib/Treex/Block/Coref/EN/PersPron/PrintData.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::Coref::EN::PersPron::PrintData; 2 | use Moose; 3 | use Treex::Core::Common; 4 | extends 'Treex::Block::Coref::PrintData'; 5 | with 'Treex::Block::Coref::EN::PersPron::Base'; 6 | 7 | 1; 8 | #TODO add documentation 9 | -------------------------------------------------------------------------------- /lib/Treex/Block/Coref/EN/ReflPron/PrintData.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::Coref::EN::ReflPron::PrintData; 2 | use Moose; 3 | use Treex::Core::Common; 4 | extends 'Treex::Block::Coref::PrintData'; 5 | with 'Treex::Block::Coref::EN::ReflPron::Base'; 6 | 7 | 1; 8 | #TODO add documentation 9 | -------------------------------------------------------------------------------- /lib/Treex/Block/Coref/EN/RelPron/PrintData.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::Coref::EN::RelPron::PrintData; 2 | use Moose; 3 | use Treex::Core::Common; 4 | extends 'Treex::Block::Coref::PrintData'; 5 | with 'Treex::Block::Coref::EN::RelPron::Base'; 6 | 7 | 1; 8 | #TODO add documentation 9 | -------------------------------------------------------------------------------- /lib/Treex/Block/Coref/EntityEvent/CS/DemonPron/PrintData.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::Coref::EntityEvent::CS::DemonPron::PrintData; 2 | use Moose; 3 | use Treex::Core::Common; 4 | extends 'Treex::Block::Coref::EntityEvent::PrintData'; 5 | with 'Treex::Block::Coref::EntityEvent::CS::DemonPron::Base'; 6 | 7 | 1; 8 | #TODO add documentation 9 | -------------------------------------------------------------------------------- /lib/Treex/Block/Coref/PrettyPrint/LabelSys.pm: -------------------------------------------------------------------------------- 1 | 
package Treex::Block::Coref::PrettyPrint::LabelSys; 2 | use Moose; 3 | use Treex::Core::Common; 4 | use Treex::Tool::Coreference::NodeFilter; 5 | 6 | extends 'Treex::Core::Block'; 7 | with 'Treex::Block::Filter::Node'; 8 | 9 | sub _build_node_types { 10 | return 'all_anaph'; 11 | } 12 | 13 | sub _build_layers { 14 | return "t"; 15 | } 16 | 17 | sub process_filtered_tnode { 18 | my ($self, $tnode) = @_; 19 | 20 | $tnode->wild->{coref_diag}{is_anaph} = 1; 21 | $tnode->wild->{coref_diag}{cand_for}{$tnode->id} = 1; 22 | my @antes = $tnode->get_coref_nodes; 23 | foreach (@antes) { 24 | $_->wild->{coref_diag}{sys_ante_for}{$tnode->id} = 1; 25 | $_->wild->{coref_diag}{cand_for}{$tnode->id} = 1; 26 | } 27 | } 28 | 29 | 1; 30 | -------------------------------------------------------------------------------- /lib/Treex/Block/Depfix/CS2EN/Fix.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::Depfix::CS2EN::Fix; 2 | use Moose; 3 | use Treex::Core::Common; 4 | use utf8; 5 | extends 'Treex::Block::Depfix::Fix'; 6 | 7 | 1; 8 | 9 | =head1 NAME 10 | 11 | Treex::Block::Depfix::CS2EN::Fix 12 | -- ancestor for all Depfix::CS2EN::Fix* blocks 13 | (or at least for those operating on a-layer) 14 | 15 | =head1 AUTHOR 16 | 17 | Rudolf Rosa 18 | 19 | =head1 COPYRIGHT AND LICENSE 20 | 21 | Copyright © 2014 by Institute of Formal and Applied Linguistics, 22 | Charles University in Prague 23 | 24 | This module is free software; you can redistribute it and/or modify it 25 | under the same terms as Perl itself. 26 | 27 | -------------------------------------------------------------------------------- /lib/Treex/Block/Depfix/README: -------------------------------------------------------------------------------- 1 | 2 | Depfix code is now stored in A2A::CS (and some of it on other places 3 | according to the layer it operates on). However, in future it shall reside 4 | here. 
5 | 6 | -------------------------------------------------------------------------------- /lib/Treex/Block/Depfix/sample_config.yaml: -------------------------------------------------------------------------------- 1 | fields: 2 | - oldchild_lemma 3 | - oldchild_afun 4 | - oldchild_tag 5 | - oldparent_lemma 6 | - oldparent_afun 7 | - oldparent_tag 8 | - oldedge_direction 9 | - srcchild_lemma 10 | - srcchild_afun 11 | - srcchild_tag 12 | - srcparent_lemma 13 | - srcparent_afun 14 | - srcparent_tag 15 | - srcedge_existence 16 | - srcedge_direction 17 | - newchild_lemma 18 | - newchild_afun 19 | - newchild_tag 20 | - newparent_lemma 21 | - newparent_afun 22 | - newparent_tag 23 | 24 | features: 25 | - oldchild_afun 26 | - oldchild_tag 27 | - newparent_afun 28 | - newparent_tag 29 | - oldedge_direction 30 | - srcchild_afun 31 | - srcchild_tag 32 | - srcparent_afun 33 | - srcparent_tag 34 | - srcedge_existence 35 | - srcedge_direction 36 | 37 | predict: newchild_tag 38 | 39 | -------------------------------------------------------------------------------- /lib/Treex/Block/Eval/CorefStats.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::Eval::CorefStats; 2 | use Moose; 3 | use Treex::Core::Common; 4 | extends 'Treex::Core::Block'; 5 | 6 | sub process_tnode { 7 | my ($self, $tnode) = @_; 8 | 9 | if (defined $tnode->t_lemma && ($tnode->t_lemma eq "#PersPron")) { 10 | my @chain = $tnode->get_coref_chain; 11 | my $tree = $tnode->get_root; 12 | print "CHAIN_SIZE: " . @chain . ", SENT_ORD: " . $tree->wild->{"czeng_sentord"} . "\n"; 13 | } 14 | } 15 | 16 | 1; 17 | 18 | =over 19 | 20 | =item Treex::Block::Eval::CorefStats 21 | 22 | Prints out some of the statistics regarding coreference. 23 | 24 | =back 25 | 26 | =cut 27 | 28 | # Copyright 2011 Michal Novak 29 | 30 | # This file is distributed under the GNU General Public License v2. See $TMT_ROOT/README. 
31 | -------------------------------------------------------------------------------- /lib/Treex/Block/Filter/CzEng/AcademicTitle.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::Filter::CzEng::AcademicTitle; 2 | use Moose; 3 | use Treex::Core::Common; 4 | use Treex::Core::Log; 5 | extends 'Treex::Block::Filter::CzEng::Common'; 6 | 7 | sub process_bundle { 8 | my ( $self, $bundle ) = @_; 9 | 10 | my $en = $bundle->get_zone('en')->sentence; 11 | my $cs = $bundle->get_zone('cs')->sentence; 12 | my $pattern = '\b((Bc|Mgr|Ing|MUDr|JUDr|RNDr|PhDr|MVDr|PharmDr|ThDr|Doc|Prof|arch)\.)$'; 13 | if ( $cs =~ m/$pattern/i || $en =~ m/$pattern/i ) { 14 | $self->add_feature( $bundle, 'academic_title' ); 15 | } 16 | 17 | return 1; 18 | } 19 | 20 | 1; 21 | 22 | =over 23 | 24 | =item Treex::Block::Filter::CzEng::AcademicTitle 25 | 26 | Fires when the Czech or English sentence ends with an abbreviated academic title (e.g. Mgr., Ing., Prof.). 27 | 28 | =back 29 | 30 | =cut 31 | 32 | # Copyright 2011 Zdenek Zabokrtsky, Ales Tamchyna 33 | 34 | # This file is distributed under the GNU General Public License v2. See $TMT_ROOT/README. 35 | -------------------------------------------------------------------------------- /lib/Treex/Block/Filter/CzEng/Classifier.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::Filter::CzEng::Classifier; 2 | use Moose::Role; 3 | 4 | requires qw( init see learn save load predict score ); 5 | 6 | 1; 7 | 8 | =over 9 | 10 | =item Treex::Block::Filter::CzEng::Classifier 11 | 12 | A role that must be implemented by specific classifier types. 13 | 14 | =back 15 | 16 | =cut 17 | 18 | # Copyright 2011 Ales Tamchyna 19 | 20 | # This file is distributed under the GNU General Public License v2. See $TMT_ROOT/README.
21 | -------------------------------------------------------------------------------- /lib/Treex/Block/Filter/CzEng/GutenbergHeader.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::Filter::CzEng::GutenbergHeader; 2 | use Moose; 3 | use Treex::Core::Common; 4 | use Treex::Core::Log; 5 | extends 'Treex::Block::Filter::CzEng::Common'; 6 | 7 | sub process_bundle { 8 | my ( $self, $bundle ) = @_; 9 | 10 | my $en = $bundle->get_zone('en')->sentence; 11 | my $cs = $bundle->get_zone('cs')->sentence; 12 | my $pattern = 'Gutenberg'; 13 | if ( $cs =~ m/$pattern/ || $en =~ m/$pattern/ ) { 14 | $self->add_feature( $bundle, 'gutenberg_header' ); 15 | } 16 | 17 | return 1; 18 | } 19 | 20 | 1; 21 | 22 | =over 23 | 24 | =item Treex::Block::Filter::CzEng::GutenbergHeader 25 | 26 | Relicts of the Project Gutenberg file header left in the data. 27 | 28 | =back 29 | 30 | =cut 31 | 32 | # Copyright 2011 Zdenek Zabokrtsky, Ales Tamchyna 33 | 34 | # This file is distributed under the GNU General Public License v2. See $TMT_ROOT/README. 35 | -------------------------------------------------------------------------------- /lib/Treex/Block/Filter/CzEng/IdenticalSentences.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::Filter::CzEng::IdenticalSentences; 2 | use Moose; 3 | use Treex::Core::Common; 4 | use Treex::Core::Log; 5 | extends 'Treex::Block::Filter::CzEng::Common'; 6 | 7 | sub process_bundle { 8 | my ( $self, $bundle ) = @_; 9 | 10 | my $en = $bundle->get_zone('en')->sentence; 11 | my $cs = $bundle->get_zone('cs')->sentence; 12 | 13 | $self->add_feature( $bundle, 'identical' ) if $cs eq $en; 14 | 15 | return 1; 16 | } 17 | 18 | 1; 19 | 20 | =over 21 | 22 | =item Treex::Block::Filter::CzEng::IdenticalSentences 23 | 24 | Feature that fires when cs and en are identical. 
25 | 26 | =back 27 | 28 | =cut 29 | 30 | # Copyright 2011 Ales Tamchyna 31 | 32 | # This file is distributed under the GNU General Public License v2. See $TMT_ROOT/README. 33 | -------------------------------------------------------------------------------- /lib/Treex/Block/Filter/CzEng/InterleavingSpaces.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::Filter::CzEng::InterleavingSpaces; 2 | use Moose; 3 | use Treex::Core::Common; 4 | use Treex::Core::Log; 5 | extends 'Treex::Block::Filter::CzEng::Common'; 6 | 7 | sub process_bundle { 8 | my ( $self, $bundle ) = @_; 9 | 10 | my $en = $bundle->get_zone('en')->sentence; 11 | my $cs = $bundle->get_zone('cs')->sentence; 12 | my $pattern = ' \w \w \w \w '; 13 | if ( $cs =~ m/$pattern/ || $en =~ m/$pattern/ ) { 14 | $self->add_feature( $bundle, 'interleaving_spaces' ); 15 | } 16 | 17 | return 1; 18 | } 19 | 20 | 1; 21 | 22 | =over 23 | 24 | =item Treex::Block::Filter::CzEng::InterleavingSpaces 25 | 26 | Letters interleaved w i t h s p a c e s. 27 | 28 | =back 29 | 30 | =cut 31 | 32 | # Copyright 2011 Zdenek Zabokrtsky, Ales Tamchyna 33 | 34 | # This file is distributed under the GNU General Public License v2. See $TMT_ROOT/README. 35 | -------------------------------------------------------------------------------- /lib/Treex/Block/Filter/CzEng/LongSentence.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::Filter::CzEng::LongSentence; 2 | use Moose; 3 | use Treex::Core::Common; 4 | use List::Util qw( max ); 5 | extends 'Treex::Block::Filter::CzEng::Common'; 6 | 7 | sub process_bundle { 8 | my ( $self, $bundle ) = @_; 9 | 10 | my $en = $bundle->get_zone('en')->sentence; 11 | my $cs = $bundle->get_zone('cs')->sentence; 12 | my $length = max( length $en, length $cs ); 13 | 14 | my @bounds = ( 0, 10, 50, 100, 250, 500 ); 15 | $self->add_feature( $bundle, 'sentence_length=' . 
$self->quantize_given_bounds( $length, @bounds ) ); 16 | 17 | return 1; 18 | } 19 | 20 | 1; 21 | 22 | =over 23 | 24 | =item Treex::Block::Filter::CzEng::LongSentence 25 | 26 | Quantized maximum sentence length. 27 | 28 | =back 29 | 30 | =cut 31 | 32 | # Copyright 2011 Zdenek Zabokrtsky, Ales Tamchyna 33 | 34 | # This file is distributed under the GNU General Public License v2. See $TMT_ROOT/README. 35 | -------------------------------------------------------------------------------- /lib/Treex/Block/Filter/CzEng/LongWord.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::Filter::CzEng::LongWord; 2 | use Moose; 3 | use Treex::Core::Common; 4 | use List::Util qw( max ); 5 | extends 'Treex::Block::Filter::CzEng::Common'; 6 | 7 | sub process_bundle { 8 | my ( $self, $bundle ) = @_; 9 | 10 | my @tokens = ( 11 | $bundle->get_zone('en')->get_atree->get_descendants, 12 | $bundle->get_zone('cs')->get_atree->get_descendants 13 | ); 14 | 15 | my $length = max( map { length $_->get_attr('form') } @tokens ); 16 | 17 | my @bounds = ( 0, 5, 10, 20, 50 ); 18 | $self->add_feature( $bundle, 'max_word_length=' . $self->quantize_given_bounds( $length, @bounds ) ); 19 | 20 | return 1; 21 | } 22 | 23 | 1; 24 | 25 | =over 26 | 27 | =item Treex::Block::Filter::CzEng::LongWord 28 | 29 | Quantized maximum word length. 30 | 31 | =back 32 | 33 | =cut 34 | 35 | # Copyright 2011 Zdenek Zabokrtsky, Ales Tamchyna 36 | 37 | # This file is distributed under the GNU General Public License v2. See $TMT_ROOT/README. 
38 | -------------------------------------------------------------------------------- /lib/Treex/Block/Filter/CzEng/MicrosoftLinesWithFilenames.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::Filter::CzEng::MicrosoftLinesWithFilenames; 2 | use Moose; 3 | use Treex::Core::Common; 4 | use Treex::Core::Log; 5 | extends 'Treex::Block::Filter::CzEng::Common'; 6 | 7 | sub process_bundle { 8 | my ( $self, $bundle ) = @_; 9 | 10 | my $en = $bundle->get_zone('en')->sentence; 11 | my $cs = $bundle->get_zone('cs')->sentence; 12 | my $pattern = '\w+\.[a-z]{3}\b'; 13 | if ( $cs =~ m/$pattern/ || $en =~ m/$pattern/ ) { 14 | $self->add_feature( $bundle, 'microsoft_lines_with_filenames' ); 15 | } 16 | 17 | return 1; 18 | } 19 | 20 | 1; 21 | 22 | =over 23 | 24 | =item Treex::Block::Filter::CzEng::MicrosoftLinesWithFilenames 25 | 26 | Marking lines containing file names or URLs (typically useless content). 27 | 28 | =back 29 | 30 | =cut 31 | 32 | # Copyright 2011 Zdenek Zabokrtsky, Ales Tamchyna 33 | 34 | # This file is distributed under the GNU General Public License v2. See $TMT_ROOT/README. 
35 | -------------------------------------------------------------------------------- /lib/Treex/Block/Filter/CzEng/NonASCIICharacter.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::Filter::CzEng::NonASCIICharacter; 2 | use Moose; 3 | use Treex::Core::Common; 4 | use Treex::Core::Log; 5 | extends 'Treex::Block::Filter::CzEng::Common'; 6 | 7 | sub process_bundle { 8 | my ( $self, $bundle ) = @_; 9 | 10 | my $en = $bundle->get_zone('en')->sentence; 11 | my $cs = $bundle->get_zone('cs')->sentence; 12 | while ($en =~ m/([^\p{ASCII}“”´´``—–€‐‘’‑‑])/g) { 13 | if ($cs !~ m/$1/) { 14 | $self->add_feature( $bundle, 'nonascii_character' ); 15 | last; 16 | } 17 | } 18 | 19 | return 1; 20 | } 21 | 22 | 1; 23 | 24 | =over 25 | 26 | =item Treex::Block::Filter::CzEng::NonASCIICharacter 27 | 28 | English side contains a non-ASCII character not confirmed by the Czech side. 29 | 30 | =back 31 | 32 | =cut 33 | 34 | # Copyright 2011 Zdenek Zabokrtsky, Ales Tamchyna 35 | 36 | # This file is distributed under the GNU General Public License v2. See $TMT_ROOT/README. 
37 | -------------------------------------------------------------------------------- /lib/Treex/Block/Filter/CzEng/RepeatedCharacter.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::Filter::CzEng::RepeatedCharacter; 2 | use Moose; 3 | use Treex::Core::Common; 4 | use Treex::Core::Log; 5 | extends 'Treex::Block::Filter::CzEng::Common'; 6 | 7 | sub process_bundle { 8 | my ( $self, $bundle ) = @_; 9 | 10 | my $en = $bundle->get_zone('en')->sentence; 11 | my $cs = $bundle->get_zone('cs')->sentence; 12 | my $pattern = '([^\d])\1{3,}'; 13 | if ( $cs =~ m/$pattern/ || $en =~ m/$pattern/ ) { 14 | $self->add_feature( $bundle, 'repeated_character' ); 15 | } 16 | 17 | return 1; 18 | } 19 | 20 | 1; 21 | 22 | =over 23 | 24 | =item Treex::Block::Filter::CzEng::RepeatedCharacter 25 | 26 | Finding subsequences of a character repeated four or more times. 27 | 28 | =back 29 | 30 | =cut 31 | 32 | # Copyright 2011 Zdenek Zabokrtsky, Ales Tamchyna 33 | 34 | # This file is distributed under the GNU General Public License v2. See $TMT_ROOT/README. 35 | -------------------------------------------------------------------------------- /lib/Treex/Block/Filter/Generic/Classifier.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::Filter::Generic::Classifier; 2 | use Moose::Role; 3 | 4 | requires qw( init see learn save load predict score ); 5 | 6 | 1; 7 | 8 | =over 9 | 10 | =item Treex::Block::Filter::Generic::Classifier 11 | 12 | A role that must be implemented by specific classifier types. 13 | 14 | =back 15 | 16 | =cut 17 | 18 | # Copyright 2011, 2014 Ales Tamchyna 19 | 20 | # This file is distributed under the GNU General Public License v2. See $TMT_ROOT/README. 
21 | -------------------------------------------------------------------------------- /lib/Treex/Block/Filter/Generic/IdenticalSentences.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::Filter::Generic::IdenticalSentences; 2 | use Moose; 3 | use Treex::Core::Common; 4 | use Treex::Core::Log; 5 | extends 'Treex::Block::Filter::Generic::Common'; 6 | 7 | sub process_bundle { 8 | my ( $self, $bundle ) = @_; 9 | 10 | my $src = $bundle->get_zone($self->language)->sentence; 11 | my $tgt = $bundle->get_zone($self->to_language)->sentence; 12 | 13 | $self->add_feature( $bundle, 'identical' ) if $src eq $tgt; 14 | 15 | return 1; 16 | } 17 | 18 | 1; 19 | 20 | =over 21 | 22 | =item Treex::Block::Filter::Generic::IdenticalSentences 23 | 24 | Feature that fires when the sentences in the language and to_language zones are identical. 25 | 26 | =back 27 | 28 | =cut 29 | 30 | # Copyright 2011, 2014 Ales Tamchyna 31 | 32 | # This file is distributed under the GNU General Public License v2. See $TMT_ROOT/README. 33 | -------------------------------------------------------------------------------- /lib/Treex/Block/Filter/Generic/LongSentence.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::Filter::Generic::LongSentence; 2 | use Moose; 3 | use Treex::Core::Common; 4 | use List::Util qw( max ); 5 | extends 'Treex::Block::Filter::Generic::Common'; 6 | 7 | sub process_bundle { 8 | my ( $self, $bundle ) = @_; 9 | 10 | my $src = $bundle->get_zone($self->language)->sentence; 11 | my $tgt = $bundle->get_zone($self->to_language)->sentence; 12 | my $length = max( length $src, length $tgt ); 13 | 14 | my @bounds = ( 0, 10, 50, 100, 250, 500 ); 15 | $self->add_feature( $bundle, 'sentence_length=' . $self->quantize_given_bounds( $length, @bounds ) ); 16 | 17 | return 1; 18 | } 19 | 20 | 1; 21 | 22 | =over 23 | 24 | =item Treex::Block::Filter::Generic::LongSentence 25 | 26 | Quantized maximum sentence length.
27 | 28 | =back 29 | 30 | =cut 31 | 32 | # Copyright 2011, 2014 Zdenek Zabokrtsky, Ales Tamchyna 33 | 34 | # This file is distributed under the GNU General Public License v2. See $TMT_ROOT/README. 35 | -------------------------------------------------------------------------------- /lib/Treex/Block/Filter/Generic/RepeatedCharacter.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::Filter::Generic::RepeatedCharacter; 2 | use Moose; 3 | use Treex::Core::Common; 4 | use Treex::Core::Log; 5 | extends 'Treex::Block::Filter::Generic::Common'; 6 | 7 | sub process_bundle { 8 | my ( $self, $bundle ) = @_; 9 | 10 | my $src = $bundle->get_zone($self->language)->sentence; 11 | my $tgt = $bundle->get_zone($self->to_language)->sentence; 12 | my $pattern = '([^\d])\1{3,}'; 13 | if ( $src =~ m/$pattern/ || $tgt =~ m/$pattern/ ) { 14 | $self->add_feature( $bundle, 'repeated_character' ); 15 | } 16 | 17 | return 1; 18 | } 19 | 20 | 1; 21 | 22 | =over 23 | 24 | =item Treex::Block::Filter::Generic::RepeatedCharacter 25 | 26 | Finding subsequences of a character repeated four or more times. 27 | 28 | =back 29 | 30 | =cut 31 | 32 | # Copyright 2011, 2014 Zdenek Zabokrtsky, Ales Tamchyna 33 | 34 | # This file is distributed under the GNU General Public License v2. See $TMT_ROOT/README. 
35 | -------------------------------------------------------------------------------- /lib/Treex/Block/HamleDT/LA/List_absolute_adverbs_ITTB.txt: -------------------------------------------------------------------------------- 1 | altissime 2 | arrogantissime 3 | communissime 4 | convenientissime 5 | evidentissime 6 | firmissime 7 | frequentissime 8 | imperfectissime 9 | iustissime 10 | maxime 11 | minime 12 | optime 13 | perfectissime 14 | plenissime 15 | potissime 16 | summe 17 | ultimo 18 | verissime 19 | -------------------------------------------------------------------------------- /lib/Treex/Block/HamleDT/LA/List_comparative_adverbs_ITTB.txt: -------------------------------------------------------------------------------- 1 | accuratius 2 | altius 3 | amplius 4 | apertius 5 | ardentius 6 | attentius 7 | certius 8 | clarius 9 | communius 10 | congruentius 11 | convenientius 12 | decentius 13 | difficilius 14 | dignius 15 | diligentius 16 | efficacius 17 | eminentius 18 | evidentius 19 | expressius 20 | exterius 21 | facilius 22 | ferventius 23 | fortius 24 | frequentius 25 | imperfectius 26 | inferius 27 | intensius 28 | interius 29 | lentius 30 | levius 31 | liberalius 32 | liberius 33 | longius 34 | manifestius 35 | melius 36 | minus 37 | multiplicius 38 | nobilius 39 | particularius 40 | perfectius 41 | plenius 42 | plures 43 | pluries 44 | plus 45 | posterius 46 | potius 47 | principalius 48 | prius 49 | probabilius 50 | profundius 51 | promptius 52 | propinquius 53 | rationabilius 54 | remissius 55 | simplicius 56 | specialius 57 | suavius 58 | subtilius 59 | superius 60 | tardius 61 | uberius 62 | ulterius 63 | universalius 64 | utilius 65 | vehementius 66 | velocius 67 | vicinius 68 | -------------------------------------------------------------------------------- /lib/Treex/Block/HamleDT/Test/AuxGIsPunctuation.pm: -------------------------------------------------------------------------------- 1 | package 
Treex::Block::HamleDT::Test::AuxGIsPunctuation; 2 | use Moose; 3 | use Treex::Core::Common; 4 | extends 'Treex::Block::Test::BaseTester'; 5 | 6 | sub process_anode 7 | { 8 | my $self = shift; 9 | my $node = shift; 10 | # AuxG may also be used for numbers identifying items in numbered lists, so purely numeric forms are exempt (use !~ here: unary ! binds tighter than =~). 11 | if($node->deprel() eq 'AuxG' && !$node->is_punctuation() && $node->form() !~ m/^\d+$/) 12 | { 13 | $self->complain($node, 'AuxG : '.$node->tag()); 14 | } 15 | } 16 | 17 | 1; 18 | 19 | =over 20 | 21 | =item Treex::Block::HamleDT::Test::AuxGIsPunctuation 22 | 23 | A node attached as AuxG must be POS-tagged as punctuation. 24 | 25 | =back 26 | 27 | =cut 28 | 29 | # Copyright 2012 Honza Václ 30 | # Copyright 2015 Dan Zeman 31 | # This file is distributed under the GNU GPL v2 or later. See $TMT_ROOT/README. 32 | -------------------------------------------------------------------------------- /lib/Treex/Block/HamleDT/Test/AuxKUnderRoot.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::HamleDT::Test::AuxKUnderRoot; 2 | use utf8; 3 | use Moose; 4 | use Treex::Core::Common; 5 | extends 'Treex::Block::Test::BaseTester'; 6 | 7 | # Tests whether AuxK is attached directly to the root node.
8 | 9 | sub process_anode 10 | { 11 | my $self = shift; 12 | my $node = shift; 13 | if($node->deprel() eq 'AuxK') 14 | { 15 | my $parent = $node->parent(); 16 | if(defined($parent) && $parent->is_root()) 17 | { 18 | $self->praise($node); 19 | } 20 | else 21 | { 22 | $self->complain($node); 23 | } 24 | } 25 | } 26 | 27 | # (C) 2012 Jindřich Libovický 28 | # Copyright 2015 Dan Zeman 29 | 30 | 1; 31 | -------------------------------------------------------------------------------- /lib/Treex/Block/HamleDT/Test/AuxVNotOnTop.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::HamleDT::Test::AuxVNotOnTop; 2 | use utf8; 3 | use Moose; 4 | use Treex::Core::Common; 5 | extends 'Treex::Block::Test::BaseTester'; 6 | 7 | # Testing if there is not an auxiliary verb directly under the root 8 | 9 | sub process_atree { 10 | my ( $self, $a_root ) = @_; 11 | 12 | foreach my $anode ($a_root->get_children()) { 13 | if ($anode->deprel eq "AuxV") { 14 | $self->complain($a_root); 15 | return; 16 | } 17 | } 18 | } 19 | 20 | # (C) 2012 Karel Bílek , Jindřich Libovický 21 | 22 | 1; 23 | -------------------------------------------------------------------------------- /lib/Treex/Block/HamleDT/Test/AuxXIsComma.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::HamleDT::Test::AuxXIsComma; 2 | use Moose; 3 | use Treex::Core::Common; 4 | extends 'Treex::Block::Test::BaseTester'; 5 | 6 | sub process_anode { 7 | my ($self, $anode) = @_; 8 | 9 | if ($anode->deprel eq 'AuxX' && $anode->form ne ',') { 10 | $self->complain($anode); 11 | } 12 | } 13 | 14 | 1; 15 | 16 | =over 17 | 18 | =item Treex::Block::HamleDT::Test::AuxXisComma 19 | 20 | Only comma should be AuxX 21 | 22 | =back 23 | 24 | =cut 25 | -------------------------------------------------------------------------------- /lib/Treex/Block/HamleDT/Test/CoApAboveEveryMember.pm: 
-------------------------------------------------------------------------------- 1 | package Treex::Block::HamleDT::Test::CoApAboveEveryMember; 2 | use Moose; 3 | use Treex::Core::Common; 4 | extends 'Treex::Block::Test::BaseTester'; 5 | 6 | sub process_anode 7 | { 8 | my $self = shift; 9 | my $node = shift; 10 | if($node->is_member()) 11 | { 12 | my $parent = $node->parent(); 13 | if($parent->deprel() !~ m/^(Coord|Apos)$/) 14 | { 15 | $self->complain($node); 16 | } 17 | else 18 | { 19 | $self->praise($node); 20 | } 21 | } 22 | } 23 | 24 | 1; 25 | 26 | =over 27 | 28 | =item Treex::Block::HamleDT::Test::CoApAboveEveryMember 29 | 30 | Nodes with is_member=1 are allowed only under coordination or apposition heads. 31 | 32 | =back 33 | 34 | =cut 35 | 36 | # Copyright 2011 Zdeněk Žabokrtský 37 | # Copyright 2015 Dan Zeman 38 | # This file is distributed under the GNU GPL v2 or later. See $TMT_ROOT/README. 39 | -------------------------------------------------------------------------------- /lib/Treex/Block/HamleDT/Test/LeafAux.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::HamleDT::Test::LeafAux; 2 | use Moose; 3 | use Treex::Core::Common; 4 | extends 'Treex::Block::Test::BaseTester'; 5 | 6 | sub process_anode { 7 | my ( $self, $anode ) = @_; 8 | if ( ( $anode->deprel || '' ) =~ /^(AuxT|AuxR|AuxX|AuxA)$/ && $anode->get_children ) { 9 | $self->complain( $anode, $anode->deprel ); 10 | } 11 | return; 12 | } 13 | 14 | 1; 15 | 16 | =over 17 | 18 | =item Treex::Block::HamleDT::Test::LeafAux 19 | 20 | Deprel values AuxT, AuxR, AuxX and AuxA imply 21 | that the node should be a leaf. 22 | 23 | =back 24 | 25 | =cut 26 | 27 | # Copyright 2011 Zdenek Zabokrtsky 28 | # This file is distributed under the GNU GPL v2 or later. See $TMT_ROOT/README.
29 | -------------------------------------------------------------------------------- /lib/Treex/Block/HamleDT/Test/MemberInEveryCoAp.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::HamleDT::Test::MemberInEveryCoAp; 2 | use Moose; 3 | use Treex::Core::Common; 4 | extends 'Treex::Block::Test::BaseTester'; 5 | 6 | sub process_anode 7 | { 8 | my $self = shift; 9 | my $node = shift; 10 | if($node->deprel() =~ m/^(Coord|Apos)$/) 11 | { 12 | if(!first {$_->is_member()} $node->children()) 13 | { 14 | $self->complain($node); 15 | } 16 | else 17 | { 18 | $self->praise($node); 19 | } 20 | } 21 | } 22 | 23 | 1; 24 | 25 | =over 26 | 27 | =item Treex::Block::HamleDT::Test::MemberInEveryCoAp 28 | 29 | Every coordination/apposition structure should have at least one 30 | member node among its children. 31 | 32 | =back 33 | 34 | =cut 35 | 36 | # Copyright 2011 Zdeněk Žabokrtský 37 | # Copyright 2015 Dan Zeman 38 | # This file is distributed under the GNU GPL v2 or later. See $TMT_ROOT/README. 39 | -------------------------------------------------------------------------------- /lib/Treex/Block/HamleDT/Test/NonParentAuxS.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::HamleDT::Test::NonParentAuxS; 2 | use Moose; 3 | use Treex::Core::Common; 4 | extends 'Treex::Block::Test::BaseTester'; 5 | 6 | sub process_anode { 7 | my ($self, $anode) = @_; 8 | if ($anode->deprel eq 'AuxS' 9 | and $anode->get_parents 10 | ) { 11 | $self->complain($anode); 12 | } 13 | } 14 | 15 | 1; 16 | 17 | =over 18 | 19 | =item Treex::Block::HamleDT::Test::NonParentAuxS 20 | 21 | AuxS must not have a parent. 22 | 23 | =back 24 | 25 | =cut 26 | 27 | # Copyright 2011 Zdenek Zabokrtsky 28 | # This file is distributed under the GNU GPL v2 or later. See $TMT_ROOT/README. 
29 | -------------------------------------------------------------------------------- /lib/Treex/Block/HamleDT/Test/NonemptyAttr.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::HamleDT::Test::NonemptyAttr; 2 | use Moose; 3 | use Treex::Core::Common; 4 | extends 'Treex::Block::Test::BaseTester'; 5 | 6 | sub process_anode { 7 | my ($self, $anode) = @_; 8 | foreach my $attr_name (qw(form lemma tag)) { 9 | my $attr_value = $anode->get_attr($attr_name); 10 | if ( !defined $attr_value || $attr_value eq '' ) { 11 | $self->complain($anode, $attr_name); 12 | } 13 | } 14 | } 15 | 16 | 1; 17 | 18 | =over 19 | 20 | =item Treex::Block::HamleDT::Test::NonemptyAttr 21 | 22 | Report attributes form, lemma, or tag with empty string or undefined value. 23 | 24 | =back 25 | 26 | =cut 27 | 28 | # Copyright 2011 Zdenek Zabokrtsky 29 | # This file is distributed under the GNU GPL v2 or later. See $TMT_ROOT/README. 30 | 31 | -------------------------------------------------------------------------------- /lib/Treex/Block/HamleDT/Test/UD/SingleRoot.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::HamleDT::Test::UD::SingleRoot; 2 | use Moose; 3 | use Treex::Core::Common; 4 | extends 'Treex::Block::Test::BaseTester'; 5 | 6 | sub process_atree 7 | { 8 | my $self = shift; 9 | my $root = shift; 10 | # In Universal Dependencies, there is only one top node (child of our artificial root, dependency label 'root'). 11 | my @topnodes = $root->children(); 12 | if(scalar(@topnodes)>1) 13 | { 14 | $self->complain($topnodes[1], 'More than one top node.'); 15 | } 16 | } 17 | 18 | 1; 19 | 20 | =over 21 | 22 | =item Treex::Block::HamleDT::Test::UD::SingleRoot 23 | 24 | There must be just one top node. 25 | 26 | We call the child of our artificial root node the top node. 27 | This is the actual sentence root from the linguistic point of view. 
28 | 29 | =back 30 | 31 | =cut 32 | 33 | # Copyright 2015 Dan Zeman 34 | # This file is distributed under the GNU GPL v2 or later. See $TMT_ROOT/README. 35 | -------------------------------------------------------------------------------- /lib/Treex/Block/HamleDT/Transform/ComplexVerbRootFirst.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::HamleDT::Transform::ComplexVerbRootFirst; 2 | use Moose; 3 | extends 'Treex::Block::HamleDT::Transform::BaseTransformer'; 4 | use Treex::Tool::ATreeTransformer::ComplexVerb; 5 | 6 | sub BUILD { 7 | my ($self) = @_; 8 | $self->set_transformer( 9 | Treex::Tool::ATreeTransformer::ComplexVerb->new( 10 | { 11 | subscription => $self->subscription, 12 | new_root => 'first', 13 | } 14 | ) 15 | ) 16 | } 17 | 18 | 1; 19 | 20 | -------------------------------------------------------------------------------- /lib/Treex/Block/HamleDT/Transform/ComplexVerbRootLast.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::HamleDT::Transform::ComplexVerbRootLast; 2 | use Moose; 3 | extends 'Treex::Block::HamleDT::Transform::BaseTransformer'; 4 | use Treex::Tool::ATreeTransformer::ComplexVerb; 5 | 6 | sub BUILD { 7 | my ($self) = @_; 8 | $self->set_transformer( 9 | Treex::Tool::ATreeTransformer::ComplexVerb->new( 10 | { 11 | subscription => $self->subscription, 12 | new_root => 'last', 13 | } 14 | ) 15 | ) 16 | } 17 | 18 | 1; 19 | 20 | -------------------------------------------------------------------------------- /lib/Treex/Block/Import/Sentences.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::Import::Sentences; 2 | use Moose; 3 | 4 | use Treex::Core::Common; 5 | 6 | extends 'Treex::Core::Block'; 7 | 8 | has 'from' => ( is => 'ro', isa => 'Treex::Core::Files', required => 1, coerce => 1 ); 9 | 10 | sub process_document { 11 | my ($self, $doc) = @_; 12 | 13 | my 
@bundles = $doc->get_bundles(); 14 | my $bundle_count = scalar @bundles; 15 | 16 | while (my $line = $self->from->next_line()) { 17 | chomp $line; 18 | if ($line =~ /^\s*$/) { 19 | if (@bundles < $bundle_count) { 20 | log_fatal "Number of lines in the file to import does not correspond with number of bundles in the processed documents"; 21 | } 22 | next; 23 | } 24 | my $bundle = shift @bundles; 25 | my $new_zone = $bundle->create_zone($self->language, $self->selector); 26 | $new_zone->set_sentence($line); 27 | 28 | last if (!@bundles); 29 | } 30 | } 31 | 32 | 1; 33 | -------------------------------------------------------------------------------- /lib/Treex/Block/MLFix/CS/Oracle.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::MLFix::CS::Oracle; 2 | 3 | use Moose; 4 | use utf8; 5 | 6 | use Treex::Tool::MLFix::CS::FormGenerator; 7 | 8 | extends 'Treex::Block::MLFix::Oracle'; 9 | 10 | sub _build_form_generator { 11 | my ($self) = @_; 12 | 13 | return Treex::Tool::MLFix::CS::FormGenerator->new(); 14 | } 15 | 16 | 1; 17 | 18 | =head1 NAME 19 | 20 | MLFix::CS::Oracle 21 | 22 | =head1 DESCRIPTION 23 | 24 | =head1 PARAMETERS 25 | 26 | =over 27 | 28 | =back 29 | 30 | =head1 AUTHOR 31 | 32 | Dušan Variš 33 | 34 | =head1 COPYRIGHT AND LICENSE 35 | 36 | Copyright © 2013 by Institute of Formal and Applied Linguistics, 37 | Charles University in Prague 38 | 39 | This module is free software; you can redistribute it and/or modify it 40 | under the same terms as Perl itself. 
41 | -------------------------------------------------------------------------------- /lib/Treex/Block/MLFix/CS/ScikitLearn.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::MLFix::CS::ScikitLearn; 2 | 3 | use Moose; 4 | use utf8; 5 | 6 | use Treex::Tool::MLFix::CS::FormGenerator; 7 | 8 | extends 'Treex::Block::MLFix::ScikitLearn'; 9 | 10 | sub _build_form_generator { 11 | my ($self) = @_; 12 | 13 | return Treex::Tool::MLFix::CS::FormGenerator->new(); 14 | } 15 | 16 | 1; 17 | 18 | =head1 NAME 19 | 20 | MLFix::CS::ScikitLearn 21 | 22 | =head1 DESCRIPTION 23 | 24 | =head1 PARAMETERS 25 | 26 | =over 27 | 28 | =back 29 | 30 | =head1 AUTHOR 31 | 32 | Dušan Variš 33 | 34 | =head1 COPYRIGHT AND LICENSE 35 | 36 | Copyright © 2013 by Institute of Formal and Applied Linguistics, 37 | Charles University in Prague 38 | 39 | This module is free software; you can redistribute it and/or modify it 40 | under the same terms as Perl itself. 41 | -------------------------------------------------------------------------------- /lib/Treex/Block/Print/AtreeStats.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::Print::AtreeStats; 2 | use Moose; 3 | use Treex::Core::Common; 4 | extends 'Treex::Core::Block'; 5 | 6 | has '+language' => ( required => 1 ); 7 | has '+selector' => (required => 1); 8 | 9 | has '_num_sentences' => (is => 'rw', isa => 'Int', default => 0); 10 | has '_total_tokens' => (is => 'rw', isa => 'Int', default => 0); 11 | 12 | sub process_atree { 13 | my ($self, $tree) = @_; 14 | my @nodes = $tree->get_descendants( { ordered => 1 } ); 15 | $self->_set_num_sentences($self->_num_sentences + 1); 16 | $self->_set_total_tokens($self->_total_tokens + scalar(@nodes)); 17 | } 18 | 19 | sub process_end { 20 | my ($self) = @_; 21 | print "Number of Sentences:\t" . $self->_num_sentences . "\n"; 22 | print "Number of Tokens:\t" . $self->_total_tokens . 
"\n"; 23 | } 24 | 25 | 1; -------------------------------------------------------------------------------- /lib/Treex/Block/Print/Debug/DocumentTextHead.pm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ufal/treex/06c3353d9d7c61ff86c572cd926b055733a73442/lib/Treex/Block/Print/Debug/DocumentTextHead.pm -------------------------------------------------------------------------------- /lib/Treex/Block/Read/PDT_schema/pml_common.rng: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | -------------------------------------------------------------------------------- /lib/Treex/Block/Read/t/base.t: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | use strict; 4 | use warnings; 5 | 6 | use Test::More; 7 | use Test::Output; 8 | BEGIN { require_ok('Treex::Block::Read::BaseReader') } 9 | 10 | my $reader = Treex::Block::Read::BaseReader->new( from => '-', file_stem => 'test' ); 11 | isa_ok( $reader, 'Treex::Block::Read::BaseReader' ); 12 | 13 | stderr_like( 14 | sub { 15 | eval { $reader->next_document() }; 16 | }, 17 | qr/method next_document must be overridden in/, 18 | 'subroutine next_document has to fail' 19 | ); 20 | 21 | cmp_ok( $reader->number_of_documents(), '==', 1, 'There should be exactly one document' ); 22 | 23 | $reader->next_filename(); 24 | 25 | is( $reader->current_filename(), '-', 'Current file is STDIN(-)' ); 26 | 27 | done_testing(); 28 | -------------------------------------------------------------------------------- /lib/Treex/Block/Read/t/base_aligned.t: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | use strict; 4 | use warnings; 5 | 6 | use Test::More tests => 3; 7 | use 
Treex::Block::Read::BaseAlignedReader; 8 | use Test::Output; 9 | 10 | my $reader = new_ok('Treex::Block::Read::BaseAlignedReader'); 11 | 12 | #require next_document overriden 13 | stderr_like( 14 | sub { 15 | eval { 16 | $reader->next_document(); 17 | } 18 | }, 19 | qr/next_document must be overriden/, 20 | q(require next_document overriden) 21 | ); 22 | 23 | TODO: { 24 | local $TODO = 'Need tests on (next|current)_filenames a spol.'; 25 | 26 | fail('Write some tests'); 27 | } 28 | 29 | -------------------------------------------------------------------------------- /lib/Treex/Block/Read/t/base_aligned_text.t: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | use strict; 4 | use warnings; 5 | 6 | use Test::More tests => 2; 7 | use_ok('Treex::Block::Read::BaseAlignedTextReader'); 8 | 9 | my $reader = new_ok('Treex::Block::Read::BaseAlignedTextReader'); 10 | 11 | __END__ 12 | Stable test should not produce errors/warnings on STDERR, BaseAlignedTextReader will be substituted by another solution in future anyway 13 | -------------------------------------------------------------------------------- /lib/Treex/Block/Read/t/cdt_tag.t: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | use strict; 4 | use warnings; 5 | 6 | use Test::More; 7 | use Treex::Block::Read::CdtTag; 8 | use File::Basename; 9 | my $my_dir = dirname($0); 10 | 11 | my $reader = Treex::Block::Read::CdtTag->new( 12 | from => join ',', map {"$my_dir/$_"} qw(cdt-test-0005-da.tag cdt-test-0005-es-lotte.tag cdt-test-0005-it-lisa.tag), 13 | ); 14 | 15 | my @documents; 16 | my $new_document; 17 | while ($new_document = $reader->next_document) { 18 | push @documents, $new_document; 19 | } 20 | 21 | 22 | is(scalar(@documents), 3, q(All test tag files loaded)); 23 | 24 | done_testing(); 25 | 26 | 27 | END { 28 | # delete temporary files 29 | } 30 | 
-------------------------------------------------------------------------------- /lib/Treex/Block/Read/t/gzip.t: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | use strict; 4 | use warnings; 5 | 6 | use Test::More; 7 | use Treex::Block::Read::Text; 8 | use PerlIO::via::gzip; 9 | 10 | my $value = int rand 100; #get some random value 11 | my $filename = 'text' . (int rand 100) . '.gz'; 12 | open my $f, '>:via(gzip)', $filename or die($!); #open it 13 | print $f $value; #print there the value 14 | close $f; 15 | my $reader = Treex::Block::Read::Text->new( language => 'en', from => $filename ); 16 | 17 | my $doc = $reader->next_document(); 18 | is($doc->get_zone('en')->text, $value, q(Doc reader succesfully read generated value)); 19 | done_testing(); 20 | END { 21 | unlink $filename; 22 | } 23 | -------------------------------------------------------------------------------- /lib/Treex/Block/Read/t/pcedt.t: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | use strict; 4 | use warnings; 5 | 6 | use Test::More tests => 1; 7 | 8 | use Treex::Core; 9 | 10 | my $test_file = "/net/work/people/toman/pcedt_data/pdtpml/00/wsj_0010_en.t.gz"; 11 | my $schema_dir = "/net/os/h/zabokrtsky/svn_checkouts/pcedt_release/schemata"; 12 | 13 | # Stable test should not produce errors/warnings on STDERR 14 | #my $scenario = Treex::Core::Scenario->new( 15 | # { from_string => "Read::PCEDT from=$test_file schema_dir=$schema_dir Write::Treex path=./" } 16 | #); 17 | TODO: { 18 | local $TODO = q(PDEDT reader not ready yet); 19 | my $result; 20 | #my $result = eval {$scenario->run}; 21 | ok( $result, 'bunch of PCEDT files can be opened' ); 22 | } 23 | 24 | -------------------------------------------------------------------------------- /lib/Treex/Block/Read/t/word_alignment_xml.t: -------------------------------------------------------------------------------- 1 | 
#!/usr/bin/env perl 2 | 3 | use strict; 4 | use warnings; 5 | 6 | use Test::More; 7 | use Treex::Block::Read::WordAlignmentXML; 8 | use File::Basename; 9 | my $my_dir = dirname($0); 10 | 11 | my $reader = Treex::Block::Read::WordAlignmentXML->new( 12 | from => "$my_dir/word_alignment_xml_sample.wa", 13 | ); 14 | 15 | my $document = $reader->next_document; 16 | my @en_nodes = map {$_->get_zone('en')->get_atree->get_descendants} $document->get_bundles; 17 | 18 | is(scalar(@en_nodes), 41, q(Correct number of English tokens read from the wa-file.)); 19 | 20 | done_testing(); 21 | 22 | END { 23 | # delete temporary files 24 | } 25 | -------------------------------------------------------------------------------- /lib/Treex/Block/Segment/NaiveSuggestBreaks.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::Segment::NaiveSuggestBreaks; 2 | use Moose; 3 | use Treex::Core::Common; 4 | extends 'Treex::Block::Segment::SuggestSegmentBreaks'; 5 | 6 | 7 | sub _find_breaks { 8 | my ($self, $scores) = @_; 9 | 10 | my @break_idx_list = (); 11 | 12 | for (my $i = 1; $i < scalar @$scores; $i++) { 13 | if ($i % $self->max_size == 0) { 14 | push @break_idx_list, $i; 15 | } 16 | } 17 | 18 | return @break_idx_list; 19 | } 20 | 21 | sub name { 22 | return ''; 23 | } 24 | 25 | 1; 26 | 27 | # TODO POD 28 | -------------------------------------------------------------------------------- /lib/Treex/Block/SemevalABSA/AnnotateWithRules.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::SemevalABSA::AnnotateWithRules; 2 | use utf8; 3 | use Moose; 4 | use Treex::Core::Common; 5 | extends 'Treex::Core::Block'; 6 | 7 | sub process_anode { 8 | my ( $self, $anode ) = @_; 9 | if ($anode->wild->{absa_rules}) { 10 | $anode->set_form($anode->form . "#RULES#" . $anode->wild->{absa_rules}); 11 | $anode->set_lemma($anode->lemma . "#RULES#" . 
$anode->wild->{absa_rules}); 12 | } 13 | } 14 | 15 | 1; 16 | -------------------------------------------------------------------------------- /lib/Treex/Block/SemevalABSA/MoveABSAFromWild.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::SemevalABSA::MoveABSAFromWild; 2 | use utf8; 3 | use Moose; 4 | use Treex::Core::Common; 5 | extends 'Treex::Core::Block'; 6 | 7 | sub process_anode { 8 | my ( $self, $anode ) = @_; 9 | if ($anode->wild->{absa_is_aspect}) { 10 | my $polarity = $anode->wild->{absa_polarity}; 11 | $polarity =~ s/positive/+/; 12 | $polarity =~ s/negative/-/; 13 | $polarity =~ s/neutral/0/; 14 | $anode->set_form($anode->form . "#ASP#$polarity"); 15 | $anode->set_lemma($anode->lemma . "#ASP#$polarity"); 16 | 17 | delete $anode->wild->{absa_polarity}; 18 | delete $anode->wild->{absa_is_aspect}; 19 | } 20 | } 21 | 22 | 1; 23 | -------------------------------------------------------------------------------- /lib/Treex/Block/SemevalABSA/MoveABSAToWildCandidates.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::SemevalABSA::MoveABSAToWildCandidates; 2 | use utf8; 3 | use Moose; 4 | use Treex::Core::Common; 5 | extends 'Treex::Core::Block'; 6 | 7 | sub process_atree { 8 | my ( $self, $atree ) = @_; 9 | my @nodes = $atree->get_descendants; 10 | my $isaspect = 0; 11 | my $polarity = ""; 12 | for my $node (@nodes) { 13 | if ($node->{form} =~ m/^_ASPECT_START_(.*)_$/) { 14 | $polarity = $1; 15 | $isaspect = 1; 16 | $node->remove; 17 | } elsif ($node->{form} =~ m/^_ASPECT_END_$/) { 18 | $isaspect = 0; 19 | $node->remove; 20 | } else { 21 | if ($isaspect) { 22 | $node->wild->{absa_rules} = "bsln^$polarity"; 23 | } 24 | } 25 | } 26 | 27 | return 1; 28 | } 29 | 30 | 1; 31 | -------------------------------------------------------------------------------- /lib/Treex/Block/T2A/AddInterleavedFormemeNodes.pm: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/ufal/treex/06c3353d9d7c61ff86c572cd926b055733a73442/lib/Treex/Block/T2A/AddInterleavedFormemeNodes.pm -------------------------------------------------------------------------------- /lib/Treex/Block/T2A/CS/CheckCommas.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::T2A::CS::CheckCommas; 2 | use Moose; 3 | use Treex::Core::Common; 4 | 5 | extends 'Treex::Core::Block'; 6 | 7 | sub process_tnode { 8 | my ($self, $tnode) = @_; 9 | 10 | if ($tnode->wild->{check_comma_after}) { 11 | my $anode = $tnode->get_lex_anode or return; 12 | my $next_anode = $anode->get_next_node; 13 | return if !$next_anode || $next_anode->lemma eq ","; 14 | 15 | my $comma = $anode->create_child({ 16 | 'form' => ',', 17 | 'lemma' => ',', 18 | 'afun' => 'AuxX', 19 | 'morphcat/pos' => 'Z', 20 | 'clause_number' => 0, 21 | }); 22 | $comma->shift_after_node($anode); 23 | } 24 | } 25 | 26 | 1; 27 | -------------------------------------------------------------------------------- /lib/Treex/Block/T2A/EN/AddInfinitiveParticles.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::T2A::EN::AddInfinitiveParticles; 2 | 3 | use utf8; 4 | use Moose; 5 | use Treex::Core::Common; 6 | 7 | extends 'Treex::Block::T2A::AddInfinitiveParticles'; 8 | 9 | override 'works_as_conj' => sub { 10 | my ($self, $particle) = @_; 11 | return not $particle eq 'to'; 12 | }; 13 | 14 | 1; 15 | 16 | __END__ 17 | 18 | =encoding utf-8 19 | 20 | =head1 NAME 21 | 22 | Treex::Block::T2A::EN::AddInfinitiveParticles 23 | 24 | =head1 DESCRIPTION 25 | 26 | The particle 'to' is added to English infinitives. Other prepositions 27 | in constructions such as "It's time for him to go home." are added 28 | as well. 
29 | 30 | =head1 AUTHORS 31 | 32 | Ondřej Dušek 33 | 34 | =head1 COPYRIGHT AND LICENSE 35 | 36 | Copyright © 2014 by Institute of Formal and Applied Linguistics, Charles University in Prague 37 | This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself. 38 | -------------------------------------------------------------------------------- /lib/Treex/Block/T2A/EN/AddPrepos.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::T2A::EN::AddPrepos; 2 | use Moose; 3 | use Treex::Core::Common; 4 | extends 'Treex::Block::T2A::AddPrepos'; 5 | 6 | override 'get_prep_forms' => sub { 7 | my ( $self, $formeme ) = @_; 8 | return undef if ( !$formeme ); 9 | my ($prep_forms) = ( $formeme =~ /(?:n|adj):(.+)\+/ ); 10 | return $prep_forms if ($prep_forms); 11 | ($prep_forms) = ( $formeme =~ /v:(.+)\+ger/ ); 12 | return $prep_forms; 13 | }; 14 | 15 | 1; 16 | 17 | __END__ 18 | 19 | =encoding utf-8 20 | 21 | =head1 NAME 22 | 23 | Treex::Block::T2A::EN::AddPrepos 24 | 25 | =head1 DESCRIPTION 26 | 27 | Adding prepositional a-nodes according to prepositions contained in t-nodes' formemes. 28 | 29 | English-specific: adding prepositions to gerunds. 
30 | 31 | =head1 AUTHORS 32 | 33 | Ondřej Dušek 34 | 35 | =head1 COPYRIGHT AND LICENSE 36 | 37 | Copyright © 2014 by Institute of Formal and Applied Linguistics, Charles University in Prague 38 | -------------------------------------------------------------------------------- /lib/Treex/Block/T2A/EN/CapitalizeSentStart.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::T2A::EN::CapitalizeSentStart; 2 | use utf8; 3 | use Moose; 4 | use Treex::Core::Common; 5 | extends 'Treex::Block::T2A::CapitalizeSentStart'; 6 | 7 | has '+opening_punct' => ( isa => 'Str', is => 'ro', default => '({[‘“«‹|*"\'' ); 8 | 9 | 1; 10 | 11 | __END__ 12 | 13 | =encoding utf-8 14 | 15 | =head1 NAME 16 | 17 | Treex::Block::T2A::EN::CapitalizeSentStart 18 | 19 | =head1 DESCRIPTION 20 | 21 | Capitalize the first letter of the first (non-punctuation) 22 | token in the sentence, and do the same for direct speech sections. 23 | 24 | This contains just English-specific settings for L<Treex::Block::T2A::CapitalizeSentStart>. 25 | 26 | =head1 AUTHORS 27 | 28 | Ondřej Dušek 29 | 30 | =head1 COPYRIGHT AND LICENSE 31 | 32 | Copyright © 2014 by Institute of Formal and Applied Linguistics, Charles University in Prague 33 | This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself.
34 | -------------------------------------------------------------------------------- /lib/Treex/Block/T2A/EN/FixFlectErrors.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::T2A::EN::FixFlectErrors; 2 | 3 | use utf8; 4 | use Moose; 5 | use Treex::Core::Common; 6 | 7 | extends 'Treex::Core::Block'; 8 | 9 | sub process_anode { 10 | my ( $self, $a_node ) = @_; 11 | my $form = $a_node->form // ''; 12 | my $morphcat_pos = $a_node->morphcat_pos // ''; 13 | 14 | if ( $form eq 'badder' ) { 15 | $a_node->set_form('worse'); 16 | } 17 | elsif ( $form eq 'halfs' ) { 18 | $a_node->set_form('half'); 19 | } 20 | elsif ( $form =~ /^[.,]/ and $form ne ( $a_node->lemma // '' ) ) { 21 | $a_node->set_form( $a_node->lemma ); 22 | } 23 | 24 | return; 25 | } 26 | 27 | 1; 28 | -------------------------------------------------------------------------------- /lib/Treex/Block/T2A/ES/AddAuxVerbTense.pm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ufal/treex/06c3353d9d7c61ff86c572cd926b055733a73442/lib/Treex/Block/T2A/ES/AddAuxVerbTense.pm -------------------------------------------------------------------------------- /lib/Treex/Block/T2A/ES/AddPrepos.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::T2A::ES::AddPrepos; 2 | use Moose; 3 | use Treex::Core::Common; 4 | extends 'Treex::Block::T2A::AddPrepos'; 5 | 6 | # In Spanish, it seems adverbs may have prepositions as well (e.g. "por allí"). 7 | has '+formeme_prep_regexp' => ( default => '^(?:n|adj|adv):(.+)[+]' ); 8 | 9 | 1; 10 | 11 | __END__ 12 | 13 | =encoding utf-8 14 | 15 | =head1 NAME 16 | 17 | Treex::Block::T2A::ES::AddPrepos 18 | 19 | =head1 DESCRIPTION 20 | 21 | Adding prepositional a-nodes according to prepositions contained in t-nodes' formemes. 22 | In Spanish, it seems adverbs may have prepositions as well (e.g. "por allí"). 
23 | 24 | =head1 AUTHORS 25 | 26 | Martin Popel 27 | 28 | =head1 COPYRIGHT AND LICENSE 29 | 30 | Copyright © 2014 by Institute of Formal and Applied Linguistics, Charles University in Prague 31 | -------------------------------------------------------------------------------- /lib/Treex/Block/T2A/ES/AddReflexive.pm: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ufal/treex/06c3353d9d7c61ff86c572cd926b055733a73442/lib/Treex/Block/T2A/ES/AddReflexive.pm -------------------------------------------------------------------------------- /lib/Treex/Block/T2A/EU/AddSentFinalPunct.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::T2A::EU::AddSentFinalPunct; 2 | use Moose; 3 | use Treex::Core::Common; 4 | extends 'Treex::Block::T2A::AddSentFinalPunct'; 5 | 6 | override '_ends_with_clause_in_quotes' => sub { 7 | my ( $self, $last_token ) = @_; 8 | my ( $open_punct, $close_punct ) = ( $self->open_punct, $self->close_punct ); 9 | 10 | return 0; 11 | }; 12 | 13 | 1; 14 | 15 | __END__ 16 | 17 | =encoding utf-8 18 | 19 | =head1 NAME 20 | 21 | Treex::Block::T2A::EU::AddSentFinalPunct 22 | 23 | =head1 DESCRIPTION 24 | 25 | Override '_ends_with_clause_in_quotes' 26 | 27 | =head1 AUTHORS 28 | 29 | Gorka Labaka 30 | 31 | =head1 COPYRIGHT AND LICENSE 32 | 33 | Copyright © 2008-2014 by Institute of Formal and Applied Linguistics, Charles University in Prague 34 | This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself. 
35 | -------------------------------------------------------------------------------- /lib/Treex/Block/T2A/EU/GenerateGazeteerItems.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::T2A::EU::GenerateGazeteerItems; 2 | use Moose; 3 | extends 'Treex::Core::Block'; 4 | 5 | sub process_tnode { 6 | my ($self, $tnode) = @_; 7 | my $anode = $tnode->get_lex_anode(); 8 | 9 | return if (! $anode); 10 | 11 | if (($tnode->t_lemma_origin || "") eq "lookup-TrGazeteerItems") { 12 | $anode->set_form($anode->lemma); 13 | } 14 | } 15 | 16 | 1; 17 | 18 | __END__ 19 | 20 | =encoding utf-8 21 | 22 | =head1 NAME 23 | 24 | Treex::Block::T2A::EU::GenerateGazeteerItems 25 | 26 | =head1 DESCRIPTION 27 | 28 | Gazeteer items should be treated as proper names, which are (usually) not inflected. 29 | 30 | =head1 AUTHORS 31 | 32 | Gorka Labaka 33 | 34 | =head1 COPYRIGHT AND LICENSE 35 | 36 | Copyright © 2014 by Institute of Formal and Applied Linguistics, Charles University in Prague 37 | This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself. 38 | -------------------------------------------------------------------------------- /lib/Treex/Block/T2A/MarkSubject.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::T2A::MarkSubject; 2 | use Moose; 3 | use Treex::Core::Common; 4 | extends 'Treex::Core::Block'; 5 | 6 | sub process_tnode { 7 | my ( $self, $t_node ) = @_; 8 | if ($t_node->formeme eq 'n:subj'){ 9 | my $a_node = $t_node->get_lex_anode() or return; 10 | $a_node->set_afun('Sb'); 11 | } 12 | return; 13 | } 14 | 15 | 16 | 1; 17 | 18 | __END__ 19 | 20 | =encoding utf-8 21 | 22 | =head1 NAME 23 | 24 | Treex::Block::T2A::MarkSubject 25 | 26 | =head1 DESCRIPTION 27 | 28 | Fill afun=Sb for anodes which correspond to t-nodes with formeme "n:subj".
29 | 30 | =head1 AUTHORS 31 | 32 | Martin Popel 33 | 34 | =head1 COPYRIGHT AND LICENSE 35 | 36 | Copyright © 2014 by Institute of Formal and Applied Linguistics, Charles University in Prague 37 | 38 | This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself. 39 | -------------------------------------------------------------------------------- /lib/Treex/Block/T2A/NL/RestoreVerbPrefixes.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::T2A::NL::RestoreVerbPrefixes; 2 | use utf8; 3 | use Moose; 4 | use Treex::Core::Common; 5 | extends 'Treex::Core::Block'; 6 | 7 | sub process_anode { 8 | 9 | my ( $self, $anode ) = @_; 10 | 11 | return if ( !$anode->wild->{verbal_prefix} ); 12 | $anode->set_form($anode->wild->{verbal_prefix} . $anode->form); 13 | 14 | return; 15 | } 16 | 17 | 1; 18 | 19 | __END__ 20 | 21 | =encoding utf-8 22 | 23 | =head1 NAME 24 | 25 | Treex::Block::T2A::NL::RestoreVerbPrefixes 26 | 27 | =head1 DESCRIPTION 28 | 29 | Verbal separable prefixes are restored after morphology generation. 30 | 31 | =head1 AUTHORS 32 | 33 | Ondřej Dušek 34 | 35 | =head1 COPYRIGHT AND LICENSE 36 | 37 | Copyright © 2014 by Institute of Formal and Applied Linguistics, Charles University in Prague 38 | This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself. 39 | -------------------------------------------------------------------------------- /lib/Treex/Block/T2A/PT/AddPrepos.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::T2A::PT::AddPrepos; 2 | use Moose; 3 | use Treex::Core::Common; 4 | extends 'Treex::Block::T2A::AddPrepos'; 5 | 6 | # In Portuguese, it seems adverbs may have prepositions as well (e.g. "por ali").
7 | has '+formeme_prep_regexp' => ( default => '^(?:n|adj|adv):(.+)[+]' ); 8 | 9 | 1; 10 | 11 | __END__ 12 | 13 | =encoding utf-8 14 | 15 | =head1 NAME 16 | 17 | Treex::Block::T2A::PT::AddPrepos 18 | 19 | =head1 DESCRIPTION 20 | 21 | Adding prepositional a-nodes according to prepositions contained in t-nodes' formemes. 22 | In Portuguese, it seems adverbs may have prepositions as well (e.g. "por ali"). 23 | 24 | =head1 AUTHORS 25 | 26 | Martin Popel 27 | 28 | =head1 COPYRIGHT AND LICENSE 29 | 30 | Copyright © 2014 by Institute of Formal and Applied Linguistics, Charles University in Prague 31 | -------------------------------------------------------------------------------- /lib/Treex/Block/T2A/RU/AddNegation.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::T2A::RU::AddNegation; 2 | use utf8; 3 | use Moose; 4 | use Treex::Core::Common; 5 | extends 'Treex::Core::Block'; 6 | # For each negated verb t-node (negation 'neg1', sempos 'v'), insert the particle "не" right before its lexical a-node and register it as an auxiliary a-node. 7 | sub process_tnode { 8 | my ( $self, $tnode ) = @_; 9 | if ( ( $tnode->gram_negation || '' ) eq 'neg1' && ( $tnode->gram_sempos || '' ) eq 'v' ) { 10 | my $anode = $tnode->get_lex_anode() or return; # no lexical a-node => nothing to hang the particle on 11 | my $new_node = $anode->create_child(); 12 | $new_node->shift_before_node($anode); 13 | 14 | $new_node->reset_morphcat(); 15 | $new_node->set_lemma('не'); 16 | $new_node->set_form('не'); 17 | $tnode->add_aux_anodes($new_node); 18 | } 19 | 20 | return; 21 | } 22 | 23 | 1; 24 | 25 | =over 26 | 27 | =item Treex::Block::T2A::RU::AddNegation 28 | 29 | Add a new a-node which represents a verbal negation particle ("не"). 30 | 31 | =back 32 | 33 | =cut 34 | 35 | # Copyright 2012 Martin Popel 36 | 37 | # This file is distributed under the GNU General Public License v2. See $TMT_ROOT/README.
38 | -------------------------------------------------------------------------------- /lib/Treex/Block/T2A/RU/DropCopula.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::T2A::RU::DropCopula; 2 | use utf8; 3 | use Moose; 4 | use Treex::Core::Common; 5 | extends 'Treex::Core::Block'; 6 | 7 | sub process_tnode { 8 | my ( $self, $t_node ) = @_; 9 | 10 | if ($t_node->t_lemma eq 'быть' && ($t_node->gram_tense||'') eq 'sim'){ 11 | my $a_node = $t_node->get_lex_anode() or return; 12 | $a_node->set_lemma(''); 13 | } 14 | 15 | return; 16 | } 17 | 18 | 19 | 1; 20 | 21 | __END__ 22 | 23 | =encoding utf8 24 | 25 | =head1 NAME 26 | 27 | Treex::Block::T2A::RU::DropCopula - delete verb "to be" 28 | 29 | =head1 DESCRIPTION 30 | 31 | Russian copula verb (быть = to be) in present tense is dropped. 32 | E.g. "He is an idiot" -> "он дурак". 33 | 34 | The current implementation just sets the m/lemma to an empty string. 35 | 36 | # Copyright 2012 Martin Popel 37 | # This file is distributed under the GNU General Public License v2. See $TMT_ROOT/README. 
38 | -------------------------------------------------------------------------------- /lib/Treex/Block/T2T/CS2EN/FixDoubleNegative.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::T2T::CS2EN::FixDoubleNegative; 2 | 3 | use utf8; 4 | use Moose; 5 | use Treex::Core::Common; 6 | 7 | extends 'Treex::Core::Block'; 8 | 9 | sub process_tnode { 10 | my ( $self, $t_node ) = @_; 11 | 12 | # only solve verbs 13 | return if ( $t_node->formeme !~ /^v/ ); 14 | 15 | my (@negs) = grep { $_->t_lemma =~ /^(no(_one|body|where|thing|ne|)?|never|not)$/ } $t_node->get_clause_edescendants(); 16 | 17 | 18 | if ( @negs == 1 and $negs[0]->t_lemma eq 'no' and $negs[0]->get_parent->formeme ne 'n:subj' ) { 19 | my $neg = shift @negs; 20 | 21 | if ( not $neg->src_tnode or $neg->src_tnode->t_lemma ne 'ne' ){ 22 | $neg->set_t_lemma('any'); 23 | $neg->set_t_lemma_origin('rule-FixDoubleNegative'); 24 | } 25 | } 26 | 27 | if (@negs) { 28 | $t_node->set_gram_negation('neg0'); 29 | } 30 | } 31 | 32 | 1; 33 | -------------------------------------------------------------------------------- /lib/Treex/Block/T2T/CS2EN/RemoveInfinitiveSubjects.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::T2T::CS2EN::RemoveInfinitiveSubjects; 2 | 3 | use utf8; 4 | use Moose; 5 | use Treex::Core::Common; 6 | 7 | extends 'Treex::Core::Block'; 8 | 9 | sub process_tnode { 10 | my ( $self, $t_node ) = @_; 11 | 12 | # only solve infinitive verbs 13 | return if ( $t_node->formeme !~ /^v.*inf$/ ); 14 | 15 | # TODO: Sometimes (raising/control) the subject should not be deleted: 16 | # Očekáváme, že přinese změnu -> We expect HIM to bring about a change. 
17 | 18 | foreach my $subj (grep { $_->t_lemma eq '#PersPron' and $_->formeme eq 'n:subj' } $t_node->get_children()){ 19 | $subj->remove(); 20 | } 21 | } 22 | 23 | 1; 24 | -------------------------------------------------------------------------------- /lib/Treex/Block/T2T/EN2CS/CutVariants.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::T2T::EN2CS::CutVariants; 2 | use Moose; 3 | use Treex::Core::Common; 4 | extends 'Treex::Block::T2T::CutVariants'; 5 | 6 | sub BUILD { 7 | log_warn 'This block is deprecated, use T2T::CutVariants instead'; 8 | return; 9 | } 10 | 11 | 1; 12 | 13 | =pod 14 | 15 | This block is deprecated, use T2T::CutVariants instead 16 | -------------------------------------------------------------------------------- /lib/Treex/Block/T2T/EN2CS/MoveEnoughBeforeAdj.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::T2T::EN2CS::MoveEnoughBeforeAdj; 2 | use Moose; 3 | use Treex::Core::Common; 4 | extends 'Treex::Core::Block'; 5 | 6 | sub process_tnode { 7 | my ( $self, $tnode ) = @_; 8 | if (( $tnode->t_lemma || '' ) eq 'dost' 9 | and ( $tnode->get_parent->get_attr('mlayer_pos') || '' ) eq 'A' 10 | and $tnode->get_parent->precedes($tnode) 11 | ) 12 | { 13 | $tnode->shift_before_node( $tnode->get_parent ); 14 | } 15 | } 16 | 17 | 1; 18 | 19 | =over 20 | 21 | =item Treex::Block::T2T::EN2CS::MoveEnoughBeforeAdj 22 | 23 | 'Enough' t-nodes that follow their governing adjectives should be moved 24 | in front of them. 'He is big enough' -> 'Je dost velky'. 25 | 26 | =back 27 | 28 | =cut 29 | 30 | # Copyright 2010 Zdenek Zabokrtsky 31 | 32 | # This file is distributed under the GNU General Public License v2. See $TMT_ROOT/README.
33 | -------------------------------------------------------------------------------- /lib/Treex/Block/T2T/EN2CS/MoveJesteBeforeVerb.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::T2T::EN2CS::MoveJesteBeforeVerb; 2 | use Moose; 3 | use Treex::Core::Common; 4 | extends 'Treex::Core::Block'; 5 | 6 | sub process_tnode { 7 | my ( $self, $tnode ) = @_; 8 | my $parent = $tnode->get_parent; 9 | 10 | if ($tnode->t_lemma eq 'ještě' 11 | && !$tnode->children 12 | && ( $parent->gram_negation || '' ) eq 'neg1' 13 | && $parent->precedes($tnode) 14 | ) 15 | { 16 | $tnode->shift_before_node($parent); 17 | } 18 | return; 19 | } 20 | 21 | 1; 22 | 23 | =over 24 | 25 | =item Treex::Block::T2T::EN2CS::MoveJesteBeforeVerb 26 | 27 | 'jeste' resulting from 'not yet' is moved in front of the negated verb. 28 | 29 | =back 30 | 31 | =cut 32 | 33 | # Copyright 2011 Zdenek Zabokrtsky 34 | 35 | # This file is distributed under the GNU General Public License v2. See $TMT_ROOT/README. 36 | -------------------------------------------------------------------------------- /lib/Treex/Block/T2T/EN2CS/MovePersPronNextToVerb.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::T2T::EN2CS::MovePersPronNextToVerb; 2 | use Moose; 3 | use Treex::Core::Common; 4 | extends 'Treex::Core::Block'; 5 | 6 | sub process_tnode { 7 | my ( $self, $tnode ) = @_; 8 | my $parent = $tnode->get_parent; 9 | if ($tnode->t_lemma eq '#PersPron' 10 | && !$parent->is_root 11 | && $parent->formeme =~ /^v:/ 12 | && $tnode->formeme !~ /^n:1/ 13 | && $tnode->ord > $parent->ord 14 | ) 15 | { 16 | $tnode->shift_after_node($parent); 17 | } 18 | return; 19 | } 20 | 21 | 1; 22 | 23 | =over 24 | 25 | =item Treex::Block::T2T::EN2CS::MovePersPronNextToVerb 26 | 27 | Non-subject #PersProns which are governed by a verb are shifted next to the verb.
28 | 29 | =back 30 | 31 | =cut 32 | 33 | # Copyright 2010 David Marecek 34 | 35 | # This file is distributed under the GNU General Public License v2. See $TMT_ROOT/README. 36 | -------------------------------------------------------------------------------- /lib/Treex/Block/T2T/EN2CS/MoveRelClauseRight.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::T2T::EN2CS::MoveRelClauseRight; 2 | use Moose; 3 | use Treex::Core::Common; 4 | extends 'Treex::Core::Block'; 5 | 6 | sub process_tnode { 7 | my ( $self, $tnode ) = @_; 8 | 9 | if ( $tnode->formeme =~ /rc/ ) { 10 | my $parent = $tnode->get_parent; 11 | if ( $tnode->precedes($parent) and $parent->formeme =~ /^n/ ) { 12 | $tnode->shift_after_subtree($parent); 13 | } 14 | } 15 | return; 16 | } 17 | 18 | 1; 19 | 20 | =over 21 | 22 | =item Treex::Block::T2T::EN2CS::MoveRelClauseRight 23 | 24 | Relative clauses placed before their governing nouns (created e.g. 25 | from ing-forms) are moved behind the nouns. 26 | 27 | =back 28 | 29 | =cut 30 | 31 | # Copyright 2008-2011 Zdenek Zabokrtsky, David Marecek 32 | 33 | # This file is distributed under the GNU General Public License v2. See $TMT_ROOT/README. 34 | -------------------------------------------------------------------------------- /lib/Treex/Block/T2T/EN2EU/FixYouPl.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::T2T::EN2EU::FixYouPl; 2 | use Moose; 3 | use Treex::Core::Common; 4 | extends 'Treex::Core::Block'; 5 | 6 | 7 | sub process_tnode { 8 | my ( $self, $tnode ) = @_; 9 | 10 | if (($tnode->gram_sempos || "") =~ 'n.pron' && 11 | ($tnode->gram_person || "") eq '2') { 12 | $tnode->set_attr("gram/number", 'nr'); 13 | } 14 | 15 | return; 16 | }; 17 | 18 | 1; 19 | 20 | __END__ 21 | 22 | =encoding utf-8 23 | 24 | =head1 NAME 25 | 26 | Treex::Block::T2T::EN2EU::FixYouPl 27 | 28 | =head1 DESCRIPTION 29 | 30 | Some 'you' pronouns have a plural analysis.
It should be 'nr' 31 | 32 | =head1 AUTHORS 33 | 34 | Gorka Labaka 35 | 36 | =head1 COPYRIGHT AND LICENSE 37 | 38 | Copyright © 2014 by Institute of Formal and Applied Linguistics, Charles University in Prague 39 | This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself. 40 | -------------------------------------------------------------------------------- /lib/Treex/Block/T2T/EN2EU/RemoveRelPron.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::T2T::EN2EU::RemoveRelPron; 2 | use Moose; 3 | use Treex::Core::Common; 4 | use utf8; 5 | 6 | extends 'Treex::Core::Block'; 7 | 8 | sub process_tnode { 9 | my ( $self, $t_node ) = @_; 10 | my $parent = $t_node->get_parent(); 11 | 12 | 13 | if ($t_node->t_lemma eq "that" and $parent->formeme =~ /^v:rc/) { 14 | $t_node->set_t_lemma("#PersPron"); 15 | $t_node->set_t_lemma_origin('RemoveRelPron'); 16 | } 17 | 18 | } 19 | 1; 20 | 21 | __END__ 22 | 23 | =encoding utf-8 24 | 25 | =head1 NAME 26 | 27 | Treex::Block::T2T::EN2EU::RemoveRelPron; 28 | 29 | =head1 DESCRIPTION 30 | 31 | 32 | =head1 AUTHORS 33 | 34 | Gorka Labaka 35 | 36 | =head1 COPYRIGHT AND LICENSE 37 | 38 | Copyright © 2008 by Institute of Formal and Applied Linguistics, Charles University in Prague 39 | 40 | This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself. 
41 | -------------------------------------------------------------------------------- /lib/Treex/Block/T2T/FixPunctFormemes.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::T2T::FixPunctFormemes; 2 | use Moose; 3 | use Treex::Core::Common; 4 | extends 'Treex::Core::Block'; 5 | 6 | sub process_tnode { 7 | my ( $self, $tnode ) = @_; 8 | if (( $tnode->t_lemma || "" ) =~ /^(?:\p{P}+|-LRB-|-RRB-)$/ ) { 9 | $tnode->set_formeme('x'); 10 | } 11 | } 12 | 13 | 1; 14 | 15 | __END__ 16 | 17 | =encoding utf-8 18 | 19 | =head1 NAME 20 | 21 | Treex::Block::T2T::FixPunctFormemes 22 | 23 | =head1 DESCRIPTION 24 | 25 | Force formeme x for all punctuation tokens. 26 | 27 | 28 | =head1 AUTHORS 29 | 30 | Luís Gomes , 31 | 32 | =head1 COPYRIGHT AND LICENSE 33 | 34 | Copyright © 2014 by NLX Group, Universidade de Lisboa 35 | -------------------------------------------------------------------------------- /lib/Treex/Block/T2T/ProjectSelectedWild.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::T2T::ProjectSelectedWild; 2 | use Moose; 3 | use Treex::Core::Common; 4 | 5 | extends 'Treex::Core::Block'; 6 | # Copy the check_comma_after wild attribute from the source t-node to the target t-node. 7 | sub process_tnode { 8 | my ($self, $tst_tnode) = @_; 9 | my $src_tnode = $tst_tnode->src_tnode or return; # no source node: nothing to project 10 | 11 | if (defined $src_tnode->wild->{check_comma_after}) { 12 | $tst_tnode->wild->{check_comma_after} = $src_tnode->wild->{check_comma_after}; 13 | } 14 | } 15 | 16 | 1; 17 | 18 | -------------------------------------------------------------------------------- /lib/Treex/Block/T2T/RecoverUnknownLemmas.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::T2T::RecoverUnknownLemmas; 2 | use Moose; 3 | extends 'Treex::Core::Block'; 4 | 5 | sub process_tnode { 6 | my ( $self, $tnode ) = @_; 7 | my $src_tnode = $tnode->src_tnode() or return 1; 8 | return 1 if ($src_tnode->t_lemma ne $tnode->t_lemma); 9 | my
$src_anode = $src_tnode->get_lex_anode() or return 1; 10 | my $original_lemma = $src_anode->wild->{original_lemma} or return 1; 11 | $tnode->set_t_lemma($original_lemma); 12 | return 1; 13 | } 14 | 15 | 1; 16 | 17 | __END__ 18 | 19 | =encoding utf-8 20 | 21 | =head1 NAME 22 | 23 | Treex::Block::T2T::RecoverUnknownLemmas 24 | 25 | =head1 DESCRIPTION 26 | 27 | Recovers lemmas that have been replaced with synset ids and were not "transferred". 28 | 29 | =head1 AUTHORS 30 | 31 | Luís Gomes , 32 | 33 | =head1 COPYRIGHT AND LICENSE 34 | 35 | Copyright © 2014 by NLX Group, Universidade de Lisboa 36 | -------------------------------------------------------------------------------- /lib/Treex/Block/T2T/RehangToOrigParents.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::T2T::RehangToOrigParents; 2 | use Moose; 3 | use Treex::Core::Common; 4 | extends 'Treex::Core::Block'; 5 | 6 | sub process_tnode { 7 | my ( $self, $node ) = @_; 8 | my $orig_parent = $node->get_deref_attr('original_parent.rf') or return; 9 | $node->set_parent($orig_parent); 10 | } 11 | 12 | 1; 13 | __END__ 14 | 15 | =over 16 | 17 | =item Treex::Block::T2T::RehangToOrigParents 18 | 19 | Rehangs nodes to its original parents as it was before applying 20 | the L block. 21 | Original parents are taken from the C attribute. 22 | 23 | =back 24 | 25 | =cut 26 | 27 | # Copyright 2008 Martin Popel 28 | # This file is distributed under the GNU General Public License v2. See $TMT_ROOT/README. 
29 | -------------------------------------------------------------------------------- /lib/Treex/Block/T2U/CS/AdjustStructure.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::T2U::CS::AdjustStructure; 2 | use utf8; 3 | use Moose; 4 | extends 'Treex::Block::T2U::AdjustStructure'; 5 | 6 | use experimental 'signatures'; 7 | 8 | =head1 NAME 9 | 10 | Treex::Block::T2U::CS::AdjustStructure - Czech specifics for converting t-layer to u-layer. 11 | 12 | =cut 13 | 14 | override is_exclusive => sub($self, $tlemma) { 15 | $tlemma =~ /^(?:jen(?:om)?|pouze|výhradně)$/ 16 | }; 17 | 18 | override negation => sub { 'n(?:e|ikoliv?)' }; 19 | 20 | __PACKAGE__->meta->make_immutable 21 | -------------------------------------------------------------------------------- /lib/Treex/Block/T2U/CS/ConvertCoreference.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::T2U::CS::ConvertCoreference; 2 | use utf8; 3 | use Moose; 4 | extends 'Treex::Block::T2U::ConvertCoreference'; 5 | with 'Treex::Tool::UMR::CS::GrammatemeSetter'; 6 | 7 | =head1 NAME 8 | 9 | Treex::Block::T2U::CS::ConvertCoreference - Czech specifics for converting coreference from the t-layer to u-layer. 10 | 11 | =cut 12 | 13 | { my $RELATIVE = '(?:který|jenž|jaký|co|kd[ye]|odkud|kudy|kam)'; 14 | sub relative { $RELATIVE } 15 | } 16 | 17 | __PACKAGE__->meta->make_immutable 18 | -------------------------------------------------------------------------------- /lib/Treex/Block/T2U/LA/AdjustStructure.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::T2U::LA::AdjustStructure; 2 | use Moose; 3 | extends 'Treex::Block::T2U::AdjustStructure'; 4 | 5 | use experimental 'signatures'; 6 | 7 | =head1 NAME 8 | 9 | Treex::Block::T2U::LA::AdjustStructure - Latin specifics for converting t-layer to u-layer.
10 | 11 | =cut 12 | 13 | override is_exclusive => sub($self, $tlemma) { 14 | $tlemma =~ /^(?:solum|tantum)$/ 15 | }; 16 | 17 | override negation => sub { '(?:n(?:on|e)|haud)' }; 18 | 19 | __PACKAGE__->meta->make_immutable 20 | -------------------------------------------------------------------------------- /lib/Treex/Block/T2U/LA/ConvertCoreference.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::T2U::LA::ConvertCoreference; 2 | use Moose; 3 | extends 'Treex::Block::T2U::ConvertCoreference'; 4 | with 'Treex::Tool::UMR::LA::GrammatemeSetter'; 5 | 6 | =head1 NAME 7 | 8 | Treex::Block::T2U::LA::ConvertCoreference - Latin specifics for converting coreference from the t-layer to u-layer. 9 | 10 | =cut 11 | 12 | { my $RELATIVE = '(?:(?:qu[aio]|u(?:bi|nde))(?:cumque)?' 13 | . '|qu(?:omodo|isquis|alis|antus))'; # outer (?:...) keeps the alternation contained when the string is interpolated into a larger pattern 14 | sub relative { $RELATIVE } 15 | } 16 | 17 | __PACKAGE__->meta->make_immutable 18 | -------------------------------------------------------------------------------- /lib/Treex/Block/Test/FieldCanHaveWideChars.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::Test::FieldCanHaveWideChars; 2 | 3 | use Moose; 4 | use Treex::Core::Common; 5 | 6 | extends 'Treex::Core::Block'; 7 | 8 | has 'problematic_attribute' => ( is => 'rw', isa => 'Str', default => "žluťoučký" ); 9 | 10 | sub BUILD { 11 | my $self = shift; 12 | log_info("FieldCanHaveWideChars loaded, problematic_attribute=" . $self->problematic_attribute); 13 | } 14 | 15 | sub process_document { 16 | my $self = shift; 17 | log_info("FieldCanHaveWideChars executed, problematic_attribute=" .
$self->problematic_attribute); 18 | } 19 | 20 | 1; 21 | -------------------------------------------------------------------------------- /lib/Treex/Block/Test/ParameterCanHaveWideChars.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::Test::ParameterCanHaveWideChars; 2 | 3 | use Moose; 4 | use Treex::Core::Common; 5 | 6 | extends 'Treex::Core::Block'; 7 | 8 | has 'problematic_attribute' => ( is => 'rw', isa => 'Str', default => "no wide chars as default but they can be set in scenario" ); 9 | 10 | sub BUILD { 11 | my $self = shift; 12 | log_info("ParameterCanHaveWideChars loaded, problematic_attribute=" . $self->problematic_attribute); 13 | } 14 | 15 | sub process_document { 16 | my $self = shift; 17 | log_info("ParameterCanHaveWideChars executed, problematic_attribute=" . $self->problematic_attribute); 18 | } 19 | 20 | 1; 21 | -------------------------------------------------------------------------------- /lib/Treex/Block/W2A/AppendSynsetIdToLemmas.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::W2A::AppendSynsetIdToLemmas; 2 | use Moose; 3 | extends 'Treex::Core::Block'; 4 | 5 | sub process_anode { 6 | my ( $self, $anode ) = @_; 7 | my $synsetid = $anode->wild->{lx_wsd} // 'UNK'; 8 | my $lemma = $anode->lemma; 9 | if ($synsetid ne 'UNK') { 10 | #my $alpha_synsetid = "$synsetid"; 11 | #$alpha_synsetid =~ tr/0-9/a-j/; 12 | #$anode->set_lemma($lemma."__".$alpha_synsetid); 13 | $anode->set_lemma($lemma."__".$synsetid); 14 | } 15 | return 1; 16 | } 17 | 18 | 1; 19 | 20 | __END__ 21 | 22 | =encoding utf-8 23 | 24 | =head1 NAME 25 | 26 | Treex::Block::W2A::AppendSynsetIdToLemmas 27 | 28 | =head1 DESCRIPTION 29 | 30 | Appends synset ids to lemmas (where applicable). 
31 | 32 | =head1 AUTHORS 33 | 34 | Luís Gomes , 35 | 36 | =head1 COPYRIGHT AND LICENSE 37 | 38 | Copyright © 2014 by NLX Group, Universidade de Lisboa 39 | -------------------------------------------------------------------------------- /lib/Treex/Block/W2A/CS/TagFeaturama.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::W2A::CS::TagFeaturama; 2 | use Moose; 3 | use Treex::Core::Common; 4 | use Treex::Tool::Tagger::Featurama::CS; 5 | extends 'Treex::Block::W2A::Tag'; 6 | 7 | sub _build_tagger{ 8 | return Treex::Tool::Tagger::Featurama::CS->new; 9 | } 10 | 11 | 1; 12 | 13 | __END__ 14 | 15 | =pod 16 | 17 | =encoding utf-8 18 | 19 | =head1 NAME 20 | 21 | Treex::Block::W2A::CS::TagFeaturama - Czech PoS+morpho tagger 22 | 23 | =head1 DESCRIPTION 24 | 25 | Each node in the analytical tree is tagged using the L tagger. 26 | 27 | =head1 AUTHORS 28 | 29 | Martin Popel 30 | 31 | =head1 COPYRIGHT AND LICENSE 32 | 33 | Copyright © 2012 by Institute of Formal and Applied Linguistics, Charles University in Prague 34 | 35 | This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself. 36 | -------------------------------------------------------------------------------- /lib/Treex/Block/W2A/CS/TagMorphoDiTa.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::W2A::CS::TagMorphoDiTa; 2 | 3 | use strict; 4 | use warnings; 5 | use Moose; 6 | use Treex::Core::Common; 7 | extends 'Treex::Block::W2A::TagMorphoDiTa'; 8 | 9 | has '+model' => ( default => 'data/models/morphodita/cs/czech-morfflex-pdt-131112.tagger-best_accuracy' ); 10 | 11 | 1; 12 | 13 | __END__ 14 | 15 | =pod 16 | 17 | =encoding utf-8 18 | 19 | =head1 NAME 20 | 21 | Treex::Block::W2A::CS::TagMorphoDiTa 22 | 23 | =head1 DESCRIPTION 24 | 25 | This is just a small modification of L which adds the path to the 26 | default model for Czech. 
27 | 28 | =head1 AUTHORS 29 | 30 | Martin Popel 31 | 32 | =head1 COPYRIGHT AND LICENSE 33 | 34 | Copyright © 2014 by Institute of Formal and Applied Linguistics, Charles University in Prague 35 | 36 | This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself. 37 | -------------------------------------------------------------------------------- /lib/Treex/Block/W2A/ConvertTags.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::W2A::ConvertTags; 2 | use Moose; 3 | use Treex::Core::Common; 4 | use utf8; 5 | extends 'Treex::Block::HamleDT::Harmonize'; 6 | 7 | has iset_driver => 8 | ( 9 | is => 'ro', 10 | isa => 'Str', 11 | required => 1, 12 | default => 'ta::tamiltb', 13 | documentation => 'Which interset driver should be used to decode tags in this treebank? '. 14 | 'Lowercase, language code :: treebank code, e.g. "cs::pdt". '. 15 | 'The driver must be available in "$TMT_ROOT/libs/other/tagset".' 16 | ); 17 | 18 | sub process_zone { 19 | my $self = shift; 20 | my $zone = shift; 21 | my $root = $zone->get_atree(); 22 | $self->convert_tags($root); 23 | } 24 | 25 | 1; 26 | 27 | -------------------------------------------------------------------------------- /lib/Treex/Block/W2A/DE/TagStanford.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::W2A::DE::TagStanford; 2 | 3 | use Moose; 4 | use Treex::Core::Common; 5 | extends 'Treex::Block::W2A::TagStanford'; 6 | 7 | has '+model' => ( default => 'data/models/tagger/stanford/german-fast.tagger' ); 8 | 9 | 1; 10 | 11 | __END__ 12 | 13 | =pod 14 | 15 | =encoding utf-8 16 | 17 | =head1 NAME 18 | 19 | Treex::Block::W2A::DE::TagStanford 20 | 21 | =head1 DESCRIPTION 22 | 23 | This is just a small modification of L which adds the path to the 24 | default model for German. 
25 | 26 | =head1 AUTHORS 27 | 28 | Ondřej Dušek 29 | 30 | =head1 COPYRIGHT AND LICENSE 31 | 32 | Copyright © 2012 by Institute of Formal and Applied Linguistics, Charles University in Prague 33 | 34 | This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself. 35 | -------------------------------------------------------------------------------- /lib/Treex/Block/W2A/DefaultDepRel.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::W2A::DefaultDepRel; 2 | use Treex::Core::Common; 3 | use Moose; 4 | extends 'Treex::Core::Block'; 5 | 6 | has 'def_rel' => (is => 'ro', isa => 'Str', default=> 'NR'); 7 | has 'deprel_attribute' => ( is => 'rw', isa => 'Str', default => 'afun'); 8 | 9 | sub process_atree { 10 | my ( $self, $atree ) = @_; 11 | my @anodes = $atree->get_descendants( { ordered => 1 } ); 12 | foreach my $an (@anodes) { 13 | $an->set_attr($self->deprel_attribute, $self->def_rel); 14 | } 15 | } 16 | 17 | 1; 18 | -------------------------------------------------------------------------------- /lib/Treex/Block/W2A/EN/HideIT.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::W2A::EN::HideIT; 2 | use Moose; 3 | use Treex::Core::Common; 4 | extends 'Treex::Block::W2A::HideIT'; 5 | 6 | 1; 7 | 8 | __END__ 9 | 10 | =encoding utf-8 11 | 12 | =head1 NAME 13 | 14 | Treex::Block::W2A::EN::HideIT - hide IT-domain entities 15 | 16 | =head1 DESCRIPTION 17 | 18 | Currently, there is nothing English-specific, 19 | this block is just a nickname for L<Treex::Block::W2A::HideIT> 20 | (for legacy reasons). 21 | 22 | =head1 AUTHOR 23 | 24 | Martin Popel 25 | 26 | =head1 COPYRIGHT AND LICENSE 27 | 28 | Copyright © 2015 by Institute of Formal and Applied Linguistics, Charles University in Prague 29 | 30 | This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself.
31 | 32 | -------------------------------------------------------------------------------- /lib/Treex/Block/W2A/EN/QtHackTags.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::W2A::EN::QtHackTags; 2 | use Moose; 3 | use Treex::Core::Common; 4 | use utf8; 5 | extends 'Treex::Core::Block'; 6 | 7 | sub process_anode { 8 | my ($self, $anode) = @_; 9 | 10 | if ( ($anode->lemma // '') eq 'select' 11 | && $anode->tag !~ /^VB/ 12 | ) { 13 | $anode->set_tag('VB'); 14 | } 15 | 16 | return ; 17 | } 18 | 19 | 20 | 1; 21 | 22 | =head1 NAME 23 | 24 | Treex::Block::W2A::EN::QtHackTags 25 | 26 | =head1 DESCRIPTION 27 | 28 | Some hacks useful for QTLeap; aka domain adaptation o:-) 29 | 30 | "select" gets often tagged as adjectives, as in "select OK", so we set it to VB 31 | 32 | =head1 AUTHOR 33 | 34 | Rudolf Rosa 35 | 36 | =head1 COPYRIGHT AND LICENSE 37 | 38 | Copyright © 2015 by Institute of Formal and Applied Linguistics, 39 | Charles University in Prague 40 | 41 | This module is free software; you can redistribute it and/or modify it 42 | under the same terms as Perl itself. 43 | 44 | -------------------------------------------------------------------------------- /lib/Treex/Block/W2A/EscapeMoses.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::W2A::EscapeMoses; 2 | 3 | use strict; 4 | use warnings; 5 | use utf8; 6 | use Moose; 7 | use Treex::Core::Common; 8 | extends 'Treex::Core::Block'; 9 | 10 | use Treex::Tool::Moses; 11 | 12 | sub process_atree { 13 | my ($self, $aroot) = @_; 14 | 15 | Treex::Tool::Moses::escape_anodes($aroot); 16 | 17 | return; 18 | }; 19 | 20 | 1; 21 | 22 | __END__ 23 | 24 | =encoding utf-8 25 | 26 | =head1 NAME 27 | 28 | Treex::Block::W2A::EscapeMoses 29 | 30 | =head1 DESCRIPTION 31 | 32 | Escape anodes in the way the Moses tokenizer does, using L. 
33 | 34 | =head1 AUTHOR 35 | 36 | Rudolf Rosa 37 | 38 | =head1 COPYRIGHT AND LICENSE 39 | 40 | This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself. 41 | 42 | -------------------------------------------------------------------------------- /lib/Treex/Block/W2A/FR/TagStanford.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::W2A::FR::TagStanford; 2 | 3 | use Moose; 4 | use Treex::Core::Common; 5 | extends 'Treex::Block::W2A::TagStanford'; 6 | 7 | has '+model' => ( default => 'data/models/tagger/stanford/french.tagger' ); 8 | 9 | 1; 10 | 11 | __END__ 12 | 13 | =pod 14 | 15 | =encoding utf-8 16 | 17 | =head1 NAME 18 | 19 | Treex::Block::W2A::FR::TagStanford 20 | 21 | =head1 DESCRIPTION 22 | 23 | This is just a small modification of L which adds the path to the 24 | default model for French. 25 | 26 | =head1 AUTHORS 27 | 28 | Ondřej Dušek 29 | 30 | =head1 COPYRIGHT AND LICENSE 31 | 32 | Copyright © 2012 by Institute of Formal and Applied Linguistics, Charles University in Prague 33 | 34 | This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself. 
35 | -------------------------------------------------------------------------------- /lib/Treex/Block/W2A/JA/FixPeriod.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::W2A::JA::FixPeriod; 2 | use Moose; 3 | use Treex::Core::Common; 4 | use Encode; 5 | extends 'Treex::Core::Block'; 6 | 7 | # We change "。" to classic period, also rehang it to root 8 | 9 | sub process_atree { 10 | my ( $self, $a_root ) = @_; 11 | foreach my $child ( $a_root->get_descendants() ) { 12 | if ( $child->form eq "。") { 13 | $child->set_form("."); 14 | $child->set_lemma("."); 15 | $child->set_parent($a_root); 16 | } 17 | } 18 | return 1; 19 | } 20 | 21 | 1; 22 | -------------------------------------------------------------------------------- /lib/Treex/Block/W2A/JA/t/parse_jdepp.t: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | use strict; 4 | use warnings; 5 | 6 | use utf8; 7 | binmode(STDIN, ':utf8'); 8 | binmode(STDOUT, ':utf8'); 9 | binmode(STDERR, ':utf8'); 10 | use Test::More tests => 3; 11 | 12 | BEGIN { use_ok('Treex::Block::W2A::JA::ParseJDEPP') }; 13 | 14 | require_ok('Treex::Block::W2A::JA::ParseJDEPP'); 15 | 16 | Treex::Core::Log::log_set_error_level('WARN'); 17 | my $block = Treex::Block::W2A::JA::ParseJDEPP->new(); 18 | 19 | $block->process_start(); 20 | 21 | isa_ok( $block->parser, 'Treex::Tool::Parser::JDEPP' ); 22 | 23 | # TODO: test parse chunk subroutine 24 | -------------------------------------------------------------------------------- /lib/Treex/Block/W2A/JA/t/tag_mecab.t: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | use strict; 4 | use warnings; 5 | 6 | use utf8; 7 | binmode(STDIN, ':utf8'); 8 | binmode(STDOUT, ':utf8'); 9 | binmode(STDERR, ':utf8'); 10 | use Test::More tests => 3; 11 | 12 | BEGIN { use_ok('Treex::Block::W2A::JA::TagMeCab') }; 13 | 14 | 
require_ok('Treex::Block::W2A::JA::TagMeCab'); 15 | 16 | my $block = Treex::Block::W2A::JA::TagMeCab->new(); 17 | 18 | $block->process_start(); 19 | 20 | isa_ok( $block->tagger, 'Treex::Tool::Tagger::MeCab' ); 21 | 22 | # TODO: test process_zone subroutine 23 | -------------------------------------------------------------------------------- /lib/Treex/Block/W2A/ParseLeftBranching.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::W2A::ParseLeftBranching; 2 | use Moose; 3 | use Treex::Core::Common; 4 | extends 'Treex::Core::Block'; 5 | 6 | sub process_atree { 7 | my ( $self, $root ) = @_; 8 | my @todo = $root->get_descendants( { ordered => 1 } ); 9 | 10 | # Flatten the tree first, if there was some topology already. 11 | foreach my $node (@todo) { 12 | $node->set_parent($root); 13 | } 14 | 15 | 16 | my $child = shift @todo; 17 | my $parent; 18 | while (@todo) { 19 | $parent = shift @todo; 20 | $child->set_parent($parent); 21 | $child = $parent; 22 | 23 | } 24 | return; 25 | } 26 | 27 | 28 | 29 | 1; 30 | 31 | __END__ 32 | 33 | =head1 NAME 34 | 35 | Treex::Block::W2A::ParseLeftBranching 36 | 37 | =head1 DESCRIPTION 38 | 39 | Creates a parse tree that is left-branching 40 | 41 | itself. 42 | -------------------------------------------------------------------------------- /lib/Treex/Block/W2A/ParseRightBranching.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::W2A::ParseRightBranching; 2 | use Moose; 3 | use Treex::Core::Common; 4 | extends 'Treex::Core::Block'; 5 | 6 | sub process_atree { 7 | my ( $self, $root ) = @_; 8 | my @todo = $root->get_descendants( { ordered => 1 } ); 9 | 10 | # Flatten the tree first, if there was some topology already.
11 | foreach my $node (@todo) { 12 | $node->set_parent($root); 13 | } 14 | 15 | 16 | my $child = shift @todo; 17 | my $parent; 18 | while (@todo) { 19 | $parent = shift @todo; 20 | $parent->set_parent($child); 21 | $child = $parent; 22 | 23 | } 24 | return; 25 | } 26 | 27 | 28 | 29 | 1; 30 | 31 | __END__ 32 | 33 | =head1 NAME 34 | 35 | Treex::Block::W2A::ParseRightBranching 36 | 37 | =head1 DESCRIPTION 38 | 39 | Creates a parse tree that is right-branching 40 | 41 | itself. 42 | -------------------------------------------------------------------------------- /lib/Treex/Block/W2A/ReplaceLemmasWithSynsetId.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::W2A::ReplaceLemmasWithSynsetId; 2 | use Moose; 3 | extends 'Treex::Core::Block'; 4 | 5 | sub process_anode { 6 | my ( $self, $anode ) = @_; 7 | my $synsetid = $anode->wild->{synsetid} // 'UNK'; 8 | if ($synsetid ne 'UNK') { 9 | $anode->wild->{original_lemma} = $anode->lemma; 10 | my $alpha_synsetid = "$synsetid"; 11 | $alpha_synsetid =~ tr/0-9/a-j/; 12 | $anode->set_lemma($alpha_synsetid); 13 | } 14 | return 1; 15 | } 16 | 17 | 1; 18 | 19 | __END__ 20 | 21 | =encoding utf-8 22 | 23 | =head1 NAME 24 | 25 | Treex::Block::W2A::ReplaceLemmasWithSynsetId 26 | 27 | =head1 DESCRIPTION 28 | 29 | Replaces lemmas with synset ids (where applicable).
30 | 31 | =head1 AUTHORS 32 | 33 | Luís Gomes , 34 | 35 | =head1 COPYRIGHT AND LICENSE 36 | 37 | Copyright © 2014 by NLX Group, Universidade de Lisboa 38 | -------------------------------------------------------------------------------- /lib/Treex/Block/W2A/TA/FixTags.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::W2A::TA::FixTags; 2 | use Moose; 3 | use Treex::Core::Common; 4 | use Treex::Tool::Orthography::TA; 5 | extends 'Treex::Core::Block'; 6 | 7 | sub process_anode { 8 | my ($self, $anode) = @_; 9 | # Forms matching the initials patterns get the tag 'NmNSN----------'; every other node keeps its current tag. 10 | my $fixed_tag = $self->get_correct_tag($anode->form, $anode->lemma, $anode->tag); 11 | $anode->set_attr('tag', $fixed_tag); 12 | } 13 | 14 | sub get_correct_tag { 15 | my ($self, $f, $l, $t) = @_; 16 | 17 | # initials 18 | return 'NmNSN----------' if ($f =~ /($TA_VOWELS_REG)\.$/); 19 | return 'NmNSN----------' if ($f =~ /($TA_CONSONANTS_REG)\.$/); 20 | return 'NmNSN----------' if ($f =~ /($TA_CONSONANTS_PLUS_VOWEL_A_REG)($TA_VOWEL_SIGNS_REG)\.$/); 21 | return 'NmNSN----------' if ($f =~ /($TA_CONSONANTS_PLUS_VOWEL_A_REG)\.$/); 22 | return 'NmNSN----------' if ($f =~ /(எஸ்|எல்|எம்|என்|ஆர்)\.$/); 23 | 24 | return $t; 25 | } 26 | 27 | 1; 28 | -------------------------------------------------------------------------------- /lib/Treex/Block/W2A/t/resegment_sentences.t: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | use strict; 4 | use warnings; 5 | 6 | use Test::More; 7 | use Treex::Block::W2A::ResegmentSentences; 8 | Treex::Core::Log::log_set_error_level('WARN'); 9 | 10 | my $block = new_ok('Treex::Block::W2A::ResegmentSentences'); 11 | 12 | foreach my $lang (qw(cs en de)) { 13 | isa_ok( $block->_get_segmenter($lang), 'Treex::Tool::Segment::RuleBased' ); 14 | is( $block->_get_segmenter($lang), $block->_get_segmenter($lang), 'Returns same object on each _get_segmenter call' ); 15 | } 16 | 17 | done_testing(); 18 | 19 |
-------------------------------------------------------------------------------- /lib/Treex/Block/W2A/t/tokenize_on_whitespace.t: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | use strict; 4 | use warnings; 5 | 6 | use Test::More tests => 1; 7 | 8 | TODO: { 9 | local $TODO = 'Tests not yet written'; 10 | 11 | fail( 'Write some tests' ); 12 | } 13 | 14 | -------------------------------------------------------------------------------- /lib/Treex/Block/Write/LayerAttributes/Missing.pm: -------------------------------------------------------------------------------- 1 | package Treex::Block::Write::LayerAttributes::Missing; 2 | 3 | use Moose; 4 | use Treex::Core::Common; 5 | 6 | with 'Treex::Block::Write::LayerAttributes::AttributeModifier'; 7 | 8 | has '+return_values_names' => ( default => sub { [''] } ); 9 | 10 | 11 | sub modify_single { 12 | 13 | my ( $self ) = @_; 14 | 15 | return undef; 16 | } 17 | 18 | 1; 19 | 20 | __END__ 21 | 22 | =encoding utf-8 23 | 24 | =head1 NAME 25 | 26 | Treex::Block::Write::LayerAttributes::Missing 27 | 28 | =head1 DESCRIPTION 29 | 30 | A dummy 'text modifier' returning a missing value in all cases. 31 | 32 | =head1 AUTHOR 33 | 34 | Ondřej Dušek 35 | 36 | =head1 COPYRIGHT AND LICENSE 37 | 38 | Copyright © 2011 by Institute of Formal and Applied Linguistics, Charles University in Prague 39 | 40 | This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself. 
41 | -------------------------------------------------------------------------------- /lib/Treex/CS.pm: -------------------------------------------------------------------------------- 1 | package Treex::CS; 2 | 3 | use strict; 4 | use warnings; 5 | use Treex::Unilang; 6 | use Ufal::MorphoDiTa; 7 | 8 | 1; 9 | 10 | __END__ 11 | #Module is here for synchronizing CS with Unilang 12 | #TODO - synopsis, example of parsing 13 | #a) from commandline 14 | #b) directly from perl 15 | =pod 16 | 17 | =encoding utf8 18 | 19 | =head1 NAME 20 | 21 | Treex::CS - collection of blocks for processing Czech 22 | 23 | =head1 DESCRIPTION 24 | 25 | =head1 AUTHOR 26 | 27 | Dušan Variš 28 | 29 | =head1 COPYRIGHT AND LICENSE 30 | 31 | Copyright © 2011 by Institute of Formal and Applied Linguistics, Charles University in Prague 32 | 33 | This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself. 34 | 35 | -------------------------------------------------------------------------------- /lib/Treex/Core/Node/t/ordered.t: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | use strict; 4 | use warnings; 5 | 6 | use Test::More; 7 | use Test::Moose; 8 | use Treex::Core::Document; 9 | 10 | my $doc = new_ok('Treex::Core::Document'); 11 | my $bundle = $doc->create_bundle(); 12 | my $bzone = $bundle->create_zone('en'); 13 | my $t_root = $bzone->create_ttree(); 14 | does_ok($t_root, 'Treex::Core::Node::Ordered', 'T-root is ordered'); 15 | 16 | done_testing; 17 | -------------------------------------------------------------------------------- /lib/Treex/Core/compile_grammar.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | use strict; 3 | use warnings; 4 | use Parse::RecDescent 1.967009; 5 | my $grammar; 6 | open my $IN, '<', 'ScenarioParser.rdg' or die "Cannot open ScenarioParser.rdg: $!"; 7 | { 8 | local $/ = undef; # slurp the whole grammar file in one read 9 | $grammar = <$IN>; 10 | } 11 |
Parse::RecDescent->Precompile( 12 | { -standalone => 1, } 13 | , $grammar 14 | , "Treex::Core::ScenarioParser" 15 | ); 16 | 17 | # The standalone version contains several packages in one file, 18 | # but the very Treex::Core::ScenarioParser starts around line 3300. 19 | # We need to silence Perl critics also in the first package. 20 | system('mv ScenarioParser.pm temp') == 0 or die "Cannot move ScenarioParser.pm to temp (exit status $?)"; 21 | system('(echo "## no critic (Miscellanea::ProhibitUnrestrictedNoCritic)"; echo "## no critic Generated code follows"; cat temp) > ScenarioParser.pm') == 0 or die "Cannot prepend the no-critic header to ScenarioParser.pm (exit status $?)"; 22 | 23 | # The old way did not generate *standalone* parser 24 | ##!/bin/bash 25 | #perl -MParse::RecDescent - ScenarioParser.rdg Treex::Core::ScenarioParser 26 | -------------------------------------------------------------------------------- /lib/Treex/Core/share/tred_extension/extensions.lst: -------------------------------------------------------------------------------- 1 | treex 2 | -------------------------------------------------------------------------------- /lib/Treex/Core/share/tred_extension/treex/contrib/treex/.gitignore: -------------------------------------------------------------------------------- 1 | .layouts.cfg 2 | -------------------------------------------------------------------------------- /lib/Treex/Core/share/tred_extension/treex/contrib/treex/contrib.mac: -------------------------------------------------------------------------------- 1 | # -*- cperl -*- 2 | 3 | #ifinclude 4 | #ifinclude 5 | #ifinclude 6 | 7 | package Treex_mode; 8 | #binding-context Treex_mode 9 | 10 | print STDERR "Initializing Treex extension\n"; 11 | 12 | BEGIN { import TredMacro; } 13 | 14 | #include Treex_mode.inc 15 | 16 | 1; 17 | -------------------------------------------------------------------------------- /lib/Treex/Core/share/tred_extension/treex/package.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | treex 9 | 10 | Support for .treex files 11 | 12 | 0.1 13 | 14 | Zdeněk
Žabokrtský 15 | 16 | This extension allows to use TrEd for browsing 17 | Treex::Core::Document instances. 18 | 19 | 20 | 21 | 22 | 23 | -------------------------------------------------------------------------------- /lib/Treex/Core/share/tred_extension/treex/resources/pmlbackend_conf.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 1 9 | 10 | 11 | 12 | -------------------------------------------------------------------------------- /lib/Treex/Core/share/tred_extension/treex/stylesheets/Treex_stylesheet: -------------------------------------------------------------------------------- 1 | context: Treex_context 2 | hint: {_precomputed_hint} ?> 3 | rootstyle: {_precomputed_root_style} ?> 4 | style: {_precomputed_node_style} ?> 5 | node: {_precomputed_labels}->[0] ?> 6 | node: {_precomputed_labels}->[1] ?> 7 | node: {_precomputed_labels}->[2] ?> 8 | 9 | -------------------------------------------------------------------------------- /lib/Treex/Core/t/block.t: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | use strict; 4 | use warnings; 5 | 6 | use Test::More; 7 | 8 | BEGIN { use_ok('Treex::Core::Block') } 9 | 10 | my $block = Treex::Core::Block->new; 11 | 12 | isa_ok( $block, 'Treex::Core::Block' ); 13 | 14 | done_testing(); 15 | -------------------------------------------------------------------------------- /lib/Treex/Core/t/common.t: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | use Test::More; 4 | use Treex::Core::Common; 5 | 6 | use Moose::Util::TypeConstraints qw(find_type_constraint); 7 | ok(find_type_constraint('Treex::Type::NonNegativeInt'), 'Find type defined not directly in Common but in used module'); 8 | 9 | done_testing(); 10 | -------------------------------------------------------------------------------- /lib/Treex/Core/t/config.t: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | use strict; 4 | use warnings; 5 | 6 | use Test::More; 7 | 8 | use Treex::Core::Config; 9 | 10 | my $TMP_DIR = Treex::Core::Config->tmp_dir(); 11 | ok( -d $TMP_DIR, 'Temporary directory is directory' ); 12 | ok( -w $TMP_DIR, 'Temporary directory is writable' ); 13 | 14 | done_testing(); 15 | -------------------------------------------------------------------------------- /lib/Treex/Core/t/dump_scen.t: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | use strict; 4 | use warnings; 5 | 6 | use Test::More tests => 3; 7 | use Treex::Core::Scenario; 8 | 9 | my $scen1 = Treex::Core::Scenario->new( from_string => 'Read::Text Write::Text' ); 10 | 11 | like( $scen1->construct_scenario_string(), qr{^Read::Text Write::Text$} , 'Simple scenario'); 12 | 13 | my $scen2 = Treex::Core::Scenario->new( from_string => 'Read::Text language=en Write::Text' ); 14 | 15 | like( $scen2->construct_scenario_string( multiline => 1 ), qr{^Read::Text language=en\nWrite::Text$} , "Multiline scenario"); 16 | 17 | my $scen3 = Treex::Core::Scenario->new( from_string => 'Read::Text language=en ::Another::Block Write::Text' ); 18 | 19 | like( $scen3->construct_scenario_string( ), qr{^Read::Text language=en ::Another::Block Write::Text$} , "Scenario with block out of Treex::Block namespace"); 20 | -------------------------------------------------------------------------------- /lib/Treex/Core/t/error.scen: -------------------------------------------------------------------------------- 1 | aaa aaa 2 | -------------------------------------------------------------------------------- /lib/Treex/Core/t/files.t: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | use strict; 4 | use warnings; 5 | 6 | use Test::More tests => 3; 7 | use File::Slurp; 8 | 9 | my @lines = 
read_file( \*DATA ); 10 | write_file( 'filelist', @lines ); 11 | END { unlink 'filelist'; } 12 | chomp @lines; 13 | use_ok('Treex::Core::Files'); 14 | 15 | my $files = Treex::Core::Files->new( string => '@filelist' ); 16 | isa_ok( $files, 'Treex::Core::Files' ); 17 | is_deeply( $files->filenames, \@lines, 'Got filenames chomped' ); 18 | 19 | __DATA__ 20 | first.file 21 | second.file 22 | -------------------------------------------------------------------------------- /lib/Treex/Core/t/following_node.t: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | use strict; 4 | use warnings; 5 | 6 | use Test::More; 7 | 8 | use Treex::Core; 9 | 10 | my $document = Treex::Core::Document->new; 11 | my $bundle = $document->create_bundle; 12 | 13 | foreach my $language (qw(en ru de cs)) { 14 | 15 | foreach my $selector (undef, 'test') { 16 | my $zone = $bundle->create_zone($language,$selector); 17 | 18 | foreach my $level ('a','t') { 19 | 20 | my $root = $zone->create_tree($level); 21 | 22 | for (1..3) { 23 | $root->create_child(); 24 | } 25 | } 26 | } 27 | } 28 | 29 | 30 | my @nodes; 31 | 32 | my $node = $bundle; 33 | while ($node) { 34 | push @nodes, $node; 35 | $node = $node->following; 36 | } 37 | 38 | is( scalar(@nodes), 1 + 4 * 2 * 2 * 4 , 39 | 'following() traverses through all nodes in all trees in all zones' ); 40 | 41 | done_testing; 42 | -------------------------------------------------------------------------------- /lib/Treex/Core/t/grammar.t: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | use strict; 3 | use warnings; 4 | 5 | use Treex::Core; 6 | 7 | use Test::More; 8 | use File::Basename; 9 | 10 | my @strings = ( 11 | q(Read::Text), 12 | q(Read::Text Util::Eval), 13 | q(Read::Text Util::Eval document='print'), 14 | q(Read::Text Util::Eval document='print"hello";'), 15 | q(Read::Text Util::Eval document='print "hello";'), 16 | q(Read::Text 
Util::Eval document='print "hello";'), 17 | dirname($0) . q(/test.scen), 18 | ); 19 | 20 | #plan tests => @strings + 2; 21 | BEGIN { use_ok('Treex::Core::ScenarioParser'); } 22 | 23 | my $parser = Treex::Core::ScenarioParser->new; 24 | 25 | isa_ok( $parser, 'Parse::RecDescent::_Runtime' ); 26 | 27 | #$::RD_TRACE = 1; 28 | #$::RD_HINT = 1; 29 | foreach my $string (@strings) { 30 | isnt( $parser->startrule($string), undef ); 31 | } 32 | 33 | done_testing(); 34 | -------------------------------------------------------------------------------- /lib/Treex/Core/t/remove_bundle.t: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | use strict; 4 | use warnings; 5 | 6 | use Test::More; 7 | use Treex::Core; 8 | 9 | my $document = Treex::Core::Document->new; 10 | 11 | foreach my $bundle_number (1..5) { 12 | 13 | my $bundle = $document->create_bundle(); 14 | $bundle->set_id("i$bundle_number"); 15 | 16 | if ( $bundle_number < 5 ) { # check if it works for empty bundles too 17 | $bundle->create_zone('en'); 18 | } 19 | }; 20 | 21 | my @bundles = $document->get_bundles; 22 | 23 | foreach my $bundle_number (1,3,5) { 24 | $bundles[$bundle_number-1]->remove; 25 | } 26 | 27 | is( ( join '-', map { $_->id() } $document->get_bundles ), 'i2-i4', 28 | 'Bundles correctly removed from the beginning, the middle and the end of a document' ); 29 | 30 | done_testing(); 31 | -------------------------------------------------------------------------------- /lib/Treex/Core/t/resource.t: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | use strict; 4 | use warnings; 5 | 6 | use Test::More; 7 | use Test::Output; 8 | use File::Temp qw(tempfile); 9 | use File::Spec; 10 | 11 | BEGIN { require_ok('Treex::Core::Resource') } 12 | 13 | SKIP: 14 | { 15 | skip "May fail when not online", 3 unless ($ENV{AUTHOR_TESTING}); # 3 = number of ok() checks inside this SKIP block 16 | my $file = 
Treex::Core::Resource::require_file_from_share('data/models/parser/mst/cs/README'); 17 | ok( -e $file, 'file from resource exists' ); 18 | 19 | my ($fh, $filename) = tempfile(); 20 | $file = Treex::Core::Resource::require_file_from_share($filename); 21 | ok( -e $file, 'file with absolute path' ); 22 | 23 | my ($volume, $dir, $f) = File::Spec->splitpath($filename); 24 | chdir $dir; 25 | $file = Treex::Core::Resource::require_file_from_share("./$f"); 26 | ok( -e $file, 'file with relative path' ); 27 | 28 | unlink $filename; 29 | } 30 | done_testing(); 31 | 32 | -------------------------------------------------------------------------------- /lib/Treex/Core/t/runall.sh: -------------------------------------------------------------------------------- 1 | for SCRIPT in *.t 2 | do 3 | echo 4 | echo RUNNING $SCRIPT 5 | echo 6 | ./$SCRIPT 7 | done -------------------------------------------------------------------------------- /lib/Treex/Core/t/test.scen: -------------------------------------------------------------------------------- 1 | Read::Text Util::Eval document='print "hello";' 2 | -------------------------------------------------------------------------------- /lib/Treex/Core/t/writers.t: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | # Testing treex -p and base writers 3 | use strict; 4 | use warnings; 5 | use Test::More; 6 | use File::Basename; 7 | 8 | BEGIN { 9 | Test::More::plan( skip_all => 'these tests require export AUTHOR_TESTING=1' ) if !$ENV{AUTHOR_TESTING}; 10 | Test::More::plan( skip_all => 'these tests require SGE qsub' ) if !`which qsub`; 11 | } 12 | 13 | my $command = q{-Len Read::Sentences lines_per_doc=1 Util::Eval document='$document->set_path("dir")' Write::Sentences to=.}; 14 | 15 | chdir(dirname(__FILE__)); 16 | `rm -rf dir; seq 3 | treex $command`; 17 | is(`cat dir/noname002.txt`, "2\n", 'local execution'); 18 | 19 | `rm -rf dir; seq 3 | treex -pj3 $command`; 20 | is(`cat 
dir/noname002.txt`, "2\n", 'treex -p execution'); 21 | # A bug causes the files are created in the current directory instead of "dir" 22 | # Let's delete also these files 23 | `rm -f noname00?.txt`; 24 | 25 | `rm -rf dir *-cluster-run-*`; 26 | done_testing(); 27 | -------------------------------------------------------------------------------- /lib/Treex/EN.pm: -------------------------------------------------------------------------------- 1 | package Treex::EN; 2 | 3 | use strict; 4 | use warnings; 5 | use Treex::Unilang; 6 | 7 | 1; 8 | 9 | __END__ 10 | #Module is here for synchronizing EN with Unilang 11 | #TODO - synopsis, example of parsing 12 | #a) from commandline 13 | #b) directly from perl 14 | =pod 15 | 16 | =encoding utf8 17 | 18 | =head1 NAME 19 | 20 | Treex::EN - collection of blocks for processing English 21 | 22 | =head1 DESCRIPTION 23 | 24 | =head1 AUTHOR 25 | 26 | Tomáš Kraut 27 | 28 | =head1 COPYRIGHT AND LICENSE 29 | 30 | Copyright © 2011 by Institute of Formal and Applied Linguistics, Charles University in Prague 31 | 32 | This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself. 33 | 34 | -------------------------------------------------------------------------------- /lib/Treex/Service/t/fixtures/en_sample.txt: -------------------------------------------------------------------------------- 1 | I got a gift for my brother. "Contributing factors were the long-term trend toward warmer temperatures, as well as a moderate El Nino in the Pacific," Jay Lawrimore of NOAA's National Climatic Data Center said in a telephone interview from Asheville, North Carolina. The next-warmest winter on record was in 2004, and the third warmest winter was in 1998, Lawrimore said. The 10 warmest years on record have occurred since 1995. "We don't say this winter is evidence of the influence of greenhouse gases," Lawrimore said. 
2 | -------------------------------------------------------------------------------- /lib/Treex/Service/t/fixtures/hi_example.txt: -------------------------------------------------------------------------------- 1 | अनुच्छेद 1 — सभी मनुष्यों को गौरव और अधिकारों के मामले में जन्मजात स्वतन्त्रता और समानता प्राप्त हैं। उन्हें बुद्धि और अन्तरात्मा की देन प्राप्त है और परस्पर उन्हें भाईचारे के भाव से बर्ताव करना चाहिए। 2 | -------------------------------------------------------------------------------- /lib/Treex/Tool/CorefSegments/Features.pm: -------------------------------------------------------------------------------- 1 | package Treex::Tool::CorefSegments::Features; 2 | 3 | use Moose::Role; 4 | 5 | requires 'extract_features'; 6 | requires 'init_doc_features'; 7 | 8 | 1; 9 | -------------------------------------------------------------------------------- /lib/Treex/Tool/Coreference/CS/PronAnaphFilter.pm: -------------------------------------------------------------------------------- 1 | ########################################## 2 | ######## THIS MODULE IS OBSOLETE ######### 3 | ########### SHOULD BE DELETED ############ 4 | ########################################## 5 | package Treex::Tool::Coreference::CS::PronAnaphFilter; 6 | 7 | use Moose; 8 | use Treex::Core::Common; 9 | 10 | with 'Treex::Tool::Coreference::NodeFilter'; 11 | 12 | # according to rule presented in Nguy et al. (2009) 13 | # nodes with the t_lemma #PersPron and third person in gram/person 14 | sub is_candidate { 15 | my ($self, $node) = @_; 16 | log_warn "Class Treex::Tool::Coreference::CS::PronAnaphFilter is DEPRECATED. 
Use Treex::Tool::Coreference::NodeFilter::PersPron instead."; 17 | return ( (defined $node->t_lemma) && ($node->t_lemma eq '#PersPron') 18 | && (defined $node->gram_person) && ($node->gram_person eq '3') ); 19 | } 20 | 21 | # TODO doc 22 | 23 | 1; 24 | -------------------------------------------------------------------------------- /lib/Treex/Tool/Coreference/CorefFeatures.pm: -------------------------------------------------------------------------------- 1 | package Treex::Tool::Coreference::CorefFeatures; 2 | use Moose; 3 | 4 | extends 'Treex::Tool::ML::Ranker::Features'; 5 | 6 | has '+node1_label' => ( default => 'anaph' ); 7 | has '+node2_label' => ( default => 'cand' ); 8 | 9 | 1; 10 | -------------------------------------------------------------------------------- /lib/Treex/Tool/Coreference/DistrModelComponent/CandOrd.pm: -------------------------------------------------------------------------------- 1 | package Treex::Tool::Coreference::DistrModelComponent::CandOrd; 2 | 3 | use Moose; 4 | 5 | with 'Treex::Tool::Coreference::DistrModelComponent'; 6 | 7 | has 'last_one_prob' => ( 8 | is => 'ro', 9 | isa => 'Num', 10 | required => 1, 11 | default => 0.5, 12 | ); 13 | 14 | sub _select_features { 15 | my ($self, $anaph, $cand) = @_; 16 | my $cand_ord = $cand->{'c_cand_ord'}; 17 | return ($cand_ord); 18 | } 19 | 20 | sub _base_distrib { 21 | my ($self, $cand_ord) = @_; 22 | 23 | if ($cand_ord > 500) { 24 | $cand_ord = 500; 25 | } 26 | 27 | return ($self->last_one_prob ** $cand_ord) * (1 - $self->last_one_prob); 28 | } 29 | 30 | 1; 31 | -------------------------------------------------------------------------------- /lib/Treex/Tool/Coreference/DistrModelComponent/Number.pm: -------------------------------------------------------------------------------- 1 | package Treex::Tool::Coreference::DistrModelComponent::Number; 2 | 3 | use Moose; 4 | 5 | with 'Treex::Tool::Coreference::DistrModelComponent'; 6 | 7 | has 'number_count' => ( 8 | is => 'ro', 9 | isa => 
'Int', 10 | required => 1, 11 | default => 5, 12 | ); 13 | 14 | sub _select_features { 15 | my ($self, $anaph, $cand) = @_; 16 | my $anaph_num = $anaph->{'c_anaph_num'}; 17 | my $cand_num = $cand->{'c_cand_num'}; 18 | return ($cand_num, $anaph_num); 19 | } 20 | 21 | sub _base_distrib { 22 | my ($self, $cand_num, $anaph_num) = @_; 23 | 24 | #if (($cand_num eq $anaph_num ) && 25 | # (($cand_num eq 'sg') || ($cand_num eq 'pl'))) { 26 | # return (0.5 / 2); 27 | #} 28 | #else { 29 | # return (0.5 / ($self->number_count ** 2 - 2)); 30 | #} 31 | 32 | return (1 / $self->number_count); 33 | } 34 | 35 | 1; 36 | -------------------------------------------------------------------------------- /lib/Treex/Tool/Coreference/DistrModelComponent/SentDist.pm: -------------------------------------------------------------------------------- 1 | package Treex::Tool::Coreference::DistrModelComponent::SentDist; 2 | 3 | use Moose; 4 | 5 | with 'Treex::Tool::Coreference::DistrModelComponent'; 6 | 7 | has 'sent_dist_count' => ( 8 | is => 'ro', 9 | isa => 'Int', 10 | required => 1, 11 | default => 2, 12 | ); 13 | 14 | sub _select_features { 15 | my ($self, $anaph, $cand) = @_; 16 | my $cand_dist = $cand->{'c_sent_dist'}; 17 | return ($cand_dist); 18 | } 19 | 20 | sub _base_distrib { 21 | my ($self, $cand_dist, $anaph_dist) = @_; 22 | 23 | return (1 / $self->sent_dist_count); 24 | } 25 | 26 | 1; 27 | -------------------------------------------------------------------------------- /lib/Treex/Tool/Coreference/EN/PronAnaphFilter.pm: -------------------------------------------------------------------------------- 1 | ########################################## 2 | ######## THIS MODULE IS OBSOLETE ######### 3 | ########### SHOULD BE DELETED ############ 4 | ########################################## 5 | package Treex::Tool::Coreference::EN::PronAnaphFilter; 6 | 7 | use Moose; 8 | use Treex::Core::Common; 9 | use Treex::Tool::Coreference::NodeFilter::PersPron; 10 | 11 | with 
'Treex::Tool::Coreference::NodeFilter'; 12 | 13 | has 'skip_referential' => ( is => 'ro', isa => 'Bool', default => 0, required => 1); 14 | 15 | sub is_candidate { 16 | my ($self, $t_node) = @_; 17 | 18 | log_warn "Class Treex::Tool::Coreference::EN::PronAnaphFilter is DEPRECATED. Use Treex::Tool::Coreference::NodeFilter::PersPron instead."; 19 | 20 | my $args = {}; 21 | if ($self->skip_referential) { 22 | $args->{skip_nonref} = 1; 23 | } 24 | 25 | return Treex::Tool::Coreference::NodeFilter::PersPron::is_3rd_pers($t_node, $args); 26 | } 27 | 28 | # TODO doc 29 | 30 | 1; 31 | -------------------------------------------------------------------------------- /lib/Treex/Tool/Coreference/Features/Coreference.pm: -------------------------------------------------------------------------------- 1 | package Treex::Tool::Coreference::Features::Coreference; 2 | 3 | use Moose; 4 | use Treex::Core::Common; 5 | 6 | use List::MoreUtils qw/any/; 7 | 8 | extends 'Treex::Tool::Coreference::BaseCorefFeatures'; 9 | 10 | sub _is_coref { 11 | my ($anaph, $cand) = @_; 12 | my @antecs = $anaph->get_coref_chain; 13 | #push @antecs, map { $_->functor =~ /^(APPS|CONJ|DISJ|GRAD)$/ ? $_->children : () } @antecs; 14 | return any {$_ == $cand} @antecs; 15 | } 16 | 17 | override '_binary_features' => sub { 18 | my ($self, $set_features, $anaph, $cand, $candord) = @_; 19 | 20 | my $feats = {}; 21 | $feats->{is_coref} = _is_coref($anaph, $cand) ? 
1 : 0; 22 | return $feats; 23 | }; 24 | 25 | augment '_unary_features' => sub { 26 | my $feats = inner() || {}; 27 | return $feats; 28 | }; 29 | 30 | 1; 31 | -------------------------------------------------------------------------------- /lib/Treex/Tool/Coreference/NodeFilter/Utils.pm: -------------------------------------------------------------------------------- 1 | package Treex::Tool::Coreference::NodeFilter::Utils; 2 | 3 | use Treex::Core::Common; 4 | use Exporter 'import'; 5 | our @EXPORT_OK = qw(ternary_arg); 6 | 7 | # processing ternary arguments for binary indicators 8 | # arg = 0 : does not take the indicator into account 9 | # arg = 1 : indicator must be true 10 | # arg = -1 : indicator must be false 11 | sub ternary_arg { 12 | my ($arg, $indicator) = @_; 13 | if ($arg > 0) { 14 | return $indicator; 15 | } 16 | elsif ($arg < 0) { 17 | return !$indicator; 18 | } 19 | else { 20 | return 1; 21 | } 22 | } 23 | 24 | 1; 25 | -------------------------------------------------------------------------------- /lib/Treex/Tool/Depfix/CS/DiacriticsStripper.pm: -------------------------------------------------------------------------------- 1 | package Treex::Tool::Depfix::CS::DiacriticsStripper; 2 | use Moose; 3 | use Treex::Core::Common; 4 | use utf8; 5 | 6 | sub strip_diacritics { 7 | my ($word) = @_; 8 | 9 | $word =~ tr/áčďéěíľňóřšťúůýžÁČĎÉĚÍĽŇÓŘŠŤÚŮÝŽ/acdeeilnorstuuyzACDEEILNORSTUUYZ/; 10 | 11 | return $word; 12 | } 13 | 14 | 1; 15 | 16 | =head1 NAME 17 | 18 | Treex::Tool::Depfix::CS::DiacriticsStripper 19 | 20 | =head1 DESCRIPTION 21 | 22 | =head1 PARAMETERS 23 | 24 | =over 25 | 26 | =back 27 | 28 | =head1 AUTHOR 29 | 30 | Rudolf Rosa 31 | 32 | =head1 COPYRIGHT AND LICENSE 33 | 34 | Copyright © 2012 by Institute of Formal and Applied Linguistics, 35 | Charles University in Prague 36 | 37 | This module is free software; you can redistribute it and/or modify it 38 | under the same terms as Perl itself. 
39 | 40 | -------------------------------------------------------------------------------- /lib/Treex/Tool/EnglishMorpho/exceptions/adj_adv.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | my $DATA = <<'END_DATA'; 3 | better JJR good 4 | best JJS good 5 | worse JJR bad 6 | worst JJS bad 7 | further JJR far 8 | furthest JJS far 9 | elder JJR old 10 | eldest JJS old 11 | stranger JJR strange 12 | strangest JJS strange 13 | better RBR well 14 | best RBS well 15 | worse RBR badly 16 | worst RBS badly 17 | further RBR far 18 | furthest RBS far 19 | END_DATA 20 | 21 | if ( $ARGV[0] =~ /^-[ad]$/ ) { print $DATA; } 22 | else { die "Invalid usage: use option -a or -d\n"; } 23 | -------------------------------------------------------------------------------- /lib/Treex/Tool/EnglishMorpho/exceptions/contractions.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | # 's VBZ can be lemmatized both as "be" and "have". 3 | # According to BNC, "be" is more frequent (309K vs. 29K occurrences). 
4 | my $DATA = <<'END_DATA'; 5 | n't RB not 6 | 's VBZ be 7 | 're VBP be 8 | 've VBP have 9 | 've VB have 10 | 'm VBP be 11 | 'll MD will 12 | 'd MD would 13 | 'd VBD have 14 | wo MD will 15 | ca MD can 16 | END_DATA 17 | 18 | if ( $ARGV[0] =~ /^-[ad]$/ ) { print $DATA; } 19 | else { die "Invalid usage: use option -a or -d\n"; } 20 | -------------------------------------------------------------------------------- /lib/Treex/Tool/EnglishMorpho/exceptions/false_negation.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | my $DATA = <<'END_DATA'; 3 | nonetheless RB nonetheless 4 | none-the-less RB nonetheless 5 | nonchalance NN nonchalance 6 | nonchalant JJ nonchalant 7 | noncommittal JJ noncommittal 8 | noncommittally RB noncommittally 9 | nonsense NN nonsense 10 | nonsenses NN nonsenses 11 | nonstop NN nonstop 12 | non-stop NN nonstop 13 | END_DATA 14 | 15 | if ( $ARGV[0] =~ /^-[ad]$/ ) { print $DATA; } 16 | else { die "Invalid usage: use option -a or -d\n"; } 17 | -------------------------------------------------------------------------------- /lib/Treex/Tool/EnglishMorpho/exceptions/nouns_invariant_polemic.list: -------------------------------------------------------------------------------- 1 | clutches 2 | dues 3 | equities 4 | fumes 5 | groceries 6 | hostilities 7 | returns 8 | refreshments 9 | stays -------------------------------------------------------------------------------- /lib/Treex/Tool/EnglishMorpho/exceptions/nouns_plural_es.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | my @DATA = qw( 3 | bias 4 | canvas 5 | gas 6 | iris 7 | ); 8 | 9 | sub analyze() { ## no critic qw(Subroutines::ProhibitSubroutinePrototypes) 10 | foreach (@DATA) { 11 | print "${_}es\tNNS\t$_\n"; 12 | } 13 | return; 14 | } 15 | 16 | sub generate() { ## no critic qw(Subroutines::ProhibitSubroutinePrototypes) 17 | foreach (@DATA) { 18 | print "$_\t${_}es\n"; 
19 | } 20 | return; 21 | } 22 | 23 | if ( $ARGV[0] eq '-a' ) { analyze(); } 24 | elsif ( $ARGV[0] eq '-g' ) { generate(); } 25 | elsif ( $ARGV[0] eq '-d' ) { 26 | foreach (@DATA) { print "$_\n"; } 27 | } 28 | else { die "Invalid usage: use option -a, -g or -d\n"; } 29 | -------------------------------------------------------------------------------- /lib/Treex/Tool/EnglishMorpho/exceptions/verbs_cked.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | my @DATA = qw( 3 | bivouac 4 | frolic 5 | mimic 6 | panic 7 | picnic 8 | traffic 9 | ); 10 | 11 | sub analyze() { ## no critic qw(Subroutines::ProhibitSubroutinePrototypes) 12 | foreach (@DATA) { 13 | print $_. "ked\tVBD\t" . $_ . "\n"; 14 | print $_. "ked\tVBN\t" . $_ . "\n"; 15 | print $_. "king\tVBG\t" . $_ . "\n"; 16 | } 17 | return; 18 | } 19 | 20 | if ( $ARGV[0] eq '-a' ) { analyze(); } 21 | elsif ( $ARGV[0] eq '-d' ) { 22 | foreach (@DATA) { print "$_\n"; } 23 | } 24 | else { die "Invalid usage: use option -a or -d\n"; } 25 | -------------------------------------------------------------------------------- /lib/Treex/Tool/EnglishMorpho/exceptions/verbs_other.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | my $DATA = <<'END_DATA'; 3 | am VBP be 4 | are VBP be 5 | is VBZ be 6 | has VBZ have 7 | hath VBZ have 8 | being VBG be 9 | belied VBN belie 10 | belied VBD belie 11 | belies VBZ belie 12 | belying VBG belie 13 | underlies VBZ underlie 14 | underlied VBD underlie 15 | underlied VBN underlie 16 | underlying VBG underlie 17 | ageing VBG age 18 | skiing VBG ski 19 | END_DATA 20 | 21 | if ( $ARGV[0] =~ /^-[ad]$/ ) { print $DATA; } 22 | else { die "Invalid usage: use option -a or -d\n"; } 23 | -------------------------------------------------------------------------------- /lib/Treex/Tool/EnglishMorpho/t/contractions.txt: 
-------------------------------------------------------------------------------- 1 | It's gonna rain. 2 | It is going to rain. 3 | I ain't gonna do it. 4 | I am not going to do it. 5 | I wanna hold your hand. 6 | I want to hold your hand. 7 | I've gotta go. 8 | You oughta know. 9 | It needs lotsa work. 10 | We're outta time. 11 | Gimme that. 12 | Lemme see that. 13 | Whatcha gonna do? 14 | I dunno. 15 | I shoulda never gotten married. 16 | It coulda been worse. 17 | Let's use contractions 'cos it's kinda cool. 18 | -------------------------------------------------------------------------------- /lib/Treex/Tool/EnglishMorpho/t/morpho.t: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | use Test::More; 4 | use Treex::Core::Log; 5 | 6 | require_ok('Treex::Tool::EnglishMorpho::Lemmatizer'); 7 | 8 | my ( $word, $tag ) = qw(I PP); 9 | my $lemmatizer = new_ok('Treex::Tool::EnglishMorpho::Lemmatizer'); 10 | my @result = $lemmatizer->lemmatize( $word, $tag ); 11 | cmp_ok( scalar @result, '==', 2, 'Lemmatization returns array of two arguments' ); 12 | 13 | done_testing(); 14 | 15 | -------------------------------------------------------------------------------- /lib/Treex/Tool/Gazetteer/RuleBasedScorer.pm: -------------------------------------------------------------------------------- 1 | package Treex::Tool::Gazetteer::RuleBasedScorer; 2 | 3 | use Moose; 4 | 5 | my $weights = { 6 | full_str_eq => [0, 2], 7 | full_str_non_alpha => [0, -100], 8 | first_starts_capital => [-10, 10], 9 | entity_starts_capital => [-50, 10], 10 | all_start_capital => [-1, 1], 11 | no_first => [-50, 1], 12 | last_menu => [0, -50], 13 | }; 14 | 15 | sub score { 16 | my ($feats) = @_; 17 | 18 | my %feat_hash = (); 19 | my $score = 0; 20 | 21 | foreach my $pair (@$feats) { 22 | my ($key, $value) = @$pair; 23 | $feat_hash{$key} = $value; 24 | next if (!defined $weights->{$key}); 25 | 26 | $value = $value >= 1 ? 
1 : 0; 27 | $score += $weights->{$key}->[$value]; 28 | } 29 | #if ($feat_hash{all_capital}) { 30 | # $score += $feat_hash{full_str_eq} ? 10 : -50; 31 | #} 32 | my $anode_count = $feat_hash{anode_count} // 1; 33 | 34 | return $score * $anode_count; 35 | } 36 | 37 | 1; 38 | -------------------------------------------------------------------------------- /lib/Treex/Tool/GoogleTranslate/t/texts.txt: -------------------------------------------------------------------------------- 1 | Dnes je krásný den, šel jsem k vodě ven. 2 | Kdo jinému jámu kopá až se ucho utrhne. 3 | Moje vznášedlo je plné ptakopysků. 4 | 5 | -------------------------------------------------------------------------------- /lib/Treex/Tool/IO/t/test.arff: -------------------------------------------------------------------------------- 1 | @relation weather 2 | 3 | @attribute outlook {sunny, overcast, rainy} 4 | @attribute temperature real 5 | @attribute humidity real 6 | @attribute windy {TRUE, FALSE} 7 | @attribute play {yes, no} 8 | 9 | @data 10 | sunny,85,85,FALSE,no 11 | sunny,80,90,TRUE,no 12 | overcast,83,86,FALSE,yes 13 | rainy,70,96,FALSE,yes 14 | {0 rainy,1 68,2 80,3 FALSE} 15 | {0 rainy,2 70,3 'TRUE',4 no} 16 | overcast,64,65,TRUE,? 17 | ?,72,95,FALSE,? 
18 | sunny,69,70,FALSE,yes 19 | rainy,75,80,FALSE,yes 20 | sunny,?,70,TRUE,yes 21 | 'overcast',72,90,TRUE,yes 22 | "overcast",81,75,FALSE,yes 23 | rainy,71,91,TRUE,no 24 | -------------------------------------------------------------------------------- /lib/Treex/Tool/IR/esa_script.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | use strict; 4 | use warnings; 5 | 6 | use Treex::Tool::IR::ESA; 7 | 8 | my $esa = Treex::Tool::IR::ESA->new(); 9 | 10 | while (my $line = <STDIN>) { # one query text per line of standard input 11 | chomp $line; 12 | 13 | print STDERR "Generating ESA vector for $line...\n"; 14 | my %vector = $esa->esa_vector_n_best($line, 10); 15 | 16 | next if (!%vector); 17 | 18 | my @sorted_keys = sort {$vector{$a} <=> $vector{$b}} keys %vector; 19 | print STDOUT (join " ", @sorted_keys) . "\n"; 20 | } 21 | -------------------------------------------------------------------------------- /lib/Treex/Tool/LM/t/interactive_testTreeLM.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | use strict; 3 | use warnings; 4 | use utf8; 5 | binmode STDIN, ':utf8'; 6 | binmode STDOUT, ':utf8'; 7 | binmode STDERR, ':utf8'; 8 | 9 | #use IO::Prompt; somehow it does not handle utf8 10 | 11 | use Treex::Tool::LM::Lemma; 12 | use Treex::Tool::LM::TreeLM; 13 | my $model = Treex::Tool::LM::TreeLM->new(); 14 | 15 | while (1){ 16 | print "-------- Query---------\n"; 17 | print 'Lg POS: '; $_ = <>; chomp; my $uLg = $_ or last; 18 | print 'Ld POS: '; $_ = <>; chomp; my $uLd = $_ or last; 19 | print 'Fd: '; $_ = <>; chomp; my $Fd = $_ or last; 20 | my $Lg = Treex::Tool::LM::Lemma->new($uLg); 21 | my $Ld = Treex::Tool::LM::Lemma->new($uLd); 22 | my $probLdFd_Lg = $model->get_prob_LdFd_given_Lg($Ld,$Fd,$Lg,1); 23 | } 24 | print "\n"; -------------------------------------------------------------------------------- /lib/Treex/Tool/LM/t/test_MorphoLM.pl: 
-------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | use strict; 3 | use warnings; 4 | use utf8; 5 | binmode STDOUT, ':utf8'; 6 | 7 | use Treex::Tool::LM::MorphoLM; 8 | use Treex::Tool::LM::FormInfo; 9 | 10 | # load default model file 11 | my $morphoLM = Treex::Tool::LM::MorphoLM->new(); 12 | 13 | print "Lemma 'moci': form tag count\n"; 14 | my @forms = $morphoLM->forms_of_lemma('moci'); 15 | foreach my $form_info (@forms) { 16 | print join( "\t", $form_info->get_form(), $form_info->get_tag(), $form_info->get_count() ), "\n"; 17 | } 18 | 19 | print "\nMost frequent past participle of 'moci' is: " 20 | , $morphoLM->best_form_of_lemma( 'moci', '^Vp' ) 21 | , "\n\n"; 22 | 23 | print "Past participles of 'moci'\n"; 24 | @forms = $morphoLM->forms_of_lemma( 'moci', { tag_regex => '^Vp' } ); 25 | foreach my $form_info (@forms) { 26 | print $form_info->to_string(), "\n"; 27 | } 28 | -------------------------------------------------------------------------------- /lib/Treex/Tool/Lexicon/Derivations/test_cs.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | use strict; 4 | use utf8; 5 | 6 | use Treex::Tool::Lexicon::Derivations::CS; 7 | binmode STDOUT, ":utf8"; 8 | 9 | my %sample_input = ( 10 | adj2adv => [qw(zelený sporý drahý plochý pracný zajímavý)], 11 | verb2noun => [qw(chodit platit hlídat smát)], 12 | noun2adj => [qw(hrad pes prach strom les matka Josef Praha Bush)], 13 | verb2adj => [qw(praštit vařit létat ušít skácet napsat)], 14 | verb2activeadj => [qw(chodit plavat klamat hořet)], 15 | perf2imperf => [qw(otevřít)], 16 | imperf2perf => [qw(dosahovat)], 17 | ); 18 | 19 | foreach my $type ( keys %sample_input ) { 20 | print "Derivations of type $type\n"; 21 | foreach my $input ( @{ $sample_input{$type} } ) { 22 | print "\t$input --> " . join( ", ", Treex::Tool::Lexicon::Derivations::CS::derive( $type, $input ) ) . 
"\n"; 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /lib/Treex/Tool/Lexicon/Generation/t/cs.t: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | use utf8; 3 | use strict; 4 | use warnings; 5 | use Test::More; #tests => 14; 6 | 7 | use_ok('Treex::Tool::Lexicon::Generation::CS'); 8 | my $generator = new_ok('Treex::Tool::Lexicon::Generation::CS'); 9 | 10 | my @TESTS = ( 11 | ['pes', 'NNMS4-----A----', 'psa'], 12 | ['pes', '...S4', 'psa'], 13 | ); 14 | 15 | foreach my $test (@TESTS) { 16 | my ($lemma, $tag_regex, $expected_form) = @$test; 17 | my $form_info = $generator->best_form_of_lemma($lemma, $tag_regex); 18 | my $form = $form_info ? $form_info->get_form() : undef; 19 | 20 | cmp_ok($form, 'eq', $expected_form, "$lemma + $tag_regex => $expected_form"); 21 | } 22 | 23 | done_testing(); 24 | -------------------------------------------------------------------------------- /lib/Treex/Tool/Lexicon/Generation/t/es.t: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | use utf8; 3 | use strict; 4 | use warnings; 5 | use Test::More; #tests => 14; 6 | use Lingua::Interset::FeatureStructure; 7 | 8 | use_ok('Treex::Tool::Lexicon::Generation::ES'); 9 | my $generator = new_ok('Treex::Tool::Lexicon::Generation::ES'); 10 | 11 | my @TESTS = ( 12 | ['llover', {pos=> 'verb', number=>'sing', mood=>'ind', person=>3, tense=>'past'}, 'llovió'], 13 | ['gustar', {pos=> 'verb', number=>'sing', mood=>'ind', person=>3, tense=>'pres'}, 'gusta'], 14 | ); 15 | 16 | foreach my $test (@TESTS) { 17 | my ($lemma, $features, $expected_form) = @$test; 18 | my $iset = Lingua::Interset::FeatureStructure->new($features); 19 | my $form = $generator->best_form_of_lemma($lemma, $iset); 20 | 21 | cmp_ok($form, 'eq', $expected_form, "$lemma + ".$iset->as_string()." 
=> $expected_form"); 22 | } 23 | 24 | done_testing(); 25 | -------------------------------------------------------------------------------- /lib/Treex/Tool/Lexicon/Generation/t/pt.t: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | use utf8; 3 | use strict; 4 | use warnings; 5 | use Test::More; #tests => 14; 6 | use Lingua::Interset::FeatureStructure; 7 | 8 | use_ok('Treex::Tool::Lexicon::Generation::PT'); 9 | my $generator = new_ok('Treex::Tool::Lexicon::Generation::PT'); 10 | 11 | my @TESTS = ( 12 | ['chover', {pos=> 'verb', number=>'sing', mood=>'ind', person=>3, tense=>'past'}, 'choveu'], 13 | ['gostar', {pos=> 'verb', number=>'sing', mood=>'ind', person=>3, tense=>'pres'}, 'gosta'], 14 | ); 15 | 16 | foreach my $test (@TESTS) { 17 | my ($lemma, $features, $expected_form) = @$test; 18 | my $iset = Lingua::Interset::FeatureStructure->new($features); 19 | my $form = $generator->best_form_of_lemma($lemma, $iset); 20 | 21 | cmp_ok($form, 'eq', $expected_form, "$lemma + ".$iset->as_string()." 
=> $expected_form"); 22 | } 23 | 24 | done_testing(); 25 | -------------------------------------------------------------------------------- /lib/Treex/Tool/Lexicon/Generation/t/ru.t: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | use strict; 4 | use warnings; 5 | use Test::More; 6 | use Treex::Tool::Lexicon::Generation::RU; 7 | 8 | my $generator = Treex::Tool::Lexicon::Generation::RU->new(); 9 | 10 | use utf8; 11 | 12 | BEGIN { use_ok('Treex::Tool::Lexicon::Generation::RU') } 13 | 14 | cmp_ok( ${[map {$_->get_form} $generator->forms_of_lemma('Россия',{ tag_regex => 'NNFS6.*'})]}[0], 15 | 'eq', 'России', 'Correct generation of forms of "Russia" in the locative case'); 16 | 17 | done_testing(); 18 | -------------------------------------------------------------------------------- /lib/Treex/Tool/ML/Clustering/t/c_cluster.t: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | use strict; 3 | use warnings; 4 | 5 | #BEGIN { 6 | # if (!$ENV{EXPERIMENTAL} || !$ENV{EXPENSIVE_TESTING}) { 7 | # require Test::More; 8 | # Test::More::plan(skip_all => 'This test takes long time and is experimental'); 9 | # } 10 | #} 11 | 12 | use Test::More tests => 1; 13 | 14 | use Treex::Tool::ML::Clustering::C_Cluster; 15 | my $cluster = Treex::Tool::ML::Clustering::C_Cluster->new(); 16 | 17 | isa_ok( $cluster, 'Treex::Tool::ML::Clustering::C_Cluster', 'cluster instantiated' ); 18 | -------------------------------------------------------------------------------- /lib/Treex/Tool/MLFix/.ScikitLearn.pm.swp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ufal/treex/06c3353d9d7c61ff86c572cd926b055733a73442/lib/Treex/Tool/MLFix/.ScikitLearn.pm.swp -------------------------------------------------------------------------------- /lib/Treex/Tool/NamedEnt/README: 
-------------------------------------------------------------------------------- 1 | 2 | NamedEnt: Tools for SVM-based named-entity recognition 3 | 4 | Files in this directory: 5 | 6 | Features.pl - script for extracting feature vectors from data (uses modules mentioned below) 7 | Features/* - modules for extracting one/two/three-word entity features 8 | 9 | README - this file 10 | 11 | TestSVM.pl - script that evaluates an SVM model on given data 12 | TrainSVM.pl - script that trains an SVM model on given data 13 | TuneSVM*.pl - scripts for SVM tuning 14 | 15 | 16 | -------------------------------------------------------------------------------- /lib/Treex/Tool/NamedEnt/tuneWrapper.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | . /net/projects/SGE/user/sge_profile > /dev/null 4 | export PATH=$PATH:~pajas/bin 5 | 6 | qcmd -j -- "./TuneSVM.map.pl $@" 7 | #qsub -cwd -j y -V "perl TuneSVM.map.pl oneword.feat $@" 8 | #qrsh -cwd -V -p -50 -l mf=5g -now no 'renice 10 $$ > /dev/null; perl TuneSVM.map.pl oneword.feat $@' 9 | 10 | -------------------------------------------------------------------------------- /lib/Treex/Tool/Parser/Charniak/Node.pm: -------------------------------------------------------------------------------- 1 | package Treex::Tool::Parser::Charniak::Node; 2 | 3 | use Moose; 4 | 5 | has term => ( 6 | isa => 'Str', 7 | is => 'rw', 8 | required => 1, 9 | default => 'null' 10 | ); 11 | 12 | has children => 13 | ( 14 | isa => 'ArrayRef', 15 | is => 'rw', 16 | default => sub { [] }, 17 | reader => 'get_children', 18 | ); 19 | 20 | sub BUILD { 21 | my ( $self, $params ) = @_; 22 | } 23 | 24 | sub add_child { 25 | my ( $self, $child ) = @_; 26 | push @{ $self->children }, $child; 27 | } 28 | 29 | sub get_type { 30 | my ($self) = @_; 31 | return $self->{term}; 32 | } 33 | 34 | 1; 35 | __END__ 36 | 37 | 38 | -------------------------------------------------------------------------------- 
/lib/Treex/Tool/Parser/MSTperl/samples/labeller_test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ./test_labeller_tsv.pl sample_test.tsv sample.Lmodel sample.config $@ 3 | -------------------------------------------------------------------------------- /lib/Treex/Tool/Parser/MSTperl/samples/labeller_train.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ./train_labeller_tsv.pl sample_train.tsv sample.Lmodel sample.config 0 $@ 3 | -------------------------------------------------------------------------------- /lib/Treex/Tool/Parser/MSTperl/samples/sample_test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ./test_tsv.pl sample_test.tsv sample.model sample.config -------------------------------------------------------------------------------- /lib/Treex/Tool/Parser/MSTperl/samples/sample_train.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ./train_tsv.pl sample_train.tsv sample.model sample.config 1 3 | -------------------------------------------------------------------------------- /lib/Treex/Tool/Parser/MSTperl/samples/treex_input.txt: -------------------------------------------------------------------------------- 1 | Boys love girls. 2 | John Brown loves Mary Thatcher. 3 | John eagerly loves the sweet little Mary. 4 | A boy loves a girl. 5 | John loves Mary. 6 | Counting Flowers On The Wall 7 | I keep hearing you're concerned about my happiness. 8 | But all that thought you're giving me is conscience I guess. 9 | If I was walking in your shoes, I wouldn't worry none. 10 | While you and your friends are worried about me I'm having lots of fun. 11 | Counting flowers on the wall that don't bother me at all. 12 | Playing solitaire till dawn with a deck of fifty-one. 13 | Smoking cigarettes and watching Captain Kangaroo. 
14 | Now don't tell me I've nothing to do. 15 | It's good to see you, I must go, I know I look a fright. 16 | Anyway my eyes are not accustomed to this light. 17 | And my shoes are not accustomed to this hard concrete. 18 | So I must go back to my room and make my day complete. 19 | -------------------------------------------------------------------------------- /lib/Treex/Tool/Parser/MSTperl/samples/treex_parse.scen: -------------------------------------------------------------------------------- 1 | # You can use this scenario for parsing sentences in Treex. 2 | # If you have Treex installed, just run: 3 | # treex treex_parse.scen 4 | Util::SetGlobal language=en selector=src 5 | Read::Sentences from=treex_input.txt 6 | W2A::ResegmentSentences 7 | W2A::EN::Tokenize 8 | W2A::EN::NormalizeForms 9 | W2A::EN::FixTokenization 10 | W2A::EN::TagMorce 11 | W2A::EN::FixTags 12 | W2A::EN::Lemmatize 13 | A2N::EN::StanfordNamedEntities model=ner-eng-ie.crf-3-all2008.ser.gz 14 | A2N::EN::DistinguishPersonalNames 15 | W2A::MarkChunks 16 | W2A::EN::ParseMSTperl model_name=conll_2007_medium 17 | W2A::EN::FixNominalGroups 18 | W2A::EN::FixIsMember 19 | W2A::EN::FixAtree 20 | W2A::EN::FixMultiwordPrepAndConj 21 | W2A::EN::FixDicendiVerbs 22 | W2A::EN::SetAfunAuxCPCoord 23 | W2A::EN::SetAfun 24 | Write::Treex to=treex_output.treex 25 | -------------------------------------------------------------------------------- /lib/Treex/Tool/Parser/MSTperl/scripts/conll2inline.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | use strict; 3 | use warnings; 4 | use utf8; 5 | 6 | sub say { 7 | my $line = shift; 8 | print "$line\n"; 9 | } 10 | 11 | binmode STDIN, ':utf8'; 12 | binmode STDOUT, ':utf8'; 13 | binmode STDERR, ':utf8'; 14 | 15 | my @sentence; 16 | while (<>) { 17 | chomp; 18 | if ($_) { 19 | my $attributes = $_; 20 | $attributes =~ s/\t/ /g; 21 | push @sentence, $attributes; 22 | } else { 23 | print join "\t", @sentence; 24 
| print "\n"; 25 | @sentence = (); 26 | } 27 | } 28 | if (@sentence) { 29 | print join( "\t", @sentence ), "\n"; # flush the last sentence (input had no closing blank line), newline-terminated like the others 30 | } 31 | -------------------------------------------------------------------------------- /lib/Treex/Tool/Parser/MSTperl/scripts/inline2conll.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | use strict; 3 | use warnings; 4 | use utf8; 5 | 6 | sub say { 7 | my $line = shift; 8 | print "$line\n"; 9 | } 10 | 11 | binmode STDIN, ':utf8'; 12 | binmode STDOUT, ':utf8'; 13 | binmode STDERR, ':utf8'; 14 | 15 | while (<>) { 16 | chomp; 17 | my @nodes = split /\t/; 18 | foreach my $node (@nodes) { 19 | my @attributes = split / /, $node; 20 | my $line = join "\t", @attributes; 21 | say $line; 22 | } 23 | say ''; 24 | } 25 | 26 | -------------------------------------------------------------------------------- /lib/Treex/Tool/Parser/MSTperl/scripts/inline_sentences_reorder.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | use strict; 3 | use warnings; 4 | use utf8; 5 | 6 | sub say { 7 | my $line = shift; 8 | print "$line\n"; 9 | } 10 | 11 | binmode STDIN, ':utf8'; 12 | binmode STDOUT, ':utf8'; 13 | binmode STDERR, ':utf8'; 14 | 15 | my %sentences; 16 | 17 | while (<>) { 18 | chomp; 19 | my $sentence = $_; 20 | my @nodes = split /\t/; 21 | my $sent_length = scalar(@nodes); 22 | $sentences{$sentence} = $sent_length; # NOTE(review): keyed by sentence text, so identical sentences collapse into one entry - confirm this is intended 23 | } 24 | 25 | # ascending length 26 | #my @sorted_sentences = sort {$sentences{$a} <=> $sentences{$b}} keys %sentences; 27 | 28 | # descending length 29 | my @sorted_sentences = sort {$sentences{$b} <=> $sentences{$a}} keys %sentences; 30 | 31 | foreach my $sentence (@sorted_sentences) { 32 | say $sentence; 33 | } 34 | -------------------------------------------------------------------------------- /lib/Treex/Tool/Parser/MSTperl/scripts/labelled_parse_test.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # $1=test data $2=config $3=model $4=lmodel 3 | # $5=algorithm $6=debug $7=pruning 4 | echo "Going to test the parser and the labeller in one pipeline." 5 | echo "Test data: $1" 6 | echo "Config file: $2" 7 | echo "Parser model file: $3" 8 | echo "Labeller model file: $4" 9 | # echo "Algorithm: $5" 10 | # echo "Debug level: $6" 11 | # echo "N-best pruning: $7" 12 | /home/rosa/mst_perl/scripts/test_parse_and_label.pl /home/rosa/mst_perl/data/$1 /home/rosa/mst_perl/$2 /home/rosa/models/$3 /home/rosa/models/$4 # $5 $6 $7 13 | -------------------------------------------------------------------------------- /lib/Treex/Tool/Parser/MSTperl/scripts/labeller_test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # $1=test data $2=config $3=model $4=algorithm $5=debug $6=pruning 3 | echo "Going to test the labeller." 4 | echo "Test data: $1" 5 | echo "Config file: $2.config" 6 | echo "Model file: $2.lmodel" 7 | #echo "Algorithm: $4" 8 | #echo "Debug level: $5" 9 | #echo "Max number of states in Viterbi: $6" 10 | /home/rosa/mst_perl/scripts/test_labeller_tsv.pl /home/rosa/mst_perl/data/$1 /home/rosa/models/$2.lmodel /home/rosa/models/$2.config #$4 $5 $6 11 | -------------------------------------------------------------------------------- /lib/Treex/Tool/Parser/MSTperl/scripts/labeller_train_and_test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # $1=training data $2=test data $3=config $4=model $5=algorithm $6=debug $7=pruning 3 | echo "Going to train and test the labeller." 
4 | echo "Training data: $1" 5 | echo "Test data: $2" 6 | echo "Config file: $3" 7 | echo "Model file: $4" 8 | #echo "Algorithm: $5" 9 | #echo "Debug level: $6" 10 | #echo "Max number of states in Viterbi: $7" 11 | /home/rosa/mst_perl/scripts/train_labeller_tsv.pl /home/rosa/mst_perl/data/$1 /home/rosa/models/$4 /home/rosa/mst_perl/$3 0 # $5 $6 $7 12 | /home/rosa/mst_perl/scripts/test_labeller_tsv.pl /home/rosa/mst_perl/data/$2 /home/rosa/models/$4 /home/rosa/mst_perl/$3 # $5 $6 $7 -------------------------------------------------------------------------------- /lib/Treex/Tool/Parser/MSTperl/scripts/make_czech_tags.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl 2 | 3 | use strict; 4 | use warnings; 5 | 6 | binmode STDIN, ":utf8"; 7 | binmode STDOUT, ":utf8"; 8 | 9 | while (<>) { 10 | chomp; 11 | my @item = split(/\t/, $_); 12 | # $item[3]: pos (N, V, A, ...) - set to $item[4] 13 | # $item[4]: subpos (A, B, C, D, E, F, ...) - changed to tag (N4, VB, ...) 
14 | # $item[5]: morphological features - kept 15 | # other items kept and untouched 16 | my $tag; 17 | if (@item) { 18 | if ($item[5] =~ /Cas=(.)/) { 19 | $tag = $item[3].$1; # pos + case 20 | } 21 | else { 22 | my ($subpos) = $item[5] =~ /SubPOS=(.)/; # list-context match: undef when SubPOS is absent, instead of reading an unchecked $1 23 | $tag = $item[3].(defined $subpos ? $subpos : ''); # pos + subpos (bare pos when there is no SubPOS feature) 24 | } 25 | $item[3] = $tag; 26 | $item[4] = $tag; 27 | } 28 | print join("\t", @item)."\n"; 29 | } 30 | -------------------------------------------------------------------------------- /lib/Treex/Tool/Parser/MSTperl/scripts/pcedt2conll.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | renice 10 $$ 3 | treex Write::AttributeSentencesAligned \ 4 | language=cs alignment_language=en layer=a \ 5 | alignment_type=int.gdfa \ 6 | attributes="ord form lemma CzechCoarseTag(tag) tag parent->ord afun \ 7 | aligned->ord aligned->tag aligned->afun aligned->parent->ord \ 8 | AlignedTreeDistances(node,alignment_hash)" \ 9 | -- ../data/pcedt_latest/*/wsj_*.treex.gz > ../data/pcedt_data_latest.tsv 10 | head -n -119991 ../data/pcedt_data_latest.tsv > ../data/pcedt_train_latest.tsv 11 | tail -n 119991 ../data/pcedt_data_latest.tsv > ../data/pcedt_test_latest.tsv 12 | -------------------------------------------------------------------------------- /lib/Treex/Tool/Parser/MSTperl/scripts/pcedt2conll_tag_and_parse_en.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | renice 10 $$ 3 | treex \ 4 | W2A::EN::TagMorce language=en \ 5 | W2A::EN::FixTags language=en \ 6 | W2A::EN::Lemmatize language=en \ 7 | ../../scenarios/en_analysis_2.scen \ 8 | Write::AttributeSentencesAligned \ 9 | language=cs alignment_language=en layer=a \ 10 | alignment_type=int.gdfa \ 11 | attributes="ord form lemma CzechCoarseTag(tag) tag parent->ord afun \ 12 | aligned->ord aligned->tag aligned->afun aligned->parent->ord" \ 13 | -- ../wsj_*.treex.gz \ 14 | > ../data/pcedt_data_en_parsed.tsv 15 | head -n -119991 
../data/pcedt_data_en_parsed.tsv > ../data/pcedt_train_en_parsed.tsv 16 | tail -n 119991 ../data/pcedt_data_en_parsed.tsv > ../data/pcedt_test_en_parsed.tsv 17 | -------------------------------------------------------------------------------- /lib/Treex/Tool/Parser/MSTperl/scripts/pcedt2conll_td.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | renice 10 $$ 3 | treex \ 4 | Write::AttributeSentencesAligned \ 5 | language=cs alignment_language=en layer=a \ 6 | alignment_type=int.gdfa \ 7 | attributes="ord form lemma CzechCoarseTag(tag) tag parent->ord afun \ 8 | aligned->ord aligned->tag aligned->afun aligned->parent->ord \ 9 | AlignedTreeDistances(node,alignment_hash)" \ 10 | -- ../data/pcedt/*/wsj_*.treex.gz \ 11 | > ../data/pcedt_data_td.tsv 12 | head -n -119991 ../data/pcedt_data_td.tsv > ../data/pcedt_train_td.tsv 13 | tail -n 119991 ../data/pcedt_data_td.tsv > ../data/pcedt_test_td.tsv 14 | -------------------------------------------------------------------------------- /lib/Treex/Tool/Parser/MSTperl/scripts/pdtT2conll.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | renice 10 $$ 3 | for i in ../data/pdt20amw/dtest/*.a.gz 4 | do 5 | treex Read::PDT schema_dir="/net/projects/pdt/pdt20/data/schemas/" t_layer=0 from="$i" \ 6 | Write::AttributeSentencesAligned language=cs layer=a attributes="ord form lemma CzechCoarseTag(tag) tag parent->ord afun" alignment_type=none alignment_is_backwards=0 alignment_language=en \ 7 | >> ../data/pdt20_test.tsv 8 | done -------------------------------------------------------------------------------- /lib/Treex/Tool/Parser/MSTperl/scripts/simple_lemmas.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | use strict; 3 | use warnings; 4 | use utf8; 5 | 6 | binmode STDIN, ':utf8'; 7 | binmode STDOUT, ':utf8'; 8 | binmode STDERR, ':utf8'; 9 | 10 | use 
Treex::Tool::Lexicon::CS; 11 | 12 | my $lemmaFieldIndex = 2; 13 | 14 | open my $file, '<:utf8', $ARGV[0] or die 'cannot open input file'; 15 | while (<$file>) { 16 | chomp; 17 | if ($_) { 18 | my @fields = split /\t/; 19 | $fields[$lemmaFieldIndex] = Treex::Tool::Lexicon::CS::truncate_lemma ($fields[$lemmaFieldIndex], 1); 20 | print join "\t", @fields; 21 | print "\n"; 22 | } else { 23 | print "\n"; 24 | } 25 | } 26 | close $file; 27 | print STDERR "Done.\n"; 28 | 29 | sub get_simple_lemma { 30 | my $lemma = shift; 31 | 32 | $lemma =~ s/-[0-9]+$//; 33 | #$lemma =~ s/(`|_[;:,^]).+$//; 34 | #$lemma =~ s/(-|`|_[;:,^]).+$//; 35 | 36 | return $lemma; 37 | } 38 | -------------------------------------------------------------------------------- /lib/Treex/Tool/Parser/MSTperl/scripts/split_afun_ismember.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | cut $1 -f-7 > $$.first.tmp 3 | cut $1 -f8 > $$.afuns.tmp 4 | cut $1 -f9- > $$.last.tmp 5 | sed $$.afuns.tmp -e 's/$/_0/' > $$.afuns0.tmp 6 | cut $$.afuns0.tmp -d'_' -f1 > $$.noM_afuns.tmp 7 | cut $$.afuns0.tmp -d'_' -f2 > $$.afun_Ms.tmp 8 | paste $$.first.tmp $$.noM_afuns.tmp $$.afun_Ms.tmp $$.last.tmp > ${1/.tsv}_split_afuns.tsv 9 | rm $$.*.tmp 10 | -------------------------------------------------------------------------------- /lib/Treex/Tool/Parser/MSTperl/scripts/train_conll.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | use strict; 3 | use warnings; 4 | use utf8; 5 | 6 | binmode STDIN, ':utf8'; 7 | binmode STDOUT, ':utf8'; 8 | 9 | use Treex::Tool::Parser::MSTperl::Config; 10 | use Treex::Tool::Parser::MSTperl::Reader; 11 | use Treex::Tool::Parser::MSTperl::TrainerUnlabelled; 12 | 13 | my ($train_file, $model_file, $config_file, $save_tsv) = @ARGV; 14 | 15 | if (!$config_file) { 16 | $config_file = "$model_file.config"; 17 | $model_file = "$model_file.model"; 18 | } 19 | 20 | my $config = 
Treex::Tool::Parser::MSTperl::Config->new(config_file => $config_file); 21 | my $reader = Treex::Tool::Parser::MSTperl::Reader->new(config => $config); 22 | my $training_data = $reader->read_tsv($train_file); 23 | my $trainer = Treex::Tool::Parser::MSTperl::TrainerUnlabelled->new(config => $config); 24 | 25 | $trainer->train($training_data); 26 | $trainer->model->store($model_file); 27 | if ($save_tsv) { 28 | $trainer->model->store_tsv($model_file.'.tsv'); 29 | } 30 | -------------------------------------------------------------------------------- /lib/Treex/Tool/Parser/MSTperl/scripts/unlabelled_test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # $1=test data $2=config $3=model 3 | echo "Going to test the unlabelled parser." 4 | echo "Test data: $1" 5 | echo "Config file: $2.config" 6 | echo "Model file: $2.model" 7 | /home/rosa/mst_perl/scripts/test_conll.pl /home/rosa/mst_perl/data/$1 /home/rosa/models/$2.model /home/rosa/models/$2.config 8 | -------------------------------------------------------------------------------- /lib/Treex/Tool/Parser/MSTperl/scripts/unlabelled_test_rur.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # $1=test data $2=config $3=model 3 | echo "Going to test the RUR unlabelled parser." 4 | echo "Test data: $1" 5 | echo "Config file: $2.config" 6 | echo "Model file: $2.model" 7 | /home/rosa/mst_perl/scripts/test_rur_conll.pl /home/rosa/mst_perl/data/$1 /home/rosa/models/$2.model /home/rosa/models/$2.config 8 | -------------------------------------------------------------------------------- /lib/Treex/Tool/Parser/MSTperl/scripts/unlabelled_train_and_test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # $1=training data $2=test data $3=config $4=model 3 | echo "Going to train and test the unlabelled parser." 
4 | echo "Training data: $1" 5 | echo "Test data: $2" 6 | echo "Config file: $3" 7 | c=${3#*/} 8 | model=${c%config}model 9 | echo "Model file: ${model}" 10 | cd /home/rosa/models/ 11 | ln -s /home/rosa/mst_perl/$3 12 | /home/rosa/mst_perl/scripts/train_conll.pl /home/rosa/mst_perl/data/$1 /home/rosa/models/$model /home/rosa/mst_perl/$3 0 13 | /home/rosa/mst_perl/scripts/test_conll.pl /home/rosa/mst_perl/data/$2 /home/rosa/models/$model /home/rosa/mst_perl/$3 14 | -------------------------------------------------------------------------------- /lib/Treex/Tool/Parser/MSTperl/scripts/worsen_pcedt.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | renice 10 $$ 3 | 4 | # worsen PCEDT and save it into tsv 5 | treex -Lcs \ 6 | A2A::CS::WorsenWordForms err_distr_from=/home/rosa/depfix/tagchanges.tsv \ 7 | Write::AttributeSentencesAligned language=cs alignment_language=en layer=a alignment_type=int.gdfa \ 8 | attributes="ord form lemma CzechCoarseTag(tag) tag parent->ord afun \ 9 | aligned->ord aligned->tag aligned->afun aligned->parent->ord" \ 10 | -- /home/rosa/depfix/mst_perl/data/pcedt/*/wsj_*.treex.gz \ 11 | > /home/rosa/depfix/mst_perl/data/pcedt_worsened.tsv 12 | 13 | # split into train set and test set 14 | head -n -119991 /home/rosa/depfix/mst_perl/data/pcedt_worsened.tsv > /home/rosa/depfix/mst_perl/data/pcedt_worsened_train.tsv 15 | tail -n 119991 /home/rosa/depfix/mst_perl/data/pcedt_worsened.tsv > /home/rosa/depfix/mst_perl/data/pcedt_worsened_test.tsv 16 | -------------------------------------------------------------------------------- /lib/Treex/Tool/Parser/RUR/samples/labeller_test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ./test_labeller_tsv.pl sample_test.tsv sample.Lmodel sample.config $@ 3 | -------------------------------------------------------------------------------- /lib/Treex/Tool/Parser/RUR/samples/labeller_train.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ./train_labeller_tsv.pl sample_train.tsv sample.Lmodel sample.config 0 $@ 3 | -------------------------------------------------------------------------------- /lib/Treex/Tool/Parser/RUR/samples/sample_test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ./test_tsv.pl sample_test.tsv sample.model sample.config -------------------------------------------------------------------------------- /lib/Treex/Tool/Parser/RUR/samples/sample_train.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | ./train_tsv.pl sample_train.tsv sample.model sample.config 1 3 | -------------------------------------------------------------------------------- /lib/Treex/Tool/Parser/RUR/samples/treex_input.txt: -------------------------------------------------------------------------------- 1 | Boys love girls. 2 | John Brown loves Mary Thatcher. 3 | John eagerly loves the sweet little Mary. 4 | A boy loves a girl. 5 | John loves Mary. 6 | Counting Flowers On The Wall 7 | I keep hearing you're concerned about my happiness. 8 | But all that thought you're giving me is conscience I guess. 9 | If I was walking in your shoes, I wouldn't worry none. 10 | While you and your friends are worried about me I'm having lots of fun. 11 | Counting flowers on the wall that don't bother me at all. 12 | Playing solitaire till dawn with a deck of fifty-one. 13 | Smoking cigarettes and watching Captain Kangaroo. 14 | Now don't tell me I've nothing to do. 15 | It's good to see you, I must go, I know I look a fright. 16 | Anyway my eyes are not accustomed to this light. 17 | And my shoes are not accustomed to this hard concrete. 18 | So I must go back to my room and make my day complete. 
19 | -------------------------------------------------------------------------------- /lib/Treex/Tool/Parser/RUR/samples/treex_parse.scen: -------------------------------------------------------------------------------- 1 | # You can use this scenario for parsing sentences in Treex. 2 | # If you have Treex installed, just run: 3 | # treex treex_parse.scen 4 | Util::SetGlobal language=en selector=src 5 | Read::Sentences from=treex_input.txt 6 | W2A::ResegmentSentences 7 | W2A::EN::Tokenize 8 | W2A::EN::NormalizeForms 9 | W2A::EN::FixTokenization 10 | W2A::EN::TagMorce 11 | W2A::EN::FixTags 12 | W2A::EN::Lemmatize 13 | A2N::EN::StanfordNamedEntities model=ner-eng-ie.crf-3-all2008.ser.gz 14 | A2N::EN::DistinguishPersonalNames 15 | W2A::MarkChunks 16 | W2A::EN::ParseMSTperl model_name=conll_2007_medium 17 | W2A::EN::FixNominalGroups 18 | W2A::EN::FixIsMember 19 | W2A::EN::FixAtree 20 | W2A::EN::FixMultiwordPrepAndConj 21 | W2A::EN::FixDicendiVerbs 22 | W2A::EN::SetAfunAuxCPCoord 23 | W2A::EN::SetAfun 24 | Write::Treex to=treex_output.treex 25 | -------------------------------------------------------------------------------- /lib/Treex/Tool/Parser/t/fanse.t: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | use strict; 3 | use warnings; 4 | 5 | BEGIN { 6 | if (!$ENV{EXPENSIVE_TESTING}) { 7 | require Test::More; 8 | Test::More::plan(skip_all => 'This test takes long time'); 9 | } 10 | } 11 | 12 | use Test::More tests => 4; 13 | 14 | use Treex::Tool::Parser::Fanse; 15 | my $parser = Treex::Tool::Parser::Fanse->new(); 16 | 17 | isa_ok( $parser, 'Treex::Tool::Parser::Fanse', 'parser instantiated' ); 18 | 19 | my @forms = qw(John loves Mary); 20 | my ( $parent_indices, $edge_labels, $pos_tags ) = $parser->parse( \@forms ); 21 | 22 | is_deeply( $parent_indices, [ 2, 0, 2 ], 'topology ok' ); 23 | is_deeply( $edge_labels, [qw(nsubj ROOT dobj)], 'edge labels ok' ); 24 | is_deeply( $pos_tags, [qw(NNP VBZ NNP)], 'pos tags 
ok' ); 25 | -------------------------------------------------------------------------------- /lib/Treex/Tool/Parser/t/malt.t: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | use strict; 3 | use warnings; 4 | use Treex::Tool::Parser::Malt; 5 | 6 | 7 | 8 | use Test::More; 9 | 10 | plan skip_all => q(Module still using $TMT_ROOT, won't test, until changed to TC::Resource ); 11 | 12 | 13 | plan tests => 3; 14 | 15 | my $parser = Treex::Tool::Parser::Malt->new( { model => 'en_nivreeager.mco' } ); 16 | 17 | isa_ok( $parser, 'Treex::Tool::Parser::Malt', 'parser instantiated' ); 18 | 19 | my @forms = qw(John loves Mary); 20 | my @lemmas = qw(John love Mary); 21 | my @pos = qw(NNP VBZ NNP); 22 | my @cpos = qw(NN VB NN); 23 | my @features = qw(_ _ _); 24 | 25 | my ( $parent_indices, $edge_labels ) = $parser->parse( \@forms, \@lemmas, \@cpos, \@pos, \@features ); 26 | 27 | is_deeply( $parent_indices, [ 2, 0, 2 ], 'topology' ); 28 | is_deeply( $edge_labels, [qw(SBJ ROOT OBJ)], 'edge labels' ); 29 | 30 | -------------------------------------------------------------------------------- /lib/Treex/Tool/Parser/t/zpar.t: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | use strict; 3 | use warnings; 4 | 5 | BEGIN { 6 | if (!$ENV{EXPERIMENTAL} || !$ENV{EXPENSIVE_TESTING}) { 7 | require Test::More; 8 | Test::More::plan(skip_all => 'This test takes long time and is experimental'); 9 | } 10 | } 11 | 12 | use Test::More tests => 4; 13 | 14 | use Treex::Tool::Parser::Zpar; 15 | my $parser = Treex::Tool::Parser::Zpar->new(); 16 | 17 | isa_ok( $parser, 'Treex::Tool::Parser::Zpar', 'parser instantiated' ); 18 | 19 | my @forms = qw(John loves Mary); 20 | my ( $parent_indices, $edge_labels, $pos_tags ) = $parser->parse( \@forms ); 21 | 22 | is_deeply( $parent_indices, [ 2, 0, 2 ], 'topology ok' ); 23 | is_deeply( $edge_labels, [qw(SUB ROOT OBJ)], 'edge labels ok' ); # CoNLL 
uses "SBJ" for subject, not "SUB" 24 | is_deeply( $pos_tags, [qw(NNP VBZ NNP)], 'pos tags ok' ); 25 | -------------------------------------------------------------------------------- /lib/Treex/Tool/Phrase2Dep/t/pennconverter.t: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | use strict; 3 | use warnings; 4 | 5 | use Treex::Tool::Phrase2Dep::Pennconverter; 6 | 7 | use Test::More tests=>3; 8 | 9 | my $converter = Treex::Tool::Phrase2Dep::Pennconverter->new(); 10 | 11 | isa_ok( $converter, 'Treex::Tool::Phrase2Dep::Pennconverter', 'Penn Converter instantiated' ); 12 | 13 | my $penn_string = '(S (NP (NNP John)) (VP (VBZ loves) (NP (NNP Mary))))'; 14 | my $expected_parents = [2, 0, 2]; 15 | my $expected_deprels = [qw(VMOD ROOT OBJ)]; 16 | 17 | my ( $parents_ref, $deprels_ref ) = $converter->convert($penn_string); 18 | 19 | is_deeply($parents_ref, $expected_parents, 'correct topology'); 20 | is_deeply($deprels_ref, $expected_deprels, 'correct deprels'); 21 | -------------------------------------------------------------------------------- /lib/Treex/Tool/SRLParser/submit_training_to_maxent.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # file with classification features 4 | training_features=/net/work/people/strakova/robust_parsing/training_features.txt 5 | heldout_features=/net/work/people/strakova/robust_parsing/heldout_features.txt 6 | 7 | # model 8 | model=${TMT_ROOT%/}/share/data/models/srl_parser/srl_parser_model_cs 9 | 10 | ${TMT_ROOT%/}/share/external_tools/MaxEntToolkit/maxent_x86_64 \ 11 | $training_features --heldout $heldout_features -b -m $model -i 100 -v 12 | -------------------------------------------------------------------------------- /lib/Treex/Tool/Segment/t/rule_based.t: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | use strict; 4 | use warnings; 5 | 6 | use 
Test::More tests => 4; 7 | BEGIN{ use_ok('Treex::Tool::Segment::RuleBased');} 8 | my $segmenter = new_ok('Treex::Tool::Segment::RuleBased'); 9 | 10 | my $text = 'Dummy text. Which has to be segmented'; 11 | 12 | my $result = eval { $segmenter->get_segments($text) }; 13 | 14 | ok($result, 'Segmenter returns some result'); 15 | 16 | 17 | TODO: { 18 | local $TODO = 'Test not yet written', 1; 19 | fail ('Test on semantics of segmenting'); 20 | } 21 | -------------------------------------------------------------------------------- /lib/Treex/Tool/Stemmer/TA/CorpusSuffixSplitter.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | use strict; 4 | use warnings; 5 | use utf8; 6 | use Treex::Tool::Stemmer::TA::SuffixSplitter; 7 | 8 | while (<>) { 9 | chomp; 10 | my $sentence = $_; 11 | my $stemmed_sentence = Treex::Tool::Stemmer::TA::SuffixSplitter::stem_sentence($sentence); 12 | print "$stemmed_sentence\n"; 13 | } 14 | 15 | -------------------------------------------------------------------------------- /lib/Treex/Tool/Stemmer/TA/sample.txt: -------------------------------------------------------------------------------- 1 | patikkinRa pazakkan enakku irukkiRaTu. 2 | OtinAl varuvaTu puN . 3 | patikkiRaTu . 4 | d 5 | dd 6 | 3 7 | +3 8 | 9 | 10 | OtAmal irukkATu . 11 | paNaTTaiTTAn kotuTTaTaiyE . 12 | patikkaT TAn vawTEn. 13 | -------------------------------------------------------------------------------- /lib/Treex/Tool/Stemmer/TA/test.pl: -------------------------------------------------------------------------------- 1 | # This program simply stems a given sentence 2 | 3 | use Moose; 4 | use utf8; 5 | use Treex::Tool::Stemmer::TA::Simple; 6 | 7 | my $sentence = "patikkinRa pazakkan enakku irukkiRaTu."; 8 | 9 | my $stemmed_sentence = Treex::Tool::Stemmer::TA::Simple::stem_sentence($sentence); 10 | 11 | print "Stemmed sentence: " . $stemmed_sentence . 
"\n"; 12 | -------------------------------------------------------------------------------- /lib/Treex/Tool/Stemmer/TA/test1.pl: -------------------------------------------------------------------------------- 1 | use Moose; 2 | use utf8; 3 | use Treex::Tool::Stemmer::TA::Simple; 4 | 5 | # stem a given document. output is written to a new file 6 | Treex::Tool::Stemmer::TA::Simple::stem_document("sample.txt", "sample.stm.txt"); 7 | 8 | # restore the original document. 9 | Treex::Tool::Stemmer::TA::Simple::restore_document("sample.stm.txt", "sample.res.txt"); 10 | -------------------------------------------------------------------------------- /lib/Treex/Tool/Tagger/Featurama/t/featurama_en.t: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | use strict; 4 | use warnings; 5 | 6 | use Test::More; 7 | 8 | eval { 9 | require Featurama::Perc; 10 | 1; 11 | } or plan skip_all => 'Cannot load Featurama::Perc'; 12 | 13 | plan tests => 5; 14 | 15 | use_ok('Treex::Tool::Tagger::Featurama::EN'); 16 | 17 | 18 | my $tagger = Treex::Tool::Tagger::Featurama::EN->new(); 19 | 20 | isa_ok( $tagger, 'Treex::Tool::Tagger::Featurama::EN' ); 21 | isa_ok( $tagger, 'Treex::Tool::Tagger::Featurama' ); 22 | 23 | my ( $tags_rf, $lemmas_rf ) = $tagger->tag_sentence( [qw(How are you ?)] ); 24 | cmp_ok( scalar @$tags_rf, '==', 4, q{There's Correct number of tags} ); 25 | cmp_ok( scalar @$lemmas_rf, '==', 4, q{There's Correct number of lemmas} ); 26 | note( join ' ', @$tags_rf ); 27 | note( join ' ', @$lemmas_rf ); 28 | 29 | -------------------------------------------------------------------------------- /lib/Treex/Tool/Tagger/t/mecab.t: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | use strict; 4 | use warnings; 5 | 6 | use utf8; 7 | binmode(STDIN, ':utf8'); 8 | binmode(STDOUT, ':utf8'); 9 | binmode(STDERR, ':utf8'); 10 | use Test::More tests => 10; 11 | 12 | BEGIN { 
use_ok('Treex::Tool::Tagger::MeCab') }; 13 | 14 | require_ok('Treex::Tool::Tagger::MeCab'); 15 | 16 | my $tagger = Treex::Tool::Tagger::MeCab->new(); 17 | 18 | isa_ok( $tagger, 'Treex::Tool::Tagger::MeCab' ); 19 | 20 | my $sentence = qw(わたしは日本語を話します); 21 | my @tokens = $tagger->process_sentence($sentence); 22 | 23 | # tokenized sentence: "わたし は 日本語 を 話し ます" 24 | cmp_ok( scalar @tokens, '==', 6, q{Correct number of tokens}); 25 | 26 | foreach my $token (@tokens) { 27 | cmp_ok( scalar (split /\t/, $token), '==', 10, q{Correct number of features}); 28 | } 29 | 30 | -------------------------------------------------------------------------------- /lib/Treex/Tool/Tagger/t/tree_tagger.t: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | use strict; 3 | use warnings; 4 | use Treex::Tool::Tagger::TreeTagger; 5 | use Test::More; 6 | 7 | my $tagger = Treex::Tool::Tagger::TreeTagger->new( model => $ENV{TMT_ROOT} . 'share/data/models/tagger/tree_tagger/en.par' ); 8 | isa_ok( $tagger, 'Treex::Tool::Tagger::TreeTagger', 'tagger instantiated' ); 9 | 10 | #SKIP: { 11 | # skip "Test is broken", 2; 12 | my @forms = qw(How are you ?); 13 | my @expected_tags = qw(WRB VBP PP SENT); 14 | my @expected_lemmas = qw(How be you ?); 15 | my ( $tags, $lemmas ) = $tagger->tag_sentence( \@forms ); 16 | is_deeply( $tags, \@expected_tags, 'tags ok' ); 17 | is_deeply( $lemmas, \@expected_lemmas, 'lemmas ok' ); 18 | # } 19 | 20 | done_testing(); -------------------------------------------------------------------------------- /lib/Treex/Tool/TranslationModel/Derivative/EN2CS/_readme.txt: -------------------------------------------------------------------------------- 1 | Hyphen_compounds 2 | Deverbial_nouns 3 | Negative_prefices 4 | Deverbial_adjectives 5 | Suffices 6 | Prefices 7 | 8 | 9 | -------------------------------------------------------------------------------- /lib/Treex/Tool/Transliteration/t/check_utf8_latin_conversion.t: 
-------------------------------------------------------------------------------- 1 | use Treex::Tool::Transliteration::TA; 2 | use Test::More; 3 | use utf8; 4 | 5 | binmode STDIN, ':encoding(utf8)'; 6 | binmode STDOUT, ':encoding(utf8)'; 7 | binmode STDERR, ':encoding(utf8)'; 8 | 9 | my @latin_str = ( 'kAtu', 'malai', 'vItu' ); 10 | my @utf8_str = ( 'காடு', 'மலை', 'வீடு' ); 11 | 12 | my $transliterator = 13 | Treex::Tool::Transliteration::TA->new( use_enc_map => 'utf8_2_latin' ); 14 | 15 | # test - utf8 to latin conversion 16 | foreach my $i ( 0 .. $#utf8_str ) { 17 | my $out_string = $transliterator->transliterate_string( $utf8_str[$i] ); 18 | ok( $out_string eq $latin_str[$i], "$utf8_str[$i] => $out_string" ); 19 | } 20 | 21 | # test - latin to utf8 conversion 22 | $transliterator->set_enc_map('latin_2_utf8'); 23 | foreach my $i ( 0 .. $#latin_str ) { 24 | my $out_string = $transliterator->transliterate_string( $latin_str[$i] ); 25 | ok( $out_string eq $utf8_str[$i], "$latin_str[$i] => $out_string" ); 26 | } 27 | done_testing(); 28 | -------------------------------------------------------------------------------- /lib/Treex/Tool/UMR/CS/GrammatemeSetter.pm: -------------------------------------------------------------------------------- 1 | package Treex::Tool::UMR::CS::GrammatemeSetter; 2 | use Moose::Role; 3 | with 'Treex::Tool::UMR::GrammatemeSetter'; 4 | 5 | use experimental qw{ signatures }; 6 | 7 | =head1 NAME 8 | 9 | Treex::Tool::UMR::CS::GrammatemeSetter - Language specific grammateme 10 | deduction from morphology. 11 | 12 | =cut 13 | 14 | { my %REGEX = (person => '^.{7}([123])', 15 | number => '^(?x:(?| .{6} ([SP])' 16 | . 
'| .{3} ([SP]) ))'); 17 | sub tag_regex($self, $grammateme) { $REGEX{$grammateme} } 18 | } 19 | 20 | { my %GRAM = (person => {1 => 1, 21 | 2 => 2, 22 | 3 => 3}, 23 | number => {S => 'sg', 24 | P => 'pl'}); 25 | sub translate($self, $grammateme, $value) { $GRAM{$grammateme}{$value} } 26 | } 27 | 28 | __PACKAGE__ 29 | -------------------------------------------------------------------------------- /lib/Treex/Tool/UMR/LA/GrammatemeSetter.pm: -------------------------------------------------------------------------------- 1 | package Treex::Tool::UMR::LA::GrammatemeSetter; 2 | use Moose::Role; 3 | with 'Treex::Tool::UMR::GrammatemeSetter'; 4 | 5 | use experimental qw{ signatures }; 6 | 7 | =head1 NAME 8 | 9 | Treex::Tool::UMR::LA::GrammatemeSetter - Language specific grammateme 10 | deduction from morphology. 11 | 12 | =cut 13 | 14 | { my %REGEX = (person => '^.([123])', 15 | number => '^..([sp])'); 16 | sub tag_regex($self, $grammateme) { $REGEX{$grammateme} } 17 | } 18 | 19 | { my %GRAM = (person => {1 => 1, 20 | 2 => 2, 21 | 3 => 3}, 22 | number => {s => 'sg', 23 | p => 'pl'}); 24 | sub translate($self, $grammateme, $value) { $GRAM{$grammateme}{$value} } 25 | } 26 | 27 | __PACKAGE__ 28 | -------------------------------------------------------------------------------- /lib/Treex/Tool/UMR/t/pdt2pb.csv: -------------------------------------------------------------------------------- 1 | UMR ID,PDT frame,Role_mapping,AUTOMATIC MAPPING,CORRECTION,COMMENTS,Unambiguous mapping - SSC and/or CEV,Prevailing mapping - SSC and/or CEV,Unambiguous SSC mapping (other than CEV),Info on automatic mapping,Source,Mapping via CzEngVallex,Mapping via SynSemClass5.1 (r.16488),mapping via SynSemClass5.0 (Lindat_version),mapping via SynSemClass (old_version), 2 | """absorbovat-001""",absorbovat (v-w10f1),vec00476,,,,,,,,both,,,,,0 3 | ,ACT: 1,ACT->Absorber,ARG0,,,ARG0,,,,,ACT->ARG0/17,ACT->Absorber(ARG0/25),ACT->Absorber(ARG0/25),ACT->Absorber(ARG0/25),0 4 | ,PAT: 
4,PAT->Absorbed,ARG1,,,ARG1,,,,,PAT->ARG1/28,PAT->Absorbed(ARG1/38),PAT->Absorbed(ARG1/38),PAT->Absorbed(ARG1/38),0 5 | 6 | -------------------------------------------------------------------------------- /lib/Treex/Tool/Vallex/t/print_all_frames.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | use strict; 4 | use warnings; 5 | 6 | use Treex::Tool::Vallex::ValencyFrame; 7 | 8 | my $i = 1; 9 | 10 | while ( $i < 14983 ) { 11 | my $frame = Treex::Tool::Vallex::ValencyFrame->new( { ord => $i++, lexicon => 'vallex.xml', language => 'cs' } ); 12 | 13 | print $frame->to_string . "\n"; 14 | } 15 | 16 | __END__ 17 | 18 | =encoding utf-8 19 | 20 | This lists all the valency frames from the PDT-Vallex Czech valency lexicon. 21 | 22 | =head1 AUTHOR 23 | 24 | Ondřej Dušek 25 | 26 | =head1 COPYRIGHT AND LICENSE 27 | 28 | Copyright © 2011 by Institute of Formal and Applied Linguistics, Charles University in Prague 29 | 30 | This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself. 
31 | -------------------------------------------------------------------------------- /lib/Treex/Tool/Word2vec/readbin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ufal/treex/06c3353d9d7c61ff86c572cd926b055733a73442/lib/Treex/Tool/Word2vec/readbin -------------------------------------------------------------------------------- /lib/Treex/Tool/Word2vec/txt2vw.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | use strict; 3 | use warnings; 4 | use utf8; 5 | #use autodie; 6 | #use PerlIO::gzip; 7 | 8 | sub tsvsay { 9 | my $line = join " ", @_; 10 | print "$line\n"; 11 | } 12 | 13 | binmode STDIN, ':utf8'; 14 | binmode STDOUT, ':utf8'; 15 | binmode STDERR, ':utf8'; 16 | 17 | # 1st line 18 | my $first = <>; 19 | chomp $first; 20 | my ($count, $d) = split / /, $first; 21 | 22 | # convert other lines 23 | while (<>) { 24 | chomp; 25 | my ($word, @vec) = split / /; 26 | $word =~ tr/:| /;!_/; 27 | my $f = 1; 28 | my @fs = map { 'f' . ($f++) . ':' . ($_) } @vec[0 .. ($d-1)]; 29 | tsvsay($word, @fs); 30 | } 31 | 32 | -------------------------------------------------------------------------------- /lib/Treex/Tutorial/Config.pod: -------------------------------------------------------------------------------- 1 | package Treex::Tutorial::Config; 2 | 3 | =pod 4 | 5 | =encoding utf8 6 | 7 | =head1 NAME 8 | 9 | Treex::Tutorial::Config - Configuring Treex 10 | 11 | =head1 EXAMPLES 12 | 13 | TODO 14 | 15 | =head1 SEE ALSO 16 | 17 | L 18 | 19 | =head1 AUTHOR 20 | 21 | Dušan Variš 22 | Martin Popel 23 | 24 | =head1 COPYRIGHT AND LICENSE 25 | 26 | Copyright © 2011 by Institute of Formal and Applied Linguistics, Charles University in Prague 27 | 28 | This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself. 
29 | 30 | -------------------------------------------------------------------------------- /lib/Treex/Tutorial/ReadersAndWriters.pod: -------------------------------------------------------------------------------- 1 | package Treex::Tutorial::ReadersAndWriters; 2 | 3 | =pod 4 | 5 | =encoding utf8 6 | 7 | =head1 NAME 8 | 9 | Treex::Tutorial::ReadersAndWriters - Reading/writing from/to different file formats 10 | 11 | 12 | =head1 FORMATS 13 | 14 | Treex can process data in many formats: plain text, HTML, CoNLL, PDT (PML),... 15 | For each supported format there exists a B<reader> block 16 | (that converts the given format to Treex in-memory representation) 17 | and a writer block (that prints the in-memory representation to the given format). 18 | 19 | cat index.html | treex -q -Len Read::HTML Write::Text 20 | 21 | 22 | =head1 AUTHOR 23 | 24 | Martin Popel 25 | 26 | =head1 COPYRIGHT AND LICENSE 27 | 28 | Copyright © 2011 by Institute of Formal and Applied Linguistics, Charles University in Prague 29 | 30 | This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself. 31 | -------------------------------------------------------------------------------- /lib/Treex/Tutorial/WritingNewReaders.pod: -------------------------------------------------------------------------------- 1 | package Treex::Tutorial::WritingNewReaders; 2 | 3 | =pod 4 | 5 | =encoding utf8 6 | 7 | =head1 NAME 8 | 9 | Treex::Tutorial::WritingNewReaders - Guidelines for writing your own Treex reader/writer blocks. 10 | 11 | =head1 EXAMPLES 12 | 13 | TODO 14 | 15 | =head1 AUTHOR 16 | 17 | Dušan Variš 18 | Martin Popel 19 | 20 | =head1 COPYRIGHT AND LICENSE 21 | 22 | Copyright © 2011 by Institute of Formal and Applied Linguistics, Charles University in Prague 23 | 24 | This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself. 
25 | 26 | -------------------------------------------------------------------------------- /lib/Treex/Tutorial/generate_html.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | perl -MPod::Simple::HTML -e Pod::Simple::HTML::go Install.pod\ 3 | | perl -nlpe 's{}{}'\ 4 | > install.html 5 | 6 | perl -MPod::Simple::HTML -e Pod::Simple::HTML::go FirstSteps.pod\ 7 | | perl -nlpe 's{}{}'\ 8 | > firststeps.html 9 | -------------------------------------------------------------------------------- /lib/Treex/Unilang.pm: -------------------------------------------------------------------------------- 1 | package Treex::Unilang; 2 | 3 | use strict; 4 | use warnings; 5 | use Treex::Core; 6 | 7 | 1; 8 | 9 | __END__ 10 | #TODO this is module just to keep synchronized Unilang with Core 11 | =pod 12 | 13 | =encoding utf8 14 | 15 | =head1 NAME 16 | 17 | Treex::Unilang - collection of blocks parametrized by language and language independent 18 | 19 | =head1 DESCRIPTION 20 | 21 | =head1 AUTHOR 22 | 23 | Tomáš Kraut 24 | 25 | =head1 COPYRIGHT AND LICENSE 26 | 27 | Copyright © 2011 by Institute of Formal and Applied Linguistics, Charles University in Prague 28 | 29 | This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself. 
30 | 31 | -------------------------------------------------------------------------------- /packaging/Makefile: -------------------------------------------------------------------------------- 1 | default: usage 2 | 3 | PACKAGES := Treex-Core Treex-Unilang Treex-EN Treex-CS Treex-JA Treex-Parser-MSTperl Treex-Doc 4 | 5 | usage: 6 | echo 'Targets: cleanall buildall testall' 7 | 8 | cleanall: 9 | for d in $(PACKAGES); do \ 10 | echo "Package $$d"; \ 11 | ( cd $$d; $(MAKE) clean ); \ 12 | done; 13 | 14 | buildall: 15 | for d in $(PACKAGES); do \ 16 | echo "Package $$d"; \ 17 | ( cd $$d; $(MAKE) build ); \ 18 | done; 19 | 20 | testall: 21 | for d in $(PACKAGES); do \ 22 | echo "Package $$d"; \ 23 | ( cd $$d; $(MAKE) test ); \ 24 | done; 25 | -------------------------------------------------------------------------------- /packaging/Treex-CS/Changes.template: -------------------------------------------------------------------------------- 1 | Revision history of the Treex-CS distribution: 2 | 3 | 0.08171 2012-02-16 4 | - added Featurama tagger 5 | 6 | 0.08057 2012-02-07 7 | - MSTperl parser was updated 8 | 9 | 0.07297 2011-11-15 10 | - added MSTperl parser 11 | 12 | 0.07194 2011-11-07 13 | - added dependency on current version of Treex::Unilang 14 | 15 | 0.07191 2011-11-06 16 | - first non-dev release 17 | 18 | 0.06903_1 2011-10-03 19 | 20 | - the first release at CPAN 21 | - contain English analysis up to tagger 22 | 23 | For detailed log of changes see https://svn.ms.mff.cuni.cz/projects/tectomt_devel/timeline (Use 'public' username and same password) 24 | -------------------------------------------------------------------------------- /packaging/Treex-CS/Makefile: -------------------------------------------------------------------------------- 1 | SHELL=bash 2 | 3 | include ../common.mk 4 | 5 | ##List of blocks used by scenario 6 | #Read::Sentences 7 | #Util::SetGlobal language=cs selector=src 8 | #W2A::ResegmentSentences 9 | #W2A::CS::Tokenize 10 | 
#W2A::CS::TagMorphoDiTa 11 | #W2A::CS::ParseMSTperl 12 | 13 | ALLDIRS=${CS_T}/ ${TOOLS}/ ${TAGGER}/ ${LEXICON}/CS/ 14 | 15 | MODULES := ${PREFIX}/CS.pm \ 16 | $(addprefix ${CS}/, Tokenize.pm TagMorphoDiTa.pm ParseMSTperl.pm FixAtreeAfterMcD.pm FixIsMember.pm FixReflexiveTantum.pm FixReflexivePronouns.pm) \ 17 | $(addprefix ${LEXICON}/, CS/Reflexivity.pm) 18 | -------------------------------------------------------------------------------- /packaging/Treex-Core/compile_grammar.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | use strict; 3 | use warnings; 4 | use Parse::RecDescent 1.967009; 5 | my $grammar; 6 | open my $IN, '<', 'lib/Treex/Core/ScenarioParser.rdg' or die "Cannot open lib/Treex/Core/ScenarioParser.rdg: $!"; # stop here rather than pass an undefined grammar to Precompile 7 | { 8 | local $/ = undef; 9 | $grammar = <$IN>; 10 | } 11 | Parse::RecDescent->Precompile( 12 | { -standalone => 1, } 13 | , $grammar 14 | , "Treex::Core::ScenarioParser" 15 | ); 16 | 17 | # The standalone version contains several packages in one file, 18 | # but the very Treex::Core::ScenarioParser starts around line 3300. 19 | # We need to silence Perl critics also in the first package. 
20 | system '(echo "## no critic (Miscellanea::ProhibitUnrestrictedNoCritic)"; echo "## no critic Generated code follows"; cat ScenarioParser.pm) > lib/Treex/Core/ScenarioParser.pm'; 21 | unlink 'ScenarioParser.pm'; 22 | 23 | # The old way did not generate *standalone* parser 24 | ##!/bin/bash 25 | #perl -MParse::RecDescent - ScenarioParser.rdg Treex::Core::ScenarioParser 26 | -------------------------------------------------------------------------------- /packaging/Treex-Doc/Changes.template: -------------------------------------------------------------------------------- 1 | Revision history of the Treex-Doc distribution: 2 | 3 | 0.08324 2012-02-29 4 | 5 | - improvements in documentation 6 | 7 | 0.07297 2011-11-15 8 | 9 | - the first release at CPAN - FAQ and Tutorial 10 | 11 | For detailed log of changes see https://svn.ms.mff.cuni.cz/projects/tectomt_devel/timeline (Use 'public' username and same password) 12 | -------------------------------------------------------------------------------- /packaging/Treex-Doc/Makefile: -------------------------------------------------------------------------------- 1 | SHELL=bash 2 | 3 | include ../common.mk 4 | 5 | MANUAL=${PREFIX}/Manual 6 | TUTORIAL=${PREFIX}/Tutorial 7 | 8 | ALLDIRS=${PREFIX} 9 | 10 | MODULES :=$(PREFIX)/Tutorial.pod ${MANUAL} \ 11 | ${TUTORIAL} 12 | 13 | -------------------------------------------------------------------------------- /packaging/Treex-Doc/t/doctest.t: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | use strict; 4 | use warnings; 5 | 6 | use Test::More tests => 1; 7 | 8 | pass('No tests available. 
We think it should produce PASS, not N/A'); 9 | 10 | done_testing(); 11 | 12 | -------------------------------------------------------------------------------- /packaging/Treex-EN/Changes.template: -------------------------------------------------------------------------------- 1 | Revision history of the Treex-EN distribution: 2 | 3 | 0.08171 2012-02-16 4 | - added Featurama tagger 5 | 6 | 0.08057 2012-02-07 7 | - MSTperl parser was updated 8 | 9 | 0.07297 2011-11-15 10 | - added MSTperl parser 11 | 12 | 0.07194 2011-11-07 13 | - added dependency on current version of Treex::Unilang 14 | 15 | 0.07191 2011-11-06 16 | - first non-dev release 17 | 18 | 0.06903_1 2011-10-03 19 | 20 | - the first release at CPAN 21 | - contain English analysis up to tagger 22 | 23 | For detailed log of changes see https://svn.ms.mff.cuni.cz/projects/tectomt_devel/timeline (Use 'public' username and same password) 24 | -------------------------------------------------------------------------------- /packaging/Treex-EN/Makefile: -------------------------------------------------------------------------------- 1 | SHELL=bash 2 | 3 | include ../common.mk 4 | 5 | ##List of blocks used by scenario 6 | #Read::AlignedSentences en_src=sample-en.txt cs_ref=sample-cs.txt 7 | #Util::SetGlobal language=en selector=src 8 | #W2A::ResegmentSentences 9 | #W2A::EN::Tokenize 10 | #W2A::EN::NormalizeForms 11 | #W2A::EN::FixTokenization 12 | ##W2A::EN::TagMorce 13 | #W2A::TagTreeTagger 14 | #W2A::EN::FixTags 15 | #W2A::EN::Lemmatize 16 | #Tagger still missing 17 | 18 | 19 | ALLDIRS=${EN_T}/ ${TOOLS}/ ${SEGMENT_EN}/ ${FEATURAMA_T}/ 20 | 21 | MODULES := ${PREFIX}/EN.pm \ 22 | $(addprefix ${EN}/, Tokenize.pm NormalizeForms.pm FixTokenization.pm FixTags.pm Lemmatize.pm TagLinguaEn.pm ParseMSTperl.pm SetIsMemberFromDeprel.pm FixTagsAfterParse.pm TagMorphoDiTa.pm) \ 23 | $(addprefix ${EN_T}/, lingua_en.t) \ 24 | $(addprefix ${SEGMENT_EN}/, RuleBased.pm) \ 25 | $(addprefix ${FEATURAMA}/, EN.pm) \ 26 | $(addprefix 
${FEATURAMA_T}/, featurama_en.t) \ 27 | ${ENGLISHMORPHO}/ 28 | 29 | -------------------------------------------------------------------------------- /packaging/Treex-JA/Changes.template: -------------------------------------------------------------------------------- 1 | Revision history of the Treex-JA distribution: 2 | 3 | 0.08171 2012-02-16 4 | - added Featurama tagger 5 | 6 | 0.08057 2012-02-07 7 | - MSTperl parser was updated 8 | 9 | 0.07297 2011-11-15 10 | - added MSTperl parser 11 | 12 | 0.07194 2011-11-07 13 | - added dependency on current version of Treex::Unilang 14 | 15 | 0.07191 2011-11-06 16 | - first non-dev release 17 | 18 | 0.06903_1 2011-10-03 19 | 20 | - the first release at CPAN 21 | - contain English analysis up to tagger 22 | 23 | For detailed log of changes see https://svn.ms.mff.cuni.cz/projects/tectomt_devel/timeline (Use 'public' username and same password) 24 | -------------------------------------------------------------------------------- /packaging/Treex-JA/Makefile: -------------------------------------------------------------------------------- 1 | SHELL=bash 2 | 3 | include ../common.mk 4 | 5 | ##List of blocks used by scenario 6 | #Read::Sentences from=sample-ja.txt 7 | #Util::SetGlobal language=ja selector=src 8 | #W2A::JA::TagMeCab 9 | #W2A::JA::ParseJDEPP 10 | #W2A::JA::RomanizeTags 11 | #W2A::JA::FixInterpunction 12 | #W2A::JA::FixCopulas 13 | #W2A::JA::RehangAuxVerbs 14 | #W2A::JA::RehangCopulas 15 | #W2A::JA::RehangConjunctions 16 | #W2A::JA::RehangParticles 17 | 18 | ALLDIRS=${JA_T}/ ${TOOLS}/ ${TAGGER_T}/ ${PARSER_T}/ 19 | 20 | MODULES := ${PREFIX}/JA.pm \ 21 | $(addprefix ${JA}/, TagMeCab.pm ParseJDEPP.pm RomanizeTags.pm FixInterpunction.pm FixCopulas.pm RehangAuxVerbs.pm RehangCopulas.pm RehangConjunctions.pm RehangParticles.pm) \ 22 | $(addprefix ${JA_T}/, tag_mecab.t parse_jdepp.t) \ 23 | $(addprefix ${TAGGER_T}/, mecab.t) \ 24 | $(addprefix ${PARSER_T}/, jdepp.t) \ 25 | $(addprefix ${TAGGER}/, MeCab.pm) \ 26 | 
$(addprefix ${PARSER}/, JDEPP.pm) \ 27 | 28 | -------------------------------------------------------------------------------- /packaging/Treex-JA/share/examples/sample-ja.txt: -------------------------------------------------------------------------------- 1 | Johnを探しています。 2 | 鳥や犬や猫や馬がいました。 3 | すみません! 4 | 私と一緒に来てください。 5 | わたしは日本語を話します。 6 | 少しだけ。 7 | 名前は何ですか? 8 | 私の名前はJohnです。 9 | 私は水瓜が好きです。 10 | おめでとう! 11 | わかりません。 12 | 学校に行きます。 13 | 電車を乗ります。 14 | 家に犬がいます。 15 | それわ楽しいです。 16 | 木の葉の色が変わります。 17 | 贈り物をいっぱい買っておきました。 18 | 日本に行く時、贈り物をいっぱい買っておきました。 19 | -------------------------------------------------------------------------------- /packaging/Treex-JA/share/examples/sample.scen: -------------------------------------------------------------------------------- 1 | Util::SetGlobal language=ja selector=src 2 | Read::Sentences from=sample-ja.txt 3 | 4 | W2A::JA::TagMeCab 5 | 6 | W2A::JA::ParseJDEPP 7 | 8 | W2A::JA::RomanizeTags 9 | W2A::JA::FixInterpunction 10 | W2A::JA::FixCopulas 11 | W2A::JA::RehangAuxVerbs 12 | W2A::JA::RehangCopulas 13 | W2A::JA::RehangConjunctions 14 | W2A::JA::RehangParticles 15 | 16 | Write::Treex 17 | -------------------------------------------------------------------------------- /packaging/Treex-JA/weaver.ini.template: -------------------------------------------------------------------------------- 1 | [@CorePrep] 2 | 3 | [-SingleEncoding] 4 | 5 | [Name] 6 | [Version] 7 | 8 | [Region / prelude] 9 | 10 | [Generic / SYNOPSIS] 11 | [Generic / DESCRIPTION] 12 | [Generic / OVERVIEW] 13 | 14 | [Collect / ATTRIBUTES] 15 | command = attr 16 | 17 | [Collect / METHODS] 18 | command = method 19 | 20 | [Collect / FUNCTIONS] 21 | command = func 22 | 23 | [Leftovers] 24 | 25 | [Region / postlude] 26 | 27 | [Authors] 28 | [Legal] 29 | -------------------------------------------------------------------------------- /packaging/Treex-Parser-MSTperl/Changes.template: -------------------------------------------------------------------------------- 1 | Revision history 
of the Treex-Parser-MSTPerl distribution: 2 | 3 | VERSION DATE 4 | - TODO 5 | 6 | 0.11949 2014-04-05 7 | - updating ArrayRef[] and similar to work under new Perl 8 | 9 | 0.11336 2013-12-04 10 | - removing new Perl version dependency 11 | 12 | 0.11319 2013-11-28 13 | - setting minimum required version 14 | 15 | 0.11306 2013-11-24 16 | - updated POD a little 17 | 18 | 0.11299 2013-11-24 19 | - safer file path handling & more detailed error reporting 20 | 21 | 0.09731 2012-11-27 22 | - adding advanced capabilities (parallel features, large-scale data features) 23 | 24 | 0.08055 2012-02-07 25 | - labeller now works 26 | 27 | 0.07298 2011-11-15 28 | - the first release at CPAN 29 | 30 | 31 | For detailed log of changes see 32 | https://svn.ms.mff.cuni.cz/projects/tectomt_devel/log/trunk/treex/lib/Treex/Tool/Parser/MSTperl 33 | (Use 'public' username and same password) 34 | -------------------------------------------------------------------------------- /packaging/Treex-Parser-MSTperl/Makefile: -------------------------------------------------------------------------------- 1 | SHELL=bash 2 | 3 | include ../common.mk 4 | 5 | ALLDIRS=${PARSER} 6 | 7 | VERSION=`svn info .| grep Revision | perl -ne 's/(\d+)//;printf("0.%05d%s", $$1, "${VERSION_SUFFIX}")'` 8 | 9 | MODULES := ${MST}/ \ 10 | $(addprefix ${PARSER}/, MSTperl.pm) 11 | #$(addprefix ${WRITE}/, Text.pm Treex.pm Sentences.pm Redirectable.pm) 12 | 13 | -------------------------------------------------------------------------------- /packaging/Treex-Unilang/Changes.template: -------------------------------------------------------------------------------- 1 | Revision history of the Treex-Unilang distribution: 2 | 3 | 0.08170 2012-02-16 4 | - added Featurama tagger 5 | 6 | 0.08056 2012-02-07 7 | - use new version of MSTperl parser and Treex::Core 8 | 9 | 0.07297 2011-11-15 10 | - added Read::CoNLLX and Write::CoNLLX 11 | - added MSTperl parser 12 | 13 | 0.07194 2011-11-07 14 | - added dependency on current version of 
Treex::Core 15 | 16 | 0.07191 2011-11-06 17 | - first non-dev release 18 | 19 | 0.06903_1 2011-10-03 20 | 21 | - the first release at CPAN 22 | 23 | For detailed log of changes see https://svn.ms.mff.cuni.cz/projects/tectomt_devel/timeline (Use 'public' username and same password) 24 | -------------------------------------------------------------------------------- /packaging/test_treex_installation_by_cpanm.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # tested in SU2 4 | 5 | export BASE=myperl 6 | export COMMONLIB=$BASE/basic 7 | export TREEXLIB=$BASE/treex 8 | 9 | export PERL5LIB=$TREEXLIB/lib:$COMMONLIB/lib:$PERL5LIB 10 | export PATH=$TREEXLIB/bin:$COMMONLIB/bin:$PATH 11 | 12 | mkdir -p $COMMONLIB 13 | mkdir -p $TREEXLIB 14 | 15 | curl -LO http://xrl.us/cpanm 16 | perl ./cpanm -l $COMMONLIB Moose 17 | 18 | perl ./cpanm -l $TREEXLIB treex-core-testy/Treex-Core-0.08040.tar.gz -------------------------------------------------------------------------------- /training/treelm/create_ids.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | use strict; 3 | use warnings; 4 | use utf8; 5 | use Storable; 6 | 7 | my %lemmas; 8 | binmode STDIN, ':utf8'; 9 | while (<STDIN>) { # one tab-separated record per STDIN line; the first field is a count 10 | chomp; 11 | my ( $count, $Lg, $Pg, $Ld, $Pd, $Fd ) = split /\t/, $_; 12 | $lemmas{"$Ld $Pd"} += $count; 13 | $lemmas{"$Lg $Pg"} += $count; 14 | } 15 | 16 | my @ids = (undef); 17 | my %id_of; 18 | my $i=1; 19 | 20 | foreach my $lemma_pos ( sort {$lemmas{$b} <=> $lemmas{$a}} keys %lemmas ) { 21 | my ($lemma, $pos) = split / /, $lemma_pos; 22 | push @ids, [$lemma, $pos]; 23 | $id_of{$lemma_pos} = $i++; 24 | } 25 | 26 | Storable::nstore_fd([\@ids, \%id_of], \*STDOUT); 27 | 28 | # Copyright 2009 Martin Popel 29 | # This file is distributed under the GNU General Public License v2. See $TMT_ROOT/README.
-------------------------------------------------------------------------------- /training/treelm/en/czeng/Makefile: -------------------------------------------------------------------------------- 1 | 2 | vectors: 3 | treex -p -j 100 -Len Read::Treex from='@/net/cluster/TMP/czeng10/filelist_parsed_train' A2A::ConvertTags input_driver=en::penn A2A::EN::EnhanceInterset Util::Eval anode='$$.set_tag($$.iset->pos)' Print::VectorsForTreeLM path=vectors/ 4 | -------------------------------------------------------------------------------- /training/treelm/en/stackoverflow/Makefile: -------------------------------------------------------------------------------- 1 | SHELL=bash 2 | 3 | extract: 4 | mkdir -p vectors 5 | treex -p -j 100 --survive -Len -Ssrc Read::Treex from='!parsed/f*.treex.gz' skip_finished='{parsed/(f.*).treex.gz}{vectors/$$1}' Util::Eval anode='$$.set_tag($$.iset->pos)' Print::VectorsForTreeLM path=vectors/ 6 | 7 | 8 | parse: 9 | mkdir -p parsed 10 | treex -p -j 100 --survive --mem 16g -Len -Ssrc Read::Sentences from='!plain/f*.txt' skip_finished='{plain/(f.*).txt}{parsed/$$1.treex.gz}' W2A::ResegmentSentences W2A::HideIT Scen::Analysis::EN domain=IT Write::Treex path=parsed/ 11 | 12 | 13 | plain: Posts.xml 14 | mkdir -p plain 15 | cat Posts.xml | sed 's/.*Body="\([^"]*\)".*/\1/' | ./clean_stackexchange.pl | split --lines=100 --numeric-suffixes --suffix-length=6 --additional-suffix=.txt - plain/f 16 | 17 | Posts.xml: 18 | wget https://archive.org/download/stackexchange/stackoverflow.com-Posts.7z 19 | 7z x stackoverflow.com-Posts.7z 20 | -------------------------------------------------------------------------------- /training/treelm/en/stackoverflow/clean_stackexchange.pl: -------------------------------------------------------------------------------- 1 | ../superuser/clean_stackexchange.pl -------------------------------------------------------------------------------- /training/treelm/en/superuser/Makefile: 
-------------------------------------------------------------------------------- 1 | SHELL=bash 2 | 3 | extract: 4 | mkdir -p vectors 5 | treex -p -j 100 --survive -Len -Ssrc Read::Treex from='!parsed/f*.treex.gz' skip_finished='{parsed/(f.*).treex.gz}{vectors/$$1}' Util::Eval anode='$$.set_tag($$.iset->pos)' Print::VectorsForTreeLM path=vectors/ 6 | 7 | 8 | parse: 9 | mkdir -p parsed 10 | treex -p -j 100 --survive --mem 16g -Len -Ssrc Read::Sentences from='!plain/f*.txt' skip_finished='{plain/(f.*).txt}{parsed/$$1.treex.gz}' W2A::ResegmentSentences W2A::HideIT Scen::Analysis::EN domain=IT Write::Treex path=parsed/ 11 | 12 | 13 | plain: Posts.xml 14 | mkdir -p plain 15 | cat Posts.xml | sed 's/.*Body="\([^"]*\)".*/\1/' | ./clean_stackexchange.pl | split --lines=100 --numeric-suffixes --suffix-length=6 --additional-suffix=.txt - plain/f 16 | 17 | Posts.xml: 18 | wget https://archive.org/download/stackexchange/superuser.com.7z 19 | 7z e superuser.com.7z Posts.xml 20 | -------------------------------------------------------------------------------- /training/treelm/en/ubuntu-dialogue/Makefile: -------------------------------------------------------------------------------- 1 | SHELL=bash 2 | 3 | 4 | vectors: parsed 5 | mkdir -p vectors 6 | treex -p -j 100 -Len -Ssrc Read::Treex from='!parsed/f*.treex.gz' Util::Eval anode='$$.set_tag($$.iset->pos)' Print::VectorsForTreeLM path=vectors/ 7 | 8 | 9 | parsed: plain 10 | mkdir -p parsed 11 | treex -p -j 100 --mem 16g -Len -Ssrc Read::Sentences from='!plain/f*.txt' W2A::ResegmentSentences W2A::HideIT Scen::Analysis::EN domain=IT Write::Treex path=parsed/ 12 | 13 | 14 | plain: dialogs 15 | mkdir -p plain 16 | find dialogs/ -name '*.tsv' -exec cat {} \; | cut -f 4 | \ 17 | perl -CSDA -pe 's/[^\x9\xA\xD\x20-\x{D7FF}\x{E000}-\x{FFFD}\x{10000}-\x{10FFFF}]+//g;' | \ 18 | split --lines=100 --numeric-suffixes --suffix-length=6 --additional-suffix=.txt - plain/f 19 | 20 | dialogs: 21 | wget 
http://cs.mcgill.ca/~jpineau/datasets/ubuntu-corpus-1.0/ubuntu_dialogs.tgz 22 | tar -xzf ubuntu_dialogs.tgz 23 | -------------------------------------------------------------------------------- /training/treelm/en/wmt15-newscrawl14v2/Makefile: -------------------------------------------------------------------------------- 1 | SHELL=bash 2 | 3 | vectors: parsed 4 | mkdir -p vectors 5 | treex -p -j 100 -Len -Ssrc Read::Treex from='!parsed/f*.treex.gz' Util::Eval anode='$$.set_tag($$.iset->pos)' Print::VectorsForTreeLM path=vectors/ 6 | 7 | 8 | parsed: plain 9 | mkdir -p parsed 10 | treex -p -j 100 --mem 16g -Len -Ssrc Read::Sentences from='!plain/f*.txt' W2A::ResegmentSentences W2A::HideIT Scen::Analysis::EN domain=IT Write::Treex path=parsed/ 11 | 12 | 13 | plain: 14 | mkdir -p plain 15 | zcat news.2014.en.shuffled.v2.gz | split --lines=100 --numeric-suffixes --suffix-length=6 --additional-suffix=.txt - plain/f 16 | 17 | news.2014.en.shuffled.v2.gz: 18 | wget http://www.statmt.org/wmt15/training-monolingual-news-crawl-v2/news.2014.en.shuffled.v2.gz 19 | -------------------------------------------------------------------------------- /training/treelm/en/wmt15-newsdiscuss/Makefile: -------------------------------------------------------------------------------- 1 | SHELL=bash 2 | 3 | vectors: parsed 4 | mkdir -p vectors 5 | treex -p -j 100 -Len -Ssrc Read::Treex from='!parsed/f*.treex.gz' Util::Eval anode='$$.set_tag($$.iset->pos)' Print::VectorsForTreeLM path=vectors/ 6 | 7 | 8 | parsed: plain 9 | mkdir -p parsed 10 | treex -p -j 100 --mem 16g -Len -Ssrc Read::Sentences from='!plain/f*.txt' W2A::ResegmentSentences W2A::HideIT Scen::Analysis::EN domain=IT Write::Treex path=parsed/ 11 | 12 | 13 | plain: 14 | mkdir -p plain 15 | zcat news-discuss-v1.en.txt.gz | split --lines=100 --numeric-suffixes --suffix-length=6 --additional-suffix=.txt - plain/f 16 | 17 | news-discuss-v1.en.txt.gz: 18 | wget http://www.statmt.org/wmt15/news-discuss-v1.en.txt.gz 19 | 
#!/usr/bin/env perl
# training/treelm/print_plsgz.pl
#
# Print the contents of a Storable file in human-readable form.
#
# Usage: print_plsgz.pl FILE
#
# FILE is deserialized with Storable::fd_retrieve and the resulting
# structure is written to STDOUT via Data::Dumper. A file name ending in
# ".gz" is decompressed on the fly through the PerlIO ":gzip" layer; any
# other name is read as-is.
use strict;
use warnings;
use utf8;
use autodie;
use PerlIO::gzip;
use Storable;
use Data::Dumper;

# The single command-line argument is the file to dump.
my $path = shift;
die "No filename as argument\n" if !$path;

# Choose the open mode from the file name.
my $mode = '<';
if ( $path =~ /\.gz$/ ) {
    $mode = '<:gzip';
}

# autodie turns a failing open/close into an exception, so neither call
# needs an explicit check.
open my $fh, $mode, $path;
my $data = Storable::fd_retrieve($fh);
die 'Can not read Storable.' if !$data;
close $fh;

print Dumper($data);

# Copyright 2009 Martin Popel
# This file is distributed under the GNU General Public License v2. See $TMT_ROOT/README.