├── .editorconfig
├── .github
    └── workflows
    │   └── tester.yaml
├── .gitignore
├── .perltidyrc
├── CONTRIBUTING.md
├── Dockerfile
├── README.md
├── bin
    ├── crstatus.pl
    ├── czeng-analysis
    ├── derimor
    ├── fl2treex
    ├── t
    │   ├── TestsCommon.pm
    │   ├── eval_block.t
    │   ├── fs.t
    │   ├── parallel.t
    │   ├── qparallel.t
    │   ├── scenarios
    │   │   ├── print3.scen
    │   │   ├── print4.scen
    │   │   └── scen_in_scen.scen
    │   ├── space_in_block_arg.t
    │   ├── survive.t
    │   └── treexrun.t
    ├── treex
    ├── treex-mtmworker.pl
    ├── treex-server
    ├── treex-socket-server.pl
    └── ttred
├── cpanfile
├── lib
    ├── Bash
    │   └── Completion
    │   │   └── Plugins
    │   │       └── Treex.pm
    ├── Treex.pm
    └── Treex
    │   ├── Block
    │       ├── A2A
    │       │   ├── AddDirectObjectMarkers.pm
    │       │   ├── AddEnhancedUD.pm
    │       │   ├── AddPluralMarkers.pm
    │       │   ├── AddTranslationsFromFile.pm
    │       │   ├── BackupTree.pm
    │       │   ├── CS
    │       │   │   ├── DetectClauses.pm
    │       │   │   ├── DoubleNegation.pm
    │       │   │   ├── FixAgreement.pm
    │       │   │   ├── FixAuxT.pm
    │       │   │   ├── FixAuxVChildren.pm
    │       │   │   ├── FixBy.pm
    │       │   │   ├── FixCasing.pm
    │       │   │   ├── FixFirstWordCapitalization.pm
    │       │   │   ├── FixGenitive.pm
    │       │   │   ├── FixNounAdjectiveAgreement.pm
    │       │   │   ├── FixNounClusters.pm
    │       │   │   ├── FixNounNounAgreement.pm
    │       │   │   ├── FixNounNumber.pm
    │       │   │   ├── FixOf.pm
    │       │   │   ├── FixP4.pm
    │       │   │   ├── FixPOS.pm
    │       │   │   ├── FixPassive.pm
    │       │   │   ├── FixPassiveAuxBeAgreement.pm
    │       │   │   ├── FixPnom.pm
    │       │   │   ├── FixPrepositionNounAgreement.pm
    │       │   │   ├── FixPrepositionWithoutChildren.pm
    │       │   │   ├── FixPrepositionalCase.pm
    │       │   │   ├── FixPresentContinuous.pm
    │       │   │   ├── FixSubject.pm
    │       │   │   ├── FixSubjectPastParticipleAgreement.pm
    │       │   │   ├── FixSubjectPredicateAgreement.pm
    │       │   │   ├── FixTemplate.pm
    │       │   │   ├── FixVerbAuxBeAgreement.pm
    │       │   │   ├── FixVerbByEnSubject.pm
    │       │   │   ├── ReadClauses.pm
    │       │   │   ├── RemoveFeaturesFromLemmas.pm
    │       │   │   ├── TruncateLemma.pm
    │       │   │   ├── VocalizePrepos.pm
    │       │   │   ├── VocalizePreposPlain.pm
    │       │   │   └── WorsenWordForms.pm
    │       │   ├── ConvertTags.pm
    │       │   ├── CopyAlignments.pm
    │       │   ├── CopyAtree.pm
    │       │   ├── CopyBasicToEnhancedUD.pm
    │       │   ├── CopyNodesFromAlignment.pm
    │       │   ├── CopySurfaceFromAlignment.pm
    │       │   ├── CorefClusters.pm
    │       │   ├── CorefDestroyWild.pm
    │       │   ├── CorefMentionHeads.pm
    │       │   ├── CorefMentions.pm
    │       │   ├── CorefToMisc.pm
    │       │   ├── DE
    │       │   │   └── CoNLL2Iset.pm
    │       │   ├── DeleteAfunCoordWithoutMembers.pm
    │       │   ├── Deprel2Afun.pm
    │       │   ├── EN
    │       │   │   ├── EnhanceInterset.pm
    │       │   │   ├── RehangModalVerbs.pm
    │       │   │   ├── RehangPPAttachment.pm
    │       │   │   └── Retokenize.pm
    │       │   ├── EU
    │       │   │   ├── FixAspect.pm
    │       │   │   ├── FixDefIndef.pm
    │       │   │   ├── FixMoveRoot.pm
    │       │   │   └── FixTest.pm
    │       │   ├── EnsembleTree.pm
    │       │   ├── FillCoNLLAttributes.pm
    │       │   ├── FilterBundlesByTreeSize.pm
    │       │   ├── FixCoordinatedAuxCP.pm
    │       │   ├── FlattenAtree.pm
    │       │   ├── FlowNetwork.pm
    │       │   ├── GRC
    │       │   │   └── AgdtFixAfun.pm
    │       │   ├── GuessIsMember.pm
    │       │   ├── HI
    │       │   │   └── Lemmatize.pm
    │       │   ├── MaxCycles.pm
    │       │   ├── NL
    │       │   │   └── EnhanceInterset.pm
    │       │   ├── NodeIdFromCoNLLFeats.pm
    │       │   ├── OracleTree.pm
    │       │   ├── ProjectCase.pm
    │       │   ├── ProjectTreeThroughAlignment.pm
    │       │   ├── RemoveDuplicateNodes.pm
    │       │   ├── RemoveEmptyNodes.pm
    │       │   ├── RemoveEmptySentences.pm
    │       │   ├── RemoveUnannotatedSentences.pm
    │       │   ├── RemoveUnusedEmptyNodes.pm
    │       │   ├── ReorderByLemmas.pm
    │       │   ├── ReorderHeadFinal.pm
    │       │   ├── ReorderPrepositions.pm
    │       │   ├── ReorderSVO2SOV.pm
    │       │   ├── Retokenize.pm
    │       │   ├── SVMTree.pm
    │       │   ├── SetAfunCoordAboveMembers.pm
    │       │   ├── SetClauseDepth.pm
    │       │   ├── SetCoordConjunction.pm
    │       │   ├── SetSharedModifier.pm
    │       │   ├── TA
    │       │   │   ├── FixAlignments.pm
    │       │   │   └── FixProjectedEdges.pm
    │       │   ├── TrainingData.pm
    │       │   ├── Transform
    │       │   │   ├── AllPunctBelowTechRoot.pm
    │       │   │   ├── BaseTransformer.pm
    │       │   │   ├── ComplexVerb.pm
    │       │   │   ├── ComplexVerbRootFirst.pm
    │       │   │   ├── ComplexVerbRootLast.pm
    │       │   │   ├── CoordStyle.pm
    │       │   │   ├── FirstNameUpward.pm
    │       │   │   ├── InvPrepositionDownward.pm
    │       │   │   ├── InvSubordConjDownward.pm
    │       │   │   ├── PrepositionDownward.pm
    │       │   │   ├── PunctBelowPrevNode.pm
    │       │   │   ├── SharedModifBelowNearestMember.pm
    │       │   │   ├── SubordConjDownward.pm
    │       │   │   └── t
    │       │   │   │   └── transformer_subscription.t
    │       │   ├── Translate.pm
    │       │   └── TranslateWithPreprocessing.pm
    │       ├── A2N
    │       │   ├── BaseNER.pm
    │       │   ├── CS
    │       │   │   ├── FixNERforIT.pm
    │       │   │   ├── NameTag.pm
    │       │   │   ├── NormalizeNames.pm
    │       │   │   ├── SimpleRuleNER.pm
    │       │   │   ├── SvmNer.pm
    │       │   │   ├── SysNERV.pm
    │       │   │   └── t
    │       │   │   │   ├── sysnerv_load.t
    │       │   │   │   └── sysnerv_run_reco.t
    │       │   ├── DE
    │       │   │   └── NameTag.pm
    │       │   ├── EN
    │       │   │   ├── DistinguishPersonalNames.pm
    │       │   │   ├── NameTag.pm
    │       │   │   ├── StanfordNER2008.pm
    │       │   │   ├── StanfordNER2015.pm
    │       │   │   ├── StanfordNamedEntities.pm
    │       │   │   └── t
    │       │   │   │   ├── stanford2008.t
    │       │   │   │   └── stanford2015.t
    │       │   ├── Encode2LemmaTag.pm
    │       │   ├── EncodeBIO.pm
    │       │   ├── FixMissingLinks.pm
    │       │   ├── MergeBBNIntoStanford.pm
    │       │   ├── NL
    │       │   │   └── AlpinoSimpleNER.pm
    │       │   ├── NameTag.pm
    │       │   ├── NestEntities.pm
    │       │   ├── RU
    │       │   │   └── NameTag.pm
    │       │   ├── SimpleNER.pm
    │       │   └── VI
    │       │   │   └── RecognizeFrequentPersonalNames.pm
    │       ├── A2P
    │       │   ├── NL
    │       │   │   └── ParseAlpino.pm
    │       │   ├── ParseCharniak.pm
    │       │   └── ParseStanford.pm
    │       ├── A2T
    │       │   ├── AddPersPronSb.pm
    │       │   ├── BG
    │       │   │   ├── MarkEdgesToCollapse.pm
    │       │   │   ├── SetCoapFunctors.pm
    │       │   │   └── SetGrammatemesFromAux.pm
    │       │   ├── BuildTtree.pm
    │       │   ├── CS
    │       │   │   ├── AddCor.pm
    │       │   │   ├── AddPersPron.pm
    │       │   │   ├── AddPersPronSb.pm
    │       │   │   ├── AddRcp.pm
    │       │   │   ├── DeleteExtraCoref.pm
    │       │   │   ├── FixCoord.pm
    │       │   │   ├── FixNonthirdPersSubj.pm
    │       │   │   ├── FixNumerals.pm
    │       │   │   ├── FixTlemmas.pm
    │       │   │   ├── MarkClauseHeads.pm
    │       │   │   ├── MarkEdgesToCollapse.pm
    │       │   │   ├── MarkReflexivePassiveGen.pm
    │       │   │   ├── MarkReflpronCoref.pm
    │       │   │   ├── MarkRelClauseCoref.pm
    │       │   │   ├── MarkRelClauseHeads.pm
    │       │   │   ├── SetCoapFunctors.pm
    │       │   │   ├── SetDiathesis.pm
    │       │   │   ├── SetFormeme.pm
    │       │   │   ├── SetFormeme
    │       │   │   │   └── NodeInfo.pm
    │       │   │   ├── SetFunctors.pm
    │       │   │   ├── SetFunctorsVW.pm
    │       │   │   ├── SetGrammatemes.pm
    │       │   │   ├── SetMissingFunctors.pm
    │       │   │   ├── SetValencyFrameRef.pm
    │       │   │   ├── SetValencyFrameRef2.pm
    │       │   │   ├── SetValencyFrameRefVW.pm
    │       │   │   ├── TBLa2tPhaseFd.pm
    │       │   │   └── conversion_rules.txt
    │       │   ├── DE
    │       │   │   └── SetCoapFunctors.pm
    │       │   ├── DeleteChildlessPunctuation.pm
    │       │   ├── DisambiguateGrammatemes.pm
    │       │   ├── DisambiguateGrammatemesFull.pm
    │       │   ├── EN
    │       │   │   ├── AddCorAct.pm
    │       │   │   ├── FindTextCoref.pm
    │       │   │   ├── FindTextCorefML.pm
    │       │   │   ├── FixAdjNattrN.pm
    │       │   │   ├── FixEitherOr.pm
    │       │   │   ├── FixHowPlusAdjective.pm
    │       │   │   ├── FixImperatives.pm
    │       │   │   ├── FixRelClauseNoRelPron.pm
    │       │   │   ├── FixTlemmas.pm
    │       │   │   ├── MarkClauseHeads.pm
    │       │   │   ├── MarkDspRoot.pm
    │       │   │   ├── MarkEdgesToCollapse.pm
    │       │   │   ├── MarkEdgesToCollapseNeg.pm
    │       │   │   ├── MarkInfin.pm
    │       │   │   ├── MarkPassives.pm
    │       │   │   ├── MarkReferentialIt.pm
    │       │   │   ├── MarkReflpronCoref.pm
    │       │   │   ├── MarkRelClauseCoref.pm
    │       │   │   ├── MarkRelClauseHeads.pm
    │       │   │   ├── MoveAuxFromCoordToMembers.pm
    │       │   │   ├── RehangSharedAttr.pm
    │       │   │   ├── SetCoapFunctors.pm
    │       │   │   ├── SetFormeme.pm
    │       │   │   ├── SetFormemeInterset.pm
    │       │   │   ├── SetFunctors.pm
    │       │   │   ├── SetFunctors2.pm
    │       │   │   ├── SetFunctorsVW.pm
    │       │   │   ├── SetGenderOfPerson.pm
    │       │   │   ├── SetGrammatemes.pm
    │       │   │   ├── SetIsNameOfPerson.pm
    │       │   │   ├── SetMissingFunctors.pm
    │       │   │   ├── SetNodetype.pm
    │       │   │   ├── SetPersonGender.pm
    │       │   │   ├── SetSentmod.pm
    │       │   │   ├── SetTense.pm
    │       │   │   ├── SetValencyFrameRef.pm
    │       │   │   ├── SetValencyFrameRef2.pm
    │       │   │   ├── SetValencyFrameRefVW.pm
    │       │   │   └── SetVoice.pm
    │       │   ├── ES
    │       │   │   ├── FixReflexiveVerbs.pm
    │       │   │   ├── FixTlemmas.pm
    │       │   │   ├── MarkEdgesToCollapse.pm
    │       │   │   ├── SetCoapFunctors.pm
    │       │   │   ├── SetFormeme.pm
    │       │   │   ├── SetGrammatemes.pm
    │       │   │   ├── SetGrammatemesFromAux.pm
    │       │   │   └── SetSentmod.pm
    │       │   ├── EU
    │       │   │   ├── MarkEdgesToCollapse.pm
    │       │   │   ├── SetCoapFunctors.pm
    │       │   │   ├── SetFormeme.pm
    │       │   │   ├── SetGrammatemes.pm
    │       │   │   └── SetSentmod.pm
    │       │   ├── FixAtomicNodes.pm
    │       │   ├── FixIsMember.pm
    │       │   ├── GRC
    │       │   │   ├── MarkEdgesToCollapse.pm
    │       │   │   └── SetCoapFunctors.pm
    │       │   ├── HideParentheses.pm
    │       │   ├── JA
    │       │   │   ├── FixTlemmas.pm
    │       │   │   ├── MarkEdgesToCollapseNeg.pm
    │       │   │   ├── MarkPassives.pm
    │       │   │   ├── SetFormeme.pm
    │       │   │   ├── SetGrammatemes.pm
    │       │   │   └── SetTense.pm
    │       │   ├── LA
    │       │   │   ├── AddPersPron.pm
    │       │   │   ├── FixTlemmas.pm
    │       │   │   ├── MarkClauseHeads.pm
    │       │   │   ├── MarkEdgesToCollapse.pm
    │       │   │   ├── MarkRelClauseCoref.pm
    │       │   │   ├── MarkRelClauseHeads.pm
    │       │   │   ├── SetCoapFunctors.pm
    │       │   │   ├── SetFunctors.pm
    │       │   │   ├── SetGrammatemes.pm
    │       │   │   ├── SetPronouns.pm
    │       │   │   └── TopicFocusArticulation.pm
    │       │   ├── MarkClauseHeads.pm
    │       │   ├── MarkEdgesToCollapse.pm
    │       │   ├── MarkParentheses.pm
    │       │   ├── MarkReflpronCoref.pm
    │       │   ├── MarkRelClauseCoref.pm
    │       │   ├── MarkRelClauseHeads.pm
    │       │   ├── MinimizeGrammatemes.pm
    │       │   ├── MoveAuxFromCoordToMembers.pm
    │       │   ├── NL
    │       │   │   ├── FixMultiwordSurnames.pm
    │       │   │   ├── FixTlemmas.pm
    │       │   │   ├── MarkEdgesToCollapse.pm
    │       │   │   ├── SetCoapFunctors.pm
    │       │   │   ├── SetFormeme.pm
    │       │   │   ├── SetFunctors.pm
    │       │   │   ├── SetGrammatemes.pm
    │       │   │   ├── SetGrammatemesFromAux.pm
    │       │   │   └── SetSentmod.pm
    │       │   ├── PL
    │       │   │   └── SetCoapFunctors.pm
    │       │   ├── PT
    │       │   │   ├── FixFormeme.pm
    │       │   │   ├── FixImperatives.pm
    │       │   │   ├── FixPersPron.pm
    │       │   │   ├── MarkEdgesToCollapse.pm
    │       │   │   ├── SetCoapFunctors.pm
    │       │   │   ├── SetGrammatemes.pm
    │       │   │   └── SetGrammatemesFromAux.pm
    │       │   ├── ProjectGazeteerInfo.pm
    │       │   ├── ProjectSelectedWild.pm
    │       │   ├── RU
    │       │   │   └── SetCoapFunctors.pm
    │       │   ├── RehangUnaryCoordConj.pm
    │       │   ├── SK
    │       │   │   ├── AddPersPron.pm
    │       │   │   ├── FixNumerals.pm
    │       │   │   ├── FixTlemmas.pm
    │       │   │   ├── MarkEdgesToCollapse.pm
    │       │   │   ├── MarkRelClauseCoref.pm
    │       │   │   ├── SetCoapFunctors.pm
    │       │   │   ├── SetDiathesis.pm
    │       │   │   ├── SetFormeme.pm
    │       │   │   ├── SetFormeme
    │       │   │   │   └── NodeInfo.pm
    │       │   │   ├── SetFunctors.pm
    │       │   │   └── SetPhrasalFunctors.pm
    │       │   ├── SRLParser.pm
    │       │   ├── SetCoapFunctors.pm
    │       │   ├── SetDocOrds.pm
    │       │   ├── SetFormeme.pm
    │       │   ├── SetFunctorsMLProcess.pm
    │       │   ├── SetFunctorsRules.pm
    │       │   ├── SetFunctorsVW.pm
    │       │   ├── SetGrammatemes.pm
    │       │   ├── SetGrammatemesFromAux.pm
    │       │   ├── SetGrammatemesFromAuxForPT.pm
    │       │   ├── SetIsMember.pm
    │       │   ├── SetNodetype.pm
    │       │   ├── SetSentmod.pm
    │       │   ├── SetValencyFrameRef.pm
    │       │   ├── SetValencyFrameRef2.pm
    │       │   └── SetValencyFrameRefVW.pm
    │       ├── A2W
    │       │   ├── CS
    │       │   │   ├── ApplySubstitutions.pm
    │       │   │   ├── AsciiQuotes.pm
    │       │   │   ├── ConcatenateTokens.pm
    │       │   │   ├── Detokenize.pm
    │       │   │   ├── DetokenizeDashes.pm
    │       │   │   ├── DetokenizeUsingRules.pm
    │       │   │   └── RemoveRepeatedTokens.pm
    │       │   ├── CapitalizeSentStart.pm
    │       │   ├── ConcatenateTokens.pm
    │       │   ├── Detokenize.pm
    │       │   ├── EN
    │       │   │   ├── ConcatenateTokens.pm
    │       │   │   ├── DeleteTracesFromAtree.pm
    │       │   │   ├── DeleteTracesFromSentence.pm
    │       │   │   ├── DirtyTricks.pm
    │       │   │   ├── FixCapitalization.pm
    │       │   │   └── Tidy.pm
    │       │   ├── ES
    │       │   │   └── ConcatenateTokens.pm
    │       │   ├── EU
    │       │   │   └── ConcatenateTokens.pm
    │       │   ├── NL
    │       │   │   ├── DetokenizeSentence.pm
    │       │   │   └── GenerateSentenceAlpino.pm
    │       │   ├── NormalizePunctuationForWMT.pm
    │       │   ├── PT
    │       │   │   ├── ConcatenateTokens.pm
    │       │   │   └── DirtyTricks.pm
    │       │   ├── ShowGazetteerItems.pm
    │       │   └── ShowIT.pm
    │       ├── Align
    │       │   ├── A
    │       │   │   ├── AlignMGiza.pm
    │       │   │   ├── CollectLinks.pm
    │       │   │   ├── FilterTreesByAlignment.pm
    │       │   │   ├── InsertAlignmentFromFile.pm
    │       │   │   ├── InsertBerkeleyAlignment.pm
    │       │   │   ├── MonolingualGreedy.pm
    │       │   │   ├── RemoveDuplicateLinks.pm
    │       │   │   ├── Retokenize.pm
    │       │   │   └── TrainAndAlignMGiza.pm
    │       │   ├── AddMissingLinks.pm
    │       │   ├── AddTransitiveLinks.pm
    │       │   ├── AlignForward.pm
    │       │   ├── AlignSameSentence.pm
    │       │   ├── Annot
    │       │   │   ├── Load.pm
    │       │   │   ├── Print.pm
    │       │   │   └── Summary.pm
    │       │   ├── FilterAlignment.pm
    │       │   ├── MarkConsistentTreelets.pm
    │       │   ├── ProjectAlignment.pm
    │       │   ├── PruneAlignmentForProjection.pm
    │       │   ├── RemoveAlignments.pm
    │       │   ├── ReverseAlignment.pm
    │       │   └── T
    │       │   │   ├── AlignCzechPersprons.pm
    │       │   │   ├── AlignGeneratedNodes.pm
    │       │   │   ├── Compare.pm
    │       │   │   ├── CopyAlignmentFromAlayer.pm
    │       │   │   ├── Eval.pm
    │       │   │   ├── Greedy1To1Alignment.pm
    │       │   │   ├── PCEDTAlignment.pm
    │       │   │   └── Supervised
    │       │   │       ├── Base.pm
    │       │   │       ├── PrintData.pm
    │       │   │       └── Resolver.pm
    │       ├── Coref
    │       │   ├── CS
    │       │   │   ├── All
    │       │   │   │   └── Resolve.pm
    │       │   │   ├── Cor
    │       │   │   │   ├── Base.pm
    │       │   │   │   ├── PrintData.pm
    │       │   │   │   └── Resolve.pm
    │       │   │   ├── DemonPron
    │       │   │   │   ├── Base.pm
    │       │   │   │   ├── PrintData.pm
    │       │   │   │   └── Resolve.pm
    │       │   │   ├── PersPron
    │       │   │   │   ├── Base.pm
    │       │   │   │   ├── PrintData.pm
    │       │   │   │   └── Resolve.pm
    │       │   │   ├── ReflPron
    │       │   │   │   ├── Base.pm
    │       │   │   │   ├── PrintData.pm
    │       │   │   │   └── Resolve.pm
    │       │   │   └── RelPron
    │       │   │   │   ├── Base.pm
    │       │   │   │   ├── PrintData.pm
    │       │   │   │   └── Resolve.pm
    │       │   ├── EN
    │       │   │   ├── Cor
    │       │   │   │   ├── Base.pm
    │       │   │   │   ├── PrintData.pm
    │       │   │   │   └── Resolve.pm
    │       │   │   ├── PersPron
    │       │   │   │   ├── Base.pm
    │       │   │   │   ├── PrintData.pm
    │       │   │   │   └── Resolve.pm
    │       │   │   ├── ReflPron
    │       │   │   │   ├── Base.pm
    │       │   │   │   ├── PrintData.pm
    │       │   │   │   └── Resolve.pm
    │       │   │   ├── RelPron
    │       │   │   │   ├── Base.pm
    │       │   │   │   ├── PrintData.pm
    │       │   │   │   └── Resolve.pm
    │       │   │   ├── ResolveBART2.pm
    │       │   │   └── ResolveStanfordCoreNLP.pm
    │       │   ├── EntityEvent
    │       │   │   ├── CS
    │       │   │   │   └── DemonPron
    │       │   │   │   │   ├── Base.pm
    │       │   │   │   │   ├── PrintData.pm
    │       │   │   │   │   └── Resolve.pm
    │       │   │   ├── Eval.pm
    │       │   │   ├── IndicateForCoref.pm
    │       │   │   ├── PrintData.pm
    │       │   │   └── Resolve.pm
    │       │   ├── MarkMentionsForScorer.pm
    │       │   ├── PrepareSpecializedEval.pm
    │       │   ├── PrettyPrint.pm
    │       │   ├── PrettyPrint
    │       │   │   ├── LabelKey.pm
    │       │   │   └── LabelSys.pm
    │       │   ├── PrintData.pm
    │       │   ├── Project
    │       │   │   └── MentionsFromAtoTlayer.pm
    │       │   ├── ProjectCorefEntities.pm
    │       │   ├── ProjectLinks.pm
    │       │   ├── RearrangeLinks.pm
    │       │   ├── RemoveLinks.pm
    │       │   ├── Resolve.pm
    │       │   ├── ResolveFromRawText.pm
    │       │   ├── STATE_OF_PROGRESS.txt
    │       │   ├── SimpleEval.pm
    │       │   ├── SupervisedBase.pm
    │       │   └── Write
    │       │   │   └── SentencesWithMentions.pm
    │       ├── Depfix
    │       │   ├── CS2EN
    │       │   │   ├── Fix.pm
    │       │   │   ├── FixGenitive.pm
    │       │   │   └── FixSVO.pm
    │       │   ├── CollectEdits.pm
    │       │   ├── CollectMonolingual.pm
    │       │   ├── EN2CS
    │       │   │   ├── CollectEdits.pm
    │       │   │   ├── MLFix.pm
    │       │   │   ├── MLFix_cas.pm
    │       │   │   ├── MLFix_gnc.pm
    │       │   │   └── MLFix_nc.pm
    │       │   ├── Fix.pm
    │       │   ├── MLFix.pm
    │       │   ├── README
    │       │   └── sample_config.yaml
    │       ├── Discourse
    │       │   ├── CS
    │       │   │   ├── EvaldEvaluateWeka.pm
    │       │   │   ├── EvaldExtractFeaturesWeka.pm
    │       │   │   ├── MarkDiscourse.pm
    │       │   │   └── MarkTFA.pm
    │       │   └── EVALD
    │       │   │   ├── Base.pm
    │       │   │   ├── PrintData.pm
    │       │   │   └── Resolve.pm
    │       ├── Eval
    │       │   ├── AER.pm
    │       │   ├── AddPersPronIt.pm
    │       │   ├── AddPersPronSb.pm
    │       │   ├── AtreeHighlightEdges.pm
    │       │   ├── AtreeUAS.pm
    │       │   ├── AtreeUASWithConfInterval.pm
    │       │   ├── AtreeUAStat.pm
    │       │   ├── BiEdgeScore.pm
    │       │   ├── BitextCorefStats.pm
    │       │   ├── Coref.pm
    │       │   ├── CorefSegm.pm
    │       │   ├── CorefStats.pm
    │       │   ├── EvalClauses.pm
    │       │   ├── InterAnnotatorAgreement.pm
    │       │   ├── Nonproj.pm
    │       │   ├── ReferentialIt.pm
    │       │   ├── SentencesWithIncompleteMorphology.pm
    │       │   ├── Ttrees.pm
    │       │   └── Wc.pm
    │       ├── Filter
    │       │   ├── CNK
    │       │   │   ├── DocIds.pm
    │       │   │   └── PunctuationOnly.pm
    │       │   ├── CzEng
    │       │   │   ├── AcademicTitle.pm
    │       │   │   ├── AlignmentCummulation.pm
    │       │   │   ├── AlignmentScore.pm
    │       │   │   ├── Classifier.pm
    │       │   │   ├── Common.pm
    │       │   │   ├── DecisionTree.pm
    │       │   │   ├── DictionaryRatio.pm
    │       │   │   ├── DifferentNumberOfTokens.pm
    │       │   │   ├── Eval.pm
    │       │   │   ├── GutenbergHeader.pm
    │       │   │   ├── HeadSwapRatio.pm
    │       │   │   ├── IdenticalSentences.pm
    │       │   │   ├── InconsistentGrade.pm
    │       │   │   ├── InconsistentNumber.pm
    │       │   │   ├── InconsistentNumeral.pm
    │       │   │   ├── InconsistentTense.pm
    │       │   │   ├── InterleavingSpaces.pm
    │       │   │   ├── LetterCount.pm
    │       │   │   ├── LongSentence.pm
    │       │   │   ├── LongWord.pm
    │       │   │   ├── MarkBadPairs.pm
    │       │   │   ├── MaxEnt.pm
    │       │   │   ├── MicrosoftLinesWithFilenames.pm
    │       │   │   ├── NaiveBayes.pm
    │       │   │   ├── NoWordInLanguage.pm
    │       │   │   ├── NonASCIICharacter.pm
    │       │   │   ├── POSRatio.pm
    │       │   │   ├── Predict.pm
    │       │   │   ├── RemoveBadPairs.pm
    │       │   │   ├── RemoveLinksToDeletedBundles.pm
    │       │   │   ├── ReorderingQuantity.pm
    │       │   │   ├── RepeatedCharacter.pm
    │       │   │   ├── Score.pm
    │       │   │   ├── SpecialCharRatio.pm
    │       │   │   ├── SuspiciousCharacter.pm
    │       │   │   ├── Train.pm
    │       │   │   └── UnrecognizedTagRatio.pm
    │       │   ├── Generic
    │       │   │   ├── AlignmentCummulation.pm
    │       │   │   ├── AlignmentScore.pm
    │       │   │   ├── Classifier.pm
    │       │   │   ├── Common.pm
    │       │   │   ├── DecisionTree.pm
    │       │   │   ├── DifferentNumberOfTokens.pm
    │       │   │   ├── Eval.pm
    │       │   │   ├── HeadSwapRatio.pm
    │       │   │   ├── IdenticalSentences.pm
    │       │   │   ├── LetterCount.pm
    │       │   │   ├── LongSentence.pm
    │       │   │   ├── LongWord.pm
    │       │   │   ├── MarkBadPairs.pm
    │       │   │   ├── MaxEnt.pm
    │       │   │   ├── NaiveBayes.pm
    │       │   │   ├── Predict.pm
    │       │   │   ├── RemoveBadPairs.pm
    │       │   │   ├── RemoveLinksToDeletedBundles.pm
    │       │   │   ├── ReorderingQuantity.pm
    │       │   │   ├── RepeatedCharacter.pm
    │       │   │   ├── Score.pm
    │       │   │   ├── SpecialCharRatio.pm
    │       │   │   ├── SuspiciousCharacter.pm
    │       │   │   └── Train.pm
    │       │   ├── HindenCorp
    │       │   │   ├── InconsistentNumber.pm
    │       │   │   ├── NoEnglishWord.pm
    │       │   │   ├── NonASCIICharacter.pm
    │       │   │   ├── POSRatio.pm
    │       │   │   └── UnrecognizedTagRatio.pm
    │       │   ├── Node.pm
    │       │   ├── NthSentence.pm
    │       │   ├── RemoveEmptySentences.pm
    │       │   ├── SDP2015Trees.pm
    │       │   └── SentenceNumber.pm
    │       ├── Gazetteer
    │       │   └── ExtractEntityVectors.pm
    │       ├── HamleDT
    │       │   ├── AR
    │       │   │   ├── FixUD.pm
    │       │   │   ├── Harmonize.pm
    │       │   │   └── TestFileNames.pm
    │       │   ├── BG
    │       │   │   ├── FixUD.pm
    │       │   │   └── Harmonize.pm
    │       │   ├── BN
    │       │   │   └── Harmonize.pm
    │       │   ├── Base.pm
    │       │   ├── CA
    │       │   │   ├── FixUD.pm
    │       │   │   └── Harmonize.pm
    │       │   ├── CS
    │       │   │   ├── CollectSentenceText.pm
    │       │   │   ├── FixUD.pm
    │       │   │   ├── Harmonize.pm
    │       │   │   ├── HarmonizeFicTree.pm
    │       │   │   ├── HarmonizePDT.pm
    │       │   │   ├── HarmonizePDTC.pm
    │       │   │   ├── SetBundleId.pm
    │       │   │   ├── SplitFusedWords.pm
    │       │   │   └── ToDoPunctuation.pm
    │       │   ├── DA
    │       │   │   ├── FixUD.pm
    │       │   │   └── Harmonize.pm
    │       │   ├── DE
    │       │   │   ├── FixUD.pm
    │       │   │   └── Harmonize.pm
    │       │   ├── EL
    │       │   │   ├── FixUD.pm
    │       │   │   └── Harmonize.pm
    │       │   ├── EN
    │       │   │   ├── Harmonize.pm
    │       │   │   └── HarmonizePCEDT.pm
    │       │   ├── ES
    │       │   │   ├── FixUD.pm
    │       │   │   └── Harmonize.pm
    │       │   ├── ET
    │       │   │   ├── FixUD.pm
    │       │   │   └── Harmonize.pm
    │       │   ├── EU
    │       │   │   ├── FixUD.pm
    │       │   │   └── Harmonize.pm
    │       │   ├── FA
    │       │   │   ├── FixUD.pm
    │       │   │   └── Harmonize.pm
    │       │   ├── FI
    │       │   │   ├── FixUD.pm
    │       │   │   └── Harmonize.pm
    │       │   ├── GRC
    │       │   │   └── Harmonize.pm
    │       │   ├── HE
    │       │   │   └── Harmonize.pm
    │       │   ├── HI
    │       │   │   ├── FixPUD.pm
    │       │   │   ├── FixUD.pm
    │       │   │   └── Harmonize.pm
    │       │   ├── HR
    │       │   │   ├── FixUD.pm
    │       │   │   └── Harmonize.pm
    │       │   ├── HU
    │       │   │   ├── FixUD.pm
    │       │   │   └── Harmonize.pm
    │       │   ├── Harmonize.pm
    │       │   ├── HarmonizeAnCora.pm
    │       │   ├── HarmonizePDT.pm
    │       │   ├── HarmonizePDTIT.pm
    │       │   ├── HarmonizePerseus.pm
    │       │   ├── HarmonizePerseusIT.pm
    │       │   ├── ID
    │       │   │   └── FixUD.pm
    │       │   ├── IS
    │       │   │   └── Harmonize.pm
    │       │   ├── IT
    │       │   │   ├── FixUD.pm
    │       │   │   └── Harmonize.pm
    │       │   ├── JA
    │       │   │   ├── FixUD.pm
    │       │   │   ├── GoogleToUdep.pm
    │       │   │   └── Harmonize.pm
    │       │   ├── KO
    │       │   │   ├── FixUD.pm
    │       │   │   └── GoogleToUdep.pm
    │       │   ├── LA
    │       │   │   ├── Harmonize.pm
    │       │   │   ├── HarmonizeIT.pm
    │       │   │   ├── List_absolute_adverbs_ITTB.txt
    │       │   │   ├── List_all_lemmas_ITTB.txt
    │       │   │   └── List_comparative_adverbs_ITTB.txt
    │       │   ├── LT
    │       │   │   ├── FixUD.pm
    │       │   │   └── Harmonize.pm
    │       │   ├── LV
    │       │   │   └── FixUD.pm
    │       │   ├── MT
    │       │   │   └── Harmonize.pm
    │       │   ├── NL
    │       │   │   └── Harmonize.pm
    │       │   ├── OrigFileSentToComment.pm
    │       │   ├── PL
    │       │   │   ├── FixUD.pm
    │       │   │   ├── Harmonize.pm
    │       │   │   └── SplitFusedWords.pm
    │       │   ├── PT
    │       │   │   ├── FixUD.pm
    │       │   │   ├── GoogleToUdep.pm
    │       │   │   ├── Harmonize.pm
    │       │   │   ├── HarmonizeCintil.pm
    │       │   │   ├── HarmonizeCintilUSD.pm
    │       │   │   └── SplitFusedWords.pm
    │       │   ├── PragueDeprelsToUD.pm
    │       │   ├── Punctuation.pm
    │       │   ├── RO
    │       │   │   ├── FixUD.pm
    │       │   │   └── Harmonize.pm
    │       │   ├── RU
    │       │   │   ├── FixUD.pm
    │       │   │   └── Harmonize.pm
    │       │   ├── SA
    │       │   │   └── FixUD.pm
    │       │   ├── SK
    │       │   │   ├── FixUD.pm
    │       │   │   ├── Harmonize.pm
    │       │   │   └── SplitFusedWords.pm
    │       │   ├── SL
    │       │   │   └── Harmonize.pm
    │       │   ├── SR
    │       │   │   └── FixUD.pm
    │       │   ├── SV
    │       │   │   └── Harmonize.pm
    │       │   ├── SetConllTags.pm
    │       │   ├── SetDeprel.pm
    │       │   ├── SetMiscNEFromPropn.pm
    │       │   ├── SplitFusedWords.pm
    │       │   ├── SplitMWUnderscore.pm
    │       │   ├── TA
    │       │   │   ├── FixUD.pm
    │       │   │   ├── Harmonize.pm
    │       │   │   └── ReadDetokenizedSentences.pm
    │       │   ├── TE
    │       │   │   └── Harmonize.pm
    │       │   ├── TR
    │       │   │   └── Harmonize.pm
    │       │   ├── Test
    │       │   │   ├── AuxGIsPunctuation.pm
    │       │   │   ├── AuxKAtEnd.pm
    │       │   │   ├── AuxKUnderRoot.pm
    │       │   │   ├── AuxVNotOnTop.pm
    │       │   │   ├── AuxXIsComma.pm
    │       │   │   ├── CoApAboveEveryMember.pm
    │       │   │   ├── CoordStyle.pm
    │       │   │   ├── DeprelKnown.pm
    │       │   │   ├── FinalPunctuation.pm
    │       │   │   ├── LeafAux.pm
    │       │   │   ├── MaxOneSubject.pm
    │       │   │   ├── MemberInEveryCoAp.pm
    │       │   │   ├── NoNewNonProj.pm
    │       │   │   ├── NonParentAuxS.pm
    │       │   │   ├── NonemptyAttr.pm
    │       │   │   ├── NounGovernsDet.pm
    │       │   │   ├── PrepIsAuxP.pm
    │       │   │   ├── PunctUnderCoord.pm
    │       │   │   ├── SubjectUnderVerb.pm
    │       │   │   └── UD
    │       │   │   │   ├── Adpositions.pm
    │       │   │   │   ├── AdverbIsNotNmod.pm
    │       │   │   │   ├── AuxChain.pm
    │       │   │   │   ├── AuxpassImpliesSubjpass.pm
    │       │   │   │   ├── CcDependsOnFollowingConjunct.pm
    │       │   │   │   ├── CcIsConjunction.pm
    │       │   │   │   ├── CcIsLeaf.pm
    │       │   │   │   ├── CompoundPrepositions.pm
    │       │   │   │   ├── CopulaIsAux.pm
    │       │   │   │   ├── Determiners.pm
    │       │   │   │   ├── FiniteVerbWithGender.pm
    │       │   │   │   ├── FutureIsNotXcomp.pm
    │       │   │   │   ├── LeftToRightRelations.pm
    │       │   │   │   ├── MWTCapitalization.pm
    │       │   │   │   ├── MaxOneDirectObject.pm
    │       │   │   │   ├── MaxOneSubject.pm
    │       │   │   │   ├── MweDoesNotCombineWithName.pm
    │       │   │   │   ├── Orphan.pm
    │       │   │   │   ├── PronounIsNotMark.pm
    │       │   │   │   ├── Punctuation.pm
    │       │   │   │   ├── Reflexives.pm
    │       │   │   │   ├── Root.pm
    │       │   │   │   ├── SingleRoot.pm
    │       │   │   │   ├── Subjunctions.pm
    │       │   │   │   ├── ToDoBug.pm
    │       │   │   │   ├── UnconvertedDependencies.pm
    │       │   │   │   ├── UnderscoreInForm.pm
    │       │   │   │   ├── XcompHasNoSubject.pm
    │       │   │   │   └── ZZZSearch.pm
    │       │   ├── Transform
    │       │   │   ├── AllPunctBelowTechRoot.pm
    │       │   │   ├── BaseTransformer.pm
    │       │   │   ├── ComplexVerb.pm
    │       │   │   ├── ComplexVerbRootFirst.pm
    │       │   │   ├── ComplexVerbRootLast.pm
    │       │   │   ├── CoordStyle.pm
    │       │   │   ├── FirstNameUpward.pm
    │       │   │   ├── InvPrepositionDownward.pm
    │       │   │   ├── InvSubordConjDownward.pm
    │       │   │   ├── MarkPunct.pm
    │       │   │   ├── PrepositionDownward.pm
    │       │   │   ├── PrepositionDownwardSimple.pm
    │       │   │   ├── PrepositionUpwardSimple.pm
    │       │   │   ├── PunctBelowPrevNode.pm
    │       │   │   ├── SharedModifBelowNearestMember.pm
    │       │   │   ├── StanfordCopulas.pm
    │       │   │   ├── StanfordObjects.pm
    │       │   │   ├── StanfordPunct.pm
    │       │   │   ├── StanfordTypes.pm
    │       │   │   ├── SubordConjDownward.pm
    │       │   │   └── t
    │       │   │   │   └── transformer_subscription.t
    │       │   ├── UD1To2.pm
    │       │   ├── UG
    │       │   │   └── FixUD.pm
    │       │   ├── UK
    │       │   │   └── FixUD.pm
    │       │   ├── UR
    │       │   │   └── FixUD.pm
    │       │   ├── Udep.pm
    │       │   ├── UdepIT.pm
    │       │   ├── UdepToPrague.pm
    │       │   ├── Util
    │       │   │   ├── CorrectDependencyInconsistencies.pm
    │       │   │   ├── CorrectPOSInconsistencies.pm
    │       │   │   ├── ExtractAfuns.pm
    │       │   │   ├── ExtractDependencyBigrams.pm
    │       │   │   ├── ExtractSurfaceNGrams.pm
    │       │   │   ├── ExtractTrees.pm
    │       │   │   └── PrintRoots.pm
    │       │   ├── VI
    │       │   │   └── FixUD.pm
    │       │   └── ZH
    │       │   │   ├── FixPUD.pm
    │       │   │   └── FixUD.pm
    │       ├── Import
    │       │   └── Sentences.pm
    │       ├── MLFix
    │       │   ├── CS
    │       │   │   ├── Oracle.pm
    │       │   │   └── ScikitLearn.pm
    │       │   ├── CollectEdits.pm
    │       │   ├── DE
    │       │   │   ├── Oracle.pm
    │       │   │   └── ScikitLearn.pm
    │       │   ├── Fix.pm
    │       │   ├── MLFix.pm
    │       │   ├── Mark2Fix.pm
    │       │   ├── MarkByOracle.pm
    │       │   ├── MarkByScikitLearn.pm
    │       │   ├── Oracle.pm
    │       │   └── ScikitLearn.pm
    │       ├── Misc
    │       │   ├── AbstractDialogueSlots.pm
    │       │   ├── AddZonesFromFile.pm
    │       │   ├── Anonymize
    │       │   │   └── CS
    │       │   │   │   ├── InsertAnonymizedTokensIntoOrigText.pm
    │       │   │   │   └── ReplaceNEsWithRandomChoice.pm
    │       │   ├── CopenhagenDT
    │       │   │   ├── BuildTreesFromOffsetIndices.pm
    │       │   │   ├── CreateBundlePerSentenceTuple.pm
    │       │   │   ├── DeleteFirstBundle.pm
    │       │   │   ├── FixLonelyNodes.pm
    │       │   │   ├── FlattenUnannotatedTrees.pm
    │       │   │   ├── ImportSentSegmFromExternalFiles.pm
    │       │   │   ├── MoveDanishTreesToSeparateBundles.pm
    │       │   │   ├── MoveSLTreesToSeparateBundles.pm
    │       │   │   ├── MoveTLTreesToSLCounterpartByAlignment.pm
    │       │   │   ├── MoveTreesToDanishCounterpartByAlignment.pm
    │       │   │   ├── MoveTreesToDanishCounterpartBySizeSimilarity.pm
    │       │   │   ├── MoveTreesToDanishCounterpartIfSameNumber.pm
    │       │   │   ├── PreannotateSyntax.pm
    │       │   │   ├── PrintAlignedSentences.pm
    │       │   │   ├── PrintDependentNeighbors.pm
    │       │   │   ├── PrintExtraction4Miriam.pm
    │       │   │   ├── ReconstructAlignmentLinks.pm
    │       │   │   ├── SearchDemo.pm
    │       │   │   ├── SearchSwitched.pm
    │       │   │   └── SentSegmByTagS.pm
    │       │   ├── CopyAtreeAttr.pm
    │       │   ├── Crash.pm
    │       │   ├── CreateWordToSentenceIndex.pm
    │       │   ├── DeabstractDialogueSlots.pm
    │       │   ├── DeleteCoordNodes.pm
    │       │   ├── EncodeGrammatemes.pm
    │       │   ├── FindSentences.pm
    │       │   ├── FixMissingZones.pm
    │       │   ├── FixNonstdAttrs.pm
    │       │   ├── GenerateWordformsFromJSON.pm
    │       │   ├── GroupBundles.pm
    │       │   ├── ImportATreesFromFile.pm
    │       │   ├── JoinBundles.pm
    │       │   ├── MoveNodesAfterResegment.pm
    │       │   ├── ProjectAndConcatAttribs.pm
    │       │   ├── RandomCoNLL.pm
    │       │   ├── ReplacePersonalNamesCS.pm
    │       │   ├── RestoreCoordNodes.pm
    │       │   ├── SampleWithoutReplacement.pm
    │       │   ├── Sleep.pm
    │       │   ├── TagToMorphcat.pm
    │       │   ├── Translog
    │       │   │   ├── BuildTreesFromOffsetIndices.pm
    │       │   │   ├── ConllDeprelToCdtDeprel.pm
    │       │   │   ├── MergeSentencesByAlignment.pm
    │       │   │   ├── MoveAlignedTargetNodes.pm
    │       │   │   ├── PdtStyleToCdtStyle.pm
    │       │   │   ├── ProjectEdgesByAlignment.pm
    │       │   │   ├── RemakeWildZones.pm
    │       │   │   ├── SegmentSentences.pm
    │       │   │   └── Treex2Alignment.pm
    │       │   ├── TreeDiffAnalysis.pm
    │       │   └── YALI.pm
    │       ├── N2N
    │       │   └── ProjectTreeThroughTranslation.pm
    │       ├── P2A
    │       │   ├── EN
    │       │   │   └── FixCoord.pm
    │       │   ├── NL
    │       │   │   └── Alpino.pm
    │       │   ├── Pennconverter.pm
    │       │   ├── StanfordConverter.pm
    │       │   └── TigerET.pm
    │       ├── Print
    │       │   ├── Accuracy.pm
    │       │   ├── AddOna.pm
    │       │   ├── AdjectivesWithInfinitive.pm
    │       │   ├── AlignedFrames.pm
    │       │   ├── AlignedTtrees.pm
    │       │   ├── AlignmentStatistics.pm
    │       │   ├── AtreeStats.pm
    │       │   ├── AtreeTransformationStats.pm
    │       │   ├── AttributeArrays.pm
    │       │   ├── Bleu.pm
    │       │   ├── BranchingFreq.pm
    │       │   ├── CS
    │       │   │   ├── UnsupPronCorefData.pm
    │       │   │   └── UnsupRelatCorefData.pm
    │       │   ├── ClauseDepth.pm
    │       │   ├── Clauses.pm
    │       │   ├── CoApAfunStats.pm
    │       │   ├── CoNLLFromPDTStyle.pm
    │       │   ├── CoordStats.pm
    │       │   ├── CorefSegmentsData.pm
    │       │   ├── CorefSentences.pm
    │       │   ├── Curriculum.pm
    │       │   ├── CzEngBlockIDs.pm
    │       │   ├── Debug.pm
    │       │   ├── Debug
    │       │   │   ├── DocumentTextHead.pm
    │       │   │   └── IsReferential.pm
    │       │   ├── DeprelStats.pm
    │       │   ├── DeprelStats1.pm
    │       │   ├── EdgeProbs.pm
    │       │   ├── Entropy.pm
    │       │   ├── EvalAlignedAtrees.pm
    │       │   ├── FormemeSemposMismatch.pm
    │       │   ├── Frames.pm
    │       │   ├── Garbage.pm
    │       │   ├── GrammatemesForTgen.pm
    │       │   ├── IntersetDriverStub.pm
    │       │   ├── ItTranslData.pm
    │       │   ├── ListMostDifferentTrees.pm
    │       │   ├── ListNonProjTrees.pm
    │       │   ├── MutualInformation.pm
    │       │   ├── MweStats.pm
    │       │   ├── NodeBleu.pm
    │       │   ├── Overall.pm
    │       │   ├── ParentChildStats.pm
    │       │   ├── ReferentialItData.pm
    │       │   ├── SRLLexRf.pm
    │       │   ├── SRLParserFeaturePrinter.pm
    │       │   ├── SemanticFactorsForMoses.pm
    │       │   ├── SentencesWithValencyFrames.pm
    │       │   ├── TagChanges.pm
    │       │   ├── TagStats.pm
    │       │   ├── TaggedTokensWithLemma.pm
    │       │   ├── TestFileNames.pm
    │       │   ├── TnT.pm
    │       │   ├── TokenStats.pm
    │       │   ├── TranslationOptions.pm
    │       │   ├── TranslationResume.pm
    │       │   ├── VWForDefiniteness.pm
    │       │   ├── VWForFunctors.pm
    │       │   ├── VWForValencyFrames.pm
    │       │   ├── VWVectors.pm
    │       │   ├── ValencyFramesForKira.pm
    │       │   ├── ValencyFramesForMoses.pm
    │       │   ├── VectorsForTM.pm
    │       │   ├── VectorsForTreeKenLM.pm
    │       │   ├── VectorsForTreeLM.pm
    │       │   └── WordOrderStats.pm
    │       ├── Project
    │       │   ├── Attributes.pm
    │       │   ├── Coreference.pm
    │       │   └── Tree.pm
    │       ├── Read
    │       │   ├── AlignedCoNLL.pm
    │       │   ├── AlignedSentences.pm
    │       │   ├── AlksnisPML.pm
    │       │   ├── Alpino.pm
    │       │   ├── Amr.pm
    │       │   ├── AttributeSentences.pm
    │       │   ├── BaseAlignedReader.pm
    │       │   ├── BaseAlignedTextReader.pm
    │       │   ├── BaseCoNLLReader.pm
    │       │   ├── BasePMLReader.pm
    │       │   ├── BaseReader.pm
    │       │   ├── BaseSplitterRole.pm
    │       │   ├── BaseTextReader.pm
    │       │   ├── BundleIds.pm
    │       │   ├── BundleWildAttribute.pm
    │       │   ├── CETLEF.pm
    │       │   ├── CSTS.pm
    │       │   ├── CdtPack.pm
    │       │   ├── CdtTag.pm
    │       │   ├── CnecXML.pm
    │       │   ├── CoNLL2003.pm
    │       │   ├── CoNLL2009.pm
    │       │   ├── CoNLL2012.pm
    │       │   ├── CoNLLU.pm
    │       │   ├── CoNLLX.pm
    │       │   ├── CoNLLXfp.pm
    │       │   ├── ConsumerReader.pm
    │       │   ├── CzengPlaintextReader.pm
    │       │   ├── DGA.pm
    │       │   ├── Deps.pm
    │       │   ├── Giza.pm
    │       │   ├── HTML.pm
    │       │   ├── Hali.pm
    │       │   ├── HaliBreaking.pm
    │       │   ├── MosesTrace.pm
    │       │   ├── Other_PML_schemas
    │       │   │   ├── AlksnisSchema-1.3.pml
    │       │   │   ├── AlksnisSchema-3.0.pml
    │       │   │   ├── antisDplus_schema.pml
    │       │   │   └── quz_schema.xml
    │       │   ├── PADT.pm
    │       │   ├── PADT_schema
    │       │   │   ├── deeper.schema.xml
    │       │   │   ├── elixir.schema.xml
    │       │   │   ├── morpho.schema.xml
    │       │   │   ├── syntax.schema.xml
    │       │   │   └── words.schema.xml
    │       │   ├── PCEDT.pm
    │       │   ├── PDT.pm
    │       │   ├── PDT_schema
    │       │   │   ├── adata.rng
    │       │   │   ├── adata_25_schema.xml
    │       │   │   ├── adata_30_schema.xml
    │       │   │   ├── adata_35_schema.xml
    │       │   │   ├── adata_c_schema.xml
    │       │   │   ├── adata_schema.xml
    │       │   │   ├── mdata.rng
    │       │   │   ├── mdata_25_schema.xml
    │       │   │   ├── mdata_30_schema.xml
    │       │   │   ├── mdata_35_schema.xml
    │       │   │   ├── mdata_c_schema.xml
    │       │   │   ├── mdata_schema.xml
    │       │   │   ├── pml_common.rng
    │       │   │   ├── pml_schema.rng
    │       │   │   ├── tdata.rng
    │       │   │   ├── tdata_25_schema.xml
    │       │   │   ├── tdata_30_schema.xml
    │       │   │   ├── tdata_35_schema.xml
    │       │   │   ├── tdata_c2_schema.xml
    │       │   │   ├── tdata_c_schema.xml
    │       │   │   ├── tdata_schema.xml
    │       │   │   ├── wdata.rng
    │       │   │   ├── wdata_25_schema.xml
    │       │   │   ├── wdata_30_schema.xml
    │       │   │   ├── wdata_35_schema.xml
    │       │   │   ├── wdata_c_schema.xml
    │       │   │   └── wdata_schema.xml
    │       │   ├── PEDT.pm
    │       │   ├── PEDT_schema
    │       │   │   ├── adata_eng_schema.xml
    │       │   │   └── tdata_eng_schema.xml
    │       │   ├── PennMrg.pm
    │       │   ├── PennPos.pm
    │       │   ├── ProducerReader.pm
    │       │   ├── QuechuaPML.pm
    │       │   ├── Sentences.pm
    │       │   ├── SentencesTSV.pm
    │       │   ├── Shakti.pm
    │       │   ├── Syntagrus.pm
    │       │   ├── TEI.pm
    │       │   ├── TMT.pm
    │       │   ├── TMT_schema
    │       │   │   └── tmt_schema.xml
    │       │   ├── Text.pm
    │       │   ├── Tiger.pm
    │       │   ├── Treex.pm
    │       │   ├── Valesco.pm
    │       │   ├── Vertical.pm
    │       │   ├── WikiDump.pm
    │       │   ├── WordAlignmentXML.pm
    │       │   ├── YAML.pm
    │       │   └── t
    │       │   │   ├── aligned_sentences.t
    │       │   │   ├── base.t
    │       │   │   ├── base_aligned.t
    │       │   │   ├── base_aligned_text.t
    │       │   │   ├── cdt-test-0005-da.tag
    │       │   │   ├── cdt-test-0005-es-lotte.tag
    │       │   │   ├── cdt-test-0005-it-lisa.tag
    │       │   │   ├── cdt_tag.t
    │       │   │   ├── gzip.t
    │       │   │   ├── pcedt.t
    │       │   │   ├── word_alignment_xml.t
    │       │   │   └── word_alignment_xml_sample.wa
    │       ├── Sample
    │       │   ├── Base.pm
    │       │   └── Trees.pm
    │       ├── Segment
    │       │   ├── EstimateInterlinkCounts.pm
    │       │   ├── GreedyRegSuggestBreaks.pm
    │       │   ├── NaiveSuggestBreaks.pm
    │       │   ├── OptimalSuggestBreaks.pm
    │       │   ├── RandomSuggestBreaks.pm
    │       │   ├── RandomizedSuggestBreaks.pm
    │       │   ├── SetBlockIdsAtRandom.pm
    │       │   ├── SetInterlinkCounts.pm
    │       │   └── SuggestSegmentBreaks.pm
    │       ├── SemevalABSA
    │       │   ├── Adverb.pm
    │       │   ├── AnnotateWithRules.pm
    │       │   ├── BaseRule.pm
    │       │   ├── But.pm
    │       │   ├── Coord.pm
    │       │   ├── FirstNounAboveSubjAdj.pm
    │       │   ├── KnownAspect.pm
    │       │   ├── MarkCategories.pm
    │       │   ├── MarkSentiment.pm
    │       │   ├── MoveABSAFromWild.pm
    │       │   ├── MoveABSAToWild.pm
    │       │   ├── MoveABSAToWildCandidates.pm
    │       │   ├── SubjectOfSubjectivePat.pm
    │       │   ├── VerbActants.pm
    │       │   ├── VerbonominalPatientNoun.pm
    │       │   └── VerbonominalSubjectSubjAdj.pm
    │       ├── T2A
    │       │   ├── AddAppositionPunct.pm
    │       │   ├── AddArticles.pm
    │       │   ├── AddAuxVerbModalTense.pm
    │       │   ├── AddCoordPunct.pm
    │       │   ├── AddInfinitiveParticles.pm
    │       │   ├── AddInterleavedFormemeNodes.pm
    │       │   ├── AddNegationParticle.pm
    │       │   ├── AddParentheses.pm
    │       │   ├── AddPrepos.pm
    │       │   ├── AddSentFinalPunct.pm
    │       │   ├── AddSentmodPunct.pm
    │       │   ├── AddSubconjs.pm
    │       │   ├── AddSubordClausePunct.pm
    │       │   ├── AmodCoordEnhancedUD.pm
    │       │   ├── AnalyticalReorder.pm
    │       │   ├── BG
    │       │   │   ├── AddAuxVerbModalTense.pm
    │       │   │   ├── AddAuxVerbs.pm
    │       │   │   └── MoveDefiniteness.pm
    │       │   ├── CS
    │       │   │   ├── AddAppositionPunct.pm
    │       │   │   ├── AddAuxVerbCompoundFuture.pm
    │       │   │   ├── AddAuxVerbCompoundPassive.pm
    │       │   │   ├── AddAuxVerbCompoundPast.pm
    │       │   │   ├── AddAuxVerbConditional.pm
    │       │   │   ├── AddAuxVerbModal.pm
    │       │   │   ├── AddClausalExpletivePronouns.pm
    │       │   │   ├── AddCoordPunct.pm
    │       │   │   ├── AddPrepos.pm
    │       │   │   ├── AddReflexParticles.pm
    │       │   │   ├── AddSentFinalPunct.pm
    │       │   │   ├── AddSubconjs.pm
    │       │   │   ├── AddSubordClausePunct.pm
    │       │   │   ├── CapitalizeNamedEntitiesAfterTransfer.pm
    │       │   │   ├── CapitalizeSentStart.pm
    │       │   │   ├── CheckCommas.pm
    │       │   │   ├── ChooseMlemmaForPersPron.pm
    │       │   │   ├── CopyTtree.pm
    │       │   │   ├── DeleteEmptyNouns.pm
    │       │   │   ├── DeleteSuperfluousAuxCP.pm
    │       │   │   ├── DistinguishHomonymousMlemmas.pm
    │       │   │   ├── DropSubjPersProns.pm
    │       │   │   ├── FixPossessiveAdjs.pm
    │       │   │   ├── GenerateCompoundVerbforms.pm
    │       │   │   ├── GenerateWordforms.pm
    │       │   │   ├── ImposeAttrAgr.pm
    │       │   │   ├── ImposeComplAgr.pm
    │       │   │   ├── ImposePronZAgr.pm
    │       │   │   ├── ImposeRelPronAgr.pm
    │       │   │   ├── ImposeSubjpredAgr.pm
    │       │   │   ├── InitMorphcat.pm
    │       │   │   ├── MarkClauseHeads.pm
    │       │   │   ├── MarkLexVerbChildren.pm
    │       │   │   ├── MarkSubject.pm
    │       │   │   ├── MoveCliticsToWackernagel.pm
    │       │   │   ├── MoveQuotes.pm
    │       │   │   ├── ResolveVerbs.pm
    │       │   │   ├── ReverseNumberNounDependency.pm
    │       │   │   ├── SetFormemes.pm
    │       │   │   ├── TLemmas.xfst
    │       │   │   ├── TransformTLemmas.pm
    │       │   │   └── VocalizePrepos.pm
    │       │   ├── CapitalizeDirectSpeech.pm
    │       │   ├── CapitalizeSentStart.pm
    │       │   ├── CopyFunctorsToMisc.pm
    │       │   ├── CopyTtree.pm
    │       │   ├── DeleteGeneratedNodes.pm
    │       │   ├── DeleteSuperfluousAuxCP.pm
    │       │   ├── DropPersPronSb.pm
    │       │   ├── DropPersPronSbImper.pm
    │       │   ├── EN
    │       │   │   ├── AddAdjAdvGradation.pm
    │       │   │   ├── AddAdjAdvNegation.pm
    │       │   │   ├── AddAppositionPunct.pm
    │       │   │   ├── AddArticles.pm
    │       │   │   ├── AddAuxVerbCompoundPassive.pm
    │       │   │   ├── AddAuxVerbInter.pm
    │       │   │   ├── AddAuxVerbModalTense.pm
    │       │   │   ├── AddAuxVerbThereIs.pm
    │       │   │   ├── AddCoordPunct.pm
    │       │   │   ├── AddExistentialThere.pm
    │       │   │   ├── AddInfinitiveParticles.pm
    │       │   │   ├── AddIntroPunct.pm
    │       │   │   ├── AddPhrasalPunct.pm
    │       │   │   ├── AddPhrasalVerbParticles.pm
    │       │   │   ├── AddPossessiveMarkers.pm
    │       │   │   ├── AddPrepos.pm
    │       │   │   ├── AddSubconjs.pm
    │       │   │   ├── AddSubordClausePunct.pm
    │       │   │   ├── AddVerbNegation.pm
    │       │   │   ├── CapitalizeSentStart.pm
    │       │   │   ├── DeleteYouInImperatives.pm
    │       │   │   ├── FixFlectErrors.pm
    │       │   │   ├── FixLemmas.pm
    │       │   │   ├── FixThereIs.pm
    │       │   │   ├── GenerateWordforms.pm
    │       │   │   ├── GenerateWordformsMorphodita.pm
    │       │   │   ├── ImposeSubjpredAgr.pm
    │       │   │   ├── IndefArticlePhonetics.pm
    │       │   │   ├── InitMorphcat.pm
    │       │   │   ├── MarkSubject.pm
    │       │   │   ├── MoveRhematizers.pm
    │       │   │   ├── SbAuxvReorder.pm
    │       │   │   ├── WordOrder.pm
    │       │   │   └── WordOrderTools.pm
    │       │   ├── ES
    │       │   │   ├── AddArticles.pm
    │       │   │   ├── AddAuxVerbCompoundPassive.pm
    │       │   │   ├── AddAuxVerbModalTense.pm
    │       │   │   ├── AddAuxVerbTense.pm
    │       │   │   ├── AddComparatives.pm
    │       │   │   ├── AddPrepos.pm
    │       │   │   ├── AddReflexive.pm
    │       │   │   ├── AddSentFinalPunct.pm
    │       │   │   ├── AddSentmodPunct.pm
    │       │   │   ├── AddSubconjs.pm
    │       │   │   ├── AddSubordClausePunct.pm
    │       │   │   ├── DeleteSuperfluousAuxCP.pm
    │       │   │   ├── FixAttributeOrder.pm
    │       │   │   ├── GenerateWordforms.pm
    │       │   │   ├── GenerateWordformsPerl.pm
    │       │   │   ├── InitMorphcat.pm
    │       │   │   └── MoveRhematizers.pm
    │       │   ├── EU
    │       │   │   ├── AddArticles.pm
    │       │   │   ├── AddAuxVerbModal.pm
    │       │   │   ├── AddAuxVerbModalTense.pm
    │       │   │   ├── AddAuxVerbTense.pm
    │       │   │   ├── AddNegationParticle.pm
    │       │   │   ├── AddPrepos.pm
    │       │   │   ├── AddSentFinalPunct.pm
    │       │   │   ├── AddSubconjs.pm
    │       │   │   ├── AddSubordClausePunct.pm
    │       │   │   ├── DropPersPron.pm
    │       │   │   ├── FixGramCases.pm
    │       │   │   ├── FixNegativeVerbOrder.pm
    │       │   │   ├── FixOrder.pm
    │       │   │   ├── FixTransitiveAgreement.pm
    │       │   │   ├── GenerateGazeteerItems.pm
    │       │   │   ├── GenerateWordforms.pm
    │       │   │   ├── ImposeSubjObjpredAgr.pm
    │       │   │   └── MarkSubject.pm
    │       │   ├── FixNounGender.pm
    │       │   ├── GenerateA2TRefs.pm
    │       │   ├── GenerateEmptyNodes.pm
    │       │   ├── GenerateWordforms.pm
    │       │   ├── ImposeAttrAgr.pm
    │       │   ├── ImposeSubjpredAgr.pm
    │       │   ├── InitMorphcat.pm
    │       │   ├── InitMorphcatPT.pm
    │       │   ├── MarkSubject.pm
    │       │   ├── MorphcatToPdtTagRegexp.pm
    │       │   ├── NL
    │       │   │   ├── AddArticles.pm
    │       │   │   ├── AddAuxVerbCompoundPassive.pm
    │       │   │   ├── AddAuxVerbModalTense.pm
    │       │   │   ├── AddFormalSubject.pm
    │       │   │   ├── AddInfinitiveParticles.pm
    │       │   │   ├── AddNegationParticle.pm
    │       │   │   ├── AddPrepos.pm
    │       │   │   ├── AddReflexParticles.pm
    │       │   │   ├── AddSeparableVerbPrefixes.pm
    │       │   │   ├── AddSubconjs.pm
    │       │   │   ├── Alpino
    │       │   │   │   ├── ADTTreeViterbi.pm
    │       │   │   │   ├── AddCoindexSubjects.pm
    │       │   │   │   ├── CoindexNodes.pm
    │       │   │   │   ├── FixAuxVerbs.pm
    │       │   │   │   ├── FixCompoundNouns.pm
    │       │   │   │   ├── FixFormalSubjects.pm
    │       │   │   │   ├── FixInfinitiveParticles.pm
    │       │   │   │   ├── FixMWUs.pm
    │       │   │   │   ├── FixNamedEntities.pm
    │       │   │   │   ├── FixPrec.pm
    │       │   │   │   ├── FixQuestionsAndRelClauses.pm
    │       │   │   │   ├── MWUs.pm
    │       │   │   │   ├── MarkStype.pm
    │       │   │   │   └── SetAdtRel.pm
    │       │   │   ├── CopyTtree.pm
    │       │   │   ├── FixLemmas.pm
    │       │   │   ├── FixMultiwordSurnames.pm
    │       │   │   ├── FixPronominalAdverbs.pm
    │       │   │   ├── GenerateWordforms.pm
    │       │   │   ├── GenerateWordformsAlpino.pm
    │       │   │   ├── HideVerbPrefixes.pm
    │       │   │   ├── InitMorphcat.pm
    │       │   │   ├── MoveFiniteVerbs.pm
    │       │   │   ├── MoveVerbsToClauseEnd.pm
    │       │   │   └── RestoreVerbPrefixes.pm
    │       │   ├── PT
    │       │   │   ├── AddArticles.pm
    │       │   │   ├── AddAuxVerbCompoundPassive.pm
    │       │   │   ├── AddAuxVerbModalTense.pm
    │       │   │   ├── AddComparatives.pm
    │       │   │   ├── AddConditional.pm
    │       │   │   ├── AddGender.pm
    │       │   │   ├── AddPrepos.pm
    │       │   │   ├── AddVerbNegation.pm
    │       │   │   ├── CliticExceptions.pm
    │       │   │   ├── DropSubjPersProns.pm
    │       │   │   ├── FixPossessivePronouns.pm
    │       │   │   ├── GeneratePronouns.pm
    │       │   │   ├── GenerateWordforms.pm
    │       │   │   ├── GenerateWordformsPerl.pm
    │       │   │   ├── ImposeFormeme.pm
    │       │   │   ├── ImposeLemma.pm
    │       │   │   ├── InitMorphcat.pm
    │       │   │   ├── MarkSubject.pm
    │       │   │   ├── MoveRhematizers.pm
    │       │   │   ├── PrepositionContraction.pm
    │       │   │   └── SecondPersonPoliteness.pm
    │       │   ├── ProjectClauseNumber.pm
    │       │   ├── RU
    │       │   │   ├── AddAuxVerbConditional.pm
    │       │   │   ├── AddAuxVerbModal.pm
    │       │   │   ├── AddNegation.pm
    │       │   │   ├── ChooseMlemmaForPersPron.pm
    │       │   │   ├── DropCopula.pm
    │       │   │   ├── GenerateWordforms.pm
    │       │   │   └── InitMorphcat.pm
    │       │   ├── RenamePunctuationTLemmas.pm
    │       │   └── SentenceNegationToVerb.pm
    │       ├── T2P
    │       │   └── CopyTtree.pm
    │       ├── T2T
    │       │   ├── AssignDefaultGrammatemes.pm
    │       │   ├── AssignWordnetHyperchain.pm
    │       │   ├── CS2CS
    │       │   │   ├── AddFrequentPrepositions.pm
    │       │   │   ├── Deepfix.pm
    │       │   │   ├── DropSubjPersProns.pm
    │       │   │   ├── FixGrammatemes.pm
    │       │   │   ├── FixInfrequentFormemes.pm
    │       │   │   ├── FixInfrequentNouns.pm
    │       │   │   ├── FixInfrequentPrepositions.pm
    │       │   │   ├── FixNegation.pm
    │       │   │   ├── FixTense.pm
    │       │   │   ├── FormemeTLemmaAgreement.pm
    │       │   │   ├── MarkClauseHeads.pm
    │       │   │   ├── MarkNegationCueAndScope.pm
    │       │   │   ├── ParaphraseSimple.pm
    │       │   │   ├── PrecomputeNodeInfo.pm
    │       │   │   ├── ProjectChangedToA.pm
    │       │   │   └── RemoveInfrequentPrepositions.pm
    │       │   ├── CS2EN
    │       │   │   ├── AddDefiniteness.pm
    │       │   │   ├── DeleteSuperfluousNodes.pm
    │       │   │   ├── FixDoubleNegative.pm
    │       │   │   ├── FixForeignNames.pm
    │       │   │   ├── FixGrammatemesAfterTransfer.pm
    │       │   │   ├── RearrangeNounCompounds.pm
    │       │   │   ├── RemoveInfinitiveSubjects.pm
    │       │   │   ├── RemovePerspronGender.pm
    │       │   │   ├── ReplaceSomeWithIndefArticle.pm
    │       │   │   ├── TrFAddVariants.pm
    │       │   │   ├── TrFAddVariantsInterpol.pm
    │       │   │   ├── TrFTryRules.pm
    │       │   │   ├── TrLAddVariants.pm
    │       │   │   ├── TrLAddVariantsInterpol.pm
    │       │   │   ├── TrLFPhrases.pm
    │       │   │   ├── TrLFixTMErrors.pm
    │       │   │   └── TrLTryRules.pm
    │       │   ├── CS2RU
    │       │   │   ├── FixDateTime.pm
    │       │   │   ├── FixValency.pm
    │       │   │   ├── RuleBasedFormemes.pm
    │       │   │   ├── TrLAddVariants.pm
    │       │   │   └── TrLTryRules.pm
    │       │   ├── CopyCorefFromAlignment.pm
    │       │   ├── CopyFunctorsFromAlignment.pm
    │       │   ├── CopyTtree.pm
    │       │   ├── CopyValencyFramesFromAlignment.pm
    │       │   ├── CutVariants.pm
    │       │   ├── EN2CS
    │       │   │   ├── AddNounGender.pm
    │       │   │   ├── AddPersPronBelowVfin.pm
    │       │   │   ├── AddRelpronBelowRc.pm
    │       │   │   ├── AddVerbAspect.pm
    │       │   │   ├── ChangeCorToPersPron.pm
    │       │   │   ├── CutVariants.pm
    │       │   │   ├── DeletePossPronBeforeVlastni.pm
    │       │   │   ├── DeleteSuperfluousTnodes.pm
    │       │   │   ├── FindGramCorefForReflPron.pm
    │       │   │   ├── FixAdjComplAgreement.pm
    │       │   │   ├── FixDateTime.pm
    │       │   │   ├── FixGrammatemesAfterTransfer.pm
    │       │   │   ├── FixMoney.pm
    │       │   │   ├── FixNegation.pm
    │       │   │   ├── FixTransferChoices.pm
    │       │   │   ├── MarkNewRelClauses.pm
    │       │   │   ├── MoveAdjsBeforeNouns.pm
    │       │   │   ├── MoveDicendiCloserToDsp.pm
    │       │   │   ├── MoveEnoughBeforeAdj.pm
    │       │   │   ├── MoveGenitivesRight.pm
    │       │   │   ├── MoveJesteBeforeVerb.pm
    │       │   │   ├── MoveNounAttrAfterNouns.pm
    │       │   │   ├── MovePersPronNextToVerb.pm
    │       │   │   ├── MoveRelClauseRight.pm
    │       │   │   ├── NeutPersPronGenderFromAntec.pm
    │       │   │   ├── OverridePpWithPhraseTr.pm
    │       │   │   ├── PrunePersonalNameVariants.pm
    │       │   │   ├── PruneVariants.pm
    │       │   │   ├── RemoveUnpassivizableVariants.pm
    │       │   │   ├── ReplaceVerbWithAdj.pm
    │       │   │   ├── TrFAddVariants.pm
    │       │   │   ├── TrFAddVariantsInterpol.pm
    │       │   │   ├── TrFRerank.pm
    │       │   │   ├── TrFRerank2.pm
    │       │   │   ├── TrFTryRules.pm
    │       │   │   ├── TrLAddVariants.pm
    │       │   │   ├── TrLAddVariantsBackoff.pm
    │       │   │   ├── TrLAddVariantsInterpol.pm
    │       │   │   ├── TrLAddVariantsVW.pm
    │       │   │   ├── TrLAddVariantsVW2.pm
    │       │   │   ├── TrLAddVariants_coref.pm
    │       │   │   ├── TrLFCompounds.pm
    │       │   │   ├── TrLFJointStatic.pm
    │       │   │   ├── TrLFNumeralsByRules.pm
    │       │   │   ├── TrLFPhrases.pm
    │       │   │   ├── TrLFTreeViterbi.pm
    │       │   │   ├── TrLFemaleSurnames.pm
    │       │   │   ├── TrLFilterAspect.pm
    │       │   │   ├── TrLHackNNP.pm
    │       │   │   ├── TrLNumbers.pm
    │       │   │   ├── TrLPersPronIt.pm
    │       │   │   ├── TrLPersPronRefl.pm
    │       │   │   ├── TrLTryRules.pm
    │       │   │   ├── TrL_ITdomain.pm
    │       │   │   ├── TransformPassiveConstructions.pm
    │       │   │   ├── TurnTextCorefToGramCoref.pm
    │       │   │   └── ValencyRelatedRules.pm
    │       │   ├── EN2EN
    │       │   │   └── TrLFTreeViterbi.pm
    │       │   ├── EN2ES
    │       │   │   ├── AddNounGender.pm
    │       │   │   ├── FixDefinitiveness.pm
    │       │   │   ├── FixThereIs.pm
    │       │   │   └── TrLTryRules.pm
    │       │   ├── EN2EU
    │       │   │   ├── FixDefinitiveness.pm
    │       │   │   ├── FixPresentContinuous.pm
    │       │   │   ├── FixThereIs.pm
    │       │   │   ├── FixYouPl.pm
    │       │   │   ├── RemoveRelPron.pm
    │       │   │   ├── TrLTryRules.pm
    │       │   │   └── TranslateRelPron.pm
    │       │   ├── EN2NL
    │       │   │   ├── AddNounGender.pm
    │       │   │   ├── FixCompounds.pm
    │       │   │   ├── TrLFFixTMErrors.pm
    │       │   │   └── TrLFPhrases.pm
    │       │   ├── EN2PT
    │       │   │   ├── AddRelpronBelowRc.pm
    │       │   │   ├── FixPersPron.pm
    │       │   │   ├── FixPunctuation.pm
    │       │   │   ├── FixThereIs.pm
    │       │   │   ├── MoveAdjsAfterNouns.pm
    │       │   │   ├── Noun1Noun2_To_Noun2DeNoun1.pm
    │       │   │   ├── TrGazeteerItems.pm
    │       │   │   ├── TrL_ITdomain.pm
    │       │   │   └── TurnVerbLemmaToAdjectives.pm
    │       │   ├── FixFormemeWrtNodetype.pm
    │       │   ├── FixGrammatemesAfterTransfer.pm
    │       │   ├── FixPunctFormemes.pm
    │       │   ├── FormemeTLemmaAgreement.pm
    │       │   ├── JA2CS
    │       │   │   ├── MoveVerbs.pm
    │       │   │   ├── TrFAddVariants.pm
    │       │   │   └── TrLAddVariants.pm
    │       │   ├── JA2EN
    │       │   │   ├── TrFAddVariants.pm
    │       │   │   └── TrLAddVariants.pm
    │       │   ├── PT2EN
    │       │   │   ├── FixThereIs.pm
    │       │   │   ├── FixValency.pm
    │       │   │   ├── MoveAdjsBeforeNouns.pm
    │       │   │   ├── RestoreUrl.pm
    │       │   │   └── TrGazeteerItems.pm
    │       │   ├── ParaphraseSimple.pm
    │       │   ├── PosFromTLemma.pm
    │       │   ├── PosToTLemma.pm
    │       │   ├── ProjectChangedToA.pm
    │       │   ├── ProjectSelectedWild.pm
    │       │   ├── ProjectTreeThroughAlignment.pm
    │       │   ├── RecoverUnknownLemmas.pm
    │       │   ├── RehangToEffParents.pm
    │       │   ├── RehangToOrigParents.pm
    │       │   ├── SelectCompatibleTlemmaFormeme.pm
    │       │   ├── SetAClauseNumber.pm
    │       │   ├── SetClauseNumber.pm
    │       │   ├── SetDefinitenessVW.pm
    │       │   ├── TbxParser.pm
    │       │   ├── TrAddVariantsRole.pm
    │       │   ├── TrBaseAddVariantsInterpol.pm
    │       │   ├── TrFAddVariants.pm
    │       │   ├── TrFAddVariantsInterpol.pm
    │       │   ├── TrGazeteerItems.pm
    │       │   ├── TrLAddVariants.pm
    │       │   ├── TrLAddVariantsInterpol.pm
    │       │   ├── TrLApplyTbxDictionary.pm
    │       │   └── TrUseMemcachedModel.pm
    │       ├── T2TAMR
    │       │   ├── AddNegNodes.pm
    │       │   ├── ApplyRules.pm
    │       │   ├── CopyTtree.pm
    │       │   ├── CreateTAMRfromT.pm
    │       │   ├── DeleteMarkedNodes.pm
    │       │   ├── FixCoreference.pm
    │       │   ├── FixNamedEntities.pm
    │       │   ├── FunctorsToAMRLabels.pm
    │       │   ├── MarkRules.pm
    │       │   ├── ReadRules.pm
    │       │   └── RulesSuggestion.pm
    │       ├── T2U
    │       │   ├── AdjustStructure.pm
    │       │   ├── BuildUtree.pm
    │       │   ├── CS
    │       │   │   ├── AdjustStructure.pm
    │       │   │   ├── BuildUtree.pm
    │       │   │   └── ConvertCoreference.pm
    │       │   ├── ConvertCoreference.pm
    │       │   ├── LA
    │       │   │   ├── AdjustStructure.pm
    │       │   │   ├── BuildUtree.pm
    │       │   │   └── ConvertCoreference.pm
    │       │   └── t
    │       │   │   └── adjust_coap.t
    │       ├── Test
    │       │   ├── BaseTester.pm
    │       │   ├── FieldCanHaveWideChars.pm
    │       │   ├── ParameterCanHaveWideChars.pm
    │       │   ├── Phrase.pm
    │       │   └── ValidReferences.pm
    │       ├── Treelets
    │       │   ├── AddTwonodeScores.pm
    │       │   ├── ExtractEdgeTreelets.pm
    │       │   ├── ExtractStrictTreelets.pm
    │       │   ├── ExtractVW.pm
    │       │   ├── ExtractVW2.pm
    │       │   ├── SrcFeatures.pm
    │       │   ├── SrcFeatures2.pm
    │       │   ├── TrEasyFirst.pm
    │       │   ├── TrEasyFirstChain.pm
    │       │   ├── TrEasyFirstLM.pm
    │       │   ├── TrEasyFirstSplit.pm
    │       │   ├── TrInterpol.pm
    │       │   ├── TrOneNode.pm
    │       │   ├── TrOneNodeNeedsCopyTtree.pm
    │       │   └── TrVW.pm
    │       ├── Tutorial
    │       │   ├── MarkHeads.pm
    │       │   ├── P2A.pm
    │       │   ├── PrintDefiniteDescriptions.pm
    │       │   ├── ReorderSVO2SOV.pm
    │       │   ├── Solution
    │       │   │   ├── MarkHeads.pm
    │       │   │   ├── P2A.pm
    │       │   │   ├── PrintDefiniteDescriptions.pm
    │       │   │   ├── ReorderSVO2SOV.pm
    │       │   │   └── StemTamil.pm
    │       │   └── StemTamil.pm
    │       ├── Util
    │       │   ├── DefinedAttr.pm
    │       │   ├── Eval.pm
    │       │   ├── Find.pm
    │       │   ├── FixInvalidIDs.pm
    │       │   ├── FixPMLStructure.pm
    │       │   ├── PMLTQ.pm
    │       │   ├── PMLTQMark.pm
    │       │   └── SetGlobal.pm
    │       ├── W2A
    │       │   ├── AnalysisWithAlignedTrees.pm
    │       │   ├── AppendSynsetIdToLemmas.pm
    │       │   ├── BaseChunkParser.pm
    │       │   ├── CS
    │       │   │   ├── FixAtreeAfterMcD.pm
    │       │   │   ├── FixGuessedLemmas.pm
    │       │   │   ├── FixIsMember.pm
    │       │   │   ├── FixMorphoErrors.pm
    │       │   │   ├── FixPrepositionalCase.pm
    │       │   │   ├── FixReflexivePronouns.pm
    │       │   │   ├── FixReflexiveTantum.pm
    │       │   │   ├── LabelMIRA.pm
    │       │   │   ├── LabelMSTAdapted.pm
    │       │   │   ├── ParseMST.pm
    │       │   │   ├── ParseMSTAdapted.pm
    │       │   │   ├── ParseMSTperl.pm
    │       │   │   ├── ParseRules.pm
    │       │   │   ├── Segment.pm
    │       │   │   ├── TagFeaturama.pm
    │       │   │   ├── TagMorce.pm
    │       │   │   ├── TagMorphoDiTa.pm
    │       │   │   └── Tokenize.pm
    │       │   ├── ConvertTags.pm
    │       │   ├── CopyAttribute.pm
    │       │   ├── CopyTagsFromFile.pm
    │       │   ├── DE
    │       │   │   ├── FixPronouns.pm
    │       │   │   ├── LemmatizeMate.pm
    │       │   │   ├── ParseMate.pm
    │       │   │   ├── TagStanford.pm
    │       │   │   └── Tokenize.pm
    │       │   ├── DefaultDepRel.pm
    │       │   ├── Delexicalize.pm
    │       │   ├── EN
    │       │   │   ├── FixAtree.pm
    │       │   │   ├── FixConjThat.pm
    │       │   │   ├── FixControlVerbs.pm
    │       │   │   ├── FixDicendiVerbs.pm
    │       │   │   ├── FixIsMember.pm
    │       │   │   ├── FixMultiwordPrepAndConj.pm
    │       │   │   ├── FixNominalGroups.pm
    │       │   │   ├── FixSharedSubjects.pm
    │       │   │   ├── FixTags.pm
    │       │   │   ├── FixTagsAfterParse.pm
    │       │   │   ├── FixTagsImperatives.pm
    │       │   │   ├── FixTagsQuotes.pm
    │       │   │   ├── FixTokenization.pm
    │       │   │   ├── GazeteerMatch.pm
    │       │   │   ├── HideIT.pm
    │       │   │   ├── LabelMIRA.pm
    │       │   │   ├── Lemmatize.pm
    │       │   │   ├── MarkCheckCommas.pm
    │       │   │   ├── NormalizeForms.pm
    │       │   │   ├── ParseCharniak.pm
    │       │   │   ├── ParseFanse.pm
    │       │   │   ├── ParseMST.pm
    │       │   │   ├── ParseMSTperl.pm
    │       │   │   ├── ParseMalt.pm
    │       │   │   ├── ParseZpar.pm
    │       │   │   ├── PreferImperatives.pm
    │       │   │   ├── QtHackTags.pm
    │       │   │   ├── QuotesStyle.pm
    │       │   │   ├── RehangConllToPdtStyle.pm
    │       │   │   ├── RehangStanfordCoordToPdtStyle.pm
    │       │   │   ├── Segment.pm
    │       │   │   ├── SetAfun.pm
    │       │   │   ├── SetAfunAfterMcD.pm
    │       │   │   ├── SetAfunAuxCPCoord.pm
    │       │   │   ├── SetIsMemberFromDeprel.pm
    │       │   │   ├── TagFeaturama.pm
    │       │   │   ├── TagLinguaEn.pm
    │       │   │   ├── TagMorce.pm
    │       │   │   ├── TagMorphoDiTa.pm
    │       │   │   ├── TagStanford.pm
    │       │   │   ├── Tokenize.pm
    │       │   │   └── t
    │       │   │   │   └── lingua_en.t
    │       │   ├── ES
    │       │   │   ├── FixMultiwordPrepAndConj.pm
    │       │   │   ├── FixTagAndParse.pm
    │       │   │   ├── TagAndParse.pm
    │       │   │   └── Tokenize.pm
    │       │   ├── EU
    │       │   │   ├── FixModalVerbs.pm
    │       │   │   ├── FixMultiwordPrepAndConj.pm
    │       │   │   ├── FixTagAndParse.pm
    │       │   │   ├── TagAndParse.pm
    │       │   │   ├── Tokenize.pm
    │       │   │   └── TokenizeAndParse.pm
    │       │   ├── EscapeMoses.pm
    │       │   ├── FR
    │       │   │   ├── TagMElt.pm
    │       │   │   ├── TagStanford.pm
    │       │   │   └── Tokenize.pm
    │       │   ├── FixAuxLeaves.pm
    │       │   ├── FixNonleafAuxC.pm
    │       │   ├── FixQuotes.pm
    │       │   ├── FromWSD.pm
    │       │   ├── GazeteerMatch.pm
    │       │   ├── HideGazeteerItems.pm
    │       │   ├── HideIT.pm
    │       │   ├── JA
    │       │   │   ├── FixCopulas.pm
    │       │   │   ├── FixInterpunction.pm
    │       │   │   ├── FixPeriod.pm
    │       │   │   ├── FixTagsUD.pm
    │       │   │   ├── ParseCabocha.pm
    │       │   │   ├── ParseJDEPP.pm
    │       │   │   ├── RehangAuxVerbs.pm
    │       │   │   ├── RehangConjunctions.pm
    │       │   │   ├── RehangCoordinations.pm
    │       │   │   ├── RehangCopulas.pm
    │       │   │   ├── RehangNouns.pm
    │       │   │   ├── RehangParticleChildren.pm
    │       │   │   ├── RehangParticles.pm
    │       │   │   ├── RomanizeTags.pm
    │       │   │   ├── SetAfun.pm
    │       │   │   ├── SetAfunParticles.pm
    │       │   │   ├── TagMeCab.pm
    │       │   │   └── t
    │       │   │   │   ├── parse_jdepp.t
    │       │   │   │   └── tag_mecab.t
    │       │   ├── LA
    │       │   │   ├── ParsingLatin.pm
    │       │   │   ├── Segment.pm
    │       │   │   ├── TagTreeTaggerIT.pm
    │       │   │   └── Tokenize.pm
    │       │   ├── LabelMIRA.pm
    │       │   ├── MarkChunks.pm
    │       │   ├── NL
    │       │   │   └── Tokenize.pm
    │       │   ├── NormalizeForms.pm
    │       │   ├── PT
    │       │   │   ├── ConcatenateTokens.pm
    │       │   │   ├── FixAfuns.pm
    │       │   │   ├── FixTags.pm
    │       │   │   ├── GazeteerMatch.pm
    │       │   │   ├── LXSuite.pm
    │       │   │   ├── Parse.pm
    │       │   │   └── Tokenize.pm
    │       │   ├── ParseLeftBranching.pm
    │       │   ├── ParseMST.pm
    │       │   ├── ParseMSTperl.pm
    │       │   ├── ParseMalt.pm
    │       │   ├── ParseRandom.pm
    │       │   ├── ParseRightBranching.pm
    │       │   ├── ParseUniversal.pm
    │       │   ├── RU
    │       │   │   ├── FixPronouns.pm
    │       │   │   ├── ParseMalt.pm
    │       │   │   ├── Segment.pm
    │       │   │   └── Tokenize.pm
    │       │   ├── ReplaceLemmasWithSynsetId.pm
    │       │   ├── ResegmentSentences.pm
    │       │   ├── Restuff.pm
    │       │   ├── RunDocWSD.pm
    │       │   ├── Segment.pm
    │       │   ├── SegmentOnNewlines.pm
    │       │   ├── TA
    │       │   │   ├── FixAmbiguousTags.pm
    │       │   │   ├── FixLemmas.pm
    │       │   │   ├── FixTags.pm
    │       │   │   ├── Lemmatization.pm
    │       │   │   ├── RuleBasedParser.pm
    │       │   │   ├── RuleBasedTagger.pm
    │       │   │   ├── Segment.pm
    │       │   │   ├── SetAfun.pm
    │       │   │   └── Tokenize.pm
    │       │   ├── Tag.pm
    │       │   ├── TagHunPoS.pm
    │       │   ├── TagMorphoDiTa.pm
    │       │   ├── TagStanford.pm
    │       │   ├── TagTnT.pm
    │       │   ├── TagTreeTagger.pm
    │       │   ├── ToWSD.pm
    │       │   ├── Tokenize.pm
    │       │   ├── TokenizeMorphoDiTa.pm
    │       │   ├── TokenizeMoses.pm
    │       │   ├── TokenizeOnWhitespace.pm
    │       │   ├── TruecaseMoses.pm
    │       │   ├── UDPipe.pm
    │       │   ├── WSD.pm
    │       │   └── t
    │       │   │   ├── resegment_sentences.t
    │       │   │   ├── segment_on_nl.t
    │       │   │   ├── tokenize.t
    │       │   │   └── tokenize_on_whitespace.t
    │       ├── W2W
    │       │   ├── AddNodeIdPrefix.pm
    │       │   ├── CopySentence.pm
    │       │   ├── Deromajize.pm
    │       │   ├── EstimateNoSpaceAfter.pm
    │       │   ├── GoogleTranslate.pm
    │       │   ├── InferNoSpaceAfterFromText.pm
    │       │   ├── MT
    │       │   │   └── Gloss.pm
    │       │   ├── NormalizeCzechSentence.pm
    │       │   ├── NormalizeEnglishSentence.pm
    │       │   ├── NormalizeJapaneseSentence.pm
    │       │   ├── ProjectTokenization.pm
    │       │   ├── RemoveLeadingTrailingSpaces.pm
    │       │   ├── ResegmentSentencesOnX.pm
    │       │   ├── RestoreNoSpaceAfter.pm
    │       │   ├── TA
    │       │   │   ├── CollapseAgglutination.pm
    │       │   │   └── Transliteration.pm
    │       │   ├── Translate.pm
    │       │   └── Translit.pm
    │       └── Write
    │       │   ├── ADTXML.pm
    │       │   ├── Alignments.pm
    │       │   ├── Amr.pm
    │       │   ├── AmrAligned.pm
    │       │   ├── AmrAlignedCrossLang.pm
    │       │   ├── AmrForTreeSurgeon.pm
    │       │   ├── Arff.pm
    │       │   ├── ArffWriting.pm
    │       │   ├── AttributeParameterized.pm
    │       │   ├── AttributeSentences.pm
    │       │   ├── AttributeSentencesAligned.pm
    │       │   ├── AttributeStats.pm
    │       │   ├── BaseTextWriter.pm
    │       │   ├── BaseWriter.pm
    │       │   ├── BundleIds.pm
    │       │   ├── BundleWildAttributeDump.pm
    │       │   ├── CdtTag.pm
    │       │   ├── CoNLL2003.pm
    │       │   ├── CoNLL2009.pm
    │       │   ├── CoNLLU.pm
    │       │   ├── CoNLLX.pm
    │       │   ├── ConllLike.pm
    │       │   ├── EVALD.pm
    │       │   ├── FS.pm
    │       │   ├── Factored.pm
    │       │   ├── LayerAttributes
    │       │       ├── AlignedFormWithoutPrefix.pm
    │       │       ├── AlignedTreeDistances.pm
    │       │       ├── AttributeModifier.pm
    │       │       ├── BracketedVerbform.pm
    │       │       ├── CoNLLMorphCat.pm
    │       │       ├── CoNLLUfeats.pm
    │       │       ├── CoNLLUmisc.pm
    │       │       ├── CzechCoarseTag.pm
    │       │       ├── CzechMLemmaTrunc.pm
    │       │       ├── CzechMorphCat.pm
    │       │       ├── Determiner.pm
    │       │       ├── Distance.pm
    │       │       ├── FirstChar.pm
    │       │       ├── FunctorsFromVallex.pm
    │       │       ├── IsActant.pm
    │       │       ├── IsModal.pm
    │       │       ├── IsValency.pm
    │       │       ├── LemmaFormDiff.pm
    │       │       ├── LemmaFormDist.pm
    │       │       ├── Matching.pm
    │       │       ├── MatchingOrEmpty.pm
    │       │       ├── Missing.pm
    │       │       ├── NegationCueAndScope.pm
    │       │       ├── NumberOfEngVallexFrames.pm
    │       │       ├── NumberOfVallexFrames.pm
    │       │       ├── Order.pm
    │       │       ├── SemposTrunc.pm
    │       │       ├── SplitFormeme.pm
    │       │       ├── SubjectPerson.pm
    │       │       ├── Suffixes.pm
    │       │       ├── SyntheticFuture.pm
    │       │       ├── TLemmaSempos.pm
    │       │       ├── TagWords.pm
    │       │       ├── TreeDistance.pm
    │       │       └── string_distances.py
    │       │   ├── LayerParameterized.pm
    │       │   ├── LemmatizedBitexts.pm
    │       │   ├── MST.pm
    │       │   ├── Manatee.pm
    │       │   ├── ManateeU.pm
    │       │   ├── MosesTree.pm
    │       │   ├── MrpJSON.pm
    │       │   ├── NAF.pm
    │       │   ├── NERHighlightWriter.pm
    │       │   ├── NERHtmlWriter.pm
    │       │   ├── Negations.pm
    │       │   ├── PCEDTAlignment.pm
    │       │   ├── PDT.pm
    │       │   ├── PEDT.pm
    │       │   ├── ParallelSentences.pm
    │       │   ├── PennMrg.pm
    │       │   ├── SDP2014.pm
    │       │   ├── SDP2015.pm
    │       │   ├── SemEval2010.pm
    │       │   ├── Senseval2.pm
    │       │   ├── Sentences.pm
    │       │   ├── SentencesTSV.pm
    │       │   ├── SgmMTEval.pm
    │       │   ├── Stanford.pm
    │       │   ├── Text.pm
    │       │   ├── TextModeTrees.pm
    │       │   ├── ToBundleAttr.pm
    │       │   ├── TreesTXT.pm
    │       │   ├── Treex.pm
    │       │   ├── UMR.pm
    │       │   ├── ViewJSON.pm
    │       │   └── YAML.pm
    │   ├── CS.pm
    │   ├── Core.pm
    │   ├── Core
    │       ├── Block.pm
    │       ├── Bundle.pm
    │       ├── BundleZone.pm
    │       ├── CacheBlock.pm
    │       ├── Common.pm
    │       ├── Config.pm
    │       ├── DocZone.pm
    │       ├── Document.pm
    │       ├── DocumentReader.pm
    │       ├── DocumentReader
    │       │   ├── Base.pm
    │       │   └── ZoneReader.pm
    │       ├── Entity.pm
    │       ├── EntityMention.pm
    │       ├── EntitySet.pm
    │       ├── Files.pm
    │       ├── Loader.pm
    │       ├── Log.pm
    │       ├── Node.pm
    │       ├── Node
    │       │   ├── A.pm
    │       │   ├── Aligned.pm
    │       │   ├── EffectiveRelations.pm
    │       │   ├── InClause.pm
    │       │   ├── Interset.pm
    │       │   ├── N.pm
    │       │   ├── Ordered.pm
    │       │   ├── P.pm
    │       │   ├── T.pm
    │       │   ├── U.pm
    │       │   └── t
    │       │   │   ├── a_effective_rel.t
    │       │   │   ├── alignment.t
    │       │   │   ├── ordered.t
    │       │   │   ├── p_penn_treebank_string_format.t
    │       │   │   ├── t_eff_dive.t
    │       │   │   ├── t_eff_dive2.t
    │       │   │   └── t_effective_rel.t
    │       ├── Parallel
    │       │   ├── Head.pm
    │       │   └── Node.pm
    │       ├── Phrase.pm
    │       ├── Phrase
    │       │   ├── BaseNTerm.pm
    │       │   ├── Builder.pm
    │       │   ├── Coordination.pm
    │       │   ├── NTerm.pm
    │       │   ├── PP.pm
    │       │   └── Term.pm
    │       ├── RememberArgs.pm
    │       ├── Resource.pm
    │       ├── Run.pm
    │       ├── Scenario.pm
    │       ├── ScenarioParser.pm
    │       ├── ScenarioParser.rdg
    │       ├── TredView.pm
    │       ├── TredView
    │       │   ├── AnnotationCommand.pm
    │       │   ├── BackendStorable.pm
    │       │   ├── Colors.pm
    │       │   ├── Common.pm
    │       │   ├── Labels.pm
    │       │   ├── LineStyles.pm
    │       │   ├── Styles.pm
    │       │   ├── TreeLayout.pm
    │       │   └── Vallex.pm
    │       ├── Types.pm
    │       ├── WildAttr.pm
    │       ├── Zone.pm
    │       ├── compile_grammar.pl
    │       ├── share
    │       │   └── tred_extension
    │       │   │   ├── extensions.lst
    │       │   │   └── treex
    │       │   │       ├── contrib
    │       │   │           └── treex
    │       │   │           │   ├── .gitignore
    │       │   │           │   ├── Treex_mode.inc
    │       │   │           │   └── contrib.mac
    │       │   │       ├── package.xml
    │       │   │       ├── resources
    │       │   │           ├── pmlbackend_conf.xml
    │       │   │           ├── treex_schema.xml
    │       │   │           ├── treex_subschema_a_layer.xml
    │       │   │           ├── treex_subschema_bbn.xml
    │       │   │           ├── treex_subschema_interset.xml
    │       │   │           ├── treex_subschema_langcodes.xml
    │       │   │           ├── treex_subschema_n_layer.xml
    │       │   │           ├── treex_subschema_p_layer.xml
    │       │   │           ├── treex_subschema_t_layer.xml
    │       │   │           ├── treex_subschema_u_layer.xml
    │       │   │           └── treex_subschema_w_layer.xml
    │       │   │       └── stylesheets
    │       │   │           └── Treex_stylesheet
    │       └── t
    │       │   ├── attribute.t
    │       │   ├── backrefs.t
    │       │   ├── block.t
    │       │   ├── bundle.t
    │       │   ├── bundle_zones.t
    │       │   ├── common.t
    │       │   ├── config.t
    │       │   ├── document.t
    │       │   ├── document_zones.t
    │       │   ├── dump_scen.t
    │       │   ├── error.scen
    │       │   ├── files.t
    │       │   ├── following_node.t
    │       │   ├── grammar.t
    │       │   ├── log.t
    │       │   ├── n_tree.t
    │       │   ├── node.t
    │       │   ├── phrase.t
    │       │   ├── remove_bundle.t
    │       │   ├── resource.t
    │       │   ├── run.t
    │       │   ├── runall.sh
    │       │   ├── scenario.t
    │       │   ├── scenario_parser.t
    │       │   ├── storable.t
    │       │   ├── test.scen
    │       │   ├── trees.t
    │       │   ├── wildattr.t
    │       │   └── writers.t
    │   ├── EN.pm
    │   ├── JA.pm
    │   ├── Manual
    │       └── FAQ.pod
    │   ├── Scen
    │       ├── Analysis
    │       │   ├── BG.pm
    │       │   ├── CS.pm
    │       │   ├── DE.pm
    │       │   ├── EN.pm
    │       │   ├── ES.pm
    │       │   ├── EU.pm
    │       │   ├── JA.pm
    │       │   ├── LA.pm
    │       │   ├── NL.pm
    │       │   ├── PL.pm
    │       │   ├── PT.pm
    │       │   └── RU.pm
    │       ├── CS2EN.pm
    │       ├── Coref.pm
    │       ├── CzEng16.pm
    │       ├── CzEng2CoNLLU.pm
    │       ├── EN2CS.pm
    │       ├── EN2ES.pm
    │       ├── EN2EU.pm
    │       ├── EN2NL.pm
    │       ├── EN2PT.pm
    │       ├── EN_Moses_postprocess.pm
    │       ├── EN_Moses_preprocess.pm
    │       ├── ES2EN.pm
    │       ├── EU2EN.pm
    │       ├── MLFix
    │       │   ├── Analysis_1.pm
    │       │   ├── Analysis_2.pm
    │       │   ├── Fix.pm
    │       │   ├── FixPrepare.pm
    │       │   ├── NER.pm
    │       │   ├── RunMGiza.pm
    │       │   ├── WriteSentences.pm
    │       │   └── WriteTriparallel.pm
    │       ├── NL2EN.pm
    │       ├── PT2EN.pm
    │       ├── Synthesis
    │       │   ├── BG.pm
    │       │   ├── CS.pm
    │       │   ├── EN.pm
    │       │   ├── ES.pm
    │       │   ├── EU.pm
    │       │   ├── NL.pm
    │       │   └── PT.pm
    │       └── Transfer
    │       │   ├── CS2EN.pm
    │       │   ├── EN2CS.pm
    │       │   ├── EN2ES.pm
    │       │   ├── EN2EU.pm
    │       │   ├── EN2NL.pm
    │       │   ├── EN2PT.pm
    │       │   ├── ES2EN.pm
    │       │   ├── EU2EN.pm
    │       │   ├── NL2EN.pm
    │       │   └── PT2EN.pm
    │   ├── Service
    │       ├── Client.pm
    │       ├── EventEmitter.pm
    │       ├── MDP.pm
    │       ├── Pool.pm
    │       ├── Role.pm
    │       ├── Router.pm
    │       ├── Worker.pm
    │       └── t
    │       │   ├── eventemitter.t
    │       │   ├── fixtures
    │       │       ├── cs_sample.txt
    │       │       ├── en_sample.txt
    │       │       └── hi_example.txt
    │       │   ├── lib
    │       │       ├── Test
    │       │       │   └── TreexTool.pm
    │       │       └── Treex
    │       │       │   ├── Block
    │       │       │       └── W2W
    │       │       │       │   └── AddPrefix.pm
    │       │       │   └── Tool
    │       │       │       └── Prefixer.pm
    │       │   ├── mst_parser_cs.t
    │       │   ├── mst_parser_en.t
    │       │   ├── pool.t
    │       │   ├── role.t
    │       │   ├── router.t
    │       │   └── worker.t
    │   ├── Tool
    │       ├── ATreeTransformer
    │       │   ├── BaseTransformer.pm
    │       │   ├── CoApStyle.pm
    │       │   ├── ComplexVerb.pm
    │       │   ├── DepReverser.pm
    │       │   └── t
    │       │   │   ├── coordchain.t
    │       │   │   └── reverser.t
    │       ├── Algorithm
    │       │   ├── TreeUtils.pm
    │       │   ├── TreeViterbi.pm
    │       │   ├── TreeViterbiState.pm
    │       │   └── t
    │       │   │   └── tree_utils.t
    │       ├── Align
    │       │   ├── Annot
    │       │   │   └── Util.pm
    │       │   ├── Features.pm
    │       │   ├── MonolingualGreedy.pm
    │       │   ├── Robust
    │       │   │   ├── CS
    │       │   │   │   └── RelPron.pm
    │       │   │   ├── Common.pm
    │       │   │   └── EN
    │       │   │   │   └── PersPron.pm
    │       │   ├── Utils.pm
    │       │   └── t
    │       │   │   └── monolingual_greedy.t
    │       ├── Alpino
    │       │   ├── Generator.pm
    │       │   ├── Parser.pm
    │       │   └── Run.pm
    │       ├── Clustering
    │       │   └── GoogleNGrams.pm
    │       ├── Compress
    │       │   └── Index.pm
    │       ├── Context
    │       │   ├── Sentences.pm
    │       │   └── t
    │       │   │   └── sents.t
    │       ├── CopenhagenDT
    │       │   └── XmlizeTagFormat.pm
    │       ├── CorefSegments
    │       │   ├── CS
    │       │   │   └── Features.pm
    │       │   ├── Features.pm
    │       │   └── InterSentLinks.pm
    │       ├── Coreference
    │       │   ├── AnteCandsGetter.pm
    │       │   ├── BaseCorefFeatures.pm
    │       │   ├── CS
    │       │   │   ├── PronAnaphFilter.pm
    │       │   │   ├── PronCorefFeatures.pm
    │       │   │   └── RelPronAnaphFilter.pm
    │       │   ├── CombinedDistrModel.pm
    │       │   ├── ContentWordFilter.pm
    │       │   ├── CorefFeatures.pm
    │       │   ├── DistrModelComponent.pm
    │       │   ├── DistrModelComponent
    │       │   │   ├── CandOrd.pm
    │       │   │   ├── Gender.pm
    │       │   │   ├── Number.pm
    │       │   │   ├── ParentLemma.pm
    │       │   │   └── SentDist.pm
    │       │   ├── EN
    │       │   │   ├── PronAnaphFilter.pm
    │       │   │   └── PronCorefFeatures.pm
    │       │   ├── Features
    │       │   │   ├── Aligned.pm
    │       │   │   ├── AllMonolingual.pm
    │       │   │   ├── CS
    │       │   │   │   ├── AllMonolingual.pm
    │       │   │   │   └── ReflPron.pm
    │       │   │   ├── Container.pm
    │       │   │   ├── Coreference.pm
    │       │   │   ├── Distance.pm
    │       │   │   ├── EN
    │       │   │   │   └── AllMonolingual.pm
    │       │   │   ├── Morphological.pm
    │       │   │   ├── ReflPron.pm
    │       │   │   ├── RelPron.pm
    │       │   │   ├── TectoSyntax.pm
    │       │   │   └── remove_PronCoref.pm
    │       │   ├── NADA.pm
    │       │   ├── NodeFilter.pm
    │       │   ├── NodeFilter
    │       │   │   ├── Coord.pm
    │       │   │   ├── DemonPron.pm
    │       │   │   ├── Noun.pm
    │       │   │   ├── PersPron.pm
    │       │   │   ├── RelPron.pm
    │       │   │   ├── Utils.pm
    │       │   │   └── Verb.pm
    │       │   ├── PerceptronRanker.pm
    │       │   ├── ProbDistrRanker.pm
    │       │   ├── PronCorefFeatures.pm
    │       │   ├── RuleBasedRanker.pm
    │       │   ├── SynonTranslDictModel.pm
    │       │   ├── Utils.pm
    │       │   ├── ValueTransformer.pm
    │       │   └── t
    │       │   │   └── ante_cands_getter.t
    │       ├── Depfix
    │       │   ├── Base.pm
    │       │   ├── CS
    │       │   │   ├── DepfixBase.pm
    │       │   │   ├── DiacriticsStripper.pm
    │       │   │   ├── FixLogger.pm
    │       │   │   ├── FormGenerator.pm
    │       │   │   ├── FormemeSplitter.pm
    │       │   │   ├── NodeInfoGetter.pm
    │       │   │   ├── NumberSwitcher.pm
    │       │   │   ├── PairGetter.pm
    │       │   │   ├── SimpleTranslator.pm
    │       │   │   └── TagHandler.pm
    │       │   ├── DecisionTreesModel.pm
    │       │   ├── EN
    │       │   │   └── NodeInfoGetter.pm
    │       │   ├── FixLogger.pm
    │       │   ├── FormGenerator.pm
    │       │   ├── MaxEntModel.pm
    │       │   ├── Model.pm
    │       │   ├── NaiveBayesModel.pm
    │       │   ├── NodeInfoGetter.pm
    │       │   └── OldDecisionTreesModel.pm
    │       ├── Discourse
    │       │   └── EVALD
    │       │   │   └── Features.pm
    │       ├── ElixirFM.pm
    │       ├── ElixirFM
    │       │   └── Exec.pm
    │       ├── EnglishMorpho
    │       │   ├── Analysis.pm
    │       │   ├── Lemmatizer.pm
    │       │   ├── Makefile
    │       │   ├── exceptions
    │       │   │   ├── README
    │       │   │   ├── adj_adv.pl
    │       │   │   ├── contractions.pl
    │       │   │   ├── errors.pl
    │       │   │   ├── false_negation.pl
    │       │   │   ├── nouns_invariant_ending_with_s.pl
    │       │   │   ├── nouns_invariant_not_ending_with_s.list
    │       │   │   ├── nouns_invariant_polemic.list
    │       │   │   ├── nouns_irregular.pl
    │       │   │   ├── nouns_latin.pl
    │       │   │   ├── nouns_plural_es.pl
    │       │   │   ├── nouns_plural_s.pl
    │       │   │   ├── verbs_cked.pl
    │       │   │   ├── verbs_doubling.pl
    │       │   │   ├── verbs_ending_with_e.pl
    │       │   │   ├── verbs_irregular.pl
    │       │   │   ├── verbs_not_ending_with_e.pl
    │       │   │   └── verbs_other.pl
    │       │   ├── negation
    │       │   └── t
    │       │   │   ├── contractions.txt
    │       │   │   └── morpho.t
    │       ├── Eval
    │       │   └── Bleu.pm
    │       ├── FSM
    │       │   └── Foma.pm
    │       ├── FeatureExtract.pm
    │       ├── Flect
    │       │   ├── Base.pm
    │       │   ├── Classif.pm
    │       │   ├── FlectBlock.pm
    │       │   ├── FlectClassifBlock.pm
    │       │   ├── test.pl
    │       │   └── test.py
    │       ├── FormsGenerator
    │       │   ├── TA.pm
    │       │   ├── t
    │       │   │   └── test_clitics.t
    │       │   └── test.pl
    │       ├── Gazetteer
    │       │   ├── Engine.pm
    │       │   ├── Features.pm
    │       │   └── RuleBasedScorer.pm
    │       ├── GoogleTranslate
    │       │   ├── APIv1.pm
    │       │   ├── APIv2.pm
    │       │   ├── batch.pl
    │       │   ├── batch_1by1.pl
    │       │   ├── simple.pl
    │       │   └── t
    │       │   │   ├── APIv2.t
    │       │   │   └── texts.txt
    │       ├── IO
    │       │   ├── Arff.pm
    │       │   └── t
    │       │   │   ├── arff.t
    │       │   │   └── test.arff
    │       ├── IR
    │       │   ├── ESA.pm
    │       │   ├── esa_script.pl
    │       │   └── t
    │       │   │   └── esa.t
    │       ├── IXAPipe
    │       │   ├── ES
    │       │   │   ├── TagAndParse.pm
    │       │   │   └── t
    │       │   │   │   └── ixapipe_tag_and_parse.t
    │       │   └── EU
    │       │   │   ├── TokenizeAndParse.pm
    │       │   │   └── t
    │       │   │       └── ixapipe_tag_and_parse.t
    │       ├── Interset
    │       │   ├── Driver.pm
    │       │   ├── Example
    │       │   │   └── Simple.pm
    │       │   ├── SimpleDriver.pm
    │       │   └── t
    │       │   │   └── example_simple.t
    │       ├── LM
    │       │   ├── FormInfo.pm
    │       │   ├── Lemma.pm
    │       │   ├── MorphoLM.pm
    │       │   ├── TreeLM.pm
    │       │   └── t
    │       │   │   ├── interactive_testTreeLM.pl
    │       │   │   ├── test_MorphoLM.pl
    │       │   │   ├── test_TreeLM.pl
    │       │   │   └── test_en_TreeLM.pl
    │       ├── LXSuite.pm
    │       ├── Lexicon
    │       │   ├── CS.pm
    │       │   ├── CS
    │       │   │   ├── AdjectivalComplements.pm
    │       │   │   ├── Adverbia.pm
    │       │   │   ├── Aspect.pm
    │       │   │   ├── NamedEntityLabels.pm
    │       │   │   ├── Numerals.pm
    │       │   │   ├── PersonalRoles.pm
    │       │   │   ├── Prefixes.pm
    │       │   │   └── Reflexivity.pm
    │       │   ├── DerivDict
    │       │   │   ├── Dictionary.pm
    │       │   │   ├── Lexeme.pm
    │       │   │   └── t
    │       │   │   │   └── deriv_dict_general.t
    │       │   ├── Derivations
    │       │   │   ├── CS.pm
    │       │   │   └── test_cs.pl
    │       │   ├── EN.pm
    │       │   ├── EN
    │       │   │   ├── Countability.pm
    │       │   │   ├── First_names.pm
    │       │   │   ├── Hypernyms.pm
    │       │   │   └── PersonalRoles.pm
    │       │   ├── Generation
    │       │   │   ├── CS.pm
    │       │   │   ├── ES.pm
    │       │   │   ├── ES_Morphology.pm
    │       │   │   ├── PT.pm
    │       │   │   ├── RU.pm
    │       │   │   └── t
    │       │   │   │   ├── cs.t
    │       │   │   │   ├── es.t
    │       │   │   │   ├── pt.t
    │       │   │   │   └── ru.t
    │       │   ├── JA.pm
    │       │   ├── NL
    │       │   │   ├── ErgativeVerbs.pm
    │       │   │   ├── Pronouns.pm
    │       │   │   ├── VerbformOrder.pm
    │       │   │   ├── alpino_extract_gender.pl
    │       │   │   └── alpino_extract_mwus.pl
    │       │   ├── UniversalTagset.pm
    │       │   └── t
    │       │   │   └── cs_lemma.t
    │       ├── ML
    │       │   ├── Categorizer
    │       │   │   └── Categorizer.pm
    │       │   ├── Classifier.pm
    │       │   ├── Classifier
    │       │   │   ├── Linear.pm
    │       │   │   └── RuleBased.pm
    │       │   ├── Clustering
    │       │   │   ├── C_Cluster.pm
    │       │   │   └── t
    │       │   │   │   └── c_cluster.t
    │       │   ├── Factory.pm
    │       │   ├── Learner.pm
    │       │   ├── LinearRegression.pm
    │       │   ├── LinearRegression
    │       │   │   ├── Model.pm
    │       │   │   └── Util.pm
    │       │   ├── MLProcess.pm
    │       │   ├── MLProcessBlock.pm
    │       │   ├── MLProcessBlockPiped.pm
    │       │   ├── MLProcessPiped.pm
    │       │   ├── MaxEnt
    │       │   │   ├── Learner.pm
    │       │   │   └── Model.pm
    │       │   ├── NormalizeProb.pm
    │       │   ├── Ranker.pm
    │       │   ├── Ranker
    │       │   │   └── Features.pm
    │       │   ├── ScikitLearn
    │       │   │   ├── Classifier.pm
    │       │   │   └── Model.pm
    │       │   ├── TabSpace
    │       │   │   └── Util.pm
    │       │   ├── VowpalWabbit
    │       │   │   ├── Classifier.pm
    │       │   │   ├── CsoaaLdfClassifier.pm
    │       │   │   ├── Learner.pm
    │       │   │   ├── Model.pm
    │       │   │   ├── Ranker.pm
    │       │   │   └── Util.pm
    │       │   ├── Weka
    │       │   │   └── Util.pm
    │       │   └── t
    │       │   │   ├── lin_regres.t
    │       │   │   └── vw.t
    │       ├── MLFix
    │       │   ├── .ScikitLearn.pm.swp
    │       │   ├── Base.pm
    │       │   ├── CS
    │       │   │   ├── FormGenerator.pm
    │       │   │   └── NumberSwitcher.pm
    │       │   ├── DE
    │       │   │   ├── FormGenerator.pm
    │       │   │   └── NumberSwitcher.pm
    │       │   ├── FixLogger.pm
    │       │   ├── FormGenerator.pm
    │       │   ├── Model.pm
    │       │   ├── NodeInfoGetter.pm
    │       │   └── ScikitLearn.pm
    │       ├── Mate
    │       │   └── Run.pm
    │       ├── Memcached
    │       │   ├── Memcached.pm
    │       │   ├── memcached.pl
    │       │   └── t
    │       │   │   ├── MemcachedTest.pm
    │       │   │   ├── check-lemmas.pl
    │       │   │   ├── extract-lemmas.pl
    │       │   │   ├── test-concurrent-access.t
    │       │   │   └── test-model-loading.t
    │       ├── Moses.pm
    │       ├── NER
    │       │   ├── NameTag.pm
    │       │   ├── Role.pm
    │       │   ├── Stanford.pm
    │       │   └── t
    │       │   │   ├── nametag_cs.t
    │       │   │   ├── nametag_en.t
    │       │   │   ├── stanford2008.t
    │       │   │   └── stanford2015.t
    │       ├── NamedEnt
    │       │   ├── Features.pl
    │       │   ├── Features
    │       │   │   ├── Common.pm
    │       │   │   ├── Containers.pm
    │       │   │   ├── Context.pm
    │       │   │   ├── Oneword.pm
    │       │   │   ├── Threeword.pm
    │       │   │   └── Twoword.pm
    │       │   ├── README
    │       │   ├── SVMTools.pm
    │       │   ├── SVMtuning_accuracy_based_results.txt
    │       │   ├── TestSVM.pl
    │       │   ├── TrainSVM.pl
    │       │   ├── TuneSVM.map.pl
    │       │   ├── TuneSVM.reduce.pl
    │       │   ├── other_models
    │       │   │   ├── TestMaxEnt.pl
    │       │   │   ├── TestNaiveBayes.pl
    │       │   │   ├── TestSVM.pl
    │       │   │   ├── TrainMaxEnt.pl
    │       │   │   ├── TrainNaiveBayes.pl
    │       │   │   └── TrainSVM_standalone.pl
    │       │   └── tuneWrapper.sh
    │       ├── Orthography
    │       │   └── TA.pm
    │       ├── PMLTQ
    │       │   └── Query.pm
    │       ├── Parallel
    │       │   ├── MessageBoard.pm
    │       │   └── t
    │       │   │   ├── msg_board.t
    │       │   │   └── synchronize.t
    │       ├── Parser
    │       │   ├── Cabocha.pm
    │       │   ├── Charniak
    │       │   │   ├── Charniak.pm
    │       │   │   ├── Node.pm
    │       │   │   └── t
    │       │   │   │   └── charniak.t
    │       │   ├── Ensemble
    │       │   │   └── Ensemble.pm
    │       │   ├── Fanse.pm
    │       │   ├── JDEPP.pm
    │       │   ├── LXParser.pm
    │       │   ├── MST.pm
    │       │   ├── MST
    │       │   │   └── Czech.pm
    │       │   ├── MSTperl.pm
    │       │   ├── MSTperl
    │       │   │   ├── Config.pm
    │       │   │   ├── Edge.pm
    │       │   │   ├── FeaturesControl.pm
    │       │   │   ├── Labeller.pm
    │       │   │   ├── ModelAdditional.pm
    │       │   │   ├── ModelBase.pm
    │       │   │   ├── ModelLabelling.pm
    │       │   │   ├── ModelUnlabelled.pm
    │       │   │   ├── MultiHeteroModelParser.pm
    │       │   │   ├── MultiModelParser.pm
    │       │   │   ├── Node.pm
    │       │   │   ├── ParsedSentencesCombiner.pm
    │       │   │   ├── Parser.pm
    │       │   │   ├── ParserCombiner.pm
    │       │   │   ├── Reader.pm
    │       │   │   ├── RootNode.pm
    │       │   │   ├── Sentence.pm
    │       │   │   ├── TrainerBase.pm
    │       │   │   ├── TrainerLabelling.pm
    │       │   │   ├── TrainerUnlabelled.pm
    │       │   │   ├── Writer.pm
    │       │   │   ├── samples
    │       │   │   │   ├── labeller_test.sh
    │       │   │   │   ├── labeller_train.sh
    │       │   │   │   ├── sample.config
    │       │   │   │   ├── sample_test.sh
    │       │   │   │   ├── sample_test.tsv
    │       │   │   │   ├── sample_train.sh
    │       │   │   │   ├── sample_train.tsv
    │       │   │   │   ├── test_labeller_tsv.pl
    │       │   │   │   ├── test_tsv.pl
    │       │   │   │   ├── train_labeller_tsv.pl
    │       │   │   │   ├── train_tsv.pl
    │       │   │   │   ├── treex_input.txt
    │       │   │   │   └── treex_parse.scen
    │       │   │   ├── scripts
    │       │   │   │   ├── TagMorceEnglishCoNLL.pl
    │       │   │   │   ├── compare_lines.pl
    │       │   │   │   ├── conll2inline.pl
    │       │   │   │   ├── inline2conll.pl
    │       │   │   │   ├── inline_sentences_reorder.pl
    │       │   │   │   ├── labelled_parse_test.sh
    │       │   │   │   ├── labeller_test.sh
    │       │   │   │   ├── labeller_train_and_test.sh
    │       │   │   │   ├── make_czech_tags.pl
    │       │   │   │   ├── pcedt2conll.sh
    │       │   │   │   ├── pcedt2conll_tag_and_parse_en.sh
    │       │   │   │   ├── pcedt2conll_tag_and_parse_en_worsen_cs.sh
    │       │   │   │   ├── pcedt2conll_td.sh
    │       │   │   │   ├── pdtT2conll.sh
    │       │   │   │   ├── simple_lemmas.pl
    │       │   │   │   ├── split_afun_ismember.sh
    │       │   │   │   ├── test_conll.pl
    │       │   │   │   ├── test_conll_multimodel.pl
    │       │   │   │   ├── test_conll_multimodel_weighted.pl
    │       │   │   │   ├── test_conll_multimodel_weighted_f.pl
    │       │   │   │   ├── test_conll_multimodel_weighted_f_multiconf.pl
    │       │   │   │   ├── test_conll_multimodel_weighted_f_norm.pl
    │       │   │   │   ├── test_conll_multimodel_weighted_f_norm_printout.pl
    │       │   │   │   ├── test_conll_multimodel_weighted_f_printout.pl
    │       │   │   │   ├── test_conll_multimodel_weighted_norm.pl
    │       │   │   │   ├── test_conll_multiplefiles.pl
    │       │   │   │   ├── test_conll_multiplefiles_printout.pl
    │       │   │   │   ├── test_conll_parsecomb.pl
    │       │   │   │   ├── test_conll_parsecomb_weighted.pl
    │       │   │   │   ├── test_conll_treecomb_weighted.pl
    │       │   │   │   ├── test_conll_treecomb_weighted_f.pl
    │       │   │   │   ├── test_conll_treecomb_weighted_f_multiconf.pl
    │       │   │   │   ├── test_conll_treecomb_weighted_f_printout.pl
    │       │   │   │   ├── test_labeller_tsv.pl
    │       │   │   │   ├── test_parse_and_label.pl
    │       │   │   │   ├── test_rur_conll.pl
    │       │   │   │   ├── train_conll.pl
    │       │   │   │   ├── train_labeller_tsv.pl
    │       │   │   │   ├── unlabelled_test.sh
    │       │   │   │   ├── unlabelled_test_rur.sh
    │       │   │   │   ├── unlabelled_train_and_test.sh
    │       │   │   │   └── worsen_pcedt.sh
    │       │   │   └── t
    │       │   │   │   ├── sample.config
    │       │   │   │   ├── sample_test.tsv
    │       │   │   │   ├── sample_train.tsv
    │       │   │   │   └── train_and_test.t
    │       │   ├── Malt.pm
    │       │   ├── ParsingLatin.pm
    │       │   ├── RUR
    │       │   │   ├── Config.pm
    │       │   │   ├── Edge.pm
    │       │   │   ├── FeaturesControl.pm
    │       │   │   ├── Labeller.pm
    │       │   │   ├── ModelAdditional.pm
    │       │   │   ├── ModelBase.pm
    │       │   │   ├── ModelLabelling.pm
    │       │   │   ├── ModelUnlabelled.pm
    │       │   │   ├── Node.pm
    │       │   │   ├── Parser.pm
    │       │   │   ├── RURParser.pm
    │       │   │   ├── Reader.pm
    │       │   │   ├── RootNode.pm
    │       │   │   ├── Sentence.pm
    │       │   │   ├── TrainerBase.pm
    │       │   │   ├── TrainerLabelling.pm
    │       │   │   ├── TrainerUnlabelled.pm
    │       │   │   ├── Writer.pm
    │       │   │   ├── samples
    │       │   │   │   ├── labeller_test.sh
    │       │   │   │   ├── labeller_train.sh
    │       │   │   │   ├── sample.config
    │       │   │   │   ├── sample_test.sh
    │       │   │   │   ├── sample_test.tsv
    │       │   │   │   ├── sample_train.sh
    │       │   │   │   ├── sample_train.tsv
    │       │   │   │   ├── test_labeller_tsv.pl
    │       │   │   │   ├── test_tsv.pl
    │       │   │   │   ├── train_labeller_tsv.pl
    │       │   │   │   ├── train_tsv.pl
    │       │   │   │   ├── treex_input.txt
    │       │   │   │   └── treex_parse.scen
    │       │   │   └── t
    │       │   │   │   ├── sample.config
    │       │   │   │   ├── sample_test.tsv
    │       │   │   │   ├── sample_train.tsv
    │       │   │   │   ├── test_rur.t
    │       │   │   │   └── train_and_test_rur.t
    │       │   ├── Role.pm
    │       │   ├── Simple
    │       │   │   ├── FR.pm
    │       │   │   └── XY.pm
    │       │   ├── Zpar.pm
    │       │   └── t
    │       │   │   ├── fanse.t
    │       │   │   ├── jdepp.t
    │       │   │   ├── malt.t
    │       │   │   └── zpar.t
    │       ├── Phrase2Dep
    │       │   ├── Pennconverter.pm
    │       │   ├── StanfordConverter.pm
    │       │   └── t
    │       │   │   └── pennconverter.t
    │       ├── PhraseBuilder
    │       │   ├── AlpinoToPrague.pm
    │       │   ├── BasePhraseBuilder.pm
    │       │   ├── MoscowToPrague.pm
    │       │   ├── Prague.pm
    │       │   ├── PragueToUD.pm
    │       │   ├── StanfordToPrague.pm
    │       │   ├── StanfordToUD.pm
    │       │   ├── ToPrague.pm
    │       │   ├── ToUD.pm
    │       │   └── UDToPrague.pm
    │       ├── PhraseParser
    │       │   ├── Charniak.pm
    │       │   ├── Common.pm
    │       │   ├── Stanford.pm
    │       │   └── t
    │       │   │   ├── charniak.t
    │       │   │   └── stanford.t
    │       ├── Probe.pm
    │       ├── ProcessUtils.pm
    │       ├── Python
    │       │   ├── RunFunc.pm
    │       │   └── execute.py
    │       ├── ReferentialIt
    │       │   ├── Features.pm
    │       │   └── Utils.pm
    │       ├── SRLParser
    │       │   ├── FeatureExtractor.pm
    │       │   ├── LPInference.pm
    │       │   ├── PredicateIdentifier.pm
    │       │   ├── submit_training_to_maxent.sh
    │       │   └── train.sh
    │       ├── SandhiHandler
    │       │   ├── TA.pm
    │       │   └── t
    │       │   │   └── tamil_spelling.t
    │       ├── Segment
    │       │   ├── CS
    │       │   │   └── RuleBased.pm
    │       │   ├── EN
    │       │   │   └── RuleBased.pm
    │       │   ├── ES
    │       │   │   └── RuleBased.pm
    │       │   ├── EU
    │       │   │   └── RuleBased.pm
    │       │   ├── LA
    │       │   │   └── RuleBased.pm
    │       │   ├── NL
    │       │   │   └── RuleBased.pm
    │       │   ├── PT
    │       │   │   └── RuleBased.pm
    │       │   ├── RU
    │       │   │   └── RuleBased.pm
    │       │   ├── RuleBased.pm
    │       │   ├── TA
    │       │   │   └── RuleBased.pm
    │       │   └── t
    │       │   │   └── rule_based.t
    │       ├── Stemmer
    │       │   └── TA
    │       │   │   ├── CorpusSuffixSplitter.pl
    │       │   │   ├── Simple.pm
    │       │   │   ├── SuffixSplitter.pm
    │       │   │   ├── sample.txt
    │       │   │   ├── test.pl
    │       │   │   └── test1.pl
    │       ├── Storage
    │       │   └── Storable.pm
    │       ├── Tagger
    │       │   ├── Featurama.pm
    │       │   ├── Featurama
    │       │   │   ├── CS.pm
    │       │   │   ├── Dummy.pm
    │       │   │   ├── EN.pm
    │       │   │   └── t
    │       │   │   │   ├── featurama_cs.t
    │       │   │   │   └── featurama_en.t
    │       │   ├── HunPoS.pm
    │       │   ├── LXTagger.pm
    │       │   ├── MElt.pm
    │       │   ├── MeCab.pm
    │       │   ├── MorphoDiTa.pm
    │       │   ├── Role.pm
    │       │   ├── Simple
    │       │   │   ├── FR.pm
    │       │   │   └── XY.pm
    │       │   ├── Stanford.pm
    │       │   ├── TnT.pm
    │       │   ├── TreeTagger.pm
    │       │   └── t
    │       │   │   ├── featurama.t
    │       │   │   ├── mecab.t
    │       │   │   ├── morphodita.t
    │       │   │   └── tree_tagger.t
    │       ├── TranslationModel
    │       │   ├── Chain.pm
    │       │   ├── Combined
    │       │   │   ├── Backoff.pm
    │       │   │   └── Interpolated.pm
    │       │   ├── Common.pm
    │       │   ├── Derivative
    │       │   │   ├── CS2RU
    │       │   │   │   ├── ReflexiveSja.pm
    │       │   │   │   └── Transliterate.pm
    │       │   │   ├── Common.pm
    │       │   │   └── EN2CS
    │       │   │   │   ├── Deadjectival_adverbs.pm
    │       │   │   │   ├── Deverbal_adjectives.pm
    │       │   │   │   ├── Hyphen_compounds.pm
    │       │   │   │   ├── Nouns_to_adjectives.pm
    │       │   │   │   ├── Numbers.pm
    │       │   │   │   ├── Prefixes.pm
    │       │   │   │   ├── Suffixes.pm
    │       │   │   │   ├── Transliterate.pm
    │       │   │   │   ├── Verbs_to_nouns.pm
    │       │   │   │   └── _readme.txt
    │       │   ├── Factory.pm
    │       │   ├── Features
    │       │   │   ├── EN_coref.pm
    │       │   │   ├── It.pm
    │       │   │   └── Standard.pm
    │       │   ├── Learner.pm
    │       │   ├── ML
    │       │   │   ├── Learner.pm
    │       │   │   └── Model.pm
    │       │   ├── MaxEnt
    │       │   │   └── FeatureExt
    │       │   │   │   └── EN2CS.pm
    │       │   ├── Memcached
    │       │   │   └── Model.pm
    │       │   ├── Model.pm
    │       │   ├── NaiveBayes
    │       │   │   ├── FeatureExt
    │       │   │   │   └── EN2CS.pm
    │       │   │   ├── Learner.pm
    │       │   │   ├── Model.pm
    │       │   │   └── test.pl
    │       │   ├── Rulebased
    │       │   │   └── Model.pm
    │       │   ├── Static
    │       │   │   ├── Model.pm
    │       │   │   ├── RelFreq
    │       │   │   │   ├── Learner.pm
    │       │   │   │   └── Learner_new.pm
    │       │   │   ├── Universal.pm
    │       │   │   └── Variant.pm
    │       │   ├── TwoNode.pm
    │       │   └── t
    │       │   │   ├── learner.t
    │       │   │   ├── maxent_learner_new.t
    │       │   │   ├── static_learner.t
    │       │   │   ├── static_learner_new.t
    │       │   │   └── vw_learner.t
    │       ├── Transliteration
    │       │   ├── DowngradeUTF8forISO2.pm
    │       │   ├── TA.pm
    │       │   └── t
    │       │   │   └── check_utf8_latin_conversion.t
    │       ├── Triggers
    │       │   ├── FeatureFilter.pm
    │       │   └── Features.pm
    │       ├── UDPipe.pm
    │       ├── UMR
    │       │   ├── CS
    │       │   │   └── GrammatemeSetter.pm
    │       │   ├── Common.pm
    │       │   ├── GrammatemeSetter.pm
    │       │   ├── LA
    │       │   │   └── GrammatemeSetter.pm
    │       │   ├── PDTV2PB.pm
    │       │   └── t
    │       │   │   ├── pdt2pb-w.csv
    │       │   │   ├── pdt2pb.csv
    │       │   │   ├── pdtv2pb.t
    │       │   │   └── vallex.xml
    │       ├── Vallex
    │       │   ├── FrameElement.pm
    │       │   ├── ValencyFrame.pm
    │       │   └── t
    │       │   │   ├── print_all_forms.pl
    │       │   │   ├── print_all_frames.pl
    │       │   │   └── valency_frame.t
    │       ├── Word2vec
    │       │   ├── readbin
    │       │   ├── readbin.c
    │       │   └── txt2vw.pl
    │       ├── Wordnet
    │       │   └── SimpleQuery.pm
    │       └── t
    │       │   └── udpipe.t
    │   ├── Tutorial.pod
    │   ├── Tutorial
    │       ├── Config.pod
    │       ├── FirstSteps.pod
    │       ├── Install.pod
    │       ├── ReadersAndWriters.pod
    │       ├── Scen.pod
    │       ├── WritingNewReaders.pod
    │       ├── generate_html.sh
    │       └── treexpod.css
    │   ├── Unilang.pm
    │   └── t
    │       ├── Makefile
    │       ├── cycle_detection_restore.pl
    │       ├── cycle_detection_test.pl
    │       ├── en_analysis.scen
    │       ├── sample-cs.txt
    │       └── sample-en.txt
├── packaging
    ├── Makefile
    ├── README.md
    ├── Treex-CS
    │   ├── Changes.template
    │   ├── Makefile
    │   └── dist.ini.template
    ├── Treex-Core
    │   ├── Changes.template
    │   ├── Makefile
    │   ├── compile_grammar.pl
    │   ├── dist.ini.template
    │   └── postprocess_POD.pl
    ├── Treex-Doc
    │   ├── Changes.template
    │   ├── Makefile
    │   ├── dist.ini.template
    │   └── t
    │   │   └── doctest.t
    ├── Treex-EN
    │   ├── Changes.template
    │   ├── Makefile
    │   └── dist.ini.template
    ├── Treex-JA
    │   ├── Changes.template
    │   ├── Makefile
    │   ├── dist.ini.template
    │   ├── share
    │   │   └── examples
    │   │   │   ├── sample-ja.txt
    │   │   │   └── sample.scen
    │   └── weaver.ini.template
    ├── Treex-Parser-MSTperl
    │   ├── Changes.template
    │   ├── Makefile
    │   └── dist.ini.template
    ├── Treex-Unilang
    │   ├── Changes.template
    │   ├── Makefile
    │   └── dist.ini.template
    ├── common.mk
    ├── perlcritic.rc
    └── test_treex_installation_by_cpanm.sh
└── training
    └── treelm
        ├── Makefile
        ├── README
        ├── create_ids.pl
        ├── create_models.pl
        ├── en
            ├── czeng
            │   └── Makefile
            ├── stackoverflow
            │   ├── Makefile
            │   └── clean_stackexchange.pl
            ├── superuser
            │   ├── Makefile
            │   └── clean_stackexchange.pl
            ├── ubuntu-dialogue
            │   └── Makefile
            ├── wmt15-newscrawl14v2
            │   └── Makefile
            └── wmt15-newsdiscuss
            │   └── Makefile
        └── print_plsgz.pl


/.editorconfig:
--------------------------------------------------------------------------------
 1 | # EditorConfig helps developers define and maintain consistent
 2 | # coding styles between different editors and IDEs
 3 | # editorconfig.org
 4 | 
 5 | root = true
 6 | 
 7 | 
 8 | [*]
 9 | 
10 | # Change these settings to your own preference
11 | indent_style = space
12 | indent_size = 4
13 | 
14 | # We recommend you to keep these unchanged
15 | end_of_line = lf
16 | charset = utf-8
17 | trim_trailing_whitespace = true
18 | insert_final_newline = true
19 | 
20 | [*.md]
21 | trim_trailing_whitespace = false
22 | 
23 | [Makefile]
24 | indent_style = tab
25 | indent_size = 8
26 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | /devel
2 | [0-9][0-9][0-9]-cluster-run-*
3 | *.pyc
4 | /lib/Treex/Tool/Flect/flect/
5 | .*.swp
6 | *.lnk
7 | 


--------------------------------------------------------------------------------
/.perltidyrc:
--------------------------------------------------------------------------------
 1 | # Use Conway's PBP recommendations
 2 | --perl-best-practices
 3 | # (which means -l=78 -i=4 -ci=4 -st -se -vt=2 -cti=0 -pt=1 -bt=1 -sbt=1 -bbt=1 -nsfs -nolq
 4 | #              -wbb="% + - * / x != == >= <= =~ !~ < > | & = **= += *= &= <<= &&= -= /= |= >>= ||= //= .= %= ^= x="
 5 | # )
 6 | 
 7 | # with the exception of:
 8 | 
 9 | --maximum-line-length=0
10 | # I think it is better to break lines manually (and yes, ideally to max 78 characters),
11 | # When there are too many indent levels (nested blocks)
12 | # it is better to refactor rather than auto-break lines.
13 | 
14 | --no-delete-old-newlines
15 | # Sometimes I consider some statements (e.g. list expression) nicer to read when divided on more lines.
16 | 
17 | --no-outdent-labels
18 | # In PBP there is no outdenting, but Conway forgot to add it to his perltidyrc
19 | 
20 | # From shell you can use the settings above with
21 | # perltidy -pbp -l=0 -ndnl -nola


--------------------------------------------------------------------------------
/bin/derimor:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | 
 3 | use strict;
 4 | use warnings;
 5 | 
 6 | use Treex::Tool::DerivMorpho::Scenario;
 7 | my $scenario_text = join q{ }, @ARGV;    # all arguments together form the scenario
 8 | print "scenario: $scenario_text\n";
 9 | my $deriv_scenario = Treex::Tool::DerivMorpho::Scenario->new( { from_string => $scenario_text } );
10 | $deriv_scenario->apply_to_dictionary(undef);
11 | 
12 | 


--------------------------------------------------------------------------------
/bin/t/TestsCommon.pm:
--------------------------------------------------------------------------------
 1 | package TestsCommon;
 2 | 
 3 | use strict;
 4 | use warnings;
 5 | 
 6 | use File::Basename;
 7 | 
 8 | # Shared set-up for the test scripts in this directory: changes into the
 9 | # directory of this file, looks for the treex script relative to it and
10 | # publishes the result in $TREEX_FILE and $TREEX_CMD.
11 | 
12 | my $act_dir = dirname(__FILE__);
13 | chdir($act_dir);
14 | 
15 | # Only used in the diagnostic message below; chomp keeps it on one line.
16 | chomp( my $pwd = `pwd` );
17 | 
18 | # Try ../treex first, then fall back to ../bin/treex.
19 | my $treex_file = "./../treex";
20 | if ( !-f $treex_file ) {
21 |     $treex_file = "./../bin/treex";
22 | }
23 | 
24 | if ( !-f $treex_file ) {
25 |     die "DIR: $act_dir; PWD: $pwd; TREEX: $treex_file";
26 | }
27 | 
28 | # $TREEX_CMD runs treex with the perl interpreter that runs the tests ($^X).
29 | our $TREEX_FILE = $treex_file;
30 | our $TREEX_CMD  = $^X . " " . $TREEX_FILE;
31 | 
32 | 1;


--------------------------------------------------------------------------------
/bin/t/eval_block.t:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | 
 3 | use strict;
 4 | use warnings;
 5 | 
 6 | use Treex::Core::Run q(treex);
 7 | 
 8 | use Test::More tests => 1;
 9 | use Test::Output;
10 | 
11 | for my $index ( 1 .. 3 ) {
12 |     Treex::Core::Document->new()->save("dummy$index.treex");
13 | }
14 | 
15 | # Three dummy documents are processed, and the expected output is '111'.
16 | my $cmdline_arguments = "-q Util::Eval document='print 1' -- !dummy?.treex";
17 | stdout_is( sub { treex $cmdline_arguments }, '111', "checking Util::Eval: treex $cmdline_arguments" );
18 | 
19 | # Clean up the temporary documents created above.
20 | unlink glob "dummy*";
21 | 


--------------------------------------------------------------------------------
/bin/t/scenarios/print3.scen:
--------------------------------------------------------------------------------
1 | # Scenarios can contain comments
2 | Util::Eval document='print 3;' # and end-of-line comments
3 | 


--------------------------------------------------------------------------------
/bin/t/scenarios/print4.scen:
--------------------------------------------------------------------------------
1 | Util::Eval document='print 4;'
2 | 


--------------------------------------------------------------------------------
/bin/t/scenarios/scen_in_scen.scen:
--------------------------------------------------------------------------------
1 | print4.scen
2 | 


--------------------------------------------------------------------------------
/bin/treex:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | use strict;
 3 | use warnings;
 4 | 
 5 | use Treex::Core::Run q(treex);
 6 | treex( \@ARGV );    # pass the command-line arguments on to Treex::Core::Run
 7 | 
 8 | 
 9 | __END__
10 | 
11 | =encoding utf-8
12 | 
13 | =head1 NAME
14 | 
15 | treex - bash front-end for Treex::Core::Run
16 | 
17 | =head1 DESCRIPTION
18 | 
19 | This is a thin front-end for calling
20 | functionality of Treex::Core::Run from
21 | the bash command line. See Treex::Core::Run
22 | for the description of arguments.
23 | 
24 | =head1 AUTHOR
25 | 
26 | Zdeněk Žabokrtský <zabokrtsky@ufal.mff.cuni.cz>
27 | 
28 | 
29 | =head1 COPYRIGHT AND LICENSE
30 | 
31 | Copyright © 2011 by Institute of Formal and Applied Linguistics, Charles University in Prague
32 | 
33 | This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself.
34 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/A2A/CS/VocalizePrepos.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Block::A2A::CS::VocalizePrepos;
 2 | use utf8;
 3 | use Moose;
 4 | use Treex::Core::Common;
 5 | extends 'Treex::Block::T2A::CS::VocalizePrepos';
 6 | 
 7 | override 'is_prep' => sub {
 8 |     my ( $self, $node ) = @_;
 9 |     # A node is treated as a preposition when its tag starts with "R".
10 |     return $node->tag =~ m/^R/;
11 | };
12 | 
13 | 1;
14 | 
15 | =head1 NAME 
16 | 
17 | Treex::Block::A2A::CS::VocalizePrepos
18 | 
19 | =head1 DESCRIPTION
20 | 
21 | An a-layer version of L<Treex::Block::T2A::CS::VocalizePrepos>.
22 | 
23 | =head1 AUTHOR
24 | 
25 | Rudolf Rosa <rosa@ufal.mff.cuni.cz>
26 | 
27 | =head1 COPYRIGHT AND LICENSE
28 | 
29 | Copyright © 2015 by Institute of Formal and Applied Linguistics,
30 | Charles University in Prague
31 | 
32 | This module is free software; you can redistribute it and/or modify it
33 | under the same terms as Perl itself.
34 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/A2A/CS/VocalizePreposPlain.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Block::A2A::CS::VocalizePreposPlain;
 2 | use utf8;
 3 | use Moose;
 4 | use Treex::Core::Common;
 5 | extends 'Treex::Block::A2A::CS::VocalizePrepos';
 6 | 
 7 | 1;
 8 | 
 9 | =head1 DESCRIPTION
10 | 
11 | Deprecated, use L<Treex::Block::A2A::CS::VocalizePrepos>.
12 | 
13 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/A2A/EU/FixAspect.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Block::A2A::EU::FixAspect;
 2 | 
 3 | use Moose;
 4 | use Treex::Core::Common;
 5 | use utf8;
 6 | extends 'Treex::Core::Block';
 7 | 
 8 | # Takes the lexical a-node of $tnode, follows its first alignment of type
 9 | # 'orig' and rewrites the form 'detektatu' to 'detektatzen' on that node.
10 | # T-nodes without a lexical a-node or without such an alignment are
11 | # skipped instead of dying on an undefined value.
12 | sub process_tnode {
13 |     my ( $self, $tnode ) = @_;
14 | 
15 |     my $lex_anode = $tnode->get_lex_anode();
16 |     return if !defined $lex_anode;
17 | 
18 |     my ($anode) = $lex_anode->get_aligned_nodes_of_type('orig');
19 |     return if !defined $anode;
20 | 
21 |     if ( $anode->form eq 'detektatu' ) {
22 |         $anode->set_form('detektatzen');
23 |     }
24 | 
25 |     return;
26 | }
27 | 
28 | 1;
29 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/A2A/EU/FixDefIndef.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Block::A2A::EU::FixDefIndef;
 2 | 
 3 | use Moose;
 4 | use Treex::Core::Common;
 5 | use utf8;
 6 | extends 'Treex::Core::Block';
 7 | 
 8 | # If the first node aligned to $anode (alignment type 'int') has a child
 9 | # with the lemma 'a', the form of $anode is reset to its lemma and a new
10 | # child with form and lemma 'bat' is created and shifted right after $anode.
11 | sub process_anode {
12 |     my ( $self, $anode ) = @_;
13 | 
14 |     my ($aligned) = $anode->get_aligned_nodes_of_type('int');
15 |     return if !defined $aligned;
16 | 
17 |     # Nothing to do unless one of the aligned node's children is 'a'.
18 |     return if !grep { $_->lemma eq 'a' } $aligned->get_children();
19 | 
20 |     $anode->set_form( $anode->lemma );
21 |     my $bat = $anode->create_child( { form => 'bat', lemma => 'bat' } );
22 |     $bat->shift_after_node($anode);
23 | 
24 |     return;
25 | }
26 | 
27 | 1;
28 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/A2A/EU/FixMoveRoot.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Block::A2A::EU::FixMoveRoot;
 2 | 
 3 | use Moose;
 4 | use Treex::Core::Common;
 5 | use utf8;
 6 | extends 'Treex::Core::Block';
 7 | 
 8 | # Moves a node that hangs directly below the root and has conll_pos 'ADT':
 9 | # after the last of its 'ADI' descendants if there is one, otherwise
10 | # before its first descendant. The children of the moved node stay put.
11 | sub process_anode {
12 |     my ( $self, $anode ) = @_;
13 | 
14 |     return if !$anode->parent->is_root;
15 |     return if $anode->conll_pos ne 'ADT';
16 | 
17 |     # NOTE(review): get_descendants() is called without { ordered => 1 };
18 |     # confirm that $verbs[-1] / $descendants[0] are the intended nodes.
19 |     my @descendants = $anode->get_descendants();
20 |     my @verbs       = grep { $_->conll_pos eq 'ADI' } @descendants;
21 | 
22 |     if (@verbs) {
23 |         $anode->shift_after_subtree( $verbs[-1], { without_children => 1 } );
24 |     }
25 |     elsif (@descendants) {
26 |         $anode->shift_before_subtree( $descendants[0], { without_children => 1 } );
27 |     }
28 | 
29 |     return;
30 | }
31 | 
32 | 1;
33 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/A2A/FlattenAtree.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Block::A2A::FlattenAtree;
 2 | use Moose;
 3 | use Treex::Core::Common;
 4 | extends 'Treex::Core::Block';
 5 | 
 6 | sub process_atree {
 7 |     my ( $self, $aroot ) = @_;
 8 | 
 9 |     # Re-attach every descendant (requested with ordered => 1)
10 |     # directly below the root of the tree.
11 |     $_->set_parent($aroot) for $aroot->get_descendants( { ordered => 1 } );
12 | }
13 | 
14 | 1;
15 | 
16 | __END__


--------------------------------------------------------------------------------
/lib/Treex/Block/A2A/RemoveEmptySentences.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Block::A2A::RemoveEmptySentences;
 2 | use Moose;
 3 | use Treex::Core::Common;
 4 | extends 'Treex::Core::Block';
 5 | 
 6 | # Removes the whole bundle as soon as one of its zones has a sentence
 7 | # that is undefined, empty or consists of whitespace only.
 8 | sub process_bundle {
 9 |     my ( $self, $bundle ) = @_;
10 | 
11 |     foreach my $zone ( $bundle->get_all_zones() ) {
12 |         my $sentence = $zone->sentence;
13 | 
14 |         # A sentence with at least one non-whitespace character is kept.
15 |         # The defined check avoids "uninitialized value" warnings for
16 |         # zones whose sentence is not set.
17 |         next if defined $sentence && $sentence =~ /\S/;
18 | 
19 |         $bundle->remove();
20 |         return;
21 |     }
22 | 
23 |     return;
24 | }
25 | 
26 | 1;
27 | 
28 | __END__


--------------------------------------------------------------------------------
/lib/Treex/Block/A2A/SetClauseDepth.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Block::A2A::SetClauseDepth;
 2 | use Moose;
 3 | use Treex::Core::Common;
 4 | extends 'Treex::Core::Block';
 5 | 
 6 | # Stores in wild->{clause_depth} how many distinct (true) clause numbers
 7 | # occur on the path from $anode up to, but not including, the root.
 8 | sub process_anode {
 9 |     my ( $self, $anode ) = @_;
10 | 
11 |     my %clause_seen;
12 |     for ( my $node = $anode; !$node->is_root; $node = $node->get_parent() ) {
13 |         my $clause_number = $node->clause_number;
14 |         $clause_seen{$clause_number} = 1 if $clause_number;
15 |     }
16 | 
17 |     $anode->wild->{clause_depth} = scalar keys %clause_seen;
18 |     return;
19 | }
20 | 
21 | 1;
22 | 
23 | __END__
24 | 
25 | =encoding utf-8
26 | 
27 | =head1 NAME
28 | 
29 | Treex::Block::A2A::SetClauseDepth
30 | 
31 | =head1 DESCRIPTION
32 | 
33 | For every a-node, walks from the node up to the root and counts the
34 | distinct non-zero values of C<clause_number> seen on the way (the node
35 | itself included, the root excluded). The count is stored in the node's
36 | wild attribute C<clause_depth>.
37 | 
38 | =head1 AUTHOR
39 | 
40 | Martin Popel
41 | 
42 | =head1 COPYRIGHT AND LICENSE
43 | 
44 | Copyright © 2012 by Institute of Formal and Applied Linguistics, Charles University in Prague
45 | 
46 | This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself.
47 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/A2A/Transform/ComplexVerbRootFirst.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Block::A2A::Transform::ComplexVerbRootFirst;
 2 | use Moose;
 3 | extends 'Treex::Block::A2A::Transform::BaseTransformer';
 4 | use Treex::Tool::ATreeTransformer::ComplexVerb;
 5 | 
 6 | sub BUILD {
 7 |     my ($self) = @_;
 8 | 
 9 |     # Install a ComplexVerb transformer configured with new_root => 'first'.
10 |     my %transformer_args = (
11 |         subscription => $self->subscription,
12 |         new_root     => 'first',
13 |     );
14 |     my $transformer = Treex::Tool::ATreeTransformer::ComplexVerb->new( \%transformer_args );
15 |     $self->set_transformer($transformer);
16 | }
17 | 
18 | 1;
19 | 
20 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/A2A/Transform/ComplexVerbRootLast.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Block::A2A::Transform::ComplexVerbRootLast;
 2 | use Moose;
 3 | extends 'Treex::Block::A2A::Transform::BaseTransformer';
 4 | use Treex::Tool::ATreeTransformer::ComplexVerb;
 5 | 
 6 | sub BUILD {
 7 |     my ($self) = @_;
 8 | 
 9 |     # Install a ComplexVerb transformer configured with new_root => 'last'.
10 |     my %transformer_args = (
11 |         subscription => $self->subscription,
12 |         new_root     => 'last',
13 |     );
14 |     my $transformer = Treex::Tool::ATreeTransformer::ComplexVerb->new( \%transformer_args );
15 |     $self->set_transformer($transformer);
16 | }
17 | 
18 | 1;
19 | 
20 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/A2N/CS/t/sysnerv_load.t:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | BEGIN {
 3 |     # Author-only test: skip everything unless AUTHOR_TESTING is set.
 4 |     if ( !$ENV{AUTHOR_TESTING} ) {
 5 |         require Test::More;
 6 |         Test::More::plan( skip_all => 'these tests requires AUTHOR_TESTING' );
 7 |     }
 8 | }
 9 | 
10 | use strict;
11 | use warnings;
12 | 
13 | use Test::More;
14 | use Data::Dumper;
15 | 
16 | # The module has to load ...
17 | BEGIN { use_ok('Treex::Block::A2N::CS::SysNERV'); }
18 | 
19 | # ... and its constructor has to work without any arguments.
20 | my $sysnerv_block = Treex::Block::A2N::CS::SysNERV->new();
21 | isa_ok( $sysnerv_block, 'Treex::Block::A2N::CS::SysNERV' );
22 | 
23 | done_testing();
24 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/A2N/EN/t/stanford2008.t:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | use utf8;
 3 | use strict;
 4 | use warnings;
 5 | 
 6 | use Test::More tests => 2;
 7 | 
 8 | use_ok("Treex::Block::A2N::EN::StanfordNER2008");
 9 | 
10 | # Run the block on one sentence through the treex command line and print
11 | # ne_type and normalized_name of every n-node.
12 | my $in     = "Peter and Paul love Stanford";
13 | my $expect = 'p_Peter' . 'p_Paul' . 'i_Stanford';
14 | my $scen   = q{A2N::EN::StanfordNER2008 Util::Eval nnode='print $.ne_type.$.normalized_name'};
15 | 
16 | # The command is a shell pipeline, so it is passed as one string.
17 | open my $OUT, '-|', "echo $in | treex -q -Len -t $scen"
18 |     or die "Cannot run treex: $!";
19 | my $got = <$OUT>;
20 | close $OUT;
21 | 
22 | is( $got, $expect, 'sample sentence A2N::EN::StanfordNER2008' );
23 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/A2N/EN/t/stanford2015.t:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | use utf8;
 3 | use strict;
 4 | use warnings;
 5 | 
 6 | use Test::More tests => 2;
 7 | 
 8 | use_ok("Treex::Block::A2N::EN::StanfordNER2015");
 9 | 
10 | # Run the block on one sentence through the treex command line and print
11 | # ne_type and normalized_name of every n-node.
12 | my $in     = "Peter and Paul love Stanford";
13 | my $expect = 'p_Peter' . 'p_Paul' . 'i_Stanford';
14 | my $scen   = q{A2N::EN::StanfordNER2015 Util::Eval nnode='print $.ne_type.$.normalized_name'};
15 | 
16 | # The command is a shell pipeline, so it is passed as one string.
17 | open my $OUT, '-|', "echo $in | treex -q -Len -t $scen"
18 |     or die "Cannot run treex: $!";
19 | my $got = <$OUT>;
20 | close $OUT;
21 | 
22 | is( $got, $expect, 'sample sentence A2N::EN::StanfordNER2015' );
23 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/A2T/CS/DeleteExtraCoref.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Block::A2T::CS::DeleteExtraCoref;
 2 | use Moose;
 3 | use Treex::Core::Common;
 4 | extends 'Treex::Core::Block';
 5 | 
 6 | # Delete generated #PersPron in constructions like "zdá se, že"
 7 | 
 8 | sub process_tnode {
 9 |     my ( $self, $tnode ) = @_;
10 |     return unless $tnode->formeme eq 'drop';
11 |     return unless $tnode->get_coref_text_nodes();
12 | 
13 |     # This kind of error is common only in the present tense.
14 |     my $parent = $tnode->get_parent();
15 |     return unless ( $parent->gram_tense || '' ) eq 'sim';
16 |     return unless $parent->t_lemma =~ /_se$/;
17 |     my $has_ze_child = any { $_->formeme =~ /^v:že/ } $parent->get_children();
18 |     $tnode->remove() if $has_ze_child;
19 |     return;
20 | }
21 | 
22 | 1;
23 | 
24 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/A2T/PT/FixFormeme.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Block::A2T::PT::FixFormeme;
 2 | use Moose;
 3 | use Treex::Core::Common;
 4 | extends 'Treex::Core::Block';
 5 | 
 6 | # Refine the generic "adj:attr" formeme according to word order.
 7 | # Only nodes whose parent formeme matches /n:/ are touched: a node that
 8 | # precedes its parent gets "adj:prenom", any other gets "adj:postnom".
 9 | # Nodes with a different formeme are left unchanged.
10 | sub process_tnode {
11 |     my ( $self, $tnode ) = @_;
12 |     if ( $tnode->formeme eq "adj:attr" && $tnode->parent->formeme =~ /n:/ ) {
13 |         my $head     = $tnode->parent;
14 |         my $position = $tnode->precedes($head) ? "adj:prenom" : "adj:postnom";
15 |         $tnode->set_formeme($position);
16 |     }
17 | }
18 | 
19 | 1;
20 | 
21 | __END__
22 | 
23 | =encoding utf-8
24 | 
25 | =head1 NAME
26 | 
27 | Treex::Block::A2T::PT::FixFormeme
28 | 
29 | 
30 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/A2T/ProjectSelectedWild.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Block::A2T::ProjectSelectedWild;
 2 | use Moose;
 3 | use Treex::Core::Common;
 4 | 
 5 | extends 'Treex::Core::Block';
 6 | 
 7 | # Copy wild->{check_comma_after} from the first a-node (lex, then aux) that defines it.
 8 | sub process_tnode {
 9 |     my ($self, $tnode) = @_;
10 |     my @candidates = grep { defined $_->wild->{check_comma_after} }
11 |         ( $tnode->get_lex_anode, $tnode->get_aux_anodes );
12 |     $tnode->wild->{check_comma_after} = $candidates[0]->wild->{check_comma_after} if @candidates;
13 | }
14 | 
15 | 1;
16 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/A2T/SK/SetCoapFunctors.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Block::A2T::SK::SetCoapFunctors;
 2 | use Moose;
 3 | use Treex::Core::Common;
 4 | extends 'Treex::Core::Block';
 5 | 
 6 | # Set the functor of coordination/apposition nodes: the t-lemma is tried first
 7 | # (CONJ, DISJ, ADVS word lists), then the afun of the lexical a-node
 8 | # (Coord -> CONJ, Apos -> APPS). Other nodes keep their functor.
 9 | sub process_tnode {
10 |     my ( $self, $t_node ) = @_;
11 |     # undefined t_lemma/afun count as '' to avoid "uninitialized value" warnings
12 |     my $t_lemma = $t_node->t_lemma // '';
13 |     my $a_node  = $t_node->get_lex_anode();
14 |     my $afun    = ( $a_node ? $a_node->afun : undef ) // '';
15 |     my $functor;
16 |     if ( $t_lemma =~ /^(a|i|aj|ani)$/ ) {
17 |         $functor = 'CONJ';
18 |     }
19 |     elsif ( $t_lemma =~ /^(alebo|či)$/ ) {
20 |         $functor = 'DISJ';
21 |     }
22 |     elsif ( $t_lemma =~ /^(ale|no)$/ ) {
23 |         $functor = 'ADVS';
24 |     }
25 |     elsif ( $afun eq 'Coord' ) {
26 |         $functor = 'CONJ';
27 |     }
28 |     elsif ( $afun eq 'Apos' ) {
29 |         $functor = 'APPS';
30 |     }
31 |     $t_node->set_functor($functor) if defined $functor;
32 |     return;
33 | }
34 | 
35 | 1;
36 | 
37 | 
38 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/A2T/SetIsMember.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Block::A2T::SetIsMember;
 2 | use Moose;
 3 | use Treex::Core::Common;
 4 | extends 'Treex::Core::Block';
 5 | 
 6 | # Mark the t-node as a coordination member if any of its a-nodes is one.
 7 | sub process_tnode {
 8 |     my ( $self, $t_node ) = @_;
 9 |     my $has_member_anode = any { $_->is_member } $t_node->get_anodes();
10 |     $t_node->set_is_member(1) if $has_member_anode;
11 |     return 1;
12 | }
13 | 
14 | # Stub: ignores its argument and returns nothing; not called within this file.
15 | sub is_some_anode_member {
16 |     return;
17 | }
18 | 
19 | 1;
20 | 
21 | =over
22 | 
23 | =item Treex::Block::A2T::SetIsMember
24 | 
25 | Coordination members on the t-layer should have the attribute C<is_member = 1>.
26 | This attribute is filled according to the same attribute on the a-layer.
27 | 
28 | =back
29 | 
30 | =cut
31 | 
32 | # Copyright 2009 Martin Popel
33 | # This file is distributed under the GNU General Public License v2. See $TMT_ROOT/README.
34 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/A2W/CS/AsciiQuotes.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Block::A2W::CS::AsciiQuotes;
 2 | use utf8;
 3 | use Moose;
 4 | use Treex::Core::Common;
 5 | extends 'Treex::Core::Block';
 6 | 
 7 | # Replace the Czech quotation marks „ and “ in the zone sentence by the ASCII quote ".
 8 | sub process_zone {
 9 |     my ( $self, $zone ) = @_;
10 |     ( my $ascii = $zone->sentence ) =~ tr/„“/""/;
11 |     $zone->set_sentence($ascii);
12 |     return;
13 | }
14 | 
15 | 1;
16 | 
17 | =over
18 | 
19 | =item Treex::Block::A2W::CS::AsciiQuotes
20 | 
21 | Correct Czech quotation marks („ and “) are changed to incorrect ASCII (").
22 | This hack is useful for BLEU comparisons
23 | (when ASCII quotes are used in reference translations).
24 | 
25 | =back
26 | 
27 | =cut
28 | 
29 | # Copyright 2009 Martin Popel
30 | # This file is distributed under the GNU General Public License v2. See $TMT_ROOT/README.
31 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/A2W/CS/Detokenize.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Block::A2W::CS::Detokenize;
 2 | use Moose;
 3 | use Treex::Core::Common;
 4 | extends 'Treex::Core::Block';
 5 | 
 6 | # Rebuild the zone sentence from the a-tree: node forms in the order given by
 7 | # get_descendants({ordered => 1}), each followed by a space unless the node has
 8 | # no_space_after set (so a trailing space may remain after the last token).
 9 | sub process_zone {
10 |     my ( $self, $zone ) = @_;
11 |     my @ordered_nodes = $zone->get_atree()->get_descendants( { ordered => 1 } );
12 |     my $text = join '',
13 |         map { $_->form . ( $_->no_space_after ? '' : ' ' ) } @ordered_nodes;
14 |     $zone->set_sentence($text);
15 | }
16 | 
17 | 1;
18 | 
19 | =over
20 | 
21 | =item Treex::Block::A2W::CS::Detokenize
22 | 
23 | This block detokenizes Czech target analytical tree using the 'no_space_after' attributes and writes down the target sentence.
24 | 
25 | =back
26 | 
27 | =cut
28 | 
29 | # Copyright 2011 David Marecek
30 | 
31 | # This file is distributed under the GNU General Public License v2. See $TMT_ROOT/README.
32 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/A2W/ConcatenateTokens.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Block::A2W::ConcatenateTokens;
 2 | use utf8;
 3 | use Moose;
 4 | use Treex::Core::Common;
 5 | extends 'Treex::Core::Block';
 6 | 
 7 | # Join all a-node forms by spaces, skipping forms that contain "#" followed by A-Z.
 8 | sub process_zone {
 9 |     my ( $self, $zone ) = @_;
10 |     my @forms = map { $_->form } $zone->get_atree()->get_descendants( { ordered => 1 } );
11 |     $zone->set_sentence( join ' ', grep { $_ !~ /#[A-Z]/ } @forms );
12 |     return;
13 | }
14 | 
15 | 1;
16 | 
17 | =over
18 | 
19 | =item Treex::Block::A2W::ConcatenateTokens
20 | 
21 | Creates the target sentence string simply by concatenation of word forms
22 | joined by spaces. You must apply detokenization after this block
23 | to delete spaces before/after punctuation etc.
24 | 
25 | 
26 | =back
27 | 
28 | =cut
29 | 
30 | # Copyright 2011 Martin Popel
31 | # This file is distributed under the GNU GPL v2 or later. See $TMT_ROOT/README.
32 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/A2W/EN/DirtyTricks.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Block::A2W::EN::DirtyTricks;
 2 | use utf8;
 3 | use Moose;
 4 | use Treex::Core::Common;
 5 | extends 'Treex::Core::Block';
 6 | 
 7 | # Regex-based clean-up of the zone sentence: `` and '' become curly quotes
 8 | # (swallowing the adjacent whitespace) and a doubled "I I" is reduced to "I".
 9 | sub process_zone {
10 |     my ( $self, $zone ) = @_;
11 |     my $sentence = $zone->sentence;
12 |     $sentence =~ s/``\s*/“/g;
13 |     $sentence =~ s/\s*''/”/g;
14 |     $sentence =~ s/( |^)I\s+I( |$)/${1}I${2}/g;    # $1/$2, not \1/\2, on the replacement side
15 |     $zone->set_sentence($sentence);
16 |     return;
17 | }
18 | 
19 | 1;
20 | 
21 | __END__
22 | 
23 | =encoding utf-8
24 | 
25 | =head1 NAME
26 | 
27 | Treex::Block::A2W::EN::DirtyTricks
28 | 
29 | =head1 DESCRIPTION
30 | 
31 | This is the place for temporary regex-based hacks.
32 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/A2W/EN/FixCapitalization.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Block::A2W::EN::FixCapitalization;
 2 | 
 3 | use utf8;
 4 | use Moose;
 5 | use Treex::Core::Common;
 6 | 
 7 | extends 'Treex::Core::Block';
 8 | 
 9 | # Nationality/language words that must start with a capital letter ("Portugese" is a legacy
10 | # misspelling kept for compatibility). Under /x line breaks are ignored, so every alternative needs an explicit "|".
11 | my $ALL_CAPS = qr{
12 |     German|English|Turkish|French|Czech|Slovak|Spanish|Portuguese|Portugese|American|Mexican|
13 |     Italian|Greek|Serbian|Russian|Chinese|Indian
14 | }xi;
15 | 
16 | # Capitalize the first letter of the a-node form when the whole form is one of the words above.
17 | sub process_anode {
18 |     my ( $self, $a_node ) = @_;
19 |     my $form = $a_node->form // '';
20 |     # only the initial letter is changed; the rest of the form is kept as it is
21 |     $a_node->set_form( ucfirst $form ) if $form =~ /^($ALL_CAPS)$/i;
22 |     return;
23 | }
24 | 
25 | 1;


--------------------------------------------------------------------------------
/lib/Treex/Block/A2W/EN/Tidy.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Block::A2W::EN::Tidy;
 2 | use Moose;
 3 | use Treex::Core::Common;
 4 | extends 'Treex::Core::Block';
 5 | 
 6 | has domain => (
 7 |      is => 'ro',
 8 |      isa => enum( [qw(general IT)] ),
 9 |      default => 'general',
10 |      documentation => 'domain of the input texts',
11 | );
12 | 
13 | # Tidy punctuation in the zone sentence: collapse comma runs, repair comma/period,
14 | # comma/quote and comma/colon clashes, and rewrite "digits,digits" as "digits.digits" when the
15 | # part after the comma has a non-zero digit. For domain 'IT', curly quotes become ASCII quotes.
16 | sub process_zone {
17 |     my ( $self, $zone ) = @_;
18 |     my $text = $zone->sentence;
19 |     for ($text) {
20 |         s/,+/,/g;
21 |         s/,\././g;
22 |         s/,?",/,"/g;
23 |         s/,":/":/g;
24 |         s/:,/:/g;
25 |         s/(,")+/,"/g;
26 |         s/([0-9]+),([0-9]*[1-9]+[0-9]*)/$1.$2/g;
27 |         s/[“”]/"/g if $self->domain eq 'IT';
28 |     }
29 |     $zone->set_sentence($text);
30 |     return;
31 | }
32 | 
33 | 1;
34 | 
35 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/A2W/PT/DirtyTricks.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Block::A2W::PT::DirtyTricks;
 2 | use utf8;
 3 | use Moose;
 4 | use Treex::Core::Common;
 5 | extends 'Treex::Core::Block';
 6 | 
 7 | # Turn `` and '' into curly quotes (swallowing adjacent whitespace), then delete all curly quotes.
 8 | sub process_zone {
 9 |     my ( $self, $zone ) = @_;
10 |     my $text = $zone->sentence;
11 |     for ($text) {
12 |         s/``\s*/“/g;    # Is this still necessary?
13 |         s/\s*''/”/g;
14 |         s/“//g;
15 |         s/”//g;
16 |     }
17 |     $zone->set_sentence($text);
18 |     return;
19 | }
20 | 
21 | 1;
22 | 
23 | __END__
24 | 
25 | =encoding utf-8
26 | 
27 | =head1 NAME
28 | 
29 | Treex::Block::A2W::PT::DirtyTricks
30 | 
31 | =head1 DESCRIPTION
32 | 
33 | This is the place for temporary regex-based hacks.
34 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/Coref/CS/Cor/PrintData.pm:
--------------------------------------------------------------------------------
1 | package Treex::Block::Coref::CS::Cor::PrintData;
2 | use Moose;
3 | use Treex::Core::Common;
4 | extends 'Treex::Block::Coref::PrintData';
5 | with 'Treex::Block::Coref::CS::Cor::Base';
6 | # No code of its own: all behavior comes from the parent class and the role above.
7 | 1;
8 | #TODO add documentation
9 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/Coref/CS/DemonPron/PrintData.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Block::Coref::CS::DemonPron::PrintData;
 2 | use Moose;
 3 | use Treex::Core::Common;
 4 | use List::MoreUtils qw/none/;
 5 | extends 'Treex::Block::Coref::PrintData';
 6 | with 'Treex::Block::Coref::CS::DemonPron::Base';
 7 | 
 8 | # Three 0/1 losses, in order: (no antecedent candidates and neither flag below), gold flag 's', gold flag 'e'; 0 = holds.
 9 | override 'losses_for_special_classes' => sub {
10 |     my ($self, $anaph, @ante_cands) = @_;
11 |     my $gold_flags = $anaph->wild->{gold_coref_special} // "";
12 |     my @losses = map { $gold_flags =~ $_ ? 0 : 1 } ( qr/s/, qr/e/ );
13 |     my $none_loss = ( !@ante_cands && none { $_ == 0 } @losses ) ? 0 : 1;
14 |     unshift @losses, $none_loss;
15 |     return @losses;
16 | };
17 | 
18 | 1;
19 | #TODO add documentation
20 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/Coref/CS/PersPron/PrintData.pm:
--------------------------------------------------------------------------------
1 | package Treex::Block::Coref::CS::PersPron::PrintData;
2 | use Moose;
3 | use Treex::Core::Common;
4 | extends 'Treex::Block::Coref::PrintData';
5 | with 'Treex::Block::Coref::CS::PersPron::Base';
6 | # No code of its own: all behavior comes from the parent class and the role above.
7 | 1;
8 | #TODO add documentation
9 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/Coref/CS/ReflPron/PrintData.pm:
--------------------------------------------------------------------------------
1 | package Treex::Block::Coref::CS::ReflPron::PrintData;
2 | use Moose;
3 | use Treex::Core::Common;
4 | extends 'Treex::Block::Coref::PrintData';
5 | with 'Treex::Block::Coref::CS::ReflPron::Base';
6 | # No code of its own: all behavior comes from the parent class and the role above.
7 | 1;
8 | #TODO add documentation
9 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/Coref/CS/RelPron/PrintData.pm:
--------------------------------------------------------------------------------
1 | package Treex::Block::Coref::CS::RelPron::PrintData;
2 | use Moose;
3 | use Treex::Core::Common;
4 | extends 'Treex::Block::Coref::PrintData';
5 | with 'Treex::Block::Coref::CS::RelPron::Base';
6 | # No code of its own: all behavior comes from the parent class and the role above.
7 | 1;
8 | #TODO add documentation
9 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/Coref/EN/Cor/PrintData.pm:
--------------------------------------------------------------------------------
1 | package Treex::Block::Coref::EN::Cor::PrintData;
2 | use Moose;
3 | use Treex::Core::Common;
4 | extends 'Treex::Block::Coref::PrintData';
5 | with 'Treex::Block::Coref::EN::Cor::Base';
6 | # No code of its own: all behavior comes from the parent class and the role above.
7 | 1;
8 | #TODO add documentation
9 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/Coref/EN/PersPron/PrintData.pm:
--------------------------------------------------------------------------------
1 | package Treex::Block::Coref::EN::PersPron::PrintData;
2 | use Moose;
3 | use Treex::Core::Common;
4 | extends 'Treex::Block::Coref::PrintData';
5 | with 'Treex::Block::Coref::EN::PersPron::Base';
6 | # No code of its own: all behavior comes from the parent class and the role above.
7 | 1;
8 | #TODO add documentation
9 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/Coref/EN/ReflPron/PrintData.pm:
--------------------------------------------------------------------------------
1 | package Treex::Block::Coref::EN::ReflPron::PrintData;
2 | use Moose;
3 | use Treex::Core::Common;
4 | extends 'Treex::Block::Coref::PrintData';
5 | with 'Treex::Block::Coref::EN::ReflPron::Base';
6 | # No code of its own: all behavior comes from the parent class and the role above.
7 | 1;
8 | #TODO add documentation
9 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/Coref/EN/RelPron/PrintData.pm:
--------------------------------------------------------------------------------
1 | package Treex::Block::Coref::EN::RelPron::PrintData;
2 | use Moose;
3 | use Treex::Core::Common;
4 | extends 'Treex::Block::Coref::PrintData';
5 | with 'Treex::Block::Coref::EN::RelPron::Base';
6 | # No code of its own: all behavior comes from the parent class and the role above.
7 | 1;
8 | #TODO add documentation
9 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/Coref/EntityEvent/CS/DemonPron/PrintData.pm:
--------------------------------------------------------------------------------
1 | package Treex::Block::Coref::EntityEvent::CS::DemonPron::PrintData;
2 | use Moose;
3 | use Treex::Core::Common;
4 | extends 'Treex::Block::Coref::EntityEvent::PrintData';
5 | with 'Treex::Block::Coref::EntityEvent::CS::DemonPron::Base';
6 | # No code of its own: all behavior comes from the parent class and the role above.
7 | 1;
8 | #TODO add documentation
9 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/Coref/PrettyPrint/LabelSys.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Block::Coref::PrettyPrint::LabelSys;
 2 | use Moose;
 3 | use Treex::Core::Common;
 4 | use Treex::Tool::Coreference::NodeFilter;
 5 | 
 6 | extends 'Treex::Core::Block';
 7 | with 'Treex::Block::Filter::Node';
 8 | 
 9 | # NOTE(review): presumably the builder of the node_types attribute used by Filter::Node - confirm.
10 | sub _build_node_types { return 'all_anaph'; }
11 | 
12 | # NOTE(review): presumably the builder of the layers attribute (only "t" here) - confirm.
13 | sub _build_layers { return "t"; }
14 | 
15 | # Store coreference diagnostics in the wild attributes: the node is flagged with
16 | # is_anaph and as a candidate for itself; every node returned by get_coref_nodes
17 | # is recorded under sys_ante_for and cand_for with the id of this node.
18 | sub process_filtered_tnode {
19 |     my ($self, $tnode) = @_;
20 |     my $anaph_id = $tnode->id;
21 |     $tnode->wild->{coref_diag}{is_anaph} = 1;
22 |     $tnode->wild->{coref_diag}{cand_for}{$anaph_id} = 1;
23 |     foreach my $ante ( $tnode->get_coref_nodes ) {
24 |         $ante->wild->{coref_diag}{sys_ante_for}{$anaph_id} = 1;
25 |         $ante->wild->{coref_diag}{cand_for}{$anaph_id} = 1;
26 |     }
27 | }
28 | 
29 | 1;
30 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/Depfix/CS2EN/Fix.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Block::Depfix::CS2EN::Fix;
 2 | use Moose;
 3 | use Treex::Core::Common;
 4 | use utf8;
 5 | extends 'Treex::Block::Depfix::Fix';
 6 | 
 7 | 1;
 8 | 
 9 | =head1 NAME 
10 | 
11 | Treex::Block::Depfix::CS2EN::Fix
12 | -- ancestor for all Depfix::CS2EN::Fix* blocks
13 | (or at least for those operating on a-layer)
14 | 
15 | =head1 AUTHOR
16 | 
17 | Rudolf Rosa <rosa@ufal.mff.cuni.cz>
18 | 
19 | =head1 COPYRIGHT AND LICENSE
20 | 
21 | Copyright © 2014 by Institute of Formal and Applied Linguistics,
22 | Charles University in Prague
23 | 
24 | This module is free software; you can redistribute it and/or modify it
25 | under the same terms as Perl itself.
26 | 
27 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/Depfix/README:
--------------------------------------------------------------------------------
1 | 
2 | Depfix code is now stored in A2A::CS (and some of it in other places
3 | according to the layer it operates on). However, in the future it shall reside
4 | here.
5 | 
6 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/Depfix/sample_config.yaml:
--------------------------------------------------------------------------------
 1 | fields:
 2 | - oldchild_lemma
 3 | - oldchild_afun
 4 | - oldchild_tag
 5 | - oldparent_lemma
 6 | - oldparent_afun
 7 | - oldparent_tag
 8 | - oldedge_direction
 9 | - srcchild_lemma
10 | - srcchild_afun
11 | - srcchild_tag
12 | - srcparent_lemma
13 | - srcparent_afun
14 | - srcparent_tag
15 | - srcedge_existence
16 | - srcedge_direction
17 | - newchild_lemma
18 | - newchild_afun
19 | - newchild_tag
20 | - newparent_lemma
21 | - newparent_afun
22 | - newparent_tag
23 | 
24 | features:
25 | - oldchild_afun
26 | - oldchild_tag
27 | - newparent_afun
28 | - newparent_tag
29 | - oldedge_direction
30 | - srcchild_afun
31 | - srcchild_tag
32 | - srcparent_afun
33 | - srcparent_tag
34 | - srcedge_existence
35 | - srcedge_direction
36 | 
37 | predict: newchild_tag
38 | 
39 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/Eval/CorefStats.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Block::Eval::CorefStats;
 2 | use Moose;
 3 | use Treex::Core::Common;
 4 | extends 'Treex::Core::Block';
 5 | 
 6 | # For each #PersPron node, print its coreference chain size and the root's czeng_sentord value.
 7 | sub process_tnode {
 8 |     my ($self, $tnode) = @_;
 9 |     if ( ( $tnode->t_lemma // '' ) eq "#PersPron" ) {
10 |         my $chain_size = () = $tnode->get_coref_chain;
11 |         my $sent_ord   = $tnode->get_root->wild->{"czeng_sentord"};
12 |         print "CHAIN_SIZE: $chain_size, SENT_ORD: $sent_ord\n";
13 |     }
14 | }
15 | 
16 | 1;
17 | 
18 | =over
19 | 
20 | =item Treex::Block::Eval::CorefStats
21 | 
22 | Prints out some of the statistics regarding coreference.
23 | 
24 | =back
25 | 
26 | =cut
27 | 
28 | # Copyright 2011 Michal Novak
29 | 
30 | # This file is distributed under the GNU General Public License v2. See $TMT_ROOT/README.
31 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/Filter/CzEng/AcademicTitle.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Block::Filter::CzEng::AcademicTitle;
 2 | use Moose;
 3 | use Treex::Core::Common;
 4 | use Treex::Core::Log;
 5 | extends 'Treex::Block::Filter::CzEng::Common';
 6 | 
 7 | # Add the 'academic_title' feature when the Czech or the English sentence ends
 8 | # with one of the abbreviated titles listed in the pattern (case-insensitive).
 9 | sub process_bundle {
10 |     my ( $self, $bundle ) = @_;
11 |     my $title_at_end = qr/\b((Bc|Mgr|Ing|MUDr|JUDr|RNDr|PhDr|MVDr|PharmDr|ThDr|Doc|Prof|arch)\.)$/i;
12 |     my $en = $bundle->get_zone('en')->sentence;
13 |     my $cs = $bundle->get_zone('cs')->sentence;
14 |     # the Czech side is tested first, the English one only if it did not match
15 |     my $hit = $cs =~ $title_at_end || $en =~ $title_at_end;
16 |     $self->add_feature( $bundle, 'academic_title' ) if $hit;
17 |     return 1;
18 | }
19 | 
20 | 1;
21 | 
22 | =over
23 | 
24 | =item Treex::Block::Filter::CzEng::AcademicTitle
25 | 
26 | Marking sentences that end with an abbreviated academic title (e.g. Mgr., Ing., Prof.).
27 | 
28 | =back
29 | 
30 | =cut
31 | 
32 | # Copyright 2011 Zdenek Zabokrtsky, Ales Tamchyna
33 | 
34 | # This file is distributed under the GNU General Public License v2. See $TMT_ROOT/README.
35 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/Filter/CzEng/Classifier.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Block::Filter::CzEng::Classifier;
 2 | use Moose::Role;
 3 | 
 4 | requires qw( init see learn save load predict score );
 5 | 
 6 | 1;
 7 | 
 8 | =over
 9 | 
10 | =item Treex::Block::Filter::CzEng::Classifier
11 | 
12 | A role that must be implemented by specific classifier types.
13 | 
14 | =back
15 | 
16 | =cut
17 | 
18 | # Copyright 2011 Ales Tamchyna
19 | 
20 | # This file is distributed under the GNU General Public License v2. See $TMT_ROOT/README.
21 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/Filter/CzEng/GutenbergHeader.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Block::Filter::CzEng::GutenbergHeader;
 2 | use Moose;
 3 | use Treex::Core::Common;
 4 | use Treex::Core::Log;
 5 | extends 'Treex::Block::Filter::CzEng::Common';
 6 | 
 7 | # Add the 'gutenberg_header' feature when the Czech or the English sentence
 8 | # contains the string "Gutenberg" (case-sensitive).
 9 | sub process_bundle {
10 |     my ( $self, $bundle ) = @_;
11 |     my $marker = qr/Gutenberg/;
12 |     my $en = $bundle->get_zone('en')->sentence;
13 |     my $cs = $bundle->get_zone('cs')->sentence;
14 |     # the Czech side is tested first, the English one only if it did not match
15 |     my $hit = $cs =~ $marker || $en =~ $marker;
16 |     $self->add_feature( $bundle, 'gutenberg_header' ) if $hit;
17 |     return 1;
18 | }
19 | 
20 | 1;
21 | 
22 | =over
23 | 
24 | =item Treex::Block::Filter::CzEng::GutenbergHeader
25 | 
26 | Remnants of the Project Gutenberg file header left in the data.
27 | 
28 | =back
29 | 
30 | =cut
31 | 
32 | # Copyright 2011 Zdenek Zabokrtsky, Ales Tamchyna
33 | 
34 | # This file is distributed under the GNU General Public License v2. See $TMT_ROOT/README.
35 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/Filter/CzEng/IdenticalSentences.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Block::Filter::CzEng::IdenticalSentences;
 2 | use Moose;
 3 | use Treex::Core::Common;
 4 | use Treex::Core::Log;
 5 | extends 'Treex::Block::Filter::CzEng::Common';
 6 | 
 7 | # Add the 'identical' feature when the Czech and English sentences are the same string.
 8 | sub process_bundle {
 9 |     my ( $self, $bundle ) = @_;
10 |     my $en_text = $bundle->get_zone('en')->sentence;
11 |     my $cs_text = $bundle->get_zone('cs')->sentence;
12 |     if ( $cs_text eq $en_text ) {
13 |         $self->add_feature( $bundle, 'identical' );
14 |     }
15 |     return 1;
16 | }
17 | 
18 | 1;
19 | 
20 | =over
21 | 
22 | =item Treex::Block::Filter::CzEng::IdenticalSentences
23 | 
24 | Feature that fires when cs and en are identical.
25 | 
26 | =back
27 | 
28 | =cut
29 | 
30 | # Copyright 2011 Ales Tamchyna
31 | 
32 | # This file is distributed under the GNU General Public License v2. See $TMT_ROOT/README.
33 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/Filter/CzEng/InterleavingSpaces.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Block::Filter::CzEng::InterleavingSpaces;
 2 | use Moose;
 3 | use Treex::Core::Common;
 4 | use Treex::Core::Log;
 5 | extends 'Treex::Block::Filter::CzEng::Common';
 6 | 
 7 | # Add the 'interleaving_spaces' feature when the Czech or the English sentence
 8 | # contains four single word characters, each surrounded by single spaces.
 9 | sub process_bundle {
10 |     my ( $self, $bundle ) = @_;
11 |     my $spaced_letters = qr/ \w \w \w \w /;
12 |     my $en = $bundle->get_zone('en')->sentence;
13 |     my $cs = $bundle->get_zone('cs')->sentence;
14 |     # the Czech side is tested first, the English one only if it did not match
15 |     my $hit = $cs =~ $spaced_letters || $en =~ $spaced_letters;
16 |     $self->add_feature( $bundle, 'interleaving_spaces' ) if $hit;
17 |     return 1;
18 | }
19 | 
20 | 1;
21 | 
22 | =over
23 | 
24 | =item Treex::Block::Filter::CzEng::InterleavingSpaces
25 | 
26 | Letters interleaved w i t h  s p a c e s.
27 | 
28 | =back
29 | 
30 | =cut
31 | 
32 | # Copyright 2011 Zdenek Zabokrtsky, Ales Tamchyna
33 | 
34 | # This file is distributed under the GNU General Public License v2. See $TMT_ROOT/README.
35 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/Filter/CzEng/LongSentence.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Block::Filter::CzEng::LongSentence;
 2 | use Moose;
 3 | use Treex::Core::Common;
 4 | use List::Util qw( max );
 5 | extends 'Treex::Block::Filter::CzEng::Common';
 6 | 
 7 | # Add a 'sentence_length=<bucket>' feature. The bucket is obtained by passing
 8 | # the length of the longer of the English and Czech sentences, together with
 9 | # the fixed bounds below, to quantize_given_bounds.
10 | sub process_bundle {
11 |     my ( $self, $bundle ) = @_;
12 |     my @lengths = map { length $bundle->get_zone($_)->sentence } qw(en cs);
13 |     my $longest = max(@lengths);
14 |     my @bounds  = ( 0, 10, 50, 100, 250, 500 );
15 |     my $bucket  = $self->quantize_given_bounds( $longest, @bounds );
16 |     $self->add_feature( $bundle, "sentence_length=$bucket" );
17 |     return 1;
18 | }
19 | 
20 | 1;
21 | 
22 | =over
23 | 
24 | =item Treex::Block::Filter::CzEng::LongSentence
25 | 
26 | Quantized maximum sentence length.
27 | 
28 | =back
29 | 
30 | =cut
31 | 
32 | # Copyright 2011 Zdenek Zabokrtsky, Ales Tamchyna
33 | 
34 | # This file is distributed under the GNU General Public License v2. See $TMT_ROOT/README.
35 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/Filter/CzEng/LongWord.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Block::Filter::CzEng::LongWord;
 2 | use Moose;
 3 | use Treex::Core::Common;
 4 | use List::Util qw( max );
 5 | extends 'Treex::Block::Filter::CzEng::Common';
 6 | 
 7 | # Add a 'max_word_length=<bucket>' feature; the bucket is the quantized length
 8 | # of the longest a-node form found in the English and Czech a-trees.
 9 | sub process_bundle {
10 |     my ( $self, $bundle ) = @_;
11 |     my @word_lengths;
12 |     foreach my $lang (qw(en cs)) {
13 |         my @nodes = $bundle->get_zone($lang)->get_atree->get_descendants;
14 |         push @word_lengths, map { length $_->get_attr('form') } @nodes;
15 |     }
16 |     my $longest = max(@word_lengths);
17 |     my @bounds  = ( 0, 5, 10, 20, 50 );
18 |     my $bucket  = $self->quantize_given_bounds( $longest, @bounds );
19 |     $self->add_feature( $bundle, "max_word_length=$bucket" );
20 |     return 1;
21 | }
22 | 
23 | 1;
24 | 
25 | =over
26 | 
27 | =item Treex::Block::Filter::CzEng::LongWord
28 | 
29 | Quantized maximum word length.
30 | 
31 | =back
32 | 
33 | =cut
34 | 
35 | # Copyright 2011 Zdenek Zabokrtsky, Ales Tamchyna
36 | 
37 | # This file is distributed under the GNU General Public License v2. See $TMT_ROOT/README.
38 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/Filter/CzEng/MicrosoftLinesWithFilenames.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Block::Filter::CzEng::MicrosoftLinesWithFilenames;
 2 | use Moose;
 3 | use Treex::Core::Common;
 4 | use Treex::Core::Log;
 5 | extends 'Treex::Block::Filter::CzEng::Common';
 6 | 
 7 | # Add the 'microsoft_lines_with_filenames' feature when the Czech or the English sentence
 8 | # contains word characters, a dot and three lower-case letters (a-z) ending at a word boundary.
 9 | sub process_bundle {
10 |     my ( $self, $bundle ) = @_;
11 |     my $file_like = qr/\w+\.[a-z]{3}\b/;
12 |     my $en = $bundle->get_zone('en')->sentence;
13 |     my $cs = $bundle->get_zone('cs')->sentence;
14 |     # the Czech side is tested first, the English one only if it did not match
15 |     my $hit = $cs =~ $file_like || $en =~ $file_like;
16 |     $self->add_feature( $bundle, 'microsoft_lines_with_filenames' ) if $hit;
17 |     return 1;
18 | }
19 | 
20 | 1;
21 | 
22 | =over
23 | 
24 | =item Treex::Block::Filter::CzEng::MicrosoftLinesWithFilenames
25 | 
26 | Marking lines containing file names or URLs (typically useless content).
27 | 
28 | =back
29 | 
30 | =cut
31 | 
32 | # Copyright 2011 Zdenek Zabokrtsky, Ales Tamchyna
33 | 
34 | # This file is distributed under the GNU General Public License v2. See $TMT_ROOT/README.
35 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/Filter/CzEng/NonASCIICharacter.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Block::Filter::CzEng::NonASCIICharacter;
 2 | use Moose;
 3 | use Treex::Core::Common;
 4 | use Treex::Core::Log;
 5 | extends 'Treex::Block::Filter::CzEng::Common';
 6 | 
 7 | # Add the 'nonascii_character' feature when the English sentence contains a
 8 | # non-ASCII character (other than those listed in the class below) that does
 9 | # not occur anywhere in the Czech sentence.
10 | sub process_bundle {
11 |     my ( $self, $bundle ) = @_;
12 |     my $en = $bundle->get_zone('en')->sentence;
13 |     my $cs = $bundle->get_zone('cs')->sentence;
14 |     while ( $en =~ m/([^\p{ASCII}“”´´``—–€‐‘’‑‑])/g ) {
15 |         next if index( $cs, $1 ) >= 0;    # confirmed by the Czech side
16 |         $self->add_feature( $bundle, 'nonascii_character' );
17 |         last;
18 |     }
19 |     return 1;
20 | }
21 | 
22 | 1;
23 | 
24 | =over
25 | 
26 | =item Treex::Block::Filter::CzEng::NonASCIICharacter
27 | 
28 | English side contains a non-ASCII character not confirmed by the Czech side.
29 | 
30 | =back
31 | 
32 | =cut
33 | 
34 | # Copyright 2011 Zdenek Zabokrtsky, Ales Tamchyna
35 | 
36 | # This file is distributed under the GNU General Public License v2. See $TMT_ROOT/README.
37 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/Filter/CzEng/RepeatedCharacter.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Block::Filter::CzEng::RepeatedCharacter;
 2 | use Moose;
 3 | use Treex::Core::Common;
 4 | use Treex::Core::Log;
 5 | extends 'Treex::Block::Filter::CzEng::Common';
 6 | 
 7 | # Add the 'repeated_character' feature when the Czech or the English sentence
 8 | # contains a non-digit character repeated at least four times in a row.
 9 | sub process_bundle {
10 |     my ( $self, $bundle ) = @_;
11 |     my $char_run = qr/([^\d])\1{3,}/;
12 |     my $en = $bundle->get_zone('en')->sentence;
13 |     my $cs = $bundle->get_zone('cs')->sentence;
14 |     # the Czech side is tested first, the English one only if it did not match
15 |     my $hit = $cs =~ $char_run || $en =~ $char_run;
16 |     $self->add_feature( $bundle, 'repeated_character' ) if $hit;
17 |     return 1;
18 | }
19 | 
20 | 1;
21 | 
22 | =over
23 | 
24 | =item Treex::Block::Filter::CzEng::RepeatedCharacter
25 | 
26 | Finding subsequences of a character repeated four or more times.
27 | 
28 | =back
29 | 
30 | =cut
31 | 
32 | # Copyright 2011 Zdenek Zabokrtsky, Ales Tamchyna
33 | 
34 | # This file is distributed under the GNU General Public License v2. See $TMT_ROOT/README.
35 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/Filter/Generic/Classifier.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Block::Filter::Generic::Classifier;
 2 | use Moose::Role;
 3 | 
 4 | requires qw( init see learn save load predict score );
 5 | 
 6 | 1;
 7 | 
 8 | =over
 9 | 
10 | =item Treex::Block::Filter::Generic::Classifier
11 | 
12 | A role that must be implemented by specific classifier types.
13 | 
14 | =back
15 | 
16 | =cut
17 | 
18 | # Copyright 2011, 2014 Ales Tamchyna
19 | 
20 | # This file is distributed under the GNU General Public License v2. See $TMT_ROOT/README.
21 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/Filter/Generic/IdenticalSentences.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Block::Filter::Generic::IdenticalSentences;
 2 | use Moose;
 3 | use Treex::Core::Common;
 4 | use Treex::Core::Log;
 5 | extends 'Treex::Block::Filter::Generic::Common';
 6 | 
 7 | sub process_bundle {
 8 |     my ( $self, $bundle ) = @_;
 9 |     # Fire the 'identical' feature when both zones carry the very same sentence string.
10 |     my $source_text = $bundle->get_zone( $self->language )->sentence;
11 |     my $target_text = $bundle->get_zone( $self->to_language )->sentence;
12 |     if ( $source_text eq $target_text ) {
13 |         $self->add_feature( $bundle, 'identical' );
14 |     }
15 |     return 1;
16 | }
17 | 
18 | 1;
19 | 
20 | =over
21 | 
22 | =item Treex::Block::Filter::Generic::IdenticalSentences
23 | 
24 | Feature that fires when the source-language and target-language sentences are identical.
25 | 
26 | =back
27 | 
28 | =cut
29 | 
30 | # Copyright 2011, 2014 Ales Tamchyna
31 | 
32 | # This file is distributed under the GNU General Public License v2. See $TMT_ROOT/README.
33 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/Filter/Generic/LongSentence.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Block::Filter::Generic::LongSentence;
 2 | use Moose;
 3 | use Treex::Core::Common;
 4 | use List::Util qw( max );
 5 | extends 'Treex::Block::Filter::Generic::Common';
 6 | 
 7 | sub process_bundle {
 8 |     my ( $self, $bundle ) = @_;
 9 |     # The feature value is the bucket of the longer of the two sentences, as measured by length().
10 |     my $source_text = $bundle->get_zone( $self->language )->sentence;
11 |     my $target_text = $bundle->get_zone( $self->to_language )->sentence;
12 |     my $longest     = max( length($source_text), length($target_text) );
13 | 
14 |     my @bucket_bounds = ( 0, 10, 50, 100, 250, 500 );
15 |     my $bucket        = $self->quantize_given_bounds( $longest, @bucket_bounds );
16 |     $self->add_feature( $bundle, 'sentence_length=' . $bucket );
17 |     return 1;
18 | }
19 | 
20 | 1;
21 | 
22 | =over
23 | 
24 | =item Treex::Block::Filter::Generic::LongSentence
25 | 
26 | Quantized maximum sentence length.
27 | 
28 | =back
29 | 
30 | =cut
31 | 
32 | # Copyright 2011, 2014 Zdenek Zabokrtsky, Ales Tamchyna
33 | 
34 | # This file is distributed under the GNU General Public License v2. See $TMT_ROOT/README.
35 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/Filter/Generic/RepeatedCharacter.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Block::Filter::Generic::RepeatedCharacter;
 2 | use Moose;
 3 | use Treex::Core::Common;
 4 | use Treex::Core::Log;
 5 | extends 'Treex::Block::Filter::Generic::Common';
 6 | 
 7 | sub process_bundle {
 8 |     my ( $self, $bundle ) = @_;
 9 | 
10 |     # A run is one non-digit character followed by at least three copies of itself.
11 |     my $run_re = '([^\d])\1{3,}';
12 |     my $source = $bundle->get_zone( $self->language )->sentence;
13 |     my $target = $bundle->get_zone( $self->to_language )->sentence;
14 |     if ( $source =~ m/$run_re/ or $target =~ m/$run_re/ ) {
15 |         $self->add_feature( $bundle, 'repeated_character' );
16 |     }
17 |     return 1;
18 | }
19 | 
20 | 1;
21 | 
22 | =over
23 | 
24 | =item Treex::Block::Filter::Generic::RepeatedCharacter
25 | 
26 | Finding subsequences of a character repeated four or more times.
27 | 
28 | =back
29 | 
30 | =cut
31 | 
32 | # Copyright 2011, 2014 Zdenek Zabokrtsky, Ales Tamchyna
33 | 
34 | # This file is distributed under the GNU General Public License v2. See $TMT_ROOT/README.
35 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/HamleDT/LA/List_absolute_adverbs_ITTB.txt:
--------------------------------------------------------------------------------
 1 | altissime
 2 | arrogantissime
 3 | communissime
 4 | convenientissime
 5 | evidentissime
 6 | firmissime
 7 | frequentissime
 8 | imperfectissime
 9 | iustissime
10 | maxime
11 | minime
12 | optime
13 | perfectissime
14 | plenissime
15 | potissime
16 | summe
17 | ultimo
18 | verissime
19 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/HamleDT/LA/List_comparative_adverbs_ITTB.txt:
--------------------------------------------------------------------------------
 1 | accuratius
 2 | altius
 3 | amplius
 4 | apertius
 5 | ardentius
 6 | attentius
 7 | certius
 8 | clarius
 9 | communius
10 | congruentius
11 | convenientius
12 | decentius
13 | difficilius
14 | dignius
15 | diligentius
16 | efficacius
17 | eminentius
18 | evidentius
19 | expressius
20 | exterius
21 | facilius
22 | ferventius
23 | fortius
24 | frequentius
25 | imperfectius
26 | inferius
27 | intensius
28 | interius
29 | lentius
30 | levius
31 | liberalius
32 | liberius
33 | longius
34 | manifestius
35 | melius
36 | minus
37 | multiplicius
38 | nobilius
39 | particularius
40 | perfectius
41 | plenius
42 | plures
43 | pluries
44 | plus
45 | posterius
46 | potius
47 | principalius
48 | prius
49 | probabilius
50 | profundius
51 | promptius
52 | propinquius
53 | rationabilius
54 | remissius
55 | simplicius
56 | specialius
57 | suavius
58 | subtilius
59 | superius
60 | tardius
61 | uberius
62 | ulterius
63 | universalius
64 | utilius
65 | vehementius
66 | velocius
67 | vicinius
68 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/HamleDT/Test/AuxGIsPunctuation.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Block::HamleDT::Test::AuxGIsPunctuation;
 2 | use Moose;
 3 | use Treex::Core::Common;
 4 | extends 'Treex::Block::Test::BaseTester';
 5 | 
 6 | sub process_anode    # Complains about AuxG nodes that are neither punctuation nor a purely numeric form.
 7 | {
 8 |     my $self = shift;
 9 |     my $node = shift;
10 |     # AuxG may also be used for numbers identifying items in numbered lists; note that '!$x =~ ...' would negate $x first, hence '!~'.
11 |     if($node->deprel() eq 'AuxG' && !$node->is_punctuation() && $node->form() !~ m/^\d+$/)
12 |     {
13 |         $self->complain($node, 'AuxG : '.$node->tag());
14 |     }
15 | }
16 | 
17 | 1;
18 | 
19 | =over
20 | 
21 | =item Treex::Block::HamleDT::Test::AuxGIsPunctuation
22 | 
23 | A node attached as AuxG must be POS-tagged as punctuation.
24 | 
25 | =back
26 | 
27 | =cut
28 | 
29 | # Copyright 2012 Honza Václ
30 | # Copyright 2015 Dan Zeman
31 | # This file is distributed under the GNU GPL v2 or later. See $TMT_ROOT/README.
32 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/HamleDT/Test/AuxKUnderRoot.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Block::HamleDT::Test::AuxKUnderRoot;
 2 | use utf8;
 3 | use Moose;
 4 | use Treex::Core::Common;
 5 | extends 'Treex::Block::Test::BaseTester';
 6 | 
 7 | # Tests whether AuxK is attached directly to the root node.
 8 | 
 9 | sub process_anode
10 | {
11 |     # Praise AuxK nodes hanging directly on the root, complain about the rest.
12 |     my ($self, $node) = @_;
13 | 
14 |     # Nodes carrying any other dependency label are ignored by this test.
15 |     if($node->deprel() eq 'AuxK')
16 |     {
17 |         my $governor = $node->parent();
18 | 
19 |         # The attachment is fine only when the parent exists and is the tree root.
20 |         my $attached_to_root = defined($governor) && $governor->is_root();
21 | 
22 |         # Record the verdict for this node.
23 |         $attached_to_root ? $self->praise($node) : $self->complain($node);
24 |     }
25 | }
26 | 
27 | # (C) 2012 Jindřich Libovický <jlibovicky@gmail.com>
28 | # Copyright 2015 Dan Zeman <zeman@ufal.mff.cuni.cz>
29 | 
30 | 1;
31 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/HamleDT/Test/AuxVNotOnTop.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Block::HamleDT::Test::AuxVNotOnTop;
 2 | use utf8;
 3 | use Moose;
 4 | use Treex::Core::Common;
 5 | extends 'Treex::Block::Test::BaseTester';
 6 | 
 7 | # Testing if there is not an auxiliary verb directly under the root
 8 | 
 9 | sub process_atree {
10 |     my ( $self, $a_root ) = @_;
11 | 
12 |     # One AuxV child of the root is enough for a single complaint about the whole tree.
13 |     my $aux_top = first { $_->deprel eq "AuxV" } $a_root->get_children();
14 |     if ( defined $aux_top ) {
15 |         $self->complain($a_root);
16 |     }
17 |     return;
18 | }
19 | 
20 | # (C) 2012 Karel Bílek <kb@karelbilek.com>, Jindřich Libovický <jlibovicky@gmail.com>
21 | 
22 | 1;
23 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/HamleDT/Test/AuxXIsComma.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Block::HamleDT::Test::AuxXIsComma;
 2 | use Moose;
 3 | use Treex::Core::Common;
 4 | extends 'Treex::Block::Test::BaseTester';
 5 | 
 6 | sub process_anode {
 7 |     my ($self, $anode) = @_;
 8 |     # An AuxX label is accepted only on a token whose form is a comma.
 9 |     my $is_auxx  = $anode->deprel eq 'AuxX';
10 |     my $bad_form = $is_auxx && $anode->form ne ',';
11 |     $self->complain($anode) if $bad_form;
12 | }
13 | 
14 | 1;
15 | 
16 | =over
17 | 
18 | =item Treex::Block::HamleDT::Test::AuxXIsComma
19 | 
20 | Only a comma should be labeled AuxX.
21 | 
22 | =back
23 | 
24 | =cut
25 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/HamleDT/Test/CoApAboveEveryMember.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Block::HamleDT::Test::CoApAboveEveryMember;
 2 | use Moose;
 3 | use Treex::Core::Common;
 4 | extends 'Treex::Block::Test::BaseTester';
 5 | 
 6 | sub process_anode
 7 | {
 8 |     # Judge every node flagged as a member by the label of its parent.
 9 |     my ($self, $node) = @_;
10 | 
11 |     if($node->is_member())
12 |     {
13 |         my $head_label = $node->parent()->deprel();
14 | 
15 |         # A member is well attached only below a Coord or Apos head.
16 |         if($head_label =~ m/^(Coord|Apos)$/)
17 |         {
18 |             $self->praise($node);
19 |         }
20 |         else { $self->complain($node); }
21 |     }
22 | }
23 | 
24 | 1;
25 | 
26 | =over
27 | 
28 | =item Treex::Block::HamleDT::Test::CoApAboveEveryMember
29 | 
30 | Nodes with is_member=1 are allowed only under coordination or apposition heads.
31 | 
32 | =back
33 | 
34 | =cut
35 | 
36 | # Copyright 2011 Zdeněk Žabokrtský
37 | # Copyright 2015 Dan Zeman
38 | # This file is distributed under the GNU GPL v2 or later. See $TMT_ROOT/README.
39 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/HamleDT/Test/LeafAux.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Block::HamleDT::Test::LeafAux;
 2 | use Moose;
 3 | use Treex::Core::Common;
 4 | extends 'Treex::Block::Test::BaseTester';
 5 | 
 6 | sub process_anode {    # Complains about AuxT/AuxR/AuxX/AuxA nodes for which get_children yields a true value.
 7 |     my ( $self, $anode ) = @_;
 8 |     my $deprel = $anode->deprel || '';
 9 |     # The condition tests deprel, so the complaint reports deprel too (afun may differ or be unset - TODO confirm).
10 |     $self->complain( $anode, $deprel ) if $deprel =~ /^(AuxT|AuxR|AuxX|AuxA)$/ && $anode->get_children;
11 |     return;
12 | }
13 | 
14 | 1;
15 | 
16 | =over
17 | 
18 | =item Treex::Block::HamleDT::Test::LeafAux
19 | 
20 | Deprel values AuxT, AuxR, AuxX and AuxA imply
21 | that the node should be a leaf.
22 | 
23 | =back
24 | 
25 | =cut
26 | 
27 | # Copyright 2011 Zdenek Zabokrtsky
28 | # This file is distributed under the GNU GPL v2 or later. See $TMT_ROOT/README.
29 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/HamleDT/Test/MemberInEveryCoAp.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Block::HamleDT::Test::MemberInEveryCoAp;
 2 | use Moose;
 3 | use Treex::Core::Common;
 4 | extends 'Treex::Block::Test::BaseTester';
 5 | 
 6 | sub process_anode
 7 | {
 8 |     # Check that a Coord/Apos head has at least one child flagged as a member.
 9 |     my ($self, $node) = @_;
10 | 
11 |     if($node->deprel() =~ m/^(Coord|Apos)$/)
12 |     {
13 |         my $some_member = first {$_->is_member()} $node->children();
14 |         # A found member earns praise; a memberless head earns a complaint.
15 |         if($some_member)
16 |         {
17 |             $self->praise($node);
18 |         }
19 |         else { $self->complain($node); }
20 |     }
21 | }
22 | 
23 | 1;
24 | 
25 | =over
26 | 
27 | =item Treex::Block::HamleDT::Test::MemberInEveryCoAp
28 | 
29 | Every coordination/apposition structure should have at least one
30 | member node among its children.
31 | 
32 | =back
33 | 
34 | =cut
35 | 
36 | # Copyright 2011 Zdeněk Žabokrtský
37 | # Copyright 2015 Dan Zeman
38 | # This file is distributed under the GNU GPL v2 or later. See $TMT_ROOT/README.
39 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/HamleDT/Test/NonParentAuxS.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Block::HamleDT::Test::NonParentAuxS;
 2 | use Moose;
 3 | use Treex::Core::Common;
 4 | extends 'Treex::Block::Test::BaseTester';
 5 | 
 6 | sub process_anode {
 7 |     my ($self, $anode) = @_;
 8 |     # Report an AuxS node for which get_parents yields a true value.
 9 |     my $is_auxs = $anode->deprel eq 'AuxS';
10 |     if ($is_auxs && $anode->get_parents) {
11 |         $self->complain($anode);
12 |     }
13 | }
14 | 
15 | 1;
16 | 
17 | =over
18 | 
19 | =item Treex::Block::HamleDT::Test::NonParentAuxS
20 | 
21 | AuxS must not have a parent.
22 | 
23 | =back
24 | 
25 | =cut
26 | 
27 | # Copyright 2011 Zdenek Zabokrtsky
28 | # This file is distributed under the GNU GPL v2 or later. See $TMT_ROOT/README.
29 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/HamleDT/Test/NonemptyAttr.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Block::HamleDT::Test::NonemptyAttr;
 2 | use Moose;
 3 | use Treex::Core::Common;
 4 | extends 'Treex::Block::Test::BaseTester';
 5 | 
 6 | sub process_anode {
 7 |     my ($self, $anode) = @_;
 8 |     # Each attribute is checked on its own, so one node may be reported up to three times.
 9 |     for my $attribute (qw(form lemma tag)) {
10 |         my $value = $anode->get_attr($attribute);
11 |         next if defined $value && $value ne '';
12 |         $self->complain($anode, $attribute);
13 |     }
14 | }
15 | 
16 | 1;
17 | 
18 | =over
19 | 
20 | =item Treex::Block::HamleDT::Test::NonemptyAttr
21 | 
22 | Report attributes form, lemma, or tag with empty string or undefined value.
23 | 
24 | =back
25 | 
26 | =cut
27 | 
28 | # Copyright 2011 Zdenek Zabokrtsky
29 | # This file is distributed under the GNU GPL v2 or later. See $TMT_ROOT/README.
30 | 
31 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/HamleDT/Test/UD/SingleRoot.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Block::HamleDT::Test::UD::SingleRoot;
 2 | use Moose;
 3 | use Treex::Core::Common;
 4 | extends 'Treex::Block::Test::BaseTester';
 5 | 
 6 | sub process_atree
 7 | {
 8 |     my ($self, $root) = @_;
 9 | 
10 |     # Universal Dependencies allow a single top node, i.e. one child of our artificial root (dependency label 'root').
11 |     my @children_of_root = $root->children();
12 | 
13 |     # The second child is the node that gets blamed for the violation.
14 |     $self->complain($children_of_root[1], 'More than one top node.')
15 |         if(@children_of_root > 1);
16 | }
17 | 
18 | 1;
19 | 
20 | =over
21 | 
22 | =item Treex::Block::HamleDT::Test::UD::SingleRoot
23 | 
24 | There must be just one top node.
25 | 
26 | We call the child of our artificial root node the top node.
27 | This is the actual sentence root from the linguistic point of view.
28 | 
29 | =back
30 | 
31 | =cut
32 | 
33 | # Copyright 2015 Dan Zeman
34 | # This file is distributed under the GNU GPL v2 or later. See $TMT_ROOT/README.
35 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/HamleDT/Transform/ComplexVerbRootFirst.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Block::HamleDT::Transform::ComplexVerbRootFirst;
 2 | use Moose;
 3 | extends 'Treex::Block::HamleDT::Transform::BaseTransformer';
 4 | use Treex::Tool::ATreeTransformer::ComplexVerb;
 5 | 
 6 | sub BUILD {
 7 |     my ($self) = @_;
 8 |     # Create the ComplexVerb transformer with new_root set to 'first' and register it on this block.
 9 |     my $transformer = Treex::Tool::ATreeTransformer::ComplexVerb->new(
10 |         {
11 |             subscription => $self->subscription,
12 |             new_root     => 'first',
13 |         }
14 |     );
15 |     $self->set_transformer($transformer);
16 | }
17 | 
18 | 1;
19 | 
20 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/HamleDT/Transform/ComplexVerbRootLast.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Block::HamleDT::Transform::ComplexVerbRootLast;
 2 | use Moose;
 3 | extends 'Treex::Block::HamleDT::Transform::BaseTransformer';
 4 | use Treex::Tool::ATreeTransformer::ComplexVerb;
 5 | 
 6 | sub BUILD {
 7 |     my ($self) = @_;
 8 |     # Create the ComplexVerb transformer with new_root set to 'last' and register it on this block.
 9 |     my $transformer = Treex::Tool::ATreeTransformer::ComplexVerb->new(
10 |         {
11 |             subscription => $self->subscription,
12 |             new_root     => 'last',
13 |         }
14 |     );
15 |     $self->set_transformer($transformer);
16 | }
17 | 
18 | 1;
19 | 
20 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/Import/Sentences.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Block::Import::Sentences;
 2 | use Moose;
 3 | 
 4 | use Treex::Core::Common;
 5 | 
 6 | extends 'Treex::Core::Block';
 7 | 
 8 | has 'from' => ( is => 'ro', isa => 'Treex::Core::Files', required => 1, coerce => 1 );
 9 | 
10 | sub process_document {
11 |     my ($self, $doc) = @_;
12 |     # Reads lines from 'from' and stores each non-blank line as the sentence of a new zone (this block's language/selector) in the next bundle.
13 |     my @bundles = $doc->get_bundles();
14 |     my $bundle_count = scalar @bundles;
15 |     # NOTE(review): the loop also stops on any false line value, not only at end of input - confirm what next_line() returns.
16 |     while (my $line = $self->from->next_line()) {
17 |         chomp $line;
18 |         if ($line =~ /^\s*$/) {
19 |             if (@bundles < $bundle_count) {
20 |                 log_fatal "Number of lines in the file to import does not correspond with number of bundles in the processed documents";
21 |             }
22 |             next;
23 |         }
24 |         my $bundle = shift @bundles;
25 |         my $new_zone = $bundle->create_zone($self->language, $self->selector);
26 |         $new_zone->set_sentence($line);
27 |         # Blank lines are tolerated only before the first sentence (see the check above); stop once every bundle has its sentence.
28 |         last if (!@bundles);
29 |     }
30 | }
31 | 
32 | 1;
33 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/MLFix/CS/Oracle.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Block::MLFix::CS::Oracle;
 2 | 
 3 | use Moose;
 4 | use utf8;
 5 | 
 6 | use Treex::Tool::MLFix::CS::FormGenerator;
 7 | 
 8 | extends 'Treex::Block::MLFix::Oracle';
 9 | 
10 | sub _build_form_generator {
11 | 	my $self = shift;
12 | 	# Returns a fresh instance of the Czech (CS) form generator.
13 | 	return Treex::Tool::MLFix::CS::FormGenerator->new;
14 | }
15 | 
16 | 1;
17 | 
18 | =head1 NAME
19 | 
20 | MLFix::CS::Oracle
21 | 
22 | =head1 DESCRIPTION
23 | 
24 | =head1 PARAMETERS
25 | 
26 | =over
27 | 
28 | =back
29 | 
30 | =head1 AUTHOR
31 | 
32 | Dušan Variš <varis@ufal.mff.cuni.cz>
33 | 
34 | =head1 COPYRIGHT AND LICENSE
35 | 
36 | Copyright © 2013 by Institute of Formal and Applied Linguistics,
37 | Charles University in Prague
38 | 
39 | This module is free software; you can redistribute it and/or modify it
40 | under the same terms as Perl itself.
41 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/MLFix/CS/ScikitLearn.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Block::MLFix::CS::ScikitLearn;
 2 | 
 3 | use Moose;
 4 | use utf8;
 5 | 
 6 | use Treex::Tool::MLFix::CS::FormGenerator;
 7 | 
 8 | extends 'Treex::Block::MLFix::ScikitLearn';
 9 | 
10 | sub _build_form_generator {
11 | 	my $self = shift;
12 | 	# Returns a fresh instance of the Czech (CS) form generator.
13 | 	return Treex::Tool::MLFix::CS::FormGenerator->new;
14 | }
15 | 
16 | 1;
17 | 
18 | =head1 NAME
19 | 
20 | MLFix::CS::ScikitLearn
21 | 
22 | =head1 DESCRIPTION
23 | 
24 | =head1 PARAMETERS
25 | 
26 | =over
27 | 
28 | =back
29 | 
30 | =head1 AUTHOR
31 | 
32 | Dušan Variš <varis@ufal.mff.cuni.cz>
33 | 
34 | =head1 COPYRIGHT AND LICENSE
35 | 
36 | Copyright © 2013 by Institute of Formal and Applied Linguistics,
37 | Charles University in Prague
38 | 
39 | This module is free software; you can redistribute it and/or modify it
40 | under the same terms as Perl itself.
41 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/Print/AtreeStats.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Block::Print::AtreeStats;
 2 | use Moose;
 3 | use Treex::Core::Common;
 4 | extends 'Treex::Core::Block';
 5 | 
 6 | has '+language' 				=> ( required => 1 );
 7 | has '+selector'		=> (required => 1);
 8 | 
 9 | has '_num_sentences' => (is => 'rw', isa => 'Int', default => 0);
10 | has '_total_tokens' => (is => 'rw', isa => 'Int', default => 0);
11 | 
12 | sub process_atree {
13 |     my ($self, $tree) = @_;    # called per a-tree: bump both running totals
14 |     my $token_count = () = $tree->get_descendants( { ordered => 1 } );
15 |     $self->_set_num_sentences( 1 + $self->_num_sentences );
16 |     $self->_set_total_tokens( $token_count + $self->_total_tokens );
17 | }
18 | 
19 | sub process_end {
20 |     my $self = shift;    # print the two accumulated totals, one per line
21 |     print "Number of Sentences:\t" . $self->_num_sentences() . "\n";
22 |     print "Number of Tokens:\t" . $self->_total_tokens() . "\n";
23 | }
24 | 
25 | 1;


--------------------------------------------------------------------------------
/lib/Treex/Block/Print/Debug/DocumentTextHead.pm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ufal/treex/06c3353d9d7c61ff86c572cd926b055733a73442/lib/Treex/Block/Print/Debug/DocumentTextHead.pm


--------------------------------------------------------------------------------
/lib/Treex/Block/Read/PDT_schema/pml_common.rng:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0"?>
 2 | <grammar xmlns="http://relaxng.org/ns/structure/1.0"
 3 |   xmlns:pml="http://ufal.mff.cuni.cz/pdt/pml/"
 4 |   datatypeLibrary="http://www.w3.org/2001/XMLSchema-datatypes">
 5 | 
 6 |   <define name="id.attribute">
 7 |     <attribute name="id"><text/></attribute>
 8 |   </define>
 9 | 
10 |   <define name="head.element">
11 |     <element name="pml:head">
12 |       <element name="pml:schema">
13 | 	<attribute name="href"><text/></attribute>
14 |       </element>
15 |       <optional>
16 | 	<element name="pml:references">
17 | 	  <zeroOrMore>
18 | 	    <element name="pml:reffile">
19 | 	      <attribute name="id">
20 | 		<data type="ID"/>
21 | 	      </attribute>
22 | 	      <attribute name="name">
23 | 		<text/>
24 | 	      </attribute>
25 | 	      <attribute name="href">
26 | 		<text/>
27 | 	      </attribute>
28 | 	    </element>
29 | 	  </zeroOrMore>
30 | 	</element>
31 |       </optional>
32 |     </element>
33 |   </define>
34 | 
35 | </grammar>
36 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/Read/t/base.t:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | 
 3 | use strict;
 4 | use warnings;
 5 | 
 6 | use Test::More;
 7 | use Test::Output;
 8 | BEGIN { require_ok('Treex::Block::Read::BaseReader') }
 9 | 
10 | my $reader = Treex::Block::Read::BaseReader->new( from => '-', file_stem => 'test' );
11 | isa_ok( $reader, 'Treex::Block::Read::BaseReader' );
12 | # Calling next_document on the base class must produce the "must be overridden" message on STDERR.
13 | stderr_like(
14 |     sub {
15 |         eval { $reader->next_document() };
16 |     },
17 |     qr/method next_document must be overridden in/,
18 |     'subroutine next_document has to fail'
19 | );
20 | # The single 'from' entry ('-') is expected to count as one document.
21 | cmp_ok( $reader->number_of_documents(), '==', 1, 'There should be exactly one document' );
22 | # Presumably advances to the first input so that current_filename() can report it.
23 | $reader->next_filename();
24 | 
25 | is( $reader->current_filename(), '-', 'Current file is STDIN(-)' );
26 | 
27 | done_testing();
28 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/Read/t/base_aligned.t:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | 
 3 | use strict;
 4 | use warnings;
 5 | 
 6 | use Test::More tests => 3;
 7 | use Treex::Block::Read::BaseAlignedReader;
 8 | use Test::Output;
 9 | 
10 | my $reader = new_ok('Treex::Block::Read::BaseAlignedReader');
11 | 
12 | # Calling next_document on the base class must report on STDERR that it has to be overridden.
13 | stderr_like(
14 |     sub {
15 |         eval {
16 |             $reader->next_document();
17 |         }
18 |     },
19 |     qr/next_document must be overriden/,
20 |     q(require next_document overriden)
21 | );
22 | # Placeholder: an always-failing TODO test, a reminder that the filename methods are still untested.
23 | TODO: {
24 |     local $TODO = 'Need tests on (next|current)_filenames a spol.';
25 | 
26 |     fail('Write some tests');
27 | }
28 | 
29 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/Read/t/base_aligned_text.t:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | 
 3 | use strict;
 4 | use warnings;
 5 | 
 6 | use Test::More tests => 2;
 7 | use_ok('Treex::Block::Read::BaseAlignedTextReader');
 8 | # Smoke test only: the class must load and be constructible without arguments.
 9 | my $reader = new_ok('Treex::Block::Read::BaseAlignedTextReader');
10 | 
11 | __END__
12 | Stable test should not produce errors/warnings on STDERR, BaseAlignedTextReader will be substituted by another solution in future anyway
13 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/Read/t/cdt_tag.t:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | 
 3 | use strict;
 4 | use warnings;
 5 | 
 6 | use Test::More;
 7 | use Treex::Block::Read::CdtTag;
 8 | use File::Basename;
 9 | my $my_dir = dirname($0);
10 | # The three sample .tag files are looked up in the directory of this test script.
11 | my $reader = Treex::Block::Read::CdtTag->new(
12 |     from => join ',', map {"$my_dir/$_"} qw(cdt-test-0005-da.tag cdt-test-0005-es-lotte.tag cdt-test-0005-it-lisa.tag),
13 | );
14 | # Drain the reader; every returned document is collected.
15 | my @documents;
16 | my $new_document;
17 | while ($new_document = $reader->next_document) {
18 |     push @documents, $new_document;
19 | }
20 | 
21 | # One document is expected per input file.
22 | is(scalar(@documents), 3, q(All test tag files loaded));
23 | 
24 | done_testing();
25 | 
26 | 
27 | END {
28 | # Placeholder for cleanup; nothing is deleted here at present.
29 | }
30 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/Read/t/gzip.t:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | 
 3 | use strict;
 4 | use warnings;
 5 | 
 6 | use Test::More;
 7 | use Treex::Block::Read::Text;
 8 | use PerlIO::via::gzip;
 9 | 
10 | my $value = int rand 100;    # random integer 0..99 used as the file content
11 | my $filename = 'text' . (int rand 100) . '.gz';    # NOTE(review): relative path with a random suffix; could collide with an existing file
12 | open my $f, '>:via(gzip)', $filename or die($!);                                # write through the gzip PerlIO layer
13 | print $f $value;                                                               # the value is written without a trailing newline
14 | close $f;
15 | my $reader = Treex::Block::Read::Text->new( language => 'en', from => $filename );
16 | # The text read back from the .gz file must equal the value written above.
17 | my $doc = $reader->next_document();
18 | is($doc->get_zone('en')->text, $value, q(Doc reader succesfully read generated value));
19 | done_testing();
20 | END {
21 |     unlink $filename;    # remove the generated file when the script exits
22 | }
23 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/Read/t/pcedt.t:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | 
 3 | use strict;
 4 | use warnings;
 5 | 
 6 | use Test::More tests => 1;
 7 | 
 8 | use Treex::Core;
 9 | 
10 | my $test_file  = "/net/work/people/toman/pcedt_data/pdtpml/00/wsj_0010_en.t.gz";
11 | my $schema_dir = "/net/os/h/zabokrtsky/svn_checkouts/pcedt_release/schemata";
12 | # NOTE(review): both paths are absolute and site-specific; only the commented-out scenario below refers to them.
13 | # Stable test should not produce errors/warnings on STDERR
14 | #my $scenario = Treex::Core::Scenario->new(
15 | #    { from_string => "Read::PCEDT from=$test_file schema_dir=$schema_dir Write::Treex path=./" }
16 | #);
17 | TODO: {
18 |     local $TODO = q(PDEDT reader not ready yet);
19 |     my $result;    # stays undefined while the real run below is commented out, so the TODO test fails
20 |     #my $result = eval {$scenario->run};
21 |     ok( $result, 'bunch of PCEDT files can be opened' );
22 | }
23 | 
24 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/Read/t/word_alignment_xml.t:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | 
 3 | use strict;
 4 | use warnings;
 5 | 
 6 | use Test::More;
 7 | use Treex::Block::Read::WordAlignmentXML;
 8 | use File::Basename;
 9 | my $my_dir = dirname($0);
10 | # The sample .wa file is looked up in the directory of this test script.
11 | my $reader = Treex::Block::Read::WordAlignmentXML->new(
12 |     from => "$my_dir/word_alignment_xml_sample.wa",
13 | );
14 | # Collect the a-tree descendants of the 'en' zone from every bundle of the first document.
15 | my $document = $reader->next_document;
16 | my @en_nodes = map {$_->get_zone('en')->get_atree->get_descendants} $document->get_bundles;
17 | 
18 | is(scalar(@en_nodes), 41, q(Correct number of English tokens read from the wa-file.));
19 | 
20 | done_testing();
21 | 
22 | END {
23 | # Placeholder for cleanup; nothing is deleted here at present.
24 | }
25 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/Segment/NaiveSuggestBreaks.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Block::Segment::NaiveSuggestBreaks;
 2 | use Moose;
 3 | use Treex::Core::Common;
 4 | extends 'Treex::Block::Segment::SuggestSegmentBreaks';
 5 | 
 6 | 
 7 | sub _find_breaks {
 8 |     my ($self, $scores) = @_;
 9 | 
10 |     # Naive strategy: the score values are never inspected,
11 |     # only the number of scores matters.
12 |     my $score_count = scalar @{$scores};
13 | 
14 |     # Every index from 1 up that is a multiple of max_size becomes a break.
15 |     my @break_positions = grep { $_ % $self->max_size == 0 } ( 1 .. $score_count - 1 );
16 | 
17 |     # The indices are handed back as a plain list.
18 |     return @break_positions;
19 | }
20 | 
21 | sub name {    # returns the empty string
22 |     return q{};
23 | }
24 | 
25 | 1;
26 | 
27 | # TODO POD
28 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/SemevalABSA/AnnotateWithRules.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Block::SemevalABSA::AnnotateWithRules;
 2 | use utf8;
 3 | use Moose;
 4 | use Treex::Core::Common;
 5 | extends 'Treex::Core::Block';
 6 | 
 7 | sub process_anode {
 8 |     my ( $self, $anode ) = @_;
 9 |     if ( my $rules = $anode->wild->{absa_rules} ) {    # nodes without a true absa_rules value stay untouched
10 |         $anode->set_form( $anode->form . "#RULES#" . $rules );
11 |         $anode->set_lemma( $anode->lemma . "#RULES#" . $rules );
12 |     }
13 | }
14 | 
15 | 1;
16 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/SemevalABSA/MoveABSAFromWild.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Block::SemevalABSA::MoveABSAFromWild;
 2 | use utf8;
 3 | use Moose;
 4 | use Treex::Core::Common;
 5 | extends 'Treex::Core::Block';
 6 | 
 7 | sub process_anode {
 8 |     my ( $self, $anode ) = @_;
 9 |     my $wild = $anode->wild;
10 |     if ( $wild->{absa_is_aspect} ) {
11 |         my $polarity = $wild->{absa_polarity};
12 |         for ($polarity) {    # shorten the polarity word to its one-character symbol
13 |             s/positive/+/; s/negative/-/; s/neutral/0/;
14 |         }
15 |         $anode->set_form( $anode->form . "#ASP#$polarity" );
16 |         $anode->set_lemma( $anode->lemma . "#ASP#$polarity" );
17 |         delete $wild->{absa_polarity};    # the annotation now lives in form and lemma
18 |         delete $wild->{absa_is_aspect};
19 |     }
20 | }
21 | 
22 | 1;
23 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/SemevalABSA/MoveABSAToWildCandidates.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Block::SemevalABSA::MoveABSAToWildCandidates;
 2 | use utf8;
 3 | use Moose;
 4 | use Treex::Core::Common;
 5 | extends 'Treex::Core::Block';
 6 | 
 7 | sub process_atree {    # Turns tokens between _ASPECT_START_<polarity>_ and _ASPECT_END_ markers into wild->{absa_rules} annotations.
 8 |     my ( $self, $atree ) = @_;
 9 |     my @nodes = $atree->get_descendants( { ordered => 1 } );    # the markers bracket tokens in surface order
10 |     my $isaspect = 0;     # true while we are between a START and an END marker
11 |     my $polarity = "";    # polarity captured from the most recent START marker
12 |     for my $node (@nodes) {
13 |         if ($node->{form} =~ m/^_ASPECT_START_(.*)_$/) {
14 |             $polarity = $1;
15 |             $isaspect = 1;
16 |             $node->remove;    # marker tokens are not kept in the tree
17 |         } elsif ($node->{form} =~ m/^_ASPECT_END_$/) {
18 |             $isaspect = 0;
19 |             $node->remove;
20 |         } else {
21 |             if ($isaspect) {
22 |                 $node->wild->{absa_rules} = "bsln^$polarity";
23 |             }
24 |         }
25 |     }
26 | 
27 |     return 1;
28 | }
29 | 
30 | 1;
31 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/T2A/AddInterleavedFormemeNodes.pm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ufal/treex/06c3353d9d7c61ff86c572cd926b055733a73442/lib/Treex/Block/T2A/AddInterleavedFormemeNodes.pm


--------------------------------------------------------------------------------
/lib/Treex/Block/T2A/CS/CheckCommas.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Block::T2A::CS::CheckCommas;
 2 | use Moose;
 3 | use Treex::Core::Common;
 4 | 
 5 | extends 'Treex::Core::Block';
 6 | 
 7 | # Add a comma a-node after the lexical a-node of each t-node flagged with
 8 | # wild->{check_comma_after}, unless nothing follows or a comma is already there.
 9 | sub process_tnode {
10 |     my ( $self, $tnode ) = @_;
11 |     return if !$tnode->wild->{check_comma_after};
12 |     my $anode    = $tnode->get_lex_anode or return;
13 |     my $follower = $anode->get_next_node or return;
14 |     return if $follower->lemma eq ",";
15 |     # The comma is created as a child of $anode and then placed right behind it.
16 |     my $comma = $anode->create_child( {
17 |         'form'          => ',',
18 |         'lemma'         => ',',
19 |         'afun'          => 'AuxX',
20 |         'morphcat/pos'  => 'Z',
21 |         'clause_number' => 0,
22 |     } );
23 |     $comma->shift_after_node($anode);
24 | }
25 | 
26 | 1;
27 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/T2A/EN/AddInfinitiveParticles.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Block::T2A::EN::AddInfinitiveParticles;
 2 | 
 3 | use utf8;
 4 | use Moose;
 5 | use Treex::Core::Common;
 6 | 
 7 | extends 'Treex::Block::T2A::AddInfinitiveParticles';
 8 | 
 9 | override 'works_as_conj' => sub {    # true for every particle other than 'to'
10 |     my ( $self, $particle ) = @_;
11 |     return $particle ne 'to';
12 | };
13 | 
14 | 1;
15 | 
16 | __END__
17 | 
18 | =encoding utf-8
19 | 
20 | =head1 NAME 
21 | 
22 | Treex::Block::T2A::EN::AddInfinitiveParticles
23 | 
24 | =head1 DESCRIPTION
25 | 
26 | The particle 'to' is added to English infinitives. Other prepositions
27 | in constructions such as "It's time for him to go home." are added
28 | as well.
29 | 
30 | =head1 AUTHORS 
31 | 
32 | Ondřej Dušek <odusek@ufal.mff.cuni.cz>
33 | 
34 | =head1 COPYRIGHT AND LICENSE
35 | 
36 | Copyright © 2014 by Institute of Formal and Applied Linguistics, Charles University in Prague
37 | This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself.
38 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/T2A/EN/AddPrepos.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Block::T2A::EN::AddPrepos;
 2 | use Moose;
 3 | use Treex::Core::Common;
 4 | extends 'Treex::Block::T2A::AddPrepos';
 5 | 
 6 | override 'get_prep_forms' => sub {    # preposition part of a formeme, or undef
 7 |     my ( $self, $formeme ) = @_;
 8 |     return undef unless $formeme;
 9 |     # "n:X+..." and "adj:X+..." are tried first; "v:X+ger" is the fallback.
10 |     my ($prep_forms) = $formeme =~ /(?:n|adj):(.+)\+/;
11 |     ($prep_forms) = $formeme =~ /v:(.+)\+ger/ if !$prep_forms;
12 |     return $prep_forms;
13 | };
14 | 
15 | 1;
16 | 
17 | __END__
18 | 
19 | =encoding utf-8
20 | 
21 | =head1 NAME 
22 | 
23 | Treex::Block::T2A::EN::AddPrepos
24 | 
25 | =head1 DESCRIPTION
26 | 
27 | Adding prepositional a-nodes according to prepositions contained in t-nodes' formemes.
28 | 
29 | English-specific: adding prepositions to gerunds. 
30 | 
31 | =head1 AUTHORS
32 | 
33 | Ondřej Dušek <odusek@ufal.mff.cuni.cz>
34 | 
35 | =head1 COPYRIGHT AND LICENSE
36 | 
37 | Copyright © 2014 by Institute of Formal and Applied Linguistics, Charles University in Prague
38 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/T2A/EN/CapitalizeSentStart.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Block::T2A::EN::CapitalizeSentStart;
 2 | use utf8;
 3 | use Moose;
 4 | use Treex::Core::Common;
 5 | extends 'Treex::Block::T2A::CapitalizeSentStart';
 6 | 
 7 | has '+opening_punct' => ( isa => 'Str', is => 'ro', default => '({[‘“«‹|*"\'' );    # '+' overrides the inherited attribute; the default lists the opening punctuation characters
 8 | 
 9 | 1;
10 | 
11 | __END__
12 | 
13 | =encoding utf-8
14 | 
15 | =head1 NAME 
16 | 
17 | Treex::Block::T2A::EN::CapitalizeSentStart
18 | 
19 | =head1 DESCRIPTION
20 | 
21 | Capitalize the first letter of the first (non-punctuation)
22 | token in the sentence, and do the same for direct speech sections.
23 | 
24 | This contains just English-specific settings for L<Treex::Block::T2A::CapitalizeSentStart>. 
25 | 
26 | =head1 AUTHORS 
27 | 
28 | Ondřej Dušek <odusek@ufal.mff.cuni.cz>
29 | 
30 | =head1 COPYRIGHT AND LICENSE
31 | 
32 | Copyright © 2014 by Institute of Formal and Applied Linguistics, Charles University in Prague
33 | This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself.
34 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/T2A/EN/FixFlectErrors.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Block::T2A::EN::FixFlectErrors;
 2 | 
 3 | use utf8;
 4 | use Moose;
 5 | use Treex::Core::Common;
 6 | 
 7 | extends 'Treex::Core::Block';
 8 | 
 9 | # Fix known bad forms: 'badder' -> 'worse', 'halfs' -> 'half'; forms starting with '.' or ',' get the lemma.
10 | sub process_anode {
11 |     my ( $self, $a_node ) = @_;
12 |     my $form  = $a_node->form // '';
13 |     my $lemma = $a_node->lemma;
14 |     if ( $form eq 'badder' ) {
15 |         $a_node->set_form('worse');
16 |     }
17 |     elsif ( $form eq 'halfs' ) {
18 |         $a_node->set_form('half');
19 |     }
20 |     # Without a lemma there is nothing to restore, so the form is never set to undef.
21 |     elsif ( $form =~ /^[.,]/ and defined $lemma and $form ne $lemma ) {
22 |         $a_node->set_form($lemma);
23 |     }
24 |     return;
25 | }
26 | 
27 | 1;
28 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/T2A/ES/AddAuxVerbTense.pm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ufal/treex/06c3353d9d7c61ff86c572cd926b055733a73442/lib/Treex/Block/T2A/ES/AddAuxVerbTense.pm


--------------------------------------------------------------------------------
/lib/Treex/Block/T2A/ES/AddPrepos.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Block::T2A::ES::AddPrepos;
 2 | use Moose;
 3 | use Treex::Core::Common;
 4 | extends 'Treex::Block::T2A::AddPrepos';
 5 | 
 6 | # In Spanish, adverbs seem to take prepositions as well (e.g. "por allí"), hence "adv" in the pattern.
 7 | has '+formeme_prep_regexp' => ( default => '^(?:n|adj|adv):(.+)[+]' );
 8 | 
 9 | 1;
10 | 
11 | __END__
12 | 
13 | =encoding utf-8
14 | 
15 | =head1 NAME 
16 | 
17 | Treex::Block::T2A::ES::AddPrepos
18 | 
19 | =head1 DESCRIPTION
20 | 
21 | Adding prepositional a-nodes according to prepositions contained in t-nodes' formemes.
22 | In Spanish, it seems adverbs may have prepositions as well (e.g. "por allí").
23 | 
24 | =head1 AUTHORS
25 | 
26 | Martin Popel <popel@ufal.mff.cuni.cz>
27 | 
28 | =head1 COPYRIGHT AND LICENSE
29 | 
30 | Copyright © 2014 by Institute of Formal and Applied Linguistics, Charles University in Prague
31 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/T2A/ES/AddReflexive.pm:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ufal/treex/06c3353d9d7c61ff86c572cd926b055733a73442/lib/Treex/Block/T2A/ES/AddReflexive.pm


--------------------------------------------------------------------------------
/lib/Treex/Block/T2A/EU/AddSentFinalPunct.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Block::T2A::EU::AddSentFinalPunct;
 2 | use Moose;
 3 | use Treex::Core::Common;
 4 | extends 'Treex::Block::T2A::AddSentFinalPunct';
 5 | 
 6 | # EU-specific override: the check is switched off, so the method returns 0
 7 | # for every token.
 8 | override '_ends_with_clause_in_quotes' => sub {
 9 |     my ( $self, $last_token ) = @_;
10 |     return 0;
11 | };
12 | 
13 | 1;
14 | 
15 | __END__
16 | 
17 | =encoding utf-8
18 | 
19 | =head1 NAME 
20 | 
21 | Treex::Block::T2A::EU::AddSentFinalPunct
22 | 
23 | =head1 DESCRIPTION
24 | 
25 | Override '_ends_with_clause_in_quotes'
26 | 
27 | =head1 AUTHORS 
28 | 
29 | Gorka Labaka <gorka.labaka@ehu.eus>
30 | 
31 | =head1 COPYRIGHT AND LICENSE
32 | 
33 | Copyright © 2008-2014 by Institute of Formal and Applied Linguistics, Charles University in Prague
34 | This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself.
35 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/T2A/EU/GenerateGazeteerItems.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Block::T2A::EU::GenerateGazeteerItems;
 2 | use Moose;
 3 | extends 'Treex::Core::Block';
 4 | 
 5 | # Copy the lemma into the form of the lexical a-node when the t-lemma origin
 6 | # is "lookup-TrGazeteerItems".
 7 | sub process_tnode {
 8 |     my ( $self, $tnode ) = @_;
 9 |     my $anode = $tnode->get_lex_anode() or return;
10 |     my $origin = $tnode->t_lemma_origin || "";
11 |     return if $origin ne "lookup-TrGazeteerItems";
12 |     # The form becomes an exact copy of the lemma.
13 |     $anode->set_form( $anode->lemma );
14 | }
15 | 
16 | 1;
17 | 
18 | __END__
19 | 
20 | =encoding utf-8
21 | 
22 | =head1 NAME 
23 | 
24 | Treex::Block::T2A::EU::GenerateGazeteerItems
25 | 
26 | =head1 DESCRIPTION
27 | 
28 | Gazeteer items should be treated as proper names, which are (usually) not inflected.
29 | 
30 | =head1 AUTHORS 
31 | 
32 | Gorka Labaka <gorka.labaka@ehu.eus>
33 | 
34 | =head1 COPYRIGHT AND LICENSE
35 | 
36 | Copyright © 2014 by Institute of Formal and Applied Linguistics, Charles University in Prague
37 | This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself.
38 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/T2A/MarkSubject.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Block::T2A::MarkSubject;
 2 | use Moose;
 3 | use Treex::Core::Common;
 4 | extends 'Treex::Core::Block';
 5 | 
 6 | # Set afun "Sb" on the lexical a-node of each t-node whose formeme is "n:subj".
 7 | sub process_tnode {
 8 |     my ( $self, $t_node ) = @_;
 9 |     return if $t_node->formeme ne 'n:subj';
10 |     my $subject_anode = $t_node->get_lex_anode() or return;
11 |     $subject_anode->set_afun('Sb');
12 |     return;
13 | }
14 | 
15 | 
16 | 1;
17 | 
18 | __END__
19 | 
20 | =encoding utf-8
21 | 
22 | =head1 NAME 
23 | 
24 | Treex::Block::T2A::MarkSubject
25 | 
26 | =head1 DESCRIPTION
27 | 
28 | Fill afun=Sb for anodes which correspond to t-nodes with formeme "n:subj".
29 | 
30 | =head1 AUTHORS
31 | 
32 | Martin Popel <popel@ufal.mff.cuni.cz>
33 | 
34 | =head1 COPYRIGHT AND LICENSE
35 | 
36 | Copyright © 2014 by Institute of Formal and Applied Linguistics, Charles University in Prague
37 | 
38 | This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself.
39 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/T2A/NL/RestoreVerbPrefixes.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Block::T2A::NL::RestoreVerbPrefixes;
 2 | use utf8;
 3 | use Moose;
 4 | use Treex::Core::Common;
 5 | extends 'Treex::Core::Block';
 6 | 
 7 | # Prepend the prefix stored in wild->{verbal_prefix} (if any) to the node's form.
 8 | sub process_anode {
 9 |     my ( $self, $anode ) = @_;
10 |     my $prefix = $anode->wild->{verbal_prefix};
11 |     if ($prefix) {
12 |         $anode->set_form( $prefix . $anode->form );
13 |     }
14 |     return;
15 | }
16 | 
17 | 1;
18 | 
19 | __END__
20 | 
21 | =encoding utf-8
22 | 
23 | =head1 NAME 
24 | 
25 | Treex::Block::T2A::NL::RestoreVerbPrefixes
26 | 
27 | =head1 DESCRIPTION
28 | 
29 | Verbal separable prefixes are restored after morphology generation.
30 | 
31 | =head1 AUTHORS 
32 | 
33 | Ondřej Dušek <odusek@ufal.mff.cuni.cz>
34 | 
35 | =head1 COPYRIGHT AND LICENSE
36 | 
37 | Copyright © 2014 by Institute of Formal and Applied Linguistics, Charles University in Prague
38 | This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself.
39 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/T2A/PT/AddPrepos.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Block::T2A::PT::AddPrepos;
 2 | use Moose;
 3 | use Treex::Core::Common;
 4 | extends 'Treex::Block::T2A::AddPrepos';
 5 | 
 6 | # In Portuguese, adverbs seem to take prepositions as well (e.g. "por ali"), hence "adv" in the pattern.
 7 | has '+formeme_prep_regexp' => ( default => '^(?:n|adj|adv):(.+)[+]' );
 8 | 
 9 | 1;
10 | 
11 | __END__
12 | 
13 | =encoding utf-8
14 | 
15 | =head1 NAME 
16 | 
17 | Treex::Block::T2A::PT::AddPrepos
18 | 
19 | =head1 DESCRIPTION
20 | 
21 | Adding prepositional a-nodes according to prepositions contained in t-nodes' formemes.
22 | In Portuguese, it seems adverbs may have prepositions as well (e.g. "por ali").
23 | 
24 | =head1 AUTHORS
25 | 
26 | Martin Popel <popel@ufal.mff.cuni.cz>
27 | 
28 | =head1 COPYRIGHT AND LICENSE
29 | 
30 | Copyright © 2014 by Institute of Formal and Applied Linguistics, Charles University in Prague
31 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/T2A/RU/AddNegation.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Block::T2A::RU::AddNegation;
 2 | use utf8;
 3 | use Moose;
 4 | use Treex::Core::Common;
 5 | extends 'Treex::Core::Block';
 6 | 
 7 | # Add the negation particle "не" as a new a-node in front of the lexical a-node of negated verbs.
 8 | sub process_tnode {
 9 |     my ( $self, $tnode ) = @_;
10 |     return if ( $tnode->gram_negation || '' ) ne 'neg1';
11 |     return if ( $tnode->gram_sempos   || '' ) ne 'v';
12 |     # Without a lexical a-node there is nothing to attach the particle to.
13 |     my $anode = $tnode->get_lex_anode() or return;
14 |     my $new_node = $anode->create_child();
15 |     $new_node->shift_before_node($anode);
16 |     $new_node->reset_morphcat();
17 |     $new_node->set_lemma('не');
18 |     $new_node->set_form('не');
19 |     $tnode->add_aux_anodes($new_node);    # register the particle among the t-node's aux a-nodes
20 |     return;
21 | }
22 | 
23 | 1;
24 | 
25 | =over
26 | 
27 | =item Treex::Block::T2A::RU::AddNegation
28 | 
29 | Add a new a-node which represents a verbal negation particle ("не").
30 | 
31 | =back
32 | 
33 | =cut
34 | 
35 | # Copyright 2012 Martin Popel
36 | 
37 | # This file is distributed under the GNU General Public License v2. See $TMT_ROOT/README.
38 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/T2A/RU/DropCopula.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Block::T2A::RU::DropCopula;
 2 | use utf8;
 3 | use Moose;
 4 | use Treex::Core::Common;
 5 | extends 'Treex::Core::Block';
 6 | 
 7 | # Blank the a-node lemma of the copula "быть" when its tense grammateme is "sim".
 8 | sub process_tnode {
 9 |     my ( $self, $t_node ) = @_;
10 |     return if $t_node->t_lemma ne 'быть';
11 |     return if ( $t_node->gram_tense || '' ) ne 'sim';
12 |     # The a-node itself is kept; only its lemma is emptied.
13 |     my $copula_anode = $t_node->get_lex_anode() or return;
14 |     $copula_anode->set_lemma('');
15 |     return;
16 | }
17 | 
18 | 
19 | 1;
20 | 
21 | __END__
22 | 
23 | =encoding utf8
24 | 
25 | =head1 NAME
26 | 
27 | Treex::Block::T2A::RU::DropCopula - delete verb "to be"
28 | 
29 | =head1 DESCRIPTION
30 | 
31 | Russian copula verb (быть = to be) in present tense is dropped.
32 | E.g. "He is an idiot" -> "он дурак".
33 | 
34 | The current implementation just sets the m/lemma to an empty string.
35 | 
36 | # Copyright 2012 Martin Popel
37 | # This file is distributed under the GNU General Public License v2. See $TMT_ROOT/README.
38 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/T2T/CS2EN/FixDoubleNegative.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Block::T2T::CS2EN::FixDoubleNegative;
 2 | 
 3 | use utf8;
 4 | use Moose;
 5 | use Treex::Core::Common;
 6 | 
 7 | extends 'Treex::Core::Block';
 8 | 
 9 | # For verbs: if the clause contains a negative word, set the verb's negation
10 | # to neg0; a lone "no" under a non-subject parent is rewritten to "any" instead.
11 | sub process_tnode {
12 |     my ( $self, $t_node ) = @_;
13 |     return if ( $t_node->formeme !~ /^v/ );    # only solve verbs
14 |     my @negs = grep { $_->t_lemma =~ /^(no(_one|body|where|thing|ne|)?|never|not)$/ }
15 |         $t_node->get_clause_edescendants();
16 |     my $lone_no = @negs == 1
17 |         && $negs[0]->t_lemma eq 'no'
18 |         && $negs[0]->get_parent->formeme ne 'n:subj';
19 |     if ( !$lone_no ) {
20 |         $t_node->set_gram_negation('neg0') if @negs;
21 |         return;
22 |     }
23 |     # The lone "no" becomes "any" unless its source t-node has the lemma "ne".
24 |     my $neg = $negs[0];
25 |     my $src = $neg->src_tnode;
26 |     if ( !$src or $src->t_lemma ne 'ne' ) {
27 |         $neg->set_t_lemma('any');
28 |         $neg->set_t_lemma_origin('rule-FixDoubleNegative');
29 |     }
30 | }
31 | 
32 | 1;
33 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/T2T/CS2EN/RemoveInfinitiveSubjects.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Block::T2T::CS2EN::RemoveInfinitiveSubjects;
 2 | 
 3 | use utf8;
 4 | use Moose;
 5 | use Treex::Core::Common;
 6 | 
 7 | extends 'Treex::Core::Block';
 8 | 
 9 | # Delete the children of infinitive verbs that are #PersPron subjects
10 | # (t-lemma "#PersPron", formeme "n:subj").
11 | sub process_tnode {
12 |     my ( $self, $t_node ) = @_;
13 |     return if ( $t_node->formeme !~ /^v.*inf$/ );    # only solve infinitive verbs
14 |     # TODO: Sometimes (raising/control) the subject should not be deleted:
15 |     # Očekáváme, že přinese změnu -> We expect HIM to bring about a change.
16 |     for my $child ( $t_node->get_children() ) {
17 |         next if $child->t_lemma ne '#PersPron';
18 |         next if $child->formeme ne 'n:subj';
19 |         $child->remove();
20 |     }
21 | }
22 | 
23 | 1;
24 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/T2T/EN2CS/CutVariants.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Block::T2T::EN2CS::CutVariants;
 2 | use Moose;
 3 | use Treex::Core::Common;
 4 | extends 'Treex::Block::T2T::CutVariants';
 5 | 
 6 | sub BUILD {    # emit the deprecation warning when the block is constructed
 7 |     log_warn('This block is deprecated, use T2T::CutVariants instead');
 8 |     return;
 9 | }
10 | 
11 | 1;
12 | 
13 | =pod
14 | 
15 | This block is deprecated, use T2T::CutVariants instead
16 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/T2T/EN2CS/MoveEnoughBeforeAdj.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Block::T2T::EN2CS::MoveEnoughBeforeAdj;
 2 | use Moose;
 3 | use Treex::Core::Common;
 4 | extends 'Treex::Core::Block';
 5 | 
 6 | # Move "dost" in front of its parent when the parent has mlayer_pos 'A'
 7 | # and currently precedes it.
 8 | sub process_tnode {
 9 |     my ( $self, $tnode ) = @_;
10 |     return if ( $tnode->t_lemma || '' ) ne 'dost';
11 |     my $head = $tnode->get_parent;
12 |     return if ( $head->get_attr('mlayer_pos') || '' ) ne 'A';
13 |     return if !$head->precedes($tnode);
14 |     $tnode->shift_before_node($head);
15 | }
16 | 
17 | 1;
18 | 
19 | =over
20 | 
21 | =item Treex::Block::T2T::EN2CS::MoveEnoughBeforeAdj
22 | 
23 | T-nodes for 'enough' ('dost') that follow their governing adjective are moved
24 | in front of it. 'He is big enough' -> 'Je dost velky'.
25 | 
26 | =back
27 | 
28 | =cut
29 | 
30 | # Copyright 2010 Zdenek Zabokrtsky
31 | 
32 | # This file is distributed under the GNU General Public License v2. See $TMT_ROOT/README.
33 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/T2T/EN2CS/MoveJesteBeforeVerb.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Block::T2T::EN2CS::MoveJesteBeforeVerb;
 2 | use Moose;
 3 | use Treex::Core::Common;
 4 | extends 'Treex::Core::Block';
 5 | 
 6 | # Shift a childless "ještě" in front of its parent if the parent is negated
 7 | # (gram/negation = neg1) and currently precedes it.
 8 | sub process_tnode {
 9 |     my ( $self, $tnode ) = @_;
10 |     return if $tnode->t_lemma ne 'ještě';
11 |     return if $tnode->children;
12 |     # Both remaining conditions concern the parent node.
13 |     my $head    = $tnode->get_parent;
14 |     my $negated = ( $head->gram_negation || '' ) eq 'neg1';
15 |     if ( $negated && $head->precedes($tnode) ) {
16 |         $tnode->shift_before_node($head);
17 |     }
18 |     return;
19 | }
20 | 
21 | 1;
22 | 
23 | =over
24 | 
25 | =item Treex::Block::T2T::EN2CS::MoveJesteBeforeVerb
26 | 
27 | 'jeste' resulting from 'not yet' is moved in front of the negated verb.
28 | 
29 | =back
30 | 
31 | =cut
32 | 
33 | # Copyright 2011 Zdenek Zabokrtsky
34 | 
35 | # This file is distributed under the GNU General Public License v2. See $TMT_ROOT/README.
36 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/T2T/EN2CS/MovePersPronNextToVerb.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Block::T2T::EN2CS::MovePersPronNextToVerb;
 2 | use Moose;
 3 | use Treex::Core::Common;
 4 | extends 'Treex::Core::Block';
 5 | 
 6 | # Shift a #PersPron whose formeme does not start with "n:1" right after its
 7 | # verbal parent (formeme "v:...") when its ord is greater than the parent's.
 8 | sub process_tnode {
 9 |     my ( $self, $tnode ) = @_;
10 |     return if $tnode->t_lemma ne '#PersPron';
11 |     my $head = $tnode->get_parent;
12 |     return if $head->is_root;
13 |     return if $head->formeme !~ /^v:/;
14 |     return if $tnode->formeme =~ /^n:1/;
15 |     if ( $tnode->ord > $head->ord ) {
16 |         $tnode->shift_after_node($head);
17 |     }
18 |     return;
19 | }
20 | 
21 | 1;
22 | 
23 | =over
24 | 
25 | =item Treex::Block::T2T::EN2CS::MovePersPronNextToVerb
26 | 
27 | Non-subject #PersProns which are governed by a verb are shifted next to the verb.
28 | 
29 | =back
30 | 
31 | =cut
32 | 
33 | # Copyright 2010 David Marecek
34 | 
35 | # This file is distributed under the GNU General Public License v2. See $TMT_ROOT/README.
36 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/T2T/EN2CS/MoveRelClauseRight.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Block::T2T::EN2CS::MoveRelClauseRight;
 2 | use Moose;
 3 | use Treex::Core::Common;
 4 | extends 'Treex::Core::Block';
 5 | 
 6 | # Move a node with "rc" in its formeme behind the subtree of its parent when
 7 | # it precedes that parent and the parent's formeme starts with "n".
 8 | sub process_tnode {
 9 |     my ( $self, $tnode ) = @_;
10 |     return if $tnode->formeme !~ /rc/;
11 |     my $head = $tnode->get_parent;
12 |     return if !$tnode->precedes($head);
13 |     # The formeme of the parent is checked last, as in the original condition.
14 |     $tnode->shift_after_subtree($head) if $head->formeme =~ /^n/;
15 |     return;
16 | }
17 | 
18 | 1;
19 | 
20 | =over
21 | 
22 | =item Treex::Block::T2T::EN2CS::MoveRelClauseRight
23 | 
24 | Relative clauses placed before their governing nouns (created e.g.
25 | from ing-forms) are moved behind the nouns.
26 | 
27 | =back
28 | 
29 | =cut
30 | 
31 | # Copyright 2008-2011 Zdenek Zabokrtsky, David Marecek
32 | 
33 | # This file is distributed under the GNU General Public License v2. See $TMT_ROOT/README.
34 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/T2T/EN2EU/FixYouPl.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Block::T2T::EN2EU::FixYouPl;
 2 | use Moose;
 3 | use Treex::Core::Common;
 4 | extends 'Treex::Core::Block';
 5 | 
 6 | 
 7 | # Set gram/number to 'nr' on second-person pronouns, i.e. t-nodes whose
 8 | # gram/sempos starts with "n.pron" and whose gram/person is '2'.
 9 | sub process_tnode {
10 |     my ( $self, $tnode ) = @_;
11 |     # A real regex with an escaped dot and an anchor, so only "n.pron..." values match.
12 |     my $is_pronoun = ( $tnode->gram_sempos || "" ) =~ /^n\.pron/;
13 |     my $is_second  = ( $tnode->gram_person || "" ) eq '2';
14 |     $tnode->set_attr( "gram/number", 'nr' ) if $is_pronoun && $is_second;
15 |     return;
16 | }
17 | 
18 | 1;
19 | 
20 | __END__
21 | 
22 | =encoding utf-8
23 | 
24 | =head1 NAME 
25 | 
26 | Treex::Block::T2T::EN2EU::FixYouPl
27 | 
28 | =head1 DESCRIPTION
29 | 
30 | Some 'you' pronouns have a plural analysis; their number should be 'nr'.
31 | 
32 | =head1 AUTHORS 
33 | 
34 | Gorka Labaka <gorka.labaka@ehu.eus>
35 | 
36 | =head1 COPYRIGHT AND LICENSE
37 | 
38 | Copyright © 2014 by Institute of Formal and Applied Linguistics, Charles University in Prague
39 | This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself.
40 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/T2T/EN2EU/RemoveRelPron.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Block::T2T::EN2EU::RemoveRelPron;
 2 | use Moose;
 3 | use Treex::Core::Common;
 4 | use utf8;
 5 | 
 6 | extends 'Treex::Core::Block';
 7 | 
 8 | # Replace the t-lemma "that" by "#PersPron" when the parent's formeme starts
 9 | # with "v:rc"; the change is recorded in t_lemma_origin.
10 | sub process_tnode {
11 |     my ( $self, $t_node ) = @_;
12 |     return if $t_node->t_lemma ne "that";
13 |     my $head = $t_node->get_parent();
14 |     return if $head->formeme !~ /^v:rc/;
15 |     # Lemma first, then its origin marker.
16 |     $t_node->set_t_lemma("#PersPron");
17 |     $t_node->set_t_lemma_origin('RemoveRelPron');
18 | }
19 | 1;
20 | 
21 | __END__
22 | 
23 | =encoding utf-8
24 | 
25 | =head1 NAME
26 | 
27 | Treex::Block::T2T::EN2EU::RemoveRelPron
28 | 
29 | =head1 DESCRIPTION
30 | 
31 | 
32 | =head1 AUTHORS
33 | 
34 | Gorka Labaka <gorka.labaka@ehu.eus>
35 | 
36 | =head1 COPYRIGHT AND LICENSE
37 | 
38 | Copyright © 2008 by Institute of Formal and Applied Linguistics, Charles University in Prague
39 | 
40 | This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself.
41 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/T2T/FixPunctFormemes.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Block::T2T::FixPunctFormemes;
 2 | use Moose;
 3 | use Treex::Core::Common;
 4 | extends 'Treex::Core::Block';
 5 | 
 6 | # Force formeme 'x' on t-nodes whose t-lemma is only punctuation, -LRB- or -RRB-.
 7 | sub process_tnode {
 8 |     my ( $self, $tnode ) = @_;
 9 |     my $lemma = $tnode->t_lemma || "";
10 |     $tnode->set_formeme('x') if $lemma =~ /^(?:\p{P}+|-LRB-|-RRB-)$/;
11 | }
12 | 
13 | 1;
14 | 
15 | __END__
16 | 
17 | =encoding utf-8
18 | 
19 | =head1 NAME
20 | 
21 | Treex::Block::T2T::FixPunctFormemes
22 | 
23 | =head1 DESCRIPTION
24 | 
25 |     Force formeme x for all punctuation tokens.
26 | 
27 | 
28 | =head1 AUTHORS
29 | 
30 | Luís Gomes <luis.gomes@di.fc.ul.pt>, <luismsgomes@gmail.com>
31 | 
32 | =head1 COPYRIGHT AND LICENSE
33 | 
34 | Copyright © 2014 by NLX Group, Universidade de Lisboa
35 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/T2T/ProjectSelectedWild.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Block::T2T::ProjectSelectedWild;
 2 | use Moose;
 3 | use Treex::Core::Common;
 4 | 
 5 | extends 'Treex::Core::Block';
 6 | 
 7 | # Copy wild->{check_comma_after} from the source t-node to this t-node, if defined there.
 8 | sub process_tnode {
 9 |     my ( $self, $tst_tnode ) = @_;
10 |     my $src_tnode = $tst_tnode->src_tnode or return;    # no source node, nothing to project
11 |     my $flag = $src_tnode->wild->{check_comma_after};
12 |     $tst_tnode->wild->{check_comma_after} = $flag if defined $flag;
13 |     return;
14 | }
15 | 
16 | 1;
17 | 
18 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/T2T/RecoverUnknownLemmas.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Block::T2T::RecoverUnknownLemmas;
 2 | use Moose;
 3 | extends 'Treex::Core::Block';
 4 | 
 5 | sub process_tnode {    # restores wild->{original_lemma} as the t-lemma; returns 1 in every case
 6 |     my ( $self, $tnode ) = @_;
 7 |     my $source = $tnode->src_tnode();
 8 |     return 1 if !$source || $source->t_lemma ne $tnode->t_lemma;    # t-lemma differs from the source
 9 |     my $lex_anode = $source->get_lex_anode() or return 1;
10 |     my $recovered = $lex_anode->wild->{original_lemma};
11 |     $tnode->set_t_lemma($recovered) if $recovered;
12 |     return 1;
13 | }
14 | 
15 | 1;
16 | 
17 | __END__
18 | 
19 | =encoding utf-8
20 | 
21 | =head1 NAME 
22 | 
23 | Treex::Block::T2T::RecoverUnknownLemmas
24 | 
25 | =head1 DESCRIPTION
26 | 
27 | Recovers lemmas that have been replaced with synset ids and were not "transferred".
28 | 
29 | =head1 AUTHORS
30 | 
31 | Luís Gomes <luis.gomes@di.fc.ul.pt>, <luismsgomes@gmail.com>
32 | 
33 | =head1 COPYRIGHT AND LICENSE
34 | 
35 | Copyright © 2014 by NLX Group, Universidade de Lisboa
36 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/T2T/RehangToOrigParents.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Block::T2T::RehangToOrigParents;
 2 | use Moose;
 3 | use Treex::Core::Common;
 4 | extends 'Treex::Core::Block';
 5 | 
 6 | sub process_tnode {    # re-attach the node to the parent referenced by original_parent.rf
 7 |     my ( $self, $node ) = @_;
 8 |     my $stored_parent = $node->get_deref_attr('original_parent.rf');
 9 |     $node->set_parent($stored_parent) if $stored_parent;
10 | }
11 | 
12 | 1;
13 | __END__
14 | 
15 | =over
16 | 
17 | =item Treex::Block::T2T::RehangToOrigParents
18 | 
19 | Rehangs nodes to its original parents as it was before applying
20 | the L<Treex::Block::T2T::RehangToEffParents> block.
21 | Original parents are taken from the C<original_parent.rf> attribute.
22 | 
23 | =back
24 | 
25 | =cut
26 | 
27 | # Copyright 2008 Martin Popel
28 | # This file is distributed under the GNU General Public License v2. See $TMT_ROOT/README.
29 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/T2U/CS/AdjustStructure.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Block::T2U::CS::AdjustStructure;
 2 | use utf8;
 3 | use Moose;
 4 | extends 'Treex::Block::T2U::AdjustStructure';
 5 | 
 6 | use experimental 'signatures';
 7 | 
 8 | =head1 NAME
 9 | 
10 | Treex::Block::T2U::CS::AdjustStructure - Czech specifics for converting t-layer to u-layer.
11 | 
12 | =cut
13 | 
14 | override is_exclusive => sub($self, $tlemma) {    # matches the t-lemmas jen, jenom, pouze, výhradně
15 |     $tlemma =~ /^(?:jen(?:om)?|pouze|výhradně)$/
16 | };
17 | 
18 | override negation => sub { 'n(?:e|ikoliv?)' };    # pattern source as a string: ne, nikoli, nikoliv
19 | 
20 | __PACKAGE__->meta->make_immutable
21 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/T2U/CS/ConvertCoreference.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Block::T2U::CS::ConvertCoreference;
 2 | use utf8;
 3 | use Moose;
 4 | extends 'Treex::Block::T2U::ConvertCoreference';
 5 | with 'Treex::Tool::UMR::CS::GrammatemeSetter';
 6 | 
 7 | =head1 NAME
 8 | 
 9 | Treex::Block::T2U::CS::ConvertCoreference - Czech specifics for converting coreference from the t-layer to u-layer.
10 | 
11 | =cut
12 | 
13 | {   my $RELATIVE = '(?:který|jenž|jaký|co|kd[ye]|odkud|kudy|kam)';    # pattern string, lexically private to this bare block
14 |     sub relative { $RELATIVE }    # returns the pattern string
15 | }
16 | 
17 | __PACKAGE__->meta->make_immutable
18 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/T2U/LA/AdjustStructure.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Block::T2U::LA::AdjustStructure;
 2 | use Moose;
 3 | extends 'Treex::Block::T2U::AdjustStructure';
 4 | 
 5 | use experimental 'signatures';
 6 | 
 7 | =head1 NAME
 8 | 
 9 | Treex::Block::T2U::LA::AdjustStructure - Latin specifics for converting t-layer to u-layer.
10 | 
11 | =cut
12 | 
13 | override is_exclusive => sub($self, $tlemma) {    # matches the t-lemmas solum and tantum
14 |     $tlemma =~ /^(?:solum|tantum)$/
15 | };
16 | 
17 | override negation => sub { '(?:n(?:on|e)|haud)' };
18 | 
19 | __PACKAGE__->meta->make_immutable
20 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/T2U/LA/ConvertCoreference.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Block::T2U::LA::ConvertCoreference;
 2 | use Moose;
 3 | extends 'Treex::Block::T2U::ConvertCoreference';
 4 | with 'Treex::Tool::UMR::LA::GrammatemeSetter';
 5 | 
 6 | =head1 NAME
 7 | 
  8 | Treex::Block::T2U::LA::ConvertCoreference - Latin specifics for converting coreference from the t-layer to u-layer.
 9 | 
10 | =cut
11 | 
12 | {   my $RELATIVE = '(?:qu[aio]|u(?:bi|nde))(?:cumque)?'
13 |                  . '|qu(?:omodo|isquis|alis|antus)';
14 |     sub relative { $RELATIVE }
15 | }
16 | 
17 | __PACKAGE__->meta->make_immutable
18 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/Test/FieldCanHaveWideChars.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Block::Test::FieldCanHaveWideChars;
 2 | 
 3 | use Moose;
 4 | use Treex::Core::Common;
 5 | 
 6 | extends 'Treex::Core::Block';
 7 | 
 8 | has 'problematic_attribute' => ( is => 'rw', isa => 'Str', default => "žluťoučký" );
 9 | 
10 | sub BUILD {
11 |     my $self = shift;
12 |     log_info("FieldCanHaveWideChars loaded, problematic_attribute=" . $self->problematic_attribute);
13 | }
14 | 
15 | sub process_document {
16 |     my $self = shift;
17 |     log_info("FieldCanHaveWideChars executed, problematic_attribute=" . $self->problematic_attribute);
18 | }
19 | 
20 | 1;
21 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/Test/ParameterCanHaveWideChars.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Block::Test::ParameterCanHaveWideChars;
 2 | 
 3 | use Moose;
 4 | use Treex::Core::Common;
 5 | 
 6 | extends 'Treex::Core::Block';
 7 | 
 8 | has 'problematic_attribute' => ( is => 'rw', isa => 'Str', default => "no wide chars as default but they can be set in scenario" );
 9 | 
10 | sub BUILD {
11 |     my $self = shift;
12 |     log_info("ParameterCanHaveWideChars loaded, problematic_attribute=" . $self->problematic_attribute);
13 | }
14 | 
15 | sub process_document {
16 |     my $self = shift;
17 |     log_info("ParameterCanHaveWideChars executed, problematic_attribute=" . $self->problematic_attribute);
18 | }
19 | 
20 | 1;
21 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/W2A/AppendSynsetIdToLemmas.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Block::W2A::AppendSynsetIdToLemmas;
 2 | use Moose;
 3 | extends 'Treex::Core::Block';
 4 | 
 5 | sub process_anode {
 6 |     my ( $self, $anode ) = @_;
 7 |     my $synsetid = $anode->wild->{lx_wsd} // 'UNK';
 8 |     my $lemma = $anode->lemma;
 9 |     if ($synsetid ne 'UNK') {
10 |         #my $alpha_synsetid = "$synsetid";
11 |         #$alpha_synsetid =~ tr/0-9/a-j/;
12 |         #$anode->set_lemma($lemma."__".$alpha_synsetid);
13 |         $anode->set_lemma($lemma."__".$synsetid);
14 |     }  
15 |     return 1;
16 | }
17 | 
18 | 1;
19 | 
20 | __END__
21 | 
22 | =encoding utf-8
23 | 
24 | =head1 NAME 
25 | 
26 | Treex::Block::W2A::AppendSynsetIdToLemmas
27 | 
28 | =head1 DESCRIPTION
29 | 
30 | Appends synset ids to lemmas (where applicable).
31 | 
32 | =head1 AUTHORS
33 | 
34 | Luís Gomes <luis.gomes@di.fc.ul.pt>, <luismsgomes@gmail.com>
35 | 
36 | =head1 COPYRIGHT AND LICENSE
37 | 
38 | Copyright © 2014 by NLX Group, Universidade de Lisboa
39 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/W2A/CS/TagFeaturama.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Block::W2A::CS::TagFeaturama;
 2 | use Moose;
 3 | use Treex::Core::Common;
 4 | use Treex::Tool::Tagger::Featurama::CS;
 5 | extends 'Treex::Block::W2A::Tag';
 6 | 
  7 | sub _build_tagger{    # NOTE(review): presumably the builder of the parent's tagger attribute -- confirm in W2A::Tag.
  8 |     return Treex::Tool::Tagger::Featurama::CS->new;
  9 | }
10 | 
11 | 1;
12 | 
13 | __END__
14 | 
15 | =pod
16 | 
17 | =encoding utf-8
18 | 
19 | =head1 NAME
20 | 
21 | Treex::Block::W2A::CS::TagFeaturama - Czech PoS+morpho tagger
22 | 
23 | =head1 DESCRIPTION
24 | 
 25 | Each node in the analytical tree is tagged using the L<Featurama|Treex::Tool::Tagger::Featurama> tagger.
26 | 
27 | =head1 AUTHORS
28 | 
29 | Martin Popel <popel@ufal.mff.cuni.cz>
30 | 
31 | =head1 COPYRIGHT AND LICENSE
32 | 
33 | Copyright © 2012 by Institute of Formal and Applied Linguistics, Charles University in Prague
34 | 
35 | This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself.
36 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/W2A/CS/TagMorphoDiTa.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Block::W2A::CS::TagMorphoDiTa;
 2 | 
 3 | use strict;
 4 | use warnings;
 5 | use Moose;
 6 | use Treex::Core::Common;
 7 | extends 'Treex::Block::W2A::TagMorphoDiTa';
 8 | 
 9 | has '+model' => ( default => 'data/models/morphodita/cs/czech-morfflex-pdt-131112.tagger-best_accuracy' );
10 | 
11 | 1;
12 | 
13 | __END__
14 | 
15 | =pod
16 | 
17 | =encoding utf-8
18 | 
19 | =head1 NAME
20 | 
21 | Treex::Block::W2A::CS::TagMorphoDiTa
22 | 
23 | =head1 DESCRIPTION
24 | 
25 | This is just a small modification of L<Treex::Block::W2A::TagMorphoDiTa> which adds the path to the
26 | default model for Czech.
27 | 
28 | =head1 AUTHORS
29 | 
30 | Martin Popel <popel@ufal.mff.cuni.cz>
31 | 
32 | =head1 COPYRIGHT AND LICENSE
33 | 
34 | Copyright © 2014 by Institute of Formal and Applied Linguistics, Charles University in Prague
35 | 
36 | This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself.
37 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/W2A/ConvertTags.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Block::W2A::ConvertTags;
 2 | use Moose;
 3 | use Treex::Core::Common;
 4 | use utf8;
 5 | extends 'Treex::Block::HamleDT::Harmonize';
 6 | 
  7 | has iset_driver =>
  8 | (
  9 |     is            => 'ro',
 10 |     isa           => 'Str',
 11 |     required      => 1,    # always satisfied, because a default is supplied on the next line
 12 |     default       => 'ta::tamiltb',
 13 |     documentation => 'Which interset driver should be used to decode tags in this treebank? '.
 14 |                      'Lowercase, language code :: treebank code, e.g. "cs::pdt". '.
 15 |                      'The driver must be available in "$TMT_ROOT/libs/other/tagset".'
 16 | );
17 | 
18 | sub process_zone {
19 |     my $self   = shift;
20 |     my $zone   = shift;
21 | 	my $root = $zone->get_atree();
22 | 	$self->convert_tags($root); 
23 | }
24 | 
25 | 1;
26 | 
27 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/W2A/DE/TagStanford.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Block::W2A::DE::TagStanford;
 2 | 
 3 | use Moose;
 4 | use Treex::Core::Common;
 5 | extends 'Treex::Block::W2A::TagStanford';
 6 | 
 7 | has '+model' => ( default => 'data/models/tagger/stanford/german-fast.tagger' );
 8 | 
 9 | 1;
10 | 
11 | __END__
12 | 
13 | =pod
14 | 
15 | =encoding utf-8
16 | 
17 | =head1 NAME
18 | 
19 | Treex::Block::W2A::DE::TagStanford
20 | 
21 | =head1 DESCRIPTION
22 | 
23 | This is just a small modification of L<Treex::Block::W2A::TagStanford> which adds the path to the
24 | default model for German.
25 | 
26 | =head1 AUTHORS
27 | 
28 | Ondřej Dušek <odusek@ufal.mff.cuni.cz>
29 | 
30 | =head1 COPYRIGHT AND LICENSE
31 | 
32 | Copyright © 2012 by Institute of Formal and Applied Linguistics, Charles University in Prague
33 | 
34 | This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself.
35 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/W2A/DefaultDepRel.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Block::W2A::DefaultDepRel;
 2 | use Treex::Core::Common;
 3 | use Moose;
 4 | extends 'Treex::Core::Block';
 5 | 
 6 | has 'def_rel' => (is => 'ro', isa => 'Str', default=> 'NR');
 7 | has 'deprel_attribute'  => ( is       => 'rw', isa => 'Str', default => 'afun');
 8 | 
 9 | sub process_atree {
10 |     my ( $self, $atree ) = @_;
11 |     my @anodes = $atree->get_descendants( { ordered => 1 } );
12 |     foreach my $an (@anodes) {
13 |     	$an->set_attr($self->deprel_attribute, $self->def_rel);
14 |     }	
15 | }
16 | 
17 | 1;
18 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/W2A/EN/HideIT.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Block::W2A::EN::HideIT;
 2 | use Moose;
 3 | use Treex::Core::Common;
 4 | extends 'Treex::Block::W2A::HideIT';
 5 | 
 6 | 1;
 7 | 
 8 | __END__
 9 | 
10 | =encoding utf-8
11 | 
12 | =head1 NAME
13 | 
 14 | Treex::Block::W2A::EN::HideIT - hide IT-domain entities
15 | 
16 | =head1 DESCRIPTION
17 | 
18 | Currently, there is nothing English-specific,
19 | this block is just a nickname for L<Treex::Block::W2A::HideIT>
20 | (for legacy reasons).
21 | 
22 | =head1 AUTHOR
23 | 
24 | Martin Popel <popel@ufal.mff.cuni.cz>
25 | 
26 | =head1 COPYRIGHT AND LICENSE
27 | 
28 | Copyright © 2015 by Institute of Formal and Applied Linguistics, Charles University in Prague
29 | 
30 | This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself.
31 | 
32 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/W2A/EN/QtHackTags.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Block::W2A::EN::QtHackTags;
 2 | use Moose;
 3 | use Treex::Core::Common;
 4 | use utf8;
 5 | extends 'Treex::Core::Block';
 6 | 
 7 | sub process_anode {
 8 |     my ($self, $anode) = @_;
 9 | 
10 |     if ( ($anode->lemma // '') eq 'select'
11 |         && $anode->tag !~ /^VB/
12 |     ) {
13 |         $anode->set_tag('VB');
14 |     }
15 | 
16 |     return ;
17 | }
18 | 
19 | 
20 | 1;
21 | 
22 | =head1 NAME 
23 | 
24 | Treex::Block::W2A::EN::QtHackTags
25 | 
26 | =head1 DESCRIPTION
27 | 
28 | Some hacks useful for QTLeap; aka domain adaptation o:-)
29 | 
 30 | "select" is often tagged as an adjective, as in "select OK", so we set its tag to VB
31 | 
32 | =head1 AUTHOR
33 | 
34 | Rudolf Rosa <rosa@ufal.mff.cuni.cz>
35 | 
36 | =head1 COPYRIGHT AND LICENSE
37 | 
38 | Copyright © 2015 by Institute of Formal and Applied Linguistics,
39 | Charles University in Prague
40 | 
41 | This module is free software; you can redistribute it and/or modify it
42 | under the same terms as Perl itself.
43 | 
44 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/W2A/EscapeMoses.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Block::W2A::EscapeMoses;
 2 | 
 3 | use strict;
 4 | use warnings;
 5 | use utf8;
 6 | use Moose;
 7 | use Treex::Core::Common;
 8 | extends 'Treex::Core::Block';
 9 | 
10 | use Treex::Tool::Moses;
11 | 
 12 | sub process_atree {
 13 |     my ($self, $aroot) = @_;
 14 |     # All the work is delegated to the helper in Treex::Tool::Moses; nothing is returned.
 15 |     Treex::Tool::Moses::escape_anodes($aroot);
 16 | 
 17 |     return;
 18 | };
19 | 
20 | 1;
21 | 
22 | __END__
23 | 
24 | =encoding utf-8
25 | 
26 | =head1 NAME
27 | 
28 | Treex::Block::W2A::EscapeMoses
29 | 
30 | =head1 DESCRIPTION
31 | 
32 | Escape anodes in the way the Moses tokenizer does, using L<Treex::Tool::Moses::escape_anodes()>.
33 | 
34 | =head1 AUTHOR
35 | 
36 | Rudolf Rosa <rosa@ufal.mff.cuni.cz>
37 | 
38 | =head1 COPYRIGHT AND LICENSE
39 | 
40 | This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself.
41 | 
42 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/W2A/FR/TagStanford.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Block::W2A::FR::TagStanford;
 2 | 
 3 | use Moose;
 4 | use Treex::Core::Common;
 5 | extends 'Treex::Block::W2A::TagStanford';
 6 | 
 7 | has '+model' => ( default => 'data/models/tagger/stanford/french.tagger' );
 8 | 
 9 | 1;
10 | 
11 | __END__
12 | 
13 | =pod
14 | 
15 | =encoding utf-8
16 | 
17 | =head1 NAME
18 | 
19 | Treex::Block::W2A::FR::TagStanford
20 | 
21 | =head1 DESCRIPTION
22 | 
23 | This is just a small modification of L<Treex::Block::W2A::TagStanford> which adds the path to the
24 | default model for French.
25 | 
26 | =head1 AUTHORS
27 | 
28 | Ondřej Dušek <odusek@ufal.mff.cuni.cz>
29 | 
30 | =head1 COPYRIGHT AND LICENSE
31 | 
32 | Copyright © 2012 by Institute of Formal and Applied Linguistics, Charles University in Prague
33 | 
34 | This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself.
35 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/W2A/JA/FixPeriod.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Block::W2A::JA::FixPeriod;
 2 | use Moose;
 3 | use Treex::Core::Common;
 4 | use Encode;
 5 | extends 'Treex::Core::Block';
 6 | 
 7 | # We change "。" to classic period, also rehang it to root 
 8 | 
 9 | sub process_atree {
10 |     my ( $self, $a_root ) = @_;
11 |     foreach my $child ( $a_root->get_descendants() ) {
12 |         if ( $child->form eq "。") {
13 |             $child->set_form(".");
14 |             $child->set_lemma(".");
15 |             $child->set_parent($a_root);
16 |         }
17 |     }
18 |     return 1;
19 | }
20 | 
21 | 1;
22 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/W2A/JA/t/parse_jdepp.t:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | 
 3 | use strict;
 4 | use warnings;
 5 | 
 6 | use utf8;
 7 | binmode(STDIN, ':utf8');
 8 | binmode(STDOUT, ':utf8');
 9 | binmode(STDERR, ':utf8');
10 | use Test::More tests => 3;
11 | 
 12 | BEGIN { use_ok('Treex::Block::W2A::JA::ParseJDEPP') };
 13 | 
 14 | require_ok('Treex::Block::W2A::JA::ParseJDEPP');
 15 | 
 16 | Treex::Core::Log::log_set_error_level('WARN');
 17 | my $block = Treex::Block::W2A::JA::ParseJDEPP->new();
 18 | # process_start() is called first; only afterwards is the parser attribute checked.
 19 | $block->process_start();
 20 | 
 21 | isa_ok( $block->parser, 'Treex::Tool::Parser::JDEPP' );
22 | 
23 | # TODO: test parse chunk subroutine
24 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/W2A/JA/t/tag_mecab.t:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | 
 3 | use strict;
 4 | use warnings;
 5 | 
 6 | use utf8;
 7 | binmode(STDIN, ':utf8');
 8 | binmode(STDOUT, ':utf8');
 9 | binmode(STDERR, ':utf8');
10 | use Test::More tests => 3;
11 | 
 12 | BEGIN { use_ok('Treex::Block::W2A::JA::TagMeCab') };
 13 | 
 14 | require_ok('Treex::Block::W2A::JA::TagMeCab');
 15 | 
 16 | my $block = Treex::Block::W2A::JA::TagMeCab->new();
 17 | # process_start() is called first; only afterwards is the tagger attribute checked.
 18 | $block->process_start();
 19 | 
 20 | isa_ok( $block->tagger, 'Treex::Tool::Tagger::MeCab' );
21 | 
22 | # TODO: test process_zone subroutine
23 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/W2A/ParseLeftBranching.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Block::W2A::ParseLeftBranching;
 2 | use Moose;
 3 | use Treex::Core::Common;
 4 | extends 'Treex::Core::Block';
 5 | 
 6 | sub process_atree {
 7 |     my ( $self, $root ) = @_;
 8 |     my @todo =  $root->get_descendants( { ordered => 1 } );
 9 | 
10 |     # Flatten the tree first, if there was some topology already.
11 |     foreach my $node (@todo) {
12 |         $node->set_parent($root);
13 |     }
14 | 
15 |    
16 |     my $child    = shift @todo;  
17 |     my $parent;
18 |     while (@todo) {      
19 | 	$parent   = shift @todo;  
20 |         $child->set_parent($parent);
21 | 	$child    = $parent;
22 | 	
23 |     }
24 |     return;
25 | }
26 | 
27 | 
28 | 
29 | 1;
30 | 
31 | __END__
32 | 
33 | =head1 NAME
34 | 
 35 | Treex::Block::W2A::ParseLeftBranching
36 | 
37 | =head1 DESCRIPTION
38 | 
 39 | Creates a parse tree that is left-branching.
 40 | 
 41 | Every node is attached to the node that follows it; the last node stays attached to the root.
42 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/W2A/ParseRightBranching.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Block::W2A::ParseRightBranching;
 2 | use Moose;
 3 | use Treex::Core::Common;
 4 | extends 'Treex::Core::Block';
 5 | 
 6 | sub process_atree {
 7 |     my ( $self, $root ) = @_;
 8 |     my @todo =  $root->get_descendants( { ordered => 1 } );
 9 | 
10 |     # Flatten the tree first, if there was some topology already.
11 |     foreach my $node (@todo) {
12 |         $node->set_parent($root);
13 |     }
14 | 
15 |    
16 |     my $child    = shift @todo;  
17 |     my $parent;
18 |     while (@todo) {      
19 | 	$parent   = shift @todo;  
20 |         $parent->set_parent($child);
21 | 	$child    = $parent;
22 | 	
23 |     }
24 |     return;
25 | }
26 | 
27 | 
28 | 
29 | 1;
30 | 
31 | __END__
32 | 
33 | =head1 NAME
34 | 
 35 | Treex::Block::W2A::ParseRightBranching
36 | 
37 | =head1 DESCRIPTION
38 | 
 39 | Creates a parse tree that is right-branching.
 40 | 
 41 | Every node is attached to the node that precedes it; the first node stays attached to the root.
42 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/W2A/ReplaceLemmasWithSynsetId.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Block::W2A::ReplaceLemmasWithSynsetId;
 2 | use Moose;
 3 | extends 'Treex::Core::Block';
 4 | 
 5 | sub process_anode {
 6 |     my ( $self, $anode ) = @_;
 7 |     my $synsetid = $anode->wild->{synsetid} // 'UNK';
 8 |     if ($synsetid ne 'UNK') {
 9 | 	$anode->wild->{original_lemma} = $anode->lemma;
10 |         my $alpha_synsetid = "$synsetid";
11 |         $alpha_synsetid =~ tr/0-9/a-j/;
12 |         $anode->set_lemma($alpha_synsetid);
13 |     }  
14 |     return 1;
15 | }
16 | 
17 | 1;
18 | 
19 | __END__
20 | 
21 | =encoding utf-8
22 | 
23 | =head1 NAME 
24 | 
25 | Treex::Block::W2A::ReplaceLemmasWithSynsetId
26 | 
27 | =head1 DESCRIPTION
28 | 
29 | Replaces lemmas with synset ids (where applicable).
30 | 
31 | =head1 AUTHORS
32 | 
33 | Luís Gomes <luis.gomes@di.fc.ul.pt>, <luismsgomes@gmail.com>
34 | 
35 | =head1 COPYRIGHT AND LICENSE
36 | 
37 | Copyright © 2014 by NLX Group, Universidade de Lisboa
38 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/W2A/TA/FixTags.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Block::W2A::TA::FixTags;
 2 | use Moose;
 3 | use Treex::Core::Common;
 4 | use Treex::Tool::Orthography::TA;
 5 | extends 'Treex::Core::Block';
 6 | 
 7 | sub process_anode {
 8 | 	my ($self, $anode) = @_;
 9 | 	my $fixed_tag = $anode->tag;
10 | 	$fixed_tag = $self->get_correct_tag($anode->form, $anode->lemma, $anode->tag);
11 | 	$anode->set_attr('tag', $fixed_tag);
12 | }
13 | 
14 | sub get_correct_tag {
15 | 	my ($self, $f, $l, $t) = @_;
16 | 	
17 | 	# initials
18 | 	return 'NmNSN----------' if ($f =~ /($TA_VOWELS_REG)\.$/);
19 | 	return 'NmNSN----------' if ($f =~ /($TA_CONSONANTS_REG)\.$/);
20 | 	return 'NmNSN----------' if ($f =~ /($TA_CONSONANTS_PLUS_VOWEL_A_REG)($TA_VOWEL_SIGNS_REG)\.$/);
21 | 	return 'NmNSN----------' if ($f =~ /($TA_CONSONANTS_PLUS_VOWEL_A_REG)\.$/);
22 | 	return 'NmNSN----------' if ($f =~ /(எஸ்|எல்|எம்|என்|ஆர்)\.$/);
23 | 	
24 | 	return $t;
25 | }
26 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/W2A/t/resegment_sentences.t:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | 
 3 | use strict;
 4 | use warnings;
 5 | 
 6 | use Test::More;
 7 | use Treex::Block::W2A::ResegmentSentences;
 8 | Treex::Core::Log::log_set_error_level('WARN');
 9 | 
 10 | my $block = new_ok('Treex::Block::W2A::ResegmentSentences');
 11 | # Per language: the segmenter has the expected class, and two calls return the same object.
 12 | foreach my $lang (qw(cs en de)) {
 13 |     isa_ok( $block->_get_segmenter($lang), 'Treex::Tool::Segment::RuleBased' );
 14 |     is( $block->_get_segmenter($lang), $block->_get_segmenter($lang), 'Returns same object on each _get_segmenter call' );
 15 | }
16 | 
17 | done_testing();
18 | 
19 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/W2A/t/tokenize_on_whitespace.t:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | 
 3 | use strict;
 4 | use warnings;
 5 | 
 6 | use Test::More tests => 1;
 7 | 
  8 | TODO: {
  9 |     local $TODO = 'Tests not yet written';
 10 |     # Placeholder failure; while $TODO is set, Test::More reports it as a TODO test, not as a suite failure.
 11 |     fail( 'Write some tests' );
 12 | }
13 | 
14 | 


--------------------------------------------------------------------------------
/lib/Treex/Block/Write/LayerAttributes/Missing.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Block::Write::LayerAttributes::Missing;
 2 | 
 3 | use Moose;
 4 | use Treex::Core::Common;
 5 | 
 6 | with 'Treex::Block::Write::LayerAttributes::AttributeModifier';
 7 | 
 8 | has '+return_values_names' => ( default => sub { [''] } );
 9 | 
10 | 
 11 | sub modify_single {
 12 |     # Uses nothing but $self and always returns undef (a single undef element in list context).
 13 |     my ( $self ) = @_;
 14 |     
 15 |     return undef;
 16 | }
17 | 
18 | 1;
19 | 
20 | __END__
21 | 
22 | =encoding utf-8
23 | 
24 | =head1 NAME 
25 | 
26 | Treex::Block::Write::LayerAttributes::Missing
27 | 
28 | =head1 DESCRIPTION
29 | 
30 | A dummy 'text modifier' returning a missing value in all cases.
31 | 
32 | =head1 AUTHOR
33 | 
34 | Ondřej Dušek <odusek@ufal.mff.cuni.cz>
35 | 
36 | =head1 COPYRIGHT AND LICENSE
37 | 
38 | Copyright © 2011 by Institute of Formal and Applied Linguistics, Charles University in Prague
39 | 
40 | This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself.
41 | 


--------------------------------------------------------------------------------
/lib/Treex/CS.pm:
--------------------------------------------------------------------------------
 1 | package Treex::CS;
 2 | 
 3 | use strict;
 4 | use warnings;
 5 | use Treex::Unilang;
 6 | use Ufal::MorphoDiTa;
 7 | 
 8 | 1;
 9 | 
10 | __END__
11 | #Module is here for synchronizing CS with Unilang
12 | #TODO - synopsis, example of parsing
13 | #a) from commandline
14 | #b) directly from perl
15 | =pod
16 | 
17 | =encoding utf8
18 | 
19 | =head1 NAME
20 | 
21 | Treex::CS - collection of blocks for processing Czech
22 | 
23 | =head1 DESCRIPTION
24 | 
25 | =head1 AUTHOR
26 | 
27 | Dušan Variš <dvaris@seznam.cz>
28 | 
29 | =head1 COPYRIGHT AND LICENSE
30 | 
31 | Copyright © 2011 by Institute of Formal and Applied Linguistics, Charles University in Prague
32 | 
33 | This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself.
34 | 
35 | 


--------------------------------------------------------------------------------
/lib/Treex/Core/Node/t/ordered.t:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | 
 3 | use strict;
 4 | use warnings;
 5 | 
 6 | use Test::More;
 7 | use Test::Moose;
 8 | use Treex::Core::Document;
 9 | 
 10 | my $doc = new_ok('Treex::Core::Document');
 11 | my $bundle = $doc->create_bundle();
 12 | my $bzone = $bundle->create_zone('en');
 13 | my $t_root = $bzone->create_ttree();    # root of a new t-tree in the English zone
 14 | does_ok($t_root, 'Treex::Core::Node::Ordered', 'T-root is ordered');
15 | 
16 | done_testing;
17 | 


--------------------------------------------------------------------------------
/lib/Treex/Core/compile_grammar.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | use strict;
 3 | use warnings;
 4 | use Parse::RecDescent 1.967009;
 5 | my $grammar;
 6 | open my $IN, '<', 'ScenarioParser.rdg';
 7 | {
 8 |     local $/ = undef;
 9 |     $grammar = <$IN>;
10 | }
11 | Parse::RecDescent->Precompile(
12 |     { -standalone => 1, }
13 |     , $grammar
14 |     , "Treex::Core::ScenarioParser"
15 | );
16 | 
17 | # The standalone version contains several packages in one file,
18 | # but the very Treex::Core::ScenarioParser starts around line 3300.
19 | # We need to silent Perl critics also in the first package.
20 | system 'mv ScenarioParser.pm temp';
21 | system '(echo "## no critic (Miscellanea::ProhibitUnrestrictedNoCritic)"; echo "## no critic Generated code follows"; cat temp) > ScenarioParser.pm';
22 | 
23 | # The old way did not generate *standalone* parser
24 | ##!/bin/bash
25 | #perl -MParse::RecDescent - ScenarioParser.rdg Treex::Core::ScenarioParser
26 | 


--------------------------------------------------------------------------------
/lib/Treex/Core/share/tred_extension/extensions.lst:
--------------------------------------------------------------------------------
1 | treex
2 | 


--------------------------------------------------------------------------------
/lib/Treex/Core/share/tred_extension/treex/contrib/treex/.gitignore:
--------------------------------------------------------------------------------
1 | .layouts.cfg
2 | 


--------------------------------------------------------------------------------
/lib/Treex/Core/share/tred_extension/treex/contrib/treex/contrib.mac:
--------------------------------------------------------------------------------
 1 | # -*- cperl -*-
 2 | 
 3 | #ifinclude <contrib/pml/PML.mak>
 4 | #ifinclude <contrib/eng-vallex/contrib.mac>
 5 | #ifinclude <contrib/vallex/contrib.mac>
 6 | 
 7 | package Treex_mode;
 8 | #binding-context Treex_mode
 9 | 
10 | print STDERR "Initializing Treex extension\n";
11 | 
12 | BEGIN { import TredMacro; }
13 | 
14 | #include Treex_mode.inc
15 | 
16 | 1;
17 | 


--------------------------------------------------------------------------------
/lib/Treex/Core/share/tred_extension/treex/package.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <tred_extension xmlns="http://ufal.mff.cuni.cz/pdt/pml/">
 3 | 
 4 |   <head>
 5 |     <schema href="tred_extension_schema.xml"/>
 6 |   </head>
 7 | 
 8 |   <pkgname>treex</pkgname>
 9 | 
10 |   <title>Support for .treex files</title>
11 | 
12 |   <version>0.1</version>
13 | 
14 |   <copyright year="2010">Zdeněk Žabokrtský</copyright>
15 | 
16 |   <description>This extension allows to use TrEd for browsing
17 |   Treex::Core::Document instances.</description>
18 | 
19 |   <require>  </require>
20 | 
21 | </tred_extension>
22 | 
23 | 


--------------------------------------------------------------------------------
/lib/Treex/Core/share/tred_extension/treex/resources/pmlbackend_conf.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="utf-8"?>
 2 | <pmlbackend xmlns="http://ufal.mff.cuni.cz/pdt/pml/">
 3 |   <head>
 4 |     <schema href="pmlbackend_conf_schema.xml"/>
 5 |   </head>
 6 |   <options>
 7 |     <save>
 8 |       <write_single_LM>1</write_single_LM>
 9 |     </save>
10 |   </options>
11 | </pmlbackend>
12 | 


--------------------------------------------------------------------------------
/lib/Treex/Core/share/tred_extension/treex/stylesheets/Treex_stylesheet:
--------------------------------------------------------------------------------
1 | context:  Treex_context
2 | hint:       <? $this->{_precomputed_hint} ?>
3 | rootstyle:  <? $this->{_precomputed_root_style} ?>
4 | style: <? $this->{_precomputed_node_style} ?>
5 | node: <? $this->{_precomputed_labels}->[0] ?>
6 | node: <? $this->{_precomputed_labels}->[1] ?>
7 | node: <? $this->{_precomputed_labels}->[2] ?>
8 | 
9 | 


--------------------------------------------------------------------------------
/lib/Treex/Core/t/block.t:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | 
 3 | use strict;
 4 | use warnings;
 5 | 
 6 | use Test::More;
 7 | 
  8 | BEGIN { use_ok('Treex::Core::Block') }
  9 | # The base block class is instantiated here without any constructor arguments.
 10 | my $block = Treex::Core::Block->new;
 11 | 
 12 | isa_ok( $block, 'Treex::Core::Block' );
13 | 
14 | done_testing();
15 | 


--------------------------------------------------------------------------------
/lib/Treex/Core/t/common.t:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | 
  3 | use Test::More;
  4 | use Treex::Core::Common;
  5 | # Only Treex::Core::Common is loaded above; the type looked up below must be registered through it.
  6 | use Moose::Util::TypeConstraints qw(find_type_constraint);
  7 | ok(find_type_constraint('Treex::Type::NonNegativeInt'), 'Find type defined not directly in Common but in used module');
 8 | 
 9 | done_testing();
10 | 


--------------------------------------------------------------------------------
/lib/Treex/Core/t/config.t:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | 
 3 | use strict;
 4 | use warnings;
 5 | 
 6 | use Test::More;
 7 | 
  8 | use Treex::Core::Config;
  9 | # The configured temporary directory must exist and be writable by the current user.
 10 | my $TMP_DIR = Treex::Core::Config->tmp_dir();
 11 | ok( -d $TMP_DIR, 'Temporary directory is directory' );
 12 | ok( -w $TMP_DIR, 'Temporary directory is writable' );
13 | 
14 | done_testing();
15 | 


--------------------------------------------------------------------------------
/lib/Treex/Core/t/dump_scen.t:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | 
 3 | use strict;
 4 | use warnings;
 5 | 
 6 | use Test::More tests => 3;
 7 | use Treex::Core::Scenario;
 8 | 
  9 | my $scen1 = Treex::Core::Scenario->new( from_string => 'Read::Text Write::Text' );
 10 | 
 11 | like( $scen1->construct_scenario_string(), qr{^Read::Text Write::Text$} , 'Simple scenario');
 12 | # The next two inputs contain runs of spaces; the expected strings use single separators.
 13 | my $scen2 = Treex::Core::Scenario->new( from_string => 'Read::Text  language=en    Write::Text' );
 14 | 
 15 | like( $scen2->construct_scenario_string( multiline => 1 ), qr{^Read::Text language=en\nWrite::Text$} , "Multiline scenario");
 16 | 
 17 | my $scen3 = Treex::Core::Scenario->new( from_string => 'Read::Text  language=en  ::Another::Block  Write::Text' );
 18 | 
 19 | like( $scen3->construct_scenario_string( ), qr{^Read::Text language=en ::Another::Block Write::Text$} , "Scenario with block out of Treex::Block namespace");
20 | 


--------------------------------------------------------------------------------
/lib/Treex/Core/t/error.scen:
--------------------------------------------------------------------------------
1 | aaa aaa
2 | 


--------------------------------------------------------------------------------
/lib/Treex/Core/t/files.t:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | 
 3 | use strict;
 4 | use warnings;
 5 | 
 6 | use Test::More tests => 3;
 7 | use File::Slurp;
 8 | 
  9 | my @lines = read_file( \*DATA );
 10 | write_file( 'filelist', @lines );
 11 | END { unlink 'filelist'; }
 12 | chomp @lines;
 13 | use_ok('Treex::Core::Files');
 14 | # NOTE(review): '@filelist' presumably means "read the names from the file filelist" -- confirm in Treex::Core::Files.
 15 | my $files = Treex::Core::Files->new( string => '@filelist' );
 16 | isa_ok( $files, 'Treex::Core::Files' );
 17 | is_deeply( $files->filenames, \@lines, 'Got filenames chomped' );
18 | 
19 | __DATA__
20 | first.file
21 | second.file
22 | 


--------------------------------------------------------------------------------
/lib/Treex/Core/t/following_node.t:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | 
 3 | use strict;
 4 | use warnings;
 5 | 
 6 | use Test::More;
 7 | 
 8 | use Treex::Core;
 9 | 
10 | my $document = Treex::Core::Document->new;
11 | my $bundle = $document->create_bundle;
12 | # Build 4 languages x 2 selectors = 8 zones; each zone gets an a-tree and a t-tree with a root and 3 children.
13 | foreach my $language (qw(en ru de cs)) {
14 | 
15 |     foreach my $selector (undef, 'test') {
16 |         my $zone = $bundle->create_zone($language,$selector);
17 | 
18 |         foreach my $level ('a','t') {
19 | 
20 |             my $root = $zone->create_tree($level);
21 | 
22 |             for (1..3) {
23 |                 $root->create_child();
24 |             }
25 |         }
26 |     }
27 | }
28 | 
29 | # Follow the chain of following() calls, starting at the bundle itself, until a false value is returned.
30 | my @nodes;
31 | 
32 | my $node = $bundle;
33 | while ($node) {
34 |     push @nodes, $node;
35 |     $node = $node->following;
36 | }
37 | # Expected count: the bundle (1) + 4 languages * 2 selectors * 2 trees * 4 nodes per tree.
38 | is( scalar(@nodes), 1 + 4 * 2 * 2 * 4 ,
39 |     'following() traverses through all nodes in all trees in all zones' );
40 | 
41 | done_testing;
42 | 


--------------------------------------------------------------------------------
/lib/Treex/Core/t/grammar.t:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | use strict;
 3 | use warnings;
 4 | 
 5 | use Treex::Core;
 6 | 
 7 | use Test::More;
 8 | use File::Basename;
 9 | 
10 | my @strings = (
11 |     q(Read::Text),
12 |     q(Read::Text Util::Eval),
13 |     q(Read::Text Util::Eval document='print'),
14 |     q(Read::Text Util::Eval document='print"hello";'),
15 |     q(Read::Text Util::Eval document='print "hello";'),
16 |     q(Read::Text Util::Eval document='print "hello";'),
17 |     dirname($0) . q(/test.scen),
18 | );
19 | 
20 | #plan tests => @strings + 2;
21 | BEGIN { use_ok('Treex::Core::ScenarioParser'); }
22 | 
23 | my $parser = Treex::Core::ScenarioParser->new();
24 | # The parser is built with a direct method call; indirect-object syntax ("new Class") can be misparsed by Perl.
25 | isa_ok( $parser, 'Parse::RecDescent::_Runtime' );
26 | # Uncomment the two Parse::RecDescent switches below to debug the grammar.
27 | #$::RD_TRACE = 1;
28 | #$::RD_HINT  = 1;
29 | foreach my $string (@strings) {
30 |     isnt( $parser->startrule($string), undef, "startrule accepts: $string" );
31 | }
32 | 
33 | done_testing();
34 | 


--------------------------------------------------------------------------------
/lib/Treex/Core/t/remove_bundle.t:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | 
 3 | use strict;
 4 | use warnings;
 5 | 
 6 | use Test::More;
 7 | use Treex::Core;
 8 | 
 9 | my $document = Treex::Core::Document->new;
10 | # Create five bundles with ids i1..i5; the fifth one is left without any zone.
11 | foreach my $bundle_number (1..5) {
12 | 
13 |     my $bundle   = $document->create_bundle();
14 |     $bundle->set_id("i$bundle_number");
15 | 
16 |     if ( $bundle_number < 5 ) { # check if it works for empty bundles too
17 |         $bundle->create_zone('en');
18 |     }
19 | };
20 | # Take a snapshot of the bundle list so that the indices below stay valid while bundles are removed.
21 | my @bundles = $document->get_bundles;
22 | 
23 | foreach my $bundle_number (1,3,5) {
24 |     $bundles[$bundle_number-1]->remove;
25 | }
26 | # Only the second and the fourth bundle should be left, in their original order.
27 | is( ( join '-', map { $_->id() } $document->get_bundles ), 'i2-i4',
28 |     'Bundles correctly removed from the beginning, the middle and the end of a document' );
29 | 
30 | done_testing();
31 | 


--------------------------------------------------------------------------------
/lib/Treex/Core/t/resource.t:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | 
 3 | use strict;
 4 | use warnings;
 5 | 
 6 | use Test::More;
 7 | use Test::Output;
 8 | use File::Temp qw(tempfile);
 9 | use File::Spec;
10 | 
11 | BEGIN { require_ok('Treex::Core::Resource') }
12 | 
13 | SKIP:
14 | {
15 |     skip "May fail when not online", 3 unless ($ENV{AUTHOR_TESTING});    # 3 = number of ok() calls in this block
16 |     my $file = Treex::Core::Resource::require_file_from_share('data/models/parser/mst/cs/README');
17 |     ok( -e $file, 'file from resource exists' );
18 |     # An absolute path to an existing local file is expected to resolve to an existing file.
19 |     my ($fh, $filename) = tempfile();
20 |     $file = Treex::Core::Resource::require_file_from_share($filename);
21 |     ok( -e $file, 'file with absolute path' );
22 |     # The same file again, this time addressed relative to the current directory.
23 |     my ($volume, $dir, $f) = File::Spec->splitpath($filename);
24 |     chdir $dir;
25 |     $file = Treex::Core::Resource::require_file_from_share("./$f");
26 |     ok( -e $file, 'file with relative path' );
27 |     # Remove the temporary file created above.
28 |     unlink $filename;
29 | }
30 | done_testing();
31 | 
32 | 


--------------------------------------------------------------------------------
/lib/Treex/Core/t/runall.sh:
--------------------------------------------------------------------------------
1 | for SCRIPT in *.t   # every test script in the current directory
2 |   do
3 |     echo
4 |     echo "RUNNING $SCRIPT"
5 |     echo
6 |     "./$SCRIPT"     # quoted so that names with spaces or glob characters run as one word
7 |   done


--------------------------------------------------------------------------------
/lib/Treex/Core/t/test.scen:
--------------------------------------------------------------------------------
1 | Read::Text Util::Eval document='print "hello";'
2 | 


--------------------------------------------------------------------------------
/lib/Treex/Core/t/writers.t:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | # Testing treex -p and base writers
 3 | use strict;
 4 | use warnings;
 5 | use Test::More;
 6 | use File::Basename;
 7 | 
 8 | BEGIN {
 9 |   Test::More::plan( skip_all => 'these tests require export AUTHOR_TESTING=1' ) if !$ENV{AUTHOR_TESTING};
10 |   Test::More::plan( skip_all => 'these tests require SGE qsub' ) if !`which qsub`;
11 | }
12 | 
13 | my $command = q{-Len Read::Sentences lines_per_doc=1 Util::Eval document='$document->set_path("dir")' Write::Sentences to=.};
14 | # Work in this file's directory so that "dir" and the cleanup patterns below are relative to it.
15 | chdir(dirname(__FILE__));
16 | `rm -rf dir; seq 3 | treex $command`;
17 | is(`cat dir/noname002.txt`, "2\n", 'local execution');
18 | # The same scenario once more, this time run with the -pj3 options.
19 | `rm -rf dir; seq 3 | treex -pj3 $command`;
20 | is(`cat dir/noname002.txt`, "2\n", 'treex -p execution');
21 | # A bug causes the files to be created in the current directory instead of "dir",
22 | # so delete those files as well.
23 | `rm -f noname00?.txt`;
24 | 
25 | `rm -rf dir *-cluster-run-*`;
26 | done_testing();
27 | 


--------------------------------------------------------------------------------
/lib/Treex/EN.pm:
--------------------------------------------------------------------------------
 1 | package Treex::EN;
 2 | 
 3 | use strict;
 4 | use warnings;
 5 | use Treex::Unilang;
 6 | 
 7 | 1;
 8 | 
 9 | __END__
10 | #Module is here for synchronizing EN with Unilang
11 | #TODO - synopsis, example of parsing
12 | #a) from commandline
13 | #b) directly from perl
14 | =pod
15 | 
16 | =encoding utf8
17 | 
18 | =head1 NAME
19 | 
20 | Treex::EN - collection of blocks for processing English
21 | 
22 | =head1 DESCRIPTION
23 | 
24 | =head1 AUTHOR
25 | 
26 | Tomáš Kraut <kraut@ufal.mff.cuni.cz>
27 | 
28 | =head1 COPYRIGHT AND LICENSE
29 | 
30 | Copyright © 2011 by Institute of Formal and Applied Linguistics, Charles University in Prague
31 | 
32 | This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself.
33 | 
34 | 


--------------------------------------------------------------------------------
/lib/Treex/Service/t/fixtures/en_sample.txt:
--------------------------------------------------------------------------------
1 | I got a gift for my brother. "Contributing factors were the long-term trend toward warmer temperatures, as well as a moderate El Nino in the Pacific," Jay Lawrimore of NOAA's National Climatic Data Center said in a telephone interview from Asheville, North Carolina. The next-warmest winter on record was in 2004, and the third warmest winter was in 1998, Lawrimore said. The 10 warmest years on record have occurred since 1995. "We don't say this winter is evidence of the influence of greenhouse gases," Lawrimore said.
2 | 


--------------------------------------------------------------------------------
/lib/Treex/Service/t/fixtures/hi_example.txt:
--------------------------------------------------------------------------------
1 | अनुच्छेद 1 — सभी मनुष्यों को गौरव और अधिकारों के मामले में जन्मजात स्वतन्त्रता और समानता प्राप्त हैं। उन्हें बुद्धि और अन्तरात्मा की देन प्राप्त है और परस्पर उन्हें भाईचारे के भाव से बर्ताव करना चाहिए।
2 | 


--------------------------------------------------------------------------------
/lib/Treex/Tool/CorefSegments/Features.pm:
--------------------------------------------------------------------------------
1 | package Treex::Tool::CorefSegments::Features;
2 | 
3 | use Moose::Role;
4 | # Every class consuming this role must provide the two methods named below.
5 | requires 'extract_features';
6 | requires 'init_doc_features';
7 | 
8 | 1;
9 | 


--------------------------------------------------------------------------------
/lib/Treex/Tool/Coreference/CS/PronAnaphFilter.pm:
--------------------------------------------------------------------------------
 1 | ##########################################
 2 | ######## THIS MODULE IS OBSOLETE #########
 3 | ########### SHOULD BE DELETED ############
 4 | ##########################################
 5 | package Treex::Tool::Coreference::CS::PronAnaphFilter;
 6 | 
 7 | use Moose;
 8 | use Treex::Core::Common;
 9 | 
10 | with 'Treex::Tool::Coreference::NodeFilter';
11 | 
12 | # Rule presented in Nguy et al. (2009): a node is a candidate if its t_lemma is '#PersPron'
13 | # and its gram/person is '3'. Every call also logs a deprecation warning for this class.
14 | sub is_candidate {
15 |     my ( $self, $t_node ) = @_;
16 |     log_warn "Class Treex::Tool::Coreference::CS::PronAnaphFilter is DEPRECATED. Use Treex::Tool::Coreference::NodeFilter::PersPron instead.";
17 |     my $is_pers_pron = defined $t_node->t_lemma && $t_node->t_lemma eq '#PersPron';
18 |     return ( $is_pers_pron && defined $t_node->gram_person && $t_node->gram_person eq '3' );
19 | }
20 | 
21 | # TODO doc
22 | 
23 | 1;
24 | 


--------------------------------------------------------------------------------
/lib/Treex/Tool/Coreference/CorefFeatures.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Tool::Coreference::CorefFeatures;
 2 | use Moose;
 3 | 
 4 | extends 'Treex::Tool::ML::Ranker::Features';
 5 | # Override the defaults of the two inherited node-label attributes.
 6 | has '+node1_label' => ( default => 'anaph' );
 7 | has '+node2_label' => ( default => 'cand' );
 8 | 
 9 | 1;
10 | 


--------------------------------------------------------------------------------
/lib/Treex/Tool/Coreference/DistrModelComponent/CandOrd.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Tool::Coreference::DistrModelComponent::CandOrd;
 2 | 
 3 | use Moose;
 4 | 
 5 | with 'Treex::Tool::Coreference::DistrModelComponent';
 6 | 
 7 | has 'last_one_prob' => (
 8 |     is          => 'ro',
 9 |     isa         => 'Num',
10 |     required    => 1,
11 |     default     => 0.5, 
12 | );
13 | 
14 | # The only feature used by this component is the candidate's 'c_cand_ord' value.
15 | sub _select_features {
16 |     my ( $self, $anaph, $cand ) = @_;
17 |     return ( $cand->{'c_cand_ord'} );
18 | }
19 | 
20 | # Returns last_one_prob ** ord * (1 - last_one_prob), where ord is the given ordinal capped at 500.
21 | sub _base_distrib {
22 |     my ( $self, $cand_ord ) = @_;
23 | 
24 |     my $capped_ord = $cand_ord > 500 ? 500 : $cand_ord;
25 |     my $p          = $self->last_one_prob;
26 | 
27 |     return ( $p ** $capped_ord ) * ( 1 - $p );
28 | }
29 | 
30 | 1;
31 | 


--------------------------------------------------------------------------------
/lib/Treex/Tool/Coreference/DistrModelComponent/Number.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Tool::Coreference::DistrModelComponent::Number;
 2 | 
 3 | use Moose;
 4 | 
 5 | with 'Treex::Tool::Coreference::DistrModelComponent';
 6 | 
 7 | has 'number_count' => (
 8 |     is          => 'ro',
 9 |     isa         => 'Int',
10 |     required    => 1,
11 |     default     => 5, 
12 | );
13 | 
14 | # Features of this component: the candidate's 'c_cand_num' and the anaphor's 'c_anaph_num', in that order.
15 | sub _select_features {
16 |     my ( $self, $anaph, $cand ) = @_;
17 | 
18 |     return ( $cand->{'c_cand_num'}, $anaph->{'c_anaph_num'} );
19 | }
20 | 
21 | # Uniform base distribution: always 1 / number_count; both number arguments are currently ignored.
22 | sub _base_distrib {
23 |     my ( $self, $cand_num, $anaph_num ) = @_;
24 |     #if (($cand_num eq $anaph_num ) && 
25 |     #    (($cand_num eq 'sg') || ($cand_num eq 'pl'))) {
26 |     #    return (0.5 / 2);
27 |     #}
28 |     #else {
29 |     #    return (0.5 / ($self->number_count ** 2 - 2));
30 |     #}
31 |     my $value_count = $self->number_count;
32 |     return 1 / $value_count;
33 | }
34 | 
35 | 1;
36 | 


--------------------------------------------------------------------------------
/lib/Treex/Tool/Coreference/DistrModelComponent/SentDist.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Tool::Coreference::DistrModelComponent::SentDist;
 2 | 
 3 | use Moose;
 4 | 
 5 | with 'Treex::Tool::Coreference::DistrModelComponent';
 6 | 
 7 | has 'sent_dist_count' => (
 8 |     is          => 'ro',
 9 |     isa         => 'Int',
10 |     required    => 1,
11 |     default     => 2, 
12 | );
13 | 
14 | # The only feature used by this component is the candidate's 'c_sent_dist' value.
15 | sub _select_features {
16 |     my ( $self, $anaph, $cand ) = @_;
17 |     return ( $cand->{'c_sent_dist'} );
18 | }
19 | 
20 | # Uniform base distribution: always 1 / sent_dist_count; the distance arguments do not affect the result.
21 | sub _base_distrib {
22 |     my ( $self, $cand_dist, $anaph_dist ) = @_;
23 |     return 1 / $self->sent_dist_count;
24 | }
25 | 
26 | 1;
27 | 


--------------------------------------------------------------------------------
/lib/Treex/Tool/Coreference/EN/PronAnaphFilter.pm:
--------------------------------------------------------------------------------
 1 | ##########################################
 2 | ######## THIS MODULE IS OBSOLETE #########
 3 | ########### SHOULD BE DELETED ############
 4 | ##########################################
 5 | package Treex::Tool::Coreference::EN::PronAnaphFilter;
 6 | 
 7 | use Moose;
 8 | use Treex::Core::Common;
 9 | use Treex::Tool::Coreference::NodeFilter::PersPron;
10 | 
11 | with 'Treex::Tool::Coreference::NodeFilter';
12 | 
13 | has 'skip_referential' => ( is => 'ro', isa => 'Bool', default => 0, required => 1);
14 | 
15 | # Deprecated wrapper: logs a warning and delegates the decision to NodeFilter::PersPron::is_3rd_pers.
16 | sub is_candidate {
17 |     my ( $self, $t_node ) = @_;
18 | 
19 |     log_warn "Class Treex::Tool::Coreference::EN::PronAnaphFilter is DEPRECATED. Use Treex::Tool::Coreference::NodeFilter::PersPron instead.";
20 | 
21 |     # The 'skip_referential' attribute is forwarded under the key 'skip_nonref'.
22 |     my %filter_args = $self->skip_referential ? ( skip_nonref => 1 ) : ();
23 | 
24 |     return Treex::Tool::Coreference::NodeFilter::PersPron::is_3rd_pers( $t_node, \%filter_args );
25 | }
26 | 
27 | 
28 | # TODO doc
29 | 
30 | 1;
31 | 


--------------------------------------------------------------------------------
/lib/Treex/Tool/Coreference/Features/Coreference.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Tool::Coreference::Features::Coreference;
 2 | 
 3 | use Moose;
 4 | use Treex::Core::Common;
 5 | 
 6 | use List::MoreUtils qw/any/;
 7 | 
 8 | extends 'Treex::Tool::Coreference::BaseCorefFeatures';
 9 | 
10 | sub _is_coref {
11 |     my ( $anaph, $cand ) = @_;
12 |     my @chain = $anaph->get_coref_chain;    # nodes the candidate is compared against (with ==)
13 |     #push @chain, map { $_->functor =~ /^(APPS|CONJ|DISJ|GRAD)$/ ? $_->children : () } @chain;
14 |     return any { $cand == $_ } @chain;
15 | }
16 | 
17 | # Replaces the inherited binary features with a single 0/1 flag stored under 'is_coref'.
18 | override '_binary_features' => sub {
19 |     my ( $self, $set_features, $anaph, $cand, $candord ) = @_;
20 | 
21 |     my $is_coref = _is_coref( $anaph, $cand ) ? 1 : 0;
22 |     return { is_coref => $is_coref };
23 | };
24 | 
25 | # Unary features are whatever inner() returns; a false result is replaced by an empty hash reference.
26 | augment '_unary_features' => sub {
27 |     return inner() || {};
28 | };
29 | 
30 | 1;
31 | 


--------------------------------------------------------------------------------
/lib/Treex/Tool/Coreference/NodeFilter/Utils.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Tool::Coreference::NodeFilter::Utils;
 2 | 
 3 | use Treex::Core::Common;
 4 | use Exporter 'import';
 5 | our @EXPORT_OK = qw(ternary_arg);
 6 | 
 7 | # Interpret a ternary switch against a binary indicator.
 8 | #   $arg > 0  : returns the indicator (the indicator must hold)
 9 | #   $arg < 0  : returns the negated indicator (the indicator must not hold)
10 | #   otherwise : returns 1 (the indicator is ignored)
11 | sub ternary_arg {
12 |     my ( $switch, $indicator ) = @_;
13 | 
14 |     # Positive switch: pass the indicator through unchanged.
15 |     return $indicator if $switch > 0;
16 | 
17 |     # Negative switch: require the opposite of the indicator.
18 |     return !$indicator if $switch < 0;
19 | 
20 |     # Zero: no constraint.
21 |     return 1;
22 | }
23 | 
24 | 1;
25 | 


--------------------------------------------------------------------------------
/lib/Treex/Tool/Depfix/CS/DiacriticsStripper.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Tool::Depfix::CS::DiacriticsStripper;
 2 | use Moose;
 3 | use Treex::Core::Common;
 4 | use utf8;
 5 | 
 6 | # Returns a copy of $word in which every accented letter listed in the tr/// table is replaced by its plain letter.
 7 | sub strip_diacritics {
 8 |     my ($word) = @_;
 9 |     ( my $plain = $word ) =~ tr/áčďéěíľňóřšťúůýžÁČĎÉĚÍĽŇÓŘŠŤÚŮÝŽ/acdeeilnorstuuyzACDEEILNORSTUUYZ/;
10 | 
11 |     return $plain;
12 | }
13 | 
14 | 1;
15 | 
16 | =head1 NAME 
17 | 
18 | Treex::Tool::Depfix::CS::DiacriticsStripper
19 | 
20 | =head1 DESCRIPTION
21 | 
22 | =head1 PARAMETERS
23 | 
24 | =over
25 | 
26 | =back
27 | 
28 | =head1 AUTHOR
29 | 
30 | Rudolf Rosa <rosa@ufal.mff.cuni.cz>
31 | 
32 | =head1 COPYRIGHT AND LICENSE
33 | 
34 | Copyright © 2012 by Institute of Formal and Applied Linguistics,
35 | Charles University in Prague
36 | 
37 | This module is free software; you can redistribute it and/or modify it
38 | under the same terms as Perl itself.
39 | 
40 | 


--------------------------------------------------------------------------------
/lib/Treex/Tool/EnglishMorpho/exceptions/adj_adv.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | my $DATA = <<'END_DATA';
 3 | better	JJR	good
 4 | best	JJS	good
 5 | worse	JJR	bad
 6 | worst	JJS	bad
 7 | further	JJR	far
 8 | furthest	JJS	far
 9 | elder	JJR	old
10 | eldest	JJS	old
11 | stranger	JJR	strange
12 | strangest	JJS	strange
13 | better	RBR	well
14 | best	RBS	well
15 | worse	RBR	badly
16 | worst	RBS	badly
17 | further	RBR	far
18 | furthest	RBS	far
19 | END_DATA
20 | 
21 | die "Invalid usage: use option -a or -d\n" unless $ARGV[0] =~ /^-[ad]$/;
22 | print $DATA;    # both -a and -d print the whole table
23 | 


--------------------------------------------------------------------------------
/lib/Treex/Tool/EnglishMorpho/exceptions/contractions.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | # 's VBZ can be lemmatized both as "be" and "have".
 3 | # According to BNC, "be" is more frequent (309K vs. 29K occurrences).
 4 | my $DATA = <<'END_DATA';
 5 | n't	RB	not
 6 | 's	VBZ	be
 7 | 're	VBP	be
 8 | 've	VBP	have
 9 | 've	VB	have
10 | 'm	VBP	be
11 | 'll	MD	will
12 | 'd	MD	would
13 | 'd	VBD	have
14 | wo	MD	will
15 | ca	MD	can
16 | END_DATA
17 | 
18 | die "Invalid usage: use option -a or -d\n" unless $ARGV[0] =~ /^-[ad]$/;
19 | print $DATA;    # both -a and -d print the whole table
20 | 


--------------------------------------------------------------------------------
/lib/Treex/Tool/EnglishMorpho/exceptions/false_negation.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | my $DATA = <<'END_DATA';
 3 | nonetheless	RB	nonetheless
 4 | none-the-less	RB	nonetheless
 5 | nonchalance	NN	nonchalance
 6 | nonchalant	JJ	nonchalant
 7 | noncommittal	JJ	noncommittal
 8 | noncommittally	RB	noncommittally
 9 | nonsense	NN	nonsense
10 | nonsenses	NN	nonsenses
11 | nonstop	NN	nonstop
12 | non-stop	NN	nonstop
13 | END_DATA
14 | 
15 | die "Invalid usage: use option -a or -d\n" unless $ARGV[0] =~ /^-[ad]$/;
16 | print $DATA;    # both -a and -d print the whole table
17 | 


--------------------------------------------------------------------------------
/lib/Treex/Tool/EnglishMorpho/exceptions/nouns_invariant_polemic.list:
--------------------------------------------------------------------------------
1 | clutches
2 | dues
3 | equities
4 | fumes
5 | groceries
6 | hostilities
7 | returns
8 | refreshments
9 | stays


--------------------------------------------------------------------------------
/lib/Treex/Tool/EnglishMorpho/exceptions/nouns_plural_es.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | my @DATA = qw(
 3 |     bias
 4 |     canvas
 5 |     gas
 6 |     iris
 7 | );
 8 | 
 9 | sub analyze() {		## no critic qw(Subroutines::ProhibitSubroutinePrototypes)
10 |     # Emit "<noun>es<TAB>NNS<TAB><noun>" for every noun in @DATA.
11 |     print map { "${_}es\tNNS\t$_\n" } @DATA;
12 | 
13 |     return;
14 | }
15 | 
16 | sub generate() {	## no critic qw(Subroutines::ProhibitSubroutinePrototypes)
17 |     # Emit "<noun><TAB><noun>es" for every noun in @DATA.
18 |     print map { "$_\t${_}es\n" } @DATA;
19 | 
20 |     return;
21 | }
22 | 
23 | my $mode = $ARGV[0];    # -a: analysis table, -g: generation table, -d: plain word list
24 | if    ( $mode eq '-a' ) { analyze(); }
25 | elsif ( $mode eq '-g' ) { generate(); }
26 | elsif ( $mode eq '-d' ) { print map { "$_\n" } @DATA; }
27 | else                    { die "Invalid usage: use option -a, -g or -d\n"; }
28 | 
29 | 


--------------------------------------------------------------------------------
/lib/Treex/Tool/EnglishMorpho/exceptions/verbs_cked.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | my @DATA = qw(
 3 |     bivouac
 4 |     frolic
 5 |     mimic
 6 |     panic
 7 |     picnic
 8 |     traffic
 9 | );
10 | 
11 | sub analyze() {		## no critic qw(Subroutines::ProhibitSubroutinePrototypes)
12 |     # For every verb in @DATA print its "-ked" form (tagged VBD and VBN) and its "-king" form (tagged VBG).
13 |     for my $verb (@DATA) {
14 |         print "${verb}ked\tVBD\t$verb\n", "${verb}ked\tVBN\t$verb\n", "${verb}king\tVBG\t$verb\n";
15 |     }
16 | 
17 |     return;
18 | }
19 | 
20 | my $mode = $ARGV[0];    # -a: analysis table, -d: plain word list
21 | if    ( $mode eq '-a' ) { analyze(); }
22 | elsif ( $mode eq '-d' ) { print map { "$_\n" } @DATA; }
23 | else                    { die "Invalid usage: use option -a or -d\n"; }
24 | 
25 | 


--------------------------------------------------------------------------------
/lib/Treex/Tool/EnglishMorpho/exceptions/verbs_other.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | my $DATA = <<'END_DATA';
 3 | am	VBP	be
 4 | are	VBP	be
 5 | is	VBZ	be
 6 | has	VBZ	have
 7 | hath	VBZ	have
 8 | being	VBG	be
 9 | belied	VBN	belie
10 | belied	VBD	belie
11 | belies	VBZ	belie
12 | belying	VBG	belie
13 | underlies	VBZ	underlie
14 | underlied	VBD	underlie
15 | underlied	VBN	underlie
16 | underlying	VBG	underlie
17 | ageing	VBG	age
18 | skiing	VBG	ski
19 | END_DATA
20 | 
21 | die "Invalid usage: use option -a or -d\n" unless $ARGV[0] =~ /^-[ad]$/;
22 | print $DATA;    # both -a and -d print the whole table
23 | 


--------------------------------------------------------------------------------
/lib/Treex/Tool/EnglishMorpho/t/contractions.txt:
--------------------------------------------------------------------------------
 1 | It's gonna rain.
 2 | It is going to rain.
 3 | I ain't gonna do it.
 4 | I am not going to do it.
 5 | I wanna hold your hand.
 6 | I want to hold your hand.
 7 | I've gotta go.
 8 | You oughta know.
 9 | It needs lotsa work.
10 | We're outta time.
11 | Gimme that.
12 | Lemme see that.
13 | Whatcha gonna do?
14 | I dunno.
15 | I shoulda never gotten married.
16 | It coulda been worse.
17 | Let's use contractions 'cos it's kinda cool.
18 | 


--------------------------------------------------------------------------------
/lib/Treex/Tool/EnglishMorpho/t/morpho.t:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | use strict;
 3 | use warnings;
 4 | use Test::More;
 5 | use Treex::Core::Log;
 6 | require_ok('Treex::Tool::EnglishMorpho::Lemmatizer');
 7 | # lemmatize() is expected to return a two-element list for a (word, tag) pair.
 8 | my ( $word, $tag ) = qw(I PP);
 9 | my $lemmatizer = new_ok('Treex::Tool::EnglishMorpho::Lemmatizer');
10 | my @result = $lemmatizer->lemmatize( $word, $tag );
11 | cmp_ok( scalar @result, '==', 2, 'Lemmatization returns array of two arguments' );
12 | 
13 | done_testing();
14 | 
15 | 


--------------------------------------------------------------------------------
/lib/Treex/Tool/Gazetteer/RuleBasedScorer.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Tool::Gazetteer::RuleBasedScorer;
 2 | 
 3 | use Moose;
 4 | 
 5 | my $weights = {
 6 |     full_str_eq => [0, 2],
 7 |     full_str_non_alpha => [0, -100],
 8 |     first_starts_capital => [-10, 10],
 9 |     entity_starts_capital => [-50, 10],
10 |     all_start_capital => [-1, 1],
11 |     no_first => [-50, 1],
12 |     last_menu => [0, -50],
13 | };
14 | 
15 | # Sums the weights of all known features in $feats (a reference to a list of [name, value] pairs)
16 | # and multiplies the sum by the 'anode_count' feature (1 if that feature is missing or undefined).
17 | sub score {
18 |     my ($feats) = @_;
19 | 
20 |     my ( $total, %value_of ) = (0);
21 |     for my $feat ( @{$feats} ) {
22 |         my ( $name, $raw ) = @{$feat};
23 |         $value_of{$name} = $raw;
24 | 
25 |         # Each known feature has two weights: index 1 is used when the value is >= 1, index 0 otherwise.
26 |         my $pair = $weights->{$name};
27 |         $total += $pair->[ $raw >= 1 ? 1 : 0 ] if defined $pair;
28 |     }
29 |     #if ($value_of{all_capital}) {
30 |     #    $total += $value_of{full_str_eq} ? 10 : -50;
31 |     #}
32 | 
33 |     my $multiplier = $value_of{anode_count} // 1;
34 |     return $total * $multiplier;
35 | }
36 | 
37 | 1;
38 | 


--------------------------------------------------------------------------------
/lib/Treex/Tool/GoogleTranslate/t/texts.txt:
--------------------------------------------------------------------------------
1 | Dnes je krásný den, šel jsem k vodě ven.
2 | Kdo jinému jámu kopá až se ucho utrhne.
3 | Moje vznášedlo je plné ptakopysků.
4 | 
5 | 


--------------------------------------------------------------------------------
/lib/Treex/Tool/IO/t/test.arff:
--------------------------------------------------------------------------------
 1 | @relation weather
 2 | 
 3 | @attribute outlook {sunny, overcast, rainy}
 4 | @attribute temperature real
 5 | @attribute humidity real
 6 | @attribute windy {TRUE, FALSE}
 7 | @attribute play {yes, no}
 8 | 
 9 | @data
10 | sunny,85,85,FALSE,no
11 | sunny,80,90,TRUE,no
12 | overcast,83,86,FALSE,yes
13 | rainy,70,96,FALSE,yes
14 | {0 rainy,1 68,2 80,3 FALSE}
15 | {0 rainy,2 70,3 'TRUE',4 no}
16 | overcast,64,65,TRUE,?
17 | ?,72,95,FALSE,?
18 | sunny,69,70,FALSE,yes
19 | rainy,75,80,FALSE,yes
20 | sunny,?,70,TRUE,yes
21 | 'overcast',72,90,TRUE,yes
22 | "overcast",81,75,FALSE,yes
23 | rainy,71,91,TRUE,no
24 | 


--------------------------------------------------------------------------------
/lib/Treex/Tool/IR/esa_script.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | 
 3 | use strict;
 4 | use warnings;
 5 | 
 6 | use Treex::Tool::IR::ESA;
 7 | 
 8 | my $esa = Treex::Tool::IR::ESA->new();
 9 | # Each input line is one query; progress goes to STDERR, the resulting keys go to STDOUT.
10 | while (my $line = <STDIN>) {
11 |     chomp $line;
12 | 
13 |     print STDERR "Generating ESA vector for $line...\n";
14 |     my %vector = $esa->esa_vector_n_best($line, 10);
15 |     # Lines for which an empty vector comes back produce no output line at all.
16 |     next if (!%vector);
17 |     # NOTE(review): keys are printed in ascending order of their values - confirm this is the intended order.
18 |     my @sorted_keys = sort {$vector{$a} <=> $vector{$b}} keys %vector;
19 |     print STDOUT (join " ", @sorted_keys) . "\n";
20 | }
21 | 


--------------------------------------------------------------------------------
/lib/Treex/Tool/LM/t/interactive_testTreeLM.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | use strict;
 3 | use warnings;
 4 | use utf8;
 5 | binmode STDIN, ':utf8';
 6 | binmode STDOUT, ':utf8';
 7 | binmode STDERR, ':utf8';
 8 | 
 9 | #use IO::Prompt; somehow it does not handle utf8
10 | 
11 | use Treex::Tool::LM::Lemma;
12 | use Treex::Tool::LM::TreeLM;
13 | my $model = Treex::Tool::LM::TreeLM->new();
14 | # Ask for three values per query; an empty (or otherwise false) answer ends the loop.
15 | while (1){
16 |     print "-------- Query---------\n";
17 |     print 'Lg POS: ';  $_ = <>; chomp; my $uLg = $_ or last;
18 |     print 'Ld POS: ';  $_ = <>; chomp; my $uLd = $_ or last;
19 |     print 'Fd: ';      $_ = <>; chomp; my $Fd  = $_ or last;
20 |     my $Lg = Treex::Tool::LM::Lemma->new($uLg);
21 |     my $Ld = Treex::Tool::LM::Lemma->new($uLd);
22 |     my $probLdFd_Lg = $model->get_prob_LdFd_given_Lg($Ld,$Fd,$Lg,1);    # NOTE(review): result is never printed here; presumably the trailing 1 makes the model report it - confirm
23 | }
24 | print "\n";


--------------------------------------------------------------------------------
/lib/Treex/Tool/LM/t/test_MorphoLM.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | use strict;
 3 | use warnings;
 4 | use utf8;
 5 | binmode STDOUT, ':utf8';
 6 | 
 7 | use Treex::Tool::LM::MorphoLM;
 8 | use Treex::Tool::LM::FormInfo;
 9 | 
10 | # load default model file
11 | my $morphoLM = Treex::Tool::LM::MorphoLM->new();
12 | # 1) All forms of the lemma, one per line: form, tag and count separated by tabs.
13 | print "Lemma 'moci': form tag count\n";
14 | my @forms = $morphoLM->forms_of_lemma('moci');
15 | foreach my $form_info (@forms) {
16 |     print join( "\t", $form_info->get_form(), $form_info->get_tag(), $form_info->get_count() ), "\n";
17 | }
18 | # 2) The single best form whose tag matches the regex '^Vp'.
19 | print "\nMost frequent past participle of 'moci' is: "
20 |     , $morphoLM->best_form_of_lemma( 'moci', '^Vp' )
21 |     , "\n\n";
22 | # 3) All forms restricted by the same tag regex, printed via to_string().
23 | print "Past participles of 'moci'\n";
24 | @forms = $morphoLM->forms_of_lemma( 'moci', { tag_regex => '^Vp' } );
25 | foreach my $form_info (@forms) {
26 |     print $form_info->to_string(), "\n";
27 | }
28 | 


--------------------------------------------------------------------------------
/lib/Treex/Tool/Lexicon/Derivations/test_cs.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | 
 3 | use strict;
 4 | use warnings;
 5 | use utf8;
 6 | use Treex::Tool::Lexicon::Derivations::CS;
 7 | binmode STDOUT, ":utf8";
 8 | # Sample lemmas for each derivation type that is passed to derive() below.
 9 | my %sample_input = (
10 |     adj2adv        => [qw(zelený sporý drahý plochý pracný zajímavý)],
11 |     verb2noun      => [qw(chodit platit hlídat smát)],
12 |     noun2adj       => [qw(hrad pes prach strom les matka Josef Praha Bush)],
13 |     verb2adj       => [qw(praštit vařit létat ušít skácet napsat)],
14 |     verb2activeadj => [qw(chodit plavat klamat hořet)],
15 |     perf2imperf    => [qw(otevřít)],
16 |     imperf2perf    => [qw(dosahovat)],
17 | );
18 | # The types are sorted so that the output order is the same on every run (hash key order is not).
19 | foreach my $type ( sort keys %sample_input ) {
20 |     print "Derivations of type $type\n";
21 |     foreach my $input ( @{ $sample_input{$type} } ) {
22 |         print "\t$input --> " . join( ", ", Treex::Tool::Lexicon::Derivations::CS::derive( $type, $input ) ) . "\n";
23 |     }
24 | }
25 | 


--------------------------------------------------------------------------------
/lib/Treex/Tool/Lexicon/Generation/t/cs.t:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | use utf8;
 3 | use strict;
 4 | use warnings;
 5 | use Test::More; #tests => 14;
 6 | 
 7 | use_ok('Treex::Tool::Lexicon::Generation::CS');
 8 | my $generator = new_ok('Treex::Tool::Lexicon::Generation::CS');
 9 | 
10 | my @TESTS = (
11 |     ['pes', 'NNMS4-----A----', 'psa'],
12 |     ['pes', '...S4', 'psa'],
13 | );
14 | 
15 | foreach my $test (@TESTS) {
16 |     my ($lemma, $tag_regex, $expected_form) = @$test;
17 |     my $form_info = $generator->best_form_of_lemma($lemma, $tag_regex);
18 |     my $form = $form_info ? $form_info->get_form() : undef;
19 |     # is() compares as strings and reports an undefined form as a plain failure, without warnings.
20 |     is($form, $expected_form, "$lemma + $tag_regex => $expected_form");
21 | }
22 | 
23 | done_testing();
24 | 


--------------------------------------------------------------------------------
/lib/Treex/Tool/Lexicon/Generation/t/es.t:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | use utf8;
 3 | use strict;
 4 | use warnings;
 5 | use Test::More; #tests => 14;
 6 | use Lingua::Interset::FeatureStructure;
 7 | 
 8 | use_ok('Treex::Tool::Lexicon::Generation::ES');
 9 | my $generator = new_ok('Treex::Tool::Lexicon::Generation::ES');
10 | 
11 | my @TESTS = (
12 |     ['llover', {pos=> 'verb', number=>'sing', mood=>'ind', person=>3, tense=>'past'}, 'llovió'],
13 |     ['gustar', {pos=> 'verb', number=>'sing', mood=>'ind', person=>3, tense=>'pres'}, 'gusta'],
14 | );
15 | 
16 | foreach my $test (@TESTS) {
17 |     my ($lemma, $features, $expected_form) = @$test;
18 |     my $iset = Lingua::Interset::FeatureStructure->new($features);
19 |     my $form = $generator->best_form_of_lemma($lemma, $iset);
20 |     # is() compares as strings and reports an undefined form as a plain failure, without warnings.
21 |     is($form, $expected_form, "$lemma + ".$iset->as_string()." => $expected_form");
22 | }
23 | 
24 | done_testing();
25 | 


--------------------------------------------------------------------------------
/lib/Treex/Tool/Lexicon/Generation/t/pt.t:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | use utf8;
 3 | use strict;
 4 | use warnings;
 5 | use Test::More;    # no fixed plan; done_testing() below closes the test run
 6 | use Lingua::Interset::FeatureStructure;
 7 | 
 8 | use_ok('Treex::Tool::Lexicon::Generation::PT');
 9 | my $generator = new_ok('Treex::Tool::Lexicon::Generation::PT');
10 | # Each test case is [ lemma, Interset feature hash, expected word form ].
11 | my @TESTS = (
12 |     ['chover', {pos=> 'verb', number=>'sing', mood=>'ind', person=>3, tense=>'past'}, 'choveu'],
13 |     ['gostar', {pos=> 'verb', number=>'sing', mood=>'ind', person=>3, tense=>'pres'}, 'gosta'],
14 | );
15 | 
16 | foreach my $test (@TESTS) {
17 |     my ($lemma, $features, $expected_form) = @$test;
18 |     my $iset = Lingua::Interset::FeatureStructure->new($features);
19 |     my $form = $generator->best_form_of_lemma($lemma, $iset);
20 |     # is() compares as strings and treats an undef $form as an ordinary mismatch.
21 |     is($form, $expected_form, "$lemma + ".$iset->as_string()." => $expected_form");
22 | }
23 | 
24 | done_testing();
25 | 


--------------------------------------------------------------------------------
/lib/Treex/Tool/Lexicon/Generation/t/ru.t:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | 
 3 | use strict;
 4 | use warnings;
 5 | use utf8;    # the test data below contains Cyrillic string literals
 6 | use Test::More;
 7 | 
 8 | # Load the module through use_ok first, so that a load failure is reported as a test failure.
 9 | BEGIN { use_ok('Treex::Tool::Lexicon::Generation::RU') }
10 | 
11 | my $generator = new_ok('Treex::Tool::Lexicon::Generation::RU');
12 | 
13 | # forms_of_lemma() is called in list context; only the first returned form is checked.
14 | my ($first_form) = map { $_->get_form } $generator->forms_of_lemma( 'Россия', { tag_regex => 'NNFS6.*' } );
15 | is( $first_form, 'России', 'Correct generation of forms of "Russia" in the locative case' );
16 | 
17 | done_testing();
18 | 


--------------------------------------------------------------------------------
/lib/Treex/Tool/ML/Clustering/t/c_cluster.t:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | use strict;
 3 | use warnings;
 4 | # Disabled guard: if re-enabled, it skips this test unless both EXPERIMENTAL and EXPENSIVE_TESTING are set.
 5 | #BEGIN {
 6 | #  if (!$ENV{EXPERIMENTAL} || !$ENV{EXPENSIVE_TESTING}) {
 7 | #    require Test::More;
 8 | #    Test::More::plan(skip_all => 'This test takes long time and is experimental');
 9 | #  }
10 | #}
11 | 
12 | use Test::More tests => 1;
13 | # Smoke test only: checks that the clustering object can be instantiated.
14 | use Treex::Tool::ML::Clustering::C_Cluster;
15 | my $cluster = Treex::Tool::ML::Clustering::C_Cluster->new();
16 | 
17 | isa_ok( $cluster, 'Treex::Tool::ML::Clustering::C_Cluster', 'cluster instantiated' );
18 | 


--------------------------------------------------------------------------------
/lib/Treex/Tool/MLFix/.ScikitLearn.pm.swp:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ufal/treex/06c3353d9d7c61ff86c572cd926b055733a73442/lib/Treex/Tool/MLFix/.ScikitLearn.pm.swp


--------------------------------------------------------------------------------
/lib/Treex/Tool/NamedEnt/README:
--------------------------------------------------------------------------------
 1 | 
 2 | NamedEnt: Tools for SVM-based named-entity recognition
 3 | 
 4 | Files in this directory:
 5 | 
 6 | Features.pl - script for extracting feature vectors from data (uses modules mentioned below)
 7 | Features/*  - modules for extracting one/two/three-word entity features
 8 | 
 9 | README - this file
10 | 
11 | TestSVM.pl - script that evaluates an SVM model on given data
12 | TrainSVM.pl - script that trains an SVM model on given data
13 | TuneSVM*.pl - scripts for SVM tuning
14 | 
15 | 
16 | 


--------------------------------------------------------------------------------
/lib/Treex/Tool/NamedEnt/tuneWrapper.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Passes TuneSVM.map.pl, together with this wrapper's arguments, to qcmd as one command string.
 3 | . /net/projects/SGE/user/sge_profile > /dev/null
 4 | export PATH=$PATH:~pajas/bin
 5 | # $* (not $@) is used inside the quotes so that all arguments stay in a single word.
 6 | qcmd -j -- "./TuneSVM.map.pl $*"
 7 | #qsub -cwd -j y -V "perl TuneSVM.map.pl oneword.feat $@"
 8 | #qrsh -cwd -V -p -50 -l mf=5g -now no 'renice 10 $$ > /dev/null; perl TuneSVM.map.pl oneword.feat $@'
 9 | 
10 | 


--------------------------------------------------------------------------------
/lib/Treex/Tool/Parser/Charniak/Node.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Tool::Parser::Charniak::Node;
 2 | 
 3 | use Moose;
 4 | 
 5 | # Label stored in this node; defaults to the string 'null'.
 6 | has term => (
 7 |     isa      => 'Str',
 8 |     is       => 'rw',
 9 |     required => 1,
10 |     default  => 'null'
11 | );
12 | 
13 | # Child nodes in insertion order; get_children() is declared as the reader.
14 | has children =>
15 |     (
16 |     isa     => 'ArrayRef',
17 |     is      => 'rw',
18 |     default => sub { [] },
19 |     reader  => 'get_children',
20 |     );
21 | 
22 | # Construction hook; intentionally does nothing beyond unpacking its arguments.
23 | sub BUILD {
24 |     my ( $self, $params ) = @_;
25 | }
26 | 
27 | # Appends $child to the end of this node's list of children.
28 | sub add_child {
29 |     my ( $self, $child ) = @_;
30 |     push @{ $self->children }, $child;
31 | }
32 | 
33 | # Returns the node's term, read through the accessor rather than the object hash.
34 | sub get_type {
35 |     my ($self) = @_;
36 |     return $self->term;
37 | }
38 | 
39 | 1;
40 | __END__


--------------------------------------------------------------------------------
/lib/Treex/Tool/Parser/MSTperl/samples/labeller_test.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # "$@" is quoted so that extra arguments containing whitespace are passed on intact.
3 | ./test_labeller_tsv.pl sample_test.tsv sample.Lmodel sample.config "$@"


--------------------------------------------------------------------------------
/lib/Treex/Tool/Parser/MSTperl/samples/labeller_train.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # "$@" is quoted so that extra arguments containing whitespace are passed on intact.
3 | ./train_labeller_tsv.pl sample_train.tsv sample.Lmodel sample.config 0 "$@"


--------------------------------------------------------------------------------
/lib/Treex/Tool/Parser/MSTperl/samples/sample_test.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | ./test_tsv.pl sample_test.tsv sample.model sample.config  # arguments, judging by the file names: test data, model, config


--------------------------------------------------------------------------------
/lib/Treex/Tool/Parser/MSTperl/samples/sample_train.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | ./train_tsv.pl sample_train.tsv sample.model sample.config 1  # NOTE(review): meaning of the trailing 1 is not visible here; confirm in train_tsv.pl
3 | 


--------------------------------------------------------------------------------
/lib/Treex/Tool/Parser/MSTperl/samples/treex_input.txt:
--------------------------------------------------------------------------------
 1 | Boys love girls.
 2 | John Brown loves Mary Thatcher.
 3 | John eagerly loves the sweet little Mary.
 4 | A boy loves a girl.
 5 | John loves Mary.
 6 | Counting Flowers On The Wall
 7 | I keep hearing you're concerned about my happiness.
 8 | But all that thought you're giving me is conscience I guess.
 9 | If I was walking in your shoes, I wouldn't worry none.
10 | While you and your friends are worried about me I'm having lots of fun.
11 | Counting flowers on the wall that don't bother me at all.
12 | Playing solitaire till dawn with a deck of fifty-one.
13 | Smoking cigarettes and watching Captain Kangaroo.
14 | Now don't tell me I've nothing to do.
15 | It's good to see you, I must go, I know I look a fright.
16 | Anyway my eyes are not accustomed to this light.
17 | And my shoes are not accustomed to this hard concrete.
18 | So I must go back to my room and make my day complete.
19 | 


--------------------------------------------------------------------------------
/lib/Treex/Tool/Parser/MSTperl/samples/treex_parse.scen:
--------------------------------------------------------------------------------
 1 | # You can use this scenario for parsing sentences in Treex.
 2 | # If you have Treex installed, just run:
 3 | # treex treex_parse.scen
 4 | Util::SetGlobal language=en selector=src
 5 | Read::Sentences from=treex_input.txt
 6 | W2A::ResegmentSentences
 7 | W2A::EN::Tokenize
 8 | W2A::EN::NormalizeForms
 9 | W2A::EN::FixTokenization
10 | W2A::EN::TagMorce
11 | W2A::EN::FixTags
12 | W2A::EN::Lemmatize
13 | A2N::EN::StanfordNamedEntities model=ner-eng-ie.crf-3-all2008.ser.gz
14 | A2N::EN::DistinguishPersonalNames
15 | W2A::MarkChunks
16 | W2A::EN::ParseMSTperl model_name=conll_2007_medium
17 | W2A::EN::FixNominalGroups
18 | W2A::EN::FixIsMember
19 | W2A::EN::FixAtree
20 | W2A::EN::FixMultiwordPrepAndConj
21 | W2A::EN::FixDicendiVerbs
22 | W2A::EN::SetAfunAuxCPCoord
23 | W2A::EN::SetAfun
24 | Write::Treex to=treex_output.treex
25 | 


--------------------------------------------------------------------------------
/lib/Treex/Tool/Parser/MSTperl/scripts/conll2inline.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | use strict;
 3 | use warnings;
 4 | use utf8;
 5 | 
 6 | # Converts input with one token per line (tab-separated fields, blank line between
 7 | # sentences) to an inline format: one sentence per line, tokens separated by tabs
 8 | # and the fields of a token separated by spaces.
 9 | 
10 | # Prints its argument followed by a newline.
11 | sub say {
12 |     my $line = shift;
13 |     print "$line\n";
14 | }
15 | 
16 | binmode STDIN, ':utf8';
17 | binmode STDOUT, ':utf8';
18 | binmode STDERR, ':utf8';
19 | 
20 | my @sentence;
21 | while ( my $line = <> ) {
22 |     chomp $line;
23 |     # Test the length, not the truth value, so that a line consisting of "0" is not taken for a blank line.
24 |     if ( length $line ) {
25 |         # A token line: its fields become space-separated so that tabs can separate tokens.
26 |         ( my $attributes = $line ) =~ s/\t/ /g;
27 |         push @sentence, $attributes;
28 |     }
29 |     else {
30 |         # A blank line closes the current sentence.
31 |         say( join "\t", @sentence );
32 |         @sentence = ();
33 |     }
34 | }
35 | 
36 | # Flush the last sentence when the input does not end with a blank line,
37 | # terminating it with a newline like every other sentence.
38 | if (@sentence) {
39 |     say( join "\t", @sentence );
40 | }


--------------------------------------------------------------------------------
/lib/Treex/Tool/Parser/MSTperl/scripts/inline2conll.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | use strict;
 3 | use warnings;
 4 | use utf8;
 5 | 
 6 | # Emits the given string followed by a newline.
 7 | sub say {
 8 |     my ($text) = @_;
 9 |     print $text, "\n";
10 | }
11 | 
12 | binmode STDIN, ':utf8';
13 | binmode STDOUT, ':utf8';
14 | binmode STDERR, ':utf8';
15 | 
16 | # Each input line is one sentence: tokens are tab-separated and the fields of a
17 | # token are space-separated. Every token is printed on its own line with
18 | # tab-separated fields, and each sentence is followed by an empty line.
19 | while ( defined( my $sentence = <> ) ) {
20 |     chomp $sentence;
21 |     for my $token ( split /\t/, $sentence ) {
22 |         say( join "\t", split / /, $token );
23 |     }
24 |     say('');
25 | }


--------------------------------------------------------------------------------
/lib/Treex/Tool/Parser/MSTperl/scripts/inline_sentences_reorder.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | use strict;
 3 | use warnings;
 4 | use utf8;
 5 | 
 6 | # Reorders inline-format sentences (one per line, tokens separated by tabs)
 7 | # by descending number of tokens.
 8 | 
 9 | # Prints its argument followed by a newline.
10 | sub say {
11 |     my $line = shift;
12 |     print "$line\n";
13 | }
14 | 
15 | binmode STDIN, ':utf8';
16 | binmode STDOUT, ':utf8';
17 | binmode STDERR, ':utf8';
18 | 
19 | # Collect [ token count, sentence ] pairs. A list is used instead of a hash keyed
20 | # by the sentence text so that repeated sentences are kept, not collapsed into one.
21 | my @sentences;
22 | while ( my $sentence = <> ) {
23 |     chomp $sentence;
24 |     my @nodes = split /\t/, $sentence;
25 |     push @sentences, [ scalar(@nodes), $sentence ];
26 | }
27 | 
28 | # Descending length; swap $a and $b in the comparison for ascending length.
29 | my @sorted_sentences = map { $_->[1] } sort { $b->[0] <=> $a->[0] } @sentences;
30 | 
31 | foreach my $sentence (@sorted_sentences) {
32 |     say $sentence;
33 | }


--------------------------------------------------------------------------------
/lib/Treex/Tool/Parser/MSTperl/scripts/labelled_parse_test.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # $1=test data $2=config $3=model $4=lmodel
 3 | # $5=algorithm $6=debug $7=pruning (currently commented out and not passed on)
 4 | echo "Going to test the parser and the labeller in one pipeline."
 5 | echo "Test data: $1"
 6 | echo "Config file: $2"
 7 | echo "Parser model file: $3"
 8 | echo "Labeller model file: $4"
 9 | # echo "Algorithm: $5"
10 | # echo "Debug level: $6"
11 | # echo "N-best pruning: $7"
12 | /home/rosa/mst_perl/scripts/test_parse_and_label.pl "/home/rosa/mst_perl/data/$1" "/home/rosa/mst_perl/$2" "/home/rosa/models/$3" "/home/rosa/models/$4" # $5 $6 $7
13 | 


--------------------------------------------------------------------------------
/lib/Treex/Tool/Parser/MSTperl/scripts/labeller_test.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # $1=test data $2=base name shared by the model ($2.lmodel) and the config ($2.config)
 3 | echo "Going to test the labeller."
 4 | echo "Test data: $1"
 5 | echo "Config file: $2.config"
 6 | echo "Model file: $2.lmodel"
 7 | #echo "Algorithm: $4"
 8 | #echo "Debug level: $5"
 9 | #echo "Max number of states in Viterbi: $6"
10 | /home/rosa/mst_perl/scripts/test_labeller_tsv.pl  "/home/rosa/mst_perl/data/$1" "/home/rosa/models/$2.lmodel" "/home/rosa/models/$2.config" #$4 $5 $6
11 | 


--------------------------------------------------------------------------------
/lib/Treex/Tool/Parser/MSTperl/scripts/labeller_train_and_test.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # $1=training data $2=test data $3=config $4=model ($5=algorithm $6=debug $7=pruning are not passed on)
 3 | echo "Going to train and test the labeller."
 4 | echo "Training data: $1"
 5 | echo "Test data: $2"
 6 | echo "Config file: $3"
 7 | echo "Model file: $4"
 8 | #echo "Algorithm: $5"
 9 | #echo "Debug level: $6"
10 | #echo "Max number of states in Viterbi: $7"
11 | /home/rosa/mst_perl/scripts/train_labeller_tsv.pl "/home/rosa/mst_perl/data/$1" "/home/rosa/models/$4" "/home/rosa/mst_perl/$3" 0 # $5 $6 $7
12 | /home/rosa/mst_perl/scripts/test_labeller_tsv.pl  "/home/rosa/mst_perl/data/$2" "/home/rosa/models/$4" "/home/rosa/mst_perl/$3" # $5 $6 $7


--------------------------------------------------------------------------------
/lib/Treex/Tool/Parser/MSTperl/scripts/make_czech_tags.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/perl
 2 | 
 3 | use strict;
 4 | use warnings;
 5 | 
 6 | binmode STDIN, ":utf8";
 7 | binmode STDOUT, ":utf8";
 8 | 
 9 | # Rewrites the tag columns of tab-separated input. For every non-empty line:
10 | #   $item[3] and $item[4] are both set to a new tag: the original $item[3] followed
11 | #     by the case (Cas=X in $item[5]) or, when there is no case, by the
12 | #     sub-POS (SubPOS=X in $item[5]);
13 | #   $item[5] (morphological features) and all other items are kept untouched.
14 | while (<>) {
15 |     chomp;
16 |     my @item = split(/\t/, $_);
17 |     if (@item) {
18 |         # Short lines may lack these columns; treat them as empty instead of undef.
19 |         my $tag      = defined $item[3] ? $item[3] : '';
20 |         my $features = defined $item[5] ? $item[5] : '';
21 |         if ( $features =~ /Cas=(.)/ ) {
22 |             $tag .= $1;    # pos + case
23 |         }
24 |         elsif ( $features =~ /SubPOS=(.)/ ) {
25 |             $tag .= $1;    # pos + subpos
26 |         }
27 |         # Otherwise the tag stays the bare pos: the capture variable is only
28 |         # read after a match that is known to have succeeded.
29 |         $item[3] = $tag;
30 |         $item[4] = $tag;
31 |     }
32 |     print join("\t", @item)."\n";
33 | }


--------------------------------------------------------------------------------
/lib/Treex/Tool/Parser/MSTperl/scripts/pcedt2conll.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | renice 10 $$  # set the niceness of this shell to 10
 3 | treex Write::AttributeSentencesAligned \
 4 |     language=cs alignment_language=en layer=a \
 5 |     alignment_type=int.gdfa \
 6 |     attributes="ord form lemma CzechCoarseTag(tag) tag parent->ord afun \
 7 |     aligned->ord aligned->tag aligned->afun aligned->parent->ord \
 8 |     AlignedTreeDistances(node,alignment_hash)" \
 9 | -- ../data/pcedt_latest/*/wsj_*.treex.gz > ../data/pcedt_data_latest.tsv
10 | head -n -119991 ../data/pcedt_data_latest.tsv > ../data/pcedt_train_latest.tsv  # train file: everything except the last 119991 lines
11 | tail -n  119991 ../data/pcedt_data_latest.tsv > ../data/pcedt_test_latest.tsv  # test file: the last 119991 lines
12 | 


--------------------------------------------------------------------------------
/lib/Treex/Tool/Parser/MSTperl/scripts/pcedt2conll_tag_and_parse_en.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | renice 10 $$  # set the niceness of this shell to 10
 3 | treex \
 4 |     W2A::EN::TagMorce language=en \
 5 |     W2A::EN::FixTags language=en \
 6 |     W2A::EN::Lemmatize language=en \
 7 |     ../../scenarios/en_analysis_2.scen \
 8 |     Write::AttributeSentencesAligned \
 9 |     language=cs alignment_language=en layer=a \
10 |     alignment_type=int.gdfa \
11 |     attributes="ord form lemma CzechCoarseTag(tag) tag parent->ord afun \
12 |     aligned->ord aligned->tag aligned->afun aligned->parent->ord" \
13 | -- ../wsj_*.treex.gz \
14 | > ../data/pcedt_data_en_parsed.tsv
15 | head -n -119991 ../data/pcedt_data_en_parsed.tsv > ../data/pcedt_train_en_parsed.tsv  # train file: everything except the last 119991 lines
16 | tail -n  119991 ../data/pcedt_data_en_parsed.tsv > ../data/pcedt_test_en_parsed.tsv  # test file: the last 119991 lines
17 | 


--------------------------------------------------------------------------------
/lib/Treex/Tool/Parser/MSTperl/scripts/pcedt2conll_td.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | renice 10 $$  # set the niceness of this shell to 10
 3 | treex \
 4 |     Write::AttributeSentencesAligned \
 5 |     language=cs alignment_language=en layer=a \
 6 |     alignment_type=int.gdfa \
 7 |     attributes="ord form lemma CzechCoarseTag(tag) tag parent->ord afun \
 8 |     aligned->ord aligned->tag aligned->afun aligned->parent->ord \
 9 |     AlignedTreeDistances(node,alignment_hash)" \
10 | -- ../data/pcedt/*/wsj_*.treex.gz \
11 | > ../data/pcedt_data_td.tsv
12 | head -n -119991 ../data/pcedt_data_td.tsv > ../data/pcedt_train_td.tsv  # train file: everything except the last 119991 lines
13 | tail -n  119991 ../data/pcedt_data_td.tsv > ../data/pcedt_test_td.tsv  # test file: the last 119991 lines
14 | 


--------------------------------------------------------------------------------
/lib/Treex/Tool/Parser/MSTperl/scripts/pdtT2conll.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | renice 10 $$  # set the niceness of this shell to 10
3 | for i in ../data/pdt20amw/dtest/*.a.gz  # one treex run per input file
4 | do
5 | treex Read::PDT schema_dir="/net/projects/pdt/pdt20/data/schemas/" t_layer=0 from="$i" \
6 | Write::AttributeSentencesAligned language=cs layer=a attributes="ord form lemma CzechCoarseTag(tag) tag parent->ord afun" alignment_type=none alignment_is_backwards=0 alignment_language=en \
7 | >> ../data/pdt20_test.tsv
8 | done  # output is appended (>>): the target file is never truncated by this script


--------------------------------------------------------------------------------
/lib/Treex/Tool/Parser/MSTperl/scripts/simple_lemmas.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | use strict;
 3 | use warnings;
 4 | use utf8;
 5 | 
 6 | binmode STDIN, ':utf8';
 7 | binmode STDOUT, ':utf8';
 8 | binmode STDERR, ':utf8';
 9 | 
10 | use Treex::Tool::Lexicon::CS;
11 | 
12 | # Zero-based index of the lemma column in the tab-separated input.
13 | my $lemmaFieldIndex = 2;
14 | 
15 | # The input file name is the only argument; on failure report the name and the OS error.
16 | my $input_path = $ARGV[0];
17 | defined $input_path or die "Usage: $0 input_file\n";
18 | open my $file, '<:utf8', $input_path or die "cannot open input file '$input_path': $!";
19 | while ( my $line = <$file> ) {
20 |     chomp $line;
21 |     if ($line) {
22 |         # Replace the lemma column with the result of truncate_lemma(); other columns pass through.
23 |         my @fields = split /\t/, $line;
24 |         $fields[$lemmaFieldIndex] = Treex::Tool::Lexicon::CS::truncate_lemma( $fields[$lemmaFieldIndex], 1 );
25 |         print join( "\t", @fields ), "\n";
26 |     }
27 |     else {
28 |         print "\n";
29 |     }
30 | }
31 | close $file;
32 | print STDERR "Done.\n";
33 | 
34 | # Strips a trailing "-<digits>" suffix from a lemma and returns the result.
35 | # NOTE(review): not called anywhere in this script; the loop above uses truncate_lemma().
36 | sub get_simple_lemma {
37 |     my $lemma = shift;
38 | 
39 |     $lemma =~ s/-[0-9]+$//;
40 |     #$lemma =~ s/(`|_[;:,^]).+$//;
41 |     #$lemma =~ s/(-|`|_[;:,^]).+$//;
42 | 
43 |     return $lemma;
44 | }


--------------------------------------------------------------------------------
/lib/Treex/Tool/Parser/MSTperl/scripts/split_afun_ismember.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Splits column 8 of the TSV file $1 at '_' into two columns; a value without '_' gets 0 in the second one.
 3 | cut -f-7 "$1" > $$.first.tmp
 4 | cut -f8 "$1" > $$.afuns.tmp
 5 | cut -f9- "$1" > $$.last.tmp
 6 | sed -e 's/$/_0/' $$.afuns.tmp > $$.afuns0.tmp
 7 | cut -d'_' -f1 $$.afuns0.tmp > $$.noM_afuns.tmp
 8 | cut -d'_' -f2 $$.afuns0.tmp > $$.afun_Ms.tmp
 9 | paste $$.first.tmp $$.noM_afuns.tmp $$.afun_Ms.tmp $$.last.tmp > "${1%.tsv}_split_afuns.tsv"  # % strips only a trailing .tsv
10 | rm $$.*.tmp


--------------------------------------------------------------------------------
/lib/Treex/Tool/Parser/MSTperl/scripts/train_conll.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | use strict;
 3 | use warnings;
 4 | use utf8;
 5 | 
 6 | binmode STDIN, ':utf8';
 7 | binmode STDOUT, ':utf8';
 8 | 
 9 | use Treex::Tool::Parser::MSTperl::Config;
10 | use Treex::Tool::Parser::MSTperl::Reader;
11 | use Treex::Tool::Parser::MSTperl::TrainerUnlabelled;
12 | 
13 | # Trains an unlabelled parser model on $train_file and stores it in $model_file.
14 | # With a true $save_tsv the model is additionally stored as "$model_file.tsv".
15 | my ($train_file, $model_file, $config_file, $save_tsv) = @ARGV;
16 | 
17 | # Fail early with a usage message instead of building paths from undefined values.
18 | if ( !defined $train_file || !defined $model_file ) {
19 |     die "Usage: $0 train_file model_file config_file [save_tsv]\n"
20 |         . "   or: $0 train_file model_base_name\n";
21 | }
22 | 
23 | # Two-argument form: the second argument is a base name shared by the config and the model.
24 | if (!$config_file) {
25 |     $config_file = "$model_file.config";
26 |     $model_file = "$model_file.model";
27 | }
28 | 
29 | my $config = Treex::Tool::Parser::MSTperl::Config->new(config_file => $config_file);
30 | my $reader = Treex::Tool::Parser::MSTperl::Reader->new(config => $config);
31 | my $training_data = $reader->read_tsv($train_file);
32 | my $trainer = Treex::Tool::Parser::MSTperl::TrainerUnlabelled->new(config => $config);
33 | 
34 | $trainer->train($training_data);
35 | $trainer->model->store($model_file);
36 | if ($save_tsv) {
37 |     $trainer->model->store_tsv($model_file.'.tsv');
38 | }


--------------------------------------------------------------------------------
/lib/Treex/Tool/Parser/MSTperl/scripts/unlabelled_test.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # $1=test data $2=base name shared by the model ($2.model) and the config ($2.config)
3 | echo "Going to test the unlabelled parser."
4 | echo "Test data: $1"
5 | echo "Config file: $2.config"
6 | echo "Model file: $2.model"
7 | /home/rosa/mst_perl/scripts/test_conll.pl  "/home/rosa/mst_perl/data/$1" "/home/rosa/models/$2.model" "/home/rosa/models/$2.config"
8 | 


--------------------------------------------------------------------------------
/lib/Treex/Tool/Parser/MSTperl/scripts/unlabelled_test_rur.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # $1=test data $2=base name shared by the model ($2.model) and the config ($2.config)
3 | echo "Going to test the RUR unlabelled parser."
4 | echo "Test data: $1"
5 | echo "Config file: $2.config"
6 | echo "Model file: $2.model"
7 | /home/rosa/mst_perl/scripts/test_rur_conll.pl  "/home/rosa/mst_perl/data/$1" "/home/rosa/models/$2.model" "/home/rosa/models/$2.config"
8 | 


--------------------------------------------------------------------------------
/lib/Treex/Tool/Parser/MSTperl/scripts/unlabelled_train_and_test.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # $1=training data $2=test data $3=config (path relative to /home/rosa/mst_perl)
 3 | # The model file name is derived from $3: everything up to the first '/' is
 4 | # dropped and a trailing "config" is replaced by "model".
 5 | echo "Going to train and test the unlabelled parser."
 6 | echo "Training data: $1"
 7 | echo "Test data: $2"
 8 | echo "Config file: $3"
 9 | c=${3#*/}
10 | model=${c%config}model
11 | echo "Model file: ${model}"
12 | # Stop if the models directory is missing, so the symlink is not created elsewhere.
13 | cd /home/rosa/models/ || exit 1
14 | ln -s "/home/rosa/mst_perl/$3"
15 | /home/rosa/mst_perl/scripts/train_conll.pl "/home/rosa/mst_perl/data/$1" "/home/rosa/models/$model" "/home/rosa/mst_perl/$3" 0
16 | /home/rosa/mst_perl/scripts/test_conll.pl  "/home/rosa/mst_perl/data/$2" "/home/rosa/models/$model" "/home/rosa/mst_perl/$3"


--------------------------------------------------------------------------------
/lib/Treex/Tool/Parser/MSTperl/scripts/worsen_pcedt.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | renice 10 $$  # set the niceness of this shell to 10
 3 | 
 4 | # worsen PCEDT and save it into tsv
 5 | treex -Lcs \
 6 | A2A::CS::WorsenWordForms err_distr_from=/home/rosa/depfix/tagchanges.tsv \
 7 | Write::AttributeSentencesAligned language=cs alignment_language=en layer=a alignment_type=int.gdfa \
 8 | attributes="ord form lemma CzechCoarseTag(tag) tag parent->ord afun \
 9 | aligned->ord aligned->tag aligned->afun aligned->parent->ord" \
10 | -- /home/rosa/depfix/mst_perl/data/pcedt/*/wsj_*.treex.gz \
11 | >  /home/rosa/depfix/mst_perl/data/pcedt_worsened.tsv
12 | 
13 | # split into train set and test set
14 | head -n -119991 /home/rosa/depfix/mst_perl/data/pcedt_worsened.tsv > /home/rosa/depfix/mst_perl/data/pcedt_worsened_train.tsv  # everything except the last 119991 lines
15 | tail -n  119991 /home/rosa/depfix/mst_perl/data/pcedt_worsened.tsv > /home/rosa/depfix/mst_perl/data/pcedt_worsened_test.tsv  # the last 119991 lines
16 | 


--------------------------------------------------------------------------------
/lib/Treex/Tool/Parser/RUR/samples/labeller_test.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # "$@" is quoted so that extra arguments containing whitespace are passed on intact.
3 | ./test_labeller_tsv.pl sample_test.tsv sample.Lmodel sample.config "$@"


--------------------------------------------------------------------------------
/lib/Treex/Tool/Parser/RUR/samples/labeller_train.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | # "$@" is quoted so that extra arguments containing whitespace are passed on intact.
3 | ./train_labeller_tsv.pl sample_train.tsv sample.Lmodel sample.config 0 "$@"


--------------------------------------------------------------------------------
/lib/Treex/Tool/Parser/RUR/samples/sample_test.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | ./test_tsv.pl sample_test.tsv sample.model sample.config  # arguments, judging by the file names: test data, model, config


--------------------------------------------------------------------------------
/lib/Treex/Tool/Parser/RUR/samples/sample_train.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | ./train_tsv.pl sample_train.tsv sample.model sample.config 1  # NOTE(review): meaning of the trailing 1 is not visible here; confirm in train_tsv.pl
3 | 


--------------------------------------------------------------------------------
/lib/Treex/Tool/Parser/RUR/samples/treex_input.txt:
--------------------------------------------------------------------------------
 1 | Boys love girls.
 2 | John Brown loves Mary Thatcher.
 3 | John eagerly loves the sweet little Mary.
 4 | A boy loves a girl.
 5 | John loves Mary.
 6 | Counting Flowers On The Wall
 7 | I keep hearing you're concerned about my happiness.
 8 | But all that thought you're giving me is conscience I guess.
 9 | If I was walking in your shoes, I wouldn't worry none.
10 | While you and your friends are worried about me I'm having lots of fun.
11 | Counting flowers on the wall that don't bother me at all.
12 | Playing solitaire till dawn with a deck of fifty-one.
13 | Smoking cigarettes and watching Captain Kangaroo.
14 | Now don't tell me I've nothing to do.
15 | It's good to see you, I must go, I know I look a fright.
16 | Anyway my eyes are not accustomed to this light.
17 | And my shoes are not accustomed to this hard concrete.
18 | So I must go back to my room and make my day complete.
19 | 


--------------------------------------------------------------------------------
/lib/Treex/Tool/Parser/RUR/samples/treex_parse.scen:
--------------------------------------------------------------------------------
 1 | # You can use this scenario for parsing sentences in Treex.
 2 | # If you have Treex installed, just run:
 3 | # treex treex_parse.scen
 4 | Util::SetGlobal language=en selector=src
 5 | Read::Sentences from=treex_input.txt
 6 | W2A::ResegmentSentences
 7 | W2A::EN::Tokenize
 8 | W2A::EN::NormalizeForms
 9 | W2A::EN::FixTokenization
10 | W2A::EN::TagMorce
11 | W2A::EN::FixTags
12 | W2A::EN::Lemmatize
13 | A2N::EN::StanfordNamedEntities model=ner-eng-ie.crf-3-all2008.ser.gz
14 | A2N::EN::DistinguishPersonalNames
15 | W2A::MarkChunks
16 | W2A::EN::ParseMSTperl model_name=conll_2007_medium
17 | W2A::EN::FixNominalGroups
18 | W2A::EN::FixIsMember
19 | W2A::EN::FixAtree
20 | W2A::EN::FixMultiwordPrepAndConj
21 | W2A::EN::FixDicendiVerbs
22 | W2A::EN::SetAfunAuxCPCoord
23 | W2A::EN::SetAfun
24 | Write::Treex to=treex_output.treex
25 | 


--------------------------------------------------------------------------------
/lib/Treex/Tool/Parser/t/fanse.t:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | use strict;
 3 | use warnings;
 4 | # The whole test is skipped unless the EXPENSIVE_TESTING environment variable is set.
 5 | BEGIN {
 6 |   if (!$ENV{EXPENSIVE_TESTING}) {
 7 |     require Test::More;
 8 |     Test::More::plan(skip_all => 'This test takes long time');
 9 |   }
10 | }
11 | 
12 | use Test::More tests => 4;
13 | 
14 | use Treex::Tool::Parser::Fanse;
15 | my $parser = Treex::Tool::Parser::Fanse->new();
16 | 
17 | isa_ok( $parser, 'Treex::Tool::Parser::Fanse', 'parser instantiated' );
18 | # parse() is given a reference to the word forms and returns three array references.
19 | my @forms = qw(John loves Mary);
20 | my ( $parent_indices, $edge_labels, $pos_tags ) = $parser->parse( \@forms );
21 | # NOTE(review): parent indices look 1-based with 0 marking the root; inferred from the expected values.
22 | is_deeply( $parent_indices, [ 2, 0, 2 ], 'topology ok' );
23 | is_deeply( $edge_labels, [qw(nsubj ROOT dobj)], 'edge labels ok' );
24 | is_deeply( $pos_tags,    [qw(NNP VBZ NNP)],  'pos tags ok' );
25 | 


--------------------------------------------------------------------------------
/lib/Treex/Tool/Parser/t/malt.t:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | use strict;
 3 | use warnings;
 4 | use Test::More;
 5 | 
 6 | # The test is disabled for now. The skip is declared in a BEGIN block placed before
 7 | # the parser module is loaded, so a module that fails to load cannot break the skip.
 8 | BEGIN {
 9 |     plan skip_all => q(Module still using $TMT_ROOT, won't test, until changed to TC::Resource );
10 | }
11 | 
12 | # Not reached while the skip above is in place; kept for when the test is re-enabled.
13 | use Treex::Tool::Parser::Malt;
14 | 
15 | plan tests => 3;
16 | 
17 | my $parser = Treex::Tool::Parser::Malt->new( { model => 'en_nivreeager.mco' } );
18 | 
19 | isa_ok( $parser, 'Treex::Tool::Parser::Malt', 'parser instantiated' );
20 | 
21 | my @forms    = qw(John loves Mary);
22 | my @lemmas   = qw(John love Mary);
23 | my @pos      = qw(NNP VBZ NNP);
24 | my @cpos     = qw(NN VB NN);
25 | my @features = qw(_ _ _);
26 | 
27 | my ( $parent_indices, $edge_labels ) = $parser->parse( \@forms, \@lemmas, \@cpos, \@pos, \@features );
28 | 
29 | is_deeply( $parent_indices, [ 2, 0, 2 ], 'topology' );
30 | is_deeply( $edge_labels, [qw(SBJ ROOT OBJ)], 'edge labels' );


--------------------------------------------------------------------------------
/lib/Treex/Tool/Parser/t/zpar.t:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | use strict;
 3 | use warnings;
 4 | # The whole test is skipped unless both EXPERIMENTAL and EXPENSIVE_TESTING are set in the environment.
 5 | BEGIN {
 6 |   if (!$ENV{EXPERIMENTAL} || !$ENV{EXPENSIVE_TESTING}) {
 7 |     require Test::More;
 8 |     Test::More::plan(skip_all => 'This test takes long time and is experimental');
 9 |   }
10 | }
11 | 
12 | use Test::More tests => 4;
13 | 
14 | use Treex::Tool::Parser::Zpar;
15 | my $parser = Treex::Tool::Parser::Zpar->new();
16 | 
17 | isa_ok( $parser, 'Treex::Tool::Parser::Zpar', 'parser instantiated' );
18 | # parse() is given a reference to the word forms and returns three array references.
19 | my @forms = qw(John loves Mary);
20 | my ( $parent_indices, $edge_labels, $pos_tags ) = $parser->parse( \@forms );
21 | # NOTE(review): parent indices look 1-based with 0 marking the root; inferred from the expected values.
22 | is_deeply( $parent_indices, [ 2, 0, 2 ], 'topology ok' );
23 | is_deeply( $edge_labels, [qw(SUB ROOT OBJ)], 'edge labels ok' );    # CoNLL uses "SBJ" for subject, not "SUB"
24 | is_deeply( $pos_tags,    [qw(NNP VBZ NNP)],  'pos tags ok' );
25 | 


--------------------------------------------------------------------------------
/lib/Treex/Tool/Phrase2Dep/t/pennconverter.t:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | use strict;
 3 | use warnings;
 4 | # Converts one bracketed phrase-structure string and checks the returned parents and deprels.
 5 | use Treex::Tool::Phrase2Dep::Pennconverter;
 6 | 
 7 | use Test::More tests=>3;
 8 | 
 9 | my $converter = Treex::Tool::Phrase2Dep::Pennconverter->new();
10 | 
11 | isa_ok( $converter, 'Treex::Tool::Phrase2Dep::Pennconverter', 'Penn Converter instantiated' );
12 | # Input tree and the expected per-word results (one entry per word of the sentence).
13 | my $penn_string = '(S (NP (NNP John)) (VP (VBZ loves) (NP (NNP Mary))))';
14 | my $expected_parents = [2, 0, 2];
15 | my $expected_deprels = [qw(VMOD ROOT OBJ)];
16 | 
17 | my ( $parents_ref, $deprels_ref ) = $converter->convert($penn_string);
18 | 
19 | is_deeply($parents_ref, $expected_parents, 'correct topology');
20 | is_deeply($deprels_ref, $expected_deprels, 'correct deprels');
21 | 


--------------------------------------------------------------------------------
/lib/Treex/Tool/SRLParser/submit_training_to_maxent.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Runs the MaxEnt toolkit binary under $TMT_ROOT on the training and held-out feature files.
 3 | # file with classification features
 4 | training_features=/net/work/people/strakova/robust_parsing/training_features.txt
 5 | heldout_features=/net/work/people/strakova/robust_parsing/heldout_features.txt
 6 | : "${TMT_ROOT:?TMT_ROOT must be set}"  # otherwise the paths below would silently start at /share
 7 | # model
 8 | model="${TMT_ROOT%/}/share/data/models/srl_parser/srl_parser_model_cs"
 9 | 
10 | "${TMT_ROOT%/}/share/external_tools/MaxEntToolkit/maxent_x86_64" \
11 |     "$training_features" --heldout "$heldout_features" -b -m "$model" -i 100 -v
12 | 


--------------------------------------------------------------------------------
/lib/Treex/Tool/Segment/t/rule_based.t:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | 
 3 | use strict;
 4 | use warnings;
 5 | 
 6 | use Test::More tests => 4;
 7 | BEGIN{ use_ok('Treex::Tool::Segment::RuleBased');}
 8 | my $segmenter = new_ok('Treex::Tool::Segment::RuleBased');
 9 | 
10 | my $text = 'Dummy text. Which has to be segmented';
11 | 
12 | my $result = eval { $segmenter->get_segments($text) };
13 | 
14 | ok($result, 'Segmenter returns some result');
15 | 
16 | 
17 | TODO: {
18 |     local $TODO = 'Test not yet written';    # reason shown for the expected failure below
19 |     fail ('Test on semantics of segmenting');
20 | }
21 | 


--------------------------------------------------------------------------------
/lib/Treex/Tool/Stemmer/TA/CorpusSuffixSplitter.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | 
 3 | use strict;
 4 | use warnings;
 5 | use utf8;
 6 | use Treex::Tool::Stemmer::TA::SuffixSplitter;
 7 | 
 8 | while (<>) {
 9 |     chomp;
10 |     my $sentence = $_;
11 |     my $stemmed_sentence = Treex::Tool::Stemmer::TA::SuffixSplitter::stem_sentence($sentence);
12 |     print "$stemmed_sentence\n";    
13 | }
14 | 
15 | 


--------------------------------------------------------------------------------
/lib/Treex/Tool/Stemmer/TA/sample.txt:
--------------------------------------------------------------------------------
 1 | patikkinRa pazakkan enakku irukkiRaTu.
 2 | OtinAl varuvaTu puN . 
 3 | patikkiRaTu .
 4 | d
 5 | dd
 6 | 3
 7 | +3
 8 | 
 9 | 
10 | OtAmal irukkATu .
11 | paNaTTaiTTAn kotuTTaTaiyE .
12 | patikkaT TAn vawTEn.
13 | 


--------------------------------------------------------------------------------
/lib/Treex/Tool/Stemmer/TA/test.pl:
--------------------------------------------------------------------------------
 1 | # This program simply stems a given sentence
 2 | 
 3 | use Moose;
 4 | use utf8;
 5 | use Treex::Tool::Stemmer::TA::Simple;
 6 | 
 7 | my $sentence = "patikkinRa pazakkan enakku irukkiRaTu.";
 8 | 
 9 | my $stemmed_sentence = Treex::Tool::Stemmer::TA::Simple::stem_sentence($sentence);
10 | 
11 | print "Stemmed sentence: " . $stemmed_sentence . "\n";
12 | 


--------------------------------------------------------------------------------
/lib/Treex/Tool/Stemmer/TA/test1.pl:
--------------------------------------------------------------------------------
 1 | use Moose;
 2 | use utf8;
 3 | use Treex::Tool::Stemmer::TA::Simple;
 4 | 
 5 | # stem a given document. output is written to a new file
 6 | Treex::Tool::Stemmer::TA::Simple::stem_document("sample.txt", "sample.stm.txt");
 7 | 
 8 | # restore the original document. 
 9 | Treex::Tool::Stemmer::TA::Simple::restore_document("sample.stm.txt", "sample.res.txt");
10 | 


--------------------------------------------------------------------------------
/lib/Treex/Tool/Tagger/Featurama/t/featurama_en.t:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | 
 3 | use strict;
 4 | use warnings;
 5 | 
 6 | use Test::More;
 7 | 
 8 | eval {
 9 |     require Featurama::Perc;
10 |     1;
11 | } or plan skip_all => 'Cannot load Featurama::Perc';
12 | 
13 | plan tests => 5;
14 | 
15 | use_ok('Treex::Tool::Tagger::Featurama::EN');
16 | 
17 | 
18 | my $tagger = Treex::Tool::Tagger::Featurama::EN->new();
19 | 
20 | isa_ok( $tagger, 'Treex::Tool::Tagger::Featurama::EN' );
21 | isa_ok( $tagger, 'Treex::Tool::Tagger::Featurama' );
22 | 
23 | my ( $tags_rf, $lemmas_rf ) = $tagger->tag_sentence( [qw(How are you ?)] );
24 | cmp_ok( scalar @$tags_rf,   '==', 4, q{There's Correct number of tags} );
25 | cmp_ok( scalar @$lemmas_rf, '==', 4, q{There's Correct number of lemmas} );
26 | note( join ' ', @$tags_rf );
27 | note( join ' ', @$lemmas_rf );
28 | 
29 | 


--------------------------------------------------------------------------------
/lib/Treex/Tool/Tagger/t/mecab.t:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | 
 3 | use strict;
 4 | use warnings;
 5 | 
 6 | use utf8;
 7 | binmode(STDIN, ':utf8');
 8 | binmode(STDOUT, ':utf8');
 9 | binmode(STDERR, ':utf8');
10 | use Test::More tests => 10;
11 | 
12 | BEGIN { use_ok('Treex::Tool::Tagger::MeCab') };
13 | 
14 | require_ok('Treex::Tool::Tagger::MeCab');
15 | 
16 | my $tagger = Treex::Tool::Tagger::MeCab->new();
17 | 
18 | isa_ok( $tagger, 'Treex::Tool::Tagger::MeCab' );
19 | 
20 | my $sentence = 'わたしは日本語を話します';    # one unsegmented string handed to the tagger
21 | my @tokens = $tagger->process_sentence($sentence);
22 | 
23 | # tokenized sentence: "わたし は 日本語 を 話し ます"
24 | cmp_ok( scalar @tokens, '==', 6, q{Correct number of tokens});
25 | 
26 | foreach my $token (@tokens) {
27 |   cmp_ok( scalar (split /\t/, $token), '==', 10, q{Correct number of features});
28 | }
29 | 
30 | 


--------------------------------------------------------------------------------
/lib/Treex/Tool/Tagger/t/tree_tagger.t:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | use strict;
 3 | use warnings;
 4 | use Treex::Tool::Tagger::TreeTagger;
 5 | use Test::More;
 6 | 
 7 | # TMT_ROOT may be set with or without a trailing slash; make sure a separator
 8 | # sits between it and the relative model path. An unset or empty TMT_ROOT
 9 | # leaves the model path relative.
10 | my $tmt_root = defined $ENV{TMT_ROOT} ? $ENV{TMT_ROOT} : '';
11 | $tmt_root .= '/' if length($tmt_root) && $tmt_root !~ m{/\z};
12 | 
13 | my $tagger = Treex::Tool::Tagger::TreeTagger->new( model => $tmt_root . 'share/data/models/tagger/tree_tagger/en.par' );
14 | isa_ok( $tagger, 'Treex::Tool::Tagger::TreeTagger', 'tagger instantiated' );
15 | 
16 | #SKIP: {
17 | #    skip "Test is broken", 2;
18 |     my @forms           = qw(How are you ?);
19 |     my @expected_tags   = qw(WRB VBP PP SENT);
20 |     my @expected_lemmas = qw(How be you ?);
21 |     my ( $tags, $lemmas ) = $tagger->tag_sentence( \@forms );
22 |     is_deeply( $tags,   \@expected_tags,   'tags ok' );
23 |     is_deeply( $lemmas, \@expected_lemmas, 'lemmas ok' );
24 | #       }
25 | 
26 | done_testing();


--------------------------------------------------------------------------------
/lib/Treex/Tool/TranslationModel/Derivative/EN2CS/_readme.txt:
--------------------------------------------------------------------------------
1 | Hyphen_compounds
2 | Deverbial_nouns
3 | Negative_prefices
4 | Deverbial_adjectives
5 | Suffices
6 | Prefices
7 | 
8 | 
9 | 


--------------------------------------------------------------------------------
/lib/Treex/Tool/Transliteration/t/check_utf8_latin_conversion.t:
--------------------------------------------------------------------------------
 1 | use strict;
 2 | use warnings;
 3 | use utf8;
 4 | 
 5 | use Treex::Tool::Transliteration::TA;
 6 | use Test::More;
 7 | 
 8 | binmode STDIN,  ':encoding(utf8)';
 9 | binmode STDOUT, ':encoding(utf8)';
10 | binmode STDERR, ':encoding(utf8)';
11 | 
12 | # Parallel lists: $latin_str[$i] and $utf8_str[$i] are expected to convert into each other.
13 | my @latin_str = ( 'kAtu',         'malai',     'vItu' );
14 | my @utf8_str  = ( 'காடு', 'மலை', 'வீடு' );
15 | 
16 | my $transliterator =
17 |   Treex::Tool::Transliteration::TA->new( use_enc_map => 'utf8_2_latin' );
18 | 
19 | # test - utf8 to latin conversion
20 | foreach my $i ( 0 .. $#utf8_str ) {
21 |     my $out_string = $transliterator->transliterate_string( $utf8_str[$i] );
22 |     # is() reports both the got and the expected string when the comparison fails
23 |     is( $out_string, $latin_str[$i], "$utf8_str[$i] => $out_string" );
24 | }
25 | 
26 | # test - latin to utf8 conversion
27 | $transliterator->set_enc_map('latin_2_utf8');
28 | foreach my $i ( 0 .. $#latin_str ) {
29 |     my $out_string = $transliterator->transliterate_string( $latin_str[$i] );
30 |     is( $out_string, $utf8_str[$i], "$latin_str[$i] => $out_string" );
31 | }
32 | done_testing();
33 | 


--------------------------------------------------------------------------------
/lib/Treex/Tool/UMR/CS/GrammatemeSetter.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Tool::UMR::CS::GrammatemeSetter;
 2 | use Moose::Role;
 3 | with 'Treex::Tool::UMR::GrammatemeSetter';
 4 | 
 5 | use experimental qw{ signatures };
 6 | 
 7 | =head1 NAME
 8 | 
 9 | Treex::Tool::UMR::CS::GrammatemeSetter - Language specific grammateme
10 | deduction from morphology.
11 | 
12 | =cut
13 | 
14 | {    my %REGEX = (person => '^.{7}([123])',
15 |                   number => '^(?x:(?| .{6} ([SP])'
16 |                                  . '| .{3} ([SP]) ))');
17 |     sub tag_regex($self, $grammateme) { $REGEX{$grammateme} }
18 | }
19 | 
20 | {   my %GRAM = (person => {1 => 1,
21 |                            2 => 2,
22 |                            3 => 3},
23 |                 number => {S => 'sg',
24 |                            P => 'pl'});
25 |     sub translate($self, $grammateme, $value) { $GRAM{$grammateme}{$value} }
26 | }
27 | 
28 | __PACKAGE__
29 | 


--------------------------------------------------------------------------------
/lib/Treex/Tool/UMR/LA/GrammatemeSetter.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Tool::UMR::LA::GrammatemeSetter;
 2 | use Moose::Role;
 3 | with 'Treex::Tool::UMR::GrammatemeSetter';
 4 | 
 5 | use experimental qw{ signatures };
 6 | 
 7 | =head1 NAME
 8 | 
 9 | Treex::Tool::UMR::LA::GrammatemeSetter - Language specific grammateme
10 | deduction from morphology.
11 | 
12 | =cut
13 | 
14 | {    my %REGEX = (person => '^.([123])',
15 |                   number => '^..([sp])');
16 |     sub tag_regex($self, $grammateme) { $REGEX{$grammateme} }
17 | }
18 | 
19 | {   my %GRAM = (person => {1 => 1,
20 |                            2 => 2,
21 |                            3 => 3},
22 |                 number => {s => 'sg',
23 |                            p => 'pl'});
24 |     sub translate($self, $grammateme, $value) { $GRAM{$grammateme}{$value} }
25 | }
26 | 
27 | __PACKAGE__
28 | 


--------------------------------------------------------------------------------
/lib/Treex/Tool/UMR/t/pdt2pb.csv:
--------------------------------------------------------------------------------
1 | UMR ID,PDT frame,Role_mapping,AUTOMATIC MAPPING,CORRECTION,COMMENTS,Unambiguous mapping - SSC and/or CEV,Prevailing mapping - SSC and/or CEV,Unambiguous SSC mapping (other than CEV),Info on automatic mapping,Source,Mapping via CzEngVallex,Mapping via SynSemClass5.1 (r.16488),mapping via SynSemClass5.0 (Lindat_version),mapping via SynSemClass (old_version),
2 | """absorbovat-001""",absorbovat (v-w10f1),vec00476,,,,,,,,both,,,,,0
3 | ,ACT: 1,ACT->Absorber,ARG0,,,ARG0,,,,,ACT->ARG0/17,ACT->Absorber(ARG0/25),ACT->Absorber(ARG0/25),ACT->Absorber(ARG0/25),0
4 | ,PAT: 4,PAT->Absorbed,ARG1,,,ARG1,,,,,PAT->ARG1/28,PAT->Absorbed(ARG1/38),PAT->Absorbed(ARG1/38),PAT->Absorbed(ARG1/38),0
5 | 
6 | 


--------------------------------------------------------------------------------
/lib/Treex/Tool/Vallex/t/print_all_frames.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | 
 3 | use strict;
 4 | use warnings;
 5 | 
 6 | use Treex::Tool::Vallex::ValencyFrame;
 7 | 
 8 | my $i = 1;
 9 | 
10 | while ( $i < 14983 ) {
11 |     my $frame = Treex::Tool::Vallex::ValencyFrame->new( { ord => $i++, lexicon => 'vallex.xml', language => 'cs' } );
12 | 
13 |     print $frame->to_string . "\n";
14 | }
15 | 
16 | __END__
17 | 
18 | =encoding utf-8
19 | 
20 | This lists all the valency frames from the PDT-Vallex Czech valency lexicon.
21 | 
22 | =head1 AUTHOR
23 | 
24 | Ondřej Dušek <odusek@ufal.mff.cuni.cz>
25 | 
26 | =head1 COPYRIGHT AND LICENSE
27 | 
28 | Copyright © 2011 by Institute of Formal and Applied Linguistics, Charles University in Prague
29 | 
30 | This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself.
31 | 


--------------------------------------------------------------------------------
/lib/Treex/Tool/Word2vec/readbin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ufal/treex/06c3353d9d7c61ff86c572cd926b055733a73442/lib/Treex/Tool/Word2vec/readbin


--------------------------------------------------------------------------------
/lib/Treex/Tool/Word2vec/txt2vw.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | use strict;
 3 | use warnings;
 4 | use utf8;
 5 | #use autodie;
 6 | #use PerlIO::gzip;
 7 | 
 8 | sub tsvsay {
 9 |     my $line = join " ", @_;
10 |     print "$line\n";
11 | }
12 | 
13 | binmode STDIN, ':utf8';
14 | binmode STDOUT, ':utf8';
15 | binmode STDERR, ':utf8';
16 | 
17 | # 1st line
18 | my $first = <>;
19 | chomp $first;
20 | my ($count, $d) = split / /, $first;
21 | 
22 | # convert other lines
23 | while (<>) {
24 |     chomp;
25 |     my ($word, @vec) = split / /;
26 |     $word =~ tr/:| /;!_/;
27 |     my $f = 1;
28 |     my @fs = map { 'f' . ($f++) . ':' . ($_) } @vec[0 .. ($d-1)];
29 |     tsvsay($word, @fs);
30 | }
31 | 
32 | 


--------------------------------------------------------------------------------
/lib/Treex/Tutorial/Config.pod:
--------------------------------------------------------------------------------
 1 | package Treex::Tutorial::Config;
 2 | 
 3 | =pod
 4 | 
 5 | =encoding utf8
 6 | 
 7 | =head1 NAME
 8 | 
 9 | Treex::Tutorial::Config - Configuring Treex
10 | 
11 | =head1 EXAMPLES
12 | 
13 | TODO
14 | 
15 | =head1 SEE ALSO
16 | 
17 | L<Treex::Core::Config>
18 | 
19 | =head1 AUTHOR
20 | 
21 | Dušan Variš <varis@ufal.mff.cuni.cz>
22 | Martin Popel <popel@ufal.mff.cuni.cz>
23 | 
24 | =head1 COPYRIGHT AND LICENSE
25 | 
26 | Copyright © 2011 by Institute of Formal and Applied Linguistics, Charles University in Prague
27 | 
28 | This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself.
29 | 
30 | 


--------------------------------------------------------------------------------
/lib/Treex/Tutorial/ReadersAndWriters.pod:
--------------------------------------------------------------------------------
 1 | package Treex::Tutorial::ReadersAndWriters;
 2 | 
 3 | =pod
 4 | 
 5 | =encoding utf8
 6 | 
 7 | =head1 NAME
 8 | 
 9 | Treex::Tutorial::ReadersAndWriters - Reading/writing from/to different file formats
10 | 
11 | 
12 | =head1 FORMATS
13 | 
14 | Treex can process data in many formats: plain text, HTML, CoNLL, PDT (PML),...
15 | For each supported format there exists a B<reader> block
16 | (that converts the given format to Treex in-memory representation)
17 | and a writer block (that prints the in-memory representation to the given format).
18 | 
19 |     cat index.html | treex -q -Len Read::HTML Write::Text
20 | 
21 | 
22 | =head1 AUTHOR
23 | 
24 | Martin Popel <popel@ufal.mff.cuni.cz>
25 | 
26 | =head1 COPYRIGHT AND LICENSE
27 | 
28 | Copyright © 2011 by Institute of Formal and Applied Linguistics, Charles University in Prague
29 | 
30 | This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself.
31 | 


--------------------------------------------------------------------------------
/lib/Treex/Tutorial/WritingNewReaders.pod:
--------------------------------------------------------------------------------
 1 | package Treex::Tutorial::WritingNewReaders;
 2 | 
 3 | =pod
 4 | 
 5 | =encoding utf8
 6 | 
 7 | =head1 NAME
 8 | 
 9 | Treex::Tutorial::WritingNewReaders - Guidelines for writing your own Treex reader/writer blocks.
10 | 
11 | =head1 EXAMPLES
12 | 
13 | TODO
14 | 
15 | =head1 AUTHOR
16 | 
17 | Dušan Variš <varis@ufal.mff.cuni.cz>
18 | Martin Popel <popel@ufal.mff.cuni.cz>
19 | 
20 | =head1 COPYRIGHT AND LICENSE
21 | 
22 | Copyright © 2011 by Institute of Formal and Applied Linguistics, Charles University in Prague
23 | 
24 | This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself.
25 | 
26 | 


--------------------------------------------------------------------------------
/lib/Treex/Tutorial/generate_html.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | perl -MPod::Simple::HTML -e Pod::Simple::HTML::go Install.pod\
3 |  | perl -nlpe 's{</head>}{<link rel="stylesheet" title="treexpod" type="text/css" href="treexpod.css" media="all" ></head>}'\
4 |  > install.html
5 | 
6 | perl -MPod::Simple::HTML -e Pod::Simple::HTML::go FirstSteps.pod\
7 |  | perl -nlpe 's{</head>}{<link rel="stylesheet" title="treexpod" type="text/css" href="treexpod.css" media="all" ></head>}'\
8 |  > firststeps.html
9 | 


--------------------------------------------------------------------------------
/lib/Treex/Unilang.pm:
--------------------------------------------------------------------------------
 1 | package Treex::Unilang;
 2 | 
 3 | use strict;
 4 | use warnings;
 5 | use Treex::Core;
 6 | 
 7 | 1;
 8 | 
 9 | __END__
10 | #TODO this module exists just to keep Unilang synchronized with Core
11 | =pod
12 | 
13 | =encoding utf8
14 | 
15 | =head1 NAME
16 | 
17 | Treex::Unilang - collection of blocks parametrized by language and language independent
18 | 
19 | =head1 DESCRIPTION
20 | 
21 | =head1 AUTHOR
22 | 
23 | Tomáš Kraut <kraut@ufal.mff.cuni.cz>
24 | 
25 | =head1 COPYRIGHT AND LICENSE
26 | 
27 | Copyright © 2011 by Institute of Formal and Applied Linguistics, Charles University in Prague
28 | 
29 | This module is free software; you can redistribute it and/or modify it under the same terms as Perl itself.
30 | 
31 | 


--------------------------------------------------------------------------------
/packaging/Makefile:
--------------------------------------------------------------------------------
 1 | default: usage
 2 | 
 3 | PACKAGES := Treex-Core Treex-Unilang Treex-EN Treex-CS Treex-JA Treex-Parser-MSTperl Treex-Doc
 4 | 
 5 | usage:
 6 | 	echo 'Targets: cleanall buildall testall'
 7 | 
 8 | cleanall:
 9 | 	for d in $(PACKAGES); do \
10 | 		echo "Package $$d"; \
11 | 		( cd $$d; $(MAKE) clean ); \
12 | 	done;
13 | 
14 | buildall:
15 | 	for d in $(PACKAGES); do \
16 | 		echo "Package $$d"; \
17 | 		( cd $$d; $(MAKE) build ); \
18 | 	done;
19 | 
20 | testall:
21 | 	for d in $(PACKAGES); do \
22 | 		echo "Package $$d"; \
23 | 		( cd $$d; $(MAKE) test ); \
24 | 	done;
25 | 


--------------------------------------------------------------------------------
/packaging/Treex-CS/Changes.template:
--------------------------------------------------------------------------------
 1 | Revision history of the Treex-CS distribution:
 2 | 
 3 | 0.08171       2012-02-16
 4 |     - added Featurama tagger
 5 | 
 6 | 0.08057       2012-02-07
 7 |     - MSTperl parser was updated
 8 | 
 9 | 0.07297       2011-11-15
10 |     - added MSTperl parser
11 | 
12 | 0.07194       2011-11-07
13 |     - added dependency on current version of Treex::Unilang
14 | 
15 | 0.07191       2011-11-06
16 |     - first non-dev release
17 | 
18 | 0.06903_1     2011-10-03 
19 | 
20 |     - the first release at CPAN
21 |     - contain English analysis up to tagger
22 | 
23 | For detailed log of changes see https://svn.ms.mff.cuni.cz/projects/tectomt_devel/timeline (Use 'public' username and same password)
24 | 


--------------------------------------------------------------------------------
/packaging/Treex-CS/Makefile:
--------------------------------------------------------------------------------
 1 | SHELL=bash
 2 | 
 3 | include ../common.mk
 4 | 
 5 | ##List of blocks used by scenario
 6 | #Read::Sentences
 7 | #Util::SetGlobal language=cs selector=src
 8 | #W2A::ResegmentSentences
 9 | #W2A::CS::Tokenize
10 | #W2A::CS::TagMorphoDiTa
11 | #W2A::CS::ParseMSTperl
12 | 
13 | ALLDIRS=${CS_T}/ ${TOOLS}/ ${TAGGER}/ ${LEXICON}/CS/ 
14 | 
15 | MODULES :=  ${PREFIX}/CS.pm \
16 | 	$(addprefix ${CS}/, Tokenize.pm TagMorphoDiTa.pm ParseMSTperl.pm FixAtreeAfterMcD.pm FixIsMember.pm FixReflexiveTantum.pm FixReflexivePronouns.pm) \
17 |   $(addprefix ${LEXICON}/, CS/Reflexivity.pm)
18 | 


--------------------------------------------------------------------------------
/packaging/Treex-Core/compile_grammar.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | use strict;
 3 | use warnings;
 4 | use Parse::RecDescent 1.967009;
 5 | 
 6 | # Regenerates lib/Treex/Core/ScenarioParser.pm from the grammar file.
 7 | # All paths are relative, so run this from the directory that contains lib/.
 8 | my $grammar_file = 'lib/Treex/Core/ScenarioParser.rdg';
 9 | 
10 | # Slurp the whole grammar; die here rather than precompile an undefined grammar.
11 | open my $IN, '<', $grammar_file or die "Cannot open $grammar_file: $!";
12 | my $grammar = do { local $/ = undef; <$IN> };
13 | close $IN or die "Cannot close $grammar_file: $!";
14 | 
15 | Parse::RecDescent->Precompile(
16 |     { -standalone => 1, }
17 |     , $grammar
18 |     , "Treex::Core::ScenarioParser"
19 | );
20 | 
21 | # The standalone version contains several packages in one file,
22 | # but the very Treex::Core::ScenarioParser starts around line 3300.
23 | # We need to silence Perl critics also in the first package.
24 | my $status = system '(echo "## no critic (Miscellanea::ProhibitUnrestrictedNoCritic)"; echo "## no critic Generated code follows"; cat ScenarioParser.pm) > lib/Treex/Core/ScenarioParser.pm';
25 | die "Cannot write lib/Treex/Core/ScenarioParser.pm (exit status $status)" if $status != 0;
26 | 
27 | # Remove the intermediate file only after it has been copied successfully.
28 | unlink 'ScenarioParser.pm' or warn "Cannot remove ScenarioParser.pm: $!";
29 | 
30 | # The old way did not generate *standalone* parser
31 | ##!/bin/bash
32 | #perl -MParse::RecDescent - ScenarioParser.rdg Treex::Core::ScenarioParser
33 | 


--------------------------------------------------------------------------------
/packaging/Treex-Doc/Changes.template:
--------------------------------------------------------------------------------
 1 | Revision history of the Treex-Doc distribution:
 2 | 
 3 | 0.08324        2012-02-29
 4 | 
 5 |     - improvements in documentation
 6 | 
 7 | 0.07297        2011-11-15
 8 | 
 9 |     - the first release at CPAN - FAQ and Tutorial
10 | 
11 | For detailed log of changes see https://svn.ms.mff.cuni.cz/projects/tectomt_devel/timeline (Use 'public' username and same password)
12 | 


--------------------------------------------------------------------------------
/packaging/Treex-Doc/Makefile:
--------------------------------------------------------------------------------
 1 | SHELL=bash
 2 | 
 3 | include ../common.mk
 4 | 
 5 | MANUAL=${PREFIX}/Manual
 6 | TUTORIAL=${PREFIX}/Tutorial
 7 | 
 8 | ALLDIRS=${PREFIX}
 9 | 
10 | MODULES :=$(PREFIX)/Tutorial.pod ${MANUAL} \
11 |     	${TUTORIAL}
12 | 
13 | 


--------------------------------------------------------------------------------
/packaging/Treex-Doc/t/doctest.t:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | 
 3 | use strict;
 4 | use warnings;
 5 | 
 6 | use Test::More tests => 1;
 7 | 
 8 | pass('No tests available. We think it should produce PASS, not N/A');
 9 | 
10 | done_testing();
11 | 
12 | 


--------------------------------------------------------------------------------
/packaging/Treex-EN/Changes.template:
--------------------------------------------------------------------------------
 1 | Revision history of the Treex-EN distribution:
 2 | 
 3 | 0.08171       2012-02-16
 4 |     - added Featurama tagger
 5 | 
 6 | 0.08057       2012-02-07
 7 |     - MSTperl parser was updated
 8 | 
 9 | 0.07297       2011-11-15
10 |     - added MSTperl parser
11 | 
12 | 0.07194       2011-11-07
13 |     - added dependency on current version of Treex::Unilang
14 | 
15 | 0.07191       2011-11-06
16 |     - first non-dev release
17 | 
18 | 0.06903_1     2011-10-03 
19 | 
20 |     - the first release at CPAN
21 |     - contain English analysis up to tagger
22 | 
23 | For detailed log of changes see https://svn.ms.mff.cuni.cz/projects/tectomt_devel/timeline (Use 'public' username and same password)
24 | 


--------------------------------------------------------------------------------
/packaging/Treex-EN/Makefile:
--------------------------------------------------------------------------------
 1 | SHELL=bash
 2 | 
 3 | include ../common.mk
 4 | 
 5 | ##List of blocks used by scenario
 6 | #Read::AlignedSentences en_src=sample-en.txt cs_ref=sample-cs.txt
 7 | #Util::SetGlobal language=en selector=src
 8 | #W2A::ResegmentSentences
 9 | #W2A::EN::Tokenize
10 | #W2A::EN::NormalizeForms
11 | #W2A::EN::FixTokenization
12 | ##W2A::EN::TagMorce
13 | #W2A::TagTreeTagger
14 | #W2A::EN::FixTags
15 | #W2A::EN::Lemmatize
16 | #Tagger still missing
17 | 
18 | 
19 | ALLDIRS=${EN_T}/ ${TOOLS}/ ${SEGMENT_EN}/ ${FEATURAMA_T}/
20 | 
21 | MODULES :=  ${PREFIX}/EN.pm \
22 | 	$(addprefix ${EN}/, Tokenize.pm NormalizeForms.pm FixTokenization.pm FixTags.pm Lemmatize.pm TagLinguaEn.pm ParseMSTperl.pm SetIsMemberFromDeprel.pm FixTagsAfterParse.pm TagMorphoDiTa.pm) \
23 | 	$(addprefix ${EN_T}/, lingua_en.t) \
24 | 	$(addprefix ${SEGMENT_EN}/,  RuleBased.pm) \
25 | 	$(addprefix ${FEATURAMA}/,  EN.pm) \
26 | 	$(addprefix ${FEATURAMA_T}/,  featurama_en.t) \
27 | 	${ENGLISHMORPHO}/
28 | 
29 | 


--------------------------------------------------------------------------------
/packaging/Treex-JA/Changes.template:
--------------------------------------------------------------------------------
 1 | Revision history of the Treex-JA distribution:
 2 | 
 3 | 0.08171       2012-02-16
 4 |     - added Featurama tagger
 5 | 
 6 | 0.08057       2012-02-07
 7 |     - MSTperl parser was updated
 8 | 
 9 | 0.07297       2011-11-15
10 |     - added MSTperl parser
11 | 
12 | 0.07194       2011-11-07
13 |     - added dependency on current version of Treex::Unilang
14 | 
15 | 0.07191       2011-11-06
16 |     - first non-dev release
17 | 
18 | 0.06903_1     2011-10-03 
19 | 
20 |     - the first release at CPAN
21 |     - contain English analysis up to tagger
22 | 
23 | For detailed log of changes see https://svn.ms.mff.cuni.cz/projects/tectomt_devel/timeline (Use 'public' username and same password)
24 | 


--------------------------------------------------------------------------------
/packaging/Treex-JA/Makefile:
--------------------------------------------------------------------------------
 1 | SHELL=bash
 2 | 
 3 | include ../common.mk
 4 | 
 5 | ##List of blocks used by scenario
 6 | #Read::Sentences from=sample-ja.txt
 7 | #Util::SetGlobal language=ja selector=src
 8 | #W2A::JA::TagMeCab
 9 | #W2A::JA::ParseJDEPP
10 | #W2A::JA::RomanizeTags
11 | #W2A::JA::FixInterpunction
12 | #W2A::JA::FixCopulas
13 | #W2A::JA::RehangAuxVerbs
14 | #W2A::JA::RehangCopulas
15 | #W2A::JA::RehangConjunctions
16 | #W2A::JA::RehangParticles
17 | 
18 | ALLDIRS=${JA_T}/ ${TOOLS}/ ${TAGGER_T}/ ${PARSER_T}/ 
19 | 
20 | MODULES := ${PREFIX}/JA.pm \
21 |   $(addprefix ${JA}/, TagMeCab.pm ParseJDEPP.pm RomanizeTags.pm FixInterpunction.pm FixCopulas.pm RehangAuxVerbs.pm RehangCopulas.pm RehangConjunctions.pm RehangParticles.pm) \
22 |   $(addprefix ${JA_T}/, tag_mecab.t parse_jdepp.t) \
23 |   $(addprefix ${TAGGER_T}/, mecab.t) \
24 |   $(addprefix ${PARSER_T}/, jdepp.t) \
25 |   $(addprefix ${TAGGER}/, MeCab.pm) \
26 |   $(addprefix ${PARSER}/, JDEPP.pm) \
27 | 
28 | 


--------------------------------------------------------------------------------
/packaging/Treex-JA/share/examples/sample-ja.txt:
--------------------------------------------------------------------------------
 1 | Johnを探しています。
 2 | 鳥や犬や猫や馬がいました。
 3 | すみません！
 4 | 私と一緒に来てください。
 5 | わたしは日本語を話します。
 6 | 少しだけ。
 7 | 名前は何ですか？
 8 | 私の名前はJohnです。
 9 | 私は水瓜が好きです。
10 | おめでとう！
11 | わかりません。
12 | 学校に行きます。
13 | 電車を乗ります。
14 | 家に犬がいます。
15 | それわ楽しいです。
16 | 木の葉の色が変わります。
17 | 贈り物をいっぱい買っておきました。
18 | 日本に行く時、贈り物をいっぱい買っておきました。
19 | 


--------------------------------------------------------------------------------
/packaging/Treex-JA/share/examples/sample.scen:
--------------------------------------------------------------------------------
 1 | Util::SetGlobal language=ja selector=src
 2 | Read::Sentences from=sample-ja.txt
 3 | 
 4 | W2A::JA::TagMeCab
 5 | 
 6 | W2A::JA::ParseJDEPP
 7 | 
 8 | W2A::JA::RomanizeTags
 9 | W2A::JA::FixInterpunction
10 | W2A::JA::FixCopulas
11 | W2A::JA::RehangAuxVerbs
12 | W2A::JA::RehangCopulas
13 | W2A::JA::RehangConjunctions
14 | W2A::JA::RehangParticles
15 | 
16 | Write::Treex
17 | 


--------------------------------------------------------------------------------
/packaging/Treex-JA/weaver.ini.template:
--------------------------------------------------------------------------------
 1 | [@CorePrep]
 2 |   
 3 | [-SingleEncoding]
 4 | 
 5 | [Name]
 6 | [Version]
 7 | 
 8 | [Region  / prelude]
 9 | 
10 | [Generic / SYNOPSIS]
11 | [Generic / DESCRIPTION]
12 | [Generic / OVERVIEW]
13 | 
14 | [Collect / ATTRIBUTES]
15 | command = attr
16 | 
17 | [Collect / METHODS]
18 | command = method
19 | 
20 | [Collect / FUNCTIONS]
21 | command = func
22 | 
23 | [Leftovers]
24 | 
25 | [Region  / postlude]
26 | 
27 | [Authors]
28 | [Legal]
29 | 


--------------------------------------------------------------------------------
/packaging/Treex-Parser-MSTperl/Changes.template:
--------------------------------------------------------------------------------
 1 | Revision history of the Treex-Parser-MSTperl distribution:
 2 | 
 3 | VERSION      DATE
 4 |     - TODO
 5 | 
 6 | 0.11949      2014-04-05
 7 |     - updating ArrayRef[] and similar to work under new Perl
 8 | 
 9 | 0.11336      2013-12-04
10 |     - removing new Perl version dependency
11 | 
12 | 0.11319      2013-11-28
13 |     - setting minimum required version
14 | 
15 | 0.11306      2013-11-24
16 |     - updated POD a little
17 | 
18 | 0.11299      2013-11-24
19 |     - safer file path handling & more detailed error reporting
20 | 
21 | 0.09731      2012-11-27
22 |     - adding advanced capabilities (parallel features, large-scale data features)
23 | 
24 | 0.08055      2012-02-07
25 |     - labeller now works
26 | 
27 | 0.07298      2011-11-15
28 |     - the first release at CPAN
29 | 
30 | 
31 | For detailed log of changes see
32 | https://svn.ms.mff.cuni.cz/projects/tectomt_devel/log/trunk/treex/lib/Treex/Tool/Parser/MSTperl
33 | (Use 'public' username and same password)
34 | 


--------------------------------------------------------------------------------
/packaging/Treex-Parser-MSTperl/Makefile:
--------------------------------------------------------------------------------
 1 | SHELL=bash
 2 | 
 3 | include ../common.mk
 4 | 
 5 | ALLDIRS=${PARSER}
 6 | 
 7 | VERSION=`svn info .| grep Revision | perl -ne 's/(\d+)//;printf("0.%05d%s", $$1, "${VERSION_SUFFIX}")'`
 8 | 
 9 | MODULES := ${MST}/ \
10 |         $(addprefix ${PARSER}/, MSTperl.pm)
11 |            #$(addprefix ${WRITE}/, Text.pm Treex.pm Sentences.pm Redirectable.pm)
12 | 
13 | 


--------------------------------------------------------------------------------
/packaging/Treex-Unilang/Changes.template:
--------------------------------------------------------------------------------
 1 | Revision history of the Treex-Unilang distribution:
 2 | 
 3 | 0.08170       2012-02-16
 4 |     - added Featurama tagger
 5 | 
 6 | 0.08056       2012-02-07
 7 |     - use new version of MSTperl parser and Treex::Core
 8 | 
 9 | 0.07297       2011-11-15
10 |     - added Read::CoNLLX and Write::CoNLLX
11 |     - added MSTperl parser
12 | 
13 | 0.07194       2011-11-07
14 |     - added dependency on current version of Treex::Core
15 | 
16 | 0.07191       2011-11-06
17 |     - first non-dev release
18 | 
19 | 0.06903_1     2011-10-03
20 | 
21 |     - the first release at CPAN
22 | 
23 | For detailed log of changes see https://svn.ms.mff.cuni.cz/projects/tectomt_devel/timeline (Use 'public' username and same password)
24 | 


--------------------------------------------------------------------------------
/packaging/test_treex_installation_by_cpanm.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # tested in SU2
 4 | 
 5 | # Stop at the first failing step instead of continuing on a broken setup.
 6 | set -e
 7 | 
 8 | export BASE=myperl
 9 | export COMMONLIB=$BASE/basic
10 | export TREEXLIB=$BASE/treex
11 | 
12 | export PERL5LIB=$TREEXLIB/lib:$COMMONLIB/lib:$PERL5LIB
13 | export PATH=$TREEXLIB/bin:$COMMONLIB/bin:$PATH
14 | 
15 | mkdir -p "$COMMONLIB"
16 | mkdir -p "$TREEXLIB"
17 | 
18 | # Fetch cpanm over HTTPS (the script is executed right below); -f makes curl fail on HTTP errors.
19 | curl -fL https://cpanmin.us/ -o cpanm
20 | perl ./cpanm -l "$COMMONLIB" Moose
21 | 
22 | perl ./cpanm -l "$TREEXLIB" treex-core-testy/Treex-Core-0.08040.tar.gz


--------------------------------------------------------------------------------
/training/treelm/create_ids.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | use strict;
 3 | use warnings;
 4 | use utf8;
 5 | use Storable;
 6 | 
 7 | my %lemmas;
 8 | binmode STDIN, ':utf8';
 9 | while (<STDIN>) {
10 |     chomp;
11 |     my ( $count, $Lg, $Pg, $Ld, $Pd, $Fd ) = split /\t/, $_;
12 |     $lemmas{"$Ld $Pd"} += $count;
13 |     $lemmas{"$Lg $Pg"} += $count;
14 | }
15 | 
16 | my @ids = (undef);
17 | my %id_of;
18 | my $i=1;
19 | # Assign ids (starting at 1) by descending count; ties are broken by string order so that repeated runs give identical ids.
20 | foreach my $lemma_pos ( sort { $lemmas{$b} <=> $lemmas{$a} || $a cmp $b } keys %lemmas ) {
21 |     my ($lemma, $pos) = split / /, $lemma_pos;
22 |     push @ids, [$lemma, $pos];
23 |     $id_of{$lemma_pos} = $i++;
24 | }
25 | binmode STDOUT;    # the Storable image is binary; make sure no I/O layer rewrites its bytes
26 | Storable::nstore_fd([\@ids, \%id_of], \*STDOUT);
27 | 
28 | # Copyright 2009 Martin Popel
29 | # This file is distributed under the GNU General Public License v2. See $TMT_ROOT/README.


--------------------------------------------------------------------------------
/training/treelm/en/czeng/Makefile:
--------------------------------------------------------------------------------
1 | 
2 | vectors:
3 | 	treex -p -j 100 -Len Read::Treex from='@/net/cluster/TMP/czeng10/filelist_parsed_train' A2A::ConvertTags input_driver=en::penn A2A::EN::EnhanceInterset Util::Eval anode='$$.set_tag($$.iset->pos)' Print::VectorsForTreeLM path=vectors/
4 | 


--------------------------------------------------------------------------------
/training/treelm/en/stackoverflow/Makefile:
--------------------------------------------------------------------------------
 1 | SHELL=bash
 2 | 
 3 | extract:
 4 | 	mkdir -p vectors
 5 | 	treex -p -j 100 --survive -Len -Ssrc Read::Treex from='!parsed/f*.treex.gz' skip_finished='{parsed/(f.*).treex.gz}{vectors/$$1}' Util::Eval anode='$$.set_tag($$.iset->pos)' Print::VectorsForTreeLM path=vectors/
 6 | 
 7 | 
 8 | parse:
 9 | 	mkdir -p parsed
10 | 	treex -p -j 100 --survive --mem 16g -Len -Ssrc Read::Sentences from='!plain/f*.txt' skip_finished='{plain/(f.*).txt}{parsed/$$1.treex.gz}' W2A::ResegmentSentences W2A::HideIT Scen::Analysis::EN domain=IT Write::Treex path=parsed/
11 | 
12 | 
13 | plain: Posts.xml
14 | 	mkdir -p plain
15 | 	cat Posts.xml | sed 's/.*Body="\([^"]*\)".*/\1/' | ./clean_stackexchange.pl | split --lines=100 --numeric-suffixes --suffix-length=6 --additional-suffix=.txt - plain/f
16 | 
17 | Posts.xml:
18 | 	wget https://archive.org/download/stackexchange/stackoverflow.com-Posts.7z
19 | 	7z x stackoverflow.com-Posts.7z
20 | 


--------------------------------------------------------------------------------
/training/treelm/en/stackoverflow/clean_stackexchange.pl:
--------------------------------------------------------------------------------
1 | ../superuser/clean_stackexchange.pl


--------------------------------------------------------------------------------
/training/treelm/en/superuser/Makefile:
--------------------------------------------------------------------------------
 1 | SHELL=bash
 2 | 
 3 | extract:
 4 | 	mkdir -p vectors
 5 | 	treex -p -j 100 --survive -Len -Ssrc Read::Treex from='!parsed/f*.treex.gz' skip_finished='{parsed/(f.*).treex.gz}{vectors/$$1}' Util::Eval anode='$$.set_tag($$.iset->pos)' Print::VectorsForTreeLM path=vectors/
 6 | 
 7 | 
 8 | parse:
 9 | 	mkdir -p parsed
10 | 	treex -p -j 100 --survive --mem 16g -Len -Ssrc Read::Sentences from='!plain/f*.txt' skip_finished='{plain/(f.*).txt}{parsed/$$1.treex.gz}' W2A::ResegmentSentences W2A::HideIT Scen::Analysis::EN domain=IT Write::Treex path=parsed/
11 | 
12 | 
13 | plain: Posts.xml
14 | 	mkdir -p plain
15 | 	cat Posts.xml | sed 's/.*Body="\([^"]*\)".*/\1/' | ./clean_stackexchange.pl | split --lines=100 --numeric-suffixes --suffix-length=6 --additional-suffix=.txt - plain/f
16 | 
17 | Posts.xml:
18 | 	wget https://archive.org/download/stackexchange/superuser.com.7z
19 | 	7z e superuser.com.7z Posts.xml
20 | 


--------------------------------------------------------------------------------
/training/treelm/en/ubuntu-dialogue/Makefile:
--------------------------------------------------------------------------------
 1 | SHELL=bash
 2 | 
 3 | 
 4 | vectors: parsed
 5 | 	mkdir -p vectors
 6 | 	treex -p -j 100 -Len -Ssrc Read::Treex from='!parsed/f*.treex.gz' Util::Eval anode='$$.set_tag($$.iset->pos)' Print::VectorsForTreeLM path=vectors/
 7 | 
 8 | 
 9 | parsed: plain
10 | 	mkdir -p parsed
11 | 	treex -p -j 100 --mem 16g -Len -Ssrc Read::Sentences from='!plain/f*.txt' W2A::ResegmentSentences W2A::HideIT Scen::Analysis::EN domain=IT Write::Treex path=parsed/ 
12 | 
13 | 
14 | plain: dialogs
15 | 	mkdir -p plain
16 | 	find dialogs/ -name '*.tsv' -exec cat {} \; | cut -f 4 |  \
17 | 	    perl -CSDA -pe 's/[^\x9\xA\xD\x20-\x{D7FF}\x{E000}-\x{FFFD}\x{10000}-\x{10FFFF}]+//g;' | \
18 | 	    split --lines=100 --numeric-suffixes --suffix-length=6 --additional-suffix=.txt - plain/f
19 | 
20 | dialogs:
21 | 	wget http://cs.mcgill.ca/~jpineau/datasets/ubuntu-corpus-1.0/ubuntu_dialogs.tgz
22 | 	tar -xzf ubuntu_dialogs.tgz
23 | 


--------------------------------------------------------------------------------
/training/treelm/en/wmt15-newscrawl14v2/Makefile:
--------------------------------------------------------------------------------
 1 | SHELL=bash
 2 | 
 3 | vectors: parsed
 4 | 	mkdir -p vectors
 5 | 	treex -p -j 100 -Len -Ssrc Read::Treex from='!parsed/f*.treex.gz' Util::Eval anode='$$.set_tag($$.iset->pos)' Print::VectorsForTreeLM path=vectors/
 6 | 
 7 | 
 8 | parsed: plain
 9 | 	mkdir -p parsed
10 | 	treex -p -j 100 --mem 16g -Len -Ssrc Read::Sentences from='!plain/f*.txt' W2A::ResegmentSentences W2A::HideIT Scen::Analysis::EN domain=IT Write::Treex path=parsed/ 
11 | 
12 | # Split the corpus into 100-line files plain/fNNNNNN.txt; the archive is fetched first if it is missing.
13 | plain: news.2014.en.shuffled.v2.gz
14 | 	mkdir -p plain
15 | 	zcat news.2014.en.shuffled.v2.gz | split --lines=100 --numeric-suffixes --suffix-length=6 --additional-suffix=.txt - plain/f
16 | # Download the English corpus; the fetched file name must match the target name used by the plain rule.
17 | news.2014.en.shuffled.v2.gz:
18 | 	wget http://www.statmt.org/wmt15/training-monolingual-news-crawl-v2/news.2014.en.shuffled.v2.gz
19 | 


--------------------------------------------------------------------------------
/training/treelm/en/wmt15-newsdiscuss/Makefile:
--------------------------------------------------------------------------------
 1 | SHELL=bash
 2 | 
 3 | vectors: parsed
 4 | 	mkdir -p vectors
 5 | 	treex -p -j 100 -Len -Ssrc Read::Treex from='!parsed/f*.treex.gz' Util::Eval anode='$$.set_tag($$.iset->pos)' Print::VectorsForTreeLM path=vectors/
 6 | 
 7 | 
 8 | parsed: plain
 9 | 	mkdir -p parsed
10 | 	treex -p -j 100 --mem 16g -Len -Ssrc Read::Sentences from='!plain/f*.txt' W2A::ResegmentSentences W2A::HideIT Scen::Analysis::EN domain=IT Write::Treex path=parsed/ 
11 | 
12 | # Split the corpus into 100-line files plain/fNNNNNN.txt; the archive is fetched first if it is missing.
13 | plain: news-discuss-v1.en.txt.gz
14 | 	mkdir -p plain
15 | 	zcat news-discuss-v1.en.txt.gz | split --lines=100 --numeric-suffixes --suffix-length=6 --additional-suffix=.txt - plain/f
16 | 
17 | news-discuss-v1.en.txt.gz:
18 | 	wget http://www.statmt.org/wmt15/news-discuss-v1.en.txt.gz
19 | 


--------------------------------------------------------------------------------
/training/treelm/print_plsgz.pl:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env perl
 2 | use strict;
 3 | use warnings;
 4 | use utf8;
 5 | use autodie;
 6 | use PerlIO::gzip;
 7 | use Storable;
 8 | use Data::Dumper;
 9 | my $filename = shift or die "No filename as argument\n";
10 | open my $IN, ($filename =~ /\.gz$/) ? '<:gzip' : '<', $filename;
11 | my $model = Storable::fd_retrieve($IN) or die 'Can not read Storable.';
12 | close $IN;
13 | print Dumper($model);
14 | 
15 | # Copyright 2009 Martin Popel
16 | # This file is distributed under the GNU General Public License v2. See $TMT_ROOT/README.


--------------------------------------------------------------------------------