├── .github └── workflows │ ├── ci.yaml │ ├── pull-request-test-reports.yaml │ └── pull-request.yaml ├── .gitignore ├── .gitmodules ├── LICENSE.txt ├── LICENSES.txt ├── README.md ├── bin ├── changeAlgoNames.sh ├── upgrade-pipelines-3_0-4_0.sh └── upgrade-pipelines-3_0-4_1.sh ├── commons-cli.diff ├── perl └── TermBankTFIDF2lst.pl ├── pom.xml └── src ├── build └── resources │ └── findbugs-excluded.xml ├── main ├── java │ ├── gate │ │ ├── plugin │ │ │ └── learningframework │ │ │ │ ├── AbstractDocumentProcessor.java │ │ │ │ ├── EvaluationMethod.java │ │ │ │ ├── Globals.java │ │ │ │ ├── LFUtils.java │ │ │ │ ├── LF_ApplyChunking.java │ │ │ │ ├── LF_ApplyClassification.java │ │ │ │ ├── LF_ApplyRegression.java │ │ │ │ ├── LF_ApplyTopicModel.java │ │ │ │ ├── LF_EvaluateClassification.java │ │ │ │ ├── LF_EvaluateRegression.java │ │ │ │ ├── LF_Export.java │ │ │ │ ├── LF_ExportBase.java │ │ │ │ ├── LF_ExportText.java │ │ │ │ ├── LF_GenFeatures_Affixes.java │ │ │ │ ├── LF_GenFeatures_Misc.java │ │ │ │ ├── LF_TrainChunking.java │ │ │ │ ├── LF_TrainClassification.java │ │ │ │ ├── LF_TrainRegression.java │ │ │ │ ├── LF_TrainTopicModel.java │ │ │ │ ├── LearningFrameworkPRBase.java │ │ │ │ ├── ModelApplication.java │ │ │ │ ├── ScalingMethod.java │ │ │ │ ├── data │ │ │ │ ├── Attribute.java │ │ │ │ ├── Attributes.java │ │ │ │ ├── CorpusRepresentation.java │ │ │ │ ├── CorpusRepresentationLibSVM.java │ │ │ │ ├── CorpusRepresentationMallet.java │ │ │ │ ├── CorpusRepresentationMalletLDA.java │ │ │ │ ├── CorpusRepresentationMalletRelated.java │ │ │ │ ├── CorpusRepresentationMalletSeq.java │ │ │ │ ├── CorpusRepresentationMalletTarget.java │ │ │ │ ├── CorpusRepresentationVolatileBase.java │ │ │ │ ├── CorpusRepresentationVolatileDense2JsonStream.java │ │ │ │ ├── InstanceRepresentation.java │ │ │ │ └── InstanceRepresentationDenseVolatile.java │ │ │ │ ├── engines │ │ │ │ ├── Algorithm.java │ │ │ │ ├── AlgorithmClassification.java │ │ │ │ ├── AlgorithmClustering.java │ │ │ │ ├── 
AlgorithmKind.java │ │ │ │ ├── AlgorithmRegression.java │ │ │ │ ├── Engine.java │ │ │ │ ├── EngineDV.java │ │ │ │ ├── EngineDVFileJson.java │ │ │ │ ├── EngineDVFileJsonKeras.java │ │ │ │ ├── EngineDVFileJsonPyTorch.java │ │ │ │ ├── EngineKerasWrapper.java │ │ │ │ ├── EngineMB.java │ │ │ │ ├── EngineMBCostclaWrapper.java │ │ │ │ ├── EngineMBLibSVM.java │ │ │ │ ├── EngineMBMallet.java │ │ │ │ ├── EngineMBMalletClass.java │ │ │ │ ├── EngineMBMalletSeq.java │ │ │ │ ├── EngineMBPythonNetworksBase.java │ │ │ │ ├── EngineMBServer.java │ │ │ │ ├── EngineMBSklearnBase.java │ │ │ │ ├── EngineMBSklearnWrapper.java │ │ │ │ ├── EngineMBTensorFlowWrapper.java │ │ │ │ ├── EngineMBTopicsLDA.java │ │ │ │ ├── EngineMBWekaWrapper.java │ │ │ │ ├── EvaluationResult.java │ │ │ │ ├── EvaluationResultClHO.java │ │ │ │ ├── EvaluationResultClXval.java │ │ │ │ ├── EvaluationResultClassification.java │ │ │ │ ├── EvaluationResultRegression.java │ │ │ │ ├── EvaluationResultRgHO.java │ │ │ │ ├── EvaluationResultRgXval.java │ │ │ │ ├── Info.java │ │ │ │ ├── Parms.java │ │ │ │ └── Utils4Engines.java │ │ │ │ ├── export │ │ │ │ ├── CorpusExporter.java │ │ │ │ ├── CorpusExporterDRJson.java │ │ │ │ ├── CorpusExporterMR.java │ │ │ │ ├── CorpusExporterMRARFF.java │ │ │ │ ├── CorpusExporterMRCSV.java │ │ │ │ ├── CorpusExporterMRJsonBase.java │ │ │ │ ├── CorpusExporterMRJsonSeq.java │ │ │ │ ├── CorpusExporterMRJsonTarget.java │ │ │ │ ├── CorpusExporterMRLibSVM.java │ │ │ │ ├── CorpusExporterMRMatrixMarket2.java │ │ │ │ ├── CorpusExporterMRSeq.java │ │ │ │ ├── CorpusExporterMRTarget.java │ │ │ │ ├── Exporter.java │ │ │ │ └── ExporterText.java │ │ │ │ ├── features │ │ │ │ ├── CodeAs.java │ │ │ │ ├── Datatype.java │ │ │ │ ├── FeatureExtractionBase.java │ │ │ │ ├── FeatureExtractionDense.java │ │ │ │ ├── FeatureExtractionMalletSparse.java │ │ │ │ ├── FeatureInfo.java │ │ │ │ ├── FeatureSpecAttribute.java │ │ │ │ ├── FeatureSpecAttributeList.java │ │ │ │ ├── FeatureSpecNgram.java │ │ │ │ ├── 
FeatureSpecSimpleAttribute.java │ │ │ │ ├── FeatureSpecification.java │ │ │ │ ├── MissingValueTreatment.java │ │ │ │ ├── SeqEncoder.java │ │ │ │ ├── SeqEncoderEnum.java │ │ │ │ ├── SeqEncoder_SimpleBIO.java │ │ │ │ └── TargetType.java │ │ │ │ ├── mallet │ │ │ │ ├── LFAlphabet.java │ │ │ │ ├── LFInstanceList.java │ │ │ │ ├── LFLabelAlphabet.java │ │ │ │ ├── LFPipe.java │ │ │ │ ├── NominalTargetWithCosts.java │ │ │ │ ├── PipeScaleMeanVarAll.java │ │ │ │ └── PipeScaleMinMaxAll.java │ │ │ │ ├── mbstats │ │ │ │ ├── FVStatsMeanVarAll.java │ │ │ │ ├── FeatureVectorStats.java │ │ │ │ └── PerFeatureStats.java │ │ │ │ ├── pipelines │ │ │ │ └── LF_TrainTopicModel_Mallet_EN.java │ │ │ │ └── stats │ │ │ │ ├── Stats.java │ │ │ │ └── StatsForFeatures.java │ │ └── resources │ │ │ └── img │ │ │ └── svg │ │ │ └── LF_TrainTopicModel_Mallet_ENIcon.java │ └── org │ │ └── apache │ │ └── commons │ │ └── clipatched │ │ ├── AlreadySelectedException.java │ │ ├── AmbiguousOptionException.java │ │ ├── BasicParser.java │ │ ├── CommandLine.java │ │ ├── CommandLineParser.java │ │ ├── DefaultParser.java │ │ ├── GnuParser.java │ │ ├── HelpFormatter.java │ │ ├── MissingArgumentException.java │ │ ├── MissingOptionException.java │ │ ├── Option.java │ │ ├── OptionBuilder.java │ │ ├── OptionGroup.java │ │ ├── OptionValidator.java │ │ ├── Options.java │ │ ├── ParseException.java │ │ ├── Parser.java │ │ ├── PatternOptionBuilder.java │ │ ├── PosixParser.java │ │ ├── TypeHandler.java │ │ ├── UnrecognizedOptionException.java │ │ ├── Util.java │ │ ├── overview.html │ │ └── package-info.java └── resources │ ├── creole.xml │ └── resources │ ├── pipelines │ ├── .LF_TrainTopicModel_Mallet_EN.metadata │ │ ├── LF_TrainTopicModel_Mallet_EN.svg │ │ ├── long-desc.html │ │ ├── metadata.xml │ │ └── short-desc.html │ ├── LF_TrainTopicModel_Mallet_EN.xgapp │ ├── gazetteer │ │ ├── stopwords-en-long.def │ │ ├── stopwords-en-long.lst │ │ ├── stopwords-en.def │ │ └── stopwords-en.lst │ ├── groovy │ │ ├── 
filterTokens4LDA.groovy │ │ └── removeUnwantedTokens4LDA.groovy │ └── regexp │ │ └── unwantedText4LDA.txt │ └── wrappers │ ├── FileJsonKeras │ ├── apply.cmd │ ├── apply.py │ ├── apply.sh │ ├── train.cmd │ ├── train.py │ ├── train.sh │ └── wrapperInfo.yaml │ └── FileJsonPyTorch │ ├── apply.cmd │ ├── apply.sh │ ├── train.cmd │ ├── train.sh │ └── wrapperInfo.yaml └── test ├── java └── gate │ └── plugin │ └── learningframework │ └── tests │ ├── GappLoadingTest.java │ ├── ITEngineLibSVM.java │ ├── ITEngineMalletClass.java │ ├── ITEngineMalletSeq.java │ ├── ITFeatureScaling.java │ ├── TestCorpusRepresentationVD2JS.java │ ├── TestFeatureExtraction.java │ ├── TestFeatureExtractionDense.java │ ├── TestFeatureSpecification.java │ ├── TestGenFeaturesAffixes.java │ ├── TestInfo.java │ ├── TestModelApplication.java │ ├── TestParms.java │ ├── TestPipeSerialization.java │ ├── TestStats.java │ ├── TestUtils4Engines.java │ └── Utils.java └── resources └── creole.properties /.github/workflows/ci.yaml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | branches: 6 | - master 7 | 8 | # Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages 9 | permissions: 10 | contents: read 11 | pages: write 12 | id-token: write 13 | checks: write 14 | pull-requests: write 15 | 16 | jobs: 17 | common-build: 18 | uses: GateNLP/gate-top/.github/workflows/standard-module.yml@master 19 | with: 20 | deploy_site_to_pages: false 21 | # Full rather than shallow clone 22 | fetch_depth: "0" 23 | secrets: inherit 24 | -------------------------------------------------------------------------------- /.github/workflows/pull-request-test-reports.yaml: -------------------------------------------------------------------------------- 1 | name: Test Reports (PR) 2 | 3 | on: 4 | workflow_run: 5 | workflows: ["Pull Request"] 6 | types: 7 | - completed 8 | 9 | permissions: 10 | contents: read 11 | actions: read 12 | checks: write 13 | 
pull-requests: write 14 | 15 | jobs: 16 | common-pr: 17 | uses: GateNLP/gate-top/.github/workflows/standard-module-pr-test-report.yml@master 18 | -------------------------------------------------------------------------------- /.github/workflows/pull-request.yaml: -------------------------------------------------------------------------------- 1 | name: Pull Request 2 | 3 | on: 4 | pull_request: 5 | branches: 6 | - master 7 | 8 | jobs: 9 | common-pr: 10 | uses: GateNLP/gate-top/.github/workflows/standard-module-pr.yml@master 11 | with: 12 | # Full rather than shallow clone 13 | fetch_depth: "0" 14 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /.project 2 | /gateplugin-LearningFramework-*.zip 3 | /gateplugin-LearningFramework.jar 4 | /classes 5 | *~ 6 | *.bak 7 | #*# 8 | *.class 9 | TEST-*.xml 10 | TEST-*.txt 11 | TESTS-TestSuites.xml 12 | junit-noframes.html 13 | hs_err_*.log 14 | 15 | /tmp* 16 | /tests/*/*/pipe.pipe 17 | /tests/*/*/info.yaml 18 | /tests/*/*/lf.model 19 | /tests/*/*/data.arff 20 | /tests/*/*/header.arff 21 | /tests/cl-ionosphere/export/* 22 | /tests/rg-abalone/export/* 23 | /tests/seq-wikipedia1/export/* 24 | !/tests/*/*/.keep 25 | !/tests/*/model-*/weka.yaml 26 | /test-tmp-EngineMalletSeqOut/ 27 | /tests/cl-ionosphere/model-sklearn-external-rf/sklmodel* 28 | /tests/cl-ionosphere/model-sklearn-external-rf/indep.mtx 29 | /tests/cl-ionosphere/model-sklearn-external-rf/dep.mtx 30 | /tests/cl-ionosphere/model-sklearn-external-rf/instweights.mtx 31 | /tests/cl-ionosphere/model-costcla-external-rf/dep.mtx 32 | /tests/cl-ionosphere/model-costcla-external-rf/indep.mtx 33 | /tests/cl-ionosphere/model-costcla-external-rf/instcosts.mtx 34 | /tests/cl-ionosphere/model-costcla-external-rf/costclamodel 35 | /tests/cl-ionosphere/model-costcla-external-rf/costclamodel_01.npy 36 | 
/tests/cl-ionosphere/model-costcla-external-rf/data.zip 37 | /tests/cl-ionosphere/model-costcla-external-rf/m1 38 | /tests/cl-ionosphere/model-costcla-external-rf/m2 39 | /tests/cl-ionosphere/model-costcla-external-rf/m2_01.npy 40 | /tests/cl-ionosphere/model-costcla-external-rf/model 41 | 42 | /tests/cl-ionosphere/model-keras-m1/dep.csv 43 | /tests/cl-ionosphere/model-keras-m1/indep.csv 44 | /tests/cl-ionosphere/model-keras-m1/kerasmodel.h5 45 | 46 | /tests/seq-wikipedia1/model-dense-pytorch-1/* 47 | /tests/cl-ionosphere/model-dense-pytorch/* 48 | /tests/rg-abalone/model-dense-pytorch-1/* 49 | 50 | /tests/seq-wikipedia1/model-dense-keras-1/* 51 | /tests/cl-ionosphere/model-dense-keras/* 52 | /tests/rg-abalone/model-dense-keras-1/* 53 | 54 | 55 | /tests/cl-ionosphere/export-dense/* 56 | !/tests/cl-ionosphere/export-dense/.keep 57 | 58 | /tests/dense-pos-seq-2/model-dense-pytorch1/ 59 | !/tests/dense-pos-seq-2/model-dense-pytorch1/.keepme 60 | 61 | /tests/dense-pos-seq-2/model-malletLDA/diagnostics.xml 62 | 63 | /target/ 64 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "src/main/resources/resources/wrappers/FileJsonPyTorch/gate-lf-python-data"] 2 | path = src/main/resources/resources/wrappers/FileJsonPyTorch/gate-lf-python-data 3 | url = https://github.com/GateNLP/gate-lf-python-data.git 4 | [submodule "src/main/resources/resources/wrappers/FileJsonKeras/gate-lf-python-data"] 5 | path = src/main/resources/resources/wrappers/FileJsonKeras/gate-lf-python-data 6 | url = https://github.com/GateNLP/gate-lf-python-data.git 7 | [submodule "src/main/resources/resources/wrappers/FileJsonPyTorch/gate-lf-pytorch-json"] 8 | path = src/main/resources/resources/wrappers/FileJsonPyTorch/gate-lf-pytorch-json 9 | url = https://github.com/GateNLP/gate-lf-pytorch-json.git 10 | [submodule 
"src/main/resources/resources/wrappers/FileJsonKeras/gate-lf-keras-json"] 11 | path = src/main/resources/resources/wrappers/FileJsonKeras/gate-lf-keras-json 12 | url = https://github.com/GateNLP/gate-lf-keras-json.git 13 | -------------------------------------------------------------------------------- /LICENSES.txt: -------------------------------------------------------------------------------- 1 | This plugin directly depends on the following software: 2 | 3 | Mallet: http://mallet.cs.umass.edu/ 4 | License: Apache 2.0 https://github.com/mimno/Mallet/blob/master/LICENSE 5 | 6 | SnakeYaml: https://bitbucket.org/asomov/snakeyaml 7 | License: Apache License 2.0 8 | 9 | LibSVM: https://www.csie.ntu.edu.tw/~cjlin/libsvm/ 10 | License: http://www.csie.ntu.edu.tw/~cjlin/libsvm/COPYRIGHT 11 | 12 | Modified CommonsCLI: https://github.com/johann-petrak/commons-cli 13 | License: Apache License 2.0 14 | based on: 15 | CommonsCLI: https://commons.apache.org/proper/commons-cli/ 16 | License: Apache License 2.0 17 | 18 | gateplugin-Evaluation: https://github.com/johann-petrak/gateplugin-Evaluation 19 | License: LGPL 2.1 20 | 21 | JDOM: http://jdom.org/ 22 | License: http://jdom.org/docs/faq.html#a0030 (Apache-like, more permissive) 23 | 24 | Jackson Databind: https://github.com/FasterXML/jackson-databind 25 | License: Apache 2.0 http://www.apache.org/licenses/LICENSE-2.0 26 | 27 | Unirest: http://unirest.io/ 28 | License: MIT https://github.com/Mashape/unirest-java/blob/master/LICENSE 29 | 30 | Please see each software for their own dependencies and their licenses. 31 | 32 | 33 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # GATE Learning Framework Plugin 2 | 3 | Welcome to the page of the GATE Learning Framework plugin! 
4 | 5 | * [User Documentation](https://gatenlp.github.io/gateplugin-LearningFramework/) 6 | * [Developer Documentation/Notes](https://github.com/GateNLP/gateplugin-LearningFramework/wiki) 7 | * [JavaDoc](https://gatenlp.github.io/gateplugin-LearningFramework/apidocs/) 8 | 9 | 10 | -------------------------------------------------------------------------------- /bin/changeAlgoNames.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | file="$1" 4 | if [[ "x$file" == "x" ]] 5 | then 6 | echo 'need filename' 7 | exit 1 8 | fi 9 | 10 | sed -i.bak ' 11 | s/PYTORCH_CL_WRAPPER_DENSE/PytorchWrapper_CL_DR/g 12 | s/PYTORCH_SEQ_WRAPPER_DENSE/PytorchWrapper_SEQ_DR/g 13 | s/KERAS_CL_WRAPPER_DENSE/KerasWrapper_CL_DR/g 14 | s/KERAS_SEQ_WRAPPER_DENSE/KerasWrapper_SEQ_DR/g 15 | s/MALLET_CL_BALANCED_WINNOW/MalletBalancedWinnow_CL_MR/g 16 | s/MALLET_CL_C45/MalletC45_CL_MR/g 17 | s/MALLET_CL_DECISION_TREE/MalletDecisionTree_CL_MR/g 18 | s/MALLET_CL_MAX_ENT/MalletMaxEnt_CL_MR/g 19 | s/MALLET_CL_NAIVE_BAYES_EM/MalletNaiveBayesEM_CL_MR/g 20 | s/MALLET_CL_NAIVE_BAYES/MalletNaiveBayes_CL_MR/g 21 | s/MALLET_CL_WINNOW/MalletWinnow_CL_MR/g 22 | s/MALLET_SEQ_CRF_SG/MalletCRFSG_SEQ_MR/g 23 | s/MALLET_SEQ_CRF_VG/MalletCRFVG_SEQ_MR/g 24 | s/MALLET_SEQ_CRF/MalletCRF_SEQ_MR/g 25 | s/COSTCLA_CL_WRAPPER/CostclaWrapper_CL_MR/g 26 | s/MALLET_SEQ_MEMM/MalletMEMM_SEQ_MR/g 27 | s/WEKA_CL_WRAPPER/WekaWrapper_CL_MR/g 28 | s/SKLEARN_CL_WRAPPER/SklearnWrapper_CL_MR/g 29 | s/KERAS_CL_WRAPPER/KerasWrapper_CL_MR/g 30 | s/LIBSVM_RG/LibSVM_RG_MR/g 31 | s/WEKA_RG_WRAPPER/WekaWrapper_RG_MR/g 32 | s/PYTORCH_RG_WRAPPER_Dense/PytorchWrapper_RG_DR/g 33 | s/SKLEARN_RG_WRAPPER/SklearnWrapper_RG_MR/g 34 | s/KERAS_RG_WRAPPER/WekaWrapper_RG_MR/g 35 | s/EXPORTER_ARFF_CLASS/ARFF_CL_MR/g 36 | s/EXPORTER_ARFF_REGRESSION/ARFF_RG_MR/g 37 | s/EXPORTER_CSV_CLASS/CSV_CL_MR/g 38 | s/EXPORTER_CSV_REGRESSION/CSV_RG_MR/g 39 | s/EXPORTER_JSON_SEQ/JSON_CL_MR/g 40 | 
s/EXPORTER_JSON_REGRESSION/JSON_RG_MR/g 41 | s/EXPORTER_JSON_CLASS/JSON_SEQ_MR/g 42 | s/EXPORTER_LIBSVM_CLASS/LibSVM_CL_MR/g 43 | s/EXPORTER_LIBSVM_REGRESSION/LibSVM_RG_MR/g 44 | s/EXPORTER_MATRIXMARKET2_CLASS/MatrixMarket2_CL_MR/g 45 | s/EXPORTER_MATRIXMARKET2_REGRESSION/MatrixMatket2_RG_MR/g 46 | s/LIBSVM_CL/LibSVM_CL_MR/g 47 | s/learningframework.Exporter/learningframework.export.Exporter/g 48 | ' "$file" # quoted so that a path containing blanks or glob characters is passed to sed as one argument 49 | -------------------------------------------------------------------------------- /bin/upgrade-pipelines-3_0-4_0.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | dir="$1" 4 | if [[ "x$dir" == "x" ]] 5 | then 6 | dir="." 7 | fi 8 | 9 | find "$dir" -name '*.gapp' -o -name '*.xgapp' | while IFS= read -r f # IFS= and -r keep blanks and backslashes in file names intact 10 | do 11 | echo "Updating file $f" 12 | sed -i.bak ' 13 | s/PYTORCH_CL_WRAPPER_DENSE/PytorchWrapper_CL_DR/g 14 | s/PYTORCH_SEQ_WRAPPER_DENSE/PytorchWrapper_SEQ_DR/g 15 | s/KERAS_CL_WRAPPER_DENSE/KerasWrapper_CL_DR/g 16 | s/KERAS_SEQ_WRAPPER_DENSE/KerasWrapper_SEQ_DR/g 17 | s/MALLET_CL_BALANCED_WINNOW/MalletBalancedWinnow_CL_MR/g 18 | s/MALLET_CL_C45/MalletC45_CL_MR/g 19 | s/MALLET_CL_DECISION_TREE/MalletDecisionTree_CL_MR/g 20 | s/MALLET_CL_MAX_ENT/MalletMexEnt_CL_MR/g 21 | s/MALLET_CL_NAIVE_BAYES_EM/MalletNaiveBayesEM_CL_MR/g 22 | s/MALLET_CL_NAIVE_BAYES/MalletNaiveBayes_CL_MR/g 23 | s/MALLET_CL_WINNOW/MalletWinnow_CL_MR/g 24 | s/MALLET_SEQ_CRF_SG/MalletCRFSG_SEQ_MR/g 25 | s/MALLET_SEQ_CRF_VG/MalletCRFVG_SEQ_MR/g 26 | s/MALLET_SEQ_CRF/MalletCRF_SEQ_MR/g 27 | s/COSTCLA_CL_WRAPPER/CostclaWrapper_CL_MR/g 28 | s/MALLET_SEQ_MEMM/MalletMEMM_SEQ_MR/g 29 | s/WEKA_CL_WRAPPER/WekaWrapper_CL_MR/g 30 | s/SKLEARN_CL_WRAPPER/SklearnWrapper_CL_MR/g 31 | s/KERAS_CL_WRAPPER/KerasWrapper_CL_MR/g 32 | s/LIBSVM_RG/LibSVM_RG_MR/g 33 | s/WEKA_RG_WRAPPER/WekaWrapper_RG_MR/g 34 | s/PYTORCH_RG_WRAPPER_Dense/PytorchWrapper_RG_DR/g 35 | s/SKLEARN_RG_WRAPPER/SklearnWrapper_RG_MR/g 36 | s/KERAS_RG_WRAPPER/WekaWrapper_RG_MR/g 37 | 
s/EXPORTER_ARFF_CLASS/ARFF_CL_MR/g 38 | s/EXPORTER_ARFF_REGRESSION/ARFF_RG_MR/g 39 | s/EXPORTER_CSV_CLASS/CSV_CL_MR/g 40 | s/EXPORTER_CSV_REGRESSION/CSV_RG_MR/g 41 | s/EXPORTER_JSON_SEQ/JSON_CL_MR/g 42 | s/EXPORTER_JSON_REGRESSION/JSON_RG_MR/g 43 | s/EXPORTER_JSON_CLASS/JSON_SEQ_MR/g 44 | s/EXPORTER_LIBSVM_CLASS/LibSVM_CL_MR/g 45 | s/EXPORTER_LIBSVM_REGRESSION/LibSVM_RG_MR/g 46 | s/EXPORTER_MATRIXMARKET2_CLASS/MatrixMarket2_CL_MR/g 47 | s/EXPORTER_MATRIXMARKET2_REGRESSION/MatrixMatket2_RG_MR/g 48 | s/LIBSVM_CL/LibSVM_CL_MR/g 49 | s/learningframework.Exporter/learningframework.export.Exporter/g 50 | ' "$f" # quoted so that a path containing blanks or glob characters is passed to sed as one argument 51 | done 52 | -------------------------------------------------------------------------------- /bin/upgrade-pipelines-3_0-4_1.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | dir="$1" 4 | if [[ "x$dir" == "x" ]] 5 | then 6 | dir="." 7 | fi 8 | 9 | find "$dir" -name '*.gapp' -o -name '*.xgapp' | while IFS= read -r f # IFS= and -r keep blanks and backslashes in file names intact 10 | do 11 | echo "Updating file $f" 12 | sed -i.bak ' 13 | s/PYTORCH_CL_WRAPPER_DENSE/PytorchWrapper_CL_DR/g 14 | s/PYTORCH_SEQ_WRAPPER_DENSE/PytorchWrapper_SEQ_DR/g 15 | s/KERAS_CL_WRAPPER_DENSE/KerasWrapper_CL_DR/g 16 | s/KERAS_SEQ_WRAPPER_DENSE/KerasWrapper_SEQ_DR/g 17 | s/MALLET_CL_BALANCED_WINNOW/MalletBalancedWinnow_CL_MR/g 18 | s/MALLET_CL_C45/MalletC45_CL_MR/g 19 | s/MALLET_CL_DECISION_TREE/MalletDecisionTree_CL_MR/g 20 | s/MALLET_CL_MAX_ENT/MalletMaxEnt_CL_MR/g 21 | s/MALLET_CL_NAIVE_BAYES_EM/MalletNaiveBayesEM_CL_MR/g 22 | s/MALLET_CL_NAIVE_BAYES/MalletNaiveBayes_CL_MR/g 23 | s/MALLET_CL_WINNOW/MalletWinnow_CL_MR/g 24 | s/MALLET_SEQ_CRF_SG/MalletCRFSG_SEQ_MR/g 25 | s/MALLET_SEQ_CRF_VG/MalletCRFVG_SEQ_MR/g 26 | s/MALLET_SEQ_CRF/MalletCRF_SEQ_MR/g 27 | s/COSTCLA_CL_WRAPPER/CostclaWrapper_CL_MR/g 28 | s/MALLET_SEQ_MEMM/MalletMEMM_SEQ_MR/g 29 | s/WEKA_CL_WRAPPER/WekaWrapper_CL_MR/g 30 | s/SKLEARN_CL_WRAPPER/SklearnWrapper_CL_MR/g 31 | s/KERAS_CL_WRAPPER/KerasWrapper_CL_MR/g 32 | 
s/LIBSVM_RG/LibSVM_RG_MR/g 33 | s/WEKA_RG_WRAPPER/WekaWrapper_RG_MR/g 34 | s/PYTORCH_RG_WRAPPER_Dense/PytorchWrapper_RG_DR/g 35 | s/SKLEARN_RG_WRAPPER/SklearnWrapper_RG_MR/g 36 | s/KERAS_RG_WRAPPER/WekaWrapper_RG_MR/g 37 | s/EXPORTER_ARFF_CLASS/ARFF_CL_MR/g 38 | s/EXPORTER_ARFF_REGRESSION/ARFF_RG_MR/g 39 | s/EXPORTER_CSV_CLASS/CSV_CL_MR/g 40 | s/EXPORTER_CSV_REGRESSION/CSV_RG_MR/g 41 | s/EXPORTER_JSON_SEQ/JSON_CL_MR/g 42 | s/EXPORTER_JSON_REGRESSION/JSON_RG_MR/g 43 | s/EXPORTER_JSON_CLASS/JSON_SEQ_MR/g 44 | s/EXPORTER_LIBSVM_CLASS/LibSVM_CL_MR/g 45 | s/EXPORTER_LIBSVM_REGRESSION/LibSVM_RG_MR/g 46 | s/EXPORTER_MATRIXMARKET2_CLASS/MatrixMarket2_CL_MR/g 47 | s/EXPORTER_MATRIXMARKET2_REGRESSION/MatrixMatket2_RG_MR/g 48 | s/LIBSVM_CL/LibSVM_CL_MR/g 49 | s/learningframework.Exporter/learningframework.export.Exporter/g 50 | ' "$f" # quoted so that a path containing blanks or glob characters is passed to sed as one argument 51 | done 52 | -------------------------------------------------------------------------------- /commons-cli.diff: -------------------------------------------------------------------------------- 1 | Index: src/main/java/org/apache/commons/cli/DefaultParser.java 2 | =================================================================== 3 | --- src/main/java/org/apache/commons/cli/DefaultParser.java (revision 1728877) 4 | +++ src/main/java/org/apache/commons/cli/DefaultParser.java (working copy) 5 | @@ -43,6 +43,10 @@ 6 | */ 7 | protected boolean stopAtNonOption; 8 | 9 | + protected boolean ignoreUnknownOptions = false; 10 | + 11 | + public void setIgnoreUnknownOptions(boolean flag) { ignoreUnknownOptions = flag; } 12 | + 13 | /** The token currently processed. 
*/ 14 | protected String currentToken; 15 | 16 | @@ -342,7 +346,7 @@ 17 | */ 18 | private void handleUnknownToken(String token) throws ParseException 19 | { 20 | - if (token.startsWith("-") && token.length() > 1 && !stopAtNonOption) 21 | + if (token.startsWith("-") && token.length() > 1 && !stopAtNonOption && !ignoreUnknownOptions) 22 | { 23 | throw new UnrecognizedOptionException("Unrecognized option: " + token, token); 24 | } 25 | -------------------------------------------------------------------------------- /perl/TermBankTFIDF2lst.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/perl -w 2 | 3 | ## Example Perl script for converting a CSV file of TFIDF values as produced by TermRaider 4 | ## to a LST file that can be used with the ExtendedGazetteer or FeatureGazetteer from 5 | ## the StringAnnotation plugin. 6 | ## 7 | ## Usage: 8 | ## perl TermBankTFIDF2lst.pl < termbankfile.csv > tfidf.lst 9 | ## Then create a tfidf.def file with the following content: 10 | ## tfidf.lst:tfidf:: 11 | ## This pair of files (tfidf.def, tfidf.lst) can then be used with the 12 | ## ExtendedGazetteer to create new annotations for matching text or annotations 13 | ## or with the FeatureGazetteer to add the features to existing annotations. 14 | ## 15 | ## NOTE: the Gazetteer will store the values as Strings, not numbers! 
16 | ## 17 | use strict; 18 | 19 | my $linenr = 0; 20 | while(<STDIN>) { # read the term bank CSV line by line from standard input (see Usage) 21 | $linenr++; 22 | next if ($linenr < 3); # ignore header and summary lines 23 | chomp; 24 | my ($term,$lang,$type,$tfidf,$tfidfraw,$termfreq,$localDocFreq,$refDocFreq) = split(/\,/); 25 | print "$term\ttfidf=$tfidf\tdf=$localDocFreq\ttf=$termfreq\n"; 26 | } 27 | 28 | -------------------------------------------------------------------------------- /src/build/resources/findbugs-excluded.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 7 | 8 | 10 | 11 | 12 | 15 | 16 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | -------------------------------------------------------------------------------- /src/main/java/gate/plugin/learningframework/EvaluationMethod.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015-2016 The University Of Sheffield. 3 | * 4 | * This file is part of gateplugin-LearningFramework 5 | * (see https://github.com/GateNLP/gateplugin-LearningFramework). 6 | * 7 | * This program is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU Lesser General Public License as published by 9 | * the Free Software Foundation, either version 2.1 of the License, or 10 | * (at your option) any later version. 11 | * 12 | * This program is distributed in the hope that it will be useful, 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | * GNU Lesser General Public License for more details. 16 | * 17 | * You should have received a copy of the GNU Lesser General Public License 18 | * along with this software. If not, see . 
19 | */ 20 | package gate.plugin.learningframework; 21 | 22 | public enum EvaluationMethod { 23 | CROSSVALIDATION, HOLDOUT; 24 | } 25 | -------------------------------------------------------------------------------- /src/main/java/gate/plugin/learningframework/Globals.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015-2016 The University Of Sheffield. 3 | * 4 | * This file is part of gateplugin-LearningFramework 5 | * (see https://github.com/GateNLP/gateplugin-LearningFramework). 6 | * 7 | * This program is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU Lesser General Public License as published by 9 | * the Free Software Foundation, either version 2.1 of the License, or 10 | * (at your option) any later version. 11 | * 12 | * This program is distributed in the hope that it will be useful, 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | * GNU Lesser General Public License for more details. 16 | * 17 | * You should have received a copy of the GNU Lesser General Public License 18 | * along with this software. If not, see . 19 | */ 20 | package gate.plugin.learningframework; 21 | 22 | /** 23 | * 24 | * @author Johann Petrak 25 | */ 26 | public class Globals { 27 | public static final String outputClassFeature = "LF_target"; 28 | public static final String outputProbFeature = "LF_confidence"; 29 | public static final String outputSequenceSpanIDFeature = "LF_seq_span_id"; 30 | //In the case of NER, output instance annotations to temporary 31 | //AS, to keep them separate. 
32 | public static final String tempOutputASName = "tmp_outputas_for_ner"; 33 | public static final String savedModelDirectory = "savedModel"; 34 | public static final String trainFilename = "trainfile"; 35 | public static final String dataBasename = "data"; 36 | public static final String headerBasename = "header"; 37 | } 38 | -------------------------------------------------------------------------------- /src/main/java/gate/plugin/learningframework/LF_ExportBase.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015-2016 The University Of Sheffield. 3 | * 4 | * This file is part of gateplugin-LearningFramework 5 | * (see https://github.com/GateNLP/gateplugin-LearningFramework). 6 | * 7 | * This program is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU Lesser General Public License as published by 9 | * the Free Software Foundation, either version 2.1 of the License, or 10 | * (at your option) any later version. 11 | * 12 | * This program is distributed in the hope that it will be useful, 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | * GNU Lesser General Public License for more details. 16 | * 17 | * You should have received a copy of the GNU Lesser General Public License 18 | * along with this software. If not, see . 19 | */ 20 | package gate.plugin.learningframework; 21 | 22 | /** 23 | * Base class intended for the export PRs; inherits from LearningFrameworkPRBase. 24 | * It declares no additional parameters of its own. 
25 | * 26 | * @author Johann Petrak 27 | */ 28 | public abstract class LF_ExportBase extends LearningFrameworkPRBase { 29 | 30 | private static final long serialVersionUID = 2484394528950089187L; 31 | } 32 | -------------------------------------------------------------------------------- /src/main/java/gate/plugin/learningframework/LearningFrameworkPRBase.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015-2016 The University Of Sheffield. 3 | * 4 | * This file is part of gateplugin-LearningFramework 5 | * (see https://github.com/GateNLP/gateplugin-LearningFramework). 6 | * 7 | * This program is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU Lesser General Public License as published by 9 | * the Free Software Foundation, either version 2.1 of the License, or 10 | * (at your option) any later version. 11 | * 12 | * This program is distributed in the hope that it will be useful, 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | * GNU Lesser General Public License for more details. 16 | * 17 | * You should have received a copy of the GNU Lesser General Public License 18 | * along with this software. If not, see . 19 | */ 20 | package gate.plugin.learningframework; 21 | 22 | import gate.Controller; 23 | import gate.creole.metadata.CreoleParameter; 24 | import gate.creole.metadata.Optional; 25 | import gate.creole.metadata.RunTime; 26 | 27 | /** 28 | * Base class for all LearningFramework PRs providing the shared parameters. 
29 | */ 30 | @SuppressWarnings("serial") 31 | public abstract class LearningFrameworkPRBase 32 | extends AbstractDocumentProcessor { 33 | 34 | // ================================================================= 35 | // Creole Parameters for all the PRs that derive from this class 36 | // ================================================================= 37 | protected String inputASName; 38 | 39 | @RunTime 40 | @Optional 41 | @CreoleParameter 42 | public void setInputASName(String iasn) { 43 | this.inputASName = iasn; 44 | } 45 | 46 | public String getInputASName() { 47 | return this.inputASName; 48 | } 49 | 50 | protected String instanceType; 51 | 52 | @RunTime 53 | @Optional 54 | @CreoleParameter(defaultValue = "Token", comment = "The annotation type to " 55 | + "be treated as instance. This is required for some algorithms.") 56 | public void setInstanceType(String inst) { 57 | this.instanceType = inst; 58 | } 59 | 60 | public String getInstanceType() { 61 | return this.instanceType; 62 | } 63 | 64 | 65 | protected String algorithmParameters = ""; 66 | protected boolean algorithmParamtersChanged = true; 67 | 68 | @RunTime 69 | @Optional 70 | @CreoleParameter(comment = "Some of the learners take parameters. Parameters " 71 | + "can be entered here. 
For example, the LibSVM supports parameters.", defaultValue = "") 72 | public void setAlgorithmParameters(String learnerParams) { 73 | if(learnerParams == null) { 74 | learnerParams = ""; 75 | } 76 | if(learnerParams.equals(this.algorithmParameters)) { 77 | // do nothing 78 | } else { 79 | algorithmParamtersChanged = true; 80 | this.algorithmParameters = learnerParams; 81 | } 82 | } 83 | 84 | public String getAlgorithmParameters() { 85 | return this.algorithmParameters; 86 | } 87 | 88 | public boolean getAlgorithmParametersIsChanged() { 89 | boolean tmp = algorithmParamtersChanged; 90 | algorithmParamtersChanged = false; 91 | return tmp; 92 | } 93 | 94 | protected boolean debug = false; 95 | @RunTime 96 | @Optional 97 | @CreoleParameter(comment = "Enable debugging", defaultValue = "false") 98 | public void setDebug(Boolean value) { 99 | /* a null value (optional parameter left unset) means "off"; assigning it directly would throw a NullPointerException on unboxing */ debug = (value != null) && value; 100 | } 101 | 102 | public Boolean getDebug() { 103 | return debug; 104 | } 105 | 106 | @Override 107 | public void controllerFinished(Controller ctl, Throwable t) { 108 | 109 | } 110 | 111 | } 112 | -------------------------------------------------------------------------------- /src/main/java/gate/plugin/learningframework/ScalingMethod.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015-2016 The University Of Sheffield. 3 | * 4 | * This file is part of gateplugin-LearningFramework 5 | * (see https://github.com/GateNLP/gateplugin-LearningFramework). 6 | * 7 | * This program is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU Lesser General Public License as published by 9 | * the Free Software Foundation, either version 2.1 of the License, or 10 | * (at your option) any later version. 11 | * 12 | * This program is distributed in the hope that it will be useful, 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
/**
 * The scaling methods that can be selected.
 * <p>
 * The order of the constants is part of the contract of this enum
 * (ordinals and {@code values()}), so new entries should be appended.
 */
public enum ScalingMethod {
  /** Do not scale. */
  NONE,
  /** Applies to all features; presumably mean/variance scaling - confirm. */
  MEANVARIANCE_ALL_FEATURES,
  /** Applies to all features; presumably min/max scaling - confirm. */
  MINMAX_ALL_FEATURES;
  // Not enabled: UNIT_LENGTH_L2, which would normalize the vector to have
  // length one (l1 or l2 norm).
}
19 | */ 20 | 21 | package gate.plugin.learningframework.data; 22 | 23 | import cc.mallet.types.Alphabet; 24 | import gate.plugin.learningframework.features.CodeAs; 25 | import gate.plugin.learningframework.features.Datatype; 26 | import gate.plugin.learningframework.features.MissingValueTreatment; 27 | import gate.plugin.learningframework.mallet.LFAlphabet; 28 | import java.util.Objects; 29 | import org.apache.log4j.Logger; 30 | 31 | 32 | /** 33 | * Class that describes a single attribute/feature. 34 | * @author Johann Petrak 35 | */ 36 | public class Attribute { 37 | private static final Logger LOG = Logger.getLogger(Attribute.class.getName()); 38 | /** 39 | * Name of the attribute 40 | */ 41 | public String name; 42 | 43 | @Override 44 | public int hashCode() { 45 | int hash = 3; 46 | hash = 41 * hash + Objects.hashCode(this.name); 47 | hash = 41 * hash + this.index; 48 | return hash; 49 | } 50 | 51 | @Override 52 | public boolean equals(Object obj) { 53 | if (obj == null) { 54 | return false; 55 | } 56 | if (getClass() != obj.getClass()) { 57 | return false; 58 | } 59 | final Attribute other = (Attribute) obj; 60 | if (!Objects.equals(this.name, other.name)) { 61 | return false; 62 | } 63 | return this.index == other.index; 64 | } 65 | 66 | public Attribute(String name, int index, Datatype datatype, CodeAs codeAs, MissingValueTreatment mvt, LFAlphabet alphabet) { 67 | this.name = name; 68 | this.index = index; 69 | this.datatype = datatype; 70 | this.codeAs = codeAs; 71 | this.alphabet = alphabet; 72 | this.mvTreatment = mvt; 73 | } 74 | /** 75 | * Index/location of the attribute in a (sparse) feature vector. 
76 | */ 77 | public int index; 78 | /** 79 | * The type of the values of the attribute/feature 80 | */ 81 | public Datatype datatype; 82 | /** 83 | * If the attribute/feature is nominal, how the value is coded 84 | */ 85 | public CodeAs codeAs; 86 | 87 | public MissingValueTreatment mvTreatment; 88 | 89 | @Override 90 | public String toString() { 91 | return "Attribute{" + "name=" + name + ", index=" + index + ", datatype=" + datatype + ", codeAs=" + codeAs + ", mvt="+mvTreatment+ ", alphabet=" + alphabet + '}'; 92 | } 93 | /** 94 | * Dictionary of possible values and their codes if the attribute/feature 95 | * is nominal and coded as number 96 | */ 97 | public Alphabet alphabet; 98 | } 99 | -------------------------------------------------------------------------------- /src/main/java/gate/plugin/learningframework/data/CorpusRepresentation.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015-2016 The University Of Sheffield. 3 | * 4 | * This file is part of gateplugin-LearningFramework 5 | * (see https://github.com/GateNLP/gateplugin-LearningFramework). 6 | * 7 | * This program is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU Lesser General Public License as published by 9 | * the Free Software Foundation, either version 2.1 of the License, or 10 | * (at your option) any later version. 11 | * 12 | * This program is distributed in the hope that it will be useful, 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | * GNU Lesser General Public License for more details. 16 | * 17 | * You should have received a copy of the GNU Lesser General Public License 18 | * along with this software. If not, see . 
19 | */ 20 | 21 | package gate.plugin.learningframework.data; 22 | 23 | import gate.AnnotationSet; 24 | import gate.plugin.learningframework.features.SeqEncoder; 25 | import gate.plugin.learningframework.features.TargetType; 26 | import java.util.List; 27 | 28 | /** 29 | * The base class of all classes that handle the representation of instances. 30 | * The LearningFramework uses the MalletSeq and Mallet representations whenever possible. 31 | * The other subclasses so far are only used to convert from Mallet representation for 32 | * training, classification or export. 33 | * @author Johann Petrak 34 | */ 35 | public abstract class CorpusRepresentation { 36 | //protected FeatureInfo featureInfo; 37 | //protected ScalingMethod scalingMethod; 38 | //protected LFPipe pipe; 39 | 40 | /** 41 | * Returns whatever object the concrete representation uses to represent the instances. 42 | * In addition, each specific CorpusRepresentation subclass has a representation specific 43 | * method that returns the correct type of data, e.g. getRepresentationLibSVM 44 | * 45 | * @return the instance representation object 46 | */ 47 | public abstract Object getRepresentation(); 48 | 49 | //public abstract InstanceList getRepresentationMallet(); 50 | 51 | // NOTE: if the target type is NONE and the corpus representation is one to be used for clustering/LDA, 52 | // then the instanceAS is either an annotation covering the "document" or null, in which case the whole 53 | // document is used. The inputAS is the set of token annotations for the whole document. 54 | public abstract void add(AnnotationSet instancesAS, AnnotationSet sequenceAS, AnnotationSet inputAS, AnnotationSet classAS, String targetFeatureName, TargetType targetType, String instanceWeightFeature, String nameFeatureName, SeqEncoder seqEncoder); 55 | 56 | public abstract void finishAdding(); 57 | 58 | public abstract void startAdding(); 59 | 60 | /** 61 | * Returns the number of training instances added to this CR so far. 
62 | * 63 | * 64 | * @return number of instances added so far 65 | */ 66 | public abstract int nrInstances(); 67 | 68 | /** 69 | * Number of actual dimensions represented. 70 | * 71 | * @return 72 | */ 73 | public abstract int nrDimensions(); 74 | 75 | protected TargetType targetType; 76 | /** 77 | * Get the target type set for this corpus. 78 | * @return the target type 79 | */ 80 | public TargetType getTargetType() { 81 | return targetType; 82 | } 83 | /** 84 | * Set the target type for the corpus representation. 85 | * Normally, this is automatically set when the corpus representation 86 | * subclass is created and needs never to be changed. 87 | * @param val the target type 88 | */ 89 | public void setTargetType(TargetType val) { 90 | targetType = val; 91 | } 92 | 93 | /** 94 | * If we have labels, returns a list of strings, otherwise an empty list 95 | */ 96 | public abstract List getLabelList(); 97 | 98 | 99 | } 100 | -------------------------------------------------------------------------------- /src/main/java/gate/plugin/learningframework/data/CorpusRepresentationMalletRelated.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015-2016 The University Of Sheffield. 3 | * 4 | * This file is part of gateplugin-LearningFramework 5 | * (see https://github.com/GateNLP/gateplugin-LearningFramework). 6 | * 7 | * This program is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU Lesser General Public License as published by 9 | * the Free Software Foundation, either version 2.1 of the License, or 10 | * (at your option) any later version. 11 | * 12 | * This program is distributed in the hope that it will be useful, 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | * GNU Lesser General Public License for more details. 
16 | * 17 | * You should have received a copy of the GNU Lesser General Public License 18 | * along with this software. If not, see . 19 | */ 20 | 21 | package gate.plugin.learningframework.data; 22 | 23 | import cc.mallet.types.InstanceList; 24 | import gate.plugin.learningframework.ScalingMethod; 25 | import gate.plugin.learningframework.features.FeatureInfo; 26 | import gate.plugin.learningframework.mallet.LFPipe; 27 | 28 | /** 29 | * The base class of all classes that are somehow related or dependent on the 30 | * Mallet instance list and Mallet pipes. 31 | * 32 | * @author Johann Petrak 33 | */ 34 | public abstract class CorpusRepresentationMalletRelated extends CorpusRepresentation { 35 | protected FeatureInfo featureInfo; 36 | protected ScalingMethod scalingMethod; 37 | protected LFPipe pipe; 38 | 39 | 40 | public abstract InstanceList getRepresentationMallet(); 41 | 42 | @Override 43 | public void startAdding() {}; 44 | 45 | } 46 | -------------------------------------------------------------------------------- /src/main/java/gate/plugin/learningframework/data/CorpusRepresentationVolatileBase.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015-2016 The University Of Sheffield. 3 | * 4 | * This file is part of gateplugin-LearningFramework 5 | * (see https://github.com/GateNLP/gateplugin-LearningFramework). 6 | * 7 | * This program is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU Lesser General Public License as published by 9 | * the Free Software Foundation, either version 2.1 of the License, or 10 | * (at your option) any later version. 11 | * 12 | * This program is distributed in the hope that it will be useful, 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | * GNU Lesser General Public License for more details. 
16 | * 17 | * You should have received a copy of the GNU Lesser General Public License 18 | * along with this software. If not, see . 19 | */ 20 | package gate.plugin.learningframework.data; 21 | 22 | import gate.AnnotationSet; 23 | import gate.plugin.learningframework.features.SeqEncoder; 24 | import gate.plugin.learningframework.features.TargetType; 25 | import org.apache.log4j.Logger; 26 | 27 | /** 28 | * Common base class for non Mallet volatile representations. 29 | * 30 | * This is for representations which are "volatile" i.e. whenever something is 31 | * added it is not kept in memory. Data could get immediately written to a file 32 | * or database or immediately passed on to an online training algorithm 33 | * 34 | * @author Johann Petrak 35 | */ 36 | public abstract class CorpusRepresentationVolatileBase extends CorpusRepresentation { 37 | 38 | private Logger LOGGER = org.apache.log4j.Logger.getLogger(CorpusRepresentationVolatileBase.class); 39 | 40 | 41 | 42 | /** 43 | * Prevent the addition of new features or feature values when instances are added. 44 | */ 45 | public void stopGrowth() { 46 | // TODO: this may be useful for sparse volatile representations, not used yet 47 | } 48 | 49 | /** 50 | * Enable the addition of new features or feature values when instances are added. 51 | * After a CorpusRepresentationMallet instance is created, growth is enabled by default. 52 | */ 53 | public void startGrowth() { 54 | // TODO: this may be useful for sparse volatile representations, not used yet 55 | } 56 | 57 | @Override 58 | public abstract void add(AnnotationSet instancesAS, AnnotationSet sequenceAS, AnnotationSet inputAS, AnnotationSet classAS, String targetFeatureName, TargetType targetType, String instanceWeightFeature, String nameFeatureName, SeqEncoder seqEncoder); 59 | 60 | /** 61 | * Finish adding data to the CR. This may close or finish any channel for 62 | * passing on the data to a file, database or other sink. 
63 | * 64 | */ 65 | @Override 66 | public abstract void finishAdding(); 67 | 68 | 69 | } 70 | -------------------------------------------------------------------------------- /src/main/java/gate/plugin/learningframework/data/InstanceRepresentation.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015-2016 The University Of Sheffield. 3 | * 4 | * This file is part of gateplugin-LearningFramework 5 | * (see https://github.com/GateNLP/gateplugin-LearningFramework). 6 | * 7 | * This program is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU Lesser General Public License as published by 9 | * the Free Software Foundation, either version 2.1 of the License, or 10 | * (at your option) any later version. 11 | * 12 | * This program is distributed in the hope that it will be useful, 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | * GNU Lesser General Public License for more details. 16 | * 17 | * You should have received a copy of the GNU Lesser General Public License 18 | * along with this software. If not, see . 19 | */ 20 | package gate.plugin.learningframework.data; 21 | 22 | /** 23 | * Common interface to our own representations of learning instances. 24 | * 25 | * Learning instances represent features and optional target information. 26 | * Features map from a feature name (a String) to a feature value (an Object). 27 | * Specific InstanceRepresentations can limit the type of the value to e.g. 28 | * just floats. 29 | * The target information also maps a target property name (a String) to a 30 | * target property value (an Object). Again, specific implementations can 31 | * limit the available target property names and/or the type of their values. 
32 | * 33 | * @author Johann Petrak 34 | */ 35 | public interface InstanceRepresentation { 36 | 37 | public static final String TARGET_VALUE = "╔TARGETVALUE╗"; 38 | public static final String TARGET_COSTS = "╔TARGETCOSTS╗"; 39 | public static final String INSTANCE_WEIGHT = "╔INSTANCEWEIGHT╗"; 40 | public static final String HASMISSINGVALUE_FLAG = "╔HASMISSINGVALUE╗"; 41 | 42 | public InstanceRepresentation setFeature(String name, Object value); 43 | public Object getFeature(String name); 44 | public boolean hasFeature(String name); 45 | public int numFeatures(); 46 | public InstanceRepresentation setTargetValue(Object value); 47 | public boolean hasTarget(); 48 | public InstanceRepresentation setTargetCosts(Object value); 49 | public Object getTargetValue(); 50 | public InstanceRepresentation setInstanceWeight(double weight); 51 | public double getInstanceWeight(); 52 | public InstanceRepresentation setHasMissing(boolean flag); 53 | public boolean hasMissing(); 54 | } 55 | -------------------------------------------------------------------------------- /src/main/java/gate/plugin/learningframework/data/InstanceRepresentationDenseVolatile.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015-2016 The University Of Sheffield. 3 | * 4 | * This file is part of gateplugin-LearningFramework 5 | * (see https://github.com/GateNLP/gateplugin-LearningFramework). 6 | * 7 | * This program is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU Lesser General Public License as published by 9 | * the Free Software Foundation, either version 2.1 of the License, or 10 | * (at your option) any later version. 11 | * 12 | * This program is distributed in the hope that it will be useful, 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | * GNU Lesser General Public License for more details. 
16 | * 17 | * You should have received a copy of the GNU Lesser General Public License 18 | * along with this software. If not, see . 19 | */ 20 | package gate.plugin.learningframework.data; 21 | 22 | import java.util.HashMap; 23 | import java.util.Map; 24 | 25 | /** 26 | * Representation of a dense-representation, non-lasting learning instance. 27 | * 28 | * This is for when we create a dense representation of a learning instance 29 | * which will then immediately get converted or written, so a memory efficient 30 | * representation is not required. 31 | *

32 | * This representation just wraps a HashMap for storing all features and target 33 | * properties. 34 | *

35 | * NOTE: this does not support removing features for now! 36 | * 37 | * @author Johann Petrak 38 | */ 39 | public class InstanceRepresentationDenseVolatile implements InstanceRepresentation { 40 | protected Map map = new HashMap<>(); 41 | protected int numFeatures = 0; 42 | 43 | @Override 44 | public InstanceRepresentation setFeature(String name, Object value) { 45 | if(!map.containsKey(name)) { 46 | numFeatures += 1; 47 | } 48 | map.put(name, value); 49 | return this; 50 | } 51 | 52 | @Override 53 | public int numFeatures() { 54 | return numFeatures; 55 | } 56 | 57 | @Override 58 | public Object getFeature(String name) { 59 | return map.get(name); 60 | } 61 | 62 | @Override 63 | public InstanceRepresentation setTargetValue(Object value) { 64 | map.put(TARGET_VALUE, value); 65 | return this; 66 | } 67 | 68 | @Override 69 | public Object getTargetValue() { 70 | return map.get(TARGET_VALUE); 71 | } 72 | 73 | @Override 74 | public InstanceRepresentation setTargetCosts(Object value) { 75 | map.put(TARGET_COSTS, value); 76 | return this; 77 | } 78 | 79 | @Override 80 | public InstanceRepresentation setInstanceWeight(double weight) { 81 | map.put(INSTANCE_WEIGHT,weight); 82 | return this; 83 | } 84 | 85 | @Override 86 | public double getInstanceWeight() { 87 | return (double)map.get(INSTANCE_WEIGHT); 88 | } 89 | 90 | @Override 91 | public boolean hasFeature(String name) { 92 | return map.containsKey(name); 93 | } 94 | 95 | @Override 96 | public boolean hasTarget() { 97 | return map.containsKey(TARGET_VALUE); 98 | } 99 | 100 | @Override 101 | public InstanceRepresentation setHasMissing(boolean flag) { 102 | map.put(HASMISSINGVALUE_FLAG, flag); 103 | return this; 104 | } 105 | 106 | @Override 107 | public boolean hasMissing() { 108 | return map.containsKey(HASMISSINGVALUE_FLAG); 109 | } 110 | 111 | @Override 112 | public String toString() { 113 | return "{InstanceRepresentationDenseVolatile: "+map.toString()+"}"; 114 | } 115 | 116 | } 117 | 
-------------------------------------------------------------------------------- /src/main/java/gate/plugin/learningframework/engines/Algorithm.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015-2016 The University Of Sheffield. 3 | * 4 | * This file is part of gateplugin-LearningFramework 5 | * (see https://github.com/GateNLP/gateplugin-LearningFramework). 6 | * 7 | * This program is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU Lesser General Public License as published by 9 | * the Free Software Foundation, either version 2.1 of the License, or 10 | * (at your option) any later version. 11 | * 12 | * This program is distributed in the hope that it will be useful, 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | * GNU Lesser General Public License for more details. 16 | * 17 | * You should have received a copy of the GNU Lesser General Public License 18 | * along with this software. If not, see . 19 | */ 20 | 21 | package gate.plugin.learningframework.engines; 22 | 23 | /** 24 | * All algorithms implement this interface. 25 | * @author johann 26 | */ 27 | public interface Algorithm { 28 | public Class getTrainerClass(); 29 | public Class getEngineClass(); 30 | public AlgorithmKind getAlgorithmKind(); 31 | // For those algorithms called SOMETHING_SPECIFY_CLASS, the trainer class is initially null, 32 | // but we use this method to set it to whatever class the user actually specifies 33 | public void setTrainerClass(Class trainerClass); 34 | } 35 | -------------------------------------------------------------------------------- /src/main/java/gate/plugin/learningframework/engines/AlgorithmClustering.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015-2016 The University Of Sheffield. 
3 | * 4 | * This file is part of gateplugin-LearningFramework 5 | * (see https://github.com/GateNLP/gateplugin-LearningFramework). 6 | * 7 | * This program is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU Lesser General Public License as published by 9 | * the Free Software Foundation, either version 2.1 of the License, or 10 | * (at your option) any later version. 11 | * 12 | * This program is distributed in the hope that it will be useful, 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | * GNU Lesser General Public License for more details. 16 | * 17 | * You should have received a copy of the GNU Lesser General Public License 18 | * along with this software. If not, see . 19 | */ 20 | 21 | package gate.plugin.learningframework.engines; 22 | 23 | import gate.util.GateRuntimeException; 24 | 25 | 26 | /** 27 | * 28 | * @author johann 29 | */ 30 | public enum AlgorithmClustering implements Algorithm { 31 | //GensimWrapper_CLUS_DR(EngineDVFileJsonGensim.class,null), 32 | GensimWrapper_CLUS_DR(null,null), 33 | MalletLDA_CLUS_MR(EngineMBTopicsLDA.class,null) 34 | ; 35 | private AlgorithmClustering() { 36 | 37 | } 38 | private AlgorithmClustering(Class engineClass, Class algorithmClass) { 39 | this.engineClass = engineClass; 40 | this.trainerClass = algorithmClass; 41 | this.algorithmKind = AlgorithmKind.CLUSTERING; 42 | } 43 | private Class engineClass; 44 | private Class trainerClass; 45 | private AlgorithmKind algorithmKind; 46 | @Override 47 | public Class getEngineClass() { return engineClass; } 48 | @Override 49 | public Class getTrainerClass() { return trainerClass; } 50 | @Override 51 | public AlgorithmKind getAlgorithmKind() { return algorithmKind; } 52 | 53 | @Override 54 | public void setTrainerClass(Class trainerClass) { 55 | // not used for now, we avoid setting anything here because 56 | // findBugs warns about this and is 
a PITA 57 | throw new GateRuntimeException("setTrainerClass not implemented for now for AlgorithmClustering"); 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /src/main/java/gate/plugin/learningframework/engines/AlgorithmKind.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015-2016 The University Of Sheffield. 3 | * 4 | * This file is part of gateplugin-LearningFramework 5 | * (see https://github.com/GateNLP/gateplugin-LearningFramework). 6 | * 7 | * This program is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU Lesser General Public License as published by 9 | * the Free Software Foundation, either version 2.1 of the License, or 10 | * (at your option) any later version. 11 | * 12 | * This program is distributed in the hope that it will be useful, 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | * GNU Lesser General Public License for more details. 16 | * 17 | * You should have received a copy of the GNU Lesser General Public License 18 | * along with this software. If not, see . 19 | */ 20 | 21 | package gate.plugin.learningframework.engines; 22 | 23 | /** 24 | * Describe what task an algorithm is performing. 
/**
 * Describe what task an algorithm is performing.
 *
 * @author johann
 */
public enum AlgorithmKind {
  /** Map an instance to a nominal value. */
  CLASSIFIER,
  /** Map instances within a sequence to nominal values. */
  SEQUENCE_TAGGER,
  /** Map an instance to a numeric value. */
  REGRESSOR,
  /** Assign one or more cluster ids (integers) to instances. */
  CLUSTERING
}
19 | */ 20 | 21 | package gate.plugin.learningframework.engines; 22 | 23 | 24 | /** 25 | * 26 | * @author johann 27 | */ 28 | public enum AlgorithmRegression implements Algorithm { 29 | // KerasWrapper_RG_DR(EngineDVFileJsonKeras.class,null), 30 | // KerasWrapper_RG_MR(EngineKerasWrapper.class,null), 31 | LibSVM_RG_MR(EngineMBLibSVM.class,null), 32 | //GenericServer_RG_MR(EngineServer.class,null), 33 | PytorchWrapper_RG_DR(EngineDVFileJsonPyTorch.class,null), 34 | SklearnWrapper_RG_MR(EngineMBSklearnWrapper.class,null), 35 | WekaWrapper_RG_MR(EngineMBWekaWrapper.class,null), 36 | //TensorflowWrapper_RG_MR(EngineTensorFlowWrapper.class,null), 37 | ; 38 | private AlgorithmRegression() { 39 | 40 | } 41 | private AlgorithmRegression(Class engineClass, Class algorithmClass) { 42 | this.engineClass = engineClass; 43 | this.trainerClass = algorithmClass; 44 | this.algorithmKind = AlgorithmKind.REGRESSOR; 45 | } 46 | private Class engineClass; 47 | private Class trainerClass; 48 | private AlgorithmKind algorithmKind; 49 | @Override 50 | public Class getEngineClass() { return engineClass; } 51 | @Override 52 | public Class getTrainerClass() { return trainerClass; } 53 | @Override 54 | public AlgorithmKind getAlgorithmKind() { return algorithmKind; } 55 | 56 | @Override 57 | public void setTrainerClass(Class trainerClass) { 58 | this.trainerClass = trainerClass; 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /src/main/java/gate/plugin/learningframework/engines/EngineDV.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) The University of Sheffield. 3 | * 4 | * This file is free software, licensed under the 5 | * GNU Library General Public License, Version 2.1, June 1991. 6 | * See the file LICENSE.txt that comes with this software. 
7 | * 8 | */ 9 | package gate.plugin.learningframework.engines; 10 | 11 | /** 12 | * Common base class for all Engines which use the Dense Volatile representation 13 | * 14 | * 15 | * @author Johann Petrak 16 | */ 17 | public abstract class EngineDV extends Engine { 18 | 19 | } 20 | -------------------------------------------------------------------------------- /src/main/java/gate/plugin/learningframework/engines/EngineDVFileJsonKeras.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) The University of Sheffield. 3 | * 4 | * This file is free software, licensed under the 5 | * GNU Library General Public License, Version 2.1, June 1991. 6 | * See the file LICENSE.txt that comes with this software. 7 | * 8 | */ 9 | package gate.plugin.learningframework.engines; 10 | 11 | /** 12 | * Keras wrapper for dense vector representation 13 | * 14 | * 15 | * @author Johann Petrak 16 | */ 17 | public class EngineDVFileJsonKeras extends EngineDVFileJson { 18 | 19 | public EngineDVFileJsonKeras() { 20 | WRAPPER_NAME = "FileJsonKeras"; 21 | } 22 | 23 | 24 | } -------------------------------------------------------------------------------- /src/main/java/gate/plugin/learningframework/engines/EngineDVFileJsonPyTorch.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) The University of Sheffield. 3 | * 4 | * This file is free software, licensed under the 5 | * GNU Library General Public License, Version 2.1, June 1991. 6 | * See the file LICENSE.txt that comes with this software. 7 | * 8 | */ 9 | package gate.plugin.learningframework.engines; 10 | 11 | /** 12 | * Common base class for all Engines which are dense, volatile and write JSON to a file. 
13 | * 14 | * 15 | * @author Johann Petrak 16 | */ 17 | public class EngineDVFileJsonPyTorch extends EngineDVFileJson { 18 | 19 | public EngineDVFileJsonPyTorch() { 20 | WRAPPER_NAME = "FileJsonPyTorch"; 21 | } 22 | } -------------------------------------------------------------------------------- /src/main/java/gate/plugin/learningframework/engines/EngineKerasWrapper.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015-2016 The University Of Sheffield. 3 | * 4 | * This file is part of gateplugin-LearningFramework 5 | * (see https://github.com/GateNLP/gateplugin-LearningFramework). 6 | * 7 | * This program is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU Lesser General Public License as published by 9 | * the Free Software Foundation, either version 2.1 of the License, or 10 | * (at your option) any later version. 11 | * 12 | * This program is distributed in the hope that it will be useful, 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | * GNU Lesser General Public License for more details. 16 | * 17 | * You should have received a copy of the GNU Lesser General Public License 18 | * along with this software. If not, see . 19 | */ 20 | package gate.plugin.learningframework.engines; 21 | 22 | /** 23 | * An engine that represents Python Keras through en external process. 24 | * 25 | * This requires that the user configures the location of where keras-wrapper is installed. 26 | * This can be done by setting the environment variable KERAS_WRAPPPER_HOME, the Java property 27 | * gate.plugin.learningframework.keraswrapper.home or by adding another yaml file "keras.yaml" 28 | * to the data directory which contains the setting keraswrapper.home. 
29 | * If the path starts with a slash 30 | * it is an absolute path, otherwise the path is resolved relative to the 31 | * directory. 32 | * 33 | * 34 | * @author Johann Petrak 35 | */ 36 | public class EngineKerasWrapper extends EngineMBPythonNetworksBase { 37 | 38 | static class KerasModel { } 39 | 40 | public EngineKerasWrapper() { 41 | WRAPPER_NAME = "KerasWrapper"; 42 | ENV_WRAPPER_HOME = "KERAS_WRAPPER_HOME"; 43 | PROP_WRAPPER_HOME = "gate.plugin.learningframework.keraswrapper.home"; 44 | YAML_FILE = "keras.yaml"; 45 | YAML_SETTING_WRAPPER_HOME = "keraswrapper.home"; 46 | SCRIPT_APPLY_BASENAME = "kerasWrapperApply"; 47 | SCRIPT_TRAIN_BASENAME = "kerasWrapperTrain"; 48 | SCRIPT_EVAL_BASENAME = "kerasWrapperEval"; 49 | MODEL_BASENAME = "kerasmodel"; 50 | MODEL_INSTANCE = new KerasModel(); 51 | } 52 | 53 | 54 | 55 | } 56 | -------------------------------------------------------------------------------- /src/main/java/gate/plugin/learningframework/engines/EngineMB.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) The University of Sheffield. 3 | * 4 | * This file is free software, licensed under the 5 | * GNU Library General Public License, Version 2.1, June 1991. 6 | * See the file LICENSE.txt that comes with this software. 
7 | * 8 | */ 9 | package gate.plugin.learningframework.engines; 10 | 11 | import cc.mallet.types.Alphabet; 12 | import gate.plugin.learningframework.data.CorpusRepresentation; 13 | import gate.plugin.learningframework.data.CorpusRepresentationMallet; 14 | import gate.plugin.learningframework.data.CorpusRepresentationMalletLDA; 15 | import gate.plugin.learningframework.data.CorpusRepresentationMalletSeq; 16 | import gate.plugin.learningframework.data.CorpusRepresentationMalletTarget; 17 | import gate.plugin.learningframework.features.FeatureInfo; 18 | import gate.plugin.learningframework.features.TargetType; 19 | import gate.plugin.learningframework.mallet.LFPipe; 20 | import gate.util.GateRuntimeException; 21 | import java.io.File; 22 | import java.net.URL; 23 | import java.util.ArrayList; 24 | 25 | /** 26 | * Common base class for all Engines which use the Mallet Corpus Representation. 27 | * 28 | * This is used to gather the code for all engines which use a CorpusRepresentationMallet 29 | * internally even if the algorithm wrapped by the engine is not a Mallet algorithm. 30 | * This is the case for other Java algorithms like LibSVM where the Mallet 31 | * corpus representation is used to create a temporary representation which is 32 | * then converted to the algorithm specific representation before training. 
33 | * 34 | * @author Johann Petrak 35 | */ 36 | public abstract class EngineMB extends Engine { 37 | 38 | protected CorpusRepresentationMallet corpusRepresentation; 39 | 40 | @Override 41 | public CorpusRepresentation getCorpusRepresentation() { 42 | return corpusRepresentation; 43 | } 44 | 45 | protected void updateInfo() { 46 | //System.err.println("In updateInfo, model is "+model); 47 | if(model!=null) { 48 | info.modelClass = model.getClass().getName(); 49 | } 50 | info.nrTrainingInstances = corpusRepresentation.getRepresentationMallet().size(); 51 | info.nrTrainingDimensions = corpusRepresentation.getRepresentationMallet().getDataAlphabet().size(); 52 | LFPipe pipe = corpusRepresentation.getPipe(); 53 | Alphabet targetAlph = pipe.getTargetAlphabet(); 54 | if(targetAlph == null) { 55 | info.nrTargetValues = 0; 56 | } else { 57 | info.nrTargetValues = targetAlph.size(); 58 | //info.classLabels = 59 | Object[] objs = targetAlph.toArray(); 60 | ArrayList labels = new ArrayList<>(); 61 | for(Object obj : objs) { labels.add(obj.toString()); } 62 | info.classLabels = labels; 63 | } 64 | 65 | } 66 | 67 | @Override 68 | protected void saveCorpusRepresentation(File directory) { 69 | corpusRepresentation.finishAdding(); 70 | corpusRepresentation.savePipe(directory); 71 | } 72 | 73 | @Override 74 | protected void loadAndSetCorpusRepresentation(URL directory) { 75 | // TODO: Special case if the corpus representaiton is for clustering or we 76 | // override in the Engine!! 
77 | if(corpusRepresentation==null) { 78 | corpusRepresentation = CorpusRepresentationMalletTarget.load(directory); 79 | } 80 | } 81 | 82 | 83 | @Override 84 | protected void initWhenCreating(URL directory, Algorithm algorithm, 85 | String parameters, FeatureInfo fi, TargetType tt) { 86 | if(null == algorithm.getAlgorithmKind()) { 87 | throw new GateRuntimeException("Not a usable algorithm kind for now with Mallet based engines: "+algorithm); 88 | } else { 89 | switch (algorithm.getAlgorithmKind()) { 90 | case SEQUENCE_TAGGER: 91 | corpusRepresentation = new CorpusRepresentationMalletSeq(fi); 92 | break; 93 | case REGRESSOR: 94 | case CLASSIFIER: 95 | corpusRepresentation = new CorpusRepresentationMalletTarget(fi, tt); 96 | break; 97 | case CLUSTERING: 98 | corpusRepresentation = new CorpusRepresentationMalletLDA(fi); 99 | break; 100 | default: 101 | throw new GateRuntimeException("Not a usable algorithm kind for now with Mallet based engines: "+algorithm); 102 | } 103 | } 104 | this.featureInfo = fi; 105 | corpusRepresentation.startAdding(); 106 | } 107 | 108 | } 109 | -------------------------------------------------------------------------------- /src/main/java/gate/plugin/learningframework/engines/EngineMBCostclaWrapper.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015-2016 The University Of Sheffield. 3 | * 4 | * This file is part of gateplugin-LearningFramework 5 | * (see https://github.com/GateNLP/gateplugin-LearningFramework). 6 | * 7 | * This program is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU Lesser General Public License as published by 9 | * the Free Software Foundation, either version 2.1 of the License, or 10 | * (at your option) any later version. 
11 | * 12 | * This program is distributed in the hope that it will be useful, 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | * GNU Lesser General Public License for more details. 16 | * 17 | * You should have received a copy of the GNU Lesser General Public License 18 | * along with this software. If not, see . 19 | */ 20 | 21 | package gate.plugin.learningframework.engines; 22 | 23 | /** 24 | * 25 | * @author Johann Petrak 26 | */ 27 | public class EngineMBCostclaWrapper extends EngineMBSklearnBase { 28 | 29 | public EngineMBCostclaWrapper() { 30 | WRAPPER_NAME = "SklearnWrapper"; 31 | ENV_WRAPPER_HOME = "SKLEARN_WRAPPER_HOME"; 32 | PROP_WRAPPER_HOME = "gate.plugin.learningframework.sklearnwrapper.home"; 33 | YAML_FILE = "sklearn.yaml"; 34 | YAML_SETTING_WRAPPER_HOME = "sklearnwrapper.home"; 35 | SCRIPT_APPLY_BASENAME = "costclaWrapperApply"; 36 | SCRIPT_TRAIN_BASENAME = "costclaWrapperTrain"; 37 | SCRIPT_EVAL_BASENAME = "costclaWrapperEval"; 38 | MODEL_BASENAME = "costclamodel"; 39 | MODEL_INSTANCE = new CostclaModel(); 40 | } 41 | 42 | static class CostclaModel { } 43 | 44 | } 45 | -------------------------------------------------------------------------------- /src/main/java/gate/plugin/learningframework/engines/EngineMBMallet.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015-2016 The University Of Sheffield. 3 | * 4 | * This file is part of gateplugin-LearningFramework 5 | * (see https://github.com/GateNLP/gateplugin-LearningFramework). 6 | * 7 | * This program is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU Lesser General Public License as published by 9 | * the Free Software Foundation, either version 2.1 of the License, or 10 | * (at your option) any later version. 
11 | * 12 | * This program is distributed in the hope that it will be useful, 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | * GNU Lesser General Public License for more details. 16 | * 17 | * You should have received a copy of the GNU Lesser General Public License 18 | * along with this software. If not, see . 19 | */ 20 | 21 | 22 | package gate.plugin.learningframework.engines; 23 | 24 | import gate.plugin.learningframework.data.CorpusRepresentationMallet; 25 | import static gate.plugin.learningframework.engines.Engine.FILENAME_MODEL; 26 | import gate.util.GateRuntimeException; 27 | import java.io.File; 28 | import java.io.FileOutputStream; 29 | import java.io.IOException; 30 | import java.io.ObjectOutputStream; 31 | import org.apache.log4j.Logger; 32 | 33 | /** 34 | * Base class for all engines which wrap a Mallet algorithm. 35 | * 36 | * This kind of engines always used Mallet corpus representation. 37 | * 38 | * @author Johann Petrak 39 | */ 40 | public abstract class EngineMBMallet extends EngineMB { 41 | 42 | private static Logger LOGGER = Logger.getLogger(EngineMBMallet.class); 43 | 44 | public CorpusRepresentationMallet getCorpusRepresentationMallet() { 45 | return corpusRepresentation; 46 | } 47 | 48 | 49 | @Override 50 | protected void saveModel(File directory) { 51 | if(model==null) { 52 | // TODO: this should eventually throw an exception, we leave it for testing now. 
53 | System.err.println("WARNING: saving a null model!!!"); 54 | } 55 | ObjectOutputStream oos = null; 56 | try { 57 | oos = new ObjectOutputStream(new FileOutputStream(new File(directory, FILENAME_MODEL))); 58 | oos.writeObject(model); 59 | } catch (IOException e) { 60 | throw new GateRuntimeException("Could not store Mallet model", e); 61 | } finally { 62 | if (oos != null) { 63 | try { 64 | oos.close(); 65 | } catch (IOException ex) { 66 | LOGGER.error("Could not close object output stream", ex); 67 | } 68 | } 69 | } 70 | } 71 | 72 | 73 | 74 | } 75 | -------------------------------------------------------------------------------- /src/main/java/gate/plugin/learningframework/engines/EngineMBSklearnWrapper.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015-2016 The University Of Sheffield. 3 | * 4 | * This file is part of gateplugin-LearningFramework 5 | * (see https://github.com/GateNLP/gateplugin-LearningFramework). 6 | * 7 | * This program is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU Lesser General Public License as published by 9 | * the Free Software Foundation, either version 2.1 of the License, or 10 | * (at your option) any later version. 11 | * 12 | * This program is distributed in the hope that it will be useful, 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | * GNU Lesser General Public License for more details. 16 | * 17 | * You should have received a copy of the GNU Lesser General Public License 18 | * along with this software. If not, see . 19 | */ 20 | package gate.plugin.learningframework.engines; 21 | 22 | /** 23 | * An engine that represents Python Scikit Learn through en external process. 24 | * 25 | * This requires that the user configures the location of where sklearn-wrapper is installed. 
26 | * This can be done by setting the environment variable SKLEARN_WRAPPPER_HOME, the Java property 27 | * gate.plugin.learningframework.sklearnwrapper.home or by adding another yaml file "sklearn.yaml" 28 | * to the data directory which contains the setting sklearnwrapper.home. 29 | * If the path starts with a slash 30 | * it is an absolute path, otherwise the path is resolved relative to the 31 | * directory. 32 | * 33 | * 34 | * @author Johann Petrak 35 | */ 36 | public class EngineMBSklearnWrapper extends EngineMBSklearnBase { 37 | 38 | public EngineMBSklearnWrapper() { 39 | WRAPPER_NAME = "SklearnWrapper"; 40 | ENV_WRAPPER_HOME = "SKLEARN_WRAPPER_HOME"; 41 | PROP_WRAPPER_HOME = "gate.plugin.learningframework.sklearnwrapper.home"; 42 | YAML_FILE = "sklearn.yaml"; 43 | YAML_SETTING_WRAPPER_HOME = "sklearnwrapper.home"; 44 | SCRIPT_APPLY_BASENAME = "sklearnWrapperApply"; 45 | SCRIPT_TRAIN_BASENAME = "sklearnWrapperTrain"; 46 | SCRIPT_EVAL_BASENAME = "sklearnWrapperEval"; 47 | MODEL_BASENAME = "sklmodel"; 48 | MODEL_INSTANCE = new SklearnModel(); 49 | } 50 | 51 | static class SklearnModel { } 52 | 53 | } 54 | -------------------------------------------------------------------------------- /src/main/java/gate/plugin/learningframework/engines/EngineMBTensorFlowWrapper.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015-2016 The University Of Sheffield. 3 | * 4 | * This file is part of gateplugin-LearningFramework 5 | * (see https://github.com/GateNLP/gateplugin-LearningFramework). 6 | * 7 | * This program is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU Lesser General Public License as published by 9 | * the Free Software Foundation, either version 2.1 of the License, or 10 | * (at your option) any later version. 
11 | * 12 | * This program is distributed in the hope that it will be useful, 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | * GNU Lesser General Public License for more details. 16 | * 17 | * You should have received a copy of the GNU Lesser General Public License 18 | * along with this software. If not, see <http://www.gnu.org/licenses/>. 19 | */ 20 | package gate.plugin.learningframework.engines; 21 | 22 | /** 23 | * An engine that represents Python TensorFlow through an external process. 24 | * 25 | * This requires that the user configures the location where tensorflow-wrapper is installed. 26 | * This can be done by setting the environment variable TENSORFLOW_WRAPPER_HOME, the Java property 27 | * gate.plugin.learningframework.tensorflowwrapper.home or by adding another yaml file "tensorflow.yaml" 28 | * to the data directory which contains the setting tensorflowwrapper.home. 29 | * If the path starts with a slash 30 | * it is an absolute path, otherwise the path is resolved relative to the 31 | * directory.
32 | * 33 | * 34 | * @author Johann Petrak 35 | */ 36 | public class EngineMBTensorFlowWrapper extends EngineMBPythonNetworksBase { 37 | 38 | static class TensorFLowModel { } 39 | 40 | public EngineMBTensorFlowWrapper() { 41 | WRAPPER_NAME = "TensorFlowWrapper"; 42 | ENV_WRAPPER_HOME = "TENSORFLOW_WRAPPER_HOME"; 43 | PROP_WRAPPER_HOME = "gate.plugin.learningframework.tensorflowwrapper.home"; 44 | YAML_FILE = "tensorflow.yaml"; 45 | YAML_SETTING_WRAPPER_HOME = "tensorflowwrapper.home"; 46 | SCRIPT_APPLY_BASENAME = "tensorflowWrapperApply"; 47 | SCRIPT_TRAIN_BASENAME = "tensorflowWrapperTrain"; 48 | SCRIPT_EVAL_BASENAME = "tensorflowWrapperEval"; 49 | MODEL_BASENAME = "tensorflowmodel"; 50 | MODEL_INSTANCE = new TensorFLowModel(); 51 | } 52 | 53 | 54 | 55 | } 56 | -------------------------------------------------------------------------------- /src/main/java/gate/plugin/learningframework/engines/EvaluationResult.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015-2016 The University Of Sheffield. 3 | * 4 | * This file is part of gateplugin-LearningFramework 5 | * (see https://github.com/GateNLP/gateplugin-LearningFramework). 6 | * 7 | * This program is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU Lesser General Public License as published by 9 | * the Free Software Foundation, either version 2.1 of the License, or 10 | * (at your option) any later version. 11 | * 12 | * This program is distributed in the hope that it will be useful, 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | * GNU Lesser General Public License for more details. 16 | * 17 | * You should have received a copy of the GNU Lesser General Public License 18 | * along with this software. If not, see . 
/**
 * Root type for the results produced by the internal, library-specific
 * evaluation strategies.
 *
 * The class currently has no members of its own.
 *
 * @author Johann Petrak
 */
public class EvaluationResult {
  // Reserved for later use: a field holding the library-specific evaluation
  // result object, e.g.
  //   private Object internalEvaluationResult;
}
25 | * 26 | * @author Johann Petrak 27 | */ 28 | public class EvaluationResultClHO extends EvaluationResultClassification { 29 | public double trainingFraction; 30 | public boolean stratified; 31 | public int nrRepeats; 32 | 33 | @Override 34 | public String toString() { 35 | return "EvaluationResultClHO{" + "accuracy=" + accuracyEstimate + ",trainingFraction="+trainingFraction+",nrRepeats="+nrRepeats+ 36 | ",stratified="+stratified + "}"; 37 | } 38 | 39 | } 40 | -------------------------------------------------------------------------------- /src/main/java/gate/plugin/learningframework/engines/EvaluationResultClXval.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015-2016 The University Of Sheffield. 3 | * 4 | * This file is part of gateplugin-LearningFramework 5 | * (see https://github.com/GateNLP/gateplugin-LearningFramework). 6 | * 7 | * This program is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU Lesser General Public License as published by 9 | * the Free Software Foundation, either version 2.1 of the License, or 10 | * (at your option) any later version. 11 | * 12 | * This program is distributed in the hope that it will be useful, 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | * GNU Lesser General Public License for more details. 16 | * 17 | * You should have received a copy of the GNU Lesser General Public License 18 | * along with this software. If not, see . 19 | */ 20 | 21 | package gate.plugin.learningframework.engines; 22 | 23 | /** 24 | * A class that represents the result of a crossvalidation or hold-out evaluation. 
25 | * 26 | * @author Johann Petrak 27 | */ 28 | public class EvaluationResultClXval extends EvaluationResultClassification { 29 | public int nrFolds; 30 | public boolean stratified; 31 | 32 | @Override 33 | public String toString() { 34 | return "EvaluationResultClXval{" + "accuracy=" + accuracyEstimate + ",nrFolds="+nrFolds+ 35 | ",stratified="+stratified + "}"; 36 | } 37 | 38 | } 39 | -------------------------------------------------------------------------------- /src/main/java/gate/plugin/learningframework/engines/EvaluationResultClassification.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015-2016 The University Of Sheffield. 3 | * 4 | * This file is part of gateplugin-LearningFramework 5 | * (see https://github.com/GateNLP/gateplugin-LearningFramework). 6 | * 7 | * This program is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU Lesser General Public License as published by 9 | * the Free Software Foundation, either version 2.1 of the License, or 10 | * (at your option) any later version. 11 | * 12 | * This program is distributed in the hope that it will be useful, 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | * GNU Lesser General Public License for more details. 16 | * 17 | * You should have received a copy of the GNU Lesser General Public License 18 | * along with this software. If not, see . 19 | */ 20 | 21 | package gate.plugin.learningframework.engines; 22 | 23 | /** 24 | * A class that represents the result of a classification evaluation. 
25 | * 26 | * @author Johann Petrak 27 | */ 28 | public abstract class EvaluationResultClassification extends EvaluationResult { 29 | public double accuracyEstimate; 30 | public int nrCorrect; // number of correct over all folds and all repeats 31 | public int nrIncorrect; // number of incorrect over all folds and all repeats 32 | 33 | // TODO: correct implementation of equals and hashCode!?! 34 | 35 | 36 | } 37 | -------------------------------------------------------------------------------- /src/main/java/gate/plugin/learningframework/engines/EvaluationResultRegression.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015-2016 The University Of Sheffield. 3 | * 4 | * This file is part of gateplugin-LearningFramework 5 | * (see https://github.com/GateNLP/gateplugin-LearningFramework). 6 | * 7 | * This program is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU Lesser General Public License as published by 9 | * the Free Software Foundation, either version 2.1 of the License, or 10 | * (at your option) any later version. 11 | * 12 | * This program is distributed in the hope that it will be useful, 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | * GNU Lesser General Public License for more details. 16 | * 17 | * You should have received a copy of the GNU Lesser General Public License 18 | * along with this software. If not, see . 19 | */ 20 | 21 | package gate.plugin.learningframework.engines; 22 | 23 | /** 24 | * A class that represents the result of a classification evaluation. 
25 | * 26 | * @author Johann Petrak 27 | */ 28 | public abstract class EvaluationResultRegression extends EvaluationResult { 29 | public double rmse; 30 | public double nrTotal; // number of instances 31 | public double sumSqrErr; // sum of squared errors 32 | public double sumAbsErr; // sum of absolute errors 33 | 34 | // TODO: correct implementation of equals and hashCode!?! 35 | 36 | 37 | } 38 | -------------------------------------------------------------------------------- /src/main/java/gate/plugin/learningframework/engines/EvaluationResultRgHO.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015-2016 The University Of Sheffield. 3 | * 4 | * This file is part of gateplugin-LearningFramework 5 | * (see https://github.com/GateNLP/gateplugin-LearningFramework). 6 | * 7 | * This program is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU Lesser General Public License as published by 9 | * the Free Software Foundation, either version 2.1 of the License, or 10 | * (at your option) any later version. 11 | * 12 | * This program is distributed in the hope that it will be useful, 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | * GNU Lesser General Public License for more details. 16 | * 17 | * You should have received a copy of the GNU Lesser General Public License 18 | * along with this software. If not, see . 19 | */ 20 | 21 | package gate.plugin.learningframework.engines; 22 | 23 | /** 24 | * A class that represents the result of a crossvalidation or hold-out evaluation. 
25 | * 26 | * @author Johann Petrak 27 | */ 28 | public class EvaluationResultRgHO extends EvaluationResultRegression { 29 | public double trainingFraction; 30 | public int nrRepeats; 31 | 32 | @Override 33 | public String toString() { 34 | return "EvaluationResultClHO{" + "rmse=" + rmse + ",trainingFraction="+trainingFraction+",nrRepeats="+nrRepeats + "}"; 35 | } 36 | 37 | } 38 | -------------------------------------------------------------------------------- /src/main/java/gate/plugin/learningframework/engines/EvaluationResultRgXval.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015-2016 The University Of Sheffield. 3 | * 4 | * This file is part of gateplugin-LearningFramework 5 | * (see https://github.com/GateNLP/gateplugin-LearningFramework). 6 | * 7 | * This program is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU Lesser General Public License as published by 9 | * the Free Software Foundation, either version 2.1 of the License, or 10 | * (at your option) any later version. 11 | * 12 | * This program is distributed in the hope that it will be useful, 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | * GNU Lesser General Public License for more details. 16 | * 17 | * You should have received a copy of the GNU Lesser General Public License 18 | * along with this software. If not, see . 19 | */ 20 | 21 | package gate.plugin.learningframework.engines; 22 | 23 | /** 24 | * A class that represents the result of a crossvalidation or hold-out evaluation. 
25 | * 26 | * @author Johann Petrak 27 | */ 28 | public class EvaluationResultRgXval extends EvaluationResultRegression { 29 | public int nrFolds; 30 | 31 | @Override 32 | public String toString() { 33 | return "EvaluationResultRgXval{" + "RMSE=" + rmse + ",nrFolds="+nrFolds + "}"; 34 | } 35 | 36 | } 37 | -------------------------------------------------------------------------------- /src/main/java/gate/plugin/learningframework/engines/Utils4Engines.java: -------------------------------------------------------------------------------- 1 | package gate.plugin.learningframework.engines; 2 | 3 | import gate.util.GateRuntimeException; 4 | import java.io.File; 5 | import java.io.IOException; 6 | import java.net.MalformedURLException; 7 | import java.net.URISyntaxException; 8 | import java.net.URL; 9 | import java.nio.file.FileSystem; 10 | import java.nio.file.FileSystems; 11 | import java.nio.file.FileVisitResult; 12 | import java.nio.file.Path; 13 | import java.nio.file.Paths; 14 | import java.nio.file.Files; 15 | import java.nio.file.SimpleFileVisitor; 16 | import java.nio.file.StandardCopyOption; 17 | import java.nio.file.attribute.BasicFileAttributes; 18 | import java.util.HashMap; 19 | 20 | /** 21 | * Class for factoring out static methods that do not fit into the Engine 22 | * hierarchy. 23 | * 24 | * @author Johann Petrak 25 | */ 26 | public class Utils4Engines { 27 | 28 | /** 29 | * Copy wrapper from plugin JAR to directory. 30 | * @param wrapperName name of the wrapper 31 | * @param targetDirectory target directory 32 | */ 33 | public static void copyWrapper(String wrapperName, File targetDirectory) { 34 | // First of all, check if the target directory already has the directory expected. 35 | // If ths is the case just silently quit. 36 | if (new File(targetDirectory, wrapperName).exists()) { 37 | return; 38 | } 39 | // Otherwise go on and actually try to copy everything ... 
40 | copyResources(targetDirectory, "/resources/wrappers/"+wrapperName); 41 | } 42 | 43 | /** 44 | * Copy resources from plugin jar to target directory. 45 | * @param targetDir target directory 46 | * @param root root location of resources to copy 47 | */ 48 | public static void copyResources(File targetDir, String root) { 49 | 50 | // TODO: check targetDir is a dir? 51 | //if (!hasResources()) 52 | // throw new UnsupportedOperationException( 53 | // "this plugin doesn't have any resources you can copy as you would know had you called hasResources first :P"); 54 | URL artifactURL = Utils4Engines.class.getResource("/creole.xml"); 55 | try { 56 | artifactURL = new URL(artifactURL, "."); 57 | } catch (MalformedURLException ex) { 58 | throw new GateRuntimeException("Could not get jar URL"); 59 | } 60 | try ( 61 | FileSystem zipFs 62 | = FileSystems.newFileSystem(artifactURL.toURI(), new HashMap<>());) { 63 | 64 | Path target = Paths.get(targetDir.toURI()); 65 | Path pathInZip = zipFs.getPath(root); 66 | if (!Files.isDirectory(pathInZip)) { 67 | throw new GateRuntimeException("ODD: not a directory " + pathInZip); 68 | } 69 | Path parentPathInZip = pathInZip.getParent(); 70 | Files.walkFileTree(pathInZip, new SimpleFileVisitor() { 71 | @Override 72 | public FileVisitResult visitFile(Path filePath, 73 | BasicFileAttributes attrs) throws IOException { 74 | // Make sure that we conserve the hierachy of files and folders 75 | // inside the zip 76 | //System.err.println("DEBUG filePath=" + filePath); 77 | Path relativePathInZip = parentPathInZip.relativize(filePath); 78 | Path targetPath = target.resolve(relativePathInZip.toString()); 79 | //System.err.println("DEBUG: WARNING create directories" + targetPath.getParent()); 80 | Files.createDirectories(targetPath.getParent()); 81 | 82 | // And extract the file 83 | //System.err.println("DEBUG: WARNING copy from " + filePath + " to " + targetPath); 84 | Files.copy(filePath, targetPath, StandardCopyOption.REPLACE_EXISTING); 85 | 
// if the file ends in .sh or .cmd make it executable 86 | String tp = targetPath.toString(); 87 | if(tp.endsWith(".sh") || tp.endsWith(".cmd")) { 88 | targetPath.toFile().setExecutable(true); 89 | } 90 | 91 | return FileVisitResult.CONTINUE; 92 | } 93 | }); 94 | } catch (IOException | URISyntaxException ex) { 95 | throw new GateRuntimeException("Error trying to copy the resources", ex); 96 | } 97 | } 98 | 99 | } 100 | -------------------------------------------------------------------------------- /src/main/java/gate/plugin/learningframework/export/CorpusExporterDRJson.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015-2016 The University Of Sheffield. 3 | * 4 | * This file is part of gateplugin-LearningFramework 5 | * (see https://github.com/GateNLP/gateplugin-LearningFramework). 6 | * 7 | * This program is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU Lesser General Public License as published by 9 | * the Free Software Foundation, either version 2.1 of the License, or 10 | * (at your option) any later version. 11 | * 12 | * This program is distributed in the hope that it will be useful, 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | * GNU Lesser General Public License for more details. 16 | * 17 | * You should have received a copy of the GNU Lesser General Public License 18 | * along with this software. If not, see . 
19 | */ 20 | 21 | package gate.plugin.learningframework.export; 22 | 23 | import gate.plugin.learningframework.data.CorpusRepresentationVolatileDense2JsonStream; 24 | import gate.plugin.learningframework.engines.Info; 25 | 26 | /** 27 | * Common base class of all dense vector / json file representation based exporters 28 | * @author Johann Petrak 29 | */ 30 | public class CorpusExporterDRJson extends CorpusExporter { 31 | 32 | @Override 33 | public void initWhenCreating() { 34 | corpusRepresentation = new CorpusRepresentationVolatileDense2JsonStream(dataDirFile, featureInfo); 35 | corpusRepresentation.startAdding(); 36 | } 37 | 38 | 39 | @Override 40 | public void export() { 41 | // all the data already has been written through the corpusRepresentation.add 42 | // method. 43 | // Here we only need to finish the writing to that file and also write 44 | // the metadata. 45 | corpusRepresentation.finishAdding(); 46 | CorpusRepresentationVolatileDense2JsonStream crdr = 47 | (CorpusRepresentationVolatileDense2JsonStream)corpusRepresentation; 48 | crdr.saveMetadata(); 49 | } // export 50 | 51 | 52 | @Override 53 | public Info getInfo() { 54 | throw new UnsupportedOperationException("getInfo() not implemented but should not be necessary???"); 55 | } 56 | 57 | 58 | } 59 | -------------------------------------------------------------------------------- /src/main/java/gate/plugin/learningframework/export/CorpusExporterMR.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015-2016 The University Of Sheffield. 3 | * 4 | * This file is part of gateplugin-LearningFramework 5 | * (see https://github.com/GateNLP/gateplugin-LearningFramework). 6 | * 7 | * This program is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU Lesser General Public License as published by 9 | * the Free Software Foundation, either version 2.1 of the License, or 10 | * (at your option) any later version. 
11 | * 12 | * This program is distributed in the hope that it will be useful, 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | * GNU Lesser General Public License for more details. 16 | * 17 | * You should have received a copy of the GNU Lesser General Public License 18 | * along with this software. If not, see . 19 | */ 20 | 21 | package gate.plugin.learningframework.export; 22 | 23 | import gate.plugin.learningframework.ScalingMethod; 24 | import gate.plugin.learningframework.data.CorpusRepresentationMallet; 25 | import gate.plugin.learningframework.data.CorpusRepresentationMalletTarget; 26 | import gate.plugin.learningframework.engines.Info; 27 | import gate.plugin.learningframework.mallet.LFPipe; 28 | import java.util.ArrayList; 29 | 30 | /** 31 | * Common base class of all mallet-related exporters. 32 | * @author johann 33 | */ 34 | public abstract class CorpusExporterMR extends CorpusExporter { 35 | 36 | @Override 37 | public void initWhenCreating() { 38 | // for all mallet related exporters, we need to create a mallet corpus 39 | // representation here, either seq or target, depending on the actual 40 | // exporter. We provide a default implementation here which creates a 41 | // target CR, the seq exporters then override in turn 42 | // TODO: need to properly support scaling when exporting! 43 | corpusRepresentation = new CorpusRepresentationMalletTarget( 44 | featureInfo, 45 | targetType); 46 | } 47 | 48 | // All the mallet related exporters also write the pipe and the info, each 49 | // of the export() implementations should call this method 50 | // This is done as the first step in the export() method and since 51 | // the scaling needs to be done before exporting, the finishAdding() method 52 | // is called in here always, just to be sure. The finishAdding() method is 53 | // not doing anything on any call after the first call. 
54 | public void exportMeta() { 55 | CorpusRepresentationMallet crm = (CorpusRepresentationMallet)corpusRepresentation; 56 | crm.finishAdding(); 57 | // get the pre-filled info object 58 | Info info = getInfo(); 59 | // In addition to the actual data file exported by the methods above, 60 | // always also export the pipe and a template info file! 61 | info.classAnnotationType = "null"; 62 | LFPipe lfpipe = crm.getPipe(); 63 | if (lfpipe.getTargetAlphabet() == null) { 64 | info.classLabels = null; 65 | } else { 66 | //info.classLabels = lfpipe.getTargetAlphabet().toArray(); 67 | Object[] objs = lfpipe.getTargetAlphabet().toArray(); 68 | info.nrTargetValues = objs.length; 69 | ArrayList labels = new ArrayList<>(); 70 | for (Object obj : objs) { 71 | labels.add(obj.toString()); 72 | } 73 | info.classLabels = labels; 74 | } 75 | info.nrTrainingDimensions = lfpipe.getDataAlphabet().size(); 76 | info.nrTrainingDocuments = 0; 77 | info.nrTrainingInstances = crm.getRepresentationMallet().size(); 78 | info.targetFeature = "class"; 79 | info.task = "CLASSIFIER"; 80 | info.trainerClass = ""; 81 | info.trainingCorpusName = ""; 82 | info.save(dataDirFile); 83 | // finally save the Mallet corpus representation 84 | crm.savePipe(dataDirFile); 85 | 86 | } 87 | 88 | 89 | } 90 | -------------------------------------------------------------------------------- /src/main/java/gate/plugin/learningframework/export/CorpusExporterMRSeq.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015-2016 The University Of Sheffield. 3 | * 4 | * This file is part of gateplugin-LearningFramework 5 | * (see https://github.com/GateNLP/gateplugin-LearningFramework). 6 | * 7 | * This program is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU Lesser General Public License as published by 9 | * the Free Software Foundation, either version 2.1 of the License, or 10 | * (at your option) any later version. 
11 | * 12 | * This program is distributed in the hope that it will be useful, 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | * GNU Lesser General Public License for more details. 16 | * 17 | * You should have received a copy of the GNU Lesser General Public License 18 | * along with this software. If not, see . 19 | */ 20 | 21 | package gate.plugin.learningframework.export; 22 | 23 | import cc.mallet.types.InstanceList; 24 | import gate.plugin.learningframework.ScalingMethod; 25 | import gate.plugin.learningframework.data.CorpusRepresentationMallet; 26 | import gate.plugin.learningframework.data.CorpusRepresentationMalletSeq; 27 | import gate.plugin.learningframework.engines.Info; 28 | import java.io.File; 29 | 30 | /** 31 | * 32 | * @author johann 33 | */ 34 | public class CorpusExporterMRSeq extends CorpusExporterMR { 35 | 36 | @Override 37 | public Info getInfo() { 38 | Info info = new Info(); 39 | info.algorithmClass = "gate.plugin.learningframework.engines.AlgorithmSequenceTagging"; 40 | info.algorithmName = "DUMMY"; 41 | info.engineClass = "DUMMY"; 42 | info.modelClass = "DUMMY"; 43 | return info; 44 | } 45 | 46 | @Override 47 | public void export() { 48 | exportMeta(); 49 | CorpusRepresentationMallet crm = (CorpusRepresentationMallet)corpusRepresentation; 50 | InstanceList malletInstances = crm.getRepresentationMallet(); 51 | //Pipe pipe = malletInstances.getPipe(); 52 | //Attributes attrs = new Attributes(pipe,instanceType); 53 | malletInstances.save(new File(dataDirFile, "data.malletseq.ser")); 54 | } // export 55 | 56 | @Override 57 | public void initWhenCreating() { 58 | corpusRepresentation = new CorpusRepresentationMalletSeq(featureInfo); 59 | } 60 | 61 | 62 | } 63 | -------------------------------------------------------------------------------- /src/main/java/gate/plugin/learningframework/export/CorpusExporterMRTarget.java: 
-------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015-2016 The University Of Sheffield. 3 | * 4 | * This file is part of gateplugin-LearningFramework 5 | * (see https://github.com/GateNLP/gateplugin-LearningFramework). 6 | * 7 | * This program is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU Lesser General Public License as published by 9 | * the Free Software Foundation, either version 2.1 of the License, or 10 | * (at your option) any later version. 11 | * 12 | * This program is distributed in the hope that it will be useful, 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | * GNU Lesser General Public License for more details. 16 | * 17 | * You should have received a copy of the GNU Lesser General Public License 18 | * along with this software. If not, see . 19 | */ 20 | 21 | package gate.plugin.learningframework.export; 22 | 23 | import cc.mallet.types.InstanceList; 24 | import gate.plugin.learningframework.data.CorpusRepresentationMallet; 25 | import gate.plugin.learningframework.engines.Info; 26 | import java.io.File; 27 | 28 | /** 29 | * 30 | * @author johann 31 | */ 32 | public class CorpusExporterMRTarget extends CorpusExporterMR { 33 | 34 | @Override 35 | public Info getInfo() { 36 | Info info = new Info(); 37 | info.algorithmClass = "gate.plugin.learningframework.engines.AlgorithmClassification"; 38 | info.algorithmName = "DUMMY"; 39 | info.engineClass = "DUMMY"; 40 | info.modelClass = "DUMMY"; 41 | return info; 42 | } 43 | 44 | @Override 45 | public void export() { 46 | exportMeta(); 47 | CorpusRepresentationMallet crm = (CorpusRepresentationMallet)corpusRepresentation; 48 | InstanceList malletInstances = crm.getRepresentationMallet(); 49 | //Pipe pipe = malletInstances.getPipe(); 50 | //Attributes attrs = new Attributes(pipe,instanceType); 51 | 
malletInstances.save(new File(dataDirFile, "data.mallettarget.ser")); 52 | } // export 53 | 54 | 55 | } 56 | -------------------------------------------------------------------------------- /src/main/java/gate/plugin/learningframework/export/Exporter.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015-2016 The University Of Sheffield. 3 | * 4 | * This file is part of gateplugin-LearningFramework 5 | * (see https://github.com/GateNLP/gateplugin-LearningFramework). 6 | * 7 | * This program is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU Lesser General Public License as published by 9 | * the Free Software Foundation, either version 2.1 of the License, or 10 | * (at your option) any later version. 11 | * 12 | * This program is distributed in the hope that it will be useful, 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | * GNU Lesser General Public License for more details. 16 | * 17 | * You should have received a copy of the GNU Lesser General Public License 18 | * along with this software. If not, see . 
19 | */ 20 | package gate.plugin.learningframework.export; 21 | 22 | import gate.plugin.learningframework.engines.AlgorithmKind; 23 | import gate.plugin.learningframework.features.TargetType; 24 | 25 | public enum Exporter { 26 | JSON_CL_DR(CorpusExporterDRJson.class,TargetType.NOMINAL,AlgorithmKind.CLASSIFIER), 27 | JSON_RG_DR(CorpusExporterDRJson.class,TargetType.NUMERIC,AlgorithmKind.REGRESSOR), 28 | JSON_SEQ_DR(CorpusExporterDRJson.class,TargetType.NOMINAL,AlgorithmKind.SEQUENCE_TAGGER), 29 | ARFF_CL_MR(CorpusExporterMRARFF.class,TargetType.NOMINAL,AlgorithmKind.CLASSIFIER), 30 | ARFF_RG_MR(CorpusExporterMRARFF.class,TargetType.NUMERIC,AlgorithmKind.REGRESSOR), 31 | CSV_CL_MR(CorpusExporterMRCSV.class,TargetType.NOMINAL,AlgorithmKind.CLASSIFIER), 32 | CSV_RG_MR(CorpusExporterMRCSV.class,TargetType.NUMERIC,AlgorithmKind.REGRESSOR), 33 | JSON_CL_MR(CorpusExporterMRJsonTarget.class,TargetType.NOMINAL,AlgorithmKind.CLASSIFIER), 34 | JSON_RG_MR(CorpusExporterMRJsonTarget.class,TargetType.NUMERIC,AlgorithmKind.REGRESSOR), 35 | JSON_SEQ_MR(CorpusExporterMRJsonSeq.class,TargetType.NOMINAL,AlgorithmKind.SEQUENCE_TAGGER), 36 | LibSVM_CL_MR(CorpusExporterMRLibSVM.class,TargetType.NOMINAL,AlgorithmKind.CLASSIFIER), 37 | LibSVM_RG_MR(CorpusExporterMRLibSVM.class,TargetType.NUMERIC,AlgorithmKind.REGRESSOR), 38 | MatrixMarket2_CLUS_MR(CorpusExporterMRMatrixMarket2.class,TargetType.NONE,AlgorithmKind.CLUSTERING), 39 | MatrixMarket2_CL_MR(CorpusExporterMRMatrixMarket2.class,TargetType.NOMINAL,AlgorithmKind.CLASSIFIER), 40 | MatrixMatket2_RG_MR(CorpusExporterMRMatrixMarket2.class,TargetType.NUMERIC,AlgorithmKind.REGRESSOR 41 | ); 42 | 43 | 44 | private Exporter(Class corpusExporterClass, TargetType ttype, AlgorithmKind algkind) { 45 | this.corpusExporterClass = corpusExporterClass; 46 | this.ttype = ttype; 47 | this.algkind = algkind; 48 | } 49 | private Class corpusExporterClass = null; 50 | private TargetType ttype = TargetType.NOMINAL; 51 | private AlgorithmKind algkind = 
AlgorithmKind.CLASSIFIER; 52 | public Class getCorpusExporterClass() { return corpusExporterClass; } 53 | public TargetType getTargetType() { return ttype; } 54 | public AlgorithmKind getAlgorithmKind() {return algkind; } 55 | } 56 | -------------------------------------------------------------------------------- /src/main/java/gate/plugin/learningframework/export/ExporterText.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015-2016 The University Of Sheffield. 3 | * 4 | * This file is part of gateplugin-LearningFramework 5 | * (see https://github.com/GateNLP/gateplugin-LearningFramework). 6 | * 7 | * This program is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU Lesser General Public License as published by 9 | * the Free Software Foundation, either version 2.1 of the License, or 10 | * (at your option) any later version. 11 | * 12 | * This program is distributed in the hope that it will be useful, 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | * GNU Lesser General Public License for more details. 16 | * 17 | * You should have received a copy of the GNU Lesser General Public License 18 | * along with this software. If not, see . 
19 | */ 20 | package gate.plugin.learningframework.export; 21 | 22 | import gate.plugin.learningframework.engines.AlgorithmKind; 23 | import gate.plugin.learningframework.features.TargetType; 24 | 25 | public enum ExporterText { 26 | // For now this is done ad-hoc in the exporter PR, but it will get moved to 27 | // exporter classes 28 | TEXTLINE_TSV(null,null,null) 29 | ; 30 | 31 | 32 | 33 | private ExporterText(Class corpusExporterClass, TargetType ttype, AlgorithmKind algkind) { 34 | this.corpusExporterClass = corpusExporterClass; 35 | this.ttype = ttype; 36 | this.algkind = algkind; 37 | } 38 | private final Class corpusExporterClass; 39 | private final TargetType ttype; 40 | private final AlgorithmKind algkind; 41 | public Class getCorpusExporterClass() { return corpusExporterClass; } 42 | public TargetType getTargetType() { return ttype; } 43 | public AlgorithmKind getAlgorithmKind() {return algkind; } 44 | } 45 | -------------------------------------------------------------------------------- /src/main/java/gate/plugin/learningframework/features/CodeAs.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015-2016 The University Of Sheffield. 3 | * 4 | * This file is part of gateplugin-LearningFramework 5 | * (see https://github.com/GateNLP/gateplugin-LearningFramework). 6 | * 7 | * This program is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU Lesser General Public License as published by 9 | * the Free Software Foundation, either version 2.1 of the License, or 10 | * (at your option) any later version. 11 | * 12 | * This program is distributed in the hope that it will be useful, 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | * GNU Lesser General Public License for more details. 
16 | * 17 | * You should have received a copy of the GNU Lesser General Public License 18 | * along with this software. If not, see . 19 | */ 20 | 21 | package gate.plugin.learningframework.features; 22 | 23 | /** 24 | * 25 | * @author Johann Petrak 26 | */ 27 | public enum CodeAs { 28 | one_of_k, number 29 | 30 | } 31 | -------------------------------------------------------------------------------- /src/main/java/gate/plugin/learningframework/features/Datatype.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015-2016 The University Of Sheffield. 3 | * 4 | * This file is part of gateplugin-LearningFramework 5 | * (see https://github.com/GateNLP/gateplugin-LearningFramework). 6 | * 7 | * This program is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU Lesser General Public License as published by 9 | * the Free Software Foundation, either version 2.1 of the License, or 10 | * (at your option) any later version. 11 | * 12 | * This program is distributed in the hope that it will be useful, 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | * GNU Lesser General Public License for more details. 16 | * 17 | * You should have received a copy of the GNU Lesser General Public License 18 | * along with this software. If not, see . 19 | */ 20 | 21 | package gate.plugin.learningframework.features; 22 | 23 | /** 24 | * 25 | * @author Johann Petrak 26 | */ 27 | public enum Datatype { 28 | nominal, numeric, bool 29 | 30 | } 31 | -------------------------------------------------------------------------------- /src/main/java/gate/plugin/learningframework/features/FeatureSpecAttribute.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015-2016 The University Of Sheffield. 
3 | * 4 | * This file is part of gateplugin-LearningFramework 5 | * (see https://github.com/GateNLP/gateplugin-LearningFramework). 6 | * 7 | * This program is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU Lesser General Public License as published by 9 | * the Free Software Foundation, either version 2.1 of the License, or 10 | * (at your option) any later version. 11 | * 12 | * This program is distributed in the hope that it will be useful, 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | * GNU Lesser General Public License for more details. 16 | * 17 | * You should have received a copy of the GNU Lesser General Public License 18 | * along with this software. If not, see . 19 | */ 20 | 21 | 22 | package gate.plugin.learningframework.features; 23 | 24 | import gate.plugin.learningframework.LFUtils; 25 | import gate.util.GateRuntimeException; 26 | import java.io.Serializable; 27 | 28 | /** 29 | * 30 | * @author Johann Petrak 31 | */ 32 | public abstract class FeatureSpecAttribute implements Serializable, Cloneable { 33 | 34 | private static final long serialVersionUID = 651636894843439700L; 35 | 36 | public String annType; 37 | public String feature; 38 | public String name; 39 | public int featureId; // a feature index, starting with 0 40 | public String missingValueValue = null; 41 | public Datatype datatype; 42 | public String listsep = null; 43 | public String featureCode = "INVALID"; 44 | 45 | public String emb_file = ""; 46 | public String emb_id = ""; 47 | public Integer emb_dims = 0; 48 | public String emb_train = ""; 49 | public Integer emb_minfreq = 0; 50 | 51 | 52 | public abstract void stopGrowth(); 53 | public abstract void startGrowth(); 54 | 55 | @Override 56 | public FeatureSpecAttribute clone() { 57 | try { 58 | return (FeatureSpecAttribute) super.clone(); 59 | } catch (CloneNotSupportedException ex) { 60 | throw 
new RuntimeException("Could not clone Attribute",ex); 61 | } 62 | } 63 | 64 | /** 65 | * Return the code used to identify the attribute type in a feature name. 66 | * @return code 67 | */ 68 | public String getCode() { 69 | return featureCode; 70 | } 71 | 72 | 73 | /** 74 | * Returns the missing value as the proper data type for this attribute. 75 | * For example returns a String for nominal or a Float for numeric. 76 | * @return missing value for the data type 77 | */ 78 | public Object missingValue() { 79 | Object ret = null; 80 | switch (datatype) { 81 | case nominal: 82 | ret = missingValueValue; 83 | break; 84 | case bool: 85 | ret = Boolean.parseBoolean(missingValueValue); 86 | break; 87 | case numeric: 88 | ret = Double.parseDouble(missingValueValue); 89 | break; 90 | default: 91 | throw new GateRuntimeException("Unknown datatype: "+datatype); 92 | } 93 | return ret; 94 | } 95 | 96 | /** 97 | * Returns either a String, Double or Boolean for the given Object. 98 | * 99 | * @param val object 100 | * @return converted object 101 | */ 102 | public Object toValue(Object val) { 103 | Object ret = null; 104 | if(val == null) return missingValue(); 105 | switch (datatype) { 106 | case nominal: 107 | ret = LFUtils.anyToStringOrElse(val, ""); 108 | break; 109 | case bool: 110 | ret = LFUtils.anyToBooleanOrElse(val, false); 111 | break; 112 | case numeric: 113 | ret = LFUtils.anyToDoubleOrElse(val, 0.0); 114 | break; 115 | default: 116 | throw new GateRuntimeException("Unknown datatype: "+datatype); 117 | } 118 | return ret; 119 | 120 | } 121 | 122 | 123 | } 124 | -------------------------------------------------------------------------------- /src/main/java/gate/plugin/learningframework/features/FeatureSpecAttributeList.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015-2016 The University Of Sheffield. 
3 | * 4 | * This file is part of gateplugin-LearningFramework 5 | * (see https://github.com/GateNLP/gateplugin-LearningFramework). 6 | * 7 | * This program is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU Lesser General Public License as published by 9 | * the Free Software Foundation, either version 2.1 of the License, or 10 | * (at your option) any later version. 11 | * 12 | * This program is distributed in the hope that it will be useful, 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | * GNU Lesser General Public License for more details. 16 | * 17 | * You should have received a copy of the GNU Lesser General Public License 18 | * along with this software. If not, see . 19 | */ 20 | 21 | package gate.plugin.learningframework.features; 22 | 23 | import java.io.Serializable; 24 | 25 | /** 26 | * 27 | * @author Johann Petrak 28 | */ 29 | public class FeatureSpecAttributeList extends FeatureSpecSimpleAttribute implements Serializable, Cloneable { 30 | 31 | private static final long serialVersionUID = -4627730393276173588L; 32 | 33 | public FeatureSpecAttributeList( 34 | String aname, 35 | String type, 36 | String feature, 37 | Datatype datatype, 38 | CodeAs codeas, 39 | MissingValueTreatment missingValueTreatment, 40 | String missingValueValue, 41 | String scalingMethod, 42 | String transformMethod, 43 | int from, int to, 44 | String withinType, 45 | String listsep, 46 | String featureName4Value) { 47 | super(aname, type, feature, datatype, codeas, missingValueTreatment, 48 | missingValueValue, scalingMethod, transformMethod, withinType, listsep, featureName4Value); 49 | this.from = from; 50 | this.to = to; 51 | } 52 | 53 | /** 54 | * Create an AttributeList instance from a SimpleAttribute plus the from and to values 55 | * @param att attribute 56 | * @param withinType within type 57 | * @param from from index 58 | * @param to to 
index 59 | */ 60 | public FeatureSpecAttributeList(FeatureSpecSimpleAttribute att, String withinType, int from, int to) { 61 | /* 62 | String aname, 63 | String type, 64 | String feature, 65 | Datatype datatype, 66 | CodeAs codeas, 67 | MissingValueTreatment missingValueTreatment, 68 | String missingValueValue, 69 | String scalingMethod, 70 | String transformMethod, 71 | String withinType, 72 | String listsep, 73 | String featureName4Value 74 | */ 75 | super(att.name, att.annType, att.feature, 76 | att.datatype, att.codeas, 77 | att.missingValueTreatment, 78 | att.missingValueValue, "", "", withinType, att.listsep, att.featureName4Value); 79 | this.from = from; 80 | this.to = to; 81 | featureCode = "L"; 82 | this.emb_dims = att.emb_dims; 83 | this.emb_file = att.emb_file; 84 | this.emb_id = att.emb_id; 85 | this.emb_train = att.emb_train; 86 | } 87 | 88 | public int from; 89 | public int to; 90 | 91 | // NOTE: this inherits the alphabet from SimpleAttribute: even though this object represents a 92 | // whole set of features, the alphabet gets shared by all of them! 93 | 94 | 95 | @Override 96 | public String toString() { 97 | return "AttributeList(name="+name+ 98 | ",type="+annType+ 99 | ",feature="+feature+ 100 | ",datatype="+datatype+ 101 | ",missingvaluetreatment="+missingValueTreatment+ 102 | ",codeas="+codeas+ 103 | ",within="+withinType+ 104 | ",from="+from+ 105 | ",to="+to; 106 | } 107 | 108 | @Override 109 | public FeatureSpecAttributeList clone() { 110 | return (FeatureSpecAttributeList) super.clone(); 111 | } 112 | 113 | 114 | } 115 | -------------------------------------------------------------------------------- /src/main/java/gate/plugin/learningframework/features/FeatureSpecNgram.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015-2016 The University Of Sheffield. 
3 | * 4 | * This file is part of gateplugin-LearningFramework 5 | * (see https://github.com/GateNLP/gateplugin-LearningFramework). 6 | * 7 | * This program is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU Lesser General Public License as published by 9 | * the Free Software Foundation, either version 2.1 of the License, or 10 | * (at your option) any later version. 11 | * 12 | * This program is distributed in the hope that it will be useful, 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | * GNU Lesser General Public License for more details. 16 | * 17 | * You should have received a copy of the GNU Lesser General Public License 18 | * along with this software. If not, see . 19 | */ 20 | 21 | 22 | package gate.plugin.learningframework.features; 23 | 24 | import java.io.Serializable; 25 | 26 | /** 27 | * 28 | * @author Johann Petrak 29 | */ 30 | public class FeatureSpecNgram extends FeatureSpecAttribute implements Serializable, Cloneable { 31 | 32 | private static final long serialVersionUID = -3538356352141472056L; 33 | 34 | public FeatureSpecNgram(String aname, int number, String type, String feature, String featureName4Value) { 35 | this.name = aname; 36 | this.number = number; 37 | this.annType = type; 38 | this.feature = feature; 39 | this.featureName4Value = featureName4Value; 40 | this.datatype = Datatype.nominal; 41 | this.missingValueValue = ""; 42 | featureCode = "N"; 43 | } 44 | public int number = -1; 45 | public String featureName4Value = ""; 46 | public int maxlen = 0; 47 | public String shorten = ""; 48 | 49 | @Override 50 | public void stopGrowth() { 51 | /// we do not have any alphabets in an Ngram attribute, do nothing 52 | } 53 | 54 | @Override 55 | public void startGrowth() { 56 | /// we do not have any alphabets, do nothing 57 | } 58 | 59 | @Override 60 | public String toString() { 61 | return 
"NgramAttribute(name="+name+ 62 | ",type="+annType+ 63 | ",feature="+feature+ 64 | ",featureName4Value="+featureName4Value+ 65 | ",number="+number+ 66 | ",maxlen="+maxlen+ 67 | ",shorten="+shorten 68 | ; 69 | } 70 | 71 | @Override 72 | public FeatureSpecNgram clone() { 73 | return (FeatureSpecNgram) super.clone(); 74 | } 75 | 76 | } 77 | -------------------------------------------------------------------------------- /src/main/java/gate/plugin/learningframework/features/FeatureSpecSimpleAttribute.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015-2016 The University Of Sheffield. 3 | * 4 | * This file is part of gateplugin-LearningFramework 5 | * (see https://github.com/GateNLP/gateplugin-LearningFramework). 6 | * 7 | * This program is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU Lesser General Public License as published by 9 | * the Free Software Foundation, either version 2.1 of the License, or 10 | * (at your option) any later version. 11 | * 12 | * This program is distributed in the hope that it will be useful, 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | * GNU Lesser General Public License for more details. 16 | * 17 | * You should have received a copy of the GNU Lesser General Public License 18 | * along with this software. If not, see . 
19 | */ 20 | 21 | package gate.plugin.learningframework.features; 22 | 23 | import gate.plugin.learningframework.mallet.LFAlphabet; 24 | import java.io.Serializable; 25 | 26 | /** 27 | * 28 | * @author Johann Petrak 29 | */ 30 | public class FeatureSpecSimpleAttribute extends FeatureSpecAttribute implements Serializable, Cloneable { 31 | 32 | private static final long serialVersionUID = -2346560362547132478L; 33 | 34 | /** 35 | * Constructor 36 | * @param aname attribute name 37 | * @param type attribute type 38 | * @param feature feature name 39 | * @param datatype datatype 40 | * @param codeas code as setting 41 | * @param missingValueTreatment missing value treatment 42 | * @param missingValueValue missing value 43 | * @param scalingMethod scalign method 44 | * @param transformMethod transformation method 45 | * @param withinType withing which sequence type 46 | * @param listsep list separator string 47 | * @param featureName4Value which feature to get the value from 48 | */ 49 | public FeatureSpecSimpleAttribute( 50 | String aname, 51 | String type, 52 | String feature, 53 | Datatype datatype, 54 | CodeAs codeas, 55 | MissingValueTreatment missingValueTreatment, 56 | String missingValueValue, 57 | String scalingMethod, 58 | String transformMethod, 59 | String withinType, 60 | String listsep, 61 | String featureName4Value) { 62 | this.name = aname; 63 | this.annType = type; 64 | this.feature = feature; 65 | this.datatype = datatype; 66 | this.codeas = codeas; 67 | this.missingValueTreatment = missingValueTreatment; 68 | if (datatype == Datatype.nominal && codeas == CodeAs.number) { 69 | alphabet = new LFAlphabet(); 70 | } 71 | this.withinType = withinType; 72 | this.listsep = listsep; 73 | this.featureName4Value = featureName4Value; 74 | this.missingValueValue = missingValueValue; 75 | featureCode = "A"; 76 | } 77 | public CodeAs codeas = CodeAs.one_of_k; 78 | public MissingValueTreatment missingValueTreatment = MissingValueTreatment.zero_value; 79 | public 
LFAlphabet alphabet; 80 | public String withinType; 81 | public String featureName4Value; 82 | 83 | @Override 84 | public void stopGrowth() { 85 | if(alphabet!=null) { alphabet.stopGrowth(); } 86 | } 87 | 88 | 89 | @Override 90 | public void startGrowth() { 91 | if(alphabet!=null) { alphabet.startGrowth(); } 92 | } 93 | 94 | @Override 95 | public String toString() { 96 | return "SimpleAttribute(name="+name+ 97 | ",type="+annType+ 98 | ",feature="+feature+ 99 | ",datatype="+datatype+ 100 | ",missingvaluetreatment="+missingValueTreatment+ 101 | ",within="+withinType+ 102 | ",codeas="+codeas; 103 | } 104 | 105 | @Override 106 | public FeatureSpecSimpleAttribute clone() { 107 | return (FeatureSpecSimpleAttribute) super.clone(); 108 | } 109 | 110 | 111 | } 112 | -------------------------------------------------------------------------------- /src/main/java/gate/plugin/learningframework/features/MissingValueTreatment.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015-2016 The University Of Sheffield. 3 | * 4 | * This file is part of gateplugin-LearningFramework 5 | * (see https://github.com/GateNLP/gateplugin-LearningFramework). 6 | * 7 | * This program is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU Lesser General Public License as published by 9 | * the Free Software Foundation, either version 2.1 of the License, or 10 | * (at your option) any later version. 11 | * 12 | * This program is distributed in the hope that it will be useful, 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | * GNU Lesser General Public License for more details. 16 | * 17 | * You should have received a copy of the GNU Lesser General Public License 18 | * along with this software. If not, see . 
/**
 * How to treat/represent missing values when creating the instances.
 *
 * NOTE: not all treatments are implemented yet!
 */
public enum MissingValueTreatment {

  /**
   * Records that the instance contains a missing value and filters the
   * instance: it is not used for training, and at application time no
   * classification is performed for it.
   */
  ignore_instance,

  /**
   * Tries to preserve the missing value for the learning algorithm by
   * representing it as a value that is not otherwise used: for one_of_k
   * representations all k features are set to 0.0, for numeric
   * representations of a nominal value -1 is used, for numeric features NaN
   * is used and for boolean 0.5 is used. This only works if the algorithm
   * supports missing values in some way.
   */
  keep,

  /**
   * Replaces the missing value with a special value that should differ from
   * all other values but can still be handled by algorithms without missing
   * value support. This is not always possible, so the values are a
   * compromise: a special nominal value for nominal features, "-1.0" for
   * numeric values, and for boolean "0.5" if the boolean is represented as a
   * number, otherwise false.
   */
  special_value,

  /**
   * Like {@link #special_value} but uses the "zero" value of the datatype:
   * false for boolean and 0.0 for numeric. For nominal values coded
   * numerically the MV string is used; for nominal values coded one-of-k this
   * is the same as {@link #keep} and no feature is set.
   */
  zero_value,

  /**
   * Initially behaves like {@link #keep}, then makes another pass over all
   * instances at training time and replaces the value with the most frequent
   * value. The same value is also used at application time. This may not be
   * a good idea for truly continuous numeric features.
   */
  impute_mostfreq,

  /**
   * Initially behaves like {@link #keep}, then makes another pass over all
   * instances at training time and replaces the value with the median value.
   * That median value is also used at application time.
   */
  impute_median,

  /** Uses the value given in the element MISSINGVALUEVALUE for the attribute. */
  use_value

}
5 | */ 6 | package gate.plugin.learningframework.features; 7 | 8 | import gate.Annotation; 9 | import gate.Document; 10 | import java.util.Collection; 11 | import java.util.HashMap; 12 | import java.util.Map; 13 | import java.util.regex.Pattern; 14 | 15 | /** 16 | * Class for implementing methods to convert sequences to classes and back 17 | * @author Johann Petrak 18 | */ 19 | public abstract class SeqEncoder { 20 | public static final String CODESEP = "|"; 21 | public static final String CODESEP_PATTERN = Pattern.quote(CODESEP); 22 | public static final String TYPESEP = ","; 23 | public static final String TYPESEP_PATTERN = Pattern.quote(TYPESEP); 24 | public static final String CODE_OUTSIDE = "O"; 25 | public static final String CODE_BEGIN = "B"; 26 | public static final String CODE_INSIDE = "I"; 27 | public static final String CODE_END = "E"; 28 | public static final String CODE_SINGLE = "S"; // = begin and end 29 | private Map options = new HashMap<>(); 30 | public abstract String seqAnns2ClassLabel(Collection seqAnns, Annotation instAnn, Document curDoc); 31 | public void setOptions(Map options) { 32 | if(options != null) this.options.putAll(options); 33 | } 34 | 35 | /** 36 | * Return options. 37 | * 38 | * TODO: this still needs to get implemented. 39 | * 40 | * @return Option settings. 41 | */ 42 | public Map getOptions() { return options; } 43 | // TODO: not sure yet what the best way is to implement the conversion back from 44 | // class labels to annotations. This probably needs to map the full sequence 45 | // of class labels to a set of annotations? 46 | } 47 | -------------------------------------------------------------------------------- /src/main/java/gate/plugin/learningframework/features/SeqEncoderEnum.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015-2016 The University Of Sheffield. 
3 | * 4 | * This file is part of gateplugin-LearningFramework 5 | * (see https://github.com/GateNLP/gateplugin-LearningFramework). 6 | * 7 | * This program is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU Lesser General Public License as published by 9 | * the Free Software Foundation, either version 2.1 of the License, or 10 | * (at your option) any later version. 11 | * 12 | * This program is distributed in the hope that it will be useful, 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | * GNU Lesser General Public License for more details. 16 | * 17 | * You should have received a copy of the GNU Lesser General Public License 18 | * along with this software. If not, see . 19 | */ 20 | 21 | package gate.plugin.learningframework.features; 22 | 23 | import java.util.HashMap; 24 | import java.util.Map; 25 | 26 | 27 | /** 28 | * 29 | * @author Johann Petrak 30 | */ 31 | public enum SeqEncoderEnum { 32 | BIO(SeqEncoder_SimpleBIO.class,null), 33 | //BIEO(null,null), 34 | //BISO(null,null), 35 | ; 36 | private SeqEncoderEnum() { 37 | 38 | } 39 | private SeqEncoderEnum(Class encoderClass, Map encoderOptions) { 40 | this.encoderClass = encoderClass; 41 | this.encoderOptions = new HashMap<>(); 42 | if(encoderOptions != null) { 43 | this.encoderOptions.putAll(encoderOptions); 44 | } 45 | } 46 | private Class encoderClass; 47 | private Map encoderOptions; 48 | public Class getEncoderClass() { return encoderClass; } 49 | public Map getOptions() { return encoderOptions; } 50 | 51 | } 52 | -------------------------------------------------------------------------------- /src/main/java/gate/plugin/learningframework/features/TargetType.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015-2016 The University Of Sheffield. 
/**
 * The type of a learning target.
 *
 * @author Johann Petrak
 */
public enum TargetType {
  NUMERIC,
  NOMINAL,
  NONE
  // possible others: ORDINAL, ??
}
14 | * 15 | * @author Johann Petrak 16 | */ 17 | public class LFAlphabet extends Alphabet { 18 | 19 | private static final long serialVersionUID = 3271929926108562395L; 20 | 21 | public LFAlphabet() { 22 | super(); // same 23 | } 24 | public LFAlphabet(int capacity) { 25 | super(capacity); // same 26 | } 27 | 28 | @Override 29 | public synchronized int lookupIndex(Object entry) { 30 | return super.lookupIndex(entry); 31 | } 32 | 33 | @Override 34 | public synchronized int lookupIndex(Object entry, boolean addifmissing) { 35 | return super.lookupIndex(entry, addifmissing); 36 | } 37 | 38 | @Override 39 | public synchronized Object lookupObject(int index) { 40 | return super.lookupObject(index); 41 | } 42 | 43 | @Override 44 | public synchronized int size() { 45 | return super.size(); 46 | } 47 | 48 | @Override 49 | public synchronized Object[] toArray() { 50 | return super.toArray(); 51 | } 52 | 53 | } 54 | -------------------------------------------------------------------------------- /src/main/java/gate/plugin/learningframework/mallet/LFInstanceList.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this license header, choose License Headers in Project Properties. 3 | * To change this template file, choose Tools | Templates 4 | * and open the template in the editor. 5 | */ 6 | package gate.plugin.learningframework.mallet; 7 | 8 | import cc.mallet.types.Instance; 9 | import cc.mallet.types.InstanceList; 10 | 11 | /** 12 | * A concurrent replacement for Mallet InstanceList. 13 | * This tries in a naive way to synchronize some access to the list (but not all). 14 | * Most importantly the add(Instance) method is synchronized. 
15 | * 16 | * @author JohannPetrak 17 | */ 18 | public class LFInstanceList extends InstanceList { 19 | 20 | public LFInstanceList(LFPipe pipe) { 21 | super(pipe); 22 | } 23 | 24 | 25 | private static final long serialVersionUID = 4320038272253815542L; 26 | 27 | @Override 28 | public synchronized boolean add(Instance instance) { 29 | return super.add(instance); 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /src/main/java/gate/plugin/learningframework/mallet/LFLabelAlphabet.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this license header, choose License Headers in Project Properties. 3 | * To change this template file, choose Tools | Templates 4 | * and open the template in the editor. 5 | */ 6 | package gate.plugin.learningframework.mallet; 7 | 8 | import cc.mallet.types.Label; 9 | import cc.mallet.types.LabelAlphabet; 10 | 11 | /** 12 | * Attempt to make LabelAlphabet more synchronized. 13 | * 14 | * !!NOTE: currently, this is not used as Mallet Classifier requires the 15 | * target alphabet to be LabelAlphabet or a superclasse (???) instead of 16 | * LabelAlphabet or a subclass. 
17 | * See https://github.com/mimno/Mallet/issues/132 18 | * 19 | * @author Johann Petrak 20 | */ 21 | public class LFLabelAlphabet extends LabelAlphabet { 22 | 23 | private static final long serialVersionUID = -5084491342253339406L; 24 | 25 | @Override 26 | public synchronized int lookupIndex(Object entry) { 27 | return super.lookupIndex(entry); 28 | } 29 | 30 | @Override 31 | public synchronized int lookupIndex(Object entry, boolean addifmissing) { 32 | return super.lookupIndex(entry, addifmissing); 33 | } 34 | 35 | @Override 36 | public synchronized Object lookupObject(int index) { 37 | return super.lookupObject(index); 38 | } 39 | 40 | @Override 41 | public synchronized int size() { 42 | return super.size(); 43 | } 44 | 45 | @Override 46 | public synchronized Object[] toArray() { 47 | return super.toArray(); 48 | } 49 | 50 | @Override 51 | public synchronized Label lookupLabel(Object entry, boolean addifmissing) { 52 | return super.lookupLabel(entry, addifmissing); 53 | } 54 | 55 | @Override 56 | public synchronized Label lookupLabel(Object entry) { 57 | return super.lookupLabel(entry); 58 | } 59 | 60 | @Override 61 | public synchronized Label lookupLabel(int idx) { 62 | return super.lookupLabel(idx); 63 | } 64 | 65 | } 66 | -------------------------------------------------------------------------------- /src/main/java/gate/plugin/learningframework/mallet/LFPipe.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015-2016 The University Of Sheffield. 3 | * 4 | * This file is part of gateplugin-LearningFramework 5 | * (see https://github.com/GateNLP/gateplugin-LearningFramework). 6 | * 7 | * This program is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU Lesser General Public License as published by 9 | * the Free Software Foundation, either version 2.1 of the License, or 10 | * (at your option) any later version. 
11 | * 12 | * This program is distributed in the hope that it will be useful, 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | * GNU Lesser General Public License for more details. 16 | * 17 | * You should have received a copy of the GNU Lesser General Public License 18 | * along with this software. If not, see . 19 | */ 20 | 21 | package gate.plugin.learningframework.mallet; 22 | 23 | import cc.mallet.pipe.Pipe; 24 | import cc.mallet.pipe.SerialPipes; 25 | import gate.plugin.learningframework.features.FeatureInfo; 26 | import java.io.Serializable; 27 | import java.util.Collection; 28 | 29 | /** 30 | * An extended version of the Mallet SerialPipes class which allows us to store 31 | * some additional important information. 32 | * This adds methods to store the feature configuration, to associate each entry from the 33 | * feature config with one or more features, to associate each feature with its feature config, 34 | * and to associate features which are nominal and codedas numeric with their value alphabet. 35 | * All the additional information is stored in a single container: this container is used when 36 | * the features get extracted from documents to look up and store the relevant information. 37 | * 38 | * @author Johann Petrak 39 | * 40 | * TODO: turns out we will probably not need this after all: it is probably easiest to 41 | * store the featureinfo object in whatever pipe we store as a property! 42 | */ 43 | public class LFPipe extends SerialPipes implements Serializable { 44 | private static final long serialVersionUID = 1; 45 | public LFPipe(Collection pipes) { 46 | super(pipes); 47 | } 48 | protected FeatureInfo featureInfo; 49 | 50 | /** 51 | * Set the feature info. 52 | * @param info feature info 53 | */ 54 | public void setFeatureInfo(FeatureInfo info) { featureInfo = info; } 55 | 56 | /** 57 | * Get the feature info. 
58 | * @return feature info 59 | */ 60 | public FeatureInfo getFeatureInfo() { return featureInfo; } 61 | 62 | /** 63 | * Add another pipe at the end of this SerialPipes. 64 | * @param pipe pipe to add 65 | */ 66 | public void addPipe(Pipe pipe) { 67 | super.pipes().add(pipe); 68 | } 69 | 70 | } 71 | -------------------------------------------------------------------------------- /src/main/java/gate/plugin/learningframework/mallet/PipeScaleMeanVarAll.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015-2016 The University Of Sheffield. 3 | * 4 | * This file is part of gateplugin-LearningFramework 5 | * (see https://github.com/GateNLP/gateplugin-LearningFramework). 6 | * 7 | * This program is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU Lesser General Public License as published by 9 | * the Free Software Foundation, either version 2.1 of the License, or 10 | * (at your option) any later version. 11 | * 12 | * This program is distributed in the hope that it will be useful, 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | * GNU Lesser General Public License for more details. 16 | * 17 | * You should have received a copy of the GNU Lesser General Public License 18 | * along with this software. If not, see . 19 | */ 20 | 21 | package gate.plugin.learningframework.mallet; 22 | 23 | import cc.mallet.pipe.Pipe; 24 | import cc.mallet.types.Alphabet; 25 | import cc.mallet.types.FeatureVector; 26 | import cc.mallet.types.Instance; 27 | import gate.plugin.learningframework.mbstats.FVStatsMeanVarAll; 28 | import gate.plugin.learningframework.mbstats.PerFeatureStats; 29 | import gate.util.GateRuntimeException; 30 | import java.io.Serializable; 31 | import java.util.List; 32 | 33 | /** 34 | * Pipe for normalizing features so they have mean 1 and standard deviation 1. 
35 | * 36 | * If a feature only has one value the variance is 0 so it is impossible to scale 37 | * to variance 1. 38 | * 39 | */ 40 | public class PipeScaleMeanVarAll extends Pipe implements Serializable { 41 | 42 | protected double means[]; 43 | protected double variances[]; 44 | protected boolean normalize[]; 45 | 46 | /** 47 | * Constructor from alphabet and stats. 48 | * @param alphabet alphabet 49 | * @param stats feature stats 50 | */ 51 | public PipeScaleMeanVarAll(Alphabet alphabet, FVStatsMeanVarAll stats) { 52 | super(alphabet, null); 53 | List pfss = stats.getStats(); 54 | int n = pfss.size(); 55 | means = new double[n]; 56 | variances = new double[n]; 57 | normalize = new boolean[n]; 58 | for(int i=0; i. 19 | */ 20 | package gate.plugin.learningframework.stats; 21 | 22 | import java.util.HashMap; 23 | import java.util.Map; 24 | 25 | /** 26 | * A simple lightweight wrapper class for maintaining stats about many features. 27 | * 28 | * This gathers statistics about many features, mapping feature names to 29 | * feature statistics. The kind of statistic gathered depends on the type 30 | * of value passed in for each data point. 31 | *

32 | * Currently statistics are calculated like this, depending on the type 33 | * of value passed in: 34 | *

    35 | *
  • Numeric: over the double representation of the value itself 36 | *
  • Boolean: over the 0/1 representation of false/true 37 | *
  • String: For now, no statistics are generated for this 38 | *
  • List/Array: over the size of the list or array 39 | *
40 | * 41 | * @author Johann Petrak 42 | */ 43 | public class StatsForFeatures { 44 | private Map feature2stats = new HashMap<>(); 45 | private final Object lockingObject = new Object(); 46 | 47 | public static final String KEY_FOR_TARGET = "╳TARGET╳"; 48 | 49 | /** 50 | * Add a value to the stats object 51 | * @param featureName feature name 52 | * @param value value to add 53 | */ 54 | public void addValue(String featureName, Object value) { 55 | synchronized(lockingObject) { 56 | Stats stats; 57 | if(feature2stats.containsKey(featureName)) { 58 | stats = feature2stats.get(featureName); 59 | } else { 60 | stats = new Stats(value); 61 | feature2stats.put(featureName, stats); 62 | } 63 | stats.addValue(value); 64 | } // synchronized 65 | } // addValue(...) 66 | 67 | /** 68 | * Get the statistics object for the feature. 69 | * @param featureName feature 70 | * @return stats object 71 | */ 72 | public Stats getStatistics(String featureName) { 73 | synchronized(lockingObject) { 74 | return feature2stats.get(featureName); 75 | } 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /src/main/java/org/apache/commons/clipatched/AlreadySelectedException.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. 
You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.apache.commons.clipatched; 19 | 20 | /** 21 | * Thrown when more than one option in an option group 22 | * has been provided. 23 | * 24 | * @version $Id: AlreadySelectedException.java 1443102 2013-02-06 18:12:16Z tn $ 25 | */ 26 | public class AlreadySelectedException extends ParseException 27 | { 28 | /** 29 | * This exception {@code serialVersionUID}. 30 | */ 31 | private static final long serialVersionUID = 3674381532418544760L; 32 | 33 | /** The option group selected. */ 34 | private OptionGroup group; 35 | 36 | /** The option that triggered the exception. */ 37 | private Option option; 38 | 39 | /** 40 | * Construct a new AlreadySelectedException 41 | * with the specified detail message. 42 | * 43 | * @param message the detail message 44 | */ 45 | public AlreadySelectedException(String message) 46 | { 47 | super(message); 48 | } 49 | 50 | /** 51 | * Construct a new AlreadySelectedException 52 | * for the specified option group. 53 | * 54 | * @param group the option group already selected 55 | * @param option the option that triggered the exception 56 | * @since 1.2 57 | */ 58 | public AlreadySelectedException(OptionGroup group, Option option) 59 | { 60 | this("The option '" + option.getKey() + "' was specified but an option from this group " 61 | + "has already been selected: '" + group.getSelected() + "'"); 62 | this.group = group; 63 | this.option = option; 64 | } 65 | 66 | /** 67 | * Returns the option group where another option has been selected. 
68 | * 69 | * @return the related option group 70 | * @since 1.2 71 | */ 72 | public OptionGroup getOptionGroup() 73 | { 74 | return group; 75 | } 76 | 77 | /** 78 | * Returns the option that was added to the group and triggered the exception. 79 | * 80 | * @return the related option 81 | * @since 1.2 82 | */ 83 | public Option getOption() 84 | { 85 | return option; 86 | } 87 | } 88 | -------------------------------------------------------------------------------- /src/main/java/org/apache/commons/clipatched/AmbiguousOptionException.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.apache.commons.clipatched; 19 | 20 | import java.util.Collection; 21 | import java.util.Iterator; 22 | 23 | /** 24 | * Exception thrown when an option can't be identified from a partial name. 25 | * 26 | * @version $Id: AmbiguousOptionException.java 1669814 2015-03-28 18:09:26Z britter $ 27 | * @since 1.3 28 | */ 29 | public class AmbiguousOptionException extends UnrecognizedOptionException 30 | { 31 | /** 32 | * This exception {@code serialVersionUID}. 
33 | */ 34 | private static final long serialVersionUID = 5829816121277947229L; 35 | 36 | /** The list of options matching the partial name specified */ 37 | private final Collection matchingOptions; 38 | 39 | /** 40 | * Constructs a new AmbiguousOptionException. 41 | * 42 | * @param option the partial option name 43 | * @param matchingOptions the options matching the name 44 | */ 45 | public AmbiguousOptionException(String option, Collection matchingOptions) 46 | { 47 | super(createMessage(option, matchingOptions), option); 48 | this.matchingOptions = matchingOptions; 49 | } 50 | 51 | /** 52 | * Returns the options matching the partial name. 53 | * @return a collection of options matching the name 54 | */ 55 | public Collection getMatchingOptions() 56 | { 57 | return matchingOptions; 58 | } 59 | 60 | /** 61 | * Build the exception message from the specified list of options. 62 | * 63 | * @param option 64 | * @param matchingOptions 65 | * @return 66 | */ 67 | private static String createMessage(String option, Collection matchingOptions) 68 | { 69 | StringBuilder buf = new StringBuilder("Ambiguous option: '"); 70 | buf.append(option); 71 | buf.append("' (could be: "); 72 | 73 | Iterator it = matchingOptions.iterator(); 74 | while (it.hasNext()) 75 | { 76 | buf.append("'"); 77 | buf.append(it.next()); 78 | buf.append("'"); 79 | if (it.hasNext()) 80 | { 81 | buf.append(", "); 82 | } 83 | } 84 | buf.append(")"); 85 | 86 | return buf.toString(); 87 | } 88 | } 89 | -------------------------------------------------------------------------------- /src/main/java/org/apache/commons/clipatched/BasicParser.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 
5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.apache.commons.clipatched; 19 | 20 | /** 21 | * The class BasicParser provides a very simple implementation of 22 | * the {@link Parser#flatten(Options,String[],boolean) flatten} method. 23 | * 24 | * @version $Id: BasicParser.java 1443102 2013-02-06 18:12:16Z tn $ 25 | * @deprecated since 1.3, use the {@link DefaultParser} instead 26 | */ 27 | @Deprecated 28 | public class BasicParser extends Parser 29 | { 30 | /** 31 | *

A simple implementation of {@link Parser}'s abstract 32 | * {@link Parser#flatten(Options, String[], boolean) flatten} method.

33 | * 34 | *

Note: options and stopAtNonOption 35 | * are not used in this flatten method.

36 | * 37 | * @param options The command line {@link Options} 38 | * @param arguments The command line arguments to be parsed 39 | * @param stopAtNonOption Specifies whether to stop flattening 40 | * when an non option is found. 41 | * @return The arguments String array. 42 | */ 43 | @Override 44 | protected String[] flatten(@SuppressWarnings("unused") Options options, 45 | String[] arguments, 46 | @SuppressWarnings("unused") boolean stopAtNonOption) 47 | { 48 | // just echo the arguments 49 | return arguments; 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /src/main/java/org/apache/commons/clipatched/CommandLineParser.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.apache.commons.clipatched; 19 | 20 | /** 21 | * A class that implements the CommandLineParser interface 22 | * can parse a String array according to the {@link Options} specified 23 | * and return a {@link CommandLine}. 
/**
 * A class that implements the CommandLineParser interface
 * can parse a String array according to the {@link Options} specified
 * and return a {@link CommandLine}.
 *
 * @version $Id: CommandLineParser.java 1443102 2013-02-06 18:12:16Z tn $
 */
public interface CommandLineParser
{
    /**
     * Parse the arguments according to the specified options.
     *
     * @param options the specified Options
     * @param arguments the command line arguments
     * @return the {@link CommandLine} produced by parsing the arguments
     *
     * @throws ParseException if there are any problems encountered
     * while parsing the command line tokens.
     */
    CommandLine parse(Options options, String[] arguments) throws ParseException;

    /**
     * Parse the arguments according to the specified options and
     * properties.
     *
     * @param options the specified Options
     * @param arguments the command line arguments
     * @param properties command line option name-value pairs
     * @return the {@link CommandLine} produced by parsing the arguments
     *
     * @throws ParseException if there are any problems encountered
     * while parsing the command line tokens.
     */
    /* To maintain binary compatibility, this is commented out.
       It is still in the abstract Parser class, so most users will
       still reap the benefit.
    CommandLine parse(Options options, String[] arguments, Properties properties)
               throws ParseException;
     */

    /**
     * Parse the arguments according to the specified options.
     *
     * @param options the specified Options
     * @param arguments the command line arguments
     * @param stopAtNonOption if true an unrecognized argument stops
     *     the parsing and the remaining arguments are added to the
     *     {@link CommandLine}s args list. If false an unrecognized
     *     argument triggers a ParseException.
     *
     * @return the {@link CommandLine} produced by parsing the arguments
     * @throws ParseException if there are any problems encountered
     * while parsing the command line tokens.
     */
    CommandLine parse(Options options, String[] arguments, boolean stopAtNonOption) throws ParseException;

    /**
     * Parse the arguments according to the specified options and
     * properties.
     *
     * @param options the specified Options
     * @param arguments the command line arguments
     * @param properties command line option name-value pairs
     * @param stopAtNonOption if true an unrecognized argument stops
     *     the parsing and the remaining arguments are added to the
     *     {@link CommandLine}s args list. If false an unrecognized
     *     argument triggers a ParseException.
     *
     * @return the {@link CommandLine} produced by parsing the arguments
     * @throws ParseException if there are any problems encountered
     * while parsing the command line tokens.
     */
    /* To maintain binary compatibility, this is commented out.
       It is still in the abstract Parser class, so most users will
       still reap the benefit.
    CommandLine parse(Options options, String[] arguments, Properties properties, boolean stopAtNonOption)
            throws ParseException;
     */
}
/**
 * Thrown when an option requiring an argument
 * is not provided with an argument.
 *
 * @version $Id: MissingArgumentException.java 1443102 2013-02-06 18:12:16Z tn $
 */
public class MissingArgumentException extends ParseException
{
    /**
     * This exception {@code serialVersionUID}.
     */
    private static final long serialVersionUID = -7098538588704965017L;

    /**
     * The option requiring additional arguments. Only set by the
     * {@link Option}-based constructor; it stays {@code null} when the
     * exception is created from a plain message.
     */
    private Option option;

    /**
     * Construct a new MissingArgumentException
     * with the specified detail message.
     *
     * @param message the detail message
     */
    public MissingArgumentException(String message)
    {
        super(message);
    }

    /**
     * Construct a new MissingArgumentException for the specified option.
     * The detail message is built from the option's key.
     *
     * @param option the option requiring an argument
     * @since 1.2
     */
    public MissingArgumentException(Option option)
    {
        this("Missing argument for option: " + option.getKey());
        this.option = option;
    }

    /**
     * Return the option requiring an argument that wasn't provided
     * on the command line.
     *
     * @return the related option, or {@code null} if the exception was
     *         created from a message only
     * @since 1.2
     */
    public Option getOption()
    {
        return option;
    }
}
63 | * 64 | * @return the related option 65 | * @since 1.2 66 | */ 67 | public Option getOption() 68 | { 69 | return option; 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /src/main/java/org/apache/commons/clipatched/MissingOptionException.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.apache.commons.clipatched; 19 | 20 | import java.util.List; 21 | import java.util.Iterator; 22 | 23 | /** 24 | * Thrown when a required option has not been provided. 25 | * 26 | * @version $Id: MissingOptionException.java 1443102 2013-02-06 18:12:16Z tn $ 27 | */ 28 | public class MissingOptionException extends ParseException 29 | { 30 | /** This exception {@code serialVersionUID}. */ 31 | private static final long serialVersionUID = 8161889051578563249L; 32 | 33 | /** The list of missing options and groups */ 34 | private List missingOptions; 35 | 36 | /** 37 | * Construct a new MissingSelectedException 38 | * with the specified detail message. 
39 | * 40 | * @param message the detail message 41 | */ 42 | public MissingOptionException(String message) 43 | { 44 | super(message); 45 | } 46 | 47 | /** 48 | * Constructs a new MissingSelectedException with the 49 | * specified list of missing options. 50 | * 51 | * @param missingOptions the list of missing options and groups 52 | * @since 1.2 53 | */ 54 | public MissingOptionException(List missingOptions) 55 | { 56 | this(createMessage(missingOptions)); 57 | this.missingOptions = missingOptions; 58 | } 59 | 60 | /** 61 | * Returns the list of options or option groups missing in the command line parsed. 62 | * 63 | * @return the missing options, consisting of String instances for simple 64 | * options, and OptionGroup instances for required option groups. 65 | * @since 1.2 66 | */ 67 | public List getMissingOptions() 68 | { 69 | return missingOptions; 70 | } 71 | 72 | /** 73 | * Build the exception message from the specified list of options. 74 | * 75 | * @param missingOptions the list of missing options and groups 76 | * @since 1.2 77 | */ 78 | private static String createMessage(List missingOptions) 79 | { 80 | StringBuilder buf = new StringBuilder("Missing required option"); 81 | buf.append(missingOptions.size() == 1 ? "" : "s"); 82 | buf.append(": "); 83 | 84 | Iterator it = missingOptions.iterator(); 85 | while (it.hasNext()) 86 | { 87 | buf.append(it.next()); 88 | if (it.hasNext()) 89 | { 90 | buf.append(", "); 91 | } 92 | } 93 | 94 | return buf.toString(); 95 | } 96 | } 97 | -------------------------------------------------------------------------------- /src/main/java/org/apache/commons/clipatched/OptionValidator.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 
/**
 * Checks that an option name only uses permitted characters.
 *
 * @version $Id: OptionValidator.java 1544819 2013-11-23 15:34:31Z tn $
 * @since 1.1
 */
final class OptionValidator
{
    /**
     * Validates whether {@code opt} is a permissible Option shortOpt.
     * The rules applied are:
     *
     * <ul>
     *  <li>a single character {@code opt} must be '?', '@' or a character
     *      accepted by {@link Character#isJavaIdentifierPart(char)}</li>
     *  <li>any other {@code opt} must consist solely of characters accepted
     *      by {@link Character#isJavaIdentifierPart(char)}</li>
     * </ul>
     * <p>
     * In case {@code opt} is {@code null} no further validation is performed.
     *
     * @param opt The option string to validate, may be null
     * @throws IllegalArgumentException if the Option is not valid.
     */
    static void validateOption(String opt) throws IllegalArgumentException
    {
        // nothing to verify for a missing short option
        if (opt == null)
        {
            return;
        }

        // single character names additionally allow '?' and '@'
        if (opt.length() == 1)
        {
            char single = opt.charAt(0);
            if (isValidOpt(single))
            {
                return;
            }
            throw new IllegalArgumentException("Illegal option name '" + single + "'");
        }

        // every character of a longer (or empty) name is checked in order
        for (int index = 0; index < opt.length(); index++)
        {
            char current = opt.charAt(index);
            if (!isValidChar(current))
            {
                throw new IllegalArgumentException("The option '" + opt + "' contains an illegal "
                                                   + "character : '" + current + "'");
            }
        }
    }

    /**
     * Tells whether the character may be used as a one-character option name.
     *
     * @param c the option to validate
     * @return true if c is '?', '@' or passes {@link #isValidChar(char)}.
     */
    private static boolean isValidOpt(char c)
    {
        if (c == '?' || c == '@')
        {
            return true;
        }
        return isValidChar(c);
    }

    /**
     * Tells whether the character may appear inside an option name.
     *
     * @param c the character to validate
     * @return the result of {@link Character#isJavaIdentifierPart(char)} for c.
     */
    private static boolean isValidChar(char c)
    {
        return Character.isJavaIdentifierPart(c);
    }
}
/**
 * Base for Exceptions thrown during parsing of a command-line.
 * The other exception classes of this package that are visible here
 * ({@code MissingArgumentException}, {@code MissingOptionException},
 * {@code UnrecognizedOptionException}) extend this class.
 *
 * @version $Id: ParseException.java 1443102 2013-02-06 18:12:16Z tn $
 */
public class ParseException extends Exception
{
    /**
     * This exception {@code serialVersionUID}.
     */
    private static final long serialVersionUID = 9112808380089253192L;

    /**
     * Construct a new ParseException
     * with the specified detail message.
     *
     * @param message the detail message
     */
    public ParseException(String message)
    {
        super(message);
    }
}
/**
 * Exception thrown during parsing signalling an unrecognized
 * option was seen.
 *
 * @version $Id: UnrecognizedOptionException.java 1443102 2013-02-06 18:12:16Z tn $
 */
public class UnrecognizedOptionException extends ParseException
{
    /**
     * This exception {@code serialVersionUID}.
     */
    private static final long serialVersionUID = -252504690284625623L;

    /**
     * The unrecognized option. Only set by the two-argument constructor;
     * it stays {@code null} when the exception is created from a message only.
     */
    private String option;

    /**
     * Construct a new UnrecognizedOptionException
     * with the specified detail message.
     *
     * @param message the detail message
     */
    public UnrecognizedOptionException(String message)
    {
        super(message);
    }

    /**
     * Construct a new UnrecognizedOptionException
     * with the specified option and detail message.
     *
     * @param message the detail message
     * @param option  the unrecognized option
     * @since 1.2
     */
    public UnrecognizedOptionException(String message, String option)
    {
        this(message);
        this.option = option;
    }

    /**
     * Returns the unrecognized option.
     *
     * @return the related option, or {@code null} if none was supplied
     * @since 1.2
     */
    public String getOption()
    {
        return option;
    }
}
/**
 * Contains useful helper methods for classes within this package.
 *
 * @version $Id: Util.java 1443102 2013-02-06 18:12:16Z tn $
 */
final class Util
{
    /**
     * Remove the hyphens from the beginning of <code>str</code> and
     * return the new String. At most two leading hyphens are removed:
     * a leading "--" is stripped as a pair, otherwise a single leading "-".
     *
     * @param str The string from which the hyphens should be removed,
     *            may be {@code null}.
     *
     * @return the new String, or {@code null} if {@code str} is {@code null}.
     */
    static String stripLeadingHyphens(String str)
    {
        if (str == null)
        {
            return null;
        }
        if (str.startsWith("--"))
        {
            return str.substring(2);
        }
        if (str.startsWith("-"))
        {
            return str.substring(1);
        }

        return str;
    }

    /**
     * Remove the leading and trailing quotes from <code>str</code>.
     * E.g. if str is '"one two"', then 'one two' is returned.
     * The quotes are only removed when the string is longer than one
     * character, starts and ends with a double quote and contains no
     * further double quote in between; otherwise the string is returned
     * unchanged.
     *
     * @param str The string from which the leading and trailing quotes
     *            should be removed, may be {@code null}.
     *
     * @return The string without the leading and trailing quotes, or
     *         {@code null} if {@code str} is {@code null}.
     */
    static String stripLeadingAndTrailingQuotes(String str)
    {
        // mirror stripLeadingHyphens: a null input is passed through instead of failing
        if (str == null)
        {
            return null;
        }

        int length = str.length();
        // The first quote found after position 0 must be the closing one,
        // i.e. there is no additional quote inside the quoted text.
        if (length > 1 && str.startsWith("\"") && str.endsWith("\"") && str.indexOf('"', 1) == length - 1)
        {
            return str.substring(1, length - 1);
        }

        return str;
    }
}

Commons CLI -- version 1.3

20 | 21 |

The commons-cli package aids in parsing command-line arguments.

22 | 23 |

Allow command-line arguments to be parsed against a descriptor of 24 | valid options (long and short), potentially with arguments.

25 | 26 |

command-line arguments may be of the typical String[] 27 | form, but also may be a java.util.List. Indexes allow 28 | for parsing only a portion of the command-line. Also, functionality 29 | for parsing the command-line in phases is built in, allowing for 30 | 'cvs-style' command-lines, where some global options are specified 31 | before a 'command' argument, and command-specific options are 32 | specified after the command argument: 33 | 34 | 35 |

36 |         myApp -p <port> command -p <printer>
37 |     
38 | 39 | 40 | 41 |

The homepage for the project is 42 | Apache Commons/ 43 | 44 | -------------------------------------------------------------------------------- /src/main/java/org/apache/commons/clipatched/package-info.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | /** 19 | * Commons CLI 1.3 20 | * 21 | * @version $Id: package-info.java 1443102 2013-02-06 18:12:16Z tn $ 22 | */ 23 | package org.apache.commons.clipatched; 24 | -------------------------------------------------------------------------------- /src/main/resources/creole.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | -------------------------------------------------------------------------------- /src/main/resources/resources/pipelines/.LF_TrainTopicModel_Mallet_EN.metadata/long-desc.html: -------------------------------------------------------------------------------- 1 | 2 |

Pipeline LF_TrainTopicModel_Mallet_EN

3 | 4 | A pipeline for training a topic model on the filtered tokens for 5 | English documents. 6 |

7 | This pipeline expects Token annotations with POS tags (in feature "category") in 8 | the default annotation set and filters them by token kind, 9 | English stop words, and POS tag to create TokenWord annotations in the "LDA" annotation set 10 | which is used for training the Mallet topic model. 11 |

12 | More information: Pipeline LF_TrainTopicModel_Mallet_EN 13 |

This is part of the GATE Learning Framework Plugin 15 | -------------------------------------------------------------------------------- /src/main/resources/resources/pipelines/.LF_TrainTopicModel_Mallet_EN.metadata/metadata.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | LF Train Mallet Topic Model, EN 4 | LF_TrainTopicModel_Mallet_EN 5 | 6 | English 7 | Topic Models 8 | 9 | 10 | -------------------------------------------------------------------------------- /src/main/resources/resources/pipelines/.LF_TrainTopicModel_Mallet_EN.metadata/short-desc.html: -------------------------------------------------------------------------------- 1 | Pipeline for training a topic model using the Mallet LDA algorithm. 2 | This pipeline filters the document tokens by token kind, 3 | English stop words, and POS tag (Penn tagset in feature "category"). 4 | -------------------------------------------------------------------------------- /src/main/resources/resources/pipelines/gazetteer/stopwords-en-long.def: -------------------------------------------------------------------------------- 1 | stopwords-en-long.lst:stop:en: 2 | -------------------------------------------------------------------------------- /src/main/resources/resources/pipelines/gazetteer/stopwords-en.def: -------------------------------------------------------------------------------- 1 | stopwords-en.lst:stop:en: 2 | -------------------------------------------------------------------------------- /src/main/resources/resources/pipelines/groovy/filterTokens4LDA.groovy: -------------------------------------------------------------------------------- 1 | // pre-filter tokens and put subset into set LDA 2 | // Johann Petrak, 2018-09-27 3 | import gate.Utils; 4 | 5 | // remove what we added last time, if any 6 | oldAnns = outputAS.get("TokenWord") 7 | outputAS.removeAll(oldAnns) 8 | 9 | for(Annotation ann : inputAS.get("Token")) { 10 | fm = ann.getFeatures() 11 
| kind = fm.get("kind") 12 | pick = true 13 | if(!kind.equals("word")) { 14 | pick = false 15 | } 16 | pos = (String)fm.get("category") 17 | if(pos.startsWith("V")) { 18 | pick = false 19 | } 20 | if(pick) { 21 | str = (String)fm.get("string") 22 | if(str.length() > 1) { 23 | fm.put("lc_string",str.toLowerCase()) 24 | gate.Utils.addAnn(outputAS, ann, "TokenWord", fm) 25 | } 26 | } 27 | } -------------------------------------------------------------------------------- /src/main/resources/resources/pipelines/groovy/removeUnwantedTokens4LDA.groovy: -------------------------------------------------------------------------------- 1 | // Remove all the TokenWord annotations within Unwanted annotations 2 | import gate.Utils 3 | 4 | Set toDelete = [] 5 | 6 | for(Annotation unwanted : inputAS.get("Unwanted")) { 7 | contained = gate.Utils.getContainedAnnotations(inputAS, unwanted) 8 | toDelete.addAll(contained) 9 | } 10 | 11 | outputAS.removeAll(toDelete) -------------------------------------------------------------------------------- /src/main/resources/resources/pipelines/regexp/unwantedText4LDA.txt: -------------------------------------------------------------------------------- 1 | // URLs 2 | |(https?://[-a-zA-Z0-9+&@#/%?=~_|!:,.;]*[-a-zA-Z0-9+&@#/%=~_|]) 3 | 1 => Unwanted type="url" 4 | 5 | // (some) email addresses 6 | |(\b[a-zA-Z0-9!#$%&'*+/=\?^_`{|}~-]{1,64}(?:\.[a-zA-Z0-9!#$%&'*+/=\?^_`{|}~-]{1,64}){0,32})@([a-zA-Z0-9-]{1,63}(?:\.[a-zA-Z0-9-]{1,63}){1,32}\b) 7 | 1 => Unwanted type="email" 8 | -------------------------------------------------------------------------------- /src/main/resources/resources/wrappers/FileJsonKeras/apply.cmd: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | if "x%WRAPPER_HOME%"=="x" ( 4 | set WRAPPER_HOME=%~dp0 5 | ) 6 | SET model=%1 7 | shift 8 | 9 | : create var with remaining arguments 10 | set r=%1 11 | :loop 12 | shift 13 | if "x%1"=="x" goto done 14 | set r=%r% %1 15 | goto loop 
# Application driver of the FileJsonKeras wrapper: restores a saved model and
# answers prediction requests that arrive as one JSON document per line on
# stdin, writing one JSON response per request to stdout.
import sys
import json
import os
import logging
from gatelfdata import Dataset
#from gatelfkerasjson import ???
from gatelfkerasjson import KerasWrapperImpl1

# Positional arguments: model file prefix, meta file, data directory
modelprefix=sys.argv[1]
metafile=sys.argv[2]
datadir=sys.argv[3]

# Set up logging
logger = logging.getLogger("gatelfdata")
logger.setLevel(logging.ERROR)
logger = logging.getLogger("gatelfkerasjson")
logger.setLevel(logging.DEBUG)
streamhandler = logging.StreamHandler(stream=sys.stderr)
formatter = logging.Formatter(
                '%(asctime)s %(name)-12s %(levelname)-8s %(message)s')
streamhandler.setFormatter(formatter)
logger.addHandler(streamhandler)
filehandler = logging.FileHandler(os.path.join(datadir,"FileJsonKerasWrapper.train.log"))
logger.addHandler(filehandler)

# restore the wrapper
ds = Dataset(metafile, targets_need_padding=False)
wrapper = KerasWrapperImpl1(ds)
wrapper.loadModel(modelprefix)

with sys.stdin as infile:
    for line in infile:
        #! print("PYTHON FileJsonKeras APPLICATION, input=",line,file=sys.stderr)
        # Lines read from stdin keep their trailing newline, so the sentinel
        # has to be compared against the stripped line or it never matches.
        if line.strip() == "STOP":
            break
        # TODO: currently the LF sends individual instances here, we may want to change
        # However we need to always apply to a set of instances, so wrap into another array
        instancedata = json.loads(line)
        # TODO: better error handling: put the apply call into a try block and catch any error, also
        # check returned data. If there is a problem send back in the map we return!!
        # NOTE: the LF expects to get a map with the following elements:
        # status: must be "ok", anything else is interpreted as an error
        # output: the actual prediction: gets extracted from the returned data here
        # confidence: some confidence/probability score for the output, may be null: this gets extracted
        # from our returned data here
        # confidences: a map with confidences for all labels, may be null: this is NOT SUPPORTED in the LF yet!
        preds=wrapper.applyModel(instancedata)
        #! print("PYTHON APPLICATION, preds=", preds, file=sys.stderr)
        # preds are a list of one or two lists, where the first list contains all the labels and the second
        # list contains all the confidences in the order used by the model.
        # For now we just extract the label or for a sequence, the list of labels, knowing that for now we always process only one instance/sequence!
        ret = {"status":"ok", "output":preds[0]}
        #! print("PYTHON FileJsonKeras APPLICATION, return=", ret, file=sys.stderr)
        print(json.dumps(ret))
        # TODO: IMPORTANT!!! What the model returns is currently different from what the LF code expects!!!
        # flush after every response so the reader of stdout is not left waiting on buffered data
        sys.stdout.flush()
#!/bin/bash
# Launcher for the FileJsonKeras application script (apply.py).
# Usage: apply.sh <modelbase> <metafile> <wrapperdir> [additional parameters...]

## Args we should get
modelbase="$1"
shift
metafile="$1"
shift
wrapperdir="$1"
shift

wrapperapply="$wrapperdir/apply.py"

# Absolute physical directory of the model. The variable must be expanded
# here: a literal "modelbase" would always yield the current directory.
datadir=$(dirname "$modelbase")
datadir=$(cd "$datadir"; pwd -P)

# Use "python" if it is a Python 3 interpreter, otherwise fall back to "python3"
versionpython="UNKNOWN"
wherepython=$(which python)
if [[ "x$wherepython" != "x" ]]
then
  # "python -V" may print to stderr, hence |&
  versionpython=$(python -V |& cut -f 2 -d " " | cut -f 1 -d'.')
fi
if [[ "$versionpython" != "3" ]]
then
  wherepython=$(which python3)
  if [[ "x$wherepython" == "x" ]]
  then
    echo 'ERROR: could not find a python 3 interpreter, exiting' >&2
    exit 1
  fi
fi

export PYTHONPATH="$wrapperdir/gate-lf-python-data:$wrapperdir/gate-lf-keras-json"


echo 'MODEL BASE NAME = ' "$modelbase" >&2
echo 'META FILE       = ' "$metafile" >&2
echo 'DATA DIR        = ' "$datadir" >&2
echo 'WRAPPER SCRIPT  = ' "$wrapperapply" >&2
echo 'ADDITIONALPARMS = ' "$@" >&2
echo 'PYTHON          = ' "$wherepython" >&2
echo 'PYTHONPATH      = ' "$PYTHONPATH" >&2
echo 'RUNNING         = ' "${wherepython}" "${wrapperapply}" "${modelbase}" "${metafile}" "${datadir}" "$@" >&2

"${wherepython}" "${wrapperapply}" "${modelbase}" "${metafile}" "${datadir}" "$@"
arguments 12 | set r=%1 13 | :loop 14 | shift 15 | if "x%1"=="x" goto done 16 | set r=%r% %1 17 | goto loop 18 | :done 19 | 20 | if "x%PYTHON_BIN%"=="x" ( 21 | set PYTHON_BIN="%HOMEDRIVE%""%HOMEPATH%"\Miniconda3\python.exe 22 | ) 23 | %PYTHON_BIN% %WRAPPER_HOME%\train.py %data% %model% %r% 24 | -------------------------------------------------------------------------------- /src/main/resources/resources/wrappers/FileJsonKeras/train.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import json 3 | import os 4 | import logging 5 | from gatelfdata import Dataset 6 | from gatelfkerasjson import KerasWrapperImpl1 7 | modelprefix=sys.argv[1] 8 | metafile=sys.argv[2] 9 | datadir=sys.argv[3] 10 | 11 | # Set up logging 12 | logger = logging.getLogger("gatelfdata") 13 | logger.setLevel(logging.ERROR) 14 | logger = logging.getLogger("gatelfkerasjson") 15 | logger.setLevel(logging.DEBUG) 16 | streamhandler = logging.StreamHandler(stream=sys.stderr) 17 | formatter = logging.Formatter( 18 | '%(asctime)s %(name)-12s %(levelname)-8s %(message)s') 19 | streamhandler.setFormatter(formatter) 20 | logger.addHandler(streamhandler) 21 | filehandler = logging.FileHandler(os.path.join(datadir,"FileJsonPyTorch.train.log")) 22 | logger.addHandler(filehandler) 23 | 24 | ds = Dataset(metafile, targets_need_padding=False) 25 | 26 | kerasModel = KerasWrapperImpl1(ds) 27 | kerasModel.genKerasModel() 28 | kerasModel.trainModel() 29 | kerasModel.saveModel(modelprefix) 30 | 31 | -------------------------------------------------------------------------------- /src/main/resources/resources/wrappers/FileJsonKeras/train.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ## Args we should get 4 | metafile="$1" 5 | shift 6 | modelbase="$1" 7 | shift 8 | datadir=`dirname $metafile` 9 | datadir=`cd $datadir; pwd -P` 10 | 11 | wrapperdir=$datadir/FileJsonKeras 12 | 
wrappertrain=$wrapperdir/train.py 13 | 14 | versionpython="UNKNOWN" 15 | wherepython=`which python` 16 | if [[ "x$wherepython" != "x" ]] 17 | then 18 | versionpython=`python -V |& cut -f 2 -d " " | cut -f 1 -d'.'` 19 | fi 20 | if [[ "$versionpython" == "3" ]] 21 | then 22 | pythoncmd=$wherepython 23 | else 24 | wherepython=`which python3` 25 | if [[ "x$wherepython" == "x" ]] 26 | then 27 | echo 'ERROR: could not find a python 3 interpreter, exiting' 28 | exit 1 29 | fi 30 | fi 31 | 32 | export PYTHONPATH="$wrapperdir/gate-lf-python-data:$wrapperdir/gate-lf-keras-json" 33 | 34 | echo 'MODEL BASE NAME = ' $modelbase >&2 35 | echo 'META FILE = ' $metafile >&2 36 | echo 'DATA DIR = ' $datadir >&2 37 | echo 'WRAPPER SCRIPT = ' $wrappertrain >&2 38 | echo 'ADDITIONALPARMS = ' "$@" >&2 39 | echo 'PYTHON = ' $wherepython >&2 40 | echo 'PYTHONPATH = ' $PYTHONPATH >&2 41 | echo 'RUNNING = ' ${wherepython} "${wrappertrain}" "${modelbase}" "${metafile}" "${datadir}" "$@" >&2 42 | 43 | ${wherepython} "${wrappertrain}" "${modelbase}" "${metafile}" "${datadir}" "$@" 44 | 45 | -------------------------------------------------------------------------------- /src/main/resources/resources/wrappers/FileJsonKeras/wrapperInfo.yaml: -------------------------------------------------------------------------------- 1 | - version: 1.0 2 | 3 | 4 | -------------------------------------------------------------------------------- /src/main/resources/resources/wrappers/FileJsonPyTorch/apply.cmd: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | if "x%WRAPPER_HOME%"=="x" ( 4 | set WRAPPER_HOME=%~dp0 5 | ) 6 | SET model=%1 7 | shift 8 | SET meta=%1 9 | shift 10 | shift 11 | 12 | : create var with remaining arguments 13 | set r=%1 14 | :loop 15 | shift 16 | if "x%1"=="x" goto done 17 | set r=%r% %1 18 | goto loop 19 | :done 20 | 21 | if "x%PYTHON_BIN%"=="x" ( 22 | set PYTHON_BIN="%HOMEDRIVE%""%HOMEPATH%"\Miniconda3\python.exe 23 | ) 24 | 
#!/bin/bash
#
# Apply a trained FileJsonPyTorch model by running the wrapper's apply.py.
#
# Usage: apply.sh MODELBASE METAFILE WRAPPERDIR [additional apply.py options]
#
# The Python interpreter is taken from PYTHON_BIN if that variable is set;
# otherwise "python" is used if it reports major version 3, else "python3".
# Exit status: 0 if apply.py succeeded, 127 if it failed, 1 if the arguments
# are incomplete or no Python 3 interpreter could be found.

## Args we should get
modelbase="$1"
shift
metafile="$1"
shift
wrapperdir="$1"
shift

if [[ "x$wrapperdir" == "x" ]]
then
  echo 'Error: three parameters required: modelprefix, metafile and wrapperdir' >&2
  exit 1
fi

wrapperapply="$wrapperdir/gate-lf-pytorch-json/apply.py"

# Absolute directory of the model files. The original used the literal word
# "modelbase" here (missing "$"), which always yielded the current directory.
# The value is only used by the debug output below.
datadir=$(dirname "$modelbase")
datadir=$(cd "$datadir" && pwd -P)

if [[ -z "${PYTHON_BIN}" ]]
then
  versionpython="UNKNOWN"
  wherepython=$(which python)
  if [[ "x$wherepython" != "x" ]]
  then
    # "python -V" may print to stderr, hence "|&"; keep only the major version.
    versionpython=$(python -V |& cut -f 2 -d " " | cut -f 1 -d'.')
  fi
  if [[ "$versionpython" != "3" ]]
  then
    # Plain "python" is missing or not version 3: fall back to python3.
    wherepython=$(which python3)
    if [[ "x$wherepython" == "x" ]]
    then
      echo 'ERROR: could not find a python 3 interpreter, exiting' >&2
      exit 1
    fi
  fi
else
  wherepython="${PYTHON_BIN}"
fi

export PYTHONPATH="$wrapperdir/gate-lf-python-data:$wrapperdir/gate-lf-pytorch-json"

# Uncomment for debugging:
#echo 'PYTHON_BIN       = ' ${PYTHON_BIN} >&2
#echo 'MODEL BASE NAME  = ' $modelbase >&2
#echo 'META FILE        = ' $metafile >&2
#echo 'DATA DIR         = ' $datadir >&2
#echo 'WRAPPER SCRIPT   = ' $wrapperapply >&2
#echo 'ADDITIONALPARMS  = ' "$@" >&2
#echo 'PYTHON           = ' $wherepython >&2
#echo 'PYTHONPATH       = ' $PYTHONPATH >&2
#echo 'RUNNING          = ' ${wherepython} "${wrapperapply}" "${modelbase}" "$@" >&2

# ${wherepython} is deliberately left unquoted, as in the original, so that a
# PYTHON_BIN value carrying extra interpreter options keeps working.
if ${wherepython} "${wrapperapply}" "${modelbase}" --cuda False --metafile "${metafile}" "$@" ; then
  echo 'PROCESSING OK ' $? >&2
  exit 0
else
  echo 'PROCESSING ERROR ' $? >&2
  exit 127
fi
/**
 * Test case for the saved application (gapp) files of this plugin.
 * <p>
 * The class declares no tests of its own; everything that runs is inherited
 * from {@link GappLoadingTestCase}.
 * NOTE(review): presumably the inherited tests try to load each gapp file that
 * ships with the plugin -- confirm against the GATE test utilities.
 */
public class GappLoadingTest extends GappLoadingTestCase {

}
6 | * 7 | * This program is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU Lesser General Public License as published by 9 | * the Free Software Foundation, either version 2.1 of the License, or 10 | * (at your option) any later version. 11 | * 12 | * This program is distributed in the hope that it will be useful, 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | * GNU Lesser General Public License for more details. 16 | * 17 | * You should have received a copy of the GNU Lesser General Public License 18 | * along with this software. If not, see . 19 | */ 20 | 21 | package gate.plugin.learningframework.tests; 22 | 23 | import gate.AnnotationSet; 24 | import gate.Document; 25 | import gate.creole.ResourceInstantiationException; 26 | import gate.plugin.learningframework.ScalingMethod; 27 | import gate.plugin.learningframework.data.CorpusRepresentationMalletTarget; 28 | import gate.plugin.learningframework.features.FeatureInfo; 29 | import gate.plugin.learningframework.features.FeatureSpecification; 30 | import gate.plugin.learningframework.features.TargetType; 31 | import static gate.plugin.learningframework.tests.Utils.loadDocument; 32 | import gate.util.GateException; 33 | import java.io.File; 34 | import java.net.MalformedURLException; 35 | import org.junit.Test; 36 | import org.junit.BeforeClass; 37 | import gate.test.GATEPluginTests; 38 | 39 | /** 40 | * 41 | * @author Johann Petrak 42 | */ 43 | public class ITFeatureScaling extends GATEPluginTests { 44 | 45 | @BeforeClass 46 | public static void init() throws GateException { 47 | gate.Gate.init(); 48 | } 49 | 50 | @Test 51 | public void testEngineMalletClass1() throws MalformedURLException, ResourceInstantiationException { 52 | File configFile = new File("tests/cl-ionosphere/feats.xml"); 53 | FeatureSpecification spec = new FeatureSpecification(configFile); 54 | FeatureInfo 
featureInfo = spec.getFeatureInfo(); 55 | featureInfo.setGlobalScalingMethod(ScalingMethod.MEANVARIANCE_ALL_FEATURES); 56 | CorpusRepresentationMalletTarget crm = new CorpusRepresentationMalletTarget(featureInfo, TargetType.NOMINAL); 57 | 58 | Document doc = loadDocument(new File("tests/cl-ionosphere/ionosphere_gate.xml")); 59 | 60 | AnnotationSet instanceAS = doc.getAnnotations().get("Mention"); 61 | AnnotationSet sequenceAS = null; 62 | AnnotationSet inputAS = doc.getAnnotations(); 63 | AnnotationSet classAS = null; 64 | String targetFeature = "class"; 65 | String nameFeature = null; 66 | crm.add(instanceAS, sequenceAS, inputAS, classAS, targetFeature, TargetType.NOMINAL, "", nameFeature, null); 67 | 68 | System.err.println("TESTS Scaling 1: added instances, number of instances now: "+crm.getRepresentationMallet().size()); 69 | 70 | // TODO: make this test actually work! 71 | File outDir1 = new File(new File(System.getProperty("java.io.tmpdir")),"lf-unscaled"); 72 | outDir1.mkdir(); 73 | File outDir2 = new File(new File(System.getProperty("java.io.tmpdir")),"lf-scaled"); 74 | outDir2.mkdir(); 75 | //System.err.println("Exporting unscaled"); 76 | //Exporter.export(crm, Exporter.ARFF_CL_MR, outDir1, "Mention", ""); 77 | crm.finishAdding(); 78 | //System.err.println("Exporting scaled"); 79 | //Exporter.export(crm, Exporter.ARFF_CL_MR, outDir2, "Mention", ""); 80 | 81 | } 82 | 83 | } 84 | -------------------------------------------------------------------------------- /src/test/java/gate/plugin/learningframework/tests/TestFeatureSpecification.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015-2016 The University Of Sheffield. 3 | * 4 | * This file is part of gateplugin-LearningFramework 5 | * (see https://github.com/GateNLP/gateplugin-LearningFramework). 
6 | * 7 | * This program is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU Lesser General Public License as published by 9 | * the Free Software Foundation, either version 2.1 of the License, or 10 | * (at your option) any later version. 11 | * 12 | * This program is distributed in the hope that it will be useful, 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | * GNU Lesser General Public License for more details. 16 | * 17 | * You should have received a copy of the GNU Lesser General Public License 18 | * along with this software. If not, see . 19 | */ 20 | 21 | package gate.plugin.learningframework.tests; 22 | 23 | import gate.plugin.learningframework.features.FeatureSpecAttribute; 24 | import gate.plugin.learningframework.features.FeatureInfo; 25 | import gate.plugin.learningframework.features.FeatureSpecification; 26 | import java.util.List; 27 | import org.junit.Test; 28 | import static org.junit.Assert.*; 29 | import gate.test.GATEPluginTests; 30 | 31 | 32 | /** 33 | * Tests for the FeatureSpecification parsing and creation of FeatureInfo. 
public class TestFeatureSpecification extends GATEPluginTests {

  /**
   * Parses three feature specifications in turn and, for each one, checks that
   * exactly one attribute is created and that its string form matches the
   * expected description. Finally checks that two calls to
   * {@code getFeatureInfo()} return distinct objects.
   * <p>
   * NOTE(review): the specification literals below contain no XML markup and
   * {@code List} has no type argument. This looks like angle-bracket content
   * was stripped when this text was extracted -- compare with the file in the
   * repository before relying on these literals.
   */
  @Test
  public void basicSpecParsing1() {
    // Case 1: an attribute with only a type given.
    String spec = ""+
            "theType"+
            "";
    FeatureSpecification fs;
    FeatureInfo fi;
    List as;
    fs = new FeatureSpecification(spec);
    fi = fs.getFeatureInfo();
    as = fi.getAttributes();
    assertNotNull(as);
    assertEquals(1,as.size());
    assertEquals("SimpleAttribute(name=,type=theType,feature=,datatype=bool,missingvaluetreatment=zero_value,within=null,codeas=number",as.get(0).toString());

    // Case 2: an attribute list with type, feature, datatype and a from/to range.
    spec = ""+
            "theTypestringnominal-21"+
            "";
    fs = new FeatureSpecification(spec);
    fi = fs.getFeatureInfo();
    as = fi.getAttributes();
    assertNotNull(as);
    assertEquals(1,as.size());
    assertEquals("AttributeList(name=,type=theType,feature=string,datatype=nominal,missingvaluetreatment=keep,codeas=one_of_k,within=null,from=-2,to=1",as.get(0).toString());

    // Case 3: an n-gram attribute with type, feature and number.
    spec = ""+
            "theTypetheFeature3"+
            "";
    fs = new FeatureSpecification(spec);
    fi = fs.getFeatureInfo();
    as = fi.getAttributes();
    assertNotNull(as);
    assertEquals(1,as.size());
    assertEquals("NgramAttribute(name=,type=theType,feature=theFeature,featureName4Value=,number=3,maxlen=0,shorten=",as.get(0).toString());

    // make sure that the feature info object we get from the feature specification is a clone
    FeatureInfo fi2 = fs.getFeatureInfo();
    assertFalse(fi == fi2);

  }

}
6 | * 7 | * This program is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU Lesser General Public License as published by 9 | * the Free Software Foundation, either version 2.1 of the License, or 10 | * (at your option) any later version. 11 | * 12 | * This program is distributed in the hope that it will be useful, 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | * GNU Lesser General Public License for more details. 16 | * 17 | * You should have received a copy of the GNU Lesser General Public License 18 | * along with this software. If not, see . 19 | */ 20 | 21 | package gate.plugin.learningframework.tests; 22 | 23 | import gate.plugin.learningframework.engines.Info; 24 | import java.io.File; 25 | import java.net.MalformedURLException; 26 | import java.net.URL; 27 | import org.junit.Test; 28 | import static org.junit.Assert.*; 29 | import gate.test.GATEPluginTests; 30 | 31 | /** 32 | * 33 | * @author Johann Petrak 34 | */ 35 | public class TestInfo extends GATEPluginTests { 36 | @Test 37 | public void testInfo1() throws MalformedURLException { 38 | Info info = new Info(); 39 | info.trainerClass = "theAlgorithmClass"; 40 | info.engineClass = "theEngineClass"; 41 | info.nrTrainingInstances = 2; 42 | File directory = new File("/tmp/testInfo"); 43 | directory.mkdir(); 44 | info.save(directory); 45 | URL dirURL = directory.toURI().toURL(); 46 | Info info2 = Info.load(dirURL); 47 | System.err.println("Info1="+info); 48 | System.err.println("Info2="+info2); 49 | assertEquals(info, info2); 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /src/test/java/gate/plugin/learningframework/tests/TestParms.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015-2016 The University Of Sheffield. 
3 | * 4 | * This file is part of gateplugin-LearningFramework 5 | * (see https://github.com/GateNLP/gateplugin-LearningFramework). 6 | * 7 | * This program is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU Lesser General Public License as published by 9 | * the Free Software Foundation, either version 2.1 of the License, or 10 | * (at your option) any later version. 11 | * 12 | * This program is distributed in the hope that it will be useful, 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | * GNU Lesser General Public License for more details. 16 | * 17 | * You should have received a copy of the GNU Lesser General Public License 18 | * along with this software. If not, see . 19 | */ 20 | 21 | package gate.plugin.learningframework.tests; 22 | 23 | import gate.plugin.learningframework.engines.Parms; 24 | import org.junit.Test; 25 | import static org.junit.Assert.*; 26 | import gate.test.GATEPluginTests; 27 | 28 | /** 29 | * Tester for the Parms class. 30 | * @author Johann Petrak 31 | */ 32 | public class TestParms extends GATEPluginTests { 33 | @Test 34 | public void testParms1() { 35 | Parms ps = new Parms("-toIgnore -maxDepth 3 -prune ", "m:maxDepth:i", "p:prune:b", "x:xoxo:d"); 36 | assertEquals(3,ps.size()); 37 | assertEquals(3,ps.getValue("maxDepth")); 38 | assertEquals(true,ps.getValue("prune")); 39 | assertEquals(2.0,(double)ps.getValueOrElse("xoxo",2.0),0.001); 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /src/test/java/gate/plugin/learningframework/tests/TestStats.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015-2016 The University Of Sheffield. 3 | * 4 | * This file is part of gateplugin-LearningFramework 5 | * (see https://github.com/GateNLP/gateplugin-LearningFramework). 
6 | * 7 | * This program is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU Lesser General Public License as published by 9 | * the Free Software Foundation, either version 2.1 of the License, or 10 | * (at your option) any later version. 11 | * 12 | * This program is distributed in the hope that it will be useful, 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | * GNU Lesser General Public License for more details. 16 | * 17 | * You should have received a copy of the GNU Lesser General Public License 18 | * along with this software. If not, see . 19 | */ 20 | 21 | package gate.plugin.learningframework.tests; 22 | 23 | import gate.plugin.learningframework.stats.Stats; 24 | import gate.plugin.learningframework.stats.StatsForFeatures; 25 | import java.net.MalformedURLException; 26 | import static org.junit.Assert.*; 27 | import org.junit.Test; 28 | import gate.test.GATEPluginTests; 29 | 30 | /** 31 | * 32 | * @author Johann Petrak 33 | */ 34 | public class TestStats extends GATEPluginTests { 35 | @Test 36 | public void testStats1() throws MalformedURLException { 37 | StatsForFeatures stats = new StatsForFeatures(); 38 | stats.addValue("feature1", 13); 39 | stats.addValue("feature2", true); 40 | stats.addValue("feature3", new double[]{1.0,2.0}); 41 | stats.addValue("feature1", 0.0); 42 | stats.addValue("feature2", true); 43 | stats.addValue("feature3", new double[]{1.0,2.0,3.0,4.0}); 44 | stats.addValue("feature1", 2); 45 | stats.addValue("feature2", false); 46 | stats.addValue("feature3", new double[]{1.0}); 47 | Stats st_feature1 = stats.getStatistics("feature1"); 48 | Stats st_feature2 = stats.getStatistics("feature2"); 49 | Stats st_feature3 = stats.getStatistics("feature3"); 50 | //System.err.println("TestStats/testStats1 Debug: feature1="+st_feature1.toString()); 51 | //System.err.println("TestStats/testStats1 Debug: 
feature2="+st_feature2.toString()); 52 | //System.err.println("TestStats/testStats1 Debug: feature3="+st_feature3.toString()); 53 | 54 | assertEquals(3,st_feature1.getN()); 55 | assertEquals(0.0,st_feature1.getMin(),0.00001); 56 | assertEquals(13.0,st_feature1.getMax(),0.00001); 57 | assertEquals(49.0,st_feature1.getVariance(),0.00001); 58 | 59 | assertEquals(3,st_feature2.getN()); 60 | assertEquals(0.0,st_feature2.getMin(),0.00001); 61 | assertEquals(1.0,st_feature2.getMax(),0.00001); 62 | assertEquals(0.33333333333333333,st_feature2.getVariance(),0.0001); 63 | 64 | assertEquals(3,st_feature3.getN()); 65 | assertEquals(1.0,st_feature3.getMin(),0.00001); 66 | assertEquals(4.0,st_feature3.getMax(),0.00001); 67 | assertEquals(2.33333333333333333,st_feature3.getVariance(),0.0001); 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /src/test/java/gate/plugin/learningframework/tests/TestUtils4Engines.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2015-2016 The University Of Sheffield. 3 | * 4 | * This file is part of gateplugin-LearningFramework 5 | * (see https://github.com/GateNLP/gateplugin-LearningFramework). 6 | * 7 | * This program is free software: you can redistribute it and/or modify 8 | * it under the terms of the GNU Lesser General Public License as published by 9 | * the Free Software Foundation, either version 2.1 of the License, or 10 | * (at your option) any later version. 11 | * 12 | * This program is distributed in the hope that it will be useful, 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | * GNU Lesser General Public License for more details. 16 | * 17 | * You should have received a copy of the GNU Lesser General Public License 18 | * along with this software. If not, see . 
/**
 * Holder for a currently disabled test of {@link Utils4Engines}.
 *
 * @author Johann Petrak
 */
public class TestUtils4Engines extends GATEPluginTests {
  // Cannot use this test any more: with the new Maven-based approach for running the tests,
  // we do not have the JAR/ZIP yet, so we cannot find and copy anything out of it
  // (the annotation below is commented out, so the method is not marked as a test)
  // @Test
  /**
   * Calls {@code Utils4Engines.copyWrapper} for the "FileJsonPyTorch" wrapper
   * with the shared tests directory as the second argument.
   *
   * @throws MalformedURLException declared by the method; not raised by any code visible here
   */
  public void test1() throws MalformedURLException {
    Utils4Engines.copyWrapper("FileJsonPyTorch", Utils.TESTS_DIR);
  }
}
19 | */ 20 | 21 | package gate.plugin.learningframework.tests; 22 | 23 | import cc.mallet.types.Alphabet; 24 | import cc.mallet.types.AugmentableFeatureVector; 25 | import cc.mallet.types.Instance; 26 | import gate.Annotation; 27 | import gate.AnnotationSet; 28 | import gate.Document; 29 | import gate.Factory; 30 | import gate.FeatureMap; 31 | import gate.creole.ResourceInstantiationException; 32 | import gate.plugin.learningframework.mallet.LFAlphabet; 33 | import java.io.File; 34 | import java.net.MalformedURLException; 35 | 36 | /** 37 | * 38 | * @author Johann Petrak 39 | */ 40 | public class Utils { 41 | 42 | public static final String TESTS_DIR_NAME = "tmp-tests"; 43 | public static final File TESTS_DIR = new File(TESTS_DIR_NAME); 44 | // For the comparison of doubles, we use an epsilon of approximately 45 | // 1.7E-15 which is 1.0 (the maximum expected number) divided through the value of the maximum 46 | // mantissa of double (64 bit), but with 3 bits taken away, i.e. 52-3 bits for the mantissa, 47 | // i.e. 2^49 48 | public static final double EPS = 1.7763568394002505e-15; 49 | public static final double EPS4 = 1e-4; 50 | 51 | // create a string with 1000 blanks which we will use as document content for many documents 52 | // dynamically created in the tests 53 | public static final String STR1000 = new String(new char[1000]).replace("\0", " "); 54 | 55 | public static Document newDocument() throws ResourceInstantiationException { 56 | return Factory.newDocument(STR1000); 57 | } 58 | 59 | /** 60 | * Add an annotation to the set with the given name and return the set. 
61 | * @param doc the document to which to add to 62 | * @param setName annotation set to add to 63 | * @param from from offset 64 | * @param to to offset 65 | * @param type annotation type 66 | * @param fm feature map 67 | * @return the annotation 68 | */ 69 | public static Annotation addAnn(Document doc, String setName, int from, int to, String type, FeatureMap fm) { 70 | AnnotationSet set = doc.getAnnotations(setName); 71 | int id = gate.Utils.addAnn(set, from, to, type, fm); 72 | return set.get(id); 73 | } 74 | 75 | public static Instance newInstance() { 76 | return new Instance(new AugmentableFeatureVector(new LFAlphabet()),null,null,null); 77 | } 78 | public static Instance newInstance(Alphabet alph) { 79 | return new Instance(new AugmentableFeatureVector(alph),null,null,null); 80 | } 81 | 82 | public static Document loadDocument(File file) throws MalformedURLException, ResourceInstantiationException { 83 | FeatureMap parms = Factory.newFeatureMap(); 84 | parms.put("sourceUrl", file.toURI().toURL()); 85 | Document doc = (Document)Factory.createResource("gate.corpora.DocumentImpl", parms); 86 | return doc; 87 | } 88 | 89 | } 90 | -------------------------------------------------------------------------------- /src/test/resources/creole.properties: -------------------------------------------------------------------------------- 1 | groupId=${project.groupId} 2 | artifactId=${project.artifactId} 3 | version=${project.version} 4 | --------------------------------------------------------------------------------