├── .gitignore ├── .codeclimate.yml ├── datumbox-framework-core ├── src │ ├── test │ │ ├── resources │ │ │ └── datasets │ │ │ │ ├── cities.csv │ │ │ │ ├── carsCategorical.csv │ │ │ │ ├── regressionNumeric.csv │ │ │ │ ├── sentimentAnalysis.unlabelled.txt │ │ │ │ ├── regressionMixed.csv │ │ │ │ ├── carsNumeric.csv │ │ │ │ └── example.com.html │ │ └── java │ │ │ └── com │ │ │ └── datumbox │ │ │ └── framework │ │ │ └── core │ │ │ ├── statistics │ │ │ ├── nonparametrics │ │ │ │ ├── onesample │ │ │ │ │ ├── BinomialTest.java │ │ │ │ │ ├── ShapiroWilkTest.java │ │ │ │ │ ├── WaldWolfowitzTest.java │ │ │ │ │ ├── LillieforsTest.java │ │ │ │ │ ├── SignOneSampleTest.java │ │ │ │ │ ├── WilcoxonOneSampleTest.java │ │ │ │ │ └── KolmogorovSmirnovOneSampleTest.java │ │ │ │ ├── independentsamples │ │ │ │ │ ├── FisherTest.java │ │ │ │ │ ├── ChisquareTest.java │ │ │ │ │ ├── MannWhitneyTest.java │ │ │ │ │ └── KruskalWallisTest.java │ │ │ │ └── relatedsamples │ │ │ │ │ ├── McNemarTest.java │ │ │ │ │ ├── SignRelatedSamplesTest.java │ │ │ │ │ ├── WilcoxonRelatedSamplesTest.java │ │ │ │ │ ├── SpearmanCorrelationTest.java │ │ │ │ │ ├── KendallTauCorrelationTest.java │ │ │ │ │ └── FriedmanTest.java │ │ │ ├── parametrics │ │ │ │ ├── onesample │ │ │ │ │ ├── ChisquareOneSampleTest.java │ │ │ │ │ ├── LjungBoxTest.java │ │ │ │ │ ├── StudentsOneSampleTest.java │ │ │ │ │ └── NormalOneSampleTest.java │ │ │ │ ├── relatedsamples │ │ │ │ │ ├── NormalRelatedSamplesTest.java │ │ │ │ │ ├── StudentsRelatedSamplesTest.java │ │ │ │ │ └── PearsonCorrelationTest.java │ │ │ │ └── independentsamples │ │ │ │ │ ├── FIndependentSamplesTest.java │ │ │ │ │ ├── LevenesIndependentSamplesTest.java │ │ │ │ │ └── StudentsIndependentSamplesTest.java │ │ │ ├── descriptivestatistics │ │ │ │ └── RanksTest.java │ │ │ └── survival │ │ │ │ └── nonparametrics │ │ │ │ └── independentsamples │ │ │ │ ├── LogrankTest.java │ │ │ │ ├── CoxMantelTest.java │ │ │ │ └── PetoPetoWilcoxonTest.java │ │ │ ├── mathematics │ │ │ ├── discrete │ │ 
│ │ ├── ArithmeticsTest.java │ │ │ │ └── CombinatoricsTest.java │ │ │ └── linearprogramming │ │ │ │ └── LPSolverTest.java │ │ │ └── common │ │ │ ├── text │ │ │ ├── extractors │ │ │ │ ├── UniqueWordSequenceExtractorTest.java │ │ │ │ └── WordSequenceExtractorTest.java │ │ │ ├── analyzers │ │ │ │ └── PHPSimilarTextTest.java │ │ │ └── tokenizers │ │ │ │ └── WhitespaceTokenizerTest.java │ │ │ └── utilities │ │ │ └── MapMethodsTest.java │ └── main │ │ └── java │ │ └── com │ │ └── datumbox │ │ └── framework │ │ └── core │ │ ├── common │ │ ├── interfaces │ │ │ ├── Learnable.java │ │ │ ├── Parameterizable.java │ │ │ ├── Savable.java │ │ │ └── Extractable.java │ │ └── text │ │ │ ├── tokenizers │ │ │ ├── AbstractTokenizer.java │ │ │ └── WhitespaceTokenizer.java │ │ │ └── extractors │ │ │ └── WordSequenceExtractor.java │ │ ├── machinelearning │ │ └── common │ │ │ ├── interfaces │ │ │ ├── TrainParallelizable.java │ │ │ ├── Cluster.java │ │ │ ├── StepwiseCompatible.java │ │ │ ├── Parallelizable.java │ │ │ ├── Trainable.java │ │ │ ├── TrainingParameters.java │ │ │ └── ModelParameters.java │ │ │ └── abstracts │ │ │ ├── modelselection │ │ │ └── AbstractMetrics.java │ │ │ ├── modelers │ │ │ ├── AbstractRegressor.java │ │ │ ├── AbstractRecommender.java │ │ │ ├── AbstractTopicModeler.java │ │ │ └── AbstractModeler.java │ │ │ └── transformers │ │ │ └── AbstractEncoder.java │ │ ├── mathematics │ │ ├── discrete │ │ │ └── Arithmetics.java │ │ └── regularization │ │ │ ├── L2Regularizer.java │ │ │ └── ElasticNetRegularizer.java │ │ └── statistics │ │ ├── nonparametrics │ │ ├── onesample │ │ │ └── Binomial.java │ │ └── relatedsamples │ │ │ └── McNemar.java │ │ └── parametrics │ │ ├── relatedsamples │ │ ├── NormalRelatedSamples.java │ │ └── StudentsRelatedSamples.java │ │ ├── onesample │ │ └── ChisquareOneSample.java │ │ └── independentsamples │ │ └── FIndependentSamples.java └── pom.xml ├── .travis.yml ├── appveyor.yml ├── datumbox-framework-common ├── src │ └── main │ │ ├── resources │ │ 
├── license.txt │ │ ├── datumbox.configuration.default.properties │ │ └── datumbox.concurrencyconfiguration.default.properties │ │ └── java │ │ └── com │ │ └── datumbox │ │ └── framework │ │ ├── common │ │ ├── interfaces │ │ │ ├── Copyable.java │ │ │ └── Configurable.java │ │ ├── dataobjects │ │ │ ├── AbstractDataStructureList.java │ │ │ └── AbstractDataStructureMap.java │ │ ├── storage │ │ │ ├── interfaces │ │ │ │ ├── StorageConfiguration.java │ │ │ │ └── BigMap.java │ │ │ └── abstracts │ │ │ │ └── AbstractFileStorageConfiguration.java │ │ └── concurrency │ │ │ └── ThrottledExecutor.java │ │ └── development │ │ ├── interfaces │ │ ├── Feature.java │ │ └── FeatureMark.java │ │ ├── switchers │ │ ├── Example.java │ │ └── ExampleMark.java │ │ └── FeatureContext.java └── pom.xml ├── datumbox-framework-tests ├── src │ └── main │ │ ├── resources │ │ ├── datumbox.inmemoryconfiguration.properties │ │ ├── datumbox.configuration.properties │ │ ├── logback-test.xml │ │ ├── datumbox.concurrencyconfiguration.properties │ │ └── datumbox.mapdbconfiguration.properties │ │ └── java │ │ └── com │ │ └── datumbox │ │ └── framework │ │ └── tests │ │ ├── Constants.java │ │ └── utilities │ │ └── TestUtils.java └── pom.xml ├── datumbox-framework-storage ├── datumbox-framework-storage-inmemory │ ├── src │ │ └── main │ │ │ ├── resources │ │ │ └── datumbox.inmemoryconfiguration.default.properties │ │ │ └── java │ │ │ └── com │ │ │ └── datumbox │ │ │ └── framework │ │ │ └── storage │ │ │ └── inmemory │ │ │ ├── InMemoryConfiguration.java │ │ │ └── DeepCopy.java │ └── pom.xml ├── datumbox-framework-storage-mapdb │ ├── src │ │ └── main │ │ │ └── resources │ │ │ └── datumbox.mapdbconfiguration.default.properties │ └── pom.xml └── pom.xml ├── datumbox-framework-lib ├── src │ └── main │ │ └── java │ │ └── com │ │ └── datumbox │ │ └── framework │ │ └── lib │ │ └── Datumbox.java └── pom.xml ├── NOTICE ├── datumbox-framework-applications ├── src │ └── test │ │ └── java │ │ └── com │ │ └── datumbox │ │ 
└── framework │ │ └── applications │ │ └── nlp │ │ └── CETRTest.java └── pom.xml └── TODO.txt /.gitignore: -------------------------------------------------------------------------------- 1 | *.class 2 | *.jar 3 | *.war 4 | *.ear 5 | *.iml 6 | 7 | target/ 8 | /.settings/ 9 | /.idea/ 10 | .classpath 11 | .project 12 | nbactions.xml 13 | nb-configuration.xml 14 | build-datumbox-framework.sh 15 | -------------------------------------------------------------------------------- /.codeclimate.yml: -------------------------------------------------------------------------------- 1 | --- 2 | engines: 3 | fixme: 4 | enabled: true 5 | exclude_fingerprints: 6 | - f4fdfaaab3dfb8670a495ee1f2b9e20e 7 | pmd: 8 | enabled: true 9 | channel: "beta" 10 | exclude_fingerprints: 11 | - d58713d342d24b2a02c9b3d13755d65d 12 | ratings: 13 | paths: 14 | - "**.java" 15 | exclude_paths: [] 16 | -------------------------------------------------------------------------------- /datumbox-framework-core/src/test/resources/datasets/cities.csv: -------------------------------------------------------------------------------- 1 | city,temperature,is_sunny,traffic_rank,is_capital,name_of_port,metro_population 2 | Athens,30.0,yes,3,true,"Piraeus",3753783 3 | London,14.,No,2,True,Port of London,13614409.0 4 | "New York",-12,Yes,1,False,New York's port,null 5 | "Atlantis, ""the lost city""",,,4,INVALID_VALUE,null,null 6 | -------------------------------------------------------------------------------- /datumbox-framework-core/src/test/resources/datasets/carsCategorical.csv: -------------------------------------------------------------------------------- 1 | color,type,origin,stolen 2 | red,sports,domestic,yes 3 | red,sports,domestic,no 4 | red,sports,domestic,yes 5 | yellow,sports,domestic,no 6 | yellow,sports,imported,yes 7 | yellow,suv,imported,no 8 | yellow,suv,imported,yes 9 | yellow,suv,domestic,no 10 | red,suv,imported,no 11 | red,sports,imported,yes 12 | 
-------------------------------------------------------------------------------- /datumbox-framework-core/src/test/resources/datasets/regressionNumeric.csv: -------------------------------------------------------------------------------- 1 | c1,c2,y 2 | 49,4.5,137.098 3 | 46,2.9,89.092 4 | 46,1.9,59.092 5 | 40,1.7,53.08 6 | 45,2.1,65.09 7 | 41,3.8,116.082 8 | 47,5.0,152.094 9 | 41,2.0,62.082 10 | 40,0.9,29.08 11 | 46,1.2,38.092 12 | 49,4.5,137.098 13 | 46,2.9,89.092 14 | 46,1.9,59.092 15 | 40,1.7,53.08 16 | 45,2.1,65.09 17 | 41,3.8,116.082 18 | 47,5.0,152.094 19 | 41,2.0,62.082 20 | 40,0.9,29.08 21 | 46,1.2,38.092 22 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: java 2 | sudo: required 3 | dist: bionic 4 | cache: 5 | directories: 6 | - .autoconf 7 | - $HOME/.m2 8 | before_install: 9 | - echo $JAVA_HOME 10 | matrix: 11 | include: 12 | - os: linux 13 | jdk: openjdk11 14 | - os: osx 15 | jdk: openjdk11 16 | install: 17 | - mvn clean install -DskipTests=true 18 | script: 19 | - mvn clean test -DstorageEngine=InMemory 20 | - mvn clean test -DstorageEngine=MapDB 21 | notifications: 22 | email: 23 | on_success: never 24 | on_failure: always 25 | -------------------------------------------------------------------------------- /appveyor.yml: -------------------------------------------------------------------------------- 1 | version: '{build}' 2 | 3 | image: Visual Studio 2019 4 | 5 | environment: 6 | JAVA_HOME: 'C:\Program Files\Java\jdk11' 7 | install: 8 | - cmd: echo "%JAVA_HOME%" 9 | - cmd: set PATH=%JAVA_HOME%\bin;%PATH% 10 | - cmd: echo "%PATH%" 11 | - cmd: java -version 12 | - ps: choco install -i maven 13 | - cmd: mvn --version 14 | build_script: 15 | - mvn clean install -DskipTests=true 16 | test_script: 17 | - mvn clean test -DstorageEngine=InMemory 18 | - mvn clean test -DstorageEngine=MapDB 19 | cache: 20 | - 
C:\Users\appveyor\.m2 -------------------------------------------------------------------------------- /datumbox-framework-core/src/test/resources/datasets/sentimentAnalysis.unlabelled.txt: -------------------------------------------------------------------------------- 1 | Our current broken immigration system has fueled, among other things, violence against contributing members of our community. I am referring to the recent murders, beatings and rape of Latino agricultural farm workers in South Georgia. Has our society gotten to the point that labels such as "illegal" connote "otherness" to such a degree that our immigrant brothers and sisters are beaten to death as a matter of course? We must all work together to ensure that these attacks are the last. 2 | A good family car that is also good for a single man out to have some fun -------------------------------------------------------------------------------- /datumbox-framework-common/src/main/resources/license.txt: -------------------------------------------------------------------------------- 1 | Copyright (C) ${copyrightYears} ${copyrightOwner} <${copyrightEmail}> 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 
14 | -------------------------------------------------------------------------------- /datumbox-framework-core/src/test/resources/datasets/regressionMixed.csv: -------------------------------------------------------------------------------- 1 | c1,c2,c3,c4,y 2 | 3,49,4.5,0,167.098 3 | 1,46,2.9,0,99.092 4 | 1,46,1.9,2,89.092 5 | 2,40,1.7,3,103.08 6 | 3,45,2.1,0,95.09 7 | 1,41,3.8,1,136.082 8 | 2,47,5.0,3,202.094 9 | 1,41,2.0,4,112.082 10 | 3,40,0.9,0,59.08 11 | 2,46,1.2,4,98.092 12 | 3,49,4.5,0,167.098 13 | 1,46,2.9,0,99.092 14 | 1,46,1.9,2,89.092 15 | 2,40,1.7,3,103.08 16 | 3,45,2.1,0,95.09 17 | 1,41,3.8,1,136.082 18 | 2,47,5.0,3,202.094 19 | 1,41,2.0,4,112.082 20 | 3,40,0.9,0,59.08 21 | 2,46,1.2,4,98.092 22 | 3,49,4.5,0,167.098 23 | 1,46,2.9,0,99.092 24 | 1,46,1.9,2,89.092 25 | 2,40,1.7,3,103.08 26 | 3,45,2.1,0,95.09 27 | 1,41,3.8,1,136.082 28 | 2,47,5.0,3,202.094 29 | 1,41,2.0,4,112.082 30 | 3,40,0.9,0,59.08 31 | 2,46,1.2,4,98.092 32 | -------------------------------------------------------------------------------- /datumbox-framework-tests/src/main/resources/datumbox.inmemoryconfiguration.properties: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2013-2020 Vasilis Vryniotis 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 
15 | # 16 | 17 | # The relative or absolute path for the directory where the models are stored (if not specified the temporary directory is used): 18 | inMemoryConfiguration.directory= 19 | -------------------------------------------------------------------------------- /datumbox-framework-storage/datumbox-framework-storage-inmemory/src/main/resources/datumbox.inmemoryconfiguration.default.properties: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2013-2020 Vasilis Vryniotis 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # 16 | 17 | # The relative or absolute path for the directory where the models are stored (if not specified the temporary directory is used): 18 | inMemoryConfiguration.directory= 19 | -------------------------------------------------------------------------------- /datumbox-framework-tests/src/main/resources/datumbox.configuration.properties: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2013-2020 Vasilis Vryniotis 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # 16 | 17 | # The full class name of the Configuration of the Storage Engine. This determines the default storage engine which is used for storing the models: 18 | configuration.storageConfiguration=com.datumbox.framework.storage.inmemory.InMemoryConfiguration 19 | -------------------------------------------------------------------------------- /datumbox-framework-common/src/main/resources/datumbox.configuration.default.properties: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2013-2020 Vasilis Vryniotis 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # 16 | 17 | # The full class name of the Configuration of the Storage Engine. 
This determines the default storage engine which is used for storing the models: 18 | configuration.storageConfiguration=com.datumbox.framework.storage.inmemory.InMemoryConfiguration 19 | -------------------------------------------------------------------------------- /datumbox-framework-core/src/test/resources/datasets/carsNumeric.csv: -------------------------------------------------------------------------------- 1 | red,yellow,sports,suv,domestic,imported,stolen 2 | 1,0,1,0,1,0,1 3 | 1,0,1,0,1,0,0 4 | 1,0,1,0,1,0,1 5 | 0,1,1,0,1,0,0 6 | 0,1,1,0,0,1,1 7 | 0,1,0,1,0,1,0 8 | 0,1,0,1,0,1,1 9 | 0,1,0,1,1,0,0 10 | 1,0,0,1,0,1,0 11 | 1,0,1,0,0,1,1 12 | 1,0,1,0,1,0,1 13 | 1,0,1,0,1,0,0 14 | 1,0,1,0,1,0,1 15 | 0,1,1,0,1,0,0 16 | 0,1,1,0,0,1,1 17 | 0,1,0,1,0,1,0 18 | 0,1,0,1,0,1,1 19 | 0,1,0,1,1,0,0 20 | 1,0,0,1,0,1,0 21 | 1,0,1,0,0,1,1 22 | 1,0,1,0,1,0,1 23 | 1,0,1,0,1,0,0 24 | 1,0,1,0,1,0,1 25 | 0,1,1,0,1,0,0 26 | 0,1,1,0,0,1,1 27 | 0,1,0,1,0,1,0 28 | 0,1,0,1,0,1,1 29 | 0,1,0,1,1,0,0 30 | 1,0,0,1,0,1,0 31 | 1,0,1,0,0,1,1 32 | 1,0,1,0,1,0,1 33 | 1,0,1,0,1,0,0 34 | 1,0,1,0,1,0,1 35 | 0,1,1,0,1,0,0 36 | 0,1,1,0,0,1,1 37 | 0,1,0,1,0,1,0 38 | 0,1,0,1,0,1,1 39 | 0,1,0,1,1,0,0 40 | 1,0,0,1,0,1,0 41 | 1,0,1,0,0,1,1 42 | 1,0,1,0,1,0,1 43 | 1,0,1,0,1,0,0 44 | 1,0,1,0,1,0,1 45 | 0,1,1,0,1,0,0 46 | 0,1,1,0,0,1,1 47 | 0,1,0,1,0,1,0 48 | 0,1,0,1,0,1,1 49 | 0,1,0,1,1,0,0 50 | 1,0,0,1,0,1,0 51 | 1,0,1,0,0,1,1 52 | -------------------------------------------------------------------------------- /datumbox-framework-core/src/main/java/com/datumbox/framework/core/common/interfaces/Learnable.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2013-2020 Vasilis Vryniotis 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.datumbox.framework.core.common.interfaces; 17 | 18 | import java.io.Serializable; 19 | 20 | /** 21 | * This interface is used to mark classes which store parameters that are learned 22 | * during training. 23 | * 24 | * @author Vasilis Vryniotis 25 | */ 26 | public interface Learnable extends Serializable { 27 | 28 | } 29 | -------------------------------------------------------------------------------- /datumbox-framework-core/src/main/java/com/datumbox/framework/core/machinelearning/common/interfaces/TrainParallelizable.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2013-2020 Vasilis Vryniotis 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.datumbox.framework.core.machinelearning.common.interfaces; 17 | 18 | /** 19 | * All Machine Learning models capable of using parallelism during training 20 | * implement this interface. 
21 | * 22 | * @author Vasilis Vryniotis 23 | */ 24 | public interface TrainParallelizable extends Parallelizable { 25 | 26 | } 27 | -------------------------------------------------------------------------------- /datumbox-framework-tests/src/main/resources/logback-test.xml: -------------------------------------------------------------------------------- 1 | 2 | 19 | 20 | 21 | 22 | %d{HH:mm:ss.SSS} [%thread] %-5level %logger{5} - %msg%n 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | -------------------------------------------------------------------------------- /datumbox-framework-core/src/main/java/com/datumbox/framework/core/common/interfaces/Parameterizable.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2013-2020 Vasilis Vryniotis 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.datumbox.framework.core.common.interfaces; 17 | 18 | import java.io.Serializable; 19 | 20 | /** 21 | * This interface is used to mark classes that work as wrappers/containers of 22 | * fields that parameterize other algorithms. 
23 | * 24 | * @author Vasilis Vryniotis 25 | */ 26 | public interface Parameterizable extends Serializable { 27 | 28 | } 29 | -------------------------------------------------------------------------------- /datumbox-framework-lib/src/main/java/com/datumbox/framework/lib/Datumbox.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2013-2020 Vasilis Vryniotis 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.datumbox.framework.lib; 17 | 18 | /** 19 | * Main class of the Framework. 20 | * 21 | * @author Vasilis Vryniotis 22 | */ 23 | public class Datumbox { 24 | 25 | /** 26 | * It prints on stdout the Name of the Framework. 27 | * 28 | * @param args the command line arguments 29 | */ 30 | public static void main(String[] args) { 31 | System.out.println("Datumbox Machine Learning Framework"); 32 | } 33 | 34 | } 35 | -------------------------------------------------------------------------------- /datumbox-framework-common/src/main/java/com/datumbox/framework/common/interfaces/Copyable.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2013-2020 Vasilis Vryniotis 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.datumbox.framework.common.interfaces; 17 | 18 | /** 19 | * This interface is implemented by all classes which implement the copy() method. 20 | * 21 | * @author Vasilis Vryniotis 22 | * @param <T> 23 | */ 24 | public interface Copyable<T> { 25 | 26 | /** 27 | * Copies itself and returns a new instance of the same type. 28 | * 29 | * @return 30 | */ 31 | public T copy(); 32 | 33 | } 34 | -------------------------------------------------------------------------------- /datumbox-framework-common/src/main/java/com/datumbox/framework/common/interfaces/Configurable.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2013-2020 Vasilis Vryniotis 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.datumbox.framework.common.interfaces; 17 | 18 | import java.util.Properties; 19 | 20 | /** 21 | * All configuration classes of the framework need to inherit this interface. 
22 | * 23 | * @author Vasilis Vryniotis 24 | */ 25 | public interface Configurable { 26 | 27 | /** 28 | * Initializes the Configuration object by using a property file. 29 | * 30 | * @param properties 31 | */ 32 | public void load(Properties properties); 33 | 34 | } 35 | -------------------------------------------------------------------------------- /datumbox-framework-common/src/main/java/com/datumbox/framework/development/interfaces/Feature.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2013-2020 Vasilis Vryniotis 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.datumbox.framework.development.interfaces; 17 | 18 | /** 19 | * This interface must be implemented by all the feature switches. 20 | * 21 | * @author Vasilis Vryniotis 22 | */ 23 | public interface Feature { 24 | 25 | /** 26 | * Tests whether the particular switch level is activated. 
27 | * 28 | * @return 29 | */ 30 | public boolean isActivated(); 31 | 32 | /** {@inheritDoc} */ 33 | @Override 34 | public String toString(); 35 | } 36 | -------------------------------------------------------------------------------- /datumbox-framework-core/src/main/java/com/datumbox/framework/core/common/interfaces/Savable.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2013-2020 Vasilis Vryniotis 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.datumbox.framework.core.common.interfaces; 17 | 18 | /** 19 | * The Savable interface is implemented by all the objects that can be stored. 20 | * 21 | * @author Vasilis Vryniotis 22 | */ 23 | public interface Savable extends AutoCloseable { 24 | 25 | /** 26 | * Saves the data of the object. 27 | * 28 | * @param storageName 29 | */ 30 | public void save(String storageName); 31 | 32 | /** 33 | * Deletes the data of the object. 
34 | */ 35 | public void delete(); 36 | 37 | } 38 | -------------------------------------------------------------------------------- /datumbox-framework-tests/src/main/resources/datumbox.concurrencyconfiguration.properties: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2013-2020 Vasilis Vryniotis 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # 16 | 17 | # Whether the concurrent execution of tasks is allowed (options: true/false): 18 | concurrencyConfiguration.parallelized=true 19 | 20 | # The maximum number of Threads that can be executed concurrently for each task: 21 | # - Use 0 for setting it equal to the number of CPUs on the system. 22 | # - Use 1 to turn off concurrency (same as concurrencyConfiguration.parallelized=false). 23 | # - Any other positive value acts as a limit on the concurrency level, provided that the concurrencyConfiguration.parallelized=true. 
24 | concurrencyConfiguration.maxNumberOfThreadsPerTask=0 25 | -------------------------------------------------------------------------------- /datumbox-framework-common/src/main/java/com/datumbox/framework/development/interfaces/FeatureMark.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2013-2020 Vasilis Vryniotis 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.datumbox.framework.development.interfaces; 17 | 18 | import java.lang.annotation.Retention; 19 | import java.lang.annotation.RetentionPolicy; 20 | import java.lang.annotation.Target; 21 | 22 | import static java.lang.annotation.ElementType.ANNOTATION_TYPE; 23 | 24 | /** 25 | * This is a meta annotation which is applied to all annotations with enums. 26 | * 27 | * @author Vasilis Vryniotis 28 | */ 29 | @Retention(RetentionPolicy.RUNTIME) 30 | @Target({ANNOTATION_TYPE}) 31 | public @interface FeatureMark { 32 | 33 | } -------------------------------------------------------------------------------- /datumbox-framework-common/src/main/resources/datumbox.concurrencyconfiguration.default.properties: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2013-2020 Vasilis Vryniotis 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # 16 | 17 | # Whether the concurrent execution of tasks is allowed (options: true/false): 18 | concurrencyConfiguration.parallelized=true 19 | 20 | # The maximum number of Threads that can be executed concurrently for each task: 21 | # - Use 0 for setting it equal to the number of CPUs on the system. 22 | # - Use 1 to turn off concurrency (same as concurrencyConfiguration.parallelized=false). 23 | # - Any other positive value acts as a limit on the concurrency level, provided that the concurrencyConfiguration.parallelized=true. 24 | concurrencyConfiguration.maxNumberOfThreadsPerTask=0 25 | -------------------------------------------------------------------------------- /datumbox-framework-core/src/main/java/com/datumbox/framework/core/machinelearning/common/interfaces/Cluster.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2013-2020 Vasilis Vryniotis 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package com.datumbox.framework.core.machinelearning.common.interfaces; 17 | 18 | import com.datumbox.framework.core.common.interfaces.Learnable; 19 | 20 | 21 | /** 22 | * The Cluster object stores all the information of the cluster, including the 23 | * assigned points, the parameters and the label. 24 | * 25 | * @author Vasilis Vryniotis 26 | */ 27 | public interface Cluster extends Learnable { 28 | 29 | /** 30 | * Returns the number of records assigned to this cluster. 31 | * 32 | * @return the number of records assigned to this cluster 33 | */ 34 | public int size(); 35 | 36 | } 37 | -------------------------------------------------------------------------------- /datumbox-framework-core/src/main/java/com/datumbox/framework/core/machinelearning/common/interfaces/StepwiseCompatible.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2013-2020 Vasilis Vryniotis 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.datumbox.framework.core.machinelearning.common.interfaces; 17 | 18 | import java.util.Map; 19 | 20 | /** 21 | * Stepwise regression requires the estimation of p-values for each parameter. 22 | * The regression models that support this estimation are marked with this interface. 23 | * 24 | * @author Vasilis Vryniotis 25 | */ 26 | public interface StepwiseCompatible { 27 | 28 | /** 29 | * Getter for the p-values of the Features. 
30 | * 31 | * @return 32 | */ 33 | public Map getFeaturePvalues(); 34 | } 35 | -------------------------------------------------------------------------------- /datumbox-framework-core/src/main/java/com/datumbox/framework/core/common/interfaces/Extractable.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2013-2020 Vasilis Vryniotis 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.datumbox.framework.core.common.interfaces; 17 | 18 | import java.util.Map; 19 | 20 | /** 21 | * This interface is used to mark classes that work as extractors. 22 | * 23 | * @author Vasilis Vryniotis 24 | * @param 25 | * @param 26 | * @param 27 | */ 28 | public interface Extractable { 29 | 30 | /** 31 | * The extract method gets an input, performs extraction and returns the output 32 | * in a map. 33 | * 34 | * @param input the input on which the extraction is performed 35 | * @return a map holding the output of the extraction 36 | */ 37 | public Map extract(final I input); 38 | 39 | } 40 | -------------------------------------------------------------------------------- /datumbox-framework-tests/src/main/java/com/datumbox/framework/tests/Constants.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2013-2020 Vasilis Vryniotis 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.datumbox.framework.tests; 17 | 18 | /** 19 | * Configuration constants for the Tests. 20 | * 21 | * @author Vasilis Vryniotis 22 | */ 23 | public class Constants { 24 | 25 | /** 26 | * High Accuracy Level for assert. 27 | */ 28 | public static final double DOUBLE_ACCURACY_HIGH = 0.00001; 29 | 30 | /** 31 | * Medium Accuracy Level for assert. 32 | */ 33 | public static final double DOUBLE_ACCURACY_MEDIUM = 0.0001; 34 | 35 | /** 36 | * Seed of the RandomGenerator. 37 | */ 38 | public static final long RANDOM_SEED = 42L; 39 | 40 | } 41 | -------------------------------------------------------------------------------- /datumbox-framework-core/src/main/java/com/datumbox/framework/core/common/text/tokenizers/AbstractTokenizer.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2013-2020 Vasilis Vryniotis 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package com.datumbox.framework.core.common.text.tokenizers; 17 | 18 | import java.util.List; 19 | 20 | /** 21 | * This abstract class should be extended by classes that are responsible for taking 22 | * a string as input and separating it into tokens/keywords. 23 | * 24 | * @author Vasilis Vryniotis 25 | */ 26 | public abstract class AbstractTokenizer { 27 | 28 | /** 29 | * The tokenize method accepts a string as input and returns a list of tokens. 30 | * 31 | * @param text the string to tokenize 32 | * @return the list of tokens extracted from the text 33 | */ 34 | public abstract List tokenize(String text); 35 | } 36 | -------------------------------------------------------------------------------- /datumbox-framework-core/src/main/java/com/datumbox/framework/core/machinelearning/common/interfaces/Parallelizable.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2013-2020 Vasilis Vryniotis 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.datumbox.framework.core.machinelearning.common.interfaces; 17 | 18 | /** 19 | * All algorithms capable of using parallelism implement this interface. 20 | * 21 | * @author Vasilis Vryniotis 22 | */ 23 | public interface Parallelizable { 24 | 25 | /** 26 | * Getter for the parallelized parameter. 27 | * 28 | * @return the current value of the parallelized parameter 29 | */ 30 | public boolean isParallelized(); 31 | 32 | /** 33 | * Setter for the parallelized parameter. 
34 | * 35 | * @param parallelized 36 | */ 37 | public void setParallelized(boolean parallelized); 38 | 39 | } 40 | -------------------------------------------------------------------------------- /datumbox-framework-common/src/main/java/com/datumbox/framework/common/dataobjects/AbstractDataStructureList.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2013-2020 Vasilis Vryniotis 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.datumbox.framework.common.dataobjects; 17 | 18 | import java.util.List; 19 | 20 | /** 21 | * Abstract class for every DataStructure which internally uses a List 22 | * Object. 23 | * 24 | * @author Vasilis Vryniotis 25 | * @param 26 | */ 27 | public abstract class AbstractDataStructureList> extends AbstractDataStructureCollection { 28 | 29 | /** 30 | * Public constructor which takes as argument the appropriate Java collection. 
31 | * 32 | * @param data 33 | */ 34 | public AbstractDataStructureList(T data) { 35 | super(data); 36 | } 37 | 38 | } 39 | -------------------------------------------------------------------------------- /datumbox-framework-tests/src/main/resources/datumbox.mapdbconfiguration.properties: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2013-2020 Vasilis Vryniotis 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # 16 | 17 | # The relative or absolute path for the directory where the models are stored (if not specified the temporary directory is used): 18 | mapDBConfiguration.directory= 19 | 20 | # The number of records kept in each LRU cache. 
Setting it to 0 will disable caching (not recommended): 21 | mapDBConfiguration.cacheSize=10000 22 | 23 | # Whether compression will be used in storage (options: true/false): 24 | mapDBConfiguration.compressed=true 25 | 26 | # The hybridized mode enables small and important data to be stored directly In-Memory (options: true/false): 27 | mapDBConfiguration.hybridized=true 28 | 29 | # Whether the writes will be performed asynchronously (options: true/false): 30 | mapDBConfiguration.asynchronous=true 31 | -------------------------------------------------------------------------------- /datumbox-framework-storage/datumbox-framework-storage-mapdb/src/main/resources/datumbox.mapdbconfiguration.default.properties: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (C) 2013-2020 Vasilis Vryniotis 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # 16 | 17 | # The relative or absolute path for the directory where the models are stored (if not specified the temporary directory is used): 18 | mapDBConfiguration.directory= 19 | 20 | # The number of records kept in each LRU cache. 
Setting it to 0 will disable caching (not recommended): 21 | mapDBConfiguration.cacheSize=10000 22 | 23 | # Whether compression will be used in storage (options: true/false): 24 | mapDBConfiguration.compressed=true 25 | 26 | # The hybridized mode enables small and important data to be stored directly In-Memory (options: true/false): 27 | mapDBConfiguration.hybridized=true 28 | 29 | # Whether the writes will be performed asynchronously (options: true/false): 30 | mapDBConfiguration.asynchronous=true 31 | -------------------------------------------------------------------------------- /datumbox-framework-common/src/main/java/com/datumbox/framework/development/switchers/Example.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2013-2020 Vasilis Vryniotis 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.datumbox.framework.development.switchers; 17 | 18 | import com.datumbox.framework.development.FeatureContext; 19 | import com.datumbox.framework.development.interfaces.Feature; 20 | 21 | /** 22 | * Example of a Feature Switch. 23 | * 24 | * @author Vasilis Vryniotis 25 | */ 26 | public enum Example implements Feature { 27 | 28 | /** 29 | * 1st Option. 30 | */ 31 | OPTION1, 32 | 33 | /** 34 | * 2nd Option. 
35 | */ 36 | OPTION2; 37 | 38 | /** {@inheritDoc} */ 39 | @Override 40 | public boolean isActivated() { 41 | return FeatureContext.isActive(this); 42 | } 43 | 44 | /** {@inheritDoc} */ 45 | @Override 46 | public String toString() { 47 | return name(); 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /datumbox-framework-core/src/main/java/com/datumbox/framework/core/common/text/tokenizers/WhitespaceTokenizer.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2013-2020 Vasilis Vryniotis 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.datumbox.framework.core.common.text.tokenizers; 17 | 18 | import java.util.ArrayList; 19 | import java.util.Arrays; 20 | import java.util.List; 21 | 22 | /** 23 | * The white space tokenizer separates the keywords of a string using the white 24 | * space. 25 | * 26 | * @author Vasilis Vryniotis 27 | */ 28 | public class WhitespaceTokenizer extends AbstractTokenizer { 29 | 30 | /** 31 | * Separates the tokens of a string by splitting it on white space. 
32 | * 33 | * @param text 34 | * @return 35 | */ 36 | @Override 37 | public List tokenize(String text) { 38 | List tokens = new ArrayList<>(Arrays.asList(text.split("[\\p{Z}\\p{C}]+"))); 39 | return tokens; 40 | } 41 | 42 | } 43 | -------------------------------------------------------------------------------- /datumbox-framework-storage/datumbox-framework-storage-inmemory/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 19 | 20 | 4.0.0 21 | com.datumbox 22 | datumbox-framework-storage-inmemory 23 | 24 | Datumbox Framework InMemory Storage Engine 25 | 26 | 27 | com.datumbox 28 | datumbox-framework-storage 29 | 0.8.3-SNAPSHOT 30 | ../pom.xml 31 | 32 | 33 | 34 | ../.. 35 | 36 | 37 | 38 | -------------------------------------------------------------------------------- /datumbox-framework-core/src/test/resources/datasets/example.com.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Example Domain 5 | 6 | 7 | 8 | 9 | 40 | 41 | 42 | 43 |
44 |

Example Domain

45 |

This domain is established to be used for illustrative examples in documents. You may use this 46 | domain in examples without prior coordination or asking for permission.

47 |

More information...

48 |
49 | 50 | 51 | -------------------------------------------------------------------------------- /datumbox-framework-common/src/main/java/com/datumbox/framework/development/switchers/ExampleMark.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2013-2020 Vasilis Vryniotis 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.datumbox.framework.development.switchers; 17 | 18 | import com.datumbox.framework.development.interfaces.FeatureMark; 19 | 20 | import java.lang.annotation.*; 21 | 22 | /** 23 | * Example class for the Mark annotation which is used to mark fields, methods etc 24 | * which belong only to specific options. 25 | * 26 | * @author Vasilis Vryniotis 27 | */ 28 | @FeatureMark 29 | @Target({ElementType.CONSTRUCTOR, ElementType.FIELD, ElementType.LOCAL_VARIABLE, ElementType.METHOD, ElementType.PACKAGE, ElementType.PARAMETER, ElementType.TYPE}) 30 | @Inherited 31 | @Retention(RetentionPolicy.RUNTIME) 32 | public @interface ExampleMark { 33 | 34 | /** 35 | * Parameter that passes the options of the enum. 
36 | * 37 | * @return 38 | */ 39 | public Example[] options(); 40 | 41 | } -------------------------------------------------------------------------------- /datumbox-framework-core/src/test/java/com/datumbox/framework/core/statistics/nonparametrics/onesample/BinomialTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2013-2020 Vasilis Vryniotis 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.datumbox.framework.core.statistics.nonparametrics.onesample; 17 | 18 | 19 | import com.datumbox.framework.tests.abstracts.AbstractTest; 20 | import org.junit.Test; 21 | 22 | import static org.junit.Assert.assertEquals; 23 | 24 | /** 25 | * Test cases for Binomial. 26 | * 27 | * @author Vasilis Vryniotis 28 | */ 29 | public class BinomialTest extends AbstractTest { 30 | 31 | /** 32 | * Test of test method, of class Binomial. 
33 | */ 34 | @Test 35 | public void testTest() { 36 | logger.info("test"); 37 | int k = 10; 38 | int n = 40; 39 | double p = 0.35; 40 | boolean is_twoTailed = true; 41 | double aLevel = 0.05; 42 | boolean expResult = false; 43 | boolean result = Binomial.test(k, n, p, is_twoTailed, aLevel); 44 | assertEquals(expResult, result); 45 | } 46 | 47 | } 48 | -------------------------------------------------------------------------------- /datumbox-framework-common/src/main/java/com/datumbox/framework/common/storage/interfaces/StorageConfiguration.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2013-2020 Vasilis Vryniotis 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.datumbox.framework.common.storage.interfaces; 17 | 18 | import com.datumbox.framework.common.interfaces.Configurable; 19 | 20 | /** 21 | * This interface should be implemented by objects that store the configuration 22 | * of storage engines. 23 | * 24 | * @author Vasilis Vryniotis 25 | */ 26 | public interface StorageConfiguration extends Configurable { 27 | 28 | /** 29 | * Returns the separator that is used in the storage names. Usually the storage 30 | * names used by the algorithms are concatenations of various words separated 31 | * by this character. 
32 | * 33 | * @return 34 | */ 35 | public String getStorageNameSeparator(); 36 | 37 | /** 38 | * Initializes and returns a storage engine. 39 | * 40 | * @param storageName the name of the storage for which the engine is created 41 | * @return the initialized storage engine 42 | */ 43 | public StorageEngine createStorageEngine(String storageName); 44 | 45 | } 46 | -------------------------------------------------------------------------------- /datumbox-framework-core/src/test/java/com/datumbox/framework/core/statistics/nonparametrics/independentsamples/FisherTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2013-2020 Vasilis Vryniotis 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.datumbox.framework.core.statistics.nonparametrics.independentsamples; 17 | 18 | 19 | import com.datumbox.framework.tests.abstracts.AbstractTest; 20 | import org.junit.Test; 21 | 22 | import static org.junit.Assert.assertEquals; 23 | 24 | /** 25 | * Test cases for Fisher. 26 | * 27 | * @author Vasilis Vryniotis 28 | */ 29 | public class FisherTest extends AbstractTest { 30 | 31 | /** 32 | * Test of test method, of class Fisher. 
33 | */ 34 | @Test 35 | public void testTest() { 36 | logger.info("test"); 37 | //Example from Mpesmpeas Notes, rejects null hypothesis 38 | int n11 = 1; 39 | int n12 = 5; 40 | int n21 = 4; 41 | int n22 = 0; 42 | double aLevel = 0.05; 43 | boolean expResult = true; 44 | boolean result = Fisher.test(n11, n12, n21, n22, aLevel); 45 | assertEquals(expResult, result); 46 | } 47 | 48 | } 49 | -------------------------------------------------------------------------------- /datumbox-framework-storage/datumbox-framework-storage-mapdb/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 19 | 20 | 4.0.0 21 | com.datumbox 22 | datumbox-framework-storage-mapdb 23 | 24 | Datumbox Framework MapDB Storage Engine 25 | 26 | 27 | com.datumbox 28 | datumbox-framework-storage 29 | 0.8.3-SNAPSHOT 30 | ../pom.xml 31 | 32 | 33 | 34 | ../.. 35 | 36 | 37 | 38 | 39 | org.mapdb 40 | mapdb 41 | 42 | 43 | 44 | 45 | -------------------------------------------------------------------------------- /datumbox-framework-lib/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 19 | 20 | 4.0.0 21 | com.datumbox 22 | datumbox-framework-lib 23 | 24 | Datumbox Framework Lib 25 | 26 | 27 | com.datumbox 28 | datumbox-framework 29 | 0.8.3-SNAPSHOT 30 | ../pom.xml 31 | 32 | 33 | 34 | .. 35 | 36 | 37 | 38 | 39 | com.datumbox 40 | datumbox-framework-applications 41 | ${project.version} 42 | 43 | 44 | 45 | 46 | -------------------------------------------------------------------------------- /datumbox-framework-core/src/test/java/com/datumbox/framework/core/statistics/parametrics/onesample/ChisquareOneSampleTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2013-2020 Vasilis Vryniotis 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.datumbox.framework.core.statistics.parametrics.onesample; 17 | 18 | 19 | import com.datumbox.framework.tests.abstracts.AbstractTest; 20 | import org.junit.Test; 21 | 22 | import static org.junit.Assert.assertEquals; 23 | 24 | /** 25 | * Test cases for ChisquareOneSample. 26 | * 27 | * @author Vasilis Vryniotis 28 | */ 29 | public class ChisquareOneSampleTest extends AbstractTest { 30 | 31 | /** 32 | * Test of testVariance method, of class ChisquareOneSample. 33 | */ 34 | @Test 35 | public void testTestVariance() { 36 | logger.info("testVariance"); 37 | double stdbar = 0.0063; 38 | int n = 100; 39 | double H0std = 0.01; 40 | boolean is_twoTailed = true; 41 | double aLevel = 0.05; 42 | boolean expResult = true; 43 | boolean result = ChisquareOneSample.testVariance(stdbar, n, H0std, is_twoTailed, aLevel); 44 | assertEquals(expResult, result); 45 | } 46 | 47 | } 48 | -------------------------------------------------------------------------------- /datumbox-framework-core/src/test/java/com/datumbox/framework/core/statistics/parametrics/relatedsamples/NormalRelatedSamplesTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2013-2020 Vasilis Vryniotis 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.datumbox.framework.core.statistics.parametrics.relatedsamples; 17 | 18 | 19 | import com.datumbox.framework.tests.abstracts.AbstractTest; 20 | import org.junit.Test; 21 | 22 | import static org.junit.Assert.assertEquals; 23 | 24 | /** 25 | * Test cases for NormalRelatedSamples. 26 | * 27 | * @author Vasilis Vryniotis 28 | */ 29 | public class NormalRelatedSamplesTest extends AbstractTest { 30 | 31 | /** 32 | * Test of testMean method, of class NormalRelatedSamples. 33 | */ 34 | @Test 35 | public void testTestMean() { 36 | logger.info("testMean"); 37 | double dbar = 2.2; 38 | int n = 30; 39 | double dbarStd = 1.924; 40 | boolean is_twoTailed = false; 41 | double aLevel = 0.05; 42 | boolean expResult = true; 43 | boolean result = NormalRelatedSamples.testMean(dbar, n, dbarStd, is_twoTailed, aLevel); 44 | assertEquals(expResult, result); 45 | } 46 | 47 | } 48 | -------------------------------------------------------------------------------- /datumbox-framework-core/src/test/java/com/datumbox/framework/core/statistics/parametrics/relatedsamples/StudentsRelatedSamplesTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2013-2020 Vasilis Vryniotis 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.datumbox.framework.core.statistics.parametrics.relatedsamples; 17 | 18 | 19 | import com.datumbox.framework.tests.abstracts.AbstractTest; 20 | import org.junit.Test; 21 | 22 | import static org.junit.Assert.assertEquals; 23 | 24 | /** 25 | * Test cases for StudentsRelatedSamples. 26 | * 27 | * @author Vasilis Vryniotis 28 | */ 29 | public class StudentsRelatedSamplesTest extends AbstractTest { 30 | 31 | /** 32 | * Test of testMean method, of class StudentsRelatedSamples. 33 | */ 34 | @Test 35 | public void testTestMean() { 36 | logger.info("testMean"); 37 | double dbar = 2.2; 38 | int n = 10; 39 | double dbarStd = 1.924; 40 | boolean is_twoTailed = false; 41 | double aLevel = 0.05; 42 | boolean expResult = true; 43 | boolean result = StudentsRelatedSamples.testMean(dbar, n, dbarStd, is_twoTailed, aLevel); 44 | assertEquals(expResult, result); 45 | } 46 | 47 | } 48 | -------------------------------------------------------------------------------- /datumbox-framework-storage/datumbox-framework-storage-inmemory/src/main/java/com/datumbox/framework/storage/inmemory/InMemoryConfiguration.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2013-2020 Vasilis Vryniotis 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.datumbox.framework.storage.inmemory; 17 | 18 | import com.datumbox.framework.common.storage.abstracts.AbstractFileStorageConfiguration; 19 | import com.datumbox.framework.common.storage.interfaces.StorageEngine; 20 | 21 | import java.util.Properties; 22 | 23 | /** 24 | * The InMemoryConfiguration class is used to configure the InMemory storage 25 | * and generate new storage engines. InMemory storage loads all the 26 | * data in memory and stores them in serialized files. 27 | * 28 | * @author Vasilis Vryniotis 29 | */ 30 | public class InMemoryConfiguration extends AbstractFileStorageConfiguration { 31 | 32 | /** {@inheritDoc} */ 33 | @Override 34 | public StorageEngine createStorageEngine(String storageName) { 35 | return new InMemoryEngine(storageName, this); 36 | } 37 | 38 | /** {@inheritDoc} */ 39 | @Override 40 | public void load(Properties properties) { 41 | directory = properties.getProperty("inMemoryConfiguration.directory"); 42 | } 43 | 44 | } 45 | -------------------------------------------------------------------------------- /datumbox-framework-core/src/test/java/com/datumbox/framework/core/statistics/parametrics/independentsamples/FIndependentSamplesTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2013-2020 Vasilis Vryniotis 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.datumbox.framework.core.statistics.parametrics.independentsamples; 17 | 18 | 19 | import com.datumbox.framework.tests.abstracts.AbstractTest; 20 | import org.junit.Test; 21 | 22 | import static org.junit.Assert.assertEquals; 23 | 24 | /** 25 | * Test cases for FIndependentSamples. 26 | * 27 | * @author Vasilis Vryniotis 28 | */ 29 | public class FIndependentSamplesTest extends AbstractTest { 30 | 31 | /** 32 | * Test of testVariances method, of class FIndependentSamples. 33 | */ 34 | @Test 35 | public void testTestVariances() { 36 | logger.info("testVariances"); 37 | double stdbarx = 65.54909; 38 | double stdbary = 61.85425; 39 | int n = 100; 40 | int m = 240; 41 | boolean is_twoTailed = true; 42 | double aLevel = 0.05; 43 | boolean expResult = false; 44 | boolean result = FIndependentSamples.testVariances(stdbarx, stdbary, n, m, is_twoTailed, aLevel); 45 | assertEquals(expResult, result); 46 | } 47 | 48 | } 49 | -------------------------------------------------------------------------------- /datumbox-framework-core/src/test/java/com/datumbox/framework/core/statistics/parametrics/onesample/LjungBoxTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2013-2020 Vasilis Vryniotis 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.datumbox.framework.core.statistics.parametrics.onesample; 17 | 18 | import com.datumbox.framework.common.dataobjects.FlatDataCollection; 19 | import com.datumbox.framework.tests.abstracts.AbstractTest; 20 | import org.junit.Test; 21 | 22 | import java.util.Arrays; 23 | 24 | import static org.junit.Assert.assertEquals; 25 | 26 | /** 27 | * Test cases for LjungBox. 28 | * 29 | * @author Vasilis Vryniotis 30 | */ 31 | public class LjungBoxTest extends AbstractTest { 32 | 33 | /** 34 | * Test of testAutocorrelation method, of class LjungBox. 35 | */ 36 | @Test 37 | public void testTestAutocorrelation() { 38 | logger.info("testAutocorrelation"); 39 | FlatDataCollection pkList = new FlatDataCollection(Arrays.asList(new Object[]{0.810,0.631,0.469,0.349})); 40 | int n = 100; 41 | double aLevel = 0.05; 42 | boolean expResult = true; 43 | boolean result = LjungBox.testAutocorrelation(pkList, n, aLevel); 44 | assertEquals(expResult, result); 45 | } 46 | 47 | } 48 | -------------------------------------------------------------------------------- /datumbox-framework-common/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 19 | 20 | 4.0.0 21 | com.datumbox 22 | datumbox-framework-common 23 | 24 | Datumbox Framework Common 25 | 26 | 27 | com.datumbox 28 | datumbox-framework 29 | 0.8.3-SNAPSHOT 30 | ../pom.xml 31 | 32 | 33 | 34 | .. 
35 | 36 | 37 | 38 | 39 | org.apache.commons 40 | commons-csv 41 | 42 | 43 | org.slf4j 44 | slf4j-api 45 | 46 | 47 | 48 | 49 | -------------------------------------------------------------------------------- /datumbox-framework-core/src/main/java/com/datumbox/framework/core/machinelearning/common/abstracts/modelselection/AbstractMetrics.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2013-2020 Vasilis Vryniotis 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.datumbox.framework.core.machinelearning.common.abstracts.modelselection; 17 | 18 | import com.datumbox.framework.core.common.dataobjects.Dataframe; 19 | import com.datumbox.framework.core.machinelearning.common.interfaces.ValidationMetrics; 20 | 21 | import java.util.List; 22 | 23 | /** 24 | * The AbstractMetrics class stores and estimates information about the performance of the algorithm. 25 | * 26 | * @author Vasilis Vryniotis 27 | */ 28 | public abstract class AbstractMetrics implements ValidationMetrics { 29 | 30 | /** 31 | * Estimates the validation metrics on the predicted data. 32 | * 33 | * @param predictedData 34 | */ 35 | protected AbstractMetrics(Dataframe predictedData) { 36 | 37 | } 38 | 39 | /** 40 | * Calculates the average validation metrics by combining the results of the 41 | * provided list. 
42 | * 43 | * @param validationMetricsList 44 | */ 45 | protected AbstractMetrics(List validationMetricsList) { 46 | 47 | } 48 | 49 | } 50 | -------------------------------------------------------------------------------- /datumbox-framework-core/src/test/java/com/datumbox/framework/core/statistics/nonparametrics/relatedsamples/McNemarTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2013-2020 Vasilis Vryniotis 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.datumbox.framework.core.statistics.nonparametrics.relatedsamples; 17 | 18 | 19 | import com.datumbox.framework.tests.abstracts.AbstractTest; 20 | import org.junit.Test; 21 | 22 | import static org.junit.Assert.assertEquals; 23 | 24 | /** 25 | * Test cases for McNemar. 26 | * 27 | * @author Vasilis Vryniotis 28 | */ 29 | public class McNemarTest extends AbstractTest { 30 | 31 | /** 32 | * Test of test method, of class McNemar. 33 | */ 34 | @Test 35 | public void testTest() { 36 | logger.info("test"); 37 | //Example from Wikipedia: http://en.wikipedia.org/wiki/McNemar's_test 38 | //It should reject the null hypothesis and return true. 
39 | int n11 = 101; 40 | int n12 = 121; 41 | int n21 = 59; 42 | int n22 = 33; 43 | boolean is_twoTailed = true; 44 | double aLevel = 0.05; 45 | boolean expResult = true; 46 | boolean result = McNemar.test(n11, n12, n21, n22, is_twoTailed, aLevel); 47 | assertEquals(expResult, result); 48 | } 49 | 50 | } 51 | -------------------------------------------------------------------------------- /datumbox-framework-common/src/main/java/com/datumbox/framework/development/FeatureContext.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2013-2020 Vasilis Vryniotis 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.datumbox.framework.development; 17 | 18 | import com.datumbox.framework.development.interfaces.Feature; 19 | 20 | import java.util.HashMap; 21 | import java.util.Map; 22 | 23 | /** 24 | * This class stores all the active feature switches of the framework which are 25 | * used during development. 26 | * 27 | * @author Vasilis Vryniotis 28 | */ 29 | public class FeatureContext { 30 | 31 | /** 32 | * Map that stores all the active feature switches. 33 | */ 34 | private static final Map<Class<? extends Feature>, Enum> ACTIVE_SWITCHES = new HashMap<>(); 35 | 36 | /** 37 | * This static block initializes the ACTIVE_SWITCHES map by adding all the 38 | * active features switches along with activated options.
39 | */ 40 | static { 41 | 42 | } 43 | 44 | /** 45 | * Validates whether the feature is active. 46 | * 47 | * @param obj 48 | * @return 49 | */ 50 | public static boolean isActive(Enum obj) { 51 | Enum value = ACTIVE_SWITCHES.get((Class)obj.getClass()); 52 | return value != null && value == obj; 53 | } 54 | 55 | } 56 | -------------------------------------------------------------------------------- /datumbox-framework-common/src/main/java/com/datumbox/framework/common/storage/abstracts/AbstractFileStorageConfiguration.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2013-2020 Vasilis Vryniotis 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.datumbox.framework.common.storage.abstracts; 17 | 18 | import com.datumbox.framework.common.storage.interfaces.StorageConfiguration; 19 | 20 | import java.io.File; 21 | 22 | /** 23 | * Parent class of all File-based Storage Configurations. 24 | * 25 | * @author Vasilis Vryniotis 26 | */ 27 | public abstract class AbstractFileStorageConfiguration implements StorageConfiguration { 28 | 29 | /** 30 | * The directory of the models. 31 | */ 32 | protected String directory = null; 33 | 34 | /** {@inheritDoc} */ 35 | @Override 36 | public String getStorageNameSeparator() { 37 | return File.separator; 38 | } 39 | 40 | /** 41 | * Getter for the directory where the data files are stored. 
42 | * 43 | * @return 44 | */ 45 | public String getDirectory() { 46 | return directory; 47 | } 48 | 49 | /** 50 | * Setter for the directory where the data files are stored. 51 | * 52 | * @param directory 53 | */ 54 | public void setDirectory(String directory) { 55 | this.directory = directory; 56 | } 57 | 58 | } 59 | -------------------------------------------------------------------------------- /datumbox-framework-core/src/test/java/com/datumbox/framework/core/statistics/nonparametrics/onesample/ShapiroWilkTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2013-2020 Vasilis Vryniotis 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.datumbox.framework.core.statistics.nonparametrics.onesample; 17 | 18 | import com.datumbox.framework.common.dataobjects.FlatDataCollection; 19 | import com.datumbox.framework.tests.abstracts.AbstractTest; 20 | import org.junit.Test; 21 | 22 | import java.util.Arrays; 23 | 24 | import static org.junit.Assert.assertEquals; 25 | 26 | /** 27 | * Test cases for ShapiroWilk. 28 | * 29 | * @author Vasilis Vryniotis 30 | */ 31 | public class ShapiroWilkTest extends AbstractTest { 32 | 33 | /** 34 | * Test of test method, of class ShapiroWilk. 35 | */ 36 | @Test 37 | public void testTest() { 38 | logger.info("test"); 39 | //Example from Dimaki's Non-parametrics notes. 
It should NOT reject the null hypothesis and return false. 40 | FlatDataCollection flatDataCollection = new FlatDataCollection(Arrays.asList(new Object[]{33.4, 33.3, 31.0, 31.4, 33.5, 34.4, 33.7, 36.2, 34.9, 37.0})); 41 | double aLevel = 0.05; 42 | boolean expResult = false; 43 | boolean result = ShapiroWilk.test(flatDataCollection, aLevel); 44 | assertEquals(expResult, result); 45 | } 46 | 47 | } 48 | -------------------------------------------------------------------------------- /datumbox-framework-core/src/main/java/com/datumbox/framework/core/mathematics/discrete/Arithmetics.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2013-2020 Vasilis Vryniotis 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.datumbox.framework.core.mathematics.discrete; 17 | 18 | /** 19 | * Utility class with useful arithmetic methods. 20 | * 21 | * @author Vasilis Vryniotis 22 | */ 23 | public class Arithmetics { 24 | 25 | /** 26 | * It estimates the factorial of an integer. 27 | * 28 | * @param k 29 | * @return 30 | */ 31 | public static double factorial(int k) { 32 | double factorial=1.0; 33 | while(k>0) { 34 | factorial*=k; 35 | --k; 36 | } 37 | 38 | return factorial; 39 | } 40 | 41 | /** 42 | * It estimates the number of k-combinations of n objects. 
43 | * 44 | * @param n 45 | * @param k 46 | * @return 47 | */ 48 | public static double combination(int n, int k) { 49 | if(n<k) { 50 | throw new IllegalArgumentException("The n can't be smaller than k."); 51 | } 52 | double combinations=1.0; 53 | double lowerBound = n-k; 54 | for(int i=n;i>lowerBound;i--) { 55 | combinations *= i/(i-lowerBound); 56 | } 57 | return combinations; 58 | } 59 | 60 | 61 | } 62 | -------------------------------------------------------------------------------- /datumbox-framework-core/src/test/java/com/datumbox/framework/core/statistics/nonparametrics/onesample/WaldWolfowitzTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2013-2020 Vasilis Vryniotis 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.datumbox.framework.core.statistics.nonparametrics.onesample; 17 | 18 | import com.datumbox.framework.common.dataobjects.FlatDataCollection; 19 | import com.datumbox.framework.tests.abstracts.AbstractTest; 20 | import org.junit.Test; 21 | 22 | import java.util.Arrays; 23 | 24 | import static org.junit.Assert.assertEquals; 25 | 26 | /** 27 | * Test cases for WaldWolfowitz. 28 | * 29 | * @author Vasilis Vryniotis 30 | */ 31 | public class WaldWolfowitzTest extends AbstractTest { 32 | 33 | /** 34 | * Test of test method, of class WaldWolfowitz. 35 | */ 36 | @Test 37 | public void testTest() { 38 | logger.info("test"); 39 | //Example from Dimaki's Non-parametrics notes. It should reject the null hypothesis and return True.
40 | FlatDataCollection flatDataCollection = new FlatDataCollection(Arrays.asList(new Object[]{'Α', 'Β', 'Α', 'Β', 'Α', 'Β', 'Α', 'Β', 'Α', 'Β', 'Β', 'Α', 'Β', 'Α', 'Β', 'Α', 'Α', 'Β'})); 41 | double aLevel = 0.05; 42 | boolean expResult = true; 43 | boolean result = WaldWolfowitz.test(flatDataCollection, aLevel); 44 | assertEquals(expResult, result); 45 | } 46 | 47 | } 48 | -------------------------------------------------------------------------------- /datumbox-framework-storage/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 19 | 20 | 4.0.0 21 | com.datumbox 22 | datumbox-framework-storage 23 | 24 | pom 25 | 26 | Datumbox Framework Storage 27 | 28 | 29 | com.datumbox 30 | datumbox-framework 31 | 0.8.3-SNAPSHOT 32 | ../pom.xml 33 | 34 | 35 | 36 | .. 37 | 38 | 39 | 40 | datumbox-framework-storage-inmemory 41 | datumbox-framework-storage-mapdb 42 | 43 | 44 | 45 | 46 | com.datumbox 47 | datumbox-framework-common 48 | ${project.version} 49 | 50 | 51 | 52 | 53 | -------------------------------------------------------------------------------- /datumbox-framework-core/src/test/java/com/datumbox/framework/core/statistics/nonparametrics/onesample/LillieforsTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2013-2020 Vasilis Vryniotis 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package com.datumbox.framework.core.statistics.nonparametrics.onesample; 17 | 18 | import com.datumbox.framework.common.dataobjects.FlatDataCollection; 19 | import com.datumbox.framework.tests.abstracts.AbstractTest; 20 | import org.junit.Test; 21 | 22 | import java.util.Arrays; 23 | 24 | import static org.junit.Assert.assertEquals; 25 | 26 | /** 27 | * Test cases for Lilliefors. 28 | * 29 | * @author Vasilis Vryniotis 30 | */ 31 | public class LillieforsTest extends AbstractTest { 32 | 33 | /** 34 | * Test of test method, of class Lilliefors. 35 | */ 36 | @Test 37 | public void testTest() { 38 | logger.info("test"); 39 | //Example from Dimaki's Non-parametrics notes. It should NOT reject the null hypothesis and return false. 40 | FlatDataCollection flatDataCollection = new FlatDataCollection(Arrays.asList(new Object[]{33.4, 33.3, 31.0, 31.4, 33.5, 34.4, 33.7, 36.2, 34.9, 37.0})); 41 | String cdfMethod = "normalDistribution"; 42 | double aLevel = 0.3; 43 | boolean expResult = false; 44 | boolean result = Lilliefors.test(flatDataCollection, cdfMethod, aLevel); 45 | assertEquals(expResult, result); 46 | } 47 | 48 | } 49 | -------------------------------------------------------------------------------- /datumbox-framework-core/src/main/java/com/datumbox/framework/core/machinelearning/common/abstracts/modelers/AbstractRegressor.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2013-2020 Vasilis Vryniotis 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.datumbox.framework.core.machinelearning.common.abstracts.modelers; 17 | 18 | import com.datumbox.framework.common.Configuration; 19 | import com.datumbox.framework.core.machinelearning.common.abstracts.AbstractTrainer; 20 | 21 | /** 22 | * Base Class for all the Regression algorithms. 23 | * 24 | * @author Vasilis Vryniotis 25 | * @param 26 | * @param 27 | */ 28 | public abstract class AbstractRegressor extends AbstractModeler { 29 | 30 | /** 31 | * @param trainingParameters 32 | * @param configuration 33 | * @see AbstractTrainer#AbstractTrainer(AbstractTrainingParameters, Configuration) 34 | */ 35 | protected AbstractRegressor(TP trainingParameters, Configuration configuration) { 36 | super(trainingParameters, configuration); 37 | } 38 | 39 | /** 40 | * @param storageName 41 | * @param configuration 42 | * @see AbstractTrainer#AbstractTrainer(String, Configuration) 43 | */ 44 | protected AbstractRegressor(String storageName, Configuration configuration) { 45 | super(storageName, configuration); 46 | } 47 | 48 | 49 | } -------------------------------------------------------------------------------- /datumbox-framework-core/src/main/java/com/datumbox/framework/core/machinelearning/common/abstracts/modelers/AbstractRecommender.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2013-2020 Vasilis Vryniotis 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.datumbox.framework.core.machinelearning.common.abstracts.modelers; 17 | 18 | import com.datumbox.framework.common.Configuration; 19 | import com.datumbox.framework.core.machinelearning.common.abstracts.AbstractTrainer; 20 | 21 | /** 22 | * Abstract Class for recommender algorithms. 23 | * 24 | * @author Vasilis Vryniotis 25 | * @param 26 | * @param 27 | */ 28 | public abstract class AbstractRecommender extends AbstractModeler { 29 | 30 | /** 31 | * @param trainingParameters 32 | * @param configuration 33 | * @see AbstractTrainer#AbstractTrainer(AbstractTrainingParameters, Configuration) 34 | */ 35 | protected AbstractRecommender(TP trainingParameters, Configuration configuration) { 36 | super(trainingParameters, configuration); 37 | } 38 | 39 | /** 40 | * @param storageName 41 | * @param configuration 42 | * @see AbstractTrainer#AbstractTrainer(String, Configuration) 43 | */ 44 | protected AbstractRecommender(String storageName, Configuration configuration) { 45 | super(storageName, configuration); 46 | } 47 | 48 | } 49 | -------------------------------------------------------------------------------- /datumbox-framework-core/src/main/java/com/datumbox/framework/core/machinelearning/common/abstracts/modelers/AbstractTopicModeler.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2013-2020 Vasilis Vryniotis 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with 
the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.datumbox.framework.core.machinelearning.common.abstracts.modelers; 17 | 18 | import com.datumbox.framework.common.Configuration; 19 | import com.datumbox.framework.core.machinelearning.common.abstracts.AbstractTrainer; 20 | 21 | /** 22 | * Base Class for all the Topic Modeling algorithms. 23 | * 24 | * @author Vasilis Vryniotis 25 | * @param 26 | * @param 27 | */ 28 | public abstract class AbstractTopicModeler extends AbstractModeler { 29 | 30 | /** 31 | * @param trainingParameters 32 | * @param configuration 33 | * @see AbstractTrainer#AbstractTrainer(AbstractTrainingParameters, Configuration) 34 | */ 35 | protected AbstractTopicModeler(TP trainingParameters, Configuration configuration) { 36 | super(trainingParameters, configuration); 37 | } 38 | 39 | /** 40 | * @param storageName 41 | * @param configuration 42 | * @see AbstractTrainer#AbstractTrainer(String, Configuration) 43 | */ 44 | protected AbstractTopicModeler(String storageName, Configuration configuration) { 45 | super(storageName, configuration); 46 | } 47 | 48 | } -------------------------------------------------------------------------------- /datumbox-framework-core/src/test/java/com/datumbox/framework/core/mathematics/discrete/ArithmeticsTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2013-2020 Vasilis Vryniotis 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the 
License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.datumbox.framework.core.mathematics.discrete; 17 | 18 | import com.datumbox.framework.tests.Constants; 19 | import com.datumbox.framework.tests.abstracts.AbstractTest; 20 | import org.junit.Assert; 21 | import org.junit.Test; 22 | 23 | import static org.junit.Assert.assertEquals; 24 | 25 | /** 26 | * Test cases for Arithmetics. 27 | * 28 | * @author Vasilis Vryniotis 29 | */ 30 | public class ArithmeticsTest extends AbstractTest { 31 | 32 | /** 33 | * Test of factorial method, of class Arithmetics. 34 | */ 35 | @Test 36 | public void testFactorial() { 37 | logger.info("factorial"); 38 | int k = 10; 39 | double expResult = 3628800.0; 40 | double result = Arithmetics.factorial(k); 41 | Assert.assertEquals(expResult, result, Constants.DOUBLE_ACCURACY_HIGH); 42 | } 43 | 44 | /** 45 | * Test of combination method, of class Arithmetics. 
46 | */ 47 | @Test 48 | public void testCombination() { 49 | logger.info("combination"); 50 | int n = 52; 51 | int k = 5; 52 | double expResult = 2598960.0; 53 | double result = Arithmetics.combination(n, k); 54 | assertEquals(expResult, result, Constants.DOUBLE_ACCURACY_HIGH); 55 | } 56 | 57 | } 58 | -------------------------------------------------------------------------------- /datumbox-framework-tests/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 19 | 20 | 4.0.0 21 | com.datumbox 22 | datumbox-framework-tests 23 | 24 | Datumbox Framework Tests 25 | 26 | 27 | com.datumbox 28 | datumbox-framework 29 | 0.8.3-SNAPSHOT 30 | ../pom.xml 31 | 32 | 33 | 34 | .. 35 | 36 | 37 | 38 | 39 | junit 40 | junit 41 | 42 | 43 | ch.qos.logback 44 | logback-classic 45 | 46 | 47 | 48 | com.datumbox 49 | datumbox-framework-common 50 | ${project.version} 51 | 52 | 53 | 54 | 55 | -------------------------------------------------------------------------------- /datumbox-framework-core/src/main/java/com/datumbox/framework/core/machinelearning/common/interfaces/Trainable.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2013-2020 Vasilis Vryniotis 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package com.datumbox.framework.core.machinelearning.common.interfaces; 17 | 18 | import com.datumbox.framework.core.common.dataobjects.Dataframe; 19 | import com.datumbox.framework.core.common.interfaces.Learnable; 20 | import com.datumbox.framework.core.common.interfaces.Parameterizable; 21 | import com.datumbox.framework.core.common.interfaces.Savable; 22 | 23 | /** 24 | * This interface is used to mark classes that can be trained. This interface 25 | * is used for classes that perform training/analysis and learn parameters. 26 | * 27 | * @author Vasilis Vryniotis 28 | * @param <MP> 29 | * @param <TP> 30 | */ 31 | public interface Trainable<MP extends Learnable, TP extends Parameterizable> extends Savable { 32 | 33 | /** 34 | * Returns the model parameters that were estimated after training. 35 | * 36 | * @return 37 | */ 38 | public MP getModelParameters(); 39 | 40 | /** 41 | * It returns the training parameters that configure the algorithm. 42 | * 43 | * @return 44 | */ 45 | public TP getTrainingParameters(); 46 | 47 | /** 48 | * Trains a model using the provided training parameters and data. 49 | * 50 | * @param trainingData 51 | */ 52 | public void fit(Dataframe trainingData); 53 | 54 | } 55 | -------------------------------------------------------------------------------- /datumbox-framework-core/src/test/java/com/datumbox/framework/core/statistics/nonparametrics/onesample/SignOneSampleTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2013-2020 Vasilis Vryniotis 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.datumbox.framework.core.statistics.nonparametrics.onesample; 17 | 18 | import com.datumbox.framework.common.dataobjects.FlatDataCollection; 19 | import com.datumbox.framework.tests.abstracts.AbstractTest; 20 | import org.junit.Test; 21 | 22 | import java.util.Arrays; 23 | 24 | import static org.junit.Assert.assertEquals; 25 | 26 | /** 27 | * Test cases for SignOneSample. 28 | * 29 | * @author Vasilis Vryniotis 30 | */ 31 | public class SignOneSampleTest extends AbstractTest { 32 | 33 | /** 34 | * Test of test method, of class SignOneSample. 35 | */ 36 | @Test 37 | public void testTest() { 38 | logger.info("test"); 39 | //Example from Dimaki's Non-parametrics notes. It should reject the null hypothesis and return true. 40 | FlatDataCollection flatDataCollection = new FlatDataCollection(Arrays.asList(new Object[]{0.16,0.12,0.19,0.16,0.17,0.18,0.15,0.20,0.16,0.18,0.13,0.17,0.18,0.21,0.18,0.17,0.19,0.11,0.16,0.16})); 41 | double median = 0.15; 42 | boolean is_twoTailed = true; 43 | double aLevel = 0.05; 44 | boolean expResult = true; 45 | boolean result = SignOneSample.test(flatDataCollection, median, is_twoTailed, aLevel); 46 | assertEquals(expResult, result); 47 | } 48 | 49 | } 50 | -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | Datumbox Machine Learning Framework 2 | Copyright (C) 2013 Vasilis Vryniotis 3 | 4 | 5 | The following libraries are included in packaged versions of this project: 6 | 7 | * Apache Commons Math 8 | * COPYRIGHT: Copyright 2003 The Apache Software Foundation 9 | * LICENSE: http://www.apache.org/licenses/LICENSE-2.0.txt (Apache License, Version 2.0) 10 | * HOMEPAGE: https://commons.apache.org/proper/commons-math/ 11 | 12 | * Apache Commons CSV 13 | * COPYRIGHT: Copyright 2014 The 
Apache Software Foundation 14 | * LICENSE: http://www.apache.org/licenses/LICENSE-2.0.txt (Apache License, Version 2.0) 15 | * HOMEPAGE: http://commons.apache.org/proper/commons-csv/ 16 | 17 | * SLF4J API 18 | * COPYRIGHT: Copyright 2004 QOS.ch 19 | * LICENSE: http://www.slf4j.org/license.html (MIT License) 20 | * HOMEPAGE: http://www.slf4j.org/ 21 | 22 | * LIBSVM 23 | * COPYRIGHT: Copyright 2000 Chih-Chung Chang and Chih-Jen Lin 24 | * LICENSE: http://www.csie.ntu.edu.tw/~cjlin/libsvm/COPYRIGHT (BSD 3-Clause License) 25 | * HOMEPAGE: http://www.csie.ntu.edu.tw/~cjlin/libsvm/ 26 | 27 | * MapDB 28 | * COPYRIGHT: Copyright 2012 Jan Kotek 29 | * LICENSE: http://www.apache.org/licenses/LICENSE-2.0.txt (Apache License, Version 2.0) 30 | * HOMEPAGE: http://www.mapdb.org/ 31 | 32 | 33 | Code from the following software is included in this project: 34 | 35 | * Guava 36 | * COPYRIGHT: Copyright 2007 Google Inc. 37 | * LICENSE: http://www.apache.org/licenses/LICENSE-2.0.txt (Apache License, Version 2.0) 38 | * HOMEPAGE: http://code.google.com/p/guava-libraries/ 39 | 40 | 41 | The following libraries are required for the tests of this project: 42 | 43 | * JUnit 44 | * COPYRIGHT: Copyright 2002 JUnit 45 | * LICENSE: http://www.opensource.org/licenses/cpl.php (Common Public License Version 1.0) 46 | * HOMEPAGE: http://www.junit.org/ 47 | 48 | * Logback 49 | * COPYRIGHT: Copyright 1999 QOS.ch 50 | * LICENSE: http://logback.qos.ch/license.html (Eclipse Public License v1.0 / GNU Lesser General Public License version 2.1) 51 | * HOMEPAGE: http://logback.qos.ch/ 52 | 53 | 54 | -------------------------------------------------------------------------------- /datumbox-framework-core/src/main/java/com/datumbox/framework/core/machinelearning/common/interfaces/TrainingParameters.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2013-2020 Vasilis Vryniotis 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the 
"License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.datumbox.framework.core.machinelearning.common.interfaces; 17 | 18 | import com.datumbox.framework.core.common.interfaces.Parameterizable; 19 | 20 | /** 21 | * The TrainingParameters objects store all the initial parameters provided 22 | * to the algorithms during training. 23 | * 24 | * @author Vasilis Vryniotis 25 | */ 26 | 27 | public interface TrainingParameters extends Parameterizable { 28 | 29 | /** 30 | * Retrieves the ModelParameters class. 31 | * 32 | * @param 33 | * @return 34 | */ 35 | @SuppressWarnings("unchecked") 36 | default public Class getMPClass() { 37 | try { 38 | //By convention the training and model parameters are one level below the algorithm class. 39 | return (Class) Class.forName(getTClass().getCanonicalName() + "$ModelParameters"); 40 | } 41 | catch (ClassNotFoundException ex) { 42 | throw new RuntimeException(ex); 43 | } 44 | } 45 | 46 | /** 47 | * Retrieves the Trainable class. 
48 | * 49 | * @param 50 | * @return 51 | */ 52 | default public Class getTClass() { 53 | return (Class) this.getClass().getEnclosingClass(); 54 | } 55 | 56 | } 57 | -------------------------------------------------------------------------------- /datumbox-framework-core/src/test/java/com/datumbox/framework/core/statistics/descriptivestatistics/RanksTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2013-2020 Vasilis Vryniotis 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.datumbox.framework.core.statistics.descriptivestatistics; 17 | 18 | import com.datumbox.framework.common.dataobjects.AssociativeArray; 19 | import com.datumbox.framework.common.dataobjects.FlatDataList; 20 | import com.datumbox.framework.tests.abstracts.AbstractTest; 21 | import org.junit.Test; 22 | 23 | import java.util.Arrays; 24 | import java.util.HashMap; 25 | 26 | import static org.junit.Assert.assertEquals; 27 | 28 | /** 29 | * Test cases for Ranks. 30 | * 31 | * @author Vasilis Vryniotis 32 | */ 33 | public class RanksTest extends AbstractTest { 34 | 35 | /** 36 | * Test of getRanksFromValues method, of class Ranks. 
37 | */ 38 | @Test 39 | public void testGetRanksFromValues() { 40 | logger.info("getRanksFromValues"); 41 | FlatDataList flatDataCollection = new FlatDataList(Arrays.asList(new Object[]{50,10,10,30,40})); 42 | FlatDataList expResult = new FlatDataList(Arrays.asList(new Object[]{5.0,1.5,1.5,3.0,4.0})); 43 | AssociativeArray expResult2 = new AssociativeArray(new HashMap<>()); 44 | expResult2.put(10, 2); 45 | AssociativeArray tiesCounter = Ranks.getRanksFromValues(flatDataCollection); 46 | assertEquals(expResult, flatDataCollection); 47 | assertEquals(expResult2, tiesCounter); 48 | } 49 | 50 | } 51 | -------------------------------------------------------------------------------- /datumbox-framework-core/src/test/java/com/datumbox/framework/core/statistics/nonparametrics/onesample/WilcoxonOneSampleTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2013-2020 Vasilis Vryniotis 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.datumbox.framework.core.statistics.nonparametrics.onesample; 17 | 18 | import com.datumbox.framework.common.dataobjects.FlatDataCollection; 19 | import com.datumbox.framework.tests.abstracts.AbstractTest; 20 | import org.junit.Test; 21 | 22 | import java.util.Arrays; 23 | 24 | import static org.junit.Assert.assertEquals; 25 | 26 | /** 27 | * Test cases for WilcoxonOneSample. 
28 | * 29 | * @author Vasilis Vryniotis 30 | */ 31 | public class WilcoxonOneSampleTest extends AbstractTest { 32 | 33 | /** 34 | * Test of test method, of class WilcoxonOneSample. 35 | */ 36 | @Test 37 | public void testTest() { 38 | logger.info("test"); 39 | //Example from Dimaki's Non-parametrics notes. It should reject the null hypothesis and return true. 40 | FlatDataCollection flatDataCollection = new FlatDataCollection(Arrays.asList(new Object[]{19.5,19.8,18.9,20.4,20.2,21.5,19.9,20.9,18.1,20.5,18.3,19.5,18.3,19.0,18.2,23.9,17.0,19.7,21.7,19.5})); 41 | double median = 20.8; 42 | boolean is_twoTailed = true; 43 | double aLevel = 0.05; 44 | boolean expResult = true; 45 | boolean result = WilcoxonOneSample.test(flatDataCollection, median, is_twoTailed, aLevel); 46 | assertEquals(expResult, result); 47 | } 48 | 49 | } 50 | -------------------------------------------------------------------------------- /datumbox-framework-core/src/test/java/com/datumbox/framework/core/statistics/parametrics/relatedsamples/PearsonCorrelationTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2013-2020 Vasilis Vryniotis 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package com.datumbox.framework.core.statistics.parametrics.relatedsamples; 17 | 18 | import com.datumbox.framework.common.dataobjects.FlatDataList; 19 | import com.datumbox.framework.common.dataobjects.TransposeDataList; 20 | import com.datumbox.framework.tests.abstracts.AbstractTest; 21 | import org.junit.Test; 22 | 23 | import java.util.Arrays; 24 | 25 | import static org.junit.Assert.assertEquals; 26 | 27 | /** 28 | * Test cases for PearsonCorrelation. 29 | * 30 | * @author Vasilis Vryniotis 31 | */ 32 | public class PearsonCorrelationTest extends AbstractTest { 33 | 34 | /** 35 | * Test of test method, of class PearsonCorrelation. 36 | */ 37 | @Test 38 | public void testTest() { 39 | logger.info("test"); 40 | TransposeDataList transposeDataList = new TransposeDataList(); 41 | transposeDataList.put(0, new FlatDataList(Arrays.asList(new Object[]{64,61,84,70,88,92,72,77}))); 42 | transposeDataList.put(1, new FlatDataList(Arrays.asList(new Object[]{20,16,34,23,27,32,18,22}))); 43 | 44 | boolean is_twoTailed = true; 45 | double aLevel = 0.05; 46 | boolean expResult = true; 47 | boolean result = PearsonCorrelation.test(transposeDataList, is_twoTailed, aLevel); 48 | assertEquals(expResult, result); 49 | } 50 | 51 | } 52 | -------------------------------------------------------------------------------- /datumbox-framework-applications/src/test/java/com/datumbox/framework/applications/nlp/CETRTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2013-2020 Vasilis Vryniotis 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.datumbox.framework.applications.nlp; 17 | 18 | import com.datumbox.framework.common.Configuration; 19 | import com.datumbox.framework.core.Datasets; 20 | import com.datumbox.framework.tests.abstracts.AbstractTest; 21 | import org.junit.Test; 22 | 23 | import static org.junit.Assert.assertEquals; 24 | 25 | /** 26 | * Test cases for CETR. 27 | * 28 | * @author Vasilis Vryniotis 29 | */ 30 | public class CETRTest extends AbstractTest { 31 | 32 | /** 33 | * Test of extract method, of class CETR. 34 | */ 35 | @Test 36 | public void testExtract() { 37 | logger.info("extract"); 38 | 39 | Configuration configuration = getConfiguration(); 40 | 41 | String text = Datasets.exampleHtmlCode(); 42 | 43 | CETR.Parameters parameters = new CETR.Parameters(); 44 | parameters.setNumberOfClusters(2); 45 | parameters.setAlphaWindowSizeFor2DModel(3); 46 | parameters.setSmoothingAverageRadius(2); 47 | CETR instance = new CETR(configuration); 48 | String expResult = "This domain is established to be used for illustrative examples in documents. 
You may use this domain in examples without prior coordination or asking for permission."; 49 | String result = instance.extract(text, parameters); 50 | assertEquals(expResult, result); 51 | } 52 | 53 | } 54 | -------------------------------------------------------------------------------- /datumbox-framework-core/src/test/java/com/datumbox/framework/core/common/text/extractors/UniqueWordSequenceExtractorTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2013-2020 Vasilis Vryniotis 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.datumbox.framework.core.common.text.extractors; 17 | 18 | import com.datumbox.framework.tests.abstracts.AbstractTest; 19 | import org.junit.Test; 20 | 21 | import java.util.LinkedHashMap; 22 | import java.util.Map; 23 | 24 | import static org.junit.Assert.assertEquals; 25 | 26 | /** 27 | * Test cases for UniqueWordSequenceExtractor. 28 | * 29 | * @author Vasilis Vryniotis 30 | */ 31 | public class UniqueWordSequenceExtractorTest extends AbstractTest { 32 | 33 | /** 34 | * Test of extract method, of class UniqueWordSequenceExtractor. 
35 | */ 36 | @Test 37 | public void testExtract() { 38 | logger.info("extract"); 39 | String text = "this is a text sequence that is amazing text sequence"; 40 | UniqueWordSequenceExtractor instance = new UniqueWordSequenceExtractor(new UniqueWordSequenceExtractor.Parameters()); 41 | 42 | Map expResult = new LinkedHashMap<>(); 43 | expResult.put(0, "this"); 44 | expResult.put(1, "is"); 45 | expResult.put(2, "a"); 46 | expResult.put(3, "text"); 47 | expResult.put(4, "sequence"); 48 | expResult.put(5, "that"); 49 | expResult.put(6, "amazing"); 50 | Map result = instance.extract(text); 51 | assertEquals(expResult, result); 52 | } 53 | 54 | } 55 | -------------------------------------------------------------------------------- /datumbox-framework-core/src/main/java/com/datumbox/framework/core/machinelearning/common/interfaces/ModelParameters.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2013-2020 Vasilis Vryniotis 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package com.datumbox.framework.core.machinelearning.common.interfaces; 17 | 18 | import com.datumbox.framework.core.common.interfaces.Learnable; 19 | import com.datumbox.framework.common.storage.interfaces.StorageEngine; 20 | 21 | import java.lang.reflect.Constructor; 22 | import java.lang.reflect.InvocationTargetException; 23 | 24 | /** 25 | * The ModelParameters objects store the coefficients that were learned during 26 | * the training of the algorithm. 27 | * 28 | * @author Vasilis Vryniotis 29 | */ 30 | public interface ModelParameters extends Learnable { 31 | 32 | /** 33 | * Generates a new instance of the Model Parameters by reflectively invoking the (possibly non-public) constructor of the provided class that takes a StorageEngine; any reflection failure is rethrown wrapped in a RuntimeException. 34 | * 35 | * @param 36 | * @param mpClass the ModelParameters class to instantiate; it must declare a constructor taking a single StorageEngine 37 | * @param storageEngine the storage engine passed to that constructor 38 | * @return the newly created instance 39 | */ 40 | public static MP newInstance(Class mpClass, StorageEngine storageEngine) { 41 | try { 42 | Constructor c = mpClass.getDeclaredConstructor(StorageEngine.class); 43 | c.setAccessible(true); 44 | return c.newInstance(storageEngine); 45 | } 46 | catch (InstantiationException | IllegalAccessException | IllegalArgumentException | InvocationTargetException | NoSuchMethodException | SecurityException ex) { 47 | throw new RuntimeException(ex); 48 | } 49 | } 50 | 51 | } 52 | -------------------------------------------------------------------------------- /datumbox-framework-core/src/test/java/com/datumbox/framework/core/statistics/nonparametrics/relatedsamples/SignRelatedSamplesTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2013-2020 Vasilis Vryniotis 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.datumbox.framework.core.statistics.nonparametrics.relatedsamples; 17 | 18 | import com.datumbox.framework.common.dataobjects.FlatDataList; 19 | import com.datumbox.framework.common.dataobjects.TransposeDataList; 20 | import com.datumbox.framework.tests.abstracts.AbstractTest; 21 | import org.junit.Test; 22 | 23 | import java.util.Arrays; 24 | 25 | import static org.junit.Assert.assertEquals; 26 | 27 | /** 28 | * Test cases for SignRelatedSamples. 29 | * 30 | * @author Vasilis Vryniotis 31 | */ 32 | public class SignRelatedSamplesTest extends AbstractTest { 33 | 34 | /** 35 | * Test of test method, of class SignRelatedSamples. 
36 | */ 37 | @Test 38 | public void testTest() { 39 | logger.info("test"); 40 | TransposeDataList transposeDataList = new TransposeDataList(); 41 | transposeDataList.put(0, new FlatDataList(Arrays.asList(new Object[]{136,115,142,140,123,147,133,150,138,147,151,145,147.0}))); 42 | transposeDataList.put(1, new FlatDataList(Arrays.asList(new Object[]{141.0,117,141,145,127,146,135,152,135,152,149,148,147}))); 43 | 44 | boolean is_twoTailed = true; 45 | double aLevel = 0.05; 46 | boolean expResult = false; 47 | boolean result = SignRelatedSamples.test(transposeDataList, is_twoTailed, aLevel); 48 | assertEquals(expResult, result); 49 | } 50 | 51 | } 52 | -------------------------------------------------------------------------------- /datumbox-framework-core/src/test/java/com/datumbox/framework/core/common/text/analyzers/PHPSimilarTextTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2013-2020 Vasilis Vryniotis 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.datumbox.framework.core.common.text.analyzers; 17 | 18 | import com.datumbox.framework.tests.Constants; 19 | import com.datumbox.framework.tests.abstracts.AbstractTest; 20 | import org.junit.Test; 21 | 22 | import static org.junit.Assert.assertEquals; 23 | 24 | /** 25 | * Test cases for PHPSimilarText. 
26 | * 27 | * @author Vasilis Vryniotis 28 | */ 29 | public class PHPSimilarTextTest extends AbstractTest { 30 | 31 | /** 32 | * Test of similarityChars method, of class PHPSimilarText. 33 | */ 34 | @Test 35 | public void testSimilarityChars() { 36 | logger.info("similarityChars"); 37 | String txt1 = "this is a fine text"; 38 | String txt2 = "this is a great document"; 39 | int expResult = 12; 40 | int result = PHPSimilarText.similarityChars(txt1, txt2); 41 | assertEquals(expResult, result); 42 | } 43 | 44 | /** 45 | * Test of similarityPercentage method, of class PHPSimilarText. 46 | */ 47 | @Test 48 | public void testSimilarityPercentage() { 49 | logger.info("similarityPercentage"); 50 | String txt1 = "this is a fine text"; 51 | String txt2 = "this is a great document"; 52 | double expResult = 55.813953488372; 53 | double result = PHPSimilarText.similarityPercentage(txt1, txt2); 54 | assertEquals(expResult, result, Constants.DOUBLE_ACCURACY_HIGH); 55 | } 56 | 57 | } 58 | -------------------------------------------------------------------------------- /datumbox-framework-common/src/main/java/com/datumbox/framework/common/dataobjects/AbstractDataStructureMap.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2013-2020 Vasilis Vryniotis 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package com.datumbox.framework.common.dataobjects; 17 | 18 | import java.util.Map; 19 | 20 | /** 21 | * Abstract class for every AbstractDataStructure which internally uses a Map 22 | * Object. 23 | * 24 | * @author Vasilis Vryniotis 25 | * @param 26 | */ 27 | public abstract class AbstractDataStructureMap> extends AbstractDataStructure { 28 | 29 | /** 30 | * Public constructor which takes as argument the appropriate Java collection. 31 | * 32 | * @param data the map instance handed to the superclass constructor 33 | */ 34 | public AbstractDataStructureMap(T data) { 35 | super(data); 36 | } 37 | 38 | /** 39 | * Returns the size of the map. 40 | * 41 | * @return the value of internalData.size() 42 | */ 43 | public final int size() { 44 | return internalData.size(); 45 | } 46 | 47 | /** 48 | * Clears the internal data. 49 | */ 50 | public final void clear() { 51 | internalData.clear(); 52 | } 53 | 54 | /** 55 | * Checks if the internal data are empty. 56 | * 57 | * @return true if internalData reports that it is empty 58 | */ 59 | public final boolean isEmpty() { 60 | return internalData.isEmpty(); 61 | } 62 | 63 | /** 64 | * Checks if the provided key exists in the map. 65 | * 66 | * @param key the key to look up in internalData 67 | * @return true if internalData contains the key 68 | */ 69 | public final boolean containsKey(Object key) { 70 | return internalData.containsKey(key); 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /datumbox-framework-core/src/test/java/com/datumbox/framework/core/common/text/extractors/WordSequenceExtractorTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2013-2020 Vasilis Vryniotis 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.datumbox.framework.core.common.text.extractors; 17 | 18 | import com.datumbox.framework.tests.abstracts.AbstractTest; 19 | import org.junit.Test; 20 | 21 | import java.util.LinkedHashMap; 22 | import java.util.Map; 23 | 24 | import static org.junit.Assert.assertEquals; 25 | 26 | /** 27 | * Test cases for WordSequenceExtractor. 28 | * 29 | * @author Vasilis Vryniotis 30 | */ 31 | public class WordSequenceExtractorTest extends AbstractTest { 32 | /** 33 | * Test of extract method, of class WordSequenceExtractor. 34 | */ 35 | @Test 36 | public void testExtract() { 37 | logger.info("extract"); 38 | String text = "this is a text sequence that is amazing text sequence"; 39 | WordSequenceExtractor instance = new WordSequenceExtractor(new WordSequenceExtractor.Parameters()); 40 | 41 | Map expResult = new LinkedHashMap<>(); 42 | expResult.put(0, "this"); 43 | expResult.put(1, "is"); 44 | expResult.put(2, "a"); 45 | expResult.put(3, "text"); 46 | expResult.put(4, "sequence"); 47 | expResult.put(5, "that"); 48 | expResult.put(6, "is"); 49 | expResult.put(7, "amazing"); 50 | expResult.put(8, "text"); 51 | expResult.put(9, "sequence"); 52 | 53 | Map result = instance.extract(text); 54 | assertEquals(expResult, result); 55 | } 56 | 57 | } 58 | -------------------------------------------------------------------------------- /datumbox-framework-core/src/test/java/com/datumbox/framework/core/statistics/nonparametrics/independentsamples/ChisquareTest.java: 
-------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2013-2020 Vasilis Vryniotis 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.datumbox.framework.core.statistics.nonparametrics.independentsamples; 17 | 18 | import com.datumbox.framework.common.dataobjects.DataTable2D; 19 | import com.datumbox.framework.tests.abstracts.AbstractTest; 20 | import org.junit.Test; 21 | 22 | import static org.junit.Assert.assertEquals; 23 | 24 | /** 25 | * Test cases for Chisquare. 26 | * 27 | * @author Vasilis Vryniotis 28 | */ 29 | public class ChisquareTest extends AbstractTest { 30 | 31 | /** 32 | * Test of test method, of class Chisquare. 33 | */ 34 | @Test 35 | public void testTest() { 36 | logger.info("test"); 37 | //Example from Dimaki's Non-parametrics notes. It should reject the null hypothesis and return True. 
38 | DataTable2D dataTable = new DataTable2D(); 39 | dataTable.put2d(0, 0, 13); 40 | dataTable.put2d(0, 1, 8); 41 | dataTable.put2d(0, 2, 10); 42 | dataTable.put2d(0, 3, 3); 43 | dataTable.put2d(1, 0, 20); 44 | dataTable.put2d(1, 1, 23); 45 | dataTable.put2d(1, 2, 27); 46 | dataTable.put2d(1, 3, 18); 47 | dataTable.put2d(2, 0, 11); 48 | dataTable.put2d(2, 1, 12); 49 | dataTable.put2d(2, 2, 12); 50 | dataTable.put2d(2, 3, 21); 51 | 52 | double aLevel = 0.05; 53 | boolean expResult = true; 54 | boolean result = Chisquare.test(dataTable, aLevel); 55 | assertEquals(expResult, result); 56 | } 57 | 58 | } 59 | -------------------------------------------------------------------------------- /datumbox-framework-core/src/test/java/com/datumbox/framework/core/statistics/survival/nonparametrics/independentsamples/LogrankTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2013-2020 Vasilis Vryniotis 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package com.datumbox.framework.core.statistics.survival.nonparametrics.independentsamples; 17 | 18 | import com.datumbox.framework.common.dataobjects.FlatDataCollection; 19 | import com.datumbox.framework.common.dataobjects.TransposeDataCollection; 20 | import com.datumbox.framework.tests.abstracts.AbstractTest; 21 | import org.junit.Test; 22 | 23 | import java.util.Arrays; 24 | 25 | import static org.junit.Assert.assertEquals; 26 | 27 | /** 28 | * Test cases for Logrank. 29 | * 30 | * @author Vasilis Vryniotis 31 | */ 32 | public class LogrankTest extends AbstractTest { 33 | 34 | /** 35 | * Test of test method, of class Logrank. 36 | */ 37 | @Test 38 | public void testTest() { 39 | logger.info("test"); 40 | //Example from Dimaki's Survival Non-parametrics notes. It should reject the null hypothesis and return true. 41 | TransposeDataCollection transposeDataCollection = new TransposeDataCollection(); 42 | transposeDataCollection.put(0, new FlatDataCollection(Arrays.asList(new Object[]{23,"16+","18+","20+","24+"}))); 43 | transposeDataCollection.put(1, new FlatDataCollection(Arrays.asList(new Object[]{15,18,19,19,20.0}))); 44 | boolean is_twoTailed = true; 45 | double aLevel = 0.05; 46 | boolean expResult = true; 47 | boolean result = Logrank.test(transposeDataCollection, is_twoTailed, aLevel); 48 | assertEquals(expResult, result); 49 | } 50 | 51 | } 52 | -------------------------------------------------------------------------------- /datumbox-framework-core/src/test/java/com/datumbox/framework/core/statistics/survival/nonparametrics/independentsamples/CoxMantelTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2013-2020 Vasilis Vryniotis 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.datumbox.framework.core.statistics.survival.nonparametrics.independentsamples; 17 | 18 | import com.datumbox.framework.common.dataobjects.FlatDataCollection; 19 | import com.datumbox.framework.common.dataobjects.TransposeDataCollection; 20 | import com.datumbox.framework.tests.abstracts.AbstractTest; 21 | import org.junit.Test; 22 | 23 | import java.util.Arrays; 24 | 25 | import static org.junit.Assert.assertEquals; 26 | 27 | /** 28 | * Test cases for CoxMantel. 29 | * 30 | * @author Vasilis Vryniotis 31 | */ 32 | public class CoxMantelTest extends AbstractTest { 33 | 34 | /** 35 | * Test of test method, of class CoxMantel. 36 | */ 37 | @Test 38 | public void testTest() { 39 | logger.info("test"); 40 | //Example from Dimaki's Survival Non-parametrics notes. It should reject the null hypothesis and return true. 
41 | TransposeDataCollection transposeDataCollection = new TransposeDataCollection(); 42 | transposeDataCollection.put(0, new FlatDataCollection(Arrays.asList(new Object[]{23,"16+","18+","20+","24+"}))); 43 | transposeDataCollection.put(1, new FlatDataCollection(Arrays.asList(new Object[]{15,18,19,19,20.0}))); 44 | boolean is_twoTailed = true; 45 | double aLevel = 0.05; 46 | boolean expResult = true; 47 | boolean result = CoxMantel.test(transposeDataCollection, is_twoTailed, aLevel); 48 | assertEquals(expResult, result); 49 | } 50 | 51 | } 52 | -------------------------------------------------------------------------------- /datumbox-framework-common/src/main/java/com/datumbox/framework/common/storage/interfaces/BigMap.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2013-2020 Vasilis Vryniotis 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.datumbox.framework.common.storage.interfaces; 17 | 18 | import com.datumbox.framework.common.storage.interfaces.StorageEngine.MapType; 19 | import com.datumbox.framework.common.storage.interfaces.StorageEngine.StorageHint; 20 | 21 | import java.lang.annotation.*; 22 | 23 | /** 24 | * BigMap annotation is used to declare large Maps in the ModelParameters classes. 25 | * Fields that are annotated with this annotation, are automatically initialized 26 | * by the BigMapHolder object. 
27 | * 28 | * @author Vasilis Vryniotis 29 | */ 30 | @Target(ElementType.FIELD) 31 | @Inherited 32 | @Retention(RetentionPolicy.RUNTIME) 33 | public @interface BigMap { 34 | /** 35 | * Parameter that passes the Class of the Key of the BigMap. 36 | * 37 | * @return 38 | */ 39 | public Class keyClass(); 40 | 41 | /** 42 | * Parameter that passes the Class of the Value of the BigMap. 43 | * 44 | * @return 45 | */ 46 | public Class valueClass(); 47 | 48 | /** 49 | * Parameter that passes the MapType of the BigMap. 50 | * 51 | * @return 52 | */ 53 | public MapType mapType(); 54 | 55 | /** 56 | * Parameter that passes the StorageHint of the BigMap. 57 | * 58 | * @return 59 | */ 60 | public StorageHint storageHint(); 61 | 62 | /** 63 | * Parameter that passes whether the BigMap should be tread-safe. 64 | * 65 | * @return 66 | */ 67 | public boolean concurrent(); 68 | } -------------------------------------------------------------------------------- /datumbox-framework-core/src/test/java/com/datumbox/framework/core/statistics/nonparametrics/independentsamples/MannWhitneyTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2013-2020 Vasilis Vryniotis 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package com.datumbox.framework.core.statistics.nonparametrics.independentsamples; 17 | 18 | import com.datumbox.framework.common.dataobjects.FlatDataCollection; 19 | import com.datumbox.framework.common.dataobjects.TransposeDataCollection; 20 | import com.datumbox.framework.tests.abstracts.AbstractTest; 21 | import org.junit.Test; 22 | 23 | import java.util.Arrays; 24 | 25 | import static org.junit.Assert.assertEquals; 26 | 27 | /** 28 | * Test cases for MannWhitney. 29 | * 30 | * @author Vasilis Vryniotis 31 | */ 32 | public class MannWhitneyTest extends AbstractTest { 33 | 34 | /** 35 | * Test of test method, of class MannWhitney. 36 | */ 37 | @Test 38 | public void testTest() { 39 | logger.info("test"); 40 | //Example from Dimaki's Non-parametrics notes. It should reject the null hypothesis and return true. 41 | TransposeDataCollection transposeDataCollection = new TransposeDataCollection(); 42 | transposeDataCollection.put("group1", new FlatDataCollection(Arrays.asList(new Object[]{32,26.5,28.5,30,26}))); 43 | transposeDataCollection.put("group2", new FlatDataCollection(Arrays.asList(new Object[]{18.5,16,19.5,20}))); 44 | 45 | boolean is_twoTailed = true; 46 | double aLevel = 0.05; 47 | boolean expResult = true; 48 | boolean result = MannWhitney.test(transposeDataCollection, is_twoTailed, aLevel); 49 | assertEquals(expResult, result); 50 | } 51 | 52 | } 53 | -------------------------------------------------------------------------------- /datumbox-framework-core/src/main/java/com/datumbox/framework/core/mathematics/regularization/L2Regularizer.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2013-2020 Vasilis Vryniotis 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
/**
 * Utility class for L2 regularization.
 *
 * @author Vasilis Vryniotis
 */
public class L2Regularizer {

    /**
     * Updates the weights by applying the L2 regularization.
     *
     * For every entry of {@code weights}, the value stored under the same key
     * in {@code newWeights} is shifted by {@code -learningRate * l2 * weight}.
     * Nothing happens when {@code l2} is not strictly positive.
     *
     * @param l2 the L2 regularization coefficient
     * @param learningRate the learning rate that scales the update
     * @param weights the current weights; only read
     * @param newWeights the weights being updated in place; it must already
     *                   hold a value for every key of {@code weights},
     *                   otherwise unboxing the missing value throws a
     *                   NullPointerException
     * @param <K> the type of the weight keys
     */
    public static <K> void updateWeights(double l2, double learningRate, Map<K, Double> weights, Map<K, Double> newWeights) {
        if(l2 > 0.0) {
            for(Map.Entry<K, Double> e : weights.entrySet()) {
                K column = e.getKey();
                newWeights.put(column, newWeights.get(column) + l2*e.getValue()*(-learningRate));
            }
        }
    }

    /**
     * Estimates the penalty by adding the L2 regularization.
     *
     * The penalty is {@code l2 * sum(w^2) / 2}; it is 0.0 when {@code l2} is
     * not strictly positive.
     *
     * @param l2 the L2 regularization coefficient
     * @param weights the weights whose squared values are summed
     * @param <K> the type of the weight keys
     * @return the L2 penalty term
     */
    public static <K> double estimatePenalty(double l2, Map<K, Double> weights) {
        double penalty = 0.0;
        if(l2 > 0.0) {
            double sumWeightsSquared = 0.0;
            for(double w : weights.values()) {
                sumWeightsSquared += w*w;
            }
            penalty = l2*sumWeightsSquared/2.0;
        }
        return penalty;
    }

}
29 | * 30 | * @author Vasilis Vryniotis 31 | */ 32 | public class WilcoxonRelatedSamplesTest extends AbstractTest { 33 | 34 | /** 35 | * Test of test method, of class WilcoxonRelatedSamples. 36 | */ 37 | @Test 38 | public void testTest() { 39 | logger.info("test"); 40 | TransposeDataList transposeDataList = new TransposeDataList(); 41 | //Example from Dimaki's Non-parametrics notes. It should reject the null hypothesis and return true. 42 | transposeDataList.put(0, new FlatDataList(Arrays.asList(new Object[]{39.8,38.8,38.4,39.9,39.4,38.4,38.6,41.2,39.0,39.1}))); 43 | transposeDataList.put(1, new FlatDataList(Arrays.asList(new Object[]{38.8,38.6,37.5,38.0,38.7,38.4,38.7,38.6,38.3,38.6}))); 44 | 45 | boolean is_twoTailed = false; 46 | double aLevel = 0.05; 47 | boolean expResult = true; 48 | boolean result = WilcoxonRelatedSamples.test(transposeDataList, is_twoTailed, aLevel); 49 | assertEquals(expResult, result); 50 | } 51 | 52 | } 53 | -------------------------------------------------------------------------------- /datumbox-framework-core/src/test/java/com/datumbox/framework/core/statistics/survival/nonparametrics/independentsamples/PetoPetoWilcoxonTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2013-2020 Vasilis Vryniotis 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package com.datumbox.framework.core.statistics.survival.nonparametrics.independentsamples; 17 | 18 | import com.datumbox.framework.common.dataobjects.FlatDataCollection; 19 | import com.datumbox.framework.common.dataobjects.TransposeDataCollection; 20 | import com.datumbox.framework.tests.abstracts.AbstractTest; 21 | import org.junit.Test; 22 | 23 | import java.util.Arrays; 24 | 25 | import static org.junit.Assert.assertEquals; 26 | 27 | /** 28 | * Test cases for PetoPetoWilcoxon. 29 | * 30 | * @author Vasilis Vryniotis 31 | */ 32 | public class PetoPetoWilcoxonTest extends AbstractTest { 33 | 34 | /** 35 | * Test of test method, of class PetoPetoWilcoxon. 36 | */ 37 | @Test 38 | public void testTest() { 39 | logger.info("test"); 40 | //Example from Dimaki's Survival Non-parametrics notes. It should reject the null hypothesis and return true. 41 | TransposeDataCollection transposeDataCollection = new TransposeDataCollection(); 42 | transposeDataCollection.put(0, new FlatDataCollection(Arrays.asList(new Object[]{23,"16+","18+","20+","24+"}))); 43 | transposeDataCollection.put(1, new FlatDataCollection(Arrays.asList(new Object[]{15,18,19,19,20.0}))); 44 | boolean is_twoTailed = true; 45 | double aLevel = 0.05; 46 | boolean expResult = true; 47 | boolean result = PetoPetoWilcoxon.test(transposeDataCollection, is_twoTailed, aLevel); 48 | assertEquals(expResult, result); 49 | } 50 | 51 | } 52 | -------------------------------------------------------------------------------- /datumbox-framework-core/src/test/java/com/datumbox/framework/core/statistics/parametrics/onesample/StudentsOneSampleTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2013-2020 Vasilis Vryniotis 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.datumbox.framework.core.statistics.parametrics.onesample; 17 | 18 | 19 | import com.datumbox.framework.tests.abstracts.AbstractTest; 20 | import org.junit.Test; 21 | 22 | import static org.junit.Assert.assertEquals; 23 | 24 | /** 25 | * Test cases for StudentsOneSample. 26 | * 27 | * @author Vasilis Vryniotis 28 | */ 29 | public class StudentsOneSampleTest extends AbstractTest { 30 | 31 | /** 32 | * Test of testMean method, of class StudentsOneSample. 33 | */ 34 | @Test 35 | public void testTestMean() { 36 | logger.info("testMean"); 37 | double xbar = 7.4; 38 | int n = 28; 39 | double H0mean = 7.0; 40 | double std = 0.95; 41 | boolean is_twoTailed = false; 42 | double aLevel = 0.05; 43 | boolean expResult = true; 44 | boolean result = StudentsOneSample.testMean(xbar, n, H0mean, std, is_twoTailed, aLevel); 45 | assertEquals(expResult, result); 46 | } 47 | 48 | /** 49 | * Test of testAutocorrelation method, of class StudentsOneSample. 
50 | */ 51 | @Test 52 | public void testTestAutocorrelation() { 53 | logger.info("testAutocorrelation"); 54 | double pk = 0.2; 55 | int n = 50; 56 | boolean is_twoTailed = false; 57 | double aLevel = 0.05; 58 | boolean expResult = false; 59 | boolean result = StudentsOneSample.testAutocorrelation(pk, n, is_twoTailed, aLevel); 60 | assertEquals(expResult, result); 61 | } 62 | 63 | } 64 | -------------------------------------------------------------------------------- /datumbox-framework-core/src/main/java/com/datumbox/framework/core/mathematics/regularization/ElasticNetRegularizer.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2013-2020 Vasilis Vryniotis 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.datumbox.framework.core.mathematics.regularization; 17 | 18 | import java.util.Map; 19 | 20 | /** 21 | * Utility class for ElasticNet regularization. 22 | * 23 | * https://web.stanford.edu/~hastie/Papers/B67.2%20(2005)%20301-320%20Zou%20&%20Hastie.pdf 24 | * http://web.stanford.edu/~hastie/TALKS/enet_talk.pdf 25 | * 26 | * @author Vasilis Vryniotis 27 | */ 28 | public class ElasticNetRegularizer { 29 | 30 | /** 31 | * Updates the weights by applying the ElasticNet regularization. 
32 | * 33 | * @param l1 34 | * @param l2 35 | * @param learningRate 36 | * @param weights 37 | * @param newWeights 38 | * @param 39 | */ 40 | public static void updateWeights(double l1, double l2, double learningRate, Map weights, Map newWeights) { 41 | L2Regularizer.updateWeights(l2, learningRate, weights, newWeights); 42 | L1Regularizer.updateWeights(l1, learningRate, weights, newWeights); 43 | } 44 | 45 | /** 46 | * Estimates the penalty by adding the ElasticNet regularization. 47 | * 48 | * @param l1 49 | * @param l2 50 | * @param weights 51 | * @param 52 | * @return 53 | */ 54 | public static double estimatePenalty(double l1, double l2, Map weights) { 55 | double penalty = 0.0; 56 | penalty += L2Regularizer.estimatePenalty(l2, weights); 57 | penalty += L1Regularizer.estimatePenalty(l1, weights); 58 | return penalty; 59 | } 60 | 61 | } 62 | -------------------------------------------------------------------------------- /datumbox-framework-core/src/test/java/com/datumbox/framework/core/statistics/nonparametrics/relatedsamples/SpearmanCorrelationTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2013-2020 Vasilis Vryniotis 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package com.datumbox.framework.core.statistics.nonparametrics.relatedsamples; 17 | 18 | import com.datumbox.framework.common.dataobjects.FlatDataList; 19 | import com.datumbox.framework.common.dataobjects.TransposeDataList; 20 | import com.datumbox.framework.tests.abstracts.AbstractTest; 21 | import org.junit.Test; 22 | 23 | import java.util.Arrays; 24 | 25 | import static org.junit.Assert.assertEquals; 26 | 27 | /** 28 | * Test cases for SpearmanCorrelation. 29 | * 30 | * @author Vasilis Vryniotis 31 | */ 32 | public class SpearmanCorrelationTest extends AbstractTest { 33 | 34 | /** 35 | * Test of test method, of class SpearmanCorrelation. 36 | */ 37 | @Test 38 | public void testTest() { 39 | logger.info("test"); 40 | TransposeDataList transposeDataList = new TransposeDataList(); 41 | //Example from https://statistics.laerd.com/statistical-guides/spearmans-rank-order-correlation-statistical-guide.php. 42 | //It should reject the null hypothesis and return true. 43 | transposeDataList.put(0, new FlatDataList(Arrays.asList(new Object[]{56,75,45,71,61,64,58,80,76,61}))); 44 | transposeDataList.put(1, new FlatDataList(Arrays.asList(new Object[]{66,70,40,60,65,56,59,77,67,63}))); 45 | boolean is_twoTailed = true; 46 | double aLevel = 0.05; 47 | boolean expResult = true; 48 | boolean result = SpearmanCorrelation.test(transposeDataList, is_twoTailed, aLevel); 49 | assertEquals(expResult, result); 50 | } 51 | 52 | } 53 | -------------------------------------------------------------------------------- /datumbox-framework-applications/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 19 | 20 | 4.0.0 21 | com.datumbox 22 | datumbox-framework-applications 23 | 24 | Datumbox Framework Applications 25 | 26 | 27 | com.datumbox 28 | datumbox-framework 29 | 0.8.3-SNAPSHOT 30 | ../pom.xml 31 | 32 | 33 | 34 | .. 
35 | 36 | 37 | 38 | 39 | com.datumbox 40 | datumbox-framework-core 41 | ${project.version} 42 | 43 | 44 | 45 | com.datumbox 46 | datumbox-framework-tests 47 | ${project.version} 48 | test 49 | 50 | 51 | com.datumbox 52 | datumbox-framework-core 53 | ${project.version} 54 | tests 55 | test 56 | 57 | 58 | 59 | 60 | -------------------------------------------------------------------------------- /datumbox-framework-core/src/test/java/com/datumbox/framework/core/statistics/nonparametrics/relatedsamples/KendallTauCorrelationTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2013-2020 Vasilis Vryniotis 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.datumbox.framework.core.statistics.nonparametrics.relatedsamples; 17 | 18 | import com.datumbox.framework.common.dataobjects.FlatDataList; 19 | import com.datumbox.framework.common.dataobjects.TransposeDataList; 20 | import com.datumbox.framework.tests.abstracts.AbstractTest; 21 | import org.junit.Test; 22 | 23 | import java.util.Arrays; 24 | 25 | import static org.junit.Assert.assertEquals; 26 | 27 | /** 28 | * Test cases for KendallTauCorrelation. 29 | * 30 | * @author Vasilis Vryniotis 31 | */ 32 | public class KendallTauCorrelationTest extends AbstractTest { 33 | 34 | /** 35 | * Test of test method, of class KendallTauCorrelation. 
36 | */ 37 | @Test 38 | public void testTest() { 39 | logger.info("test"); 40 | //Example from https://statistics.laerd.com/statistical-guides/spearmans-rank-order-correlation-statistical-guide.php. 41 | //It should reject the null hypothesis and return true. 42 | TransposeDataList transposeDataList = new TransposeDataList(); 43 | transposeDataList.put(0, new FlatDataList(Arrays.asList(new Object[]{56,75,45,71,61,64,58,80,76,61}))); 44 | transposeDataList.put(1, new FlatDataList(Arrays.asList(new Object[]{66,70,40,60,65,56,59,77,67,63}))); 45 | 46 | boolean is_twoTailed = true; 47 | double aLevel = 0.05; 48 | boolean expResult = true; 49 | boolean result = KendallTauCorrelation.test(transposeDataList, is_twoTailed, aLevel); 50 | assertEquals(expResult, result); 51 | } 52 | 53 | } 54 | -------------------------------------------------------------------------------- /datumbox-framework-core/src/test/java/com/datumbox/framework/core/statistics/nonparametrics/onesample/KolmogorovSmirnovOneSampleTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2013-2020 Vasilis Vryniotis 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package com.datumbox.framework.core.statistics.nonparametrics.onesample; 17 | 18 | import com.datumbox.framework.common.dataobjects.AssociativeArray; 19 | import com.datumbox.framework.common.dataobjects.FlatDataCollection; 20 | import com.datumbox.framework.tests.abstracts.AbstractTest; 21 | import org.junit.Test; 22 | 23 | import java.util.Arrays; 24 | 25 | import static org.junit.Assert.assertEquals; 26 | 27 | /** 28 | * Test cases for KolmogorovSmirnovOneSample. 29 | * 30 | * @author Vasilis Vryniotis 31 | */ 32 | public class KolmogorovSmirnovOneSampleTest extends AbstractTest { 33 | 34 | /** 35 | * Test of test method, of class KolmogorovSmirnovOneSample. 36 | */ 37 | @Test 38 | public void testTest() { 39 | logger.info("test"); 40 | //Example from Dimaki's Non-parametrics notes. It should reject the null hypothesis and return True. 41 | FlatDataCollection flatDataCollection = new FlatDataCollection(Arrays.asList(new Object[]{33.4, 33.3, 31.0, 31.4, 33.5, 34.4, 33.7, 36.2, 34.9, 37.0})); 42 | String cdfMethod = "normalDistribution"; 43 | AssociativeArray params = new AssociativeArray(); 44 | params.put("mean", 32.0); 45 | params.put("variance", 3.24); 46 | 47 | boolean is_twoTailed = true; 48 | double aLevel = 0.05; 49 | boolean expResult = true; 50 | boolean result = KolmogorovSmirnovOneSample.test(flatDataCollection, cdfMethod, params, is_twoTailed, aLevel); 51 | assertEquals(expResult, result); 52 | } 53 | 54 | } 55 | -------------------------------------------------------------------------------- /datumbox-framework-core/src/test/java/com/datumbox/framework/core/statistics/nonparametrics/independentsamples/KruskalWallisTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2013-2020 Vasilis Vryniotis 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.datumbox.framework.core.statistics.nonparametrics.independentsamples; 17 | 18 | import com.datumbox.framework.common.dataobjects.FlatDataCollection; 19 | import com.datumbox.framework.common.dataobjects.TransposeDataCollection; 20 | import com.datumbox.framework.tests.abstracts.AbstractTest; 21 | import org.junit.Test; 22 | 23 | import java.util.Arrays; 24 | 25 | import static org.junit.Assert.assertEquals; 26 | 27 | /** 28 | * Test cases for KruskalWallis. 29 | * 30 | * @author Vasilis Vryniotis 31 | */ 32 | public class KruskalWallisTest extends AbstractTest { 33 | 34 | /** 35 | * Test of test method, of class KruskalWallis. 
36 | */ 37 | @Test 38 | public void testTest() { 39 | logger.info("test"); 40 | TransposeDataCollection transposeDataCollection = new TransposeDataCollection(); 41 | transposeDataCollection.put(0, new FlatDataCollection(Arrays.asList(new Object[]{82,93,86,87,99,95,93,89,96}))); 42 | transposeDataCollection.put(1, new FlatDataCollection(Arrays.asList(new Object[]{81,85,93,91,84,88,84,92,81,92}))); 43 | transposeDataCollection.put(2, new FlatDataCollection(Arrays.asList(new Object[]{97,85,83,93,88,86,90,94,87,93}))); 44 | transposeDataCollection.put(3, new FlatDataCollection(Arrays.asList(new Object[]{93,89,94,96,81,84,80,84,92}))); 45 | double aLevel = 0.05; 46 | boolean expResult = false; 47 | boolean result = KruskalWallis.test(transposeDataCollection, aLevel); 48 | assertEquals(expResult, result); 49 | } 50 | 51 | } 52 | -------------------------------------------------------------------------------- /datumbox-framework-core/src/test/java/com/datumbox/framework/core/statistics/nonparametrics/relatedsamples/FriedmanTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2013-2020 Vasilis Vryniotis 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package com.datumbox.framework.core.statistics.nonparametrics.relatedsamples; 17 | 18 | import com.datumbox.framework.common.dataobjects.DataTable2D; 19 | import com.datumbox.framework.tests.abstracts.AbstractTest; 20 | import org.junit.Test; 21 | 22 | import static org.junit.Assert.assertEquals; 23 | 24 | /** 25 | * Test cases for Friedman. 26 | * 27 | * @author Vasilis Vryniotis 28 | */ 29 | public class FriedmanTest extends AbstractTest { 30 | 31 | /** 32 | * Test of test method, of class Friedman. 33 | */ 34 | @Test 35 | public void testTest() { 36 | logger.info("test"); 37 | DataTable2D dataTable = new DataTable2D(); 38 | //Example from Dimaki's Non-parametrics notes. It should reject the null hypothesis and return true. 39 | dataTable.put2d(0,0,4); dataTable.put2d(0,1,7); dataTable.put2d(0,2,8); dataTable.put2d(0,3,6); dataTable.put2d(0,4,5); dataTable.put2d(0,5,5); 40 | dataTable.put2d(1,0,6); dataTable.put2d(1,1,9); dataTable.put2d(1,2,7); dataTable.put2d(1,3,6); dataTable.put2d(1,4,4); dataTable.put2d(1,5,5); 41 | dataTable.put2d(2,0,8); dataTable.put2d(2,1,10); dataTable.put2d(2,2,10); dataTable.put2d(2,3,9); dataTable.put2d(2,4,6); dataTable.put2d(2,5,7); 42 | dataTable.put2d(3,0,6); dataTable.put2d(3,1,4); dataTable.put2d(3,2,8); dataTable.put2d(3,3,5); dataTable.put2d(3,4,3); dataTable.put2d(3,5,7); 43 | 44 | double aLevel = 0.05; 45 | boolean expResult = true; 46 | boolean result = Friedman.test(dataTable, aLevel); 47 | assertEquals(expResult, result); 48 | } 49 | 50 | } 51 | -------------------------------------------------------------------------------- /datumbox-framework-core/src/test/java/com/datumbox/framework/core/statistics/parametrics/independentsamples/LevenesIndependentSamplesTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2013-2020 Vasilis Vryniotis 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file 
except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.datumbox.framework.core.statistics.parametrics.independentsamples; 17 | 18 | import com.datumbox.framework.common.dataobjects.FlatDataCollection; 19 | import com.datumbox.framework.common.dataobjects.TransposeDataCollection; 20 | import com.datumbox.framework.tests.abstracts.AbstractTest; 21 | import org.junit.Test; 22 | 23 | import java.util.Arrays; 24 | 25 | import static org.junit.Assert.assertEquals; 26 | 27 | /** 28 | * Test cases for LevenesIndependentSamples. 29 | * 30 | * @author Vasilis Vryniotis 31 | */ 32 | public class LevenesIndependentSamplesTest extends AbstractTest { 33 | 34 | /** 35 | * Test of testVariances method, of class LevenesIndependentSamples. 
36 | */ 37 | @Test 38 | public void testTestVariances() { 39 | logger.info("testVariances"); 40 | TransposeDataCollection transposeDataCollection = new TransposeDataCollection(); 41 | 42 | transposeDataCollection.put(0, new FlatDataCollection(Arrays.asList(new Object[]{60.8,57.0,65.0,58.6,61.7}))); 43 | transposeDataCollection.put(1, new FlatDataCollection(Arrays.asList(new Object[]{68.7,67.7,74.0,66.3,69.8}))); 44 | transposeDataCollection.put(2, new FlatDataCollection(Arrays.asList(new Object[]{102.6,103.1,100.2,96.5}))); 45 | transposeDataCollection.put(3, new FlatDataCollection(Arrays.asList(new Object[]{87.9,84.2,83.1,85.7,90.3}))); 46 | 47 | double aLevel = 0.05; 48 | boolean expResult = true; 49 | boolean result = LevenesIndependentSamples.testVariances(transposeDataCollection, aLevel); 50 | assertEquals(expResult, result); 51 | } 52 | 53 | } 54 | -------------------------------------------------------------------------------- /datumbox-framework-core/src/main/java/com/datumbox/framework/core/machinelearning/common/abstracts/transformers/AbstractEncoder.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2013-2020 Vasilis Vryniotis 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package com.datumbox.framework.core.machinelearning.common.abstracts.transformers; 17 | 18 | import com.datumbox.framework.common.Configuration; 19 | import com.datumbox.framework.common.dataobjects.TypeInference; 20 | import com.datumbox.framework.core.machinelearning.common.abstracts.AbstractTrainer; 21 | 22 | import java.util.Arrays; 23 | import java.util.HashSet; 24 | import java.util.Set; 25 | 26 | /** 27 | * Base class for all categorical/ordinal encoders of the framework. 28 | * 29 | * @author Vasilis Vryniotis 30 | * @param 31 | * @param 32 | */ 33 | public abstract class AbstractEncoder extends AbstractTransformer { 34 | 35 | /** 36 | * @param trainingParameters 37 | * @param configuration 38 | * @see AbstractTrainer#AbstractTrainer(AbstractTrainer.AbstractTrainingParameters, Configuration) 39 | */ 40 | protected AbstractEncoder(TP trainingParameters, Configuration configuration) { 41 | super(trainingParameters, configuration); 42 | } 43 | 44 | /** 45 | * @param storageName 46 | * @param configuration 47 | * @see AbstractTrainer#AbstractTrainer(String, Configuration) 48 | */ 49 | protected AbstractEncoder(String storageName, Configuration configuration) { 50 | super(storageName, configuration); 51 | } 52 | 53 | /** {@inheritDoc} */ 54 | @Override 55 | protected Set getSupportedXDataTypes() { 56 | return new HashSet<>(Arrays.asList(TypeInference.DataType.CATEGORICAL, TypeInference.DataType.ORDINAL)); 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /datumbox-framework-storage/datumbox-framework-storage-inmemory/src/main/java/com/datumbox/framework/storage/inmemory/DeepCopy.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2013-2020 Vasilis Vryniotis 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
/**
 * Produces deep copies of objects by pushing them through Java serialization
 * and reading them back.
 *
 * @author Vasilis Vryniotis
 */
public class DeepCopy {

    /**
     * Serializes the object to a byte array.
     *
     * @param obj the object to write; it must be accepted by {@link ObjectOutputStream#writeObject(Object)}
     * @return the serialized form of the object
     * @throws UncheckedIOException if the object can not be written
     */
    public static byte[] serialize(Object obj) {
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        try (ObjectOutputStream writer = new ObjectOutputStream(buffer)) {
            writer.writeObject(obj);
        }
        catch (IOException ex) {
            throw new UncheckedIOException(ex);
        }
        return buffer.toByteArray();
    }

    /**
     * Deserializes the byte array back to an object.
     *
     * @param arr the bytes previously produced by {@link #serialize(Object)}
     * @return the object read from the array
     * @throws UncheckedIOException if the bytes can not be read
     * @throws RuntimeException if the class of the stored object can not be found
     */
    public static Object deserialize(byte[] arr) {
        try (ObjectInputStream reader = new ObjectInputStream(new ByteArrayInputStream(arr))) {
            return reader.readObject();
        }
        catch (ClassNotFoundException ex) {
            throw new RuntimeException(ex);
        }
        catch (IOException ex) {
            throw new UncheckedIOException(ex);
        }
    }

    /**
     * Deep clones the object by serializing it and deserializing the result.
     *
     * @param <T> the type of the object
     * @param obj the object to copy
     * @return a copy which shares no state with the original
     */
    @SuppressWarnings("unchecked")
    public static <T> T clone(T obj) {
        byte[] snapshot = serialize(obj);
        //the stream contains exactly what was written above, so the cast is safe
        return (T) deserialize(snapshot);
    }
}
22 | * 23 | * @author Vasilis Vryniotis 24 | */ 25 | public class Binomial { 26 | 27 | /** 28 | * Tests the rejection of null Hypothesis for a particular confidence level 29 | * 30 | * @param k 31 | * @param n 32 | * @param p 33 | * @param is_twoTailed 34 | * @param aLevel 35 | * @return 36 | */ 37 | public static boolean test(int k, int n, double p, boolean is_twoTailed, double aLevel) { 38 | if(k<0 || n<=0 || p<0) { 39 | throw new IllegalArgumentException("All the parameters must be positive."); 40 | } 41 | double pvalue = scoreToPvalue((double)k, n, p); 42 | 43 | boolean rejectH0=false; 44 | 45 | double a=aLevel; 46 | if(is_twoTailed) { //if to tailed test then split the statistical significance in half 47 | a=aLevel/2.0; 48 | } 49 | if(pvalue<=a || pvalue>=(1.0-a)) { 50 | rejectH0=true; 51 | } 52 | 53 | return rejectH0; 54 | } 55 | 56 | /** 57 | * Returns the Pvalue for a particular score 58 | * 59 | * @param score 60 | * @param n 61 | * @param p 62 | * @return 63 | */ 64 | private static double scoreToPvalue(double score, int n, double p) { 65 | /* 66 | if(n<=20) { 67 | //calculate it from binomial distribution 68 | } 69 | */ 70 | 71 | double z=(score+0.5-n*p)/Math.sqrt(n*p*(1.0-p)); 72 | 73 | return ContinuousDistributions.gaussCdf(z); 74 | } 75 | 76 | } 77 | -------------------------------------------------------------------------------- /datumbox-framework-core/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 19 | 20 | 4.0.0 21 | com.datumbox 22 | datumbox-framework-core 23 | 24 | Datumbox Framework Core 25 | 26 | 27 | com.datumbox 28 | datumbox-framework 29 | 0.8.3-SNAPSHOT 30 | ../pom.xml 31 | 32 | 33 | 34 | .. 
35 | 36 | 37 | 38 | 39 | org.apache.commons 40 | commons-math3 41 | 42 | 43 | tw.edu.ntu.csie 44 | libsvm 45 | 46 | 47 | 48 | com.datumbox 49 | datumbox-framework-storage-inmemory 50 | ${project.version} 51 | 52 | 53 | com.datumbox 54 | datumbox-framework-storage-mapdb 55 | ${project.version} 56 | 57 | 58 | 59 | com.datumbox 60 | datumbox-framework-tests 61 | ${project.version} 62 | test 63 | 64 | 65 | 66 | 67 | -------------------------------------------------------------------------------- /datumbox-framework-tests/src/main/java/com/datumbox/framework/tests/utilities/TestUtils.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2013-2020 Vasilis Vryniotis 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.datumbox.framework.tests.utilities; 17 | 18 | import com.datumbox.framework.common.dataobjects.AssociativeArray; 19 | import com.datumbox.framework.common.dataobjects.DataTable2D; 20 | import com.datumbox.framework.common.dataobjects.TypeInference; 21 | import com.datumbox.framework.tests.Constants; 22 | 23 | import static org.junit.Assert.assertEquals; 24 | 25 | /** 26 | * Utility methods used only by the JUnit tests. 27 | * 28 | * @author Vasilis Vryniotis 29 | */ 30 | public class TestUtils { 31 | 32 | /** 33 | * Assert method for DataTable2D data structure which stores double values. 
34 | * 35 | * @param expResult 36 | * @param result 37 | */ 38 | public static void assertDoubleDataTable2D(DataTable2D expResult, DataTable2D result) { 39 | for (Object key1 : result.keySet()) { 40 | for (Object key2 : result.get(key1).keySet()) { 41 | 42 | double v1 = TypeInference.toDouble(expResult.get2d(key1, key2)); 43 | double v2 = TypeInference.toDouble(result.get2d(key1, key2)); 44 | 45 | assertEquals(v1, v2, Constants.DOUBLE_ACCURACY_HIGH); 46 | } 47 | } 48 | } 49 | 50 | /** 51 | * Assert method for AssociativeArray data structure which stores double values. 52 | * 53 | * @param expResult 54 | * @param result 55 | */ 56 | public static void assetDoubleAssociativeArray(AssociativeArray expResult, AssociativeArray result) { 57 | 58 | for (Object key : result.keySet()) { 59 | double v1 = expResult.getDouble(key); 60 | double v2 = result.getDouble(key); 61 | 62 | assertEquals(v1, v2, Constants.DOUBLE_ACCURACY_HIGH); 63 | } 64 | } 65 | 66 | } 67 | -------------------------------------------------------------------------------- /datumbox-framework-core/src/main/java/com/datumbox/framework/core/machinelearning/common/abstracts/modelers/AbstractModeler.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2013-2020 Vasilis Vryniotis 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package com.datumbox.framework.core.machinelearning.common.abstracts.modelers; 17 | 18 | import com.datumbox.framework.common.Configuration; 19 | import com.datumbox.framework.core.common.dataobjects.Dataframe; 20 | import com.datumbox.framework.core.machinelearning.common.abstracts.AbstractTrainer; 21 | 22 | /** 23 | * Base Class for Machine Learning algorithms. 24 | * 25 | * @author Vasilis Vryniotis 26 | * @param 27 | * @param 28 | */ 29 | public abstract class AbstractModeler extends AbstractTrainer { 30 | 31 | /** 32 | * @param trainingParameters 33 | * @param configuration 34 | * @see AbstractTrainer#AbstractTrainer(AbstractTrainingParameters, Configuration) 35 | */ 36 | protected AbstractModeler(TP trainingParameters, Configuration configuration) { 37 | super(trainingParameters, configuration); 38 | } 39 | 40 | /** 41 | * @param storageName 42 | * @param configuration 43 | * @see AbstractTrainer#AbstractTrainer(String, Configuration) 44 | */ 45 | protected AbstractModeler(String storageName, Configuration configuration) { 46 | super(storageName, configuration); 47 | } 48 | 49 | /** 50 | * Calculates the predictions for the newData and stores them in the provided 51 | * Dataframe. 52 | * 53 | * @param newData 54 | */ 55 | public void predict(Dataframe newData) { 56 | logger.info("predict()"); 57 | 58 | _predict(newData); 59 | } 60 | 61 | /** 62 | * Estimates the predictions for a new Dataframe. 
63 | * 64 | * @param newData 65 | */ 66 | protected abstract void _predict(Dataframe newData); 67 | } 68 | -------------------------------------------------------------------------------- /datumbox-framework-core/src/main/java/com/datumbox/framework/core/common/text/extractors/WordSequenceExtractor.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2013-2020 Vasilis Vryniotis 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.datumbox.framework.core.common.text.extractors; 17 | 18 | import java.util.LinkedHashMap; 19 | import java.util.List; 20 | import java.util.Map; 21 | 22 | /** 23 | * This extractor class extracts the keywords of a string as a sequence of words. 24 | * 25 | * @author Vasilis Vryniotis 26 | */ 27 | public class WordSequenceExtractor extends AbstractTextExtractor { 28 | 29 | /** 30 | * AbstractParameters of the WordSequenceExtractor. 31 | */ 32 | public static class Parameters extends AbstractTextExtractor.AbstractParameters { 33 | private static final long serialVersionUID = 1L; 34 | 35 | } 36 | 37 | /** 38 | * Public constructor that accepts as arguments the AbstractParameters object. 
39 | * 40 | * @param parameters 41 | */ 42 | public WordSequenceExtractor(Parameters parameters) { 43 | super(parameters); 44 | } 45 | 46 | /** 47 | * This method gets as input a string and returns as output a numbered sequence 48 | * of the tokens. In the returned map as keys we store the position of the word 49 | * in the original string and as value the actual token in that position. 50 | * 51 | * @param text 52 | * @return 53 | */ 54 | @Override 55 | public Map extract(final String text) { 56 | List tmpKwd = generateTokenizer().tokenize(text); 57 | 58 | Map keywordSequence = new LinkedHashMap<>(); 59 | 60 | int position = 0; 61 | for(String keyword : tmpKwd) { 62 | keywordSequence.put(position, keyword); 63 | ++position; 64 | } 65 | 66 | return keywordSequence; 67 | } 68 | 69 | } 70 | -------------------------------------------------------------------------------- /datumbox-framework-core/src/test/java/com/datumbox/framework/core/mathematics/discrete/CombinatoricsTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2013-2020 Vasilis Vryniotis 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 
15 | */ 16 | package com.datumbox.framework.core.mathematics.discrete; 17 | 18 | import com.datumbox.framework.tests.abstracts.AbstractTest; 19 | import org.junit.Test; 20 | 21 | import java.util.*; 22 | 23 | import static org.junit.Assert.assertEquals; 24 | 25 | /** 26 | * Test cases for Combinatorics. 27 | * 28 | * @author Vasilis Vryniotis 29 | */ 30 | public class CombinatoricsTest extends AbstractTest { 31 | 32 | /** 33 | * Test of permutations method, of class Combinatorics. 34 | */ 35 | @Test 36 | public void testPermutations() { 37 | logger.info("permutations"); 38 | Collection> expResult = new ArrayList<>(); 39 | expResult.add(new ArrayList<>(Arrays.asList("a","b","c"))); 40 | expResult.add(new ArrayList<>(Arrays.asList("b","a","c"))); 41 | expResult.add(new ArrayList<>(Arrays.asList("b","c","a"))); 42 | expResult.add(new ArrayList<>(Arrays.asList("a","c","b"))); 43 | expResult.add(new ArrayList<>(Arrays.asList("c","a","b"))); 44 | expResult.add(new ArrayList<>(Arrays.asList("c","b","a"))); 45 | Collection> result = Combinatorics.permutations(Arrays.asList("a","b","c")); 46 | assertEquals(expResult, result); 47 | } 48 | 49 | /** 50 | * Test of combinations method, of class Combinatorics. 
51 | */ 52 | @Test 53 | public void testCombinations() { 54 | logger.info("combinations"); 55 | Set> expResult = new HashSet<>(); 56 | expResult.add(new HashSet<>(Arrays.asList("a","b"))); 57 | expResult.add(new HashSet<>(Arrays.asList("a","c"))); 58 | expResult.add(new HashSet<>(Arrays.asList("b","c"))); 59 | Set> result = Combinatorics.combinations(new HashSet<>(Arrays.asList("a","b","c","a")), 2); 60 | assertEquals(expResult, result); 61 | } 62 | 63 | } 64 | -------------------------------------------------------------------------------- /datumbox-framework-common/src/main/java/com/datumbox/framework/common/concurrency/ThrottledExecutor.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2013-2020 Vasilis Vryniotis 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.datumbox.framework.common.concurrency; 17 | 18 | import java.util.concurrent.Executor; 19 | import java.util.concurrent.RejectedExecutionException; 20 | import java.util.concurrent.Semaphore; 21 | 22 | /** 23 | * The ThrottledExecutor enables us to throttle the input of the executor. This 24 | * can be useful when we don't wish to submit all the tasks at once in order to 25 | * preserve memory. 
/**
 * The ThrottledExecutor enables us to throttle the input of the executor. This
 * can be useful when we don't wish to submit all the tasks at once in order to
 * preserve memory.
 *
 * @author Vasilis Vryniotis
 */
public class ThrottledExecutor implements Executor {

    private final Executor wrappedExecutor;

    private final Semaphore semaphore;

    /**
     * This Executor will block the calling thread (when execute() is called) if the
     * number of submitted and unfinished tasks reaches the provided limit. This
     * enables us to pace the input and reduce memory consumption.
     *
     * @param executor the executor which actually runs the tasks
     * @param maxConcurrentTasks the maximum number of submitted and unfinished tasks
     * @throws NullPointerException if the executor is null
     * @throws IllegalArgumentException if maxConcurrentTasks is not positive
     */
    public ThrottledExecutor(Executor executor, int maxConcurrentTasks) {
        //Fail here rather than in execute(): a null executor would throw after
        //the permit has been acquired and the permit would never be returned.
        if (executor == null) {
            throw new NullPointerException("executor");
        }
        //without at least one permit every call of execute() would block forever
        if (maxConcurrentTasks <= 0) {
            throw new IllegalArgumentException("The maxConcurrentTasks must be positive.");
        }
        this.wrappedExecutor = executor;
        this.semaphore = new Semaphore(maxConcurrentTasks);
    }

    /**
     * Waits for a free slot and then hands the command over to the wrapped
     * executor. The slot is given back when the command finishes (normally or
     * with an exception) or when the wrapped executor rejects it.
     *
     * @param command the task to run
     * @throws NullPointerException if the command is null
     * @throws RuntimeException if the thread is interrupted while waiting for a
     *         slot or if the wrapped executor rejects the command; the original
     *         exception is available as the cause
     */
    @Override
    public void execute(final Runnable command) {
        if (command == null) {
            throw new NullPointerException("command");
        }

        try {
            semaphore.acquire();
        }
        catch (InterruptedException ex) {
            //acquire() clears the interrupt status before throwing; restore it
            //so that the callers further up can still observe the interruption
            Thread.currentThread().interrupt();
            throw new RuntimeException(ex);
        }

        try {
            wrappedExecutor.execute(() -> {
                try {
                    command.run();
                }
                finally {
                    semaphore.release();
                }
            });
        }
        catch (RejectedExecutionException ex) {
            //the task never started, so its finally block will not return the permit
            semaphore.release();
            throw new RuntimeException(ex);
        }
    }
}
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.datumbox.framework.core.statistics.parametrics.relatedsamples; 17 | 18 | import com.datumbox.framework.core.statistics.distributions.ContinuousDistributions; 19 | 20 | /** 21 | * Normal Related Samples parametric tests. 22 | * 23 | * @author Vasilis Vryniotis 24 | */ 25 | public class NormalRelatedSamples { 26 | 27 | /** 28 | * Related Samples (Paired) Mean Test for Normal. 29 | * Requirements: Not Normal with large sample and known or unknown variances 30 | * 31 | * @param dbar 32 | * @param n 33 | * @param dbarStd 34 | * @param is_twoTailed 35 | * @param aLevel 36 | * @return 37 | */ 38 | public static boolean testMean(double dbar, int n, double dbarStd, boolean is_twoTailed, double aLevel) { 39 | if(n<=0 || dbarStd<=0) { 40 | throw new IllegalArgumentException("All the parameters must be positive."); 41 | } 42 | 43 | //standardize it 44 | double z=(dbar)/(dbarStd/Math.sqrt(n)); 45 | 46 | boolean rejectH0=checkCriticalValue(z, is_twoTailed, aLevel); 47 | 48 | return rejectH0; 49 | } 50 | 51 | /** 52 | * Checks the Critical Value to determine if the Hypothesis should be rejected 53 | * 54 | * @param score 55 | * @param is_twoTailed 56 | * @param aLevel 57 | * @return 58 | */ 59 | private static boolean checkCriticalValue(double score, boolean is_twoTailed, double aLevel) { 60 | double probability=ContinuousDistributions.gaussCdf(score); 61 | 62 | boolean rejectH0=false; 63 | 64 | double a=aLevel; 65 | if(is_twoTailed) { //if to tailed test then split the statistical significance in half 66 | 
a=aLevel/2; 67 | } 68 | if(probability<=a || probability>=(1-a)) { 69 | rejectH0=true; 70 | } 71 | 72 | return rejectH0; 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /datumbox-framework-core/src/test/java/com/datumbox/framework/core/mathematics/linearprogramming/LPSolverTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2013-2020 Vasilis Vryniotis 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.datumbox.framework.core.mathematics.linearprogramming; 17 | 18 | import com.datumbox.framework.tests.Constants; 19 | import com.datumbox.framework.tests.abstracts.AbstractTest; 20 | import org.junit.Test; 21 | 22 | import java.util.ArrayList; 23 | import java.util.List; 24 | 25 | import static org.junit.Assert.assertArrayEquals; 26 | import static org.junit.Assert.assertEquals; 27 | 28 | /** 29 | * Test cases for LPSolver. 30 | * 31 | * @author Vasilis Vryniotis 32 | */ 33 | public class LPSolverTest extends AbstractTest { 34 | 35 | /** 36 | * Test of solve method, of class LPSolver. 
37 | */ 38 | @Test 39 | public void testSolve() { 40 | logger.info("solve"); 41 | 42 | //Example from http://lpsolve.sourceforge.net/5.5/PHP.htm 43 | double[] linearObjectiveFunction = {143.0, 60.0, 195.0}; 44 | List linearConstraintsList = new ArrayList<>(); 45 | linearConstraintsList.add(new LPSolver.LPConstraint(new double[]{120.0, 210.0, 150.75}, LPSolver.LEQ, 15000.0)); 46 | linearConstraintsList.add(new LPSolver.LPConstraint(new double[]{110.0, 30.0, 125.0}, LPSolver.LEQ, 4000.0)); 47 | linearConstraintsList.add(new LPSolver.LPConstraint(new double[]{1.0, 1.0, 1.0}, LPSolver.LEQ, 75.0)); 48 | 49 | LPSolver.LPResult expResult = new LPSolver.LPResult(); 50 | expResult.setObjectiveValue(6986.8421052632); 51 | expResult.setVariableValues(new double[]{0.0, 56.578947368421, 18.421052631579}); 52 | 53 | LPSolver.LPResult result = LPSolver.solve(linearObjectiveFunction, linearConstraintsList, true, true); 54 | assertEquals(expResult.getObjectiveValue(), result.getObjectiveValue(), Constants.DOUBLE_ACCURACY_HIGH); 55 | assertArrayEquals(expResult.getVariableValues(), result.getVariableValues(), Constants.DOUBLE_ACCURACY_HIGH); 56 | 57 | } 58 | 59 | } 60 | -------------------------------------------------------------------------------- /datumbox-framework-core/src/test/java/com/datumbox/framework/core/common/utilities/MapMethodsTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2013-2020 Vasilis Vryniotis 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.datumbox.framework.core.common.utilities; 17 | 18 | import com.datumbox.framework.common.dataobjects.AssociativeArray; 19 | import com.datumbox.framework.tests.abstracts.AbstractTest; 20 | import org.junit.Test; 21 | 22 | import java.util.AbstractMap; 23 | import java.util.Map; 24 | 25 | import static org.junit.Assert.assertEquals; 26 | 27 | /** 28 | * Test cases for MapMethods. 29 | * 30 | * @author Vasilis Vryniotis 31 | */ 32 | public class MapMethodsTest extends AbstractTest { 33 | 34 | /** 35 | * Test of selectMaxKeyValue method, of class MapMethods. 36 | */ 37 | @Test 38 | public void testSelectMaxKeyValue() { 39 | logger.info("selectMaxKeyValue"); 40 | AssociativeArray keyValueMap = new AssociativeArray(); 41 | keyValueMap.put("1", 1.0); 42 | keyValueMap.put("2", 2.0); 43 | keyValueMap.put("5", 5.0); 44 | keyValueMap.put("3", 3.0); 45 | keyValueMap.put("4", 4.0); 46 | 47 | Map.Entry expResult = new AbstractMap.SimpleEntry<>("5", 5.0); 48 | Map.Entry result = MapMethods.selectMaxKeyValue(keyValueMap); 49 | assertEquals(expResult, result); 50 | } 51 | 52 | /** 53 | * Test of selectMinKeyValue method, of class MapMethods. 
54 | */ 55 | @Test 56 | public void testSelectMinKeyValue() { 57 | logger.info("selectMaxKeyValue"); 58 | AssociativeArray keyValueMap = new AssociativeArray(); 59 | keyValueMap.put("1", 1); 60 | keyValueMap.put("2", 2); 61 | keyValueMap.put("5", 5); 62 | keyValueMap.put("3", 3); 63 | keyValueMap.put("4", 4); 64 | 65 | Map.Entry expResult = new AbstractMap.SimpleEntry<>("1", 1); 66 | Map.Entry result = MapMethods.selectMinKeyValue(keyValueMap); 67 | assertEquals(expResult, result); 68 | } 69 | 70 | } 71 | -------------------------------------------------------------------------------- /datumbox-framework-core/src/main/java/com/datumbox/framework/core/statistics/parametrics/relatedsamples/StudentsRelatedSamples.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2013-2020 Vasilis Vryniotis 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.datumbox.framework.core.statistics.parametrics.relatedsamples; 17 | 18 | import com.datumbox.framework.core.statistics.distributions.ContinuousDistributions; 19 | 20 | /** 21 | * Student's Paired T-test for mean. 22 | * 23 | * @author Vasilis Vryniotis 24 | */ 25 | public class StudentsRelatedSamples { 26 | 27 | /** 28 | * Related Samples (Paired) Mean Test of Student's. 
29 | * Requirements: Normal with known or unknown variances 30 | * 31 | * @param dbar 32 | * @param n 33 | * @param dbarStd 34 | * @param is_twoTailed 35 | * @param aLevel 36 | * @return 37 | */ 38 | public static boolean testMean(double dbar, int n, double dbarStd, boolean is_twoTailed, double aLevel) { 39 | if(n<=0 || dbarStd<=0) { 40 | throw new IllegalArgumentException("All the parameters must be positive."); 41 | } 42 | 43 | //standardize it 44 | double t=(dbar)/(dbarStd/Math.sqrt(n)); 45 | 46 | boolean rejectH0=checkCriticalValue(t, n, is_twoTailed, aLevel); 47 | 48 | return rejectH0; 49 | } 50 | 51 | /** 52 | * Checks the Critical Value to determine if the Hypothesis should be rejected 53 | * 54 | * @param score 55 | * @param n 56 | * @param is_twoTailed 57 | * @param aLevel 58 | * @return 59 | */ 60 | private static boolean checkCriticalValue(double score, int n, boolean is_twoTailed, double aLevel) { 61 | double probability= ContinuousDistributions.studentsCdf(score,n-1); 62 | 63 | boolean rejectH0=false; 64 | 65 | double a=aLevel; 66 | if(is_twoTailed) { //if to tailed test then split the statistical significance in half 67 | a=aLevel/2; 68 | } 69 | if(probability<=a || probability>=(1-a)) { 70 | rejectH0=true; 71 | } 72 | 73 | return rejectH0; 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /datumbox-framework-core/src/test/java/com/datumbox/framework/core/statistics/parametrics/independentsamples/StudentsIndependentSamplesTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2013-2020 Vasilis Vryniotis 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.datumbox.framework.core.statistics.parametrics.independentsamples; 17 | 18 | 19 | import com.datumbox.framework.tests.abstracts.AbstractTest; 20 | import org.junit.Test; 21 | 22 | import static org.junit.Assert.assertEquals; 23 | 24 | /** 25 | * Test cases for StudentsIndependentSamples. 26 | * 27 | * @author Vasilis Vryniotis 28 | */ 29 | public class StudentsIndependentSamplesTest extends AbstractTest { 30 | 31 | /** 32 | * Test of testMeansUnknownNotEqualVars method, of class StudentsIndependentSamples. 33 | */ 34 | @Test 35 | public void testTestMeansUnknownNotEqualVars() { 36 | logger.info("testMeansUnknownNotEqualVars"); 37 | double xbar = 215.0; 38 | double ybar = 200.0; 39 | int n = 60; 40 | int m = 50; 41 | double stdx = 55.0; 42 | double stdy = 50.0; 43 | boolean is_twoTailed = false; 44 | double aLevel = 0.05; 45 | boolean expResult = false; 46 | boolean result = StudentsIndependentSamples.testMeansUnknownNotEqualVars(xbar, ybar, n, m, stdx, stdy, is_twoTailed, aLevel); 47 | assertEquals(expResult, result); 48 | } 49 | 50 | /** 51 | * Test of testMeansUnknownEqualVars method, of class StudentsIndependentSamples. 
52 | */ 53 | @Test 54 | public void testTestMeansUnknownEqualVars() { 55 | logger.info("testMeansUnknownEqualVars"); 56 | double xbar = 14.14; 57 | double ybar = 12.08; 58 | int n = 15; 59 | int m = 15; 60 | double stdx = 1.020784; 61 | double stdy = 0.951314; 62 | boolean is_twoTailed = false; 63 | double aLevel = 0.05; 64 | boolean expResult = true; 65 | boolean result = StudentsIndependentSamples.testMeansUnknownEqualVars(xbar, ybar, n, m, stdx, stdy, is_twoTailed, aLevel); 66 | assertEquals(expResult, result); 67 | } 68 | 69 | } 70 | -------------------------------------------------------------------------------- /datumbox-framework-core/src/main/java/com/datumbox/framework/core/statistics/nonparametrics/relatedsamples/McNemar.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2013-2020 Vasilis Vryniotis 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.datumbox.framework.core.statistics.nonparametrics.relatedsamples; 17 | 18 | import com.datumbox.framework.core.statistics.distributions.ContinuousDistributions; 19 | 20 | /** 21 | * McNemar's test for paired nominal data. 
22 | * 23 | * @author Vasilis Vryniotis 24 | */ 25 | public class McNemar { 26 | 27 | /** 28 | * Calculates the p-value of null Hypothesis 29 | * 30 | * @param n11 31 | * @param n12 32 | * @param n21 33 | * @param n22 34 | * @return 35 | */ 36 | public static double getPvalue(int n11, int n12, int n21, int n22) { 37 | double Chisquare=Math.pow(Math.abs(n12-n21) - 0.5,2)/(n12+n21); //McNemar with Yates's correction for continuity 38 | 39 | double pvalue= scoreToPvalue(Chisquare); 40 | 41 | return pvalue; 42 | } 43 | 44 | /** 45 | * Tests the rejection of null Hypothesis for a particular confidence level 46 | * 47 | * @param n11 48 | * @param n12 49 | * @param n21 50 | * @param n22 51 | * @param is_twoTailed 52 | * @param aLevel 53 | * @return 54 | */ 55 | public static boolean test(int n11, int n12, int n21, int n22, boolean is_twoTailed, double aLevel) { 56 | double pvalue= getPvalue(n11,n12,n21,n22); 57 | 58 | boolean rejectH0=false; 59 | 60 | double a=aLevel; 61 | if(is_twoTailed) { //if to tailed test then split the statistical significance in half 62 | a=aLevel/2; 63 | } 64 | if(pvalue<=a || pvalue>=(1-a)) { 65 | rejectH0=true; 66 | } 67 | 68 | return rejectH0; 69 | } 70 | 71 | /** 72 | * Returns the Pvalue for a particular score 73 | * 74 | * @param score 75 | * @return 76 | */ 77 | private static double scoreToPvalue(double score) { 78 | return 1.0-ContinuousDistributions.chisquareCdf(score, 1); 79 | } 80 | 81 | } 82 | -------------------------------------------------------------------------------- /datumbox-framework-core/src/main/java/com/datumbox/framework/core/statistics/parametrics/onesample/ChisquareOneSample.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2013-2020 Vasilis Vryniotis 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.datumbox.framework.core.statistics.parametrics.onesample; 17 | 18 | import com.datumbox.framework.core.statistics.distributions.ContinuousDistributions; 19 | 20 | /** 21 | * One-sample Parametric Chisquare test. 22 | * 23 | * @author Vasilis Vryniotis 24 | */ 25 | public class ChisquareOneSample { 26 | 27 | /** 28 | * One Sample Variance Test for Chisquare. 29 | * Requirements: Normal with known variance 30 | * 31 | * @param stdbar 32 | * @param n 33 | * @param H0std 34 | * @param is_twoTailed 35 | * @param aLevel 36 | * @return 37 | */ 38 | public static boolean testVariance(double stdbar, int n, double H0std, boolean is_twoTailed, double aLevel) { 39 | if(n<=1 || H0std<=0) { 40 | throw new IllegalArgumentException("The values of the provided parameters are not within the permitted range."); 41 | } 42 | 43 | //standardize it 44 | double chisquare=(n-1.0)*stdbar*stdbar/(H0std*H0std); 45 | 46 | boolean rejectH0=checkCriticalValue(chisquare, n, is_twoTailed, aLevel); 47 | 48 | return rejectH0; 49 | } 50 | 51 | /** 52 | * Checks the Critical Value to determine if the Hypothesis should be rejected 53 | * 54 | * @param score 55 | * @param n 56 | * @param is_twoTailed 57 | * @param aLevel 58 | * @return 59 | */ 60 | private static boolean checkCriticalValue(double score, int n, boolean is_twoTailed, double aLevel) { 61 | double probability=ContinuousDistributions.chisquareCdf(score,n-1); 62 | 63 | boolean rejectH0=false; 64 | 65 | double a=aLevel; 66 | if(is_twoTailed) { //if to tailed test then split the 
statistical significance in half 67 | a=aLevel/2.0; 68 | } 69 | if(probability<=a || probability>=(1.0-a)) { 70 | rejectH0=true; 71 | } 72 | 73 | return rejectH0; 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /datumbox-framework-core/src/test/java/com/datumbox/framework/core/common/text/tokenizers/WhitespaceTokenizerTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2013-2020 Vasilis Vryniotis 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.datumbox.framework.core.common.text.tokenizers; 17 | 18 | import com.datumbox.framework.tests.abstracts.AbstractTest; 19 | import org.junit.Test; 20 | 21 | import java.util.ArrayList; 22 | import java.util.Arrays; 23 | import java.util.List; 24 | 25 | import static org.junit.Assert.assertEquals; 26 | 27 | /** 28 | * Test cases for WhitespaceTokenizer. 29 | * 30 | * @author Vasilis Vryniotis 31 | */ 32 | public class WhitespaceTokenizerTest extends AbstractTest { 33 | 34 | /** 35 | * Test of tokenize method, of class WhitespaceTokenizer. 
36 | */ 37 | @Test 38 | public void testTokenize() { 39 | logger.info("tokenize"); 40 | String text = "In publishing and graphic design, lorem ipsum[1] is a placeholder text (filler text) commonly used to demonstrate the graphic elements of a document or visual presentation, such as font, typography, and layout, by removing the distraction of meaningful content. The lorem ipsum text is typically a section of a Latin text by Cicero with words altered, added, and removed that make it nonsensical and not proper Latin.[1]"; 41 | WhitespaceTokenizer instance = new WhitespaceTokenizer(); 42 | List expResult = new ArrayList<>(Arrays.asList("In", "publishing", "and", "graphic", "design,", "lorem", "ipsum[1]", "is", "a", "placeholder", "text", "(filler", "text)", "commonly", "used", "to", "demonstrate", "the", "graphic", "elements", "of", "a", "document", "or", "visual", "presentation,", "such", "as", "font,", "typography,", "and", "layout,", "by", "removing", "the", "distraction", "of", "meaningful", "content.", "The", "lorem", "ipsum", "text", "is", "typically", "a", "section", "of", "a", "Latin", "text", "by", "Cicero", "with", "words", "altered,", "added,", "and", "removed", "that", "make", "it", "nonsensical", "and", "not", "proper", "Latin.[1]")); 43 | List result = instance.tokenize(text); 44 | assertEquals(expResult, result); 45 | } 46 | 47 | } 48 | -------------------------------------------------------------------------------- /datumbox-framework-core/src/main/java/com/datumbox/framework/core/statistics/parametrics/independentsamples/FIndependentSamples.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2013-2020 Vasilis Vryniotis 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.datumbox.framework.core.statistics.parametrics.independentsamples; 17 | 18 | import com.datumbox.framework.core.statistics.distributions.ContinuousDistributions; 19 | 20 | /** 21 | * F-test of equality of variances. 22 | * 23 | * @author Vasilis Vryniotis 24 | */ 25 | public class FIndependentSamples { 26 | 27 | /** 28 | * Independent Samples Variance Test for F. 29 | * Requirements: Normal 30 | * 31 | * @param stdbarx 32 | * @param stdbary 33 | * @param n 34 | * @param m 35 | * @param is_twoTailed 36 | * @param aLevel 37 | * @return 38 | */ 39 | public static boolean testVariances(double stdbarx, double stdbary, int n, int m, boolean is_twoTailed, double aLevel) { 40 | if(n<=1 || stdbarx<=0 || m<=1 || stdbary<=0) { 41 | throw new IllegalArgumentException("The values of the provided parameters are not within the permitted range."); 42 | } 43 | 44 | //standardize it 45 | double F=(stdbarx*stdbarx)/(stdbary*stdbary); 46 | 47 | boolean rejectH0=checkCriticalValue(F, n, m, is_twoTailed, aLevel); 48 | 49 | return rejectH0; 50 | } 51 | 52 | /** 53 | * Checks the Critical Value to determine if the Hypothesis should be rejected 54 | * 55 | * @param score 56 | * @param n 57 | * @param m 58 | * @param is_twoTailed 59 | * @param aLevel 60 | * @return 61 | */ 62 | private static boolean checkCriticalValue(double score, int n, int m, boolean is_twoTailed, double aLevel) { 63 | double probability=ContinuousDistributions.fCdf(score,n-1,m-1); 64 | 65 | boolean rejectH0=false; 66 | 67 | double a=aLevel; 68 | 
if(is_twoTailed) { //if to tailed test then split the statistical significance in half 69 | a=aLevel/2; 70 | } 71 | if(probability<=a || probability>=(1-a)) { 72 | rejectH0=true; 73 | } 74 | 75 | return rejectH0; 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /TODO.txt: -------------------------------------------------------------------------------- 1 | CODE IMPROVEMENTS 2 | ================= 3 | 4 | - Consider dropping all the common.dataobjects and use their internalData directly instead. 5 | - Refactor the statistics package and replace all the static methods with proper inheritance. 6 | - Write generic optimizers instead of having optimization methods in the algorithms. Add the optimizers and regularization packages under mathematics. 7 | 8 | NEW FEATURES 9 | ============ 10 | 11 | - Create a storage engine for MapDB 3 once caching, asynchronous writing and compression is supported. 12 | - Create a storage engine for BerkeleyDB. 13 | - Add the ability to call Machine Learning algorithms from command line or Python: 14 | - https://pypi.python.org/pypi/javabridge 15 | - https://github.com/LeeKamentsky/python-javabridge/ 16 | - https://github.com/fracpete/python-weka-wrapper 17 | 18 | 19 | DOCUMENTATION 20 | ============= 21 | 22 | - Improve the code documentation. 23 | - Write How-to blog posts on building Text Classification models. 24 | - Update the website and link directly to the latest and previous documentations. 25 | 26 | 27 | NEW ALGORITHMS 28 | ============== 29 | 30 | - Create a PercentileScaler numerical scaler. 31 | - Create the following FeatureSelectors: AnovaSelect, KruskalWallisSelect, SpearmanSelect. 32 | - Speed up LDA: http://www.cs.ucsb.edu/~mingjia/cs240/doc/273811.pdf 33 | - Factorization Machines: http://www.csie.ntu.edu.tw/~b97053/paper/Rendle2010FM.pdf 34 | - Develop the FunkSVD and PLSI as probabilistic version of SVD. 
35 | - Collaborative Filtering for Implicit Feedback Datasets: http://yifanhu.net/PUB/cf.pdf 36 | - Write a Mixture of Gaussians clustering method. 37 | - Include an anomaly detection algorithm. 38 | - Provide a wrapper for DBSCANClusterer and NeuralNet implementations of Maths. 39 | - Add the ability to search through the configuration space and find the best performing algorithmic configuration. 40 | 41 | 42 | TO CHECK OUT 43 | ============ 44 | 45 | Linear Algebra 46 | -------------- 47 | 48 | - JBLAS - Linear Algebra for Java: 49 | https://github.com/mikiobraun/jblas 50 | http://jblas.org/ 51 | 52 | Huge Collection libs, DBs and Storage 53 | ------------------------------------- 54 | 55 | - Vanilla-java - HugeCollections: 56 | https://code.google.com/p/vanilla-java/wiki/HugeCollections 57 | 58 | - Fastutil: 59 | http://fastutil.di.unimi.it/#install 60 | 61 | - Joafip: 62 | http://joafip.sourceforge.net/javadoc/net/sf/joafip/java/util/PHashMap.html 63 | 64 | - Chronicle Map: 65 | https://github.com/OpenHFT/Chronicle-Map/ 66 | 67 | - H2 Database: 68 | http://www.h2database.com/html/main.html 69 | 70 | - ehcache: 71 | http://www.ehcache.org/ 72 | http://stackoverflow.com/questions/4726370/looking-for-a-drop-in-replacement-for-a-java-util-map 73 | 74 | - redisson: 75 | https://github.com/redisson/redisson 76 | -------------------------------------------------------------------------------- /datumbox-framework-core/src/test/java/com/datumbox/framework/core/statistics/parametrics/onesample/NormalOneSampleTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright (C) 2013-2020 Vasilis Vryniotis 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 
6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | package com.datumbox.framework.core.statistics.parametrics.onesample; 17 | 18 | 19 | import com.datumbox.framework.tests.abstracts.AbstractTest; 20 | import org.junit.Test; 21 | 22 | import static org.junit.Assert.assertEquals; 23 | 24 | /** 25 | * Test cases for NormalOneSample. 26 | * 27 | * @author Vasilis Vryniotis 28 | */ 29 | public class NormalOneSampleTest extends AbstractTest { 30 | 31 | /** 32 | * Test of testMean method, of class NormalOneSample. 33 | */ 34 | @Test 35 | public void testTestMean() { 36 | logger.info("testMean"); 37 | double xbar = 215.0; 38 | int n = 60; 39 | double H0mean = 200.0; 40 | double std = 55.0; 41 | boolean is_twoTailed = false; 42 | double aLevel = 0.05; 43 | boolean expResult = true; 44 | boolean result = NormalOneSample.testMean(xbar, n, H0mean, std, is_twoTailed, aLevel); 45 | assertEquals(expResult, result); 46 | } 47 | 48 | /** 49 | * Test of testSum method, of class NormalOneSample. 50 | */ 51 | @Test 52 | public void testTestSum() { 53 | logger.info("testSum"); 54 | double xsum = 65.3; 55 | int n = 100; 56 | double H0sum = 0.65; 57 | double std = 0.02; 58 | boolean is_twoTailed = false; 59 | double aLevel = 0.05; 60 | boolean expResult = false; 61 | boolean result = NormalOneSample.testSum(xsum, n, H0sum, std, is_twoTailed, aLevel); 62 | assertEquals(expResult, result); 63 | } 64 | 65 | /** 66 | * Test of testPercentage method, of class NormalOneSample. 
67 | */ 68 | @Test 69 | public void testTestPercentage() { 70 | logger.info("testPercentage"); 71 | double pbar = 0.60; 72 | int n = 100; 73 | double H0p = 0.65; 74 | boolean is_twoTailed = false; 75 | double aLevel = 0.05; 76 | boolean expResult = false; 77 | boolean result = NormalOneSample.testPercentage(pbar, n, H0p, is_twoTailed, aLevel); 78 | assertEquals(expResult, result); 79 | } 80 | 81 | } 82 | --------------------------------------------------------------------------------