├── .gitignore ├── LICENSE.txt ├── README.md ├── build.gradle ├── etc ├── includes.gradle ├── properties.gradle ├── providedCompile.gradle ├── s3Upload.gradle ├── synchronizer.properties ├── testing.gradle └── version.gradle ├── pattern-core ├── README.md ├── build.gradle └── src │ ├── main │ └── java │ │ └── cascading │ │ └── pattern │ │ ├── PatternException.java │ │ ├── datafield │ │ ├── CategoricalDataField.java │ │ ├── ContinuousDataField.java │ │ └── DataField.java │ │ ├── ensemble │ │ ├── EnsembleSpec.java │ │ ├── ParallelEnsembleAssembly.java │ │ ├── function │ │ │ └── InsertGUID.java │ │ └── selection │ │ │ ├── Average.java │ │ │ ├── CategoricalSelectionBuffer.java │ │ │ ├── CategoricalSelector.java │ │ │ ├── MajorityVote.java │ │ │ ├── PredictionSelectionBuffer.java │ │ │ ├── PredictionSelector.java │ │ │ ├── SelectionBuffer.java │ │ │ └── SelectionStrategy.java │ │ ├── model │ │ ├── ModelSchema.java │ │ ├── ModelScoringFunction.java │ │ ├── Spec.java │ │ ├── clustering │ │ │ ├── Cluster.java │ │ │ ├── ClusterEvaluator.java │ │ │ ├── ClusteringFunction.java │ │ │ ├── ClusteringSpec.java │ │ │ ├── compare │ │ │ │ ├── AbsoluteDifferenceCompareFunction.java │ │ │ │ └── CompareFunction.java │ │ │ └── measure │ │ │ │ ├── ComparisonMeasure.java │ │ │ │ ├── DistanceMeasure.java │ │ │ │ ├── EuclideanMeasure.java │ │ │ │ └── SquaredEuclideanMeasure.java │ │ ├── generalregression │ │ │ ├── BaseRegressionFunction.java │ │ │ ├── CategoricalRegressionFunction.java │ │ │ ├── GeneralRegressionSpec.java │ │ │ ├── LinkFunction.java │ │ │ ├── Parameter.java │ │ │ ├── PredictionRegressionFunction.java │ │ │ ├── RegressionTable.java │ │ │ ├── expression │ │ │ │ ├── ExpressionEvaluator.java │ │ │ │ └── ParameterExpression.java │ │ │ ├── normalization │ │ │ │ ├── Normalization.java │ │ │ │ └── SoftMaxNormalization.java │ │ │ └── predictor │ │ │ │ ├── CovariantPredictor.java │ │ │ │ ├── FactorPredictor.java │ │ │ │ └── Predictor.java │ │ └── tree │ │ │ ├── Node.java │ │ │ ├── 
Tree.java │ │ │ ├── TreeFunction.java │ │ │ ├── TreeSpec.java │ │ │ ├── decision │ │ │ ├── Decision.java │ │ │ ├── DecisionTree.java │ │ │ ├── FinalDecision.java │ │ │ ├── ParentDecision.java │ │ │ ├── PredicateEvaluator.java │ │ │ └── PredicatedDecision.java │ │ │ └── predicate │ │ │ ├── ComparablePredicate.java │ │ │ ├── EqualsToPredicate.java │ │ │ ├── FalsePredicate.java │ │ │ ├── GreaterOrEqualThanPredicate.java │ │ │ ├── GreaterThanPredicate.java │ │ │ ├── IsInSetPredicate.java │ │ │ ├── IsMissingPredicate.java │ │ │ ├── IsNotInSetPredicate.java │ │ │ ├── IsNotMissingPredicate.java │ │ │ ├── LessOrEqualThanPredicate.java │ │ │ ├── LessThanPredicate.java │ │ │ ├── NotEqualsToPredicate.java │ │ │ ├── Predicate.java │ │ │ ├── SimplePredicate.java │ │ │ ├── SimpleSetPredicate.java │ │ │ ├── TruePredicate.java │ │ │ └── compound │ │ │ ├── AndPredicate.java │ │ │ ├── CompoundPredicate.java │ │ │ ├── OrPredicate.java │ │ │ ├── SurrogatePredicate.java │ │ │ └── XorPredicate.java │ │ └── util │ │ ├── Logging.java │ │ └── Reflection.java │ └── test │ ├── java │ └── cascading │ │ └── pattern │ │ ├── PatternPlatformTestCase.java │ │ ├── ensemble │ │ ├── EnsemblePlatformTestCase.java │ │ └── SimpleEnsemblePlatformTest.java │ │ └── model │ │ ├── ModelTest.java │ │ └── tree │ │ └── PredicateTest.java │ └── resources │ ├── data │ ├── randomforest-predict.tsv │ └── randomforest.tsv │ └── log4j.properties ├── pattern-examples ├── .gitignore ├── README.md ├── build.gradle ├── data │ ├── groc.arules.tsv │ ├── groc.arules.xml │ ├── iris.glm.tsv │ ├── iris.glm.xml │ ├── iris.hc.tsv │ ├── iris.hc.xml │ ├── iris.kmeans.tsv │ ├── iris.kmeans.xml │ ├── iris.lm_p.tsv │ ├── iris.lm_p.xml │ ├── iris.multinom.tsv │ ├── iris.multinom.xml │ ├── iris.nn.tsv │ ├── iris.nn.xml │ ├── iris.rf.tsv │ ├── iris.rf.xml │ ├── iris.rpart.tsv │ ├── iris.rpart.xml │ ├── iris.svm.tsv │ ├── iris.svm.xml │ ├── orders.tsv │ ├── sample.rf.xml │ └── sample.tsv ├── examples │ ├── py │ │ ├── gen_orders.py │ │ 
└── rf_eval.py │ └── r │ │ ├── pmml_models.R │ │ ├── rattle_pmml.R │ │ └── rf_pmml.R └── src │ └── main │ └── java │ └── cascading │ └── pattern │ ├── Main.java │ ├── function │ ├── RandomForestFunction.java │ └── RandomForestSpec.java │ └── pmml │ └── iris │ └── RegressionFlowExample.java ├── pattern-hadoop ├── README.md └── build.gradle ├── pattern-local ├── README.md ├── build.gradle └── src │ └── main │ └── resources │ └── log4j.properties ├── pattern-pmml ├── README.md ├── build.gradle └── src │ ├── main │ └── java │ │ └── cascading │ │ └── pattern │ │ └── pmml │ │ ├── ArrayUtil.java │ │ ├── ClusteringUtil.java │ │ ├── DataFields.java │ │ ├── DataTypes.java │ │ ├── GeneralRegressionUtil.java │ │ ├── PMMLModel.java │ │ ├── PMMLPlanner.java │ │ ├── PMMLTypeResolver.java │ │ ├── PMMLUtil.java │ │ ├── RegressionUtil.java │ │ └── TreeUtil.java │ └── test │ ├── java │ └── cascading │ │ └── pattern │ │ └── pmml │ │ ├── PMMLPlatformTestCase.java │ │ └── SimplePMMLPlatformTest.java │ └── resources │ └── pmml │ ├── iris.glm.pmml │ ├── iris.glm.tsv │ ├── iris.hc.pmml │ ├── iris.hc.tsv │ ├── iris.kmeans.pmml │ ├── iris.kmeans.tsv │ ├── iris.lm_p.pmml │ ├── iris.lm_p.tsv │ ├── iris.multinom.pmml │ ├── iris.multinom.tsv │ ├── iris.nn.pmml │ ├── iris.nn.tsv │ ├── iris.rf.bin.pmml │ ├── iris.rf.bin.tsv │ ├── iris.rf.pmml │ ├── iris.rf.tsv │ ├── iris.rpart.pmml │ ├── iris.rpart.tsv │ ├── iris.svm.pmml │ ├── iris.svm.tsv │ ├── kmeans.pmml │ ├── kmeans.tsv │ ├── randomforest.pmml │ ├── randomforest.tsv │ ├── sample.rf.pmml │ └── sample.rf.tsv ├── settings.gradle └── version.properties /.gitignore: -------------------------------------------------------------------------------- 1 | junitvm*.properties 2 | build.properties 3 | gradle.properties 4 | .DS_Store 5 | build 6 | *.iml 7 | *.iws 8 | *.ipr 9 | .idea/ 10 | .gradle/ -------------------------------------------------------------------------------- /LICENSE.txt: 
-------------------------------------------------------------------------------- 1 | 2 | License: 3 | 4 | Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 5 | 6 | Project and contact information: http://www.cascading.org/ 7 | 8 | This file is part of the Cascading project. 9 | 10 | Licensed under the Apache License, Version 2.0 (the "License"); 11 | you may not use this file except in compliance with the License. 12 | You may obtain a copy of the License at 13 | 14 | http://www.apache.org/licenses/LICENSE-2.0 15 | 16 | Unless required by applicable law or agreed to in writing, software 17 | distributed under the License is distributed on an "AS IS" BASIS, 18 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 19 | See the License for the specific language governing permissions and 20 | limitations under the License. 21 | 22 | Third-party Licenses: 23 | 24 | All third-party dependencies are listed in the build.gradle files. 25 | 26 | jPMML - https://github.com/jpmml/jpmml 27 | 28 | Copyright (c) 2009 University of Tartu 29 | All rights reserved. 30 | 31 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the 32 | following conditions are met: 33 | 34 | * Redistributions of source code must retain the above copyright notice, this list of conditions and the 35 | following disclaimer. 36 | * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the 37 | following disclaimer in the documentation and/or other materials provided with the distribution. 38 | * Neither the name of the University of Tartu nor the names of its contributors may be used to endorse or promote 39 | products derived from this software without specific prior written permission. 
40 | 41 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, 42 | INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 43 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 44 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 45 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, 46 | WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE 47 | USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- /etc/includes.gradle: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 
// Applies the shared build scripts kept under the root project's etc/ directory
// to whichever project applies this file. The scripts are applied in this order.

// declares the providedCompile configuration and wires it into the classpaths
apply from: "${rootDir}/etc/providedCompile.gradle"
// configures the test task and registers the platformTest task
apply from: "${rootDir}/etc/testing.gradle"
// registers the s3Upload task
apply from: "${rootDir}/etc/s3Upload.gradle"
// Collects publishing-related settings from system properties and exposes them
// as extra ('ext') properties for the other build scripts to read.

// When a 'teamcity' project property is present, copy its entries into the JVM system properties.
if( project.properties[ 'teamcity' ] ) // make them system properties
  System.properties.putAll( project.properties[ 'teamcity' ] )

// When the 'aws.properties' system property names a file, load it and merge its
// entries into the JVM system properties so the lookups below can see them.
if( System.properties[ 'aws.properties' ] )
{
  file( System.properties[ 'aws.properties' ] ).withReader { reader ->
    def awsProperties = new Properties()
    awsProperties.load( reader )
    System.properties.putAll( awsProperties )
  }
}

// Repository used for publishing; the URL below is only the default.
ext.repoUrl = 'http://conjars.org/repo/'
ext.repoUserName = System.properties[ 'publish.repo.userName' ]
ext.repoPassword = System.properties[ 'publish.repo.password' ]

// An explicit 'publish.repo.url' system property overrides the default URL.
if( System.properties[ 'publish.repo.url' ] )
  repoUrl = System.properties[ 'publish.repo.url' ]

// AWS credentials and bucket names; each is null when its system property is not set.
ext.awsAccessId = System.properties[ 'publish.aws.accessId' ]
ext.awsSecretKey = System.properties[ 'publish.aws.secretKey' ]
ext.s3Bucket = System.properties[ 'publish.bucket' ]
ext.s3BucketDocs = System.properties[ 'publish.docs.bucket' ]
// Declares a 'providedCompile' configuration and makes it visible to compilation,
// to the IDEA module and to javadoc.

// the new, initially empty, configuration
configurations {
  providedCompile
}

// add the configuration's artifacts to the compile classpath of the main source set
sourceSets {
  main.compileClasspath += configurations.providedCompile
}

// NOTE(review): conf2ScopeMappings and Conf2ScopeMappingContainer are not defined in this script;
// presumably they come from Gradle's maven plugin applied elsewhere -- confirm.
// The mapping is registered inside the task's configuration closure, so it runs whenever the
// build is configured rather than when the 'mappings' task executes.
task mappings {
  conf2ScopeMappings.addMapping( 0, configurations.providedCompile, Conf2ScopeMappingContainer.PROVIDED )
}

// add the configuration to the IDEA module's PROVIDED scope
idea {
  module {
    scopes.PROVIDED.plus += configurations.providedCompile
  }
}

// javadoc needs the same classes on its classpath to resolve references
javadoc {
  classpath += configurations.providedCompile
}
// Registers an 's3Upload' task that syncs ${buildDir}/publish to an S3 bucket.

import com.monochromeroad.gradle.plugin.aws.s3.S3Sync
import com.monochromeroad.gradle.plugin.aws.s3.ACL

// Makes the S3Sync task type available on the build script classpath.
buildscript {
  repositories {
    mavenLocal()
    mavenCentral()
    mavenRepo name: 'monochromeroad', url: 'http://conjars.org/repo/'
  }
  dependencies {
    classpath 'thirdparty:gradle-aws-s3-sync:0.5.1'
  }
}

task s3Upload( type: S3Sync ) {

  // credentials are read from extra properties on the root project
  accessKey = rootProject.awsAccessId
  secretKey = rootProject.awsSecretKey

  keepFiles = true // prevents deletion from bucket

  acl ACL.PublicRead

  configFile "${rootProject.projectDir}/etc/synchronizer.properties"

  ext.source = "${buildDir}/publish"

  // the root project goes to the docs bucket; every sub-project goes to the
  // regular bucket under a path segment named after the sub-project
  if( project == rootProject ) // only publishes docs
    ext.destination = "${project.s3BucketDocs}/${rootProject.name}/${majorVersion}/"
  else
    ext.destination = "${project.s3Bucket}/${rootProject.name}/${majorVersion}/${project.name}/"

  from source
  into destination
}
17 | # See the License for the specific language governing permissions and 18 | # limitations under the License. 19 | # 20 | 21 | # see http://jets3t.s3.amazonaws.com/toolkit/configuration.html 22 | 23 | #s3service.default-bucket-location=Tokyo 24 | # httpclient.max-connections=2 25 | # threaded-service.admin-max-thread-count=5 26 | 27 | ### 28 | # File/Object comparison properties 29 | ### 30 | 31 | filecomparer.skip-symlinks=true 32 | #filecomparer.use-md5-files=true 33 | #filecomparer.generate-md5-files=true 34 | #filecomparer.md5-files-root-dir=.cache 35 | filecomparer.skip-upload-of-md5-files=true 36 | filecomparer.assume-local-latest-in-mismatch=false 37 | 38 | # Page Caching - none 39 | upload.metadata.Cache-Control=no-cache 40 | 41 | upload.transformed-files-batch-size=1000 42 | -------------------------------------------------------------------------------- /etc/testing.gradle: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 
// Shared test configuration: sets the system properties handed to the tests,
// configures the standard 'test' task and adds a separate 'platformTest' task.

// output roots of the platform tests that actually ran; appended to in platformTest.doFirst
rootProject.ext.testRoots = []

// Sets the test-related system properties and returns the complete system property map,
// so the result can be assigned directly to a Test task's systemProperties.
project.ext.setTestingProperties = {

  // default to 'false' only when the caller did not supply a value
  if( !System.properties[ 'test.cluster.enabled' ] )
    System.properties[ 'test.cluster.enabled' ] = 'false'

  // hadoop hard codes 'build/test' into its test harness, so might as well play along
  // new String( ... ) turns the GString into a plain java.lang.String before it is stored
  System.properties[ 'test.data.path' ] = new String( "${projectDir}/../pattern-pmml/src/test/resources/pmml/" )
  System.properties[ 'test.output.root' ] = new String( "${buildDir}/test/output/" )

  System.properties
}

test {
  jvmArgs '-Xmx756m'

  enableAssertions = false

  systemProperties = setTestingProperties()
}

// Runs the *PlatformTest classes in forked JVMs, after the regular unit tests.
task platformTest( type: Test, dependsOn: test ) {

  forkEvery = 1 // static fields on the platform test get munged otherwise

  if( System.properties[ 'platformTest.single' ] ) // test a single class from the command line
    include "**/${System.properties[ 'platformTest.single' ]}.class"
  else
    include '**/*PlatformTest.class'

  jvmArgs '-Xmx756m'

  enableAssertions = false

  systemProperties = setTestingProperties()

  // the test classes are always taken from the :pattern-pmml project's test output,
  // whichever project this script is applied to
  setTestClassesDir( file( project( ':pattern-pmml' ).sourceSets.test.output.classesDir ) )

  // not called when disabled
  doFirst() {

    rootProject.ext.testRoots << systemProperties[ 'test.output.root' ]
  }
}

// make 'check' run the platform tests as well
check {
  dependsOn << platformTest
}
// Derives the commit id, build number, release tag and release version of the build
// and publishes them as extra ('ext') properties.

apply from: './etc/properties.gradle'

// Prefer the commit id handed in through 'build.vcs.number'; otherwise ask git below.
project.ext.currentCommit = System.properties[ 'build.vcs.number' ];

if( !currentCommit )
{
  def commitPath = File.createTempFile( "commit", "tmp" )

  try
  {
    // git's output is captured in the temp file
    ant.exec( dir: '.', executable: "git", output: commitPath ) {
      arg( line: 'rev-parse HEAD' )
    }

    def commitLines = commitPath.readLines()

    // fail with a readable message instead of an IndexOutOfBoundsException
    if( commitLines.isEmpty() )
      throw new IllegalStateException( "unable to determine current commit, 'git rev-parse HEAD' produced no output" )

    currentCommit = commitLines.get( 0 )
  }
  finally
  {
    // always remove the temp file, also when git could not be executed
    commitPath.delete()
  }
}

def versionProperties = new Properties()
file( 'version.properties' ).withInputStream { versionProperties.load( it ) }

ext.majorVersion = versionProperties[ 'pattern.release.major' ]
ext.minorVersion = versionProperties[ 'pattern.release.minor' ]

// 'dev' marks a build that was started without a build number
ext.buildNumber = System.getProperty( 'build.number', 'dev' )

// private builds get a 'priv-' prefix, all other non-final builds a 'wip-' prefix
if( System.properties[ 'pattern.release.private' ] )
  buildNumber = "priv-${buildNumber}"
else if( !System.properties[ 'pattern.release.final' ] )
  buildNumber = "wip-${buildNumber}"

ext.releaseTag = "${majorVersion}-${buildNumber}"

// without an explicit build number the tag is 'wip-<major>'
if( !System.properties[ 'build.number' ] )
  releaseTag = "wip-${majorVersion}"

ext.releaseVersion = majorVersion

if( minorVersion )
  releaseVersion = "${releaseVersion}.${minorVersion}"

// only final releases carry a bare version; everything else appends the build number
if( !System.properties[ 'pattern.release.final' ] )
  releaseVersion = "${releaseVersion}-${buildNumber}"
-------------------------------------------------------------------------------- /pattern-core/README.md: -------------------------------------------------------------------------------- 1 | # pattern-core 2 | 3 | This sub-project contains all core APIs and base API JUnit test cases. 4 | 5 | Feel free to use `PatternPlatformTestCase` as the base class for any new custom tests in dependent projects. -------------------------------------------------------------------------------- /pattern-core/build.gradle: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 
// Build script of the pattern-core sub-project.

dependencies {
  // cascadingVersion is not defined in this script; it must be supplied by the enclosing build
  compile( group: 'cascading', name: 'cascading-core', version: cascadingVersion )

  compile group: 'com.google.guava', name: 'guava', version: '14.0.1'

  compile group: 'org.slf4j', name: 'slf4j-api', version: '1.6.1'

  // the 'tests' classifier artifacts of cascading, declared as changing modules
  testCompile group: 'cascading', name: 'cascading-core', version: cascadingVersion, classifier: 'tests', changing: true
  testCompile group: 'cascading', name: 'cascading-platform', version: cascadingVersion, classifier: 'tests', changing: true

  testRuntime group: 'log4j', name: 'log4j', version: '1.2.16'
  testRuntime group: 'org.slf4j', name: 'slf4j-api', version: '1.6.1'
  testRuntime group: 'org.slf4j', name: 'slf4j-log4j12', version: '1.6.1'
}

jar {
  // for non-final builds, write the build number to a properties file just before the jar is assembled
  doFirst {
    if( !System.properties[ 'pattern.release.final' ] )
      file( "${buildDir}/build.number.properties" ).write( "pattern.build.number=${buildNumber}" )
  }

  // version.properties, and for non-final builds build.number.properties, go into cascading/pattern inside the jar
  into( 'cascading/pattern' ) {
    from '../version.properties'

    if( !System.properties[ 'pattern.release.final' ] )
      from "${buildDir}/build.number.properties"
  }
}

// the platformTest task is switched off for this project
platformTest.enabled = false
/**
 * Class PatternException is the unchecked exception type declared by this library.
 * <p>
 * It adds no state of its own; the message and cause are kept by {@link RuntimeException}.
 */
public class PatternException extends RuntimeException
  {
  /** Creates an exception without a detail message or cause. */
  public PatternException()
    {
    super();
    }

  /**
   * Creates an exception with the given detail message.
   *
   * @param message the detail message, available later through {@link #getMessage()}
   */
  public PatternException( String message )
    {
    super( message );
    }

  /**
   * Creates an exception with the given detail message and cause.
   *
   * @param message the detail message, available later through {@link #getMessage()}
   * @param cause   the underlying failure, available later through {@link #getCause()}
   */
  public PatternException( String message, Throwable cause )
    {
    super( message, cause );
    }
  }
19 | */ 20 | 21 | package cascading.pattern.datafield; 22 | 23 | import java.lang.reflect.Type; 24 | import java.util.ArrayList; 25 | import java.util.Arrays; 26 | import java.util.Collections; 27 | import java.util.List; 28 | 29 | import cascading.tuple.Fields; 30 | 31 | 32 | /** 33 | * Class CategoricalDataField represent a field with a fixed set of possible values. 34 | *

35 | * For example, if the field name is {@code SIZE}, it could have three possible categories, 36 | * {@code small}, {@code medium}, and {@code large}. 37 | *

38 | * Order of categories is retained so that indexes into the internal list of categories can be used 39 | * to speed up some operations. 40 | */ 41 | public class CategoricalDataField extends DataField 42 | { 43 | protected List categories = new ArrayList(); 44 | 45 | public CategoricalDataField( CategoricalDataField dataField, String... categories ) 46 | { 47 | this( dataField.name, dataField.getType(), categories ); 48 | } 49 | 50 | public CategoricalDataField( Fields fields, String... categories ) 51 | { 52 | this( fields.get( 0 ).toString(), fields.getType( 0 ), categories ); 53 | } 54 | 55 | public CategoricalDataField( Fields fields, List categories ) 56 | { 57 | this( fields.get( 0 ).toString(), fields.getType( 0 ), categories ); 58 | } 59 | 60 | public CategoricalDataField( String name, Type type, String... categories ) 61 | { 62 | this( name, type, Arrays.asList( categories ) ); 63 | } 64 | 65 | public CategoricalDataField( String name, Type type, List categories ) 66 | { 67 | super( name, type ); 68 | this.categories.addAll( categories ); 69 | } 70 | 71 | /** 72 | * Gets an unmodifiable list of the current categories. 73 | * 74 | * @return the categories 75 | */ 76 | public List getCategories() 77 | { 78 | return Collections.unmodifiableList( categories ); 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /pattern-core/src/main/java/cascading/pattern/datafield/ContinuousDataField.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 
10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | package cascading.pattern.datafield; 22 | 23 | import java.lang.reflect.Type; 24 | 25 | import cascading.tuple.Fields; 26 | 27 | 28 | /** Class ContinuousDataField represent a field with a continuous set of values, like an {@link Double} value. */ 29 | public class ContinuousDataField extends DataField 30 | { 31 | public ContinuousDataField( Fields fields ) 32 | { 33 | super( fields ); 34 | } 35 | 36 | public ContinuousDataField( String name, Type type ) 37 | { 38 | super( name, type ); 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /pattern-core/src/main/java/cascading/pattern/datafield/DataField.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | package cascading.pattern.datafield; 22 | 23 | import java.io.Serializable; 24 | import java.lang.reflect.Type; 25 | 26 | import cascading.tuple.Fields; 27 | 28 | 29 | public abstract class DataField implements Serializable 30 | { 31 | public String name; 32 | public Type type; 33 | 34 | protected DataField( Fields fields ) 35 | { 36 | this( fields.get( 0 ).toString(), fields.getType( 0 ) ); 37 | } 38 | 39 | protected DataField( String name, Type type ) 40 | { 41 | if( name == null || name.isEmpty() ) 42 | throw new IllegalArgumentException( "name may not be null or empty" ); 43 | 44 | if( type == null ) 45 | throw new IllegalArgumentException( "type may not be null" ); 46 | 47 | this.name = name; 48 | this.type = type; 49 | } 50 | 51 | public String getName() 52 | { 53 | return name; 54 | } 55 | 56 | public Type getType() 57 | { 58 | return type; 59 | } 60 | 61 | @Override 62 | public String toString() 63 | { 64 | return name + ":" + getClass().getSimpleName() + ":" + type; 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /pattern-core/src/main/java/cascading/pattern/ensemble/EnsembleSpec.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 
10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | package cascading.pattern.ensemble; 22 | 23 | 24 | import java.io.Serializable; 25 | import java.util.ArrayList; 26 | import java.util.LinkedHashSet; 27 | import java.util.List; 28 | import java.util.Set; 29 | 30 | import cascading.pattern.ensemble.selection.MajorityVote; 31 | import cascading.pattern.ensemble.selection.SelectionStrategy; 32 | import cascading.pattern.model.ModelSchema; 33 | import cascading.pattern.model.Spec; 34 | 35 | 36 | /** 37 | * Class EnsembleSpec is used to define an ensemble model. 38 | *

39 | * It is used with {@link ParallelEnsembleAssembly}. 40 | */ 41 | public class EnsembleSpec extends Spec implements Serializable 42 | { 43 | protected List modelSpecs = new ArrayList(); 44 | protected SelectionStrategy selectionStrategy = new MajorityVote(); 45 | 46 | public EnsembleSpec( ModelSchema modelSchema ) 47 | { 48 | super( modelSchema ); 49 | } 50 | 51 | public EnsembleSpec( ModelSchema modelSchema, List modelSpecs ) 52 | { 53 | super( modelSchema ); 54 | this.modelSpecs = modelSpecs; 55 | } 56 | 57 | public boolean isParallel() 58 | { 59 | return selectionStrategy.isParallel(); 60 | } 61 | 62 | public void addModelSpecs( List modelSpec ) 63 | { 64 | this.modelSpecs.addAll( modelSpec ); 65 | } 66 | 67 | public void addModelSpec( S modelSpec ) 68 | { 69 | this.modelSpecs.add( modelSpec ); 70 | } 71 | 72 | public List getModelSpecs() 73 | { 74 | return modelSpecs; 75 | } 76 | 77 | public SelectionStrategy getSelectionStrategy() 78 | { 79 | return selectionStrategy; 80 | } 81 | 82 | public void setSelectionStrategy( SelectionStrategy selectionStrategy ) 83 | { 84 | this.selectionStrategy = selectionStrategy; 85 | } 86 | 87 | /** 88 | * Returns unique list of all the underlying declared model categories. 
89 | * 90 | * @return the model categories 91 | */ 92 | public List getModelCategories() 93 | { 94 | List categories = new ArrayList(); 95 | 96 | Set set = new LinkedHashSet(); 97 | 98 | for( S spec : getModelSpecs() ) 99 | set.addAll( spec.getCategories() ); 100 | 101 | categories.addAll( set ); 102 | 103 | return categories; 104 | } 105 | 106 | @Override 107 | public String toString() 108 | { 109 | final StringBuilder sb = new StringBuilder( "EnsembleSpec{" ); 110 | sb.append( "modelSpecs=" ).append( modelSpecs ); 111 | sb.append( ", selectionStrategy=" ).append( selectionStrategy ); 112 | sb.append( '}' ); 113 | return sb.toString(); 114 | } 115 | } 116 | -------------------------------------------------------------------------------- /pattern-core/src/main/java/cascading/pattern/ensemble/function/InsertGUID.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 
19 | */ 20 | 21 | package cascading.pattern.ensemble.function; 22 | 23 | import cascading.operation.expression.ExpressionFunction; 24 | import cascading.pipe.Each; 25 | import cascading.pipe.Pipe; 26 | import cascading.pipe.SubAssembly; 27 | import cascading.tuple.Fields; 28 | 29 | /** 30 | * Class InsertGUID creates a globally unique ID by calling {@link java.util.UUID#randomUUID()}. 31 | *

32 | * This Function also returns {@code false} for {@link cascading.operation.Operation#isSafe()}, preventing 33 | * duplicate ids from being generated for the same record. 34 | */ 35 | public class InsertGUID extends SubAssembly 36 | { 37 | public InsertGUID( Pipe previous, Fields declaredFields ) 38 | { 39 | super( previous ); 40 | 41 | String expression = "java.util.UUID.randomUUID().toString()"; 42 | 43 | ExpressionFunction expressionFunction = new ExpressionFunction( declaredFields, expression ) 44 | { 45 | @Override 46 | public boolean isSafe() 47 | { 48 | return false; 49 | } 50 | }; 51 | 52 | previous = new Each( previous, Fields.NONE, expressionFunction, Fields.ALL ); 53 | 54 | setTails( previous ); 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /pattern-core/src/main/java/cascading/pattern/ensemble/selection/Average.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 
19 | */ 20 | 21 | package cascading.pattern.ensemble.selection; 22 | 23 | import org.slf4j.Logger; 24 | import org.slf4j.LoggerFactory; 25 | 26 | /** 27 | * 28 | */ 29 | public class Average extends PredictionSelector 30 | { 31 | private static final Logger LOG = LoggerFactory.getLogger( Average.class ); 32 | 33 | @Override 34 | public boolean isParallel() 35 | { 36 | return true; 37 | } 38 | 39 | @Override 40 | double predict( double[] results ) 41 | { 42 | double sum = 0; 43 | 44 | for( double result : results ) 45 | sum += result; 46 | 47 | double avg = sum / results.length; 48 | 49 | LOG.debug( "prediction: {}", avg ); 50 | 51 | return avg; 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /pattern-core/src/main/java/cascading/pattern/ensemble/selection/CategoricalSelector.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 
19 | */ 20 | 21 | package cascading.pattern.ensemble.selection; 22 | 23 | /** 24 | * 25 | */ 26 | public abstract class CategoricalSelector extends SelectionStrategy 27 | { 28 | public abstract int select( int[] results ); 29 | } 30 | -------------------------------------------------------------------------------- /pattern-core/src/main/java/cascading/pattern/ensemble/selection/MajorityVote.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 
19 | */ 20 | 21 | package cascading.pattern.ensemble.selection; 22 | 23 | import com.google.common.primitives.Ints; 24 | import org.slf4j.Logger; 25 | import org.slf4j.LoggerFactory; 26 | 27 | /** 28 | * 29 | */ 30 | public class MajorityVote extends CategoricalSelector 31 | { 32 | private static final Logger LOG = LoggerFactory.getLogger( MajorityVote.class ); 33 | 34 | @Override 35 | public boolean isParallel() 36 | { 37 | return true; 38 | } 39 | 40 | @Override 41 | public int select( int[] results ) 42 | { 43 | int max = Ints.max( results ); 44 | int index = Ints.indexOf( results, max ); 45 | 46 | LOG.debug( "score: {}, with votes: {}", index, max ); 47 | 48 | return index; 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /pattern-core/src/main/java/cascading/pattern/ensemble/selection/PredictionSelectionBuffer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 
19 | */ 20 | 21 | package cascading.pattern.ensemble.selection; 22 | 23 | import java.util.Arrays; 24 | import java.util.Iterator; 25 | 26 | import cascading.flow.FlowProcess; 27 | import cascading.operation.BufferCall; 28 | import cascading.operation.OperationCall; 29 | import cascading.pattern.ensemble.EnsembleSpec; 30 | import cascading.tuple.Tuple; 31 | import cascading.tuple.TupleEntry; 32 | import org.slf4j.Logger; 33 | import org.slf4j.LoggerFactory; 34 | 35 | /** 36 | * 37 | */ 38 | public class PredictionSelectionBuffer extends SelectionBuffer 39 | { 40 | private static final Logger LOG = LoggerFactory.getLogger( PredictionSelectionBuffer.class ); 41 | 42 | private PredictionSelector selection; 43 | 44 | protected class DecisionContext 45 | { 46 | public Tuple tuple; 47 | public double[] results; 48 | 49 | public Tuple result( Object value ) 50 | { 51 | tuple.set( 0, value ); 52 | 53 | return tuple; 54 | } 55 | } 56 | 57 | public PredictionSelectionBuffer( EnsembleSpec ensembleSpec ) 58 | { 59 | super( ensembleSpec.getModelSchema().getDeclaredFields(), ensembleSpec ); 60 | 61 | if( !( ensembleSpec.getSelectionStrategy() instanceof PredictionSelector ) ) 62 | throw new IllegalArgumentException( "selection strategy must be Prediction, got: " + ensembleSpec.getSelectionStrategy() ); 63 | 64 | this.selection = (PredictionSelector) ensembleSpec.getSelectionStrategy(); 65 | } 66 | 67 | @Override 68 | public void prepare( FlowProcess flowProcess, OperationCall operationCall ) 69 | { 70 | ( (BufferCall) operationCall ).setRetainValues( true ); 71 | 72 | DecisionContext context = new DecisionContext(); 73 | 74 | context.tuple = Tuple.size( getFieldDeclaration().size() ); 75 | context.results = new double[ ensembleSpec.getModelSpecs().size() ]; 76 | 77 | operationCall.setContext( context ); 78 | } 79 | 80 | @Override 81 | public void operate( FlowProcess flowProcess, BufferCall bufferCall ) 82 | { 83 | double[] results = bufferCall.getContext().results; 84 | 85 | 
Arrays.fill( results, 0 ); // clear before use 86 | 87 | Iterator iterator = bufferCall.getArgumentsIterator(); 88 | int count = 0; 89 | 90 | while( iterator.hasNext() ) 91 | { 92 | TupleEntry next = iterator.next(); 93 | Double score = next.getDouble( 0 ); 94 | 95 | results[ count++ ] += score; 96 | } 97 | 98 | double prediction = selection.predict( results ); 99 | 100 | LOG.debug( "prediction: {}", prediction ); 101 | 102 | bufferCall.getOutputCollector().add( bufferCall.getContext().result( prediction ) ); 103 | } 104 | } 105 | -------------------------------------------------------------------------------- /pattern-core/src/main/java/cascading/pattern/ensemble/selection/PredictionSelector.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 
19 | */ 20 | 21 | package cascading.pattern.ensemble.selection; 22 | 23 | /** 24 | * 25 | */ 26 | public abstract class PredictionSelector extends SelectionStrategy 27 | { 28 | abstract double predict( double[] results ); 29 | } 30 | -------------------------------------------------------------------------------- /pattern-core/src/main/java/cascading/pattern/ensemble/selection/SelectionBuffer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 
19 | */ 20 | 21 | package cascading.pattern.ensemble.selection; 22 | 23 | import cascading.operation.BaseOperation; 24 | import cascading.operation.Buffer; 25 | import cascading.pattern.ensemble.EnsembleSpec; 26 | import cascading.tuple.Fields; 27 | 28 | /** 29 | * 30 | */ 31 | public abstract class SelectionBuffer extends BaseOperation implements Buffer 32 | { 33 | protected final EnsembleSpec ensembleSpec; 34 | 35 | public SelectionBuffer( Fields fieldDeclaration, EnsembleSpec ensembleSpec ) 36 | { 37 | super( fieldDeclaration ); 38 | 39 | this.ensembleSpec = ensembleSpec; 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /pattern-core/src/main/java/cascading/pattern/ensemble/selection/SelectionStrategy.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 
19 | */ 20 | 21 | package cascading.pattern.ensemble.selection; 22 | 23 | import java.io.Serializable; 24 | 25 | /** 26 | * 27 | */ 28 | public abstract class SelectionStrategy implements Serializable 29 | { 30 | public abstract boolean isParallel(); 31 | 32 | @Override 33 | public String toString() 34 | { 35 | return getClass().getSimpleName(); 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /pattern-core/src/main/java/cascading/pattern/model/ModelScoringFunction.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 
19 | */ 20 | 21 | package cascading.pattern.model; 22 | 23 | import java.io.Serializable; 24 | 25 | import cascading.flow.FlowProcess; 26 | import cascading.operation.BaseOperation; 27 | import cascading.operation.Function; 28 | import cascading.operation.OperationCall; 29 | import cascading.tuple.Tuple; 30 | 31 | 32 | public abstract class ModelScoringFunction extends BaseOperation> implements Function> 33 | { 34 | public static final boolean SAFE_DEFAULT = false; 35 | 36 | protected S spec; 37 | protected boolean isSafe = SAFE_DEFAULT; 38 | 39 | protected static interface Result extends Serializable 40 | { 41 | Out transform( In object ); 42 | } 43 | 44 | /** Class Context is used to hold intermediate values. */ 45 | protected static class Context 46 | { 47 | public final Tuple tuple; 48 | public Payload payload; 49 | 50 | public Context( int size ) 51 | { 52 | tuple = Tuple.size( size ); 53 | } 54 | 55 | public Tuple result( Object label ) 56 | { 57 | tuple.set( 0, label ); 58 | 59 | return tuple; 60 | } 61 | } 62 | 63 | protected ModelScoringFunction( S spec, boolean safe ) 64 | { 65 | this( spec ); 66 | isSafe = safe; 67 | } 68 | 69 | protected ModelScoringFunction( S spec ) 70 | { 71 | super( spec.getModelSchema().getInputFields().size(), spec.getModelSchema().getDeclaredFields() ); 72 | this.spec = spec; 73 | } 74 | 75 | public S getSpec() 76 | { 77 | return spec; 78 | } 79 | 80 | @Override 81 | public boolean isSafe() 82 | { 83 | return isSafe; 84 | } 85 | 86 | @Override 87 | public void prepare( FlowProcess flowProcess, OperationCall> operationCall ) 88 | { 89 | operationCall.setContext( new ModelScoringFunction.Context( getFieldDeclaration().size() ) ); 90 | } 91 | } 92 | -------------------------------------------------------------------------------- /pattern-core/src/main/java/cascading/pattern/model/Spec.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. 
All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | package cascading.pattern.model; 22 | 23 | import java.io.Serializable; 24 | import java.util.ArrayList; 25 | import java.util.List; 26 | 27 | import cascading.pattern.datafield.CategoricalDataField; 28 | import cascading.pattern.datafield.DataField; 29 | 30 | 31 | public abstract class Spec implements Serializable 32 | { 33 | protected ModelSchema modelSchema = null; 34 | 35 | protected Spec() 36 | { 37 | } 38 | 39 | protected Spec( ModelSchema modelSchema ) 40 | { 41 | this.modelSchema = modelSchema; 42 | } 43 | 44 | public void setModelSchema( ModelSchema modelSchema ) 45 | { 46 | this.modelSchema = modelSchema; 47 | } 48 | 49 | public ModelSchema getModelSchema() 50 | { 51 | if( modelSchema == null ) 52 | modelSchema = new ModelSchema(); 53 | 54 | return modelSchema; 55 | } 56 | 57 | public boolean isPredictedCategorical() 58 | { 59 | DataField predictedField = getModelSchema().getPredictedField( getModelSchema().getPredictedFieldNames().get( 0 ) ); 60 | 61 | return predictedField instanceof CategoricalDataField; 62 | } 63 | 64 | public List getCategories() 65 | { 66 | DataField predictedField = getModelSchema().getPredictedField( getModelSchema().getPredictedFieldNames().get( 0 ) ); 67 | 68 | List categories = 
new ArrayList(); 69 | 70 | if( predictedField instanceof CategoricalDataField ) 71 | categories.addAll( ( (CategoricalDataField) predictedField ).getCategories() ); 72 | 73 | return categories; 74 | } 75 | 76 | public String[] getCategoriesArray() 77 | { 78 | List categories = getCategories(); 79 | 80 | return categories.toArray( new String[ categories.size() ] ); 81 | } 82 | } 83 | -------------------------------------------------------------------------------- /pattern-core/src/main/java/cascading/pattern/model/clustering/Cluster.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | package cascading.pattern.model.clustering; 22 | 23 | import java.io.Serializable; 24 | import java.util.List; 25 | 26 | import com.google.common.primitives.Doubles; 27 | 28 | /** 29 | * Class Cluster represents a point in space denoted by the given collection of {@code points} which 30 | * in turn represents a particular {@code targetCategory}. 
31 | */ 32 | public class Cluster implements Serializable 33 | { 34 | protected int ordinal; // set when added to spec 35 | protected String targetCategory; 36 | protected double[] points; 37 | 38 | public Cluster( String targetCategory, double... points ) 39 | { 40 | this( targetCategory ); 41 | this.points = new double[ points.length ]; 42 | 43 | System.arraycopy( points, 0, this.points, 0, points.length ); 44 | } 45 | 46 | public Cluster( String targetCategory, List points ) 47 | { 48 | this( targetCategory ); 49 | this.points = Doubles.toArray( points ); 50 | } 51 | 52 | private Cluster( String targetCategory ) 53 | { 54 | this.targetCategory = targetCategory; 55 | } 56 | 57 | protected void setOrdinal( int ordinal ) 58 | { 59 | this.ordinal = ordinal; 60 | } 61 | 62 | public String getTargetCategory() 63 | { 64 | if( targetCategory == null ) 65 | return Integer.toString( ordinal ); 66 | 67 | return targetCategory; 68 | } 69 | 70 | public double[] getPoints() 71 | { 72 | double[] dest = new double[ points.length ]; 73 | 74 | System.arraycopy( points, 0, dest, 0, points.length ); 75 | 76 | return dest; 77 | } 78 | 79 | public int getPointsSize() 80 | { 81 | return points.length; 82 | } 83 | } 84 | -------------------------------------------------------------------------------- /pattern-core/src/main/java/cascading/pattern/model/clustering/ClusterEvaluator.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 
10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | package cascading.pattern.model.clustering; 22 | 23 | import java.util.Arrays; 24 | 25 | import cascading.pattern.model.clustering.compare.CompareFunction; 26 | import cascading.pattern.model.clustering.measure.ComparisonMeasure; 27 | import cascading.tuple.Fields; 28 | import cascading.tuple.TupleEntry; 29 | 30 | /** 31 | * 32 | */ 33 | class ClusterEvaluator 34 | { 35 | private final Cluster cluster; 36 | private final ComparisonMeasure comparisonMeasure; 37 | private final CompareFunction[] compareFunctions; 38 | private final double[] points; 39 | 40 | public ClusterEvaluator( Fields argumentFields, Cluster cluster, ComparisonMeasure comparisonMeasure, CompareFunction compareFunction ) 41 | { 42 | this.cluster = cluster; 43 | this.comparisonMeasure = comparisonMeasure; 44 | this.compareFunctions = createCompareFunctions( argumentFields, compareFunction ); 45 | this.points = cluster.getPoints(); 46 | } 47 | 48 | private CompareFunction[] createCompareFunctions( Fields fields, CompareFunction defaultFunction, CompareFunction... 
functions ) 49 | { 50 | CompareFunction[] results = new CompareFunction[ fields.size() ]; 51 | 52 | Arrays.fill( results, defaultFunction ); 53 | 54 | if( functions.length == 0 ) 55 | return results; 56 | 57 | if( functions.length != fields.size() ) 58 | throw new IllegalStateException( "fields and number of functions are not equal" ); 59 | 60 | for( int i = 0; i < functions.length; i++ ) 61 | { 62 | if( functions[ i ] != null ) 63 | results[ i ] = functions[ i ]; 64 | } 65 | 66 | return results; 67 | } 68 | 69 | double evaluate( TupleEntry tupleEntry ) 70 | { 71 | return comparisonMeasure.calculate( compareFunctions, tupleEntry.getTuple(), points ); 72 | } 73 | 74 | public String getTargetCategory() 75 | { 76 | return cluster.getTargetCategory(); 77 | } 78 | } 79 | -------------------------------------------------------------------------------- /pattern-core/src/main/java/cascading/pattern/model/clustering/compare/AbsoluteDifferenceCompareFunction.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 
19 | */ 20 | 21 | package cascading.pattern.model.clustering.compare; 22 | 23 | /** 24 | * 25 | */ 26 | public class AbsoluteDifferenceCompareFunction extends CompareFunction 27 | { 28 | @Override 29 | public double result( double lhs, double rhs ) 30 | { 31 | return Math.abs( lhs - rhs ); 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /pattern-core/src/main/java/cascading/pattern/model/clustering/compare/CompareFunction.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | package cascading.pattern.model.clustering.compare; 22 | 23 | import java.io.Serializable; 24 | 25 | /** 26 | * 27 | */ 28 | public abstract class CompareFunction implements Serializable 29 | { 30 | public abstract double result( double lhs, double rhs ); 31 | } 32 | -------------------------------------------------------------------------------- /pattern-core/src/main/java/cascading/pattern/model/clustering/measure/ComparisonMeasure.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 
3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | package cascading.pattern.model.clustering.measure; 22 | 23 | import java.io.Serializable; 24 | 25 | import cascading.pattern.model.clustering.compare.CompareFunction; 26 | import cascading.tuple.Tuple; 27 | 28 | /** 29 | * 30 | */ 31 | public abstract class ComparisonMeasure implements Serializable 32 | { 33 | /** 34 | * Calculate the distance from this cluster for the given tuple. 35 | * 36 | * @param compareFunctions 37 | * @param values array of tuple values 38 | * @return double 39 | */ 40 | public abstract double calculate( CompareFunction[] compareFunctions, Tuple values, double[] points ); 41 | } 42 | -------------------------------------------------------------------------------- /pattern-core/src/main/java/cascading/pattern/model/clustering/measure/DistanceMeasure.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 
10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | package cascading.pattern.model.clustering.measure; 22 | 23 | /** 24 | * 25 | */ 26 | public abstract class DistanceMeasure extends ComparisonMeasure 27 | { 28 | } 29 | -------------------------------------------------------------------------------- /pattern-core/src/main/java/cascading/pattern/model/clustering/measure/EuclideanMeasure.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 
19 | */ 20 | 21 | package cascading.pattern.model.clustering.measure; 22 | 23 | import cascading.pattern.model.clustering.compare.CompareFunction; 24 | import cascading.tuple.Tuple; 25 | 26 | /** 27 | * Class EuclideanMeasure calculates Euclidean distance between two points 28 | * where the two points difference utilize the given {@link CompareFunction}. 29 | */ 30 | public class EuclideanMeasure extends SquaredEuclideanMeasure 31 | { 32 | public EuclideanMeasure() 33 | { 34 | } 35 | 36 | @Override 37 | public double calculate( CompareFunction[] compareFunctions, Tuple values, double[] points ) 38 | { 39 | return Math.sqrt( super.calculate( compareFunctions, values, points ) ); 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /pattern-core/src/main/java/cascading/pattern/model/clustering/measure/SquaredEuclideanMeasure.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 
19 | */ 20 | 21 | package cascading.pattern.model.clustering.measure; 22 | 23 | import cascading.pattern.model.clustering.compare.CompareFunction; 24 | import cascading.tuple.Tuple; 25 | 26 | /** 27 | * 28 | */ 29 | public class SquaredEuclideanMeasure extends DistanceMeasure 30 | { 31 | public SquaredEuclideanMeasure() 32 | { 33 | } 34 | 35 | @Override 36 | public double calculate( CompareFunction[] compareFunctions, Tuple values, double[] points ) 37 | { 38 | double sumOfSquares = 0.0; 39 | 40 | for( int i = 0; i < points.length; i++ ) 41 | sumOfSquares += Math.pow( values.getDouble( i ) - points[ i ], 2.0 ); 42 | 43 | return sumOfSquares; 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /pattern-core/src/main/java/cascading/pattern/model/generalregression/BaseRegressionFunction.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 
/**
 * Class BaseRegressionFunction is the package-private base for regression scoring functions.
 * During {@link #prepare} it stores a fresh {@link ExpressionContext} in the operation
 * context and fills it with the evaluators returned by the spec for the argument fields.
 * <p>
 * NOTE(review): generic type arguments appear to have been stripped from this listing
 * (see the {@code extends} clause and the {@code OperationCall} parameter) -- confirm
 * against the original file before compiling.
 */
abstract class BaseRegressionFunction extends ModelScoringFunction
{
  /** Per-operation state placed in the context payload by {@link #prepare}. */
  protected static class ExpressionContext
  {
    /** Evaluators returned by {@code getSpec().getRegressionTableEvaluators(..)}. */
    public ExpressionEvaluator[] expressions;
    /** Not assigned anywhere in this class. */
    public double[] results;
  }

  /**
   * @param spec the regression spec handed to the parent constructor
   */
  public BaseRegressionFunction( GeneralRegressionSpec spec )
  {
    super( spec );
  }

  /**
   * Runs the parent preparation, then creates the {@link ExpressionContext} payload and
   * resolves the evaluators against the argument fields of this call.
   */
  @Override
  public void prepare( FlowProcess flowProcess, OperationCall> operationCall )
  {
    super.prepare( flowProcess, operationCall );

    Fields argumentFields = operationCall.getArgumentFields();

    // the payload is replaced first, then populated through the context
    operationCall.getContext().payload = new ExpressionContext();
    operationCall.getContext().payload.expressions = getSpec().getRegressionTableEvaluators( argumentFields );
  }
}
/**
 * Enum LinkFunction lists the link functions supported for GLM scoring. Each constant
 * carries its lookup name in {@link #function} and implements {@link #calculate(double)}.
 */
public enum LinkFunction
{
  /** Returns the value unchanged. */
  NONE( "none" )
    {
    @Override
    public double calculate( double value )
    {
      return value;
    }
    },

  /** Returns {@code 1 / (1 + e^-value)}. */
  LOGIT( "logit" )
    {
    @Override
    public double calculate( double value )
    {
      return 1.0 / ( 1.0 + Math.exp( -value ) );
    }
    },

  /** Returns {@code 1 - e^(-e^value)}. */
  CLOGLOG( "cloglog" )
    {
    @Override
    public double calculate( double value )
    {
      return 1.0 - Math.exp( -Math.exp( value ) );
    }
    },

  /** Returns {@code e^(-e^-value)}. */
  LOGLOG( "loglog" )
    {
    @Override
    public double calculate( double value )
    {
      return Math.exp( -Math.exp( -value ) );
    }
    },

  /** Returns {@code 0.5 + atan(value) / pi}. */
  CAUCHIT( "cauchit" )
    {
    @Override
    public double calculate( double value )
    {
      return 0.5 + 1.0 / Math.PI * Math.atan( value );
    }
    };

  /** Name used by {@link #getFunction(String)} to look up this constant. */
  public String function;

  private LinkFunction( String function )
  {
    this.function = function;
  }

  /**
   * Returns the LinkFunction whose name equals the given name exactly (case-sensitive).
   * <p>
   * The name is compared literally; it is never interpreted as a regular expression, so
   * names containing regex meta characters cannot match a different constant or raise a
   * {@link java.util.regex.PatternSyntaxException}.
   *
   * @param functionName name to look up, may be {@code null}
   * @return the matching LinkFunction, or {@link #NONE} when the name is {@code null} or unknown
   */
  public static LinkFunction getFunction( String functionName )
  {
    for( LinkFunction lf : values() )
    {
      // equals( null ) is false, so a null name falls through to NONE
      if( lf.function.equals( functionName ) )
        return lf;
    }

    return LinkFunction.NONE;
  }

  /**
   * Applies this function to the given value.
   *
   * @param value input value
   * @return the transformed value
   */
  public abstract double calculate( double value );
}
/pattern-core/src/main/java/cascading/pattern/model/generalregression/Parameter.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | package cascading.pattern.model.generalregression; 22 | 23 | import java.io.Serializable; 24 | import java.util.ArrayList; 25 | import java.util.Arrays; 26 | import java.util.List; 27 | 28 | import cascading.pattern.model.generalregression.expression.ParameterExpression; 29 | import cascading.pattern.model.generalregression.predictor.CovariantPredictor; 30 | import cascading.pattern.model.generalregression.predictor.FactorPredictor; 31 | import cascading.pattern.model.generalregression.predictor.Predictor; 32 | import cascading.tuple.Fields; 33 | 34 | /** 35 | * 36 | */ 37 | public class Parameter implements Serializable 38 | { 39 | String name; 40 | double beta; 41 | int df; 42 | 43 | ArrayList covariants = new ArrayList(); 44 | ArrayList factors = new ArrayList(); 45 | 46 | public Parameter( String name, double beta, int df ) 47 | { 48 | this.name = name; 49 | this.beta = beta; 50 | this.df = df; 51 | } 52 | 53 | public Parameter( String name, double beta ) 54 | { 55 | this.name = name; 56 | 
this.beta = beta; 57 | } 58 | 59 | public Parameter( String name, double beta, Predictor... predictors ) 60 | { 61 | this( name, beta, Arrays.asList( predictors ) ); 62 | } 63 | 64 | public Parameter( String name, double beta, List predictors ) 65 | { 66 | this.name = name; 67 | this.beta = beta; 68 | 69 | addPredictors( predictors ); 70 | } 71 | 72 | public String getName() 73 | { 74 | return name; 75 | } 76 | 77 | public double getBeta() 78 | { 79 | return beta; 80 | } 81 | 82 | public int getDegreesOfFreedom() 83 | { 84 | return df; 85 | } 86 | 87 | public ArrayList getCovariants() 88 | { 89 | return covariants; 90 | } 91 | 92 | public ArrayList getFactors() 93 | { 94 | return factors; 95 | } 96 | 97 | public boolean isNoOp() 98 | { 99 | return beta == 0; 100 | } 101 | 102 | public void addPredictors( List predictors ) 103 | { 104 | for( Predictor predictor : predictors ) 105 | addPredictor( predictor ); 106 | } 107 | 108 | public void addPredictor( Predictor predictor ) 109 | { 110 | if( predictor instanceof CovariantPredictor ) 111 | covariants.add( (CovariantPredictor) predictor ); 112 | 113 | if( predictor instanceof FactorPredictor ) 114 | factors.add( (FactorPredictor) predictor ); 115 | } 116 | 117 | public ParameterExpression createExpression( Fields argumentsFields ) 118 | { 119 | return new ParameterExpression( argumentsFields, this ); 120 | } 121 | } 122 | -------------------------------------------------------------------------------- /pattern-core/src/main/java/cascading/pattern/model/generalregression/PredictionRegressionFunction.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 
7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | package cascading.pattern.model.generalregression; 22 | 23 | import cascading.flow.FlowProcess; 24 | import cascading.operation.FunctionCall; 25 | import cascading.pattern.model.generalregression.expression.ExpressionEvaluator; 26 | import org.slf4j.Logger; 27 | import org.slf4j.LoggerFactory; 28 | 29 | /** 30 | * Class PredictionRegressionFunction will return a single prediction 31 | * as determined by the {@link RegressionTable}s added to the {@link GeneralRegressionSpec}. 
/**
 * Class PredictionRegressionFunction emits a single prediction per argument tuple: the first
 * expression evaluator is calculated against the arguments and the spec's {@link LinkFunction}
 * is applied to that value.
 * <p>
 * NOTE(review): generic type arguments appear to have been stripped from this listing
 * (see the {@code FunctionCall} parameter) -- confirm against the original file.
 */
public class PredictionRegressionFunction extends BaseRegressionFunction
{
  private static final Logger LOG = LoggerFactory.getLogger( PredictionRegressionFunction.class );

  /**
   * @param param the regression spec; it must hold exactly one regression table
   * @throws IllegalArgumentException if the spec does not hold exactly one regression table
   */
  public PredictionRegressionFunction( GeneralRegressionSpec param )
  {
    super( param );

    if( getSpec().getRegressionTables().size() != 1 )
      throw new IllegalArgumentException( "regression function only supports a single table, got: " + getSpec().getRegressionTables().size() );
  }

  /**
   * Calculates the prediction for the current arguments and adds it to the output collector.
   */
  @Override
  public void operate( FlowProcess flowProcess, FunctionCall> functionCall )
  {
    // only index 0 is read; the constructor rejects specs without exactly one table
    ExpressionEvaluator evaluator = functionCall.getContext().payload.expressions[ 0 ];
    LinkFunction linkFunction = getSpec().linkFunction;

    double result = evaluator.calculate( functionCall.getArguments() );
    double linkResult = linkFunction.calculate( result );

    LOG.debug( "result: {}", linkResult );

    // the context wraps the value into the result handed to the collector
    functionCall.getOutputCollector().add( functionCall.getContext().result( linkResult ) );
  }
}
10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | package cascading.pattern.model.generalregression; 22 | 23 | import java.io.Serializable; 24 | import java.util.LinkedHashMap; 25 | import java.util.Map; 26 | import java.util.Set; 27 | 28 | import cascading.pattern.model.generalregression.expression.ExpressionEvaluator; 29 | import cascading.pattern.model.generalregression.expression.ParameterExpression; 30 | import cascading.tuple.Fields; 31 | 32 | /** 33 | * Class RegressionTable simply holds a set of {@link Parameter} instances. 34 | *

35 | * If used for classification or categorization with CategoricalRegressionFunction, the 36 | * table must have a {@code targetCategory} value. 37 | */ 38 | public class RegressionTable implements Serializable 39 | { 40 | private String targetCategory; 41 | 42 | Map parameters = new LinkedHashMap(); 43 | 44 | public RegressionTable() 45 | { 46 | } 47 | 48 | public RegressionTable( String targetCategory ) 49 | { 50 | this.targetCategory = targetCategory; 51 | } 52 | 53 | public String getTargetCategory() 54 | { 55 | return targetCategory; 56 | } 57 | 58 | public void setTargetCategory( String targetCategory ) 59 | { 60 | this.targetCategory = targetCategory; 61 | } 62 | 63 | public void addParameter( Parameter parameter ) 64 | { 65 | if( parameters.containsKey( parameter.getName() ) ) 66 | throw new IllegalArgumentException( "may not have duplicate parameter names, got: " + parameter.getName() ); 67 | 68 | parameters.put( parameter.getName(), parameter ); 69 | } 70 | 71 | public Parameter getParameter( String name ) 72 | { 73 | return parameters.get( name ); 74 | } 75 | 76 | public boolean isNoOp() 77 | { 78 | for( Parameter parameter : parameters.values() ) 79 | { 80 | if( !parameter.isNoOp() ) 81 | return false; 82 | } 83 | 84 | return true; 85 | } 86 | 87 | ExpressionEvaluator bind( Fields argumentFields ) 88 | { 89 | if( isNoOp() ) 90 | return new ExpressionEvaluator( targetCategory ); 91 | 92 | ParameterExpression[] expressions = new ParameterExpression[ parameters.size() ]; 93 | 94 | int count = 0; 95 | 96 | for( Parameter parameter : parameters.values() ) 97 | expressions[ count++ ] = parameter.createExpression( argumentFields ); 98 | 99 | return new ExpressionEvaluator( targetCategory, expressions ); 100 | } 101 | 102 | public Set getParameterNames() 103 | { 104 | return parameters.keySet(); 105 | } 106 | 107 | @Override 108 | public String toString() 109 | { 110 | final StringBuilder sb = new StringBuilder( "GeneralRegressionTable{" ); 111 | sb.append( 
"targetCategory='" ).append( targetCategory ).append( '\'' ); 112 | sb.append( ", parameters=" ).append( parameters ); 113 | sb.append( '}' ); 114 | return sb.toString(); 115 | } 116 | } 117 | -------------------------------------------------------------------------------- /pattern-core/src/main/java/cascading/pattern/model/generalregression/expression/ExpressionEvaluator.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 
19 | */ 20 | 21 | package cascading.pattern.model.generalregression.expression; 22 | 23 | import cascading.tuple.TupleEntry; 24 | 25 | /** 26 | * 27 | */ 28 | public class ExpressionEvaluator 29 | { 30 | private final String targetCategory; 31 | private final ParameterExpression[] expressions; 32 | 33 | public ExpressionEvaluator( String targetCategory ) 34 | { 35 | this.targetCategory = targetCategory; 36 | this.expressions = new ParameterExpression[ 0 ]; 37 | } 38 | 39 | public ExpressionEvaluator( String targetCategory, ParameterExpression[] expressions ) 40 | { 41 | this.targetCategory = targetCategory; 42 | this.expressions = expressions; 43 | } 44 | 45 | public String getTargetCategory() 46 | { 47 | return targetCategory; 48 | } 49 | 50 | public double calculate( TupleEntry tupleEntry ) 51 | { 52 | double result = 0.0d; 53 | 54 | for( ParameterExpression expression : expressions ) 55 | { 56 | if( expression.applies( tupleEntry ) ) 57 | result += expression.calculate( tupleEntry ); 58 | } 59 | 60 | return result; 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /pattern-core/src/main/java/cascading/pattern/model/generalregression/normalization/Normalization.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 
/**
 * Class Normalization is the serializable base type for strategies that normalize an
 * array of values.
 */
public abstract class Normalization implements Serializable
{
  /**
   * Normalizes the given values.
   *
   * @param values values to normalize
   * @return the normalized values
   */
  public abstract double[] normalize( double[] values );

  /** Normalization that hands back the very array it was given, untouched. */
  public static final Normalization NONE = new Normalization()
  {
    @Override
    public double[] normalize( double[] values )
    {
      return values;
    }
  };
}
17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | package cascading.pattern.model.generalregression.normalization; 22 | 23 | /** 24 | * 25 | */ 26 | public class SoftMaxNormalization extends Normalization 27 | { 28 | @Override 29 | public double[] normalize( double[] values ) 30 | { 31 | double sum = 0.0d; 32 | 33 | for( int i = 0; i < values.length; i++ ) 34 | { 35 | values[ i ] = Math.exp( values[ i ] ); 36 | sum += values[ i ]; 37 | } 38 | 39 | for( int i = 0; i < values.length; i++ ) 40 | values[ i ] = values[ i ] / sum; 41 | 42 | return values; 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /pattern-core/src/main/java/cascading/pattern/model/generalregression/predictor/CovariantPredictor.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 
19 | */ 20 | 21 | package cascading.pattern.model.generalregression.predictor; 22 | 23 | /** 24 | * 25 | */ 26 | public class CovariantPredictor extends Predictor 27 | { 28 | private final long exponent; 29 | 30 | public CovariantPredictor( String fieldName ) 31 | { 32 | super( fieldName ); 33 | 34 | this.exponent = 1; 35 | } 36 | 37 | public CovariantPredictor( String fieldName, long exponent ) 38 | { 39 | super( fieldName ); 40 | 41 | this.exponent = exponent; 42 | } 43 | 44 | public double calculate( double value ) 45 | { 46 | return Math.pow( value, exponent ); 47 | } 48 | } 49 | -------------------------------------------------------------------------------- /pattern-core/src/main/java/cascading/pattern/model/generalregression/predictor/FactorPredictor.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 
19 | */ 20 | 21 | package cascading.pattern.model.generalregression.predictor; 22 | 23 | /** 24 | * 25 | */ 26 | public class FactorPredictor extends Predictor 27 | { 28 | private final String factor; 29 | 30 | public FactorPredictor( String fieldName, String factor ) 31 | { 32 | super( fieldName ); 33 | 34 | this.factor = factor; 35 | } 36 | 37 | public boolean matches( String value ) 38 | { 39 | return factor.equals( value ); 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /pattern-core/src/main/java/cascading/pattern/model/generalregression/predictor/Predictor.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 
/**
 * Class Predictor is the serializable base type of all predictors; it only records the
 * name of the field a predictor is bound to.
 */
public class Predictor implements Serializable
{
  /** Name of the field this predictor is bound to. */
  protected final String fieldName;

  /**
   * @param fieldName name of the field this predictor is bound to
   */
  public Predictor( String fieldName )
  {
    this.fieldName = fieldName;
  }

  /** @return the field name given at construction */
  public String getFieldName()
  {
    return this.fieldName;
  }
}
19 | */ 20 | 21 | package cascading.pattern.model.tree; 22 | 23 | import java.io.Serializable; 24 | 25 | import cascading.pattern.model.tree.predicate.Predicate; 26 | 27 | public class Node implements Serializable 28 | { 29 | String id; 30 | Predicate predicate = null; 31 | Object score = null; 32 | 33 | public Node( String id, Predicate predicate ) 34 | { 35 | this( id ); 36 | 37 | this.predicate = predicate; 38 | } 39 | 40 | public Node( String id, Predicate predicate, Object score ) 41 | { 42 | this( id ); 43 | this.predicate = predicate; 44 | this.score = score; 45 | } 46 | 47 | public Node( String id ) 48 | { 49 | if( id == null ) 50 | throw new IllegalArgumentException( "id may not be null" ); 51 | 52 | this.id = id; 53 | } 54 | 55 | public String getID() 56 | { 57 | return id; 58 | } 59 | 60 | public Predicate getPredicate() 61 | { 62 | return predicate; 63 | } 64 | 65 | public void setScore( String score ) 66 | { 67 | this.score = score; 68 | } 69 | 70 | public Object getScore() 71 | { 72 | return score; 73 | } 74 | 75 | @Override 76 | public String toString() 77 | { 78 | final StringBuilder sb = new StringBuilder( "Node{" ); 79 | sb.append( "id='" ).append( id ).append( '\'' ); 80 | sb.append( ", predicate=" ).append( predicate ); 81 | sb.append( ", category='" ).append( score ).append( '\'' ); 82 | sb.append( '}' ); 83 | return sb.toString(); 84 | } 85 | 86 | @Override 87 | public boolean equals( Object object ) 88 | { 89 | if( this == object ) 90 | return true; 91 | 92 | if( object == null || getClass() != object.getClass() ) 93 | return false; 94 | 95 | Node node = (Node) object; 96 | 97 | if( !id.equals( node.id ) ) 98 | return false; 99 | 100 | return true; 101 | } 102 | 103 | @Override 104 | public int hashCode() 105 | { 106 | return id.hashCode(); 107 | } 108 | } 109 | -------------------------------------------------------------------------------- /pattern-core/src/main/java/cascading/pattern/model/tree/TreeFunction.java: 
-------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | package cascading.pattern.model.tree; 22 | 23 | import cascading.flow.FlowProcess; 24 | import cascading.operation.FunctionCall; 25 | import cascading.operation.OperationCall; 26 | import cascading.pattern.model.ModelScoringFunction; 27 | import cascading.pattern.model.tree.decision.DecisionTree; 28 | import cascading.pattern.model.tree.decision.FinalDecision; 29 | import org.slf4j.Logger; 30 | import org.slf4j.LoggerFactory; 31 | 32 | /** Class TreeFunction will return the result or score from the given decision tree defined by the {@link TreeSpec}. 
*/
33 | public class TreeFunction extends ModelScoringFunction
34 | { // NOTE(review): generic type arguments look stripped from this listing (e.g. "OperationCall>" below) - restore them from the upstream source before compiling
35 |   private static final Logger LOG = LoggerFactory.getLogger( TreeFunction.class );
36 |   // converts a FinalDecision into the emitted value; assigned once in the three-argument constructor
37 |   Result result;
38 |   // delegates with returnIndex == false and safe == SAFE_DEFAULT
39 |   public TreeFunction( TreeSpec treeSpec )
40 |   {
41 |     this( treeSpec, false, SAFE_DEFAULT );
42 |   }
43 |   // returnIndex == true emits FinalDecision.getIndex(), otherwise FinalDecision.getScore()
44 |   public TreeFunction( TreeSpec spec, boolean returnIndex, boolean safe )
45 |   {
46 |     super( spec, safe );
47 | 
48 |     if( returnIndex )
49 |       result = new Result()
50 |       {
51 |         @Override
52 |         public Integer transform( FinalDecision finalDecision )
53 |         {
54 |           return finalDecision.getIndex();
55 |         }
56 |       };
57 |     else
58 |       result = new Result()
59 |       {
60 |         @Override
61 |         public Object transform( FinalDecision finalDecision )
62 |         {
63 |           return finalDecision.getScore();
64 |         }
65 |       };
66 |   }
67 |   // stores a decision tree, created from the spec's tree, its categories array and the argument fields, as the context payload
68 |   @Override
69 |   public void prepare( FlowProcess flowProcess, OperationCall> operationCall )
70 |   {
71 |     super.prepare( flowProcess, operationCall );
72 | 
73 |     operationCall.getContext().payload = getSpec().getTree().createDecisionTree( getSpec().getCategoriesArray(), operationCall.getArgumentFields() );
74 |   }
75 |   // runs the decision tree held in the context against the current arguments and emits the transformed decision
76 |   @Override
77 |   public void operate( FlowProcess flowProcess, FunctionCall> functionCall )
78 |   {
79 |     DecisionTree decisionTree = functionCall.getContext().payload;
80 | 
81 |     FinalDecision finalDecision = decisionTree.decide( functionCall.getArguments() );
82 |     // NOTE(review): ParentDecision.decide() returns null when no successor yields a decision; transform() below would then fail with a NullPointerException - confirm whether that can happen for valid models
83 |     LOG.debug( "decision: {}", finalDecision );
84 | 
85 |     Object result = this.result.transform( finalDecision );
86 | 
87 |     functionCall.getOutputCollector().add( functionCall.getContext().result( result ) );
88 |   }
89 | }
90 | 
--------------------------------------------------------------------------------
/pattern-core/src/main/java/cascading/pattern/model/tree/TreeSpec.java:
--------------------------------------------------------------------------------
1 | /*
2 |  * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | package cascading.pattern.model.tree; 22 | 23 | import cascading.pattern.model.ModelSchema; 24 | import cascading.pattern.model.Spec; 25 | 26 | 27 | /** 28 | * Class TreeSpec is used to define a decision tree model. It simply holds a {@link Tree} instance 29 | * populated with {@link cascading.pattern.model.tree.predicate.Predicate} instances. 
30 | */ 31 | public class TreeSpec extends Spec 32 | { 33 | public Tree tree; 34 | 35 | public TreeSpec( ModelSchema modelSchema ) 36 | { 37 | super( modelSchema ); 38 | } 39 | 40 | public TreeSpec( ModelSchema modelSchema, Tree tree ) 41 | { 42 | super( modelSchema ); 43 | this.tree = tree; 44 | } 45 | 46 | public Tree getTree() 47 | { 48 | return tree; 49 | } 50 | 51 | public void setTree( Tree tree ) 52 | { 53 | this.tree = tree; 54 | } 55 | 56 | @Override 57 | public String toString() 58 | { 59 | final StringBuilder sb = new StringBuilder( "TreeSpec{" ); 60 | sb.append( "tree=" ).append( tree ); 61 | sb.append( '}' ); 62 | return sb.toString(); 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /pattern-core/src/main/java/cascading/pattern/model/tree/decision/Decision.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 
19 |  */
20 | 
21 | package cascading.pattern.model.tree.decision;
22 | 
23 | import java.util.List;
24 | 
25 | import cascading.pattern.model.tree.Node;
26 | import cascading.pattern.model.tree.Tree;
27 | import cascading.tuple.TupleEntry;
28 | import org.jgrapht.GraphPath;
29 | import org.jgrapht.Graphs;
30 | import org.jgrapht.alg.KShortestPaths;
31 | 
32 | /**
33 |  * Class Decision is the abstract base of the decision types in this package. Every instance has a name derived from the position of its {@link Node} in the {@link Tree}.
34 |  */
35 | public abstract class Decision
36 | {
37 |   protected final String name;
38 |   // the name is computed once, during construction, by createName()
39 |   public Decision( Tree tree, Node node )
40 |   {
41 |     this.name = createName( tree, node );
42 |   }
43 | 
44 |   public String getName()
45 |   {
46 |     return name;
47 |   }
48 |   // returns the node id for the root; otherwise the ids of the vertices on the path from the root, joined with '.', ending with the node's own id
49 |   protected String createName( Tree tree, Node node )
50 |   {
51 |     if( tree.getRoot() == node ) // identity comparison, although Node.equals() compares ids
52 |       return node.getID();
53 |     // asks KShortestPaths for at most one path from the root to node; NOTE(review): generic type arguments appear stripped from this listing ("List>") - restore from upstream
54 |     List> paths = new KShortestPaths( tree.getGraph(), tree.getRoot(), 1 ).getPaths( node );
55 |     // only the first path is used
56 |     List predecessors = Graphs.getPathVertexList( paths.get( 0 ) );
57 |     // drop the node itself so that its id is appended exactly once, at the end
58 |     predecessors.remove( node );
59 | 
60 |     String name = "";
61 | 
62 |     for( Node predecessor : predecessors )
63 |       name += predecessor.getID() + ".";
64 | 
65 |     name += node.getID();
66 | 
67 |     return name;
68 |   }
69 |   // NOTE(review): the ParentDecision and PredicatedDecision implementations in this package return null when no decision applies
70 |   protected abstract FinalDecision decide( TupleEntry tupleEntry );
71 | 
72 |   @Override
73 |   public String toString()
74 |   {
75 |     final StringBuilder sb = new StringBuilder( "Decision{" );
76 |     toString( sb );
77 |     sb.append( '}' );
78 |     return sb.toString();
79 |   }
80 |   // appends the name='...' fragment to the given builder and returns that builder
81 |   protected StringBuilder toString( StringBuilder sb )
82 |   {
83 |     return sb.append( "name='" ).append( name ).append( '\'' );
84 |   }
85 | }
86 | 
--------------------------------------------------------------------------------
/pattern-core/src/main/java/cascading/pattern/model/tree/decision/DecisionTree.java:
--------------------------------------------------------------------------------
1 | /*
2 |  * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | package cascading.pattern.model.tree.decision; 22 | 23 | import cascading.pattern.model.tree.Node; 24 | import cascading.pattern.model.tree.Tree; 25 | import cascading.tuple.Fields; 26 | import cascading.tuple.TupleEntry; 27 | 28 | /** 29 | * 30 | */ 31 | public class DecisionTree extends ParentDecision 32 | { 33 | public DecisionTree( String[] categories, Fields expectedFields, Tree tree, Node node ) 34 | { 35 | super( categories, expectedFields, tree, node ); 36 | } 37 | 38 | @Override 39 | public FinalDecision decide( TupleEntry tupleEntry ) 40 | { 41 | return super.decide( tupleEntry ); 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /pattern-core/src/main/java/cascading/pattern/model/tree/decision/FinalDecision.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 
10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | package cascading.pattern.model.tree.decision; 22 | 23 | import java.util.Arrays; 24 | 25 | import cascading.pattern.model.tree.Node; 26 | import cascading.pattern.model.tree.Tree; 27 | import cascading.tuple.TupleEntry; 28 | import org.slf4j.Logger; 29 | import org.slf4j.LoggerFactory; 30 | 31 | /** 32 | * 33 | */ 34 | public class FinalDecision extends Decision 35 | { 36 | private static final Logger LOG = LoggerFactory.getLogger( FinalDecision.class ); 37 | 38 | private final Object score; 39 | private final int index; 40 | 41 | public FinalDecision( Tree tree, Node node ) 42 | { 43 | this( null, tree, node ); 44 | } 45 | 46 | public FinalDecision( String[] categories, Tree tree, Node node ) 47 | { 48 | super( tree, node ); 49 | 50 | this.score = node.getScore(); 51 | 52 | if( this.score == null ) 53 | throw new IllegalStateException( "score may not be null, likely missing leaf node in tree at: " + getName() ); 54 | 55 | if( categories != null ) 56 | this.index = Arrays.asList( categories ).indexOf( this.score ); 57 | else 58 | this.index = -1; 59 | } 60 | 61 | public Object getScore() 62 | { 63 | return score; 64 | } 65 | 66 | public int getIndex() 67 | { 68 | return index; 69 | } 70 | 71 | @Override 72 | protected FinalDecision decide( TupleEntry tupleEntry ) 73 | { 74 | LOG.debug( "decision: {}", name ); 75 | 76 | return this; 77 | } 78 | 79 | @Override 80 | public String toString() 81 | { 82 | final StringBuilder sb = new StringBuilder( "FinalDecision{" ); 83 | sb.append( "name='" ).append( 
getName() ).append( '\'' ); 84 | sb.append( ",score='" ).append( score ).append( '\'' ); 85 | sb.append( '}' ); 86 | return sb.toString(); 87 | } 88 | } 89 | -------------------------------------------------------------------------------- /pattern-core/src/main/java/cascading/pattern/model/tree/decision/ParentDecision.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 
19 |  */
20 | 
21 | package cascading.pattern.model.tree.decision;
22 | 
23 | import java.util.List;
24 | 
25 | import cascading.pattern.model.tree.Node;
26 | import cascading.pattern.model.tree.Tree;
27 | import cascading.tuple.Fields;
28 | import cascading.tuple.TupleEntry;
29 | import org.jgrapht.Graphs;
30 | 
31 | /**
32 |  * Class ParentDecision is a {@link Decision} that holds an array of successor decisions and returns the first non-null result among them.
33 |  */
34 | abstract class ParentDecision extends Decision
35 | {
36 |   protected final Decision[] successors;
37 |   // the successors are created once, during construction, by createSuccessors()
38 |   public ParentDecision( String[] categories, Fields expectedFields, Tree tree, Node node )
39 |   {
40 |     super( tree, node );
41 | 
42 |     this.successors = createSuccessors( categories, expectedFields, tree, node );
43 |   }
44 |   // creates one PredicatedDecision per graph successor of node, or a single FinalDecision for node itself when it has no successors
45 |   protected Decision[] createSuccessors( String[] categories, Fields expectedFields, Tree tree, Node node )
46 |   { // NOTE(review): the raw List below looks like stripped generics (its elements are read as Node without a cast) - restore from upstream
47 |     List successorNodes = Graphs.successorListOf( tree.getGraph(), node );
48 | 
49 |     if( successorNodes.size() == 0 )
50 |       return new Decision[]{new FinalDecision( categories, tree, node )};
51 | 
52 |     Decision[] successors = new Decision[ successorNodes.size() ];
53 | 
54 |     for( int i = 0; i < successorNodes.size(); i++ )
55 |     {
56 |       Node successorNode = successorNodes.get( i );
57 | 
58 |       successors[ i ] = new PredicatedDecision( categories, expectedFields, tree, successorNode );
59 |     }
60 | 
61 |     return successors;
62 |   }
63 |   // asks each successor in array order and returns the first non-null FinalDecision, or null when none of them decides
64 |   protected FinalDecision decide( TupleEntry tupleEntry )
65 |   {
66 |     for( Decision child : successors )
67 |     {
68 |       FinalDecision decision = child.decide( tupleEntry );
69 | 
70 |       if( decision != null )
71 |         return decision;
72 |     }
73 | 
74 |     return null;
75 |   }
76 | }
77 | 
--------------------------------------------------------------------------------
/pattern-core/src/main/java/cascading/pattern/model/tree/decision/PredicatedDecision.java:
--------------------------------------------------------------------------------
1 | /*
2 |  * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | package cascading.pattern.model.tree.decision; 22 | 23 | import cascading.pattern.model.tree.Node; 24 | import cascading.pattern.model.tree.Tree; 25 | import cascading.tuple.Fields; 26 | import cascading.tuple.TupleEntry; 27 | 28 | /** 29 | * 30 | */ 31 | class PredicatedDecision extends ParentDecision 32 | { 33 | private final PredicateEvaluator evaluator; 34 | 35 | public PredicatedDecision( String[] categories, Fields expectedFields, Tree tree, Node node ) 36 | { 37 | super( categories, expectedFields, tree, node ); 38 | 39 | this.evaluator = new PredicateEvaluator( expectedFields, node.getPredicate() ); 40 | } 41 | 42 | @Override 43 | protected FinalDecision decide( TupleEntry tupleEntry ) 44 | { 45 | boolean result = evaluator.evaluate( tupleEntry ); 46 | 47 | if( !result ) 48 | return null; 49 | 50 | return super.decide( tupleEntry ); 51 | } 52 | 53 | } 54 | -------------------------------------------------------------------------------- /pattern-core/src/main/java/cascading/pattern/model/tree/predicate/ComparablePredicate.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 
3 |  *
4 |  * Project and contact information: http://www.cascading.org/
5 |  *
6 |  * This file is part of the Cascading project.
7 |  *
8 |  * Licensed under the Apache License, Version 2.0 (the "License");
9 |  * you may not use this file except in compliance with the License.
10 |  * You may obtain a copy of the License at
11 |  *
12 |  * http://www.apache.org/licenses/LICENSE-2.0
13 |  *
14 |  * Unless required by applicable law or agreed to in writing, software
15 |  * distributed under the License is distributed on an "AS IS" BASIS,
16 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 |  * See the License for the specific language governing permissions and
18 |  * limitations under the License.
19 |  */
20 | 
21 | package cascading.pattern.model.tree.predicate;
22 | 
23 | /**
24 |  * Class ComparablePredicate is a {@link SimplePredicate} that additionally holds the value its subclasses test arguments against.
25 |  */
26 | public abstract class ComparablePredicate extends SimplePredicate
27 | { // NOTE(review): T is used below but never declared in this listing - the class type parameter was presumably lost in extraction; confirm against upstream
28 |   T value;
29 |   // passes the field name to SimplePredicate and keeps the comparison value
30 |   protected ComparablePredicate( String field, T value )
31 |   {
32 |     super( field );
33 |     this.value = value;
34 |   }
35 | 
36 |   public T getValue()
37 |   {
38 |     return value;
39 |   }
40 | }
41 | 
--------------------------------------------------------------------------------
/pattern-core/src/main/java/cascading/pattern/model/tree/predicate/EqualsToPredicate.java:
--------------------------------------------------------------------------------
1 | /*
2 |  * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 |  *
4 |  * Project and contact information: http://www.cascading.org/
5 |  *
6 |  * This file is part of the Cascading project.
7 |  *
8 |  * Licensed under the Apache License, Version 2.0 (the "License");
9 |  * you may not use this file except in compliance with the License.
10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | package cascading.pattern.model.tree.predicate; 22 | 23 | /** 24 | * 25 | */ 26 | public class EqualsToPredicate extends ComparablePredicate 27 | { 28 | public EqualsToPredicate( String field, Object value ) 29 | { 30 | super( field, value ); 31 | } 32 | 33 | @Override 34 | public Boolean evaluate( Object argument ) 35 | { 36 | if( argument == null ) 37 | return null; 38 | 39 | return argument.equals( value ); 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /pattern-core/src/main/java/cascading/pattern/model/tree/predicate/FalsePredicate.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 
19 | */ 20 | 21 | package cascading.pattern.model.tree.predicate; 22 | 23 | /** Class FalsePredicate always evaluates to false. */ 24 | public class FalsePredicate extends SimplePredicate 25 | { 26 | public FalsePredicate() 27 | { 28 | super( null ); 29 | } 30 | 31 | @Override 32 | public Boolean evaluate( Object argument ) 33 | { 34 | return Boolean.FALSE; 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /pattern-core/src/main/java/cascading/pattern/model/tree/predicate/GreaterOrEqualThanPredicate.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 
19 | */ 20 | 21 | package cascading.pattern.model.tree.predicate; 22 | 23 | /** 24 | * 25 | */ 26 | public class GreaterOrEqualThanPredicate extends ComparablePredicate 27 | { 28 | public GreaterOrEqualThanPredicate( String field, Comparable value ) 29 | { 30 | super( field, value ); 31 | } 32 | 33 | @Override 34 | public Boolean evaluate( Object argument ) 35 | { 36 | if( argument == null ) 37 | return null; 38 | 39 | return ( (Comparable) argument ).compareTo( value ) >= 0; 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /pattern-core/src/main/java/cascading/pattern/model/tree/predicate/GreaterThanPredicate.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 
19 | */ 20 | 21 | package cascading.pattern.model.tree.predicate; 22 | 23 | /** 24 | * 25 | */ 26 | public class GreaterThanPredicate extends ComparablePredicate 27 | { 28 | public GreaterThanPredicate( String field, Comparable value ) 29 | { 30 | super( field, value ); 31 | } 32 | 33 | @Override 34 | public Boolean evaluate( Object argument ) 35 | { 36 | if( argument == null ) 37 | return null; 38 | 39 | return ( (Comparable) argument ).compareTo( value ) > 0; 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /pattern-core/src/main/java/cascading/pattern/model/tree/predicate/IsInSetPredicate.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | package cascading.pattern.model.tree.predicate; 22 | 23 | import java.util.Collection; 24 | 25 | /** Class IsInSetPredicate returns true if the current value is in the given collection of values. 
*/ 26 | public class IsInSetPredicate extends SimpleSetPredicate 27 | { 28 | public IsInSetPredicate( String field, Collection values ) 29 | { 30 | super( field, values ); 31 | } 32 | 33 | @Override 34 | public Boolean evaluate( Object argument ) 35 | { 36 | if( argument == null ) 37 | return null; 38 | 39 | return set.contains( argument ); 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /pattern-core/src/main/java/cascading/pattern/model/tree/predicate/IsMissingPredicate.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | package cascading.pattern.model.tree.predicate; 22 | 23 | /** Class IsMissingPredicate returns true if the given field has a null value. 
*/ 24 | public class IsMissingPredicate extends SimplePredicate 25 | { 26 | public IsMissingPredicate( String field ) 27 | { 28 | super( field ); 29 | } 30 | 31 | @Override 32 | public Boolean evaluate( Object argument ) 33 | { 34 | return argument == null; 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /pattern-core/src/main/java/cascading/pattern/model/tree/predicate/IsNotInSetPredicate.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | package cascading.pattern.model.tree.predicate; 22 | 23 | import java.util.Collection; 24 | 25 | /** Class IsNotInSetPredicate returns true if the current value is not in the given collection of values. 
*/ 26 | public class IsNotInSetPredicate extends SimpleSetPredicate 27 | { 28 | public IsNotInSetPredicate( String field, Collection values ) 29 | { 30 | super( field, values ); 31 | } 32 | 33 | @Override 34 | public Boolean evaluate( Object argument ) 35 | { 36 | if( argument == null ) 37 | return null; 38 | 39 | return !set.contains( argument ); 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /pattern-core/src/main/java/cascading/pattern/model/tree/predicate/IsNotMissingPredicate.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | package cascading.pattern.model.tree.predicate; 22 | 23 | /** Class IsNotMissingPredicate returns true if the given field does not have a null value. 
*/ 24 | public class IsNotMissingPredicate extends SimplePredicate 25 | { 26 | public IsNotMissingPredicate( String field ) 27 | { 28 | super( field ); 29 | } 30 | 31 | @Override 32 | public Boolean evaluate( Object argument ) 33 | { 34 | return argument != null; 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /pattern-core/src/main/java/cascading/pattern/model/tree/predicate/LessOrEqualThanPredicate.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 
19 | */ 20 | 21 | package cascading.pattern.model.tree.predicate; 22 | 23 | /** 24 | * 25 | */ 26 | public class LessOrEqualThanPredicate extends ComparablePredicate 27 | { 28 | public LessOrEqualThanPredicate( String field, Comparable value ) 29 | { 30 | super( field, value ); 31 | } 32 | 33 | @Override 34 | public Boolean evaluate( Object argument ) 35 | { 36 | if( argument == null ) 37 | return null; 38 | 39 | return ( (Comparable) argument ).compareTo( value ) <= 0; 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /pattern-core/src/main/java/cascading/pattern/model/tree/predicate/LessThanPredicate.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 
19 | */ 20 | 21 | package cascading.pattern.model.tree.predicate; 22 | 23 | /** 24 | * 25 | */ 26 | public class LessThanPredicate extends ComparablePredicate 27 | { 28 | public LessThanPredicate( String field, Comparable value ) 29 | { 30 | super( field, value ); 31 | } 32 | 33 | @Override 34 | public Boolean evaluate( Object argument ) 35 | { 36 | if( argument == null ) 37 | return null; 38 | 39 | return ( (Comparable) argument ).compareTo( value ) < 0; 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /pattern-core/src/main/java/cascading/pattern/model/tree/predicate/NotEqualsToPredicate.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 
19 | */ 20 | 21 | package cascading.pattern.model.tree.predicate; 22 | 23 | /** 24 | * Class NotEqualsToPredicate returns true if the argument is not equal (per equals) to the predicate value, and null if the argument is null. 25 | */ 26 | public class NotEqualsToPredicate extends ComparablePredicate 27 | { 28 | public NotEqualsToPredicate( String field, Object value ) 29 | { 30 | super( field, value ); 31 | } 32 | 33 | @Override 34 | public Boolean evaluate( Object argument ) 35 | { 36 | if( argument == null ) 37 | return null; 38 | 39 | return !argument.equals( value ); 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /pattern-core/src/main/java/cascading/pattern/model/tree/predicate/Predicate.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License.
19 | */ 20 | 21 | package cascading.pattern.model.tree.predicate; 22 | 23 | import java.io.Serializable; 24 | 25 | /** 26 | * Class Predicate is the abstract, Serializable root type of the predicate classes; it declares no members of its own. 27 | */ 28 | public abstract class Predicate implements Serializable 29 | { 30 | } 31 | -------------------------------------------------------------------------------- /pattern-core/src/main/java/cascading/pattern/model/tree/predicate/SimplePredicate.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License.
19 | */ 20 | 21 | package cascading.pattern.model.tree.predicate; 22 | 23 | /** 24 | * Class SimplePredicate is the base class for predicates that evaluate a single argument and carry the name of one field. 25 | */ 26 | public abstract class SimplePredicate extends Predicate 27 | { 28 | String field; 29 | 30 | public SimplePredicate( String field ) 31 | { 32 | this.field = field; 33 | } 34 | 35 | /** Returns the field name this predicate was created with. */ public String getArgumentField() 36 | { 37 | return field; 38 | } 39 | 40 | /** Evaluates this predicate for the given argument, presumably the value of the field named by getArgumentField() (confirm against the caller). */ public abstract Boolean evaluate( Object argument ); 41 | 42 | /** Renders the simple class name followed by the quoted field name. */ @Override 43 | public String toString() 44 | { 45 | final StringBuilder sb = new StringBuilder(); 46 | sb.append( getClass().getSimpleName() ).append( "{" ); 47 | sb.append( "field='" ).append( field ).append( '\'' ); 48 | sb.append( '}' ); 49 | return sb.toString(); 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /pattern-core/src/main/java/cascading/pattern/model/tree/predicate/SimpleSetPredicate.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License.
19 | */ 20 | 21 | package cascading.pattern.model.tree.predicate; 22 | 23 | import java.util.Collection; 24 | import java.util.HashSet; 25 | import java.util.Set; 26 | 27 | /** 28 | * Class SimpleSetPredicate is the base class for single-field predicates that hold a set of values; the given collection is copied into a HashSet. 29 | */ 30 | public abstract class SimpleSetPredicate extends SimplePredicate 31 | { 32 | Set set = new HashSet(); 33 | 34 | public SimpleSetPredicate( String field, Collection values ) 35 | { 36 | super( field ); 37 | this.set.addAll( values ); 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /pattern-core/src/main/java/cascading/pattern/model/tree/predicate/TruePredicate.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | package cascading.pattern.model.tree.predicate; 22 | 23 | /** Class TruePredicate always evaluates to true.
*/ 24 | public class TruePredicate extends SimplePredicate 25 | { 26 | /** Creates the predicate with a null field name. */ public TruePredicate() 27 | { 28 | super( null ); 29 | } 30 | 31 | /** Ignores the argument and always returns Boolean.TRUE. */ @Override 32 | public Boolean evaluate( Object argument ) 33 | { 34 | return Boolean.TRUE; 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /pattern-core/src/main/java/cascading/pattern/model/tree/predicate/compound/AndPredicate.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | package cascading.pattern.model.tree.predicate.compound; 22 | 23 | import java.util.Iterator; 24 | import java.util.List; 25 | 26 | import cascading.pattern.model.tree.predicate.Predicate; 27 | 28 | /** Class AndPredicate returns true if all child predicates are true. */ 29 | public class AndPredicate extends CompoundPredicate 30 | { 31 | public AndPredicate( List children ) 32 | { 33 | super( children ); 34 | } 35 | 36 | public AndPredicate( Predicate...
children ) 37 | { 38 | super( children ); 39 | } 40 | 41 | /** 42 | * Combines the child results using three-valued logic: false if any result is false, 43 | * otherwise null (unknown) if any result is null, otherwise true. 44 | * The outcome does not depend on the order of the results. 45 | */ 46 | @Override 47 | public Boolean evaluate( Iterator results ) 48 | { 49 | boolean unknown = false; 50 | 51 | while( results.hasNext() ) 52 | { 53 | Boolean result = results.next(); 54 | 55 | if( result == null ) 56 | unknown = true; // keep scanning, a later false still decides the conjunction 57 | else if( !result ) 58 | return false; 59 | } 60 | 61 | return unknown ? null : Boolean.TRUE; 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /pattern-core/src/main/java/cascading/pattern/model/tree/predicate/compound/CompoundPredicate.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | package cascading.pattern.model.tree.predicate.compound; 22 | 23 | import java.util.Arrays; 24 | import java.util.Iterator; 25 | import java.util.List; 26 | 27 | import cascading.pattern.model.tree.predicate.Predicate; 28 | 29 | /** 30 | * Class CompoundPredicate is the base class for predicates that combine the results of child predicates. 31 | */ 32 | public abstract class CompoundPredicate extends Predicate 33 | { 34 | protected Predicate[] children; 35 | 36 | protected CompoundPredicate( List children ) 37 | { 38 | this.children = children.toArray( new Predicate[ children.size() ] ); 39 | } 40 | 41 | protected CompoundPredicate( Predicate...
children ) 42 | { 43 | this.children = children; /* the given array is stored as-is, not copied */ 44 | } 45 | 46 | /** Returns the internal child array itself (no defensive copy). */ public Predicate[] getChildren() 47 | { 48 | return children; 49 | } 50 | 51 | /** Combines the given results into a single result; the subclasses in this package return null to signal an unknown result. Presumably there is one result per child, in child order (confirm against the caller). */ public abstract Boolean evaluate( Iterator results ); 52 | 53 | /** Renders the simple class name followed by the children array. */ @Override 54 | public String toString() 55 | { 56 | final StringBuilder sb = new StringBuilder(); 57 | sb.append( getClass().getSimpleName() ).append( "{" ); 58 | sb.append( "children=" ).append( Arrays.toString( children ) ); 59 | sb.append( '}' ); 60 | return sb.toString(); 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /pattern-core/src/main/java/cascading/pattern/model/tree/predicate/compound/OrPredicate.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | package cascading.pattern.model.tree.predicate.compound; 22 | 23 | import java.util.Iterator; 24 | import java.util.List; 25 | 26 | import cascading.pattern.model.tree.predicate.Predicate; 27 | 28 | /** Class OrPredicate returns true if any child predicate returns true.
*/ 29 | public class OrPredicate extends CompoundPredicate 30 | { 31 | public OrPredicate( List children ) 32 | { 33 | super( children ); 34 | } 35 | 36 | public OrPredicate( Predicate... children ) 37 | { 38 | super( children ); 39 | } 40 | 41 | /** 42 | * Combines the child results using three-valued logic: true if any result is true, 43 | * otherwise null (unknown) if any result is null, otherwise false. 44 | * The outcome does not depend on the order of the results. 45 | */ 46 | @Override 47 | public Boolean evaluate( Iterator results ) 48 | { 49 | boolean unknown = false; 50 | 51 | while( results.hasNext() ) 52 | { 53 | Boolean result = results.next(); 54 | 55 | if( result == null ) 56 | unknown = true; // keep scanning, a later true still decides the disjunction 57 | else if( result ) 58 | return true; 59 | } 60 | 61 | return unknown ? null : Boolean.FALSE; 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /pattern-core/src/main/java/cascading/pattern/model/tree/predicate/compound/SurrogatePredicate.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | package cascading.pattern.model.tree.predicate.compound; 22 | 23 | import java.util.Iterator; 24 | import java.util.List; 25 | 26 | import cascading.pattern.model.tree.predicate.Predicate; 27 | 28 | /** 29 | * Class SurrogatePredicate returns the result of the first child predicate 30 | * that has a non-null (not missing) value. This is similar to an if-then-else statement.
31 | */ 32 | public class SurrogatePredicate extends CompoundPredicate 33 | { 34 | public SurrogatePredicate( List predicates ) 35 | { 36 | super( predicates ); 37 | } 38 | 39 | public SurrogatePredicate( Predicate... predicates ) 40 | { 41 | super( predicates ); 42 | } 43 | 44 | /** Returns the first non-null result, or null if every result is null or there are no results. */ @Override 45 | public Boolean evaluate( Iterator results ) 46 | { 47 | while( results.hasNext() ) 48 | { 49 | Boolean result = results.next(); 50 | 51 | if( result != null ) 52 | return result; 53 | } 54 | 55 | return null; 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /pattern-core/src/main/java/cascading/pattern/model/tree/predicate/compound/XorPredicate.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | package cascading.pattern.model.tree.predicate.compound; 22 | 23 | import java.util.Iterator; 24 | import java.util.List; 25 | 26 | import cascading.pattern.model.tree.predicate.Predicate; 27 | 28 | /** Class XorPredicate returns true if an odd number of child predicates return true. A null child result makes the whole result null.
*/ 29 | public class XorPredicate extends CompoundPredicate 30 | { 31 | public XorPredicate( List children ) 32 | { 33 | super( children ); 34 | } 35 | 36 | public XorPredicate( Predicate... children ) 37 | { 38 | super( children ); 39 | } 40 | 41 | /** Counts the true results; returns null as soon as a null result is seen, otherwise whether the count is odd. */ @Override 42 | public Boolean evaluate( Iterator results ) 43 | { 44 | int count = 0; 45 | 46 | while( results.hasNext() ) 47 | { 48 | Boolean result = results.next(); 49 | 50 | if( result == null ) 51 | return null; 52 | 53 | if( result ) 54 | count++; 55 | } 56 | 57 | return ( count % 2 ) == 1; // is odd 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /pattern-core/src/main/java/cascading/pattern/util/Logging.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | package cascading.pattern.util; 22 | 23 | /** Logging is an internal utility class for setting log levels.
*/ 24 | public class Logging 25 | { 26 | /** Sets the level of the "cascading" logger, resolved through the class loader of this class. */ public static void setLogLevel( String level ) 27 | { 28 | setLogLevel( Logging.class.getClassLoader(), "cascading", level ); 29 | } 30 | 31 | /** Sets the level of the named logger, resolved through the class loader of the given type. */ public static void setLogLevel( Class type, String log, String level ) 32 | { 33 | setLogLevel( type.getClassLoader(), log, level ); 34 | } 35 | 36 | /** Reflectively calls org.apache.log4j.Level.toLevel( level ) and then setLevel on the logger looked up for the given name. */ public static void setLogLevel( ClassLoader loader, String log, String level ) 37 | { 38 | Object loggerObject = getLoggerObject( loader, log ); 39 | 40 | Object levelObject = Reflection.invokeStaticMethod( loader, "org.apache.log4j.Level", "toLevel", 41 | new Object[]{level}, new Class[]{String.class} ); 42 | 43 | Reflection.invokeInstanceMethod( loggerObject, "setLevel", levelObject, levelObject.getClass() ); 44 | } 45 | 46 | /** Returns the string form of the logger's level, or an empty string when getLevel yields null. */ public static String getLogLevel( ClassLoader loader, String log ) 47 | { 48 | Object loggerObject = getLoggerObject( loader, log ); 49 | 50 | Object level = Reflection.invokeInstanceMethod( loggerObject, "getLevel" ); 51 | 52 | if( level == null ) 53 | return ""; 54 | 55 | return level.toString(); 56 | } 57 | 58 | /** Looks up the log4j root logger for a null or empty name, otherwise the logger with the given name. */ private static Object getLoggerObject( ClassLoader loader, String log ) 59 | { 60 | if( log == null || log.isEmpty() ) 61 | return Reflection.invokeStaticMethod( loader, "org.apache.log4j.Logger", "getRootLogger", null, null ); 62 | 63 | return Reflection.invokeStaticMethod( loader, "org.apache.log4j.Logger", "getLogger", 64 | new Object[]{log}, new Class[]{String.class} ); 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /pattern-core/src/test/java/cascading/pattern/ensemble/EnsemblePlatformTestCase.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project.
7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | package cascading.pattern.ensemble; 22 | 23 | import java.io.IOException; 24 | import java.util.List; 25 | 26 | import cascading.flow.Flow; 27 | import cascading.flow.FlowDef; 28 | import cascading.pattern.PatternPlatformTestCase; 29 | import cascading.pattern.model.tree.TreeSpec; 30 | import cascading.pipe.Pipe; 31 | import cascading.pipe.assembly.Discard; 32 | import cascading.tap.SinkMode; 33 | import cascading.tap.Tap; 34 | import cascading.tuple.Fields; 35 | import cascading.tuple.Tuple; 36 | import org.slf4j.Logger; 37 | import org.slf4j.LoggerFactory; 38 | 39 | /** 40 | * Class EnsemblePlatformTestCase is a base test case that runs an EnsembleSpec through a ParallelEnsembleAssembly and compares the written output with the input data. 41 | */ 42 | public class EnsemblePlatformTestCase extends PatternPlatformTestCase 43 | { 44 | public static final String DATA_PATH = System.getProperty( "test.data.path", "../pattern-core/src/test/resources/data/" ); 45 | private static final Logger LOG = LoggerFactory.getLogger( EnsemblePlatformTestCase.class ); /* logger is named after this class, not a sibling test */ 46 | private String resultPath; 47 | 48 | /** Returns the output path for the current test, computed on first use and then cached. */ protected String getResultPath() 49 | { 50 | if( resultPath == null ) 51 | resultPath = getOutputPath( getTestName() ); 52 | 53 | return resultPath; 54 | } 55 | 56 | /** Returns the directory under the root path where the flow plan for the current test is written. */ protected String getFlowPlanPath() 57 | { 58 | return getRootPath() + "/dot/" + getTestName(); 59 | } 60 | 61 | @Override 62 | public void setUp() throws Exception 63 | { 64 | super.setUp(); 65 | 66 | enableLogging( "cascading.pattern", "debug" ); 67 | } 68 | 69 | 
/** Builds a head-to-tail pipe that applies Discard with the predicted fields and then the ParallelEnsembleAssembly, runs it from the delimited input file to the result path, writes the flow plan as plan.dot, and asserts that the sink tuples equal the source tuples within 0.000001. */ protected void performTest( String inputData, Fields predictedFields, Fields expectedFields, EnsembleSpec ensembleSpec ) throws IOException 70 | { 71 | Pipe pipe = new Pipe( "head" ); 72 | pipe = new Discard( pipe, predictedFields ); 73 | pipe = new ParallelEnsembleAssembly( pipe, ensembleSpec ); 74 | pipe = new Pipe( "tail", pipe ); 75 | 76 | Tap source = getPlatform().getDelimitedFile( expectedFields.append( predictedFields ), true, ",", "\"", DATA_PATH + inputData, SinkMode.KEEP ); 77 | Tap sink = getPlatform().getDelimitedFile( Fields.ALL, true, ",", "\"", getResultPath(), SinkMode.REPLACE ); 78 | 79 | FlowDef flowDef = FlowDef.flowDef() 80 | .addSource( "head", source ) 81 | .addSink( "tail", sink ) 82 | .addTail( pipe ); 83 | 84 | Flow flow = getPlatform().getFlowConnector().connect( flowDef ); 85 | 86 | flow.writeDOT( getFlowPlanPath() + "/plan.dot" ); 87 | 88 | flow.complete(); 89 | 90 | Fields sourceSelector = source.getSourceFields(); 91 | Fields sinkSelector = sink.getSinkFields(); 92 | 93 | LOG.debug( "source select = {}", sourceSelector.printVerbose() ); 94 | LOG.debug( "sink select = {}", sinkSelector.printVerbose() ); 95 | 96 | List sourceTuples = asList( flow, source, sourceSelector ); 97 | List sinkTuples = asList( flow, sink, sinkSelector ); 98 | 99 | assertEquals( sourceTuples, sinkTuples, 0.000001d ); 100 | } 101 | } 102 | -------------------------------------------------------------------------------- /pattern-core/src/test/resources/data/randomforest-predict.tsv: -------------------------------------------------------------------------------- 1 | var0,var1,var2,label 2 | 0,1,0,0.666666 3 | 0,1,0,0.666666 4 | 0,1,0,0.666666 -------------------------------------------------------------------------------- /pattern-core/src/test/resources/data/randomforest.tsv: -------------------------------------------------------------------------------- 1 | var0,var1,var2,label 2 | 0,1,0,1 3 | 0,1,0,1 4 | 0,1,0,1
-------------------------------------------------------------------------------- /pattern-core/src/test/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | # 4 | # Project and contact information: http://www.cascading.org/ 5 | # 6 | # This file is part of the Cascading project. 7 | # 8 | # Licensed under the Apache License, Version 2.0 (the "License"); 9 | # you may not use this file except in compliance with the License. 10 | # You may obtain a copy of the License at 11 | # 12 | # http://www.apache.org/licenses/LICENSE-2.0 13 | # 14 | # Unless required by applicable law or agreed to in writing, software 15 | # distributed under the License is distributed on an "AS IS" BASIS, 16 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | # See the License for the specific language governing permissions and 18 | # limitations under the License. 
19 | # 20 | 21 | # log4j configuration used during build and unit tests 22 | 23 | log4j.rootLogger=info,stdout 24 | log4j.threshold=ALL 25 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender 26 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 27 | log4j.appender.stdout.layout.ConversionPattern=%d{ISO8601} %-5p [%t] %c{2} (%F:%M(%L)) - %m%n 28 | 29 | log4j.logger.cascading.pattern=INFO 30 | #log4j.logger.cascading.pattern.shell=DEBUG 31 | 32 | #log4j.logger.cascading=OFF 33 | #log4j.logger.cascading=DEBUG 34 | #log4j.logger.cascading.flow=DEBUG 35 | #log4j.logger.cascading.operation=DEBUG 36 | #log4j.logger.cascading.cascade=DEBUG 37 | -------------------------------------------------------------------------------- /pattern-examples/.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | dot 3 | out 4 | -------------------------------------------------------------------------------- /pattern-examples/build.gradle: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 
19 | */ 20 | 21 | ext.hadoopVersion = '1.0.4' 22 | 23 | dependencies { 24 | compile( group: 'cascading', name: 'cascading-hadoop', version: cascadingVersion ) 25 | 26 | compile project( ':pattern-pmml' ) 27 | compile project( ':pattern-hadoop' ) 28 | compile project( ':pattern-local' ) 29 | 30 | compile( 'net.sf.jopt-simple:jopt-simple:4.4' ) 31 | 32 | // hadoop deps for Main example 33 | providedCompile( group: 'org.apache.hadoop', name: 'hadoop-core', version: hadoopVersion ) { 34 | exclude group: 'ant' 35 | exclude group: 'junit' 36 | exclude group: 'oro' // causes transient build maven failures, ftw 37 | } 38 | 39 | // included so RegressionFlowExample can run in IDE, it only relies on local mode 40 | runtime group: 'log4j', name: 'log4j', version: '1.2.16' 41 | runtime group: 'org.slf4j', name: 'slf4j-api', version: '1.6.1' 42 | runtime group: 'org.slf4j', name: 'slf4j-log4j12', version: '1.6.1' 43 | } 44 | 45 | jar { 46 | description = "Assembles a Hadoop-ready JAR file" 47 | doFirst { 48 | into( 'lib' ) { 49 | from configurations.compile 50 | } 51 | } 52 | 53 | manifest { 54 | attributes( "Main-Class": "cascading.pattern.Main" ) 55 | } 56 | } 57 | 58 | uploadArchives.enabled = false 59 | 60 | platformTest.enabled = false 61 | -------------------------------------------------------------------------------- /pattern-examples/data/iris.glm.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 |
4 | 5 | 6 | 2013-01-25 10:01:54 7 |
8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 |
52 | -------------------------------------------------------------------------------- /pattern-examples/data/iris.hc.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 |
4 | 5 | 6 | 2013-01-25 10:01:53 7 |
8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 5.006 3.428 1.462 0.246 30 | 31 | 32 | 5.9296875 2.7578125 4.4109375 1.4390625 33 | 34 | 35 | 6.85277777777778 3.075 5.78611111111111 2.09722222222222 36 | 37 | 38 |
39 | -------------------------------------------------------------------------------- /pattern-examples/data/iris.kmeans.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 |
4 | 5 | 6 | 2013-01-25 10:01:53 7 |
8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 5.006 3.428 1.462 0.246 30 | 31 | 32 | 6.85 3.07368421052632 5.74210526315789 2.07105263157895 33 | 34 | 35 | 5.90161290322581 2.74838709677419 4.39354838709678 1.43387096774194 36 | 37 | 38 |
39 | -------------------------------------------------------------------------------- /pattern-examples/data/iris.lm_p.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 |
4 | 5 | 6 | 2013-01-25 10:01:53 7 |
8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 |
37 | -------------------------------------------------------------------------------- /pattern-examples/data/iris.multinom.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 |
4 | 5 | 6 | 2013-01-25 10:01:53 7 |
8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 |
48 | -------------------------------------------------------------------------------- /pattern-examples/data/sample.rf.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 |
4 | 5 | 6 | 2013-01-25 10:01:08 7 |
8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 |
80 | -------------------------------------------------------------------------------- /pattern-examples/examples/py/gen_orders.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # encoding: utf-8 3 | # NOTE: Python 2 syntax (print statements; map() results are concatenated with lists) 4 | import random 5 | import sys 6 | import uuid 7 | 8 | 9 | debug = False # True 10 | 11 | CUSTOMER_SEGMENTS = ( # each entry: [upper probability bound, [label, distribution function, mean, sigma, format string]] 12 | [0.2, ["0", random.gauss, 0.25, 0.75, "%0.2f"]], 13 | [0.8, ["0", random.gauss, 1.5, 0.25, "%0.2f"]], 14 | [0.9, ["1", random.gauss, 0.6, 0.2, "%0.2f"]], 15 | [1.0, ["1", random.gauss, 0.75, 0.2, "%0.2f"]] 16 | ) 17 | 18 | def gen_row (segments, num_col): # build one row from the first segment whose bound is >= a uniform random draw 19 | coin_flip = random.random() 20 | 21 | for prob, rand_var in segments: 22 | if debug: 23 | print coin_flip, prob 24 | 25 | if coin_flip <= prob: 26 | (label, dist, mean, sigma, format) = rand_var 27 | order_id = str(uuid.uuid1()).split("-")[0] # first dash-separated group of a uuid1 28 | return [label] + map(lambda x: format % dist(mean, sigma), range(0, num_col)) + [order_id] # row is [label, num_col formatted random values, order_id] 29 | 30 | 31 | if __name__ == '__main__': # arguments: <num_row> <num_col>; prints a tab-separated header and num_row rows 32 | num_row = int(sys.argv[1]) 33 | num_col = int(sys.argv[2]) 34 | 35 | print "\t".join(["label"] + map(lambda x: "v" + str(x), range(0, num_col)) + ["order_id"]) 36 | 37 | for i in range(0, num_row): 38 | print "\t".join(gen_row(CUSTOMER_SEGMENTS, num_col)) 39 | -------------------------------------------------------------------------------- /pattern-examples/examples/r/pmml_models.R: -------------------------------------------------------------------------------- 1 | ## uncomment the following two lines to install the required libraries 2 | #install.packages("pmml") 3 | #install.packages("randomForest") 4 | 5 | library(pmml) 6 | library(randomForest) 7 | 8 | ## load the "baseline" reference data 9 | 10 | dat_folder <- './data' 11 | data <- read.table(file=paste(dat_folder, "orders.tsv", sep="/"), sep="\t", quote="", na.strings="NULL", header=TRUE, encoding="UTF8") 12 | 13 | dim(data) 14 | head(data) 15 | 16 | ## split data into test and train sets 17 | 18
## uncomment the following two lines to install the required libraries
#install.packages("pmml")
#install.packages("randomForest")

library(pmml)
library(randomForest)

## load the "baseline" reference data

dat_folder <- '.'
data <- read.table(file=paste(dat_folder, "orders.tsv", sep="/"), sep="\t", quote="", na.strings="NULL", header=TRUE, encoding="UTF8")

dim(data)
head(data)

## split data into test and train sets
## (positional split: the first `split_ratio` share of the rows is held out)

set.seed(71)
split_ratio <- 2/10
split <- round(dim(data)[1] * split_ratio)

data_tests <- data[1:split,]
dim(data_tests)
print(table(data_tests[,"label"]))

data_train <- data[(split + 1):dim(data)[1],]
## drop the order_id column by name; unlike a negative integer index this
## leaves the frame untouched (instead of dropping every column) when the
## column is absent
data_train <- data_train[, colnames(data_train) != "order_id", drop=FALSE]
dim(data_train)

## train a RandomForest model

f <- as.formula("as.factor(label) ~ .")
fit <- randomForest(f, data_train, ntree=2)

## inspect the model, then score the FULL data set (training rows included)
## so that every row can be exported with its prediction; the confusion
## matrix below is therefore not a pure holdout estimate

print(fit$importance)
print(fit)

predicted <- predict(fit, data)
data$predict <- predicted
## look the label up by name rather than assuming it is the first column
confuse <- table(pred = predicted, true = data[,"label"])
print(confuse)

## export predicted labels to TSV

write.table(data, file=paste(dat_folder, "huge.tsv", sep="/"), quote=FALSE, sep="\t", row.names=FALSE)

## export RF model to PMML

saveXML(pmml(fit), file=paste(dat_folder, "huge.rf.xml", sep="/"))
10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | package cascading.pattern; 22 | 23 | import java.io.File; 24 | import java.util.Properties; 25 | 26 | import cascading.flow.Flow; 27 | import cascading.flow.FlowDef; 28 | import cascading.flow.hadoop.HadoopFlowConnector; 29 | import cascading.pattern.pmml.PMMLPlanner; 30 | import cascading.property.AppProps; 31 | import cascading.scheme.hadoop.TextDelimited; 32 | import cascading.tap.Tap; 33 | import cascading.tap.hadoop.Hfs; 34 | import cascading.tuple.Fields; 35 | import joptsimple.OptionParser; 36 | import joptsimple.OptionSet; 37 | 38 | 39 | public class Main 40 | { 41 | /** @param args */ 42 | public static void main( String[] args ) throws RuntimeException 43 | { 44 | String inputPath = args[ 0 ]; 45 | String classifyPath = args[ 1 ]; 46 | 47 | // set up the config properties 48 | Properties properties = new Properties(); 49 | AppProps.setApplicationJarClass( properties, Main.class ); 50 | HadoopFlowConnector flowConnector = new HadoopFlowConnector( properties ); 51 | 52 | // create source and sink taps 53 | Tap inputTap = new Hfs( new TextDelimited( true, "\t" ), inputPath ); 54 | Tap classifyTap = new Hfs( new TextDelimited( true, "\t" ), classifyPath ); 55 | 56 | // handle command line options 57 | OptionParser optParser = new OptionParser(); 58 | optParser.accepts( "pmml" ).withRequiredArg(); 59 | 60 | OptionSet options = optParser.parse( args ); 61 | 62 | // connect the taps, pipes, etc., into a flow 63 | FlowDef flowDef = FlowDef.flowDef() 64 | .setName( "classify" ) 65 | .addSource( "input", 
inputTap ) 66 | .addSink( "classify", classifyTap ); 67 | 68 | // build a Cascading assembly from the PMML description 69 | if( options.hasArgument( "pmml" ) ) 70 | { 71 | String pmmlPath = (String) options.valuesOf( "pmml" ).get( 0 ); 72 | 73 | PMMLPlanner pmmlPlanner = new PMMLPlanner() 74 | .setPMMLInput( new File( pmmlPath ) ) 75 | .retainOnlyActiveIncomingFields() 76 | .setDefaultPredictedField( new Fields( "predict", Double.class ) ); // default value if missing from the model 77 | 78 | flowDef.addAssemblyPlanner( pmmlPlanner ); 79 | } 80 | 81 | // write a DOT file and run the flow 82 | Flow classifyFlow = flowConnector.connect( flowDef ); 83 | classifyFlow.writeDOT( "dot/classify.dot" ); 84 | classifyFlow.complete(); 85 | } 86 | } 87 | -------------------------------------------------------------------------------- /pattern-examples/src/main/java/cascading/pattern/function/RandomForestSpec.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 
19 | */ 20 | 21 | package cascading.pattern.function; 22 | 23 | import java.util.ArrayList; 24 | import java.util.List; 25 | 26 | import cascading.pattern.ensemble.EnsembleSpec; 27 | import cascading.pattern.model.ModelSchema; 28 | import cascading.pattern.model.tree.Tree; 29 | import cascading.pattern.model.tree.TreeSpec; 30 | import cascading.pattern.model.tree.decision.DecisionTree; 31 | import cascading.tuple.Fields; 32 | 33 | /** 34 | * 35 | */ 36 | public class RandomForestSpec extends EnsembleSpec 37 | { 38 | public RandomForestSpec( ModelSchema modelSchema ) 39 | { 40 | super( modelSchema ); 41 | } 42 | 43 | public RandomForestSpec( ModelSchema modelSchema, List treeSpecs ) 44 | { 45 | super( modelSchema, treeSpecs ); 46 | } 47 | 48 | public void addTreeSpec( TreeSpec treeSpec ) 49 | { 50 | addModelSpec( treeSpec ); 51 | } 52 | 53 | public List getTrees() 54 | { 55 | List trees = new ArrayList(); 56 | 57 | for( TreeSpec treeSpec : getModelSpecs() ) 58 | trees.add( treeSpec.getTree() ); 59 | 60 | return trees; 61 | } 62 | 63 | public DecisionTree[] getDecisionTrees( String[] categories, Fields argumentFields ) 64 | { 65 | List trees = getTrees(); 66 | DecisionTree[] decisionTrees = new DecisionTree[ trees.size() ]; 67 | 68 | for( int i = 0; i < trees.size(); i++ ) 69 | decisionTrees[ i ] = trees.get( i ).createDecisionTree( categories, argumentFields ); 70 | 71 | return decisionTrees; 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /pattern-examples/src/main/java/cascading/pattern/pmml/iris/RegressionFlowExample.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 
7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | package cascading.pattern.pmml.iris; 22 | 23 | import java.io.File; 24 | import java.io.IOException; 25 | 26 | import cascading.flow.Flow; 27 | import cascading.flow.FlowDef; 28 | import cascading.flow.local.LocalFlowConnector; 29 | import cascading.pattern.pmml.PMMLPlanner; 30 | import cascading.scheme.local.TextDelimited; 31 | import cascading.tap.SinkMode; 32 | import cascading.tap.Tap; 33 | import cascading.tap.local.FileTap; 34 | import cascading.tuple.TupleEntryIterator; 35 | 36 | /** 37 | * 38 | */ 39 | public class RegressionFlowExample 40 | { 41 | public static void main( String[] args ) throws Exception 42 | { 43 | new RegressionFlowExample().run(); 44 | } 45 | 46 | public void run() throws IOException 47 | { 48 | Tap irisTap = new FileTap( new TextDelimited( true, "\t", "\"" ), "data/iris.lm_p.tsv", SinkMode.KEEP ); 49 | 50 | Tap resultsTap = new FileTap( new TextDelimited( true, "\t", "\"" ), "build/test/output/flow/results.tsv", SinkMode.REPLACE ); 51 | 52 | FlowDef flowDef = FlowDef.flowDef() 53 | .setName( "pmml flow" ) 54 | .addSource( "iris", irisTap ) 55 | .addSink( "results", resultsTap ); 56 | 57 | PMMLPlanner pmmlPlanner = new PMMLPlanner() 58 | .setPMMLInput( new File( "data/iris.lm_p.xml" ) ) 59 | .retainOnlyActiveIncomingFields(); 60 | 61 | flowDef.addAssemblyPlanner( pmmlPlanner ); 62 | 63 | Flow flow = new 
LocalFlowConnector().connect( flowDef ); 64 | 65 | flow.complete(); 66 | 67 | TupleEntryIterator iterator = resultsTap.openForRead( flow.getFlowProcess() ); 68 | 69 | while( iterator.hasNext() ) 70 | System.out.println( iterator.next() ); 71 | 72 | iterator.close(); 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /pattern-hadoop/README.md: -------------------------------------------------------------------------------- 1 | # pattern-hadoop 2 | 3 | This sub-project simply includes all Cascading Hadoop mode dependencies to ease inclusion in dependent projects. -------------------------------------------------------------------------------- /pattern-hadoop/build.gradle: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 
19 | */ 20 | 21 | ext.hadoopVersion = '1.0.4' 22 | 23 | if( System.properties[ 'hadoop.release.final' ] ) 24 | hadoopVersion = System.properties[ 'hadoop.release.final' ] 25 | 26 | dependencies { 27 | 28 | testCompile project( path: ':pattern-pmml', configuration: 'testArtifacts' ) 29 | 30 | compile group: 'cascading', name: 'cascading-hadoop', version: cascadingVersion, changing: true 31 | 32 | providedCompile group: 'org.slf4j', name: 'slf4j-api', version: '1.6.1' 33 | 34 | testCompile group: 'cascading', name: 'cascading-core', version: cascadingVersion, classifier: 'tests', changing: true 35 | testCompile group: 'cascading', name: 'cascading-hadoop', version: cascadingVersion, classifier: 'tests', changing: true 36 | testCompile group: 'cascading', name: 'cascading-platform', version: cascadingVersion, classifier: 'tests', changing: true 37 | 38 | // hadoop deps 39 | providedCompile( group: 'org.apache.hadoop', name: 'hadoop-core', version: hadoopVersion ) { 40 | exclude group: 'ant' 41 | exclude group: 'junit' 42 | exclude group: 'oro' // causes transient build maven failures, ftw 43 | } 44 | 45 | testCompile( group: 'org.apache.hadoop', name: 'hadoop-test', version: hadoopVersion ) { 46 | exclude group: 'oro' // causes transient build maven failures, ftw 47 | } 48 | testCompile group: 'commons-io', name: 'commons-io', version: '2.1' 49 | 50 | testRuntime 'javax.ws.rs:jsr311-api:1.1.1' // missed dep in hadoop, go figure 51 | } -------------------------------------------------------------------------------- /pattern-local/README.md: -------------------------------------------------------------------------------- 1 | # pattern-local 2 | 3 | This sub-project simply includes all Cascading local mode dependencies to ease inclusion in dependent projects. 
-------------------------------------------------------------------------------- /pattern-local/build.gradle: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | dependencies { 22 | 23 | testCompile project( path: ':pattern-pmml', configuration: 'testArtifacts' ) 24 | 25 | compile group: 'cascading', name: 'cascading-local', version: cascadingVersion, changing: true 26 | 27 | runtime group: 'log4j', name: 'log4j', version: '1.2.16' 28 | runtime group: 'org.slf4j', name: 'slf4j-api', version: '1.6.1' 29 | runtime group: 'org.slf4j', name: 'slf4j-log4j12', version: '1.6.1' 30 | 31 | testCompile group: 'cascading', name: 'cascading-core', version: cascadingVersion, classifier: 'tests', changing: true 32 | testCompile group: 'cascading', name: 'cascading-local', version: cascadingVersion, classifier: 'tests', changing: true 33 | testCompile group: 'cascading', name: 'cascading-platform', version: cascadingVersion, classifier: 'tests', changing: true 34 | } -------------------------------------------------------------------------------- /pattern-local/src/main/resources/log4j.properties: 
-------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | # 4 | # Project and contact information: http://www.cascading.org/ 5 | # 6 | # This file is part of the Cascading project. 7 | # 8 | # Licensed under the Apache License, Version 2.0 (the "License"); 9 | # you may not use this file except in compliance with the License. 10 | # You may obtain a copy of the License at 11 | # 12 | # http://www.apache.org/licenses/LICENSE-2.0 13 | # 14 | # Unless required by applicable law or agreed to in writing, software 15 | # distributed under the License is distributed on an "AS IS" BASIS, 16 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | # See the License for the specific language governing permissions and 18 | # limitations under the License. 19 | # 20 | 21 | # log4j configuration used during build and unit tests 22 | 23 | log4j.rootLogger=info,stdout 24 | log4j.threshold=ALL 25 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender 26 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 27 | log4j.appender.stdout.layout.ConversionPattern=%d{ISO8601} %-5p [%t] %c{2} (%F:%M(%L)) - %m%n 28 | 29 | log4j.logger.cascading.pattern=INFO 30 | #log4j.logger.cascading.pattern.shell=DEBUG 31 | 32 | #log4j.logger.cascading=OFF 33 | #log4j.logger.cascading=DEBUG 34 | #log4j.logger.cascading.flow=DEBUG 35 | #log4j.logger.cascading.operation=DEBUG 36 | #log4j.logger.cascading.cascade=DEBUG 37 | -------------------------------------------------------------------------------- /pattern-pmml/README.md: -------------------------------------------------------------------------------- 1 | # pattern-pmml 2 | 3 | This sub-project contains all PMML related code and base PMML JUnit test cases. 4 | 5 | If submitting any issues, add a test to `SimplePMMLPlatformTest` that references a PMML document in 6 | `src/test/resources/pmml`. 
-------------------------------------------------------------------------------- /pattern-pmml/build.gradle: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | dependencies { 22 | 23 | compile project( ':pattern-core' ) 24 | testCompile project( path: ':pattern-core', configuration: 'testArtifacts' ) 25 | 26 | compile( group: 'org.jpmml', name: 'pmml-model', version: '1.1.7' ) 27 | 28 | compile group: 'org.slf4j', name: 'slf4j-api', version: '1.6.1' 29 | 30 | testCompile group: 'cascading', name: 'cascading-core', version: cascadingVersion, classifier: 'tests', changing: true 31 | testCompile group: 'cascading', name: 'cascading-platform', version: cascadingVersion, classifier: 'tests', changing: true 32 | 33 | testCompile group: 'commons-io', name: 'commons-io', version: '2.1' 34 | 35 | testCompile group: 'log4j', name: 'log4j', version: '1.2.16' // for setting log level as a cli option 36 | // testCompile group: 'org.slf4j', name: 'slf4j-api', version: '1.6.1' 37 | testCompile group: 'org.slf4j', name: 'slf4j-log4j12', version: '1.6.1' 38 | } 39 | 40 | platformTest.enabled = false 41 | 
-------------------------------------------------------------------------------- /pattern-pmml/src/main/java/cascading/pattern/pmml/ArrayUtil.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Backport of ArrayUtil from https://github.com/jpmml/jpmml at 8a8b2fcb867b4fa3cde85f75916f8d23fbd972a3 (Apache Licensed) 3 | * */ 4 | 5 | /* 6 | * Copyright (c) 2012 University of Tartu 7 | */ 8 | 9 | package cascading.pattern.pmml; 10 | 11 | import java.util.ArrayList; 12 | import java.util.Collections; 13 | import java.util.List; 14 | import cascading.pattern.PatternException; 15 | import org.dmg.pmml.Array; 16 | 17 | public class ArrayUtil 18 | { 19 | 20 | static public List parse( Array array ) 21 | { 22 | List result; 23 | 24 | Array.Type type = array.getType(); 25 | switch( type ) 26 | { 27 | case INT: 28 | case REAL: 29 | result = tokenize( array.getValue(), false ); 30 | break; 31 | case STRING: 32 | result = tokenize( array.getValue(), true ); 33 | break; 34 | default: 35 | throw new PatternException( "unsupported feature " + array + " " + type ); 36 | } 37 | 38 | Integer n = array.getN(); 39 | if( n != null && n.intValue() != result.size() ) 40 | throw new PatternException( "invalid feature " + array ); 41 | 42 | return result; 43 | } 44 | 45 | static public List tokenize( String string, boolean enableQuotes ) 46 | { 47 | List result = new ArrayList(); 48 | 49 | StringBuilder sb = new StringBuilder(); 50 | 51 | boolean quoted = false; 52 | 53 | tokens: 54 | for( int i = 0; i < string.length(); i++ ) 55 | { 56 | char c = string.charAt( i ); 57 | 58 | if( quoted ) 59 | { 60 | 61 | if( c == '\\' && i < ( string.length() - 1 ) ) 62 | { 63 | c = string.charAt( i + 1 ); 64 | 65 | if( c == '\"' ) 66 | { 67 | sb.append( '\"' ); 68 | 69 | i++; 70 | } 71 | else 72 | sb.append( '\\' ); 73 | 74 | continue tokens; 75 | } // End if 76 | 77 | sb.append( c ); 78 | 79 | if( c == '\"' ) 80 | { 81 | result.add( createToken( sb, 
enableQuotes ) ); 82 | quoted = false; 83 | } 84 | } 85 | else 86 | { 87 | if( c == '\"' && enableQuotes ) 88 | { 89 | if( sb.length() > 0 ) 90 | result.add( createToken( sb, enableQuotes ) ); 91 | 92 | sb.append( '\"' ); 93 | 94 | quoted = true; 95 | } 96 | else if( Character.isWhitespace( c ) ) 97 | { 98 | if( sb.length() > 0 ) 99 | result.add( createToken( sb, enableQuotes ) ); 100 | } 101 | else 102 | sb.append( c ); 103 | } 104 | } 105 | 106 | if( sb.length() > 0 ) 107 | result.add( createToken( sb, enableQuotes ) ); 108 | 109 | return Collections.unmodifiableList( result ); 110 | } 111 | 112 | static 113 | private String createToken( StringBuilder sb, boolean enableQuotes ) 114 | { 115 | String result; 116 | 117 | if( sb.length() > 1 && ( sb.charAt( 0 ) == '\"' && sb.charAt( sb.length() - 1 ) == '\"' ) && enableQuotes ) 118 | result = sb.substring( 1, sb.length() - 1 ); 119 | else 120 | result = sb.substring( 0, sb.length() ); 121 | 122 | sb.setLength( 0 ); 123 | return result; 124 | } 125 | 126 | } -------------------------------------------------------------------------------- /pattern-pmml/src/main/java/cascading/pattern/pmml/ClusteringUtil.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | package cascading.pattern.pmml; 22 | 23 | import cascading.pattern.model.clustering.compare.AbsoluteDifferenceCompareFunction; 24 | import org.dmg.pmml.ClusteringModel; 25 | import org.dmg.pmml.CompareFunctionType; 26 | 27 | /** 28 | * 29 | */ 30 | class ClusteringUtil 31 | { 32 | static AbsoluteDifferenceCompareFunction setComparisonFunction( ClusteringModel model ) 33 | { 34 | CompareFunctionType compareFunction = model.getComparisonMeasure().getCompareFunction(); 35 | 36 | switch( compareFunction ) 37 | { 38 | case ABS_DIFF: 39 | return new AbsoluteDifferenceCompareFunction(); 40 | case GAUSS_SIM: 41 | break; 42 | case DELTA: 43 | break; 44 | case EQUAL: 45 | break; 46 | case TABLE: 47 | break; 48 | } 49 | 50 | throw new UnsupportedOperationException( "unknown comparison function type: " + compareFunction ); 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /pattern-pmml/src/main/java/cascading/pattern/pmml/DataFields.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | package cascading.pattern.pmml; 22 | 23 | import java.lang.reflect.Type; 24 | 25 | import cascading.pattern.PatternException; 26 | import cascading.pattern.datafield.CategoricalDataField; 27 | import cascading.pattern.datafield.ContinuousDataField; 28 | import cascading.pattern.datafield.DataField; 29 | import org.dmg.pmml.DataType; 30 | import org.dmg.pmml.OpType; 31 | import org.slf4j.Logger; 32 | import org.slf4j.LoggerFactory; 33 | 34 | import static cascading.pattern.pmml.PMMLUtil.asStrings; 35 | 36 | 37 | class DataFields 38 | { 39 | /** Field LOG */ 40 | private static final Logger LOG = LoggerFactory.getLogger( DataFields.class ); 41 | 42 | public static DataField createDataFields( org.dmg.pmml.DataField dataField ) 43 | { 44 | String name = dataField.getName().getValue(); 45 | 46 | OpType optype = dataField.getOptype(); 47 | 48 | if( optype == OpType.CATEGORICAL ) 49 | return createCategoricalDataField( name, dataField ); 50 | else if( optype == OpType.CONTINUOUS ) 51 | return createContinuousDataField( name, dataField ); 52 | 53 | throw new UnsupportedOperationException( "unsupported optype: " + optype ); 54 | } 55 | 56 | public static ContinuousDataField createContinuousDataField( String name, org.dmg.pmml.DataField dataField ) 57 | { 58 | if( !dataField.getIntervals().isEmpty() ) 59 | throw new UnsupportedOperationException( "intervals not supported" ); 60 | 61 | DataType dataType = dataField.getDataType(); 62 | Type type = DataTypes.getPmmlToType( dataType ); 63 | 64 | return new ContinuousDataField( name, type ); 65 | } 66 | 67 | public static CategoricalDataField createCategoricalDataField( String name, org.dmg.pmml.DataField dataField ) 68 | { 69 | DataType dataType = dataField.getDataType(); 70 | Type type = DataTypes.getPmmlToType( dataType ); 71 | 72 | if( type == String.class ) 73 | return new CategoricalDataField( 
name, type, asStrings( dataField.getValues() ) ); 74 | 75 | String message = String.format( "unsupported data type: %s", dataType ); 76 | LOG.error( message ); 77 | 78 | throw new PatternException( message ); 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /pattern-pmml/src/main/java/cascading/pattern/pmml/DataTypes.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 
19 | */ 20 | 21 | package cascading.pattern.pmml; 22 | 23 | import java.lang.reflect.Type; 24 | import java.util.Map; 25 | 26 | import cascading.tuple.coerce.Coercions; 27 | import com.google.common.collect.BiMap; 28 | import com.google.common.collect.HashBiMap; 29 | import com.google.common.primitives.Primitives; 30 | import org.dmg.pmml.DataType; 31 | 32 | /** 33 | * 34 | */ 35 | class DataTypes 36 | { 37 | private static Map pmmlToType = HashBiMap.create(); 38 | private static Map typeToPmml = ( (BiMap) pmmlToType ).inverse(); 39 | 40 | static 41 | { 42 | pmmlToType.put( DataType.BOOLEAN, Boolean.class ); 43 | pmmlToType.put( DataType.INTEGER, Integer.class ); 44 | pmmlToType.put( DataType.FLOAT, Float.class ); 45 | pmmlToType.put( DataType.DOUBLE, Double.class ); 46 | pmmlToType.put( DataType.STRING, String.class ); 47 | } 48 | 49 | public static Type getPmmlToType( DataType dataType ) 50 | { 51 | return pmmlToType.get( dataType ); 52 | } 53 | 54 | public static DataType getTypeToPmml( Type type ) 55 | { 56 | if( type instanceof Class ) 57 | type = Primitives.wrap( (Class) type ); 58 | 59 | return typeToPmml.get( type ); 60 | } 61 | 62 | public static Object coerceTo( Object value, DataType dataType ) 63 | { 64 | return Coercions.coerce( value, getPmmlToType( dataType ) ); 65 | } 66 | 67 | } 68 | -------------------------------------------------------------------------------- /pattern-pmml/src/main/java/cascading/pattern/pmml/PMMLTypeResolver.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 
10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | package cascading.pattern.pmml; 22 | 23 | import java.lang.reflect.Type; 24 | import java.util.Map; 25 | 26 | import cascading.scheme.util.FieldTypeResolver; 27 | 28 | /** 29 | * 30 | */ 31 | class PMMLTypeResolver implements FieldTypeResolver 32 | { 33 | private transient PMMLPlanner pmmlPlanner; 34 | private Map pmmlDictionary; 35 | 36 | public PMMLTypeResolver( PMMLPlanner pmmlPlanner ) 37 | { 38 | this.pmmlPlanner = pmmlPlanner; 39 | this.pmmlDictionary = pmmlPlanner.getPMMLModel().getDictionary(); 40 | } 41 | 42 | protected synchronized Map getPmmlDictionary() 43 | { 44 | // intentionally build a new one in case new values are added 45 | // after serialization, when the pmmlPlanner is null, use the cached version 46 | if( pmmlPlanner != null ) 47 | this.pmmlDictionary = pmmlPlanner.getPMMLModel().getDictionary(); 48 | 49 | return pmmlDictionary; 50 | } 51 | 52 | @Override 53 | public Type inferTypeFrom( int ordinal, String fieldName ) 54 | { 55 | Map pmmlDictionary = getPmmlDictionary(); 56 | Type type = pmmlDictionary.get( fieldName ); 57 | 58 | if( type == null && pmmlDictionary.containsKey( fieldName ) ) 59 | throw new UnsupportedOperationException( "requested type is unsupported for fieldName: " + fieldName ); 60 | 61 | if( type == null ) 62 | return String.class; 63 | 64 | return type; 65 | } 66 | 67 | @Override 68 | public String cleanField( int ordinal, String fieldName, Type type ) 69 | { 70 | return fieldName; 71 | } 72 | 73 | @Override 74 | public String prepareField( int i, String 
fieldName, Type type ) 75 | { 76 | return fieldName; 77 | } 78 | } 79 | -------------------------------------------------------------------------------- /pattern-pmml/src/main/java/cascading/pattern/pmml/RegressionUtil.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 
19 | */ 20 | 21 | package cascading.pattern.pmml; 22 | 23 | import cascading.pattern.model.generalregression.Parameter; 24 | import cascading.pattern.model.generalregression.RegressionTable; 25 | import cascading.pattern.model.generalregression.normalization.Normalization; 26 | import cascading.pattern.model.generalregression.normalization.SoftMaxNormalization; 27 | import cascading.pattern.model.generalregression.predictor.CovariantPredictor; 28 | import cascading.pattern.model.generalregression.predictor.FactorPredictor; 29 | import org.dmg.pmml.CategoricalPredictor; 30 | import org.dmg.pmml.NumericPredictor; 31 | import org.dmg.pmml.RegressionModel; 32 | 33 | /** 34 | * 35 | */ 36 | class RegressionUtil 37 | { 38 | public static RegressionTable createTable( org.dmg.pmml.RegressionTable regressionTable ) 39 | { 40 | RegressionTable generalRegressionTable = new RegressionTable(); 41 | 42 | String targetCategory = regressionTable.getTargetCategory(); 43 | 44 | if( targetCategory != null ) 45 | generalRegressionTable.setTargetCategory( targetCategory ); 46 | 47 | generalRegressionTable.addParameter( new Parameter( "intercept", regressionTable.getIntercept() ) ); 48 | 49 | int count = 0; 50 | 51 | for( CategoricalPredictor predictor : regressionTable.getCategoricalPredictors() ) 52 | { 53 | String name = predictor.getName().getValue(); 54 | String value = predictor.getValue(); 55 | double coefficient = predictor.getCoefficient(); 56 | 57 | generalRegressionTable.addParameter( new Parameter( "f" + count++, coefficient, new FactorPredictor( name, value ) ) ); 58 | } 59 | 60 | for( NumericPredictor predictor : regressionTable.getNumericPredictors() ) 61 | { 62 | String name = predictor.getName().getValue(); 63 | int exponent = predictor.getExponent(); 64 | 65 | double coefficient = predictor.getCoefficient(); 66 | 67 | generalRegressionTable.addParameter( new Parameter( "f" + count++, coefficient, new CovariantPredictor( name, exponent ) ) ); 68 | } 69 | 70 | return 
generalRegressionTable; 71 | } 72 | 73 | static Normalization getNormalizationMethod( RegressionModel model ) 74 | { 75 | switch( model.getNormalizationMethod() ) 76 | { 77 | case NONE: 78 | return Normalization.NONE; 79 | case SIMPLEMAX: 80 | break; 81 | case SOFTMAX: 82 | return new SoftMaxNormalization(); 83 | case LOGIT: 84 | break; 85 | case PROBIT: 86 | break; 87 | case CLOGLOG: 88 | break; 89 | case EXP: 90 | break; 91 | case LOGLOG: 92 | break; 93 | case CAUCHIT: 94 | break; 95 | } 96 | 97 | throw new UnsupportedOperationException( "unsupported normalization method: " + model.getNormalizationMethod() ); 98 | } 99 | } 100 | -------------------------------------------------------------------------------- /pattern-pmml/src/test/java/cascading/pattern/pmml/SimplePMMLPlatformTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 
19 | */ 20 | 21 | package cascading.pattern.pmml; 22 | 23 | import java.io.IOException; 24 | 25 | import cascading.tuple.Fields; 26 | import cascading.tuple.Tuple; 27 | import org.junit.Ignore; 28 | import org.junit.Test; 29 | 30 | /** 31 | * 32 | */ 33 | public class SimplePMMLPlatformTest extends PMMLPlatformTestCase 34 | { 35 | public SimplePMMLPlatformTest() 36 | { 37 | } 38 | 39 | @Test 40 | public void testKMeans() throws IOException 41 | { 42 | pmmlTest( "kmeans", Fields.NONE, Fields.LAST ); 43 | } 44 | 45 | @Test 46 | public void testRandomforest() throws IOException 47 | { 48 | pmmlTest( "randomforest", Fields.FIRST, Fields.LAST ); 49 | } 50 | 51 | @Test 52 | public void testIrisGLM() throws IOException 53 | { 54 | pmmlTest( "iris.glm", Fields.NONE, new Fields( "predict", double.class ) ); 55 | } 56 | 57 | @Test 58 | public void testIrisHC() throws IOException 59 | { 60 | // unknown why these mis-classify from the expected data set 61 | Fields skipFields = Fields.size( 5 ); 62 | Tuple[] skip = new Tuple[] 63 | { 64 | new Tuple( 6.7, 3.0, 5.0, 1.7, "versicolor" ), 65 | new Tuple( 6.9, 3.1, 4.9, 1.5, "versicolor" ) 66 | }; 67 | 68 | pmmlTest( "iris.hc", Fields.NONE, Fields.LAST, skipFields, skip ); 69 | } 70 | 71 | @Test 72 | public void testIrisKMeans() throws IOException 73 | { 74 | pmmlTest( "iris.kmeans", Fields.NONE, Fields.LAST ); 75 | } 76 | 77 | @Test 78 | public void testIrisLM() throws IOException 79 | { 80 | pmmlTest( "iris.lm_p", Fields.FIRST, new Fields( "predict", double.class ) ); 81 | } 82 | 83 | @Test 84 | public void testIrisMultiNom() throws IOException 85 | { 86 | pmmlTest( "iris.multinom", new Fields( "species" ), Fields.LAST ); 87 | } 88 | 89 | @Test 90 | @Ignore 91 | public void testIrisNN() throws IOException 92 | { 93 | pmmlTest( "iris.nn", Fields.NONE, Fields.LAST ); 94 | } 95 | 96 | @Test 97 | public void testIrisRandomForest() throws IOException 98 | { 99 | pmmlTest( "iris.rf", new Fields( "species" ), Fields.LAST ); 100 | } 101 | 
102 | @Test 103 | public void testIrisRPart() throws IOException 104 | { 105 | pmmlTest( "iris.rpart", new Fields( "species" ), Fields.LAST ); 106 | } 107 | 108 | @Test 109 | @Ignore 110 | public void testIrisSVM() throws IOException 111 | { 112 | pmmlTest( "iris.svm", Fields.NONE, Fields.LAST ); 113 | } 114 | 115 | @Test 116 | public void testSampleRandomForest() throws IOException 117 | { 118 | pmmlTest( "sample.rf", Fields.FIRST, Fields.LAST ); 119 | } 120 | 121 | @Test 122 | public void testRandomForestRegression() throws IOException 123 | { 124 | pmmlTest( "iris.rf.bin", new Fields( "setosa" ), new Fields( "predict", double.class ) ); 125 | } 126 | } 127 | -------------------------------------------------------------------------------- /pattern-pmml/src/test/resources/pmml/iris.glm.pmml: -------------------------------------------------------------------------------- 1 | 2 | 21 | 22 | 24 |
26 | 27 | 28 | 2013-01-25 10:01:54 29 |
30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 |
75 | -------------------------------------------------------------------------------- /pattern-pmml/src/test/resources/pmml/iris.hc.pmml: -------------------------------------------------------------------------------- 1 | 2 | 21 | 22 | 24 |
26 | 27 | 28 | 2013-01-25 10:01:53 29 |
30 | 31 | 32 | 33 | 34 | 35 | 36 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 5.006 3.428 1.462 0.246 53 | 54 | 55 | 5.9296875 2.7578125 4.4109375 1.4390625 56 | 57 | 58 | 6.85277777777778 3.075 5.78611111111111 2.09722222222222 59 | 60 | 61 |
62 | -------------------------------------------------------------------------------- /pattern-pmml/src/test/resources/pmml/iris.kmeans.pmml: -------------------------------------------------------------------------------- 1 | 2 | 21 | 22 | 24 |
25 | 26 | 27 | 2013-01-25 10:01:53 28 |
29 | 30 | 31 | 32 | 33 | 34 | 35 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 5.006 3.428 1.462 0.246 52 | 53 | 54 | 6.85 3.07368421052632 5.74210526315789 2.07105263157895 55 | 56 | 57 | 5.90161290322581 2.74838709677419 4.39354838709678 1.43387096774194 58 | 59 | 60 |
61 | -------------------------------------------------------------------------------- /pattern-pmml/src/test/resources/pmml/iris.lm_p.pmml: -------------------------------------------------------------------------------- 1 | 2 | 21 | 22 | 24 |
25 | 26 | 27 | 2013-01-25 10:01:53 28 |
29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 |
59 | -------------------------------------------------------------------------------- /pattern-pmml/src/test/resources/pmml/kmeans.pmml: -------------------------------------------------------------------------------- 1 | 2 | 21 | 22 | 25 |
27 | 29 | 31 | 2013-01-10 18:44:35 32 |
33 | 35 | 36 | 38 | 40 | 42 | 43 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 57 | 59 | 61 | 63 | 64 | 6.85 3.07368421052632 5.74210526315789 2.07105263157895 66 | 67 | 68 | 70 | 5.006 3.428 1.462 0.246 71 | 72 | 74 | 5.90161290322581 2.74838709677419 4.39354838709678 1.43387096774194 76 | 77 | 78 | 79 |
80 | -------------------------------------------------------------------------------- /pattern-pmml/src/test/resources/pmml/kmeans.tsv: -------------------------------------------------------------------------------- 1 | sepal_length sepal_width petal_length petal_width predict 2 | 5.1 3.5 1.4 0.2 2 -------------------------------------------------------------------------------- /settings.gradle: -------------------------------------------------------------------------------- 1 | /* 2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | * 4 | * Project and contact information: http://www.cascading.org/ 5 | * 6 | * This file is part of the Cascading project. 7 | * 8 | * Licensed under the Apache License, Version 2.0 (the "License"); 9 | * you may not use this file except in compliance with the License. 10 | * You may obtain a copy of the License at 11 | * 12 | * http://www.apache.org/licenses/LICENSE-2.0 13 | * 14 | * Unless required by applicable law or agreed to in writing, software 15 | * distributed under the License is distributed on an "AS IS" BASIS, 16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | * See the License for the specific language governing permissions and 18 | * limitations under the License. 19 | */ 20 | 21 | include 'pattern-core' 22 | include 'pattern-pmml' 23 | include 'pattern-hadoop' 24 | include 'pattern-local' 25 | include 'pattern-examples' 26 | 27 | rootProject.name = 'pattern' 28 | -------------------------------------------------------------------------------- /version.properties: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved. 3 | # 4 | # Project and contact information: http://www.cascading.org/ 5 | # 6 | # This file is part of the Cascading project. 
7 | # 8 | # Licensed under the Apache License, Version 2.0 (the "License"); 9 | # you may not use this file except in compliance with the License. 10 | # You may obtain a copy of the License at 11 | # 12 | # http://www.apache.org/licenses/LICENSE-2.0 13 | # 14 | # Unless required by applicable law or agreed to in writing, software 15 | # distributed under the License is distributed on an "AS IS" BASIS, 16 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 17 | # See the License for the specific language governing permissions and 18 | # limitations under the License. 19 | # 20 | 21 | pattern.release.major=1.0 22 | pattern.release.minor=0 23 | --------------------------------------------------------------------------------