├── .gitignore
├── LICENSE.txt
├── README.md
├── build.gradle
├── etc
├── includes.gradle
├── properties.gradle
├── providedCompile.gradle
├── s3Upload.gradle
├── synchronizer.properties
├── testing.gradle
└── version.gradle
├── pattern-core
├── README.md
├── build.gradle
└── src
│ ├── main
│ └── java
│ │ └── cascading
│ │ └── pattern
│ │ ├── PatternException.java
│ │ ├── datafield
│ │ ├── CategoricalDataField.java
│ │ ├── ContinuousDataField.java
│ │ └── DataField.java
│ │ ├── ensemble
│ │ ├── EnsembleSpec.java
│ │ ├── ParallelEnsembleAssembly.java
│ │ ├── function
│ │ │ └── InsertGUID.java
│ │ └── selection
│ │ │ ├── Average.java
│ │ │ ├── CategoricalSelectionBuffer.java
│ │ │ ├── CategoricalSelector.java
│ │ │ ├── MajorityVote.java
│ │ │ ├── PredictionSelectionBuffer.java
│ │ │ ├── PredictionSelector.java
│ │ │ ├── SelectionBuffer.java
│ │ │ └── SelectionStrategy.java
│ │ ├── model
│ │ ├── ModelSchema.java
│ │ ├── ModelScoringFunction.java
│ │ ├── Spec.java
│ │ ├── clustering
│ │ │ ├── Cluster.java
│ │ │ ├── ClusterEvaluator.java
│ │ │ ├── ClusteringFunction.java
│ │ │ ├── ClusteringSpec.java
│ │ │ ├── compare
│ │ │ │ ├── AbsoluteDifferenceCompareFunction.java
│ │ │ │ └── CompareFunction.java
│ │ │ └── measure
│ │ │ │ ├── ComparisonMeasure.java
│ │ │ │ ├── DistanceMeasure.java
│ │ │ │ ├── EuclideanMeasure.java
│ │ │ │ └── SquaredEuclideanMeasure.java
│ │ ├── generalregression
│ │ │ ├── BaseRegressionFunction.java
│ │ │ ├── CategoricalRegressionFunction.java
│ │ │ ├── GeneralRegressionSpec.java
│ │ │ ├── LinkFunction.java
│ │ │ ├── Parameter.java
│ │ │ ├── PredictionRegressionFunction.java
│ │ │ ├── RegressionTable.java
│ │ │ ├── expression
│ │ │ │ ├── ExpressionEvaluator.java
│ │ │ │ └── ParameterExpression.java
│ │ │ ├── normalization
│ │ │ │ ├── Normalization.java
│ │ │ │ └── SoftMaxNormalization.java
│ │ │ └── predictor
│ │ │ │ ├── CovariantPredictor.java
│ │ │ │ ├── FactorPredictor.java
│ │ │ │ └── Predictor.java
│ │ └── tree
│ │ │ ├── Node.java
│ │ │ ├── Tree.java
│ │ │ ├── TreeFunction.java
│ │ │ ├── TreeSpec.java
│ │ │ ├── decision
│ │ │ ├── Decision.java
│ │ │ ├── DecisionTree.java
│ │ │ ├── FinalDecision.java
│ │ │ ├── ParentDecision.java
│ │ │ ├── PredicateEvaluator.java
│ │ │ └── PredicatedDecision.java
│ │ │ └── predicate
│ │ │ ├── ComparablePredicate.java
│ │ │ ├── EqualsToPredicate.java
│ │ │ ├── FalsePredicate.java
│ │ │ ├── GreaterOrEqualThanPredicate.java
│ │ │ ├── GreaterThanPredicate.java
│ │ │ ├── IsInSetPredicate.java
│ │ │ ├── IsMissingPredicate.java
│ │ │ ├── IsNotInSetPredicate.java
│ │ │ ├── IsNotMissingPredicate.java
│ │ │ ├── LessOrEqualThanPredicate.java
│ │ │ ├── LessThanPredicate.java
│ │ │ ├── NotEqualsToPredicate.java
│ │ │ ├── Predicate.java
│ │ │ ├── SimplePredicate.java
│ │ │ ├── SimpleSetPredicate.java
│ │ │ ├── TruePredicate.java
│ │ │ └── compound
│ │ │ ├── AndPredicate.java
│ │ │ ├── CompoundPredicate.java
│ │ │ ├── OrPredicate.java
│ │ │ ├── SurrogatePredicate.java
│ │ │ └── XorPredicate.java
│ │ └── util
│ │ ├── Logging.java
│ │ └── Reflection.java
│ └── test
│ ├── java
│ └── cascading
│ │ └── pattern
│ │ ├── PatternPlatformTestCase.java
│ │ ├── ensemble
│ │ ├── EnsemblePlatformTestCase.java
│ │ └── SimpleEnsemblePlatformTest.java
│ │ └── model
│ │ ├── ModelTest.java
│ │ └── tree
│ │ └── PredicateTest.java
│ └── resources
│ ├── data
│ ├── randomforest-predict.tsv
│ └── randomforest.tsv
│ └── log4j.properties
├── pattern-examples
├── .gitignore
├── README.md
├── build.gradle
├── data
│ ├── groc.arules.tsv
│ ├── groc.arules.xml
│ ├── iris.glm.tsv
│ ├── iris.glm.xml
│ ├── iris.hc.tsv
│ ├── iris.hc.xml
│ ├── iris.kmeans.tsv
│ ├── iris.kmeans.xml
│ ├── iris.lm_p.tsv
│ ├── iris.lm_p.xml
│ ├── iris.multinom.tsv
│ ├── iris.multinom.xml
│ ├── iris.nn.tsv
│ ├── iris.nn.xml
│ ├── iris.rf.tsv
│ ├── iris.rf.xml
│ ├── iris.rpart.tsv
│ ├── iris.rpart.xml
│ ├── iris.svm.tsv
│ ├── iris.svm.xml
│ ├── orders.tsv
│ ├── sample.rf.xml
│ └── sample.tsv
├── examples
│ ├── py
│ │ ├── gen_orders.py
│ │ └── rf_eval.py
│ └── r
│ │ ├── pmml_models.R
│ │ ├── rattle_pmml.R
│ │ └── rf_pmml.R
└── src
│ └── main
│ └── java
│ └── cascading
│ └── pattern
│ ├── Main.java
│ ├── function
│ ├── RandomForestFunction.java
│ └── RandomForestSpec.java
│ └── pmml
│ └── iris
│ └── RegressionFlowExample.java
├── pattern-hadoop
├── README.md
└── build.gradle
├── pattern-local
├── README.md
├── build.gradle
└── src
│ └── main
│ └── resources
│ └── log4j.properties
├── pattern-pmml
├── README.md
├── build.gradle
└── src
│ ├── main
│ └── java
│ │ └── cascading
│ │ └── pattern
│ │ └── pmml
│ │ ├── ArrayUtil.java
│ │ ├── ClusteringUtil.java
│ │ ├── DataFields.java
│ │ ├── DataTypes.java
│ │ ├── GeneralRegressionUtil.java
│ │ ├── PMMLModel.java
│ │ ├── PMMLPlanner.java
│ │ ├── PMMLTypeResolver.java
│ │ ├── PMMLUtil.java
│ │ ├── RegressionUtil.java
│ │ └── TreeUtil.java
│ └── test
│ ├── java
│ └── cascading
│ │ └── pattern
│ │ └── pmml
│ │ ├── PMMLPlatformTestCase.java
│ │ └── SimplePMMLPlatformTest.java
│ └── resources
│ └── pmml
│ ├── iris.glm.pmml
│ ├── iris.glm.tsv
│ ├── iris.hc.pmml
│ ├── iris.hc.tsv
│ ├── iris.kmeans.pmml
│ ├── iris.kmeans.tsv
│ ├── iris.lm_p.pmml
│ ├── iris.lm_p.tsv
│ ├── iris.multinom.pmml
│ ├── iris.multinom.tsv
│ ├── iris.nn.pmml
│ ├── iris.nn.tsv
│ ├── iris.rf.bin.pmml
│ ├── iris.rf.bin.tsv
│ ├── iris.rf.pmml
│ ├── iris.rf.tsv
│ ├── iris.rpart.pmml
│ ├── iris.rpart.tsv
│ ├── iris.svm.pmml
│ ├── iris.svm.tsv
│ ├── kmeans.pmml
│ ├── kmeans.tsv
│ ├── randomforest.pmml
│ ├── randomforest.tsv
│ ├── sample.rf.pmml
│ └── sample.rf.tsv
├── settings.gradle
└── version.properties
/.gitignore:
--------------------------------------------------------------------------------
1 | junitvm*.properties
2 | build.properties
3 | gradle.properties
4 | .DS_Store
5 | build
6 | *.iml
7 | *.iws
8 | *.ipr
9 | .idea/
10 | .gradle/
--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
1 |
2 | License:
3 |
4 | Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
5 |
6 | Project and contact information: http://www.cascading.org/
7 |
8 | This file is part of the Cascading project.
9 |
10 | Licensed under the Apache License, Version 2.0 (the "License");
11 | you may not use this file except in compliance with the License.
12 | You may obtain a copy of the License at
13 |
14 | http://www.apache.org/licenses/LICENSE-2.0
15 |
16 | Unless required by applicable law or agreed to in writing, software
17 | distributed under the License is distributed on an "AS IS" BASIS,
18 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19 | See the License for the specific language governing permissions and
20 | limitations under the License.
21 |
22 | Third-party Licenses:
23 |
24 | All third-party dependencies are listed in the build.gradle files.
25 |
26 | jPMML - https://github.com/jpmml/jpmml
27 |
28 | Copyright (c) 2009 University of Tartu
29 | All rights reserved.
30 |
31 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the
32 | following conditions are met:
33 |
34 | * Redistributions of source code must retain the above copyright notice, this list of conditions and the
35 | following disclaimer.
36 | * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the
37 | following disclaimer in the documentation and/or other materials provided with the distribution.
38 | * Neither the name of the University of Tartu nor the names of its contributors may be used to endorse or promote
39 | products derived from this software without specific prior written permission.
40 |
41 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
42 | INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
43 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
44 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
45 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
46 | WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
47 | USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
--------------------------------------------------------------------------------
/etc/includes.gradle:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | *
4 | * Project and contact information: http://www.cascading.org/
5 | *
6 | * This file is part of the Cascading project.
7 | *
8 | * Licensed under the Apache License, Version 2.0 (the "License");
9 | * you may not use this file except in compliance with the License.
10 | * You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 | // Pulls in the shared build scripts kept under ${rootDir}/etc.
21 | apply from: "${rootDir}/etc/providedCompile.gradle"
22 | apply from: "${rootDir}/etc/testing.gradle"
23 | apply from: "${rootDir}/etc/s3Upload.gradle"
24 |
--------------------------------------------------------------------------------
/etc/properties.gradle:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | *
4 | * Project and contact information: http://www.cascading.org/
5 | *
6 | * This file is part of the Cascading project.
7 | *
8 | * Licensed under the Apache License, Version 2.0 (the "License");
9 | * you may not use this file except in compliance with the License.
10 | * You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 | // Copies optional 'teamcity' and AWS settings into the system properties, then exposes the publishing settings as 'ext' properties.
21 | if( project.properties[ 'teamcity' ] ) // make them system properties
22 | System.properties.putAll( project.properties[ 'teamcity' ] )
23 | // when the 'aws.properties' system property is set, load the file it names and merge it into the system properties
24 | if( System.properties[ 'aws.properties' ] )
25 | {
26 | file( System.properties[ 'aws.properties' ] ).withReader { reader ->
27 | def awsProperties = new Properties()
28 | awsProperties.load( reader )
29 | System.properties.putAll( awsProperties )
30 | }
31 | }
32 | // default publish repository; replaced below when 'publish.repo.url' is set
33 | ext.repoUrl = 'http://conjars.org/repo/'
34 | ext.repoUserName = System.properties[ 'publish.repo.userName' ]
35 | ext.repoPassword = System.properties[ 'publish.repo.password' ]
36 |
37 | if( System.properties[ 'publish.repo.url' ] )
38 | repoUrl = System.properties[ 'publish.repo.url' ]
39 | // AWS credentials and bucket names; each is null when its system property is not set
40 | ext.awsAccessId = System.properties[ 'publish.aws.accessId' ]
41 | ext.awsSecretKey = System.properties[ 'publish.aws.secretKey' ]
42 | ext.s3Bucket = System.properties[ 'publish.bucket' ]
43 | ext.s3BucketDocs = System.properties[ 'publish.docs.bucket' ]
44 |
--------------------------------------------------------------------------------
/etc/providedCompile.gradle:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | *
4 | * Project and contact information: http://www.cascading.org/
5 | *
6 | * This file is part of the Cascading project.
7 | *
8 | * Licensed under the Apache License, Version 2.0 (the "License");
9 | * you may not use this file except in compliance with the License.
10 | * You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 | // Declares a 'providedCompile' configuration and wires it into compilation, conf2ScopeMappings, the IDEA module, and javadoc.
21 | configurations {
22 | providedCompile
23 | }
24 | // add providedCompile dependencies to the compile classpath of the main source set
25 | sourceSets {
26 | main.compileClasspath += configurations.providedCompile
27 | }
28 | // register providedCompile under the PROVIDED scope in conf2ScopeMappings
29 | task mappings {
30 | conf2ScopeMappings.addMapping( 0, configurations.providedCompile, Conf2ScopeMappingContainer.PROVIDED )
31 | }
32 | // add providedCompile to the PROVIDED scope of the IDEA module
33 | idea {
34 | module {
35 | scopes.PROVIDED.plus += configurations.providedCompile
36 | }
37 | }
38 | // add providedCompile to the javadoc classpath as well
39 | javadoc {
40 | classpath += configurations.providedCompile
41 | }
42 |
43 |
--------------------------------------------------------------------------------
/etc/s3Upload.gradle:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | *
4 | * Project and contact information: http://www.cascading.org/
5 | *
6 | * This file is part of the Cascading project.
7 | *
8 | * Licensed under the Apache License, Version 2.0 (the "License");
9 | * you may not use this file except in compliance with the License.
10 | * You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | import com.monochromeroad.gradle.plugin.aws.s3.S3Sync
22 | import com.monochromeroad.gradle.plugin.aws.s3.ACL
23 | // Defines the 's3Upload' task (type S3Sync), which copies ${buildDir}/publish into an S3 destination.
24 | buildscript {
25 | repositories {
26 | mavenLocal()
27 | mavenCentral()
28 | mavenRepo name: 'monochromeroad', url: 'http://conjars.org/repo/'
29 | }
30 | dependencies {
31 | classpath 'thirdparty:gradle-aws-s3-sync:0.5.1'
32 | }
33 | }
34 | // credentials come from rootProject.awsAccessId / rootProject.awsSecretKey
35 | task s3Upload( type: S3Sync ) {
36 |
37 | accessKey = rootProject.awsAccessId
38 | secretKey = rootProject.awsSecretKey
39 |
40 | keepFiles = true // prevents deletion from bucket
41 |
42 | acl ACL.PublicRead
43 |
44 | configFile "${rootProject.projectDir}/etc/synchronizer.properties"
45 |
46 | ext.source = "${buildDir}/publish"
47 | // root project targets s3BucketDocs/<root>/<major>/; any other project targets s3Bucket/<root>/<major>/<project>/
48 | if( project == rootProject ) // only publishes docs
49 | ext.destination = "${project.s3BucketDocs}/${rootProject.name}/${majorVersion}/"
50 | else
51 | ext.destination = "${project.s3Bucket}/${rootProject.name}/${majorVersion}/${project.name}/"
52 |
53 | from source
54 | into destination
55 | }
56 |
--------------------------------------------------------------------------------
/etc/synchronizer.properties:
--------------------------------------------------------------------------------
1 | #
2 | # Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | #
4 | # Project and contact information: http://www.cascading.org/
5 | #
6 | # This file is part of the Cascading project.
7 | #
8 | # Licensed under the Apache License, Version 2.0 (the "License");
9 | # you may not use this file except in compliance with the License.
10 | # You may obtain a copy of the License at
11 | #
12 | # http://www.apache.org/licenses/LICENSE-2.0
13 | #
14 | # Unless required by applicable law or agreed to in writing, software
15 | # distributed under the License is distributed on an "AS IS" BASIS,
16 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | # See the License for the specific language governing permissions and
18 | # limitations under the License.
19 | #
20 |
21 | # see http://jets3t.s3.amazonaws.com/toolkit/configuration.html
22 |
23 | #s3service.default-bucket-location=Tokyo
24 | # httpclient.max-connections=2
25 | # threaded-service.admin-max-thread-count=5
26 |
27 | ###
28 | # File/Object comparison properties
29 | ###
30 |
31 | filecomparer.skip-symlinks=true
32 | #filecomparer.use-md5-files=true
33 | #filecomparer.generate-md5-files=true
34 | #filecomparer.md5-files-root-dir=.cache
35 | filecomparer.skip-upload-of-md5-files=true
36 | filecomparer.assume-local-latest-in-mismatch=false
37 |
38 | # Page Caching - none
39 | upload.metadata.Cache-Control=no-cache
40 |
41 | upload.transformed-files-batch-size=1000
42 |
--------------------------------------------------------------------------------
/etc/testing.gradle:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | *
4 | * Project and contact information: http://www.cascading.org/
5 | *
6 | * This file is part of the Cascading project.
7 | *
8 | * Licensed under the Apache License, Version 2.0 (the "License");
9 | * you may not use this file except in compliance with the License.
10 | * You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 | // Shared test setup: default test system properties, the 'test' task, and a separate 'platformTest' task hooked into 'check'.
21 | rootProject.ext.testRoots = []
22 | // closure that fills in the test system properties and returns System.properties
23 | project.ext.setTestingProperties = {
24 | // 'test.cluster.enabled' defaults to 'false' unless it is already set
25 | if( !System.properties[ 'test.cluster.enabled' ] )
26 | System.properties[ 'test.cluster.enabled' ] = 'false'
27 |
28 | // hadoop hard codes 'build/test' into its test harness, so might as well play along
29 | System.properties[ 'test.data.path' ] = new String( "${projectDir}/../pattern-pmml/src/test/resources/pmml/" )
30 | System.properties[ 'test.output.root' ] = new String( "${buildDir}/test/output/" )
31 | // the last expression is the closure's return value
32 | System.properties
33 | }
34 |
35 | test {
36 | jvmArgs '-Xmx756m'
37 |
38 | enableAssertions = false
39 |
40 | systemProperties = setTestingProperties()
41 | }
42 | // runs after 'test'; includes *PlatformTest classes, or the single class named by 'platformTest.single'
43 | task platformTest( type: Test, dependsOn: test ) {
44 |
45 | forkEvery = 1 // static fields on the platform test get munged otherwise
46 |
47 | if( System.properties[ 'platformTest.single' ] ) // test a single class from the command line
48 | include "**/${System.properties[ 'platformTest.single' ]}.class"
49 | else
50 | include '**/*PlatformTest.class'
51 |
52 | jvmArgs '-Xmx756m'
53 |
54 | enableAssertions = false
55 |
56 | systemProperties = setTestingProperties()
57 | // test classes are always taken from the compiled test output of :pattern-pmml
58 | setTestClassesDir( file( project( ':pattern-pmml' ).sourceSets.test.output.classesDir ) )
59 |
60 | // not called when disabled
61 | doFirst() {
62 |
63 | rootProject.ext.testRoots << systemProperties[ 'test.output.root' ]
64 | }
65 | }
66 | // make 'check' depend on platformTest
67 | check {
68 | dependsOn << platformTest
69 | }
--------------------------------------------------------------------------------
/etc/version.gradle:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | *
4 | * Project and contact information: http://www.cascading.org/
5 | *
6 | * This file is part of the Cascading project.
7 | *
8 | * Licensed under the Apache License, Version 2.0 (the "License");
9 | * you may not use this file except in compliance with the License.
10 | * You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 | // Computes currentCommit, majorVersion, minorVersion, buildNumber, releaseTag, and releaseVersion for the build.
21 | apply from: './etc/properties.gradle'
22 | // the commit id is taken from the 'build.vcs.number' system property when it is set
23 | project.ext.currentCommit = System.properties[ 'build.vcs.number' ];
24 | // otherwise run `git rev-parse HEAD`, capture its output in a temp file, and use the first line
25 | if( !currentCommit )
26 | {
27 | def commitPath = File.createTempFile( "commit", "tmp" )
28 |
29 | ant.exec( dir: '.', executable: "git", output: commitPath ) {
30 | arg( line: 'rev-parse HEAD' )
31 | }
32 |
33 | currentCommit = commitPath.readLines().get( 0 )
34 |
35 | commitPath.delete()
36 | }
37 | // major and minor release numbers are read from version.properties
38 | def versionProperties = new Properties()
39 | file( 'version.properties' ).withInputStream { versionProperties.load( it ) }
40 |
41 | ext.majorVersion = versionProperties[ 'pattern.release.major' ]
42 | ext.minorVersion = versionProperties[ 'pattern.release.minor' ]
43 | // build number defaults to 'dev'; it is prefixed 'priv-' for private releases, else 'wip-' for non-final releases
44 | ext.buildNumber = System.getProperty( 'build.number', 'dev' )
45 |
46 | if( System.properties[ 'pattern.release.private' ] )
47 | buildNumber = "priv-${buildNumber}"
48 | else if( !System.properties[ 'pattern.release.final' ] )
49 | buildNumber = "wip-${buildNumber}"
50 |
51 | ext.releaseTag = "${majorVersion}-${buildNumber}"
52 | // without an explicit 'build.number' the tag is just 'wip-<major>'
53 | if( !System.properties[ 'build.number' ] )
54 | releaseTag = "wip-${majorVersion}"
55 | // releaseVersion is <major>[.<minor>], with '-<buildNumber>' appended unless the release is final
56 | ext.releaseVersion = majorVersion
57 |
58 | if( minorVersion )
59 | releaseVersion = "${releaseVersion}.${minorVersion}"
60 |
61 | if( !System.properties[ 'pattern.release.final' ] )
62 | releaseVersion = "${releaseVersion}-${buildNumber}"
--------------------------------------------------------------------------------
/pattern-core/README.md:
--------------------------------------------------------------------------------
1 | # pattern-core
2 |
3 | This sub-project contains all core APIs and base API JUnit test cases.
4 |
5 | Feel free to use `PatternPlatformTestCase` as the base class for any new custom tests in dependent projects.
--------------------------------------------------------------------------------
/pattern-core/build.gradle:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | *
4 | * Project and contact information: http://www.cascading.org/
5 | *
6 | * This file is part of the Cascading project.
7 | *
8 | * Licensed under the Apache License, Version 2.0 (the "License");
9 | * you may not use this file except in compliance with the License.
10 | * You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 | // Build script for pattern-core: declares its dependencies and packages version metadata into the jar.
21 | dependencies {
22 | compile( group: 'cascading', name: 'cascading-core', version: cascadingVersion )
23 |
24 | compile group: 'com.google.guava', name: 'guava', version: '14.0.1'
25 |
26 | compile group: 'org.slf4j', name: 'slf4j-api', version: '1.6.1'
27 |
28 | testCompile group: 'cascading', name: 'cascading-core', version: cascadingVersion, classifier: 'tests', changing: true
29 | testCompile group: 'cascading', name: 'cascading-platform', version: cascadingVersion, classifier: 'tests', changing: true
30 |
31 | testRuntime group: 'log4j', name: 'log4j', version: '1.2.16'
32 | testRuntime group: 'org.slf4j', name: 'slf4j-api', version: '1.6.1'
33 | testRuntime group: 'org.slf4j', name: 'slf4j-log4j12', version: '1.6.1'
34 | }
35 | // non-final builds first write build.number.properties, then ship it next to version.properties under cascading/pattern
36 | jar {
37 | doFirst {
38 | if( !System.properties[ 'pattern.release.final' ] )
39 | file( "${buildDir}/build.number.properties" ).write( "pattern.build.number=${buildNumber}" )
40 | }
41 |
42 | into( 'cascading/pattern' ) {
43 | from '../version.properties'
44 |
45 | if( !System.properties[ 'pattern.release.final' ] )
46 | from "${buildDir}/build.number.properties"
47 | }
48 | }
49 | // the platformTest task is switched off for this sub-project
50 | platformTest.enabled = false
51 |
--------------------------------------------------------------------------------
/pattern-core/src/main/java/cascading/pattern/PatternException.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | *
4 | * Project and contact information: http://www.cascading.org/
5 | *
6 | * This file is part of the Cascading project.
7 | *
8 | * Licensed under the Apache License, Version 2.0 (the "License");
9 | * you may not use this file except in compliance with the License.
10 | * You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package cascading.pattern;
22 |
23 | /** Class PatternException is the unchecked exception type declared in the cascading.pattern package. */
24 | public class PatternException extends RuntimeException
25 | {
26 | /**
27 | * Creates an exception with no detail message and no cause.
28 | */
29 | public PatternException()
30 | {
31 | }
32 |
33 | /** @param message the detail message, passed to the {@link RuntimeException} constructor */
34 | public PatternException( String message )
35 | {
36 | super( message );
37 | }
38 |
39 | /**
40 | * @param message the detail message, passed to the {@link RuntimeException} constructor
41 | * @param cause the underlying cause, passed to the {@link RuntimeException} constructor
42 | */
43 | public PatternException( String message, Throwable cause )
44 | {
45 | super( message, cause );
46 | }
47 | }
48 |
--------------------------------------------------------------------------------
/pattern-core/src/main/java/cascading/pattern/datafield/CategoricalDataField.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | *
4 | * Project and contact information: http://www.cascading.org/
5 | *
6 | * This file is part of the Cascading project.
7 | *
8 | * Licensed under the Apache License, Version 2.0 (the "License");
9 | * you may not use this file except in compliance with the License.
10 | * You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package cascading.pattern.datafield;
22 |
23 | import java.lang.reflect.Type;
24 | import java.util.ArrayList;
25 | import java.util.Arrays;
26 | import java.util.Collections;
27 | import java.util.List;
28 |
29 | import cascading.tuple.Fields;
30 |
31 |
32 | /**
33 | * Class CategoricalDataField represents a field with a fixed set of possible values.
34 | *
35 | * For example, if the field name is {@code SIZE}, it could have three possible categories,
36 | * {@code small}, {@code medium}, and {@code large}.
37 | *
38 | * Order of categories is retained so that indexes into the internal list of categories can be used
39 | * to speed up some operations.
40 | */
41 | public class CategoricalDataField extends DataField
42 | {
43 | protected List categories = new ArrayList(); // NOTE(review): raw List; elements are presumably Strings (see the String... constructors) -- confirm
44 | /** Copies the name and type of the given data field and uses the given categories. */
45 | public CategoricalDataField( CategoricalDataField dataField, String... categories )
46 | {
47 | this( dataField.name, dataField.getType(), categories );
48 | }
49 | /** Takes the name and type from the first entry of {@code fields} and uses the given categories. */
50 | public CategoricalDataField( Fields fields, String... categories )
51 | {
52 | this( fields.get( 0 ).toString(), fields.getType( 0 ), categories );
53 | }
54 | /** Takes the name and type from the first entry of {@code fields} and uses the given list of categories. */
55 | public CategoricalDataField( Fields fields, List categories )
56 | {
57 | this( fields.get( 0 ).toString(), fields.getType( 0 ), categories );
58 | }
59 | /** Creates a field with the given name and type; the category array is wrapped with {@code Arrays.asList}. */
60 | public CategoricalDataField( String name, Type type, String... categories )
61 | {
62 | this( name, type, Arrays.asList( categories ) );
63 | }
64 | /** Creates a field with the given name and type; the elements of {@code categories} are copied into an internal list. */
65 | public CategoricalDataField( String name, Type type, List categories )
66 | {
67 | super( name, type );
68 | this.categories.addAll( categories );
69 | }
70 |
71 | /**
72 | * Returns an unmodifiable view of the internal category list.
73 | *
74 | * @return the categories, wrapped by {@link Collections#unmodifiableList}
75 | */
76 | public List getCategories()
77 | {
78 | return Collections.unmodifiableList( categories );
79 | }
80 | }
81 |
--------------------------------------------------------------------------------
/pattern-core/src/main/java/cascading/pattern/datafield/ContinuousDataField.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | *
4 | * Project and contact information: http://www.cascading.org/
5 | *
6 | * This file is part of the Cascading project.
7 | *
8 | * Licensed under the Apache License, Version 2.0 (the "License");
9 | * you may not use this file except in compliance with the License.
10 | * You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package cascading.pattern.datafield;
22 |
23 | import java.lang.reflect.Type;
24 |
25 | import cascading.tuple.Fields;
26 |
27 |
28 | /** Class ContinuousDataField represents a field with a continuous set of values, like a {@link Double} value. */
29 | public class ContinuousDataField extends DataField
30 | {
31 | public ContinuousDataField( Fields fields ) // delegates to the DataField( Fields ) constructor
32 | {
33 | super( fields );
34 | }
35 | /** Creates a continuous field with the given name and type; both are passed to the DataField constructor. */
36 | public ContinuousDataField( String name, Type type )
37 | {
38 | super( name, type );
39 | }
40 | }
41 |
--------------------------------------------------------------------------------
/pattern-core/src/main/java/cascading/pattern/datafield/DataField.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | *
4 | * Project and contact information: http://www.cascading.org/
5 | *
6 | * This file is part of the Cascading project.
7 | *
8 | * Licensed under the Apache License, Version 2.0 (the "License");
9 | * you may not use this file except in compliance with the License.
10 | * You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package cascading.pattern.datafield;
22 |
23 | import java.io.Serializable;
24 | import java.lang.reflect.Type;
25 |
26 | import cascading.tuple.Fields;
27 |
28 | /** Class DataField is the serializable base class of data fields; it holds a field name and a type. */
29 | public abstract class DataField implements Serializable
30 | {
31 | public String name; // checked to be non-null and non-empty by the constructor; public and non-final
32 | public Type type; // checked to be non-null by the constructor; public and non-final
33 | /** Takes the name and type from the first entry of {@code fields}. */
34 | protected DataField( Fields fields )
35 | {
36 | this( fields.get( 0 ).toString(), fields.getType( 0 ) );
37 | }
38 | /** @throws IllegalArgumentException if name is null or empty, or if type is null */
39 | protected DataField( String name, Type type )
40 | {
41 | if( name == null || name.isEmpty() )
42 | throw new IllegalArgumentException( "name may not be null or empty" );
43 |
44 | if( type == null )
45 | throw new IllegalArgumentException( "type may not be null" );
46 |
47 | this.name = name;
48 | this.type = type;
49 | }
50 |
51 | public String getName()
52 | {
53 | return name;
54 | }
55 |
56 | public Type getType()
57 | {
58 | return type;
59 | }
60 | /** Formats this field as {@code name:SimpleClassName:type}. */
61 | @Override
62 | public String toString()
63 | {
64 | return name + ":" + getClass().getSimpleName() + ":" + type;
65 | }
66 | }
67 |
--------------------------------------------------------------------------------
/pattern-core/src/main/java/cascading/pattern/ensemble/EnsembleSpec.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | *
4 | * Project and contact information: http://www.cascading.org/
5 | *
6 | * This file is part of the Cascading project.
7 | *
8 | * Licensed under the Apache License, Version 2.0 (the "License");
9 | * you may not use this file except in compliance with the License.
10 | * You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package cascading.pattern.ensemble;
22 |
23 |
24 | import java.io.Serializable;
25 | import java.util.ArrayList;
26 | import java.util.LinkedHashSet;
27 | import java.util.List;
28 | import java.util.Set;
29 |
30 | import cascading.pattern.ensemble.selection.MajorityVote;
31 | import cascading.pattern.ensemble.selection.SelectionStrategy;
32 | import cascading.pattern.model.ModelSchema;
33 | import cascading.pattern.model.Spec;
34 |
35 |
36 | /**
37 | * Class EnsembleSpec is used to define an ensemble model.
38 | *
39 | * It is used with {@link ParallelEnsembleAssembly}.
40 | */
41 | public class EnsembleSpec extends Spec implements Serializable
42 | {
43 | protected List modelSpecs = new ArrayList();
44 | protected SelectionStrategy selectionStrategy = new MajorityVote();
45 |
46 | public EnsembleSpec( ModelSchema modelSchema )
47 | {
48 | super( modelSchema );
49 | }
50 |
51 | public EnsembleSpec( ModelSchema modelSchema, List modelSpecs )
52 | {
53 | super( modelSchema );
54 | this.modelSpecs = modelSpecs;
55 | }
56 |
57 | public boolean isParallel()
58 | {
59 | return selectionStrategy.isParallel();
60 | }
61 |
62 | public void addModelSpecs( List modelSpec )
63 | {
64 | this.modelSpecs.addAll( modelSpec );
65 | }
66 |
67 | public void addModelSpec( S modelSpec )
68 | {
69 | this.modelSpecs.add( modelSpec );
70 | }
71 |
72 | public List getModelSpecs()
73 | {
74 | return modelSpecs;
75 | }
76 |
77 | public SelectionStrategy getSelectionStrategy()
78 | {
79 | return selectionStrategy;
80 | }
81 |
82 | public void setSelectionStrategy( SelectionStrategy selectionStrategy )
83 | {
84 | this.selectionStrategy = selectionStrategy;
85 | }
86 |
87 | /**
88 | * Returns unique list of all the underlying declared model categories.
89 | *
90 | * @return the model categories
91 | */
92 | public List getModelCategories()
93 | {
94 | List categories = new ArrayList();
95 |
96 | Set set = new LinkedHashSet();
97 |
98 | for( S spec : getModelSpecs() )
99 | set.addAll( spec.getCategories() );
100 |
101 | categories.addAll( set );
102 |
103 | return categories;
104 | }
105 |
106 | @Override
107 | public String toString()
108 | {
109 | final StringBuilder sb = new StringBuilder( "EnsembleSpec{" );
110 | sb.append( "modelSpecs=" ).append( modelSpecs );
111 | sb.append( ", selectionStrategy=" ).append( selectionStrategy );
112 | sb.append( '}' );
113 | return sb.toString();
114 | }
115 | }
116 |
--------------------------------------------------------------------------------
/pattern-core/src/main/java/cascading/pattern/ensemble/function/InsertGUID.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | *
4 | * Project and contact information: http://www.cascading.org/
5 | *
6 | * This file is part of the Cascading project.
7 | *
8 | * Licensed under the Apache License, Version 2.0 (the "License");
9 | * you may not use this file except in compliance with the License.
10 | * You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package cascading.pattern.ensemble.function;
22 |
23 | import cascading.operation.expression.ExpressionFunction;
24 | import cascading.pipe.Each;
25 | import cascading.pipe.Pipe;
26 | import cascading.pipe.SubAssembly;
27 | import cascading.tuple.Fields;
28 |
29 | /**
30 | * Class InsertGUID creates a globally unique ID by calling {@link java.util.UUID#randomUUID()}.
31 | *
32 | * This Function also returns {@code false} for {@link cascading.operation.Operation#isSafe()}, preventing
33 | * duplicate ids from being generated for the same record.
34 | */
35 | public class InsertGUID extends SubAssembly
36 | {
37 | public InsertGUID( Pipe previous, Fields declaredFields )
38 | {
39 | super( previous );
40 |
41 | String expression = "java.util.UUID.randomUUID().toString()";
42 |
43 | ExpressionFunction expressionFunction = new ExpressionFunction( declaredFields, expression )
44 | {
45 | @Override
46 | public boolean isSafe()
47 | {
48 | return false;
49 | }
50 | };
51 |
52 | previous = new Each( previous, Fields.NONE, expressionFunction, Fields.ALL );
53 |
54 | setTails( previous );
55 | }
56 | }
57 |
--------------------------------------------------------------------------------
/pattern-core/src/main/java/cascading/pattern/ensemble/selection/Average.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | *
4 | * Project and contact information: http://www.cascading.org/
5 | *
6 | * This file is part of the Cascading project.
7 | *
8 | * Licensed under the Apache License, Version 2.0 (the "License");
9 | * you may not use this file except in compliance with the License.
10 | * You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package cascading.pattern.ensemble.selection;
22 |
23 | import org.slf4j.Logger;
24 | import org.slf4j.LoggerFactory;
25 |
26 | /**
27 | *
28 | */
29 | public class Average extends PredictionSelector
30 | {
31 | private static final Logger LOG = LoggerFactory.getLogger( Average.class );
32 |
33 | @Override
34 | public boolean isParallel()
35 | {
36 | return true;
37 | }
38 |
39 | @Override
40 | double predict( double[] results )
41 | {
42 | double sum = 0;
43 |
44 | for( double result : results )
45 | sum += result;
46 |
47 | double avg = sum / results.length;
48 |
49 | LOG.debug( "prediction: {}", avg );
50 |
51 | return avg;
52 | }
53 | }
54 |
--------------------------------------------------------------------------------
/pattern-core/src/main/java/cascading/pattern/ensemble/selection/CategoricalSelector.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | *
4 | * Project and contact information: http://www.cascading.org/
5 | *
6 | * This file is part of the Cascading project.
7 | *
8 | * Licensed under the Apache License, Version 2.0 (the "License");
9 | * you may not use this file except in compliance with the License.
10 | * You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package cascading.pattern.ensemble.selection;
22 |
23 | /**
24 | *
25 | */
26 | public abstract class CategoricalSelector extends SelectionStrategy
27 | {
28 | public abstract int select( int[] results );
29 | }
30 |
--------------------------------------------------------------------------------
/pattern-core/src/main/java/cascading/pattern/ensemble/selection/MajorityVote.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | *
4 | * Project and contact information: http://www.cascading.org/
5 | *
6 | * This file is part of the Cascading project.
7 | *
8 | * Licensed under the Apache License, Version 2.0 (the "License");
9 | * you may not use this file except in compliance with the License.
10 | * You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package cascading.pattern.ensemble.selection;
22 |
23 | import com.google.common.primitives.Ints;
24 | import org.slf4j.Logger;
25 | import org.slf4j.LoggerFactory;
26 |
27 | /**
28 | *
29 | */
30 | public class MajorityVote extends CategoricalSelector
31 | {
32 | private static final Logger LOG = LoggerFactory.getLogger( MajorityVote.class );
33 |
34 | @Override
35 | public boolean isParallel()
36 | {
37 | return true;
38 | }
39 |
40 | @Override
41 | public int select( int[] results )
42 | {
43 | int max = Ints.max( results );
44 | int index = Ints.indexOf( results, max );
45 |
46 | LOG.debug( "score: {}, with votes: {}", index, max );
47 |
48 | return index;
49 | }
50 | }
51 |
--------------------------------------------------------------------------------
/pattern-core/src/main/java/cascading/pattern/ensemble/selection/PredictionSelectionBuffer.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | *
4 | * Project and contact information: http://www.cascading.org/
5 | *
6 | * This file is part of the Cascading project.
7 | *
8 | * Licensed under the Apache License, Version 2.0 (the "License");
9 | * you may not use this file except in compliance with the License.
10 | * You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package cascading.pattern.ensemble.selection;
22 |
23 | import java.util.Arrays;
24 | import java.util.Iterator;
25 |
26 | import cascading.flow.FlowProcess;
27 | import cascading.operation.BufferCall;
28 | import cascading.operation.OperationCall;
29 | import cascading.pattern.ensemble.EnsembleSpec;
30 | import cascading.tuple.Tuple;
31 | import cascading.tuple.TupleEntry;
32 | import org.slf4j.Logger;
33 | import org.slf4j.LoggerFactory;
34 |
35 | /**
36 | *
37 | */
38 | public class PredictionSelectionBuffer extends SelectionBuffer
39 | {
40 | private static final Logger LOG = LoggerFactory.getLogger( PredictionSelectionBuffer.class );
41 |
42 | private PredictionSelector selection;
43 |
44 | protected class DecisionContext
45 | {
46 | public Tuple tuple;
47 | public double[] results;
48 |
49 | public Tuple result( Object value )
50 | {
51 | tuple.set( 0, value );
52 |
53 | return tuple;
54 | }
55 | }
56 |
57 | public PredictionSelectionBuffer( EnsembleSpec ensembleSpec )
58 | {
59 | super( ensembleSpec.getModelSchema().getDeclaredFields(), ensembleSpec );
60 |
61 | if( !( ensembleSpec.getSelectionStrategy() instanceof PredictionSelector ) )
62 | throw new IllegalArgumentException( "selection strategy must be Prediction, got: " + ensembleSpec.getSelectionStrategy() );
63 |
64 | this.selection = (PredictionSelector) ensembleSpec.getSelectionStrategy();
65 | }
66 |
67 | @Override
68 | public void prepare( FlowProcess flowProcess, OperationCall operationCall )
69 | {
70 | ( (BufferCall) operationCall ).setRetainValues( true );
71 |
72 | DecisionContext context = new DecisionContext();
73 |
74 | context.tuple = Tuple.size( getFieldDeclaration().size() );
75 | context.results = new double[ ensembleSpec.getModelSpecs().size() ];
76 |
77 | operationCall.setContext( context );
78 | }
79 |
80 | @Override
81 | public void operate( FlowProcess flowProcess, BufferCall bufferCall )
82 | {
83 | double[] results = bufferCall.getContext().results;
84 |
85 | Arrays.fill( results, 0 ); // clear before use
86 |
87 | Iterator iterator = bufferCall.getArgumentsIterator();
88 | int count = 0;
89 |
90 | while( iterator.hasNext() )
91 | {
92 | TupleEntry next = iterator.next();
93 | Double score = next.getDouble( 0 );
94 |
95 | results[ count++ ] += score;
96 | }
97 |
98 | double prediction = selection.predict( results );
99 |
100 | LOG.debug( "prediction: {}", prediction );
101 |
102 | bufferCall.getOutputCollector().add( bufferCall.getContext().result( prediction ) );
103 | }
104 | }
105 |
--------------------------------------------------------------------------------
/pattern-core/src/main/java/cascading/pattern/ensemble/selection/PredictionSelector.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | *
4 | * Project and contact information: http://www.cascading.org/
5 | *
6 | * This file is part of the Cascading project.
7 | *
8 | * Licensed under the Apache License, Version 2.0 (the "License");
9 | * you may not use this file except in compliance with the License.
10 | * You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package cascading.pattern.ensemble.selection;
22 |
23 | /**
24 | *
25 | */
26 | public abstract class PredictionSelector extends SelectionStrategy
27 | {
28 | abstract double predict( double[] results );
29 | }
30 |
--------------------------------------------------------------------------------
/pattern-core/src/main/java/cascading/pattern/ensemble/selection/SelectionBuffer.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | *
4 | * Project and contact information: http://www.cascading.org/
5 | *
6 | * This file is part of the Cascading project.
7 | *
8 | * Licensed under the Apache License, Version 2.0 (the "License");
9 | * you may not use this file except in compliance with the License.
10 | * You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package cascading.pattern.ensemble.selection;
22 |
23 | import cascading.operation.BaseOperation;
24 | import cascading.operation.Buffer;
25 | import cascading.pattern.ensemble.EnsembleSpec;
26 | import cascading.tuple.Fields;
27 |
28 | /**
29 | *
30 | */
31 | public abstract class SelectionBuffer extends BaseOperation implements Buffer
32 | {
33 | protected final EnsembleSpec ensembleSpec;
34 |
35 | public SelectionBuffer( Fields fieldDeclaration, EnsembleSpec ensembleSpec )
36 | {
37 | super( fieldDeclaration );
38 |
39 | this.ensembleSpec = ensembleSpec;
40 | }
41 | }
42 |
--------------------------------------------------------------------------------
/pattern-core/src/main/java/cascading/pattern/ensemble/selection/SelectionStrategy.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | *
4 | * Project and contact information: http://www.cascading.org/
5 | *
6 | * This file is part of the Cascading project.
7 | *
8 | * Licensed under the Apache License, Version 2.0 (the "License");
9 | * you may not use this file except in compliance with the License.
10 | * You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package cascading.pattern.ensemble.selection;
22 |
23 | import java.io.Serializable;
24 |
/**
 * Class SelectionStrategy is the serializable base type of all ensemble selection strategies.
 */
public abstract class SelectionStrategy implements Serializable
{
  /**
   * Reports whether this strategy is parallel.
   *
   * @return the implementation-defined answer
   */
  public abstract boolean isParallel();

  /**
   * Renders the strategy as the simple name of its runtime class.
   *
   * @return the simple class name
   */
  @Override
  public String toString()
  {
    final Class<?> runtimeType = getClass();

    return runtimeType.getSimpleName();
  }
}
38 |
--------------------------------------------------------------------------------
/pattern-core/src/main/java/cascading/pattern/model/ModelScoringFunction.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | *
4 | * Project and contact information: http://www.cascading.org/
5 | *
6 | * This file is part of the Cascading project.
7 | *
8 | * Licensed under the Apache License, Version 2.0 (the "License");
9 | * you may not use this file except in compliance with the License.
10 | * You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package cascading.pattern.model;
22 |
23 | import java.io.Serializable;
24 |
25 | import cascading.flow.FlowProcess;
26 | import cascading.operation.BaseOperation;
27 | import cascading.operation.Function;
28 | import cascading.operation.OperationCall;
29 | import cascading.tuple.Tuple;
30 |
31 |
32 | public abstract class ModelScoringFunction extends BaseOperation> implements Function>
33 | {
34 | public static final boolean SAFE_DEFAULT = false;
35 |
36 | protected S spec;
37 | protected boolean isSafe = SAFE_DEFAULT;
38 |
39 | protected static interface Result extends Serializable
40 | {
41 | Out transform( In object );
42 | }
43 |
44 | /** Class Context is used to hold intermediate values. */
45 | protected static class Context
46 | {
47 | public final Tuple tuple;
48 | public Payload payload;
49 |
50 | public Context( int size )
51 | {
52 | tuple = Tuple.size( size );
53 | }
54 |
55 | public Tuple result( Object label )
56 | {
57 | tuple.set( 0, label );
58 |
59 | return tuple;
60 | }
61 | }
62 |
63 | protected ModelScoringFunction( S spec, boolean safe )
64 | {
65 | this( spec );
66 | isSafe = safe;
67 | }
68 |
69 | protected ModelScoringFunction( S spec )
70 | {
71 | super( spec.getModelSchema().getInputFields().size(), spec.getModelSchema().getDeclaredFields() );
72 | this.spec = spec;
73 | }
74 |
75 | public S getSpec()
76 | {
77 | return spec;
78 | }
79 |
80 | @Override
81 | public boolean isSafe()
82 | {
83 | return isSafe;
84 | }
85 |
86 | @Override
87 | public void prepare( FlowProcess flowProcess, OperationCall> operationCall )
88 | {
89 | operationCall.setContext( new ModelScoringFunction.Context( getFieldDeclaration().size() ) );
90 | }
91 | }
92 |
--------------------------------------------------------------------------------
/pattern-core/src/main/java/cascading/pattern/model/Spec.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | *
4 | * Project and contact information: http://www.cascading.org/
5 | *
6 | * This file is part of the Cascading project.
7 | *
8 | * Licensed under the Apache License, Version 2.0 (the "License");
9 | * you may not use this file except in compliance with the License.
10 | * You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package cascading.pattern.model;
22 |
23 | import java.io.Serializable;
24 | import java.util.ArrayList;
25 | import java.util.List;
26 |
27 | import cascading.pattern.datafield.CategoricalDataField;
28 | import cascading.pattern.datafield.DataField;
29 |
30 |
31 | public abstract class Spec implements Serializable
32 | {
33 | protected ModelSchema modelSchema = null;
34 |
35 | protected Spec()
36 | {
37 | }
38 |
39 | protected Spec( ModelSchema modelSchema )
40 | {
41 | this.modelSchema = modelSchema;
42 | }
43 |
44 | public void setModelSchema( ModelSchema modelSchema )
45 | {
46 | this.modelSchema = modelSchema;
47 | }
48 |
49 | public ModelSchema getModelSchema()
50 | {
51 | if( modelSchema == null )
52 | modelSchema = new ModelSchema();
53 |
54 | return modelSchema;
55 | }
56 |
57 | public boolean isPredictedCategorical()
58 | {
59 | DataField predictedField = getModelSchema().getPredictedField( getModelSchema().getPredictedFieldNames().get( 0 ) );
60 |
61 | return predictedField instanceof CategoricalDataField;
62 | }
63 |
64 | public List getCategories()
65 | {
66 | DataField predictedField = getModelSchema().getPredictedField( getModelSchema().getPredictedFieldNames().get( 0 ) );
67 |
68 | List categories = new ArrayList();
69 |
70 | if( predictedField instanceof CategoricalDataField )
71 | categories.addAll( ( (CategoricalDataField) predictedField ).getCategories() );
72 |
73 | return categories;
74 | }
75 |
76 | public String[] getCategoriesArray()
77 | {
78 | List categories = getCategories();
79 |
80 | return categories.toArray( new String[ categories.size() ] );
81 | }
82 | }
83 |
--------------------------------------------------------------------------------
/pattern-core/src/main/java/cascading/pattern/model/clustering/Cluster.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | *
4 | * Project and contact information: http://www.cascading.org/
5 | *
6 | * This file is part of the Cascading project.
7 | *
8 | * Licensed under the Apache License, Version 2.0 (the "License");
9 | * you may not use this file except in compliance with the License.
10 | * You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package cascading.pattern.model.clustering;
22 |
23 | import java.io.Serializable;
24 | import java.util.List;
25 |
26 | import com.google.common.primitives.Doubles;
27 |
28 | /**
29 | * Class Cluster represents a point in space denoted by the given collection of {@code points} which
30 | * in turn represents a particular {@code targetCategory}.
31 | */
32 | public class Cluster implements Serializable
33 | {
34 | protected int ordinal; // set when added to spec
35 | protected String targetCategory;
36 | protected double[] points;
37 |
38 | public Cluster( String targetCategory, double... points )
39 | {
40 | this( targetCategory );
41 | this.points = new double[ points.length ];
42 |
43 | System.arraycopy( points, 0, this.points, 0, points.length );
44 | }
45 |
46 | public Cluster( String targetCategory, List points )
47 | {
48 | this( targetCategory );
49 | this.points = Doubles.toArray( points );
50 | }
51 |
52 | private Cluster( String targetCategory )
53 | {
54 | this.targetCategory = targetCategory;
55 | }
56 |
57 | protected void setOrdinal( int ordinal )
58 | {
59 | this.ordinal = ordinal;
60 | }
61 |
62 | public String getTargetCategory()
63 | {
64 | if( targetCategory == null )
65 | return Integer.toString( ordinal );
66 |
67 | return targetCategory;
68 | }
69 |
70 | public double[] getPoints()
71 | {
72 | double[] dest = new double[ points.length ];
73 |
74 | System.arraycopy( points, 0, dest, 0, points.length );
75 |
76 | return dest;
77 | }
78 |
79 | public int getPointsSize()
80 | {
81 | return points.length;
82 | }
83 | }
84 |
--------------------------------------------------------------------------------
/pattern-core/src/main/java/cascading/pattern/model/clustering/ClusterEvaluator.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | *
4 | * Project and contact information: http://www.cascading.org/
5 | *
6 | * This file is part of the Cascading project.
7 | *
8 | * Licensed under the Apache License, Version 2.0 (the "License");
9 | * you may not use this file except in compliance with the License.
10 | * You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package cascading.pattern.model.clustering;
22 |
23 | import java.util.Arrays;
24 |
25 | import cascading.pattern.model.clustering.compare.CompareFunction;
26 | import cascading.pattern.model.clustering.measure.ComparisonMeasure;
27 | import cascading.tuple.Fields;
28 | import cascading.tuple.TupleEntry;
29 |
30 | /**
31 | *
32 | */
33 | class ClusterEvaluator
34 | {
35 | private final Cluster cluster;
36 | private final ComparisonMeasure comparisonMeasure;
37 | private final CompareFunction[] compareFunctions;
38 | private final double[] points;
39 |
40 | public ClusterEvaluator( Fields argumentFields, Cluster cluster, ComparisonMeasure comparisonMeasure, CompareFunction compareFunction )
41 | {
42 | this.cluster = cluster;
43 | this.comparisonMeasure = comparisonMeasure;
44 | this.compareFunctions = createCompareFunctions( argumentFields, compareFunction );
45 | this.points = cluster.getPoints();
46 | }
47 |
48 | private CompareFunction[] createCompareFunctions( Fields fields, CompareFunction defaultFunction, CompareFunction... functions )
49 | {
50 | CompareFunction[] results = new CompareFunction[ fields.size() ];
51 |
52 | Arrays.fill( results, defaultFunction );
53 |
54 | if( functions.length == 0 )
55 | return results;
56 |
57 | if( functions.length != fields.size() )
58 | throw new IllegalStateException( "fields and number of functions are not equal" );
59 |
60 | for( int i = 0; i < functions.length; i++ )
61 | {
62 | if( functions[ i ] != null )
63 | results[ i ] = functions[ i ];
64 | }
65 |
66 | return results;
67 | }
68 |
69 | double evaluate( TupleEntry tupleEntry )
70 | {
71 | return comparisonMeasure.calculate( compareFunctions, tupleEntry.getTuple(), points );
72 | }
73 |
74 | public String getTargetCategory()
75 | {
76 | return cluster.getTargetCategory();
77 | }
78 | }
79 |
--------------------------------------------------------------------------------
/pattern-core/src/main/java/cascading/pattern/model/clustering/compare/AbsoluteDifferenceCompareFunction.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | *
4 | * Project and contact information: http://www.cascading.org/
5 | *
6 | * This file is part of the Cascading project.
7 | *
8 | * Licensed under the Apache License, Version 2.0 (the "License");
9 | * you may not use this file except in compliance with the License.
10 | * You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package cascading.pattern.model.clustering.compare;
22 |
23 | /**
24 | *
25 | */
26 | public class AbsoluteDifferenceCompareFunction extends CompareFunction
27 | {
28 | @Override
29 | public double result( double lhs, double rhs )
30 | {
31 | return Math.abs( lhs - rhs );
32 | }
33 | }
34 |
--------------------------------------------------------------------------------
/pattern-core/src/main/java/cascading/pattern/model/clustering/compare/CompareFunction.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | *
4 | * Project and contact information: http://www.cascading.org/
5 | *
6 | * This file is part of the Cascading project.
7 | *
8 | * Licensed under the Apache License, Version 2.0 (the "License");
9 | * you may not use this file except in compliance with the License.
10 | * You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package cascading.pattern.model.clustering.compare;
22 |
23 | import java.io.Serializable;
24 |
/**
 * Class CompareFunction is the serializable base type for functions that reduce a
 * pair of {@code double} values to a single {@code double} result.
 */
public abstract class CompareFunction implements Serializable
  {
  /**
   * Compares the two given values.
   *
   * @param lhs left hand side value
   * @param rhs right hand side value
   * @return the outcome of the comparison as defined by the implementing class
   */
  public abstract double result( double lhs, double rhs );
  }
32 |
--------------------------------------------------------------------------------
/pattern-core/src/main/java/cascading/pattern/model/clustering/measure/ComparisonMeasure.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | *
4 | * Project and contact information: http://www.cascading.org/
5 | *
6 | * This file is part of the Cascading project.
7 | *
8 | * Licensed under the Apache License, Version 2.0 (the "License");
9 | * you may not use this file except in compliance with the License.
10 | * You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package cascading.pattern.model.clustering.measure;
22 |
23 | import java.io.Serializable;
24 |
25 | import cascading.pattern.model.clustering.compare.CompareFunction;
26 | import cascading.tuple.Tuple;
27 |
28 | /**
29 | *
30 | */
31 | public abstract class ComparisonMeasure implements Serializable
32 | {
33 | /**
34 | * Calculate the distance from this cluster for the given tuple.
35 | *
36 | * @param compareFunctions
37 | * @param values array of tuple values
38 | * @return double
39 | */
40 | public abstract double calculate( CompareFunction[] compareFunctions, Tuple values, double[] points );
41 | }
42 |
--------------------------------------------------------------------------------
/pattern-core/src/main/java/cascading/pattern/model/clustering/measure/DistanceMeasure.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | *
4 | * Project and contact information: http://www.cascading.org/
5 | *
6 | * This file is part of the Cascading project.
7 | *
8 | * Licensed under the Apache License, Version 2.0 (the "License");
9 | * you may not use this file except in compliance with the License.
10 | * You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package cascading.pattern.model.clustering.measure;
22 |
23 | /**
24 | *
25 | */
26 | public abstract class DistanceMeasure extends ComparisonMeasure
27 | {
28 | }
29 |
--------------------------------------------------------------------------------
/pattern-core/src/main/java/cascading/pattern/model/clustering/measure/EuclideanMeasure.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | *
4 | * Project and contact information: http://www.cascading.org/
5 | *
6 | * This file is part of the Cascading project.
7 | *
8 | * Licensed under the Apache License, Version 2.0 (the "License");
9 | * you may not use this file except in compliance with the License.
10 | * You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package cascading.pattern.model.clustering.measure;
22 |
23 | import cascading.pattern.model.clustering.compare.CompareFunction;
24 | import cascading.tuple.Tuple;
25 |
26 | /**
27 | * Class EuclideanMeasure calculates Euclidean distance between two points
28 | * where the two points difference utilize the given {@link CompareFunction}.
29 | */
30 | public class EuclideanMeasure extends SquaredEuclideanMeasure
31 | {
32 | public EuclideanMeasure()
33 | {
34 | }
35 |
36 | @Override
37 | public double calculate( CompareFunction[] compareFunctions, Tuple values, double[] points )
38 | {
39 | return Math.sqrt( super.calculate( compareFunctions, values, points ) );
40 | }
41 | }
42 |
--------------------------------------------------------------------------------
/pattern-core/src/main/java/cascading/pattern/model/clustering/measure/SquaredEuclideanMeasure.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | *
4 | * Project and contact information: http://www.cascading.org/
5 | *
6 | * This file is part of the Cascading project.
7 | *
8 | * Licensed under the Apache License, Version 2.0 (the "License");
9 | * you may not use this file except in compliance with the License.
10 | * You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package cascading.pattern.model.clustering.measure;
22 |
23 | import cascading.pattern.model.clustering.compare.CompareFunction;
24 | import cascading.tuple.Tuple;
25 |
26 | /**
27 | *
28 | */
29 | public class SquaredEuclideanMeasure extends DistanceMeasure
30 | {
31 | public SquaredEuclideanMeasure()
32 | {
33 | }
34 |
35 | @Override
36 | public double calculate( CompareFunction[] compareFunctions, Tuple values, double[] points )
37 | {
38 | double sumOfSquares = 0.0;
39 |
40 | for( int i = 0; i < points.length; i++ )
41 | sumOfSquares += Math.pow( values.getDouble( i ) - points[ i ], 2.0 );
42 |
43 | return sumOfSquares;
44 | }
45 | }
46 |
--------------------------------------------------------------------------------
/pattern-core/src/main/java/cascading/pattern/model/generalregression/BaseRegressionFunction.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | *
4 | * Project and contact information: http://www.cascading.org/
5 | *
6 | * This file is part of the Cascading project.
7 | *
8 | * Licensed under the Apache License, Version 2.0 (the "License");
9 | * you may not use this file except in compliance with the License.
10 | * You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package cascading.pattern.model.generalregression;
22 |
23 | import cascading.flow.FlowProcess;
24 | import cascading.operation.OperationCall;
25 | import cascading.pattern.model.ModelScoringFunction;
26 | import cascading.pattern.model.generalregression.expression.ExpressionEvaluator;
27 | import cascading.tuple.Fields;
28 |
29 | /**
30 | *
31 | */
32 | abstract class BaseRegressionFunction extends ModelScoringFunction
33 | {
34 | protected static class ExpressionContext
35 | {
36 | public ExpressionEvaluator[] expressions;
37 | public double[] results;
38 | }
39 |
40 | public BaseRegressionFunction( GeneralRegressionSpec spec )
41 | {
42 | super( spec );
43 | }
44 |
45 | @Override
46 | public void prepare( FlowProcess flowProcess, OperationCall> operationCall )
47 | {
48 | super.prepare( flowProcess, operationCall );
49 |
50 | Fields argumentFields = operationCall.getArgumentFields();
51 |
52 | operationCall.getContext().payload = new ExpressionContext();
53 | operationCall.getContext().payload.expressions = getSpec().getRegressionTableEvaluators( argumentFields );
54 | }
55 | }
56 |
--------------------------------------------------------------------------------
/pattern-core/src/main/java/cascading/pattern/model/generalregression/LinkFunction.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | *
4 | * Project and contact information: http://www.cascading.org/
5 | *
6 | * This file is part of the Cascading project.
7 | *
8 | * Licensed under the Apache License, Version 2.0 (the "License");
9 | * you may not use this file except in compliance with the License.
10 | * You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package cascading.pattern.model.generalregression;
22 |
/**
 * Enum LinkFunction lists the link functions available to a GLM.
 * <p>
 * Each constant's {@link #calculate(double)} maps the given value through the formula
 * documented on that constant. Constants are looked up by their lower case
 * {@link #function} name via {@link #getFunction(String)}.
 */
public enum LinkFunction
  {
    /** Returns the value unchanged. */
    NONE( "none" )
      {
      @Override
      public double calculate( double value )
        {
        return value;
        }
      },

    /** Returns {@code 1 / (1 + e^-value)}. */
    LOGIT( "logit" )
      {
      @Override
      public double calculate( double value )
        {
        return 1.0 / ( 1.0 + Math.exp( -value ) );
        }
      },

    /** Returns {@code 1 - e^(-e^value)}. */
    CLOGLOG( "cloglog" )
      {
      @Override
      public double calculate( double value )
        {
        return 1.0 - Math.exp( -Math.exp( value ) );
        }
      },

    /** Returns {@code e^(-e^-value)}. */
    LOGLOG( "loglog" )
      {
      @Override
      public double calculate( double value )
        {
        return Math.exp( -Math.exp( -value ) );
        }
      },

    /** Returns {@code 0.5 + atan(value) / PI}. */
    CAUCHIT( "cauchit" )
      {
      @Override
      public double calculate( double value )
        {
        return 0.5 + 1.0 / Math.PI * Math.atan( value );
        }
      };

  /** The name this constant is looked up by; never reassigned after construction. */
  public final String function;

  private LinkFunction( String function )
    {
    this.function = function;
    }

  /**
   * Returns the LinkFunction whose {@link #function} name equals the given name.
   * <p>
   * The name is compared literally, it is not interpreted as a regular expression.
   *
   * @param functionName String, may be {@code null}
   * @return the matching LinkFunction, or {@link #NONE} if the name is {@code null} or unknown
   */
  public static LinkFunction getFunction( String functionName )
    {
    if( functionName == null )
      return NONE;

    for( LinkFunction lf : values() )
      {
      // equals, not String#matches: the caller supplies a plain name, not a pattern
      if( lf.function.equals( functionName ) )
        return lf;
      }

    return NONE;
    }

  /**
   * Applies this function to the given value.
   *
   * @param value the value to transform
   * @return the transformed value
   */
  public abstract double calculate( double value );
  }
91 |
--------------------------------------------------------------------------------
/pattern-core/src/main/java/cascading/pattern/model/generalregression/Parameter.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | *
4 | * Project and contact information: http://www.cascading.org/
5 | *
6 | * This file is part of the Cascading project.
7 | *
8 | * Licensed under the Apache License, Version 2.0 (the "License");
9 | * you may not use this file except in compliance with the License.
10 | * You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package cascading.pattern.model.generalregression;
22 |
23 | import java.io.Serializable;
24 | import java.util.ArrayList;
25 | import java.util.Arrays;
26 | import java.util.List;
27 |
28 | import cascading.pattern.model.generalregression.expression.ParameterExpression;
29 | import cascading.pattern.model.generalregression.predictor.CovariantPredictor;
30 | import cascading.pattern.model.generalregression.predictor.FactorPredictor;
31 | import cascading.pattern.model.generalregression.predictor.Predictor;
32 | import cascading.tuple.Fields;
33 |
34 | /**
35 | *
36 | */
37 | public class Parameter implements Serializable
38 | {
39 | String name;
40 | double beta;
41 | int df;
42 |
43 | ArrayList covariants = new ArrayList();
44 | ArrayList factors = new ArrayList();
45 |
46 | public Parameter( String name, double beta, int df )
47 | {
48 | this.name = name;
49 | this.beta = beta;
50 | this.df = df;
51 | }
52 |
53 | public Parameter( String name, double beta )
54 | {
55 | this.name = name;
56 | this.beta = beta;
57 | }
58 |
59 | public Parameter( String name, double beta, Predictor... predictors )
60 | {
61 | this( name, beta, Arrays.asList( predictors ) );
62 | }
63 |
64 | public Parameter( String name, double beta, List predictors )
65 | {
66 | this.name = name;
67 | this.beta = beta;
68 |
69 | addPredictors( predictors );
70 | }
71 |
72 | public String getName()
73 | {
74 | return name;
75 | }
76 |
77 | public double getBeta()
78 | {
79 | return beta;
80 | }
81 |
82 | public int getDegreesOfFreedom()
83 | {
84 | return df;
85 | }
86 |
87 | public ArrayList getCovariants()
88 | {
89 | return covariants;
90 | }
91 |
92 | public ArrayList getFactors()
93 | {
94 | return factors;
95 | }
96 |
97 | public boolean isNoOp()
98 | {
99 | return beta == 0;
100 | }
101 |
102 | public void addPredictors( List predictors )
103 | {
104 | for( Predictor predictor : predictors )
105 | addPredictor( predictor );
106 | }
107 |
108 | public void addPredictor( Predictor predictor )
109 | {
110 | if( predictor instanceof CovariantPredictor )
111 | covariants.add( (CovariantPredictor) predictor );
112 |
113 | if( predictor instanceof FactorPredictor )
114 | factors.add( (FactorPredictor) predictor );
115 | }
116 |
117 | public ParameterExpression createExpression( Fields argumentsFields )
118 | {
119 | return new ParameterExpression( argumentsFields, this );
120 | }
121 | }
122 |
--------------------------------------------------------------------------------
/pattern-core/src/main/java/cascading/pattern/model/generalregression/PredictionRegressionFunction.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | *
4 | * Project and contact information: http://www.cascading.org/
5 | *
6 | * This file is part of the Cascading project.
7 | *
8 | * Licensed under the Apache License, Version 2.0 (the "License");
9 | * you may not use this file except in compliance with the License.
10 | * You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package cascading.pattern.model.generalregression;
22 |
23 | import cascading.flow.FlowProcess;
24 | import cascading.operation.FunctionCall;
25 | import cascading.pattern.model.generalregression.expression.ExpressionEvaluator;
26 | import org.slf4j.Logger;
27 | import org.slf4j.LoggerFactory;
28 |
29 | /**
30 | * Class PredictionRegressionFunction will return a single prediction
31 | * as determined by the {@link RegressionTable}s added to the {@link GeneralRegressionSpec}.
32 | */
33 | public class PredictionRegressionFunction extends BaseRegressionFunction
34 | {
35 | private static final Logger LOG = LoggerFactory.getLogger( PredictionRegressionFunction.class );
36 |
37 | public PredictionRegressionFunction( GeneralRegressionSpec param )
38 | {
39 | super( param );
40 |
41 | if( getSpec().getRegressionTables().size() != 1 )
42 | throw new IllegalArgumentException( "regression function only supports a single table, got: " + getSpec().getRegressionTables().size() );
43 | }
44 |
45 | @Override
46 | public void operate( FlowProcess flowProcess, FunctionCall> functionCall )
47 | {
48 | ExpressionEvaluator evaluator = functionCall.getContext().payload.expressions[ 0 ];
49 | LinkFunction linkFunction = getSpec().linkFunction;
50 |
51 | double result = evaluator.calculate( functionCall.getArguments() );
52 | double linkResult = linkFunction.calculate( result );
53 |
54 | LOG.debug( "result: {}", linkResult );
55 |
56 | functionCall.getOutputCollector().add( functionCall.getContext().result( linkResult ) );
57 | }
58 | }
59 |
--------------------------------------------------------------------------------
/pattern-core/src/main/java/cascading/pattern/model/generalregression/RegressionTable.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | *
4 | * Project and contact information: http://www.cascading.org/
5 | *
6 | * This file is part of the Cascading project.
7 | *
8 | * Licensed under the Apache License, Version 2.0 (the "License");
9 | * you may not use this file except in compliance with the License.
10 | * You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package cascading.pattern.model.generalregression;
22 |
23 | import java.io.Serializable;
24 | import java.util.LinkedHashMap;
25 | import java.util.Map;
26 | import java.util.Set;
27 |
28 | import cascading.pattern.model.generalregression.expression.ExpressionEvaluator;
29 | import cascading.pattern.model.generalregression.expression.ParameterExpression;
30 | import cascading.tuple.Fields;
31 |
32 | /**
33 | * Class RegressionTable simply holds a set of {@link Parameter} instances.
34 | *
35 | * If used for classification or categorization with CategoricalRegressionFunction, the
36 | * table must have a {@code targetCategory} value.
37 | */
38 | public class RegressionTable implements Serializable
39 | {
40 | private String targetCategory;
41 |
42 | Map parameters = new LinkedHashMap();
43 |
44 | public RegressionTable()
45 | {
46 | }
47 |
48 | public RegressionTable( String targetCategory )
49 | {
50 | this.targetCategory = targetCategory;
51 | }
52 |
53 | public String getTargetCategory()
54 | {
55 | return targetCategory;
56 | }
57 |
58 | public void setTargetCategory( String targetCategory )
59 | {
60 | this.targetCategory = targetCategory;
61 | }
62 |
63 | public void addParameter( Parameter parameter )
64 | {
65 | if( parameters.containsKey( parameter.getName() ) )
66 | throw new IllegalArgumentException( "may not have duplicate parameter names, got: " + parameter.getName() );
67 |
68 | parameters.put( parameter.getName(), parameter );
69 | }
70 |
71 | public Parameter getParameter( String name )
72 | {
73 | return parameters.get( name );
74 | }
75 |
76 | public boolean isNoOp()
77 | {
78 | for( Parameter parameter : parameters.values() )
79 | {
80 | if( !parameter.isNoOp() )
81 | return false;
82 | }
83 |
84 | return true;
85 | }
86 |
87 | ExpressionEvaluator bind( Fields argumentFields )
88 | {
89 | if( isNoOp() )
90 | return new ExpressionEvaluator( targetCategory );
91 |
92 | ParameterExpression[] expressions = new ParameterExpression[ parameters.size() ];
93 |
94 | int count = 0;
95 |
96 | for( Parameter parameter : parameters.values() )
97 | expressions[ count++ ] = parameter.createExpression( argumentFields );
98 |
99 | return new ExpressionEvaluator( targetCategory, expressions );
100 | }
101 |
102 | public Set getParameterNames()
103 | {
104 | return parameters.keySet();
105 | }
106 |
107 | @Override
108 | public String toString()
109 | {
110 | final StringBuilder sb = new StringBuilder( "GeneralRegressionTable{" );
111 | sb.append( "targetCategory='" ).append( targetCategory ).append( '\'' );
112 | sb.append( ", parameters=" ).append( parameters );
113 | sb.append( '}' );
114 | return sb.toString();
115 | }
116 | }
117 |
--------------------------------------------------------------------------------
/pattern-core/src/main/java/cascading/pattern/model/generalregression/expression/ExpressionEvaluator.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | *
4 | * Project and contact information: http://www.cascading.org/
5 | *
6 | * This file is part of the Cascading project.
7 | *
8 | * Licensed under the Apache License, Version 2.0 (the "License");
9 | * you may not use this file except in compliance with the License.
10 | * You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package cascading.pattern.model.generalregression.expression;
22 |
23 | import cascading.tuple.TupleEntry;
24 |
25 | /**
26 | *
27 | */
28 | public class ExpressionEvaluator
29 | {
30 | private final String targetCategory;
31 | private final ParameterExpression[] expressions;
32 |
33 | public ExpressionEvaluator( String targetCategory )
34 | {
35 | this.targetCategory = targetCategory;
36 | this.expressions = new ParameterExpression[ 0 ];
37 | }
38 |
39 | public ExpressionEvaluator( String targetCategory, ParameterExpression[] expressions )
40 | {
41 | this.targetCategory = targetCategory;
42 | this.expressions = expressions;
43 | }
44 |
45 | public String getTargetCategory()
46 | {
47 | return targetCategory;
48 | }
49 |
50 | public double calculate( TupleEntry tupleEntry )
51 | {
52 | double result = 0.0d;
53 |
54 | for( ParameterExpression expression : expressions )
55 | {
56 | if( expression.applies( tupleEntry ) )
57 | result += expression.calculate( tupleEntry );
58 | }
59 |
60 | return result;
61 | }
62 | }
63 |
--------------------------------------------------------------------------------
/pattern-core/src/main/java/cascading/pattern/model/generalregression/normalization/Normalization.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | *
4 | * Project and contact information: http://www.cascading.org/
5 | *
6 | * This file is part of the Cascading project.
7 | *
8 | * Licensed under the Apache License, Version 2.0 (the "License");
9 | * you may not use this file except in compliance with the License.
10 | * You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package cascading.pattern.model.generalregression.normalization;
22 |
23 | import java.io.Serializable;
24 |
/**
 * Class Normalization is the serializable base type for strategies that normalize
 * an array of {@code double} values.
 */
public abstract class Normalization implements Serializable
  {
  /**
   * Normalizes the given values.
   *
   * @param values the values to normalize
   * @return the normalized values
   */
  public abstract double[] normalize( double[] values );

  /** A Normalization that hands the given array back untouched. */
  public static final Normalization NONE = new Normalization()
    {
    @Override
    public double[] normalize( double[] values )
      {
      return values;
      }
    };
  }
41 |
--------------------------------------------------------------------------------
/pattern-core/src/main/java/cascading/pattern/model/generalregression/normalization/SoftMaxNormalization.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | *
4 | * Project and contact information: http://www.cascading.org/
5 | *
6 | * This file is part of the Cascading project.
7 | *
8 | * Licensed under the Apache License, Version 2.0 (the "License");
9 | * you may not use this file except in compliance with the License.
10 | * You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package cascading.pattern.model.generalregression.normalization;
22 |
23 | /**
24 | *
25 | */
26 | public class SoftMaxNormalization extends Normalization
27 | {
28 | @Override
29 | public double[] normalize( double[] values )
30 | {
31 | double sum = 0.0d;
32 |
33 | for( int i = 0; i < values.length; i++ )
34 | {
35 | values[ i ] = Math.exp( values[ i ] );
36 | sum += values[ i ];
37 | }
38 |
39 | for( int i = 0; i < values.length; i++ )
40 | values[ i ] = values[ i ] / sum;
41 |
42 | return values;
43 | }
44 | }
45 |
--------------------------------------------------------------------------------
/pattern-core/src/main/java/cascading/pattern/model/generalregression/predictor/CovariantPredictor.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | *
4 | * Project and contact information: http://www.cascading.org/
5 | *
6 | * This file is part of the Cascading project.
7 | *
8 | * Licensed under the Apache License, Version 2.0 (the "License");
9 | * you may not use this file except in compliance with the License.
10 | * You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package cascading.pattern.model.generalregression.predictor;
22 |
23 | /**
24 | *
25 | */
26 | public class CovariantPredictor extends Predictor
27 | {
28 | private final long exponent;
29 |
30 | public CovariantPredictor( String fieldName )
31 | {
32 | super( fieldName );
33 |
34 | this.exponent = 1;
35 | }
36 |
37 | public CovariantPredictor( String fieldName, long exponent )
38 | {
39 | super( fieldName );
40 |
41 | this.exponent = exponent;
42 | }
43 |
44 | public double calculate( double value )
45 | {
46 | return Math.pow( value, exponent );
47 | }
48 | }
49 |
--------------------------------------------------------------------------------
/pattern-core/src/main/java/cascading/pattern/model/generalregression/predictor/FactorPredictor.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | *
4 | * Project and contact information: http://www.cascading.org/
5 | *
6 | * This file is part of the Cascading project.
7 | *
8 | * Licensed under the Apache License, Version 2.0 (the "License");
9 | * you may not use this file except in compliance with the License.
10 | * You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package cascading.pattern.model.generalregression.predictor;
22 |
23 | /**
24 | *
25 | */
26 | public class FactorPredictor extends Predictor
27 | {
28 | private final String factor;
29 |
30 | public FactorPredictor( String fieldName, String factor )
31 | {
32 | super( fieldName );
33 |
34 | this.factor = factor;
35 | }
36 |
37 | public boolean matches( String value )
38 | {
39 | return factor.equals( value );
40 | }
41 | }
42 |
--------------------------------------------------------------------------------
/pattern-core/src/main/java/cascading/pattern/model/generalregression/predictor/Predictor.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | *
4 | * Project and contact information: http://www.cascading.org/
5 | *
6 | * This file is part of the Cascading project.
7 | *
8 | * Licensed under the Apache License, Version 2.0 (the "License");
9 | * you may not use this file except in compliance with the License.
10 | * You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package cascading.pattern.model.generalregression.predictor;
22 |
23 | import java.io.Serializable;
24 |
/**
 * Class Predictor is the serializable base type for predictors; it carries the
 * name of the field a predictor is associated with.
 */
public class Predictor implements Serializable
  {
  protected final String fieldName;

  /**
   * @param fieldName name of the associated field, stored as given
   */
  public Predictor( String fieldName )
    {
    this.fieldName = fieldName;
    }

  /** @return the field name handed to the constructor */
  public String getFieldName()
    {
    return this.fieldName;
    }
  }
42 |
--------------------------------------------------------------------------------
/pattern-core/src/main/java/cascading/pattern/model/tree/Node.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | *
4 | * Project and contact information: http://www.cascading.org/
5 | *
6 | * This file is part of the Cascading project.
7 | *
8 | * Licensed under the Apache License, Version 2.0 (the "License");
9 | * you may not use this file except in compliance with the License.
10 | * You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package cascading.pattern.model.tree;
22 |
23 | import java.io.Serializable;
24 |
25 | import cascading.pattern.model.tree.predicate.Predicate;
26 | /** Class Node is a serializable tree vertex identified by a non-null id, optionally carrying a {@link Predicate} and a score; equality and hashing use the id only. */
27 | public class Node implements Serializable
28 | {
29 | String id;
30 | Predicate predicate = null;
31 | Object score = null;
32 |
33 | public Node( String id, Predicate predicate )
34 | {
35 | this( id ); // delegate so the null check on id is applied
36 |
37 | this.predicate = predicate;
38 | }
39 |
40 | public Node( String id, Predicate predicate, Object score )
41 | {
42 | this( id ); // delegate so the null check on id is applied
43 | this.predicate = predicate;
44 | this.score = score;
45 | }
46 |
47 | public Node( String id )
48 | {
49 | if( id == null ) // equals() and hashCode() dereference id, so it must never be null
50 | throw new IllegalArgumentException( "id may not be null" );
51 |
52 | this.id = id;
53 | }
54 |
55 | public String getID()
56 | {
57 | return id;
58 | }
59 |
60 | public Predicate getPredicate()
61 | {
62 | return predicate;
63 | }
64 |
65 | public void setScore( String score ) // NOTE(review): takes a String although the field and the three-arg constructor accept any Object
66 | {
67 | this.score = score;
68 | }
69 |
70 | public Object getScore()
71 | {
72 | return score;
73 | }
74 |
75 | @Override
76 | public String toString()
77 | {
78 | final StringBuilder sb = new StringBuilder( "Node{" );
79 | sb.append( "id='" ).append( id ).append( '\'' );
80 | sb.append( ", predicate=" ).append( predicate );
81 | sb.append( ", category='" ).append( score ).append( '\'' ); // the score field is printed under the label "category"
82 | sb.append( '}' );
83 | return sb.toString();
84 | }
85 |
86 | @Override
87 | public boolean equals( Object object )
88 | {
89 | if( this == object )
90 | return true;
91 |
92 | if( object == null || getClass() != object.getClass() )
93 | return false;
94 |
95 | Node node = (Node) object;
96 |
97 | if( !id.equals( node.id ) ) // identity is the id alone; predicate and score are ignored
98 | return false;
99 |
100 | return true;
101 | }
102 |
103 | @Override
104 | public int hashCode()
105 | {
106 | return id.hashCode(); // consistent with equals(): derived from id only
107 | }
108 | }
109 |
--------------------------------------------------------------------------------
/pattern-core/src/main/java/cascading/pattern/model/tree/TreeFunction.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | *
4 | * Project and contact information: http://www.cascading.org/
5 | *
6 | * This file is part of the Cascading project.
7 | *
8 | * Licensed under the Apache License, Version 2.0 (the "License");
9 | * you may not use this file except in compliance with the License.
10 | * You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package cascading.pattern.model.tree;
22 |
23 | import cascading.flow.FlowProcess;
24 | import cascading.operation.FunctionCall;
25 | import cascading.operation.OperationCall;
26 | import cascading.pattern.model.ModelScoringFunction;
27 | import cascading.pattern.model.tree.decision.DecisionTree;
28 | import cascading.pattern.model.tree.decision.FinalDecision;
29 | import org.slf4j.Logger;
30 | import org.slf4j.LoggerFactory;
31 |
32 | /** Class TreeFunction scores each argument tuple with the decision tree defined by the given {@link TreeSpec}, emitting either the final decision's score or, when {@code returnIndex} is true, its index. */
33 | public class TreeFunction extends ModelScoringFunction
34 | {
35 | private static final Logger LOG = LoggerFactory.getLogger( TreeFunction.class );
36 |
37 | Result result; // chosen in the constructor: maps a FinalDecision to the value that is emitted
38 |
39 | public TreeFunction( TreeSpec treeSpec )
40 | {
41 | this( treeSpec, false, SAFE_DEFAULT ); // emit the score rather than the index; SAFE_DEFAULT is declared outside this class
42 | }
43 |
44 | public TreeFunction( TreeSpec spec, boolean returnIndex, boolean safe )
45 | {
46 | super( spec, safe );
47 |
48 | if( returnIndex )
49 | result = new Result()
50 | {
51 | @Override
52 | public Integer transform( FinalDecision finalDecision )
53 | {
54 | return finalDecision.getIndex(); // boxed int from FinalDecision.getIndex()
55 | }
56 | };
57 | else
58 | result = new Result()
59 | {
60 | @Override
61 | public Object transform( FinalDecision finalDecision )
62 | {
63 | return finalDecision.getScore();
64 | }
65 | };
66 | }
67 |
68 | @Override
69 | public void prepare( FlowProcess flowProcess, OperationCall> operationCall )
70 | {
71 | super.prepare( flowProcess, operationCall );
72 |
73 | operationCall.getContext().payload = getSpec().getTree().createDecisionTree( getSpec().getCategoriesArray(), operationCall.getArgumentFields() ); // build the tree here and keep it in the context payload for operate()
74 | }
75 |
76 | @Override
77 | public void operate( FlowProcess flowProcess, FunctionCall> functionCall )
78 | {
79 | DecisionTree decisionTree = functionCall.getContext().payload; // the tree stored by prepare()
80 |
81 | FinalDecision finalDecision = decisionTree.decide( functionCall.getArguments() ); // NOTE(review): ParentDecision.decide returns null when no successor matches; transform() below would then dereference null
82 |
83 | LOG.debug( "decision: {}", finalDecision );
84 |
85 | Object result = this.result.transform( finalDecision ); // local intentionally shadows the field of the same name
86 |
87 | functionCall.getOutputCollector().add( functionCall.getContext().result( result ) );
88 | }
89 | }
90 |
--------------------------------------------------------------------------------
/pattern-core/src/main/java/cascading/pattern/model/tree/TreeSpec.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | *
4 | * Project and contact information: http://www.cascading.org/
5 | *
6 | * This file is part of the Cascading project.
7 | *
8 | * Licensed under the Apache License, Version 2.0 (the "License");
9 | * you may not use this file except in compliance with the License.
10 | * You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package cascading.pattern.model.tree;
22 |
23 | import cascading.pattern.model.ModelSchema;
24 | import cascading.pattern.model.Spec;
25 |
26 |
27 | /**
28 | * Class TreeSpec is used to define a decision tree model. It simply holds a {@link Tree} instance
29 | * populated with {@link cascading.pattern.model.tree.predicate.Predicate} instances, supplied at construction or later via {@link #setTree(Tree)}.
30 | */
31 | public class TreeSpec extends Spec
32 | {
33 | public Tree tree; // NOTE(review): public and mutable even though getTree()/setTree() exist
34 |
35 | public TreeSpec( ModelSchema modelSchema )
36 | {
37 | super( modelSchema ); // tree stays null until setTree() is called or the field is assigned
38 | }
39 |
40 | public TreeSpec( ModelSchema modelSchema, Tree tree )
41 | {
42 | super( modelSchema );
43 | this.tree = tree;
44 | }
45 |
46 | public Tree getTree()
47 | {
48 | return tree;
49 | }
50 |
51 | public void setTree( Tree tree )
52 | {
53 | this.tree = tree;
54 | }
55 |
56 | @Override
57 | public String toString()
58 | {
59 | final StringBuilder sb = new StringBuilder( "TreeSpec{" );
60 | sb.append( "tree=" ).append( tree );
61 | sb.append( '}' );
62 | return sb.toString();
63 | }
64 | }
65 |
--------------------------------------------------------------------------------
/pattern-core/src/main/java/cascading/pattern/model/tree/decision/Decision.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | *
4 | * Project and contact information: http://www.cascading.org/
5 | *
6 | * This file is part of the Cascading project.
7 | *
8 | * Licensed under the Apache License, Version 2.0 (the "License");
9 | * you may not use this file except in compliance with the License.
10 | * You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package cascading.pattern.model.tree.decision;
22 |
23 | import java.util.List;
24 |
25 | import cascading.pattern.model.tree.Node;
26 | import cascading.pattern.model.tree.Tree;
27 | import cascading.tuple.TupleEntry;
28 | import org.jgrapht.GraphPath;
29 | import org.jgrapht.Graphs;
30 | import org.jgrapht.alg.KShortestPaths;
31 |
32 | /**
33 | * Class Decision is the abstract base of an evaluable decision; each instance carries a name built from the dot-separated IDs on the path from the {@link Tree} root to its {@link Node}.
34 | */
35 | public abstract class Decision
36 | {
37 | protected final String name;
38 |
39 | public Decision( Tree tree, Node node )
40 | {
41 | this.name = createName( tree, node ); // NOTE(review): overridable method invoked from the constructor
42 | }
43 |
44 | public String getName()
45 | {
46 | return name;
47 | }
48 |
49 | protected String createName( Tree tree, Node node )
50 | {
51 | if( tree.getRoot() == node ) // reference comparison, not Node.equals(); the root is named by its bare ID
52 | return node.getID();
53 |
54 | List> paths = new KShortestPaths( tree.getGraph(), tree.getRoot(), 1 ).getPaths( node ); // request a single (k = 1) path from the root to node
55 |
56 | List predecessors = Graphs.getPathVertexList( paths.get( 0 ) ); // NOTE(review): assumes a path exists; paths is not checked for null or empty
57 |
58 | predecessors.remove( node ); // drop the node itself, its ID is appended after the loop
59 |
60 | String name = "";
61 |
62 | for( Node predecessor : predecessors )
63 | name += predecessor.getID() + "."; // each ancestor ID is followed by a dot
64 |
65 | name += node.getID();
66 |
67 | return name;
68 | }
69 |
70 | protected abstract FinalDecision decide( TupleEntry tupleEntry );
71 |
72 | @Override
73 | public String toString()
74 | {
75 | final StringBuilder sb = new StringBuilder( "Decision{" );
76 | toString( sb ); // overload below appends the fields, letting subclasses extend the output
77 | sb.append( '}' );
78 | return sb.toString();
79 | }
80 |
81 | protected StringBuilder toString( StringBuilder sb )
82 | {
83 | return sb.append( "name='" ).append( name ).append( '\'' );
84 | }
85 | }
86 |
--------------------------------------------------------------------------------
/pattern-core/src/main/java/cascading/pattern/model/tree/decision/DecisionTree.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | *
4 | * Project and contact information: http://www.cascading.org/
5 | *
6 | * This file is part of the Cascading project.
7 | *
8 | * Licensed under the Apache License, Version 2.0 (the "License");
9 | * you may not use this file except in compliance with the License.
10 | * You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package cascading.pattern.model.tree.decision;
22 |
23 | import cascading.pattern.model.tree.Node;
24 | import cascading.pattern.model.tree.Tree;
25 | import cascading.tuple.Fields;
26 | import cascading.tuple.TupleEntry;
27 |
28 | /**
29 | * Class DecisionTree is the public face of the otherwise package-private ParentDecision hierarchy; it widens {@link #decide(TupleEntry)} to public and delegates to the inherited implementation.
30 | */
31 | public class DecisionTree extends ParentDecision
32 | {
33 | public DecisionTree( String[] categories, Fields expectedFields, Tree tree, Node node )
34 | {
35 | super( categories, expectedFields, tree, node );
36 | }
37 |
38 | @Override
39 | public FinalDecision decide( TupleEntry tupleEntry )
40 | {
41 | return super.decide( tupleEntry ); // no added logic; the override exists only to raise visibility from protected
42 | }
43 | }
44 |
--------------------------------------------------------------------------------
/pattern-core/src/main/java/cascading/pattern/model/tree/decision/FinalDecision.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | *
4 | * Project and contact information: http://www.cascading.org/
5 | *
6 | * This file is part of the Cascading project.
7 | *
8 | * Licensed under the Apache License, Version 2.0 (the "License");
9 | * you may not use this file except in compliance with the License.
10 | * You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package cascading.pattern.model.tree.decision;
22 |
23 | import java.util.Arrays;
24 |
25 | import cascading.pattern.model.tree.Node;
26 | import cascading.pattern.model.tree.Tree;
27 | import cascading.tuple.TupleEntry;
28 | import org.slf4j.Logger;
29 | import org.slf4j.LoggerFactory;
30 |
31 | /**
32 | * Class FinalDecision is a terminal {@link Decision} holding a node's non-null score and the position of that score within the given categories (-1 when no categories are given or the score is not among them).
33 | */
34 | public class FinalDecision extends Decision
35 | {
36 | private static final Logger LOG = LoggerFactory.getLogger( FinalDecision.class );
37 |
38 | private final Object score;
39 | private final int index;
40 |
41 | public FinalDecision( Tree tree, Node node )
42 | {
43 | this( null, tree, node ); // no categories, so index will be -1
44 | }
45 |
46 | public FinalDecision( String[] categories, Tree tree, Node node )
47 | {
48 | super( tree, node );
49 |
50 | this.score = node.getScore();
51 |
52 | if( this.score == null )
53 | throw new IllegalStateException( "score may not be null, likely missing leaf node in tree at: " + getName() ); // name was already set by the super constructor
54 |
55 | if( categories != null )
56 | this.index = Arrays.asList( categories ).indexOf( this.score ); // indexOf yields -1 when the score is not one of the categories
57 | else
58 | this.index = -1;
59 | }
60 |
61 | public Object getScore()
62 | {
63 | return score;
64 | }
65 |
66 | public int getIndex()
67 | {
68 | return index;
69 | }
70 |
71 | @Override
72 | protected FinalDecision decide( TupleEntry tupleEntry )
73 | {
74 | LOG.debug( "decision: {}", name );
75 |
76 | return this; // always resolves to itself; tupleEntry is not consulted
77 | }
78 |
79 | @Override
80 | public String toString()
81 | {
82 | final StringBuilder sb = new StringBuilder( "FinalDecision{" );
83 | sb.append( "name='" ).append( getName() ).append( '\'' );
84 | sb.append( ",score='" ).append( score ).append( '\'' );
85 | sb.append( '}' );
86 | return sb.toString();
87 | }
88 | }
89 |
--------------------------------------------------------------------------------
/pattern-core/src/main/java/cascading/pattern/model/tree/decision/ParentDecision.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | *
4 | * Project and contact information: http://www.cascading.org/
5 | *
6 | * This file is part of the Cascading project.
7 | *
8 | * Licensed under the Apache License, Version 2.0 (the "License");
9 | * you may not use this file except in compliance with the License.
10 | * You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package cascading.pattern.model.tree.decision;
22 |
23 | import java.util.List;
24 |
25 | import cascading.pattern.model.tree.Node;
26 | import cascading.pattern.model.tree.Tree;
27 | import cascading.tuple.Fields;
28 | import cascading.tuple.TupleEntry;
29 | import org.jgrapht.Graphs;
30 |
31 | /**
32 | * Class ParentDecision is a {@link Decision} with child decisions; {@link #decide(TupleEntry)} returns the first non-null result produced by its successors in order, or null when none yields one.
33 | */
34 | abstract class ParentDecision extends Decision
35 | {
36 | protected final Decision[] successors;
37 |
38 | public ParentDecision( String[] categories, Fields expectedFields, Tree tree, Node node )
39 | {
40 | super( tree, node );
41 |
42 | this.successors = createSuccessors( categories, expectedFields, tree, node ); // NOTE(review): overridable method invoked from the constructor
43 | }
44 |
45 | protected Decision[] createSuccessors( String[] categories, Fields expectedFields, Tree tree, Node node )
46 | {
47 | List successorNodes = Graphs.successorListOf( tree.getGraph(), node );
48 |
49 | if( successorNodes.size() == 0 ) // no outgoing edges: the node itself becomes the single FinalDecision
50 | return new Decision[]{new FinalDecision( categories, tree, node )};
51 |
52 | Decision[] successors = new Decision[ successorNodes.size() ];
53 |
54 | for( int i = 0; i < successorNodes.size(); i++ )
55 | {
56 | Node successorNode = successorNodes.get( i );
57 |
58 | successors[ i ] = new PredicatedDecision( categories, expectedFields, tree, successorNode ); // the child's constructor builds its own successors in turn
59 | }
60 |
61 | return successors;
62 | }
63 |
64 | protected FinalDecision decide( TupleEntry tupleEntry )
65 | {
66 | for( Decision child : successors )
67 | {
68 | FinalDecision decision = child.decide( tupleEntry );
69 |
70 | if( decision != null ) // first successor to produce a decision wins; later ones are not evaluated
71 | return decision;
72 | }
73 |
74 | return null; // no successor produced a decision
75 | }
76 | }
77 |
--------------------------------------------------------------------------------
/pattern-core/src/main/java/cascading/pattern/model/tree/decision/PredicatedDecision.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | *
4 | * Project and contact information: http://www.cascading.org/
5 | *
6 | * This file is part of the Cascading project.
7 | *
8 | * Licensed under the Apache License, Version 2.0 (the "License");
9 | * you may not use this file except in compliance with the License.
10 | * You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package cascading.pattern.model.tree.decision;
22 |
23 | import cascading.pattern.model.tree.Node;
24 | import cascading.pattern.model.tree.Tree;
25 | import cascading.tuple.Fields;
26 | import cascading.tuple.TupleEntry;
27 |
28 | /**
29 | * Class PredicatedDecision guards its successors with the node's predicate: it returns null when the {@link PredicateEvaluator} rejects the tuple, otherwise it defers to the inherited successor search.
30 | */
31 | class PredicatedDecision extends ParentDecision
32 | {
33 | private final PredicateEvaluator evaluator;
34 |
35 | public PredicatedDecision( String[] categories, Fields expectedFields, Tree tree, Node node )
36 | {
37 | super( categories, expectedFields, tree, node );
38 |
39 | this.evaluator = new PredicateEvaluator( expectedFields, node.getPredicate() ); // NOTE(review): getPredicate() is null for nodes built with the id-only constructor; confirm PredicateEvaluator tolerates that
40 | }
41 |
42 | @Override
43 | protected FinalDecision decide( TupleEntry tupleEntry )
44 | {
45 | boolean result = evaluator.evaluate( tupleEntry );
46 |
47 | if( !result ) // predicate not satisfied: null tells the caller to try its next successor
48 | return null;
49 |
50 | return super.decide( tupleEntry );
51 | }
52 |
53 | }
54 |
--------------------------------------------------------------------------------
/pattern-core/src/main/java/cascading/pattern/model/tree/predicate/ComparablePredicate.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | *
4 | * Project and contact information: http://www.cascading.org/
5 | *
6 | * This file is part of the Cascading project.
7 | *
8 | * Licensed under the Apache License, Version 2.0 (the "License");
9 | * you may not use this file except in compliance with the License.
10 | * You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package cascading.pattern.model.tree.predicate;
22 |
23 | /**
24 | * Class ComparablePredicate is an abstract {@link SimplePredicate} that passes the field name to its parent and additionally holds a value of type T, exposed through {@link #getValue()}.
25 | */
26 | public abstract class ComparablePredicate extends SimplePredicate
27 | {
28 | T value; // package-private and non-final; NOTE(review): the declaration of type parameter T is not visible in this listing
29 |
30 | protected ComparablePredicate( String field, T value )
31 | {
32 | super( field );
33 | this.value = value; // stored as given, no null check
34 | }
35 |
36 | public T getValue()
37 | {
38 | return value;
39 | }
40 | }
41 |
--------------------------------------------------------------------------------
/pattern-core/src/main/java/cascading/pattern/model/tree/predicate/EqualsToPredicate.java:
--------------------------------------------------------------------------------
1 | /*
2 | * Copyright (c) 2007-2015 Concurrent, Inc. All Rights Reserved.
3 | *
4 | * Project and contact information: http://www.cascading.org/
5 | *
6 | * This file is part of the Cascading project.
7 | *
8 | * Licensed under the Apache License, Version 2.0 (the "License");
9 | * you may not use this file except in compliance with the License.
10 | * You may obtain a copy of the License at
11 | *
12 | * http://www.apache.org/licenses/LICENSE-2.0
13 | *
14 | * Unless required by applicable law or agreed to in writing, software
15 | * distributed under the License is distributed on an "AS IS" BASIS,
16 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17 | * See the License for the specific language governing permissions and
18 | * limitations under the License.
19 | */
20 |
21 | package cascading.pattern.model.tree.predicate;
22 |
23 | /**
24 | *
25 | */
26 | public class EqualsToPredicate extends ComparablePredicate