├── CLA.pdf ├── bin ├── s4-build │ ├── gradle-wrapper-1.4.jar │ ├── README.md │ └── gradle-wrapper-1.4.properties ├── samza-kryo ├── samoa-storm.properties ├── samoa-s4.properties ├── samza-dist │ ├── run-job.sh │ ├── run-am.sh │ └── run-container.sh └── run-container.sh ├── .gitignore ├── CONTRIBUTING.md ├── samoa-test ├── README.md ├── src │ └── main │ │ └── assembly │ │ └── test-jar-with-dependencies.xml └── pom.xml ├── samoa-s4 ├── samoa-s4-adapter │ ├── src │ │ └── main │ │ │ └── java │ │ │ └── samoa │ │ │ └── topology │ │ │ └── adapter │ │ │ ├── package-info.java │ │ │ ├── S4AdapterApp.java │ │ │ └── S4EntranceProcessingItem.java │ └── pom.xml └── src │ └── main │ ├── java │ └── com │ │ └── yahoo │ │ └── labs │ │ └── samoa │ │ └── topology │ │ └── impl │ │ ├── SamoaSerializerModule.java │ │ ├── S4Topology.java │ │ └── S4Event.java │ └── assembly │ └── samoa-s4.xml ├── samoa-api └── src │ └── main │ └── java │ └── com │ └── yahoo │ └── labs │ └── samoa │ ├── evaluation │ ├── ClassificationPerformanceEvaluator.java │ ├── RegressionPerformanceEvaluator.java │ ├── PerformanceEvaluator.java │ ├── ClusteringResultContentEvent.java │ └── ClusteringEvaluationContentEvent.java │ ├── learners │ ├── RegressionLearner.java │ ├── ClassificationLearner.java │ ├── classifiers │ │ ├── rules │ │ │ ├── common │ │ │ │ ├── RulePassiveLearningNode.java │ │ │ │ ├── RuleActiveLearningNode.java │ │ │ │ ├── NonLearningRule.java │ │ │ │ ├── PassiveRule.java │ │ │ │ └── RuleSplitNode.java │ │ │ └── distributed │ │ │ │ ├── AssignmentContentEvent.java │ │ │ │ ├── RuleContentEvent.java │ │ │ │ └── PredicateContentEvent.java │ │ ├── ensemble │ │ │ └── BoostingDistributorProcessor.java │ │ ├── trees │ │ │ ├── DeleteContentEvent.java │ │ │ ├── LearningNode.java │ │ │ ├── InactiveLearningNode.java │ │ │ ├── ControlContentEvent.java │ │ │ └── FoundNode.java │ │ └── LocalLearner.java │ ├── AdaptiveLearner.java │ └── Learner.java │ ├── moa │ ├── core │ │ ├── Example.java │ │ ├── 
ObjectRepository.java │ │ ├── InstanceExample.java │ │ └── FastVector.java │ ├── streams │ │ ├── clustering │ │ │ ├── ClusterEventListener.java │ │ │ ├── ClusterEvent.java │ │ │ └── ClusteringStream.java │ │ └── InstanceStream.java │ ├── classifiers │ │ ├── rules │ │ │ └── core │ │ │ │ ├── Predicate.java │ │ │ │ └── voting │ │ │ │ └── UniformWeightedVote.java │ │ ├── Regressor.java │ │ └── core │ │ │ ├── splitcriteria │ │ │ ├── SDRSplitCriterion.java │ │ │ ├── InfoGainSplitCriterionMultilabel.java │ │ │ └── SplitCriterion.java │ │ │ ├── conditionaltests │ │ │ ├── InstanceConditionalBinaryTest.java │ │ │ └── NominalAttributeMultiwayTest.java │ │ │ ├── attributeclassobservers │ │ │ ├── NumericAttributeClassObserver.java │ │ │ └── DiscreteAttributeClassObserver.java │ │ │ └── AttributeSplitSuggestion.java │ ├── tasks │ │ ├── ResultPreviewListener.java │ │ └── Task.java │ ├── MOAObject.java │ ├── clusterers │ │ └── Clusterer.java │ ├── evaluation │ │ ├── LearningPerformanceEvaluator.java │ │ └── LearningEvaluation.java │ ├── cluster │ │ └── Miniball.java │ └── AbstractMOAObject.java │ ├── utils │ ├── PartitioningScheme.java │ └── StreamDestination.java │ ├── core │ ├── ContentEvent.java │ ├── SerializableInstance.java │ ├── Globals.java │ ├── EntranceProcessor.java │ └── Processor.java │ ├── topology │ ├── IProcessingItem.java │ ├── ISubmitter.java │ ├── EntranceProcessingItem.java │ ├── Stream.java │ ├── Topology.java │ └── ProcessingItem.java │ ├── examples │ ├── HelloWorldDestinationProcessor.java │ ├── HelloWorldContentEvent.java │ └── HelloWorldSourceProcessor.java │ ├── tasks │ └── Task.java │ └── streams │ ├── fs │ └── FileStreamSource.java │ └── StreamSource.java ├── NOTICE.txt ├── samoa-storm └── src │ ├── main │ └── java │ │ └── com │ │ └── yahoo │ │ └── labs │ │ └── samoa │ │ └── topology │ │ └── impl │ │ ├── StormTopologyNode.java │ │ ├── StormTopology.java │ │ ├── StormBoltStream.java │ │ ├── StormSpoutStream.java │ │ └── StormStream.java │ └── test │ └── 
java │ └── com │ └── yahoo │ └── labs │ └── samoa │ └── AlgosTest.java ├── samoa-local └── src │ ├── main │ ├── resources │ │ └── log4j.xml │ └── java │ │ └── com │ │ └── yahoo │ │ └── labs │ │ └── samoa │ │ └── topology │ │ └── impl │ │ ├── SimpleEngine.java │ │ ├── SimpleEntranceProcessingItem.java │ │ ├── SimpleTopology.java │ │ └── SimpleComponentFactory.java │ └── test │ └── java │ └── com │ └── yahoo │ └── labs │ └── samoa │ └── topology │ └── impl │ └── SimpleEngineTest.java ├── .travis.yml ├── RELEASE.txt ├── samoa-instances ├── pom.xml └── src │ └── main │ └── java │ └── com │ └── yahoo │ └── labs │ └── samoa │ └── instances │ ├── InstanceData.java │ ├── SparseInstance.java │ ├── Instance.java │ ├── DenseInstance.java │ └── SingleClassInstanceData.java ├── samoa-threads └── src │ ├── main │ └── java │ │ └── com │ │ └── yahoo │ │ └── labs │ │ └── samoa │ │ └── topology │ │ └── impl │ │ ├── ThreadsEntranceProcessingItem.java │ │ ├── ThreadsProcessingItemInstance.java │ │ ├── ThreadsEventRunnable.java │ │ ├── ThreadsComponentFactory.java │ │ └── ThreadsTopology.java │ └── test │ └── java │ └── com │ └── yahoo │ └── labs │ └── samoa │ └── topology │ └── impl │ ├── ThreadsEventRunnableTest.java │ └── ThreadsProcessingItemInstanceTest.java └── samoa-samza └── src └── main └── java └── com └── yahoo └── labs └── samoa ├── topology └── impl │ ├── SamzaProcessingNode.java │ ├── SamzaComponentFactory.java │ ├── SamoaSystemFactory.java │ └── SamzaTopology.java └── utils └── SerializableSerializer.java /CLA.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YahooArchive/samoa/HEAD/CLA.pdf -------------------------------------------------------------------------------- /bin/s4-build/gradle-wrapper-1.4.jar: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YahooArchive/samoa/HEAD/bin/s4-build/gradle-wrapper-1.4.jar 
-------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | #maven 2 | target/ 3 | 4 | #eclipse 5 | .classpath 6 | .project 7 | .settings/ 8 | 9 | #DS_Store 10 | .DS_Store 11 | 12 | #intellij 13 | .idea/ 14 | .iml 15 | -------------------------------------------------------------------------------- /bin/s4-build/README.md: -------------------------------------------------------------------------------- 1 | As a workaround for travis CI using gradle 2.1+, which causes issues with the s4 build, this directory contains 2 | pre-generated gradlew scripts and libraries for gradle 1.4 that can be copied and used during the travis CI build. 3 | 4 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | How to contribute? 2 | ================= 3 | 4 | User contribution is essential to keep moving the project forward. 5 | 6 | # Getting Started 7 | 8 | 1. [Fork the repository](https://help.github.com/articles/fork-a-repo) on GitHub 9 | 10 | 2. Sign the [Contributor License Agreement](http://www.clahub.com/agreements/gdfm/samoa) 11 | 12 | 3. Create a feature branch ```git checkout -b branch_name``` 13 | 14 | 4. Make your changes (please make commits that are logical units) 15 | 16 | 5. Build and run tests 17 | 18 | 6. 
Submit a [pull request](https://help.github.com/articles/using-pull-requests) 19 | -------------------------------------------------------------------------------- /bin/samza-kryo: -------------------------------------------------------------------------------- 1 | com.yahoo.labs.samoa.learners.classifiers.trees.AttributeContentEvent:com.yahoo.labs.samoa.learners.classifiers.trees.AttributeContentEvent$AttributeCEFullPrecSerializer 2 | com.yahoo.labs.samoa.learners.classifiers.trees.ComputeContentEvent:com.yahoo.labs.samoa.learners.classifiers.trees.ComputeContentEvent$ComputeCEFullPrecSerializer 3 | com.yahoo.labs.samoa.moa.classifiers.core.AttributeSplitSuggestion:com.yahoo.labs.samoa.utils.SerializableSerializer 4 | 5 | com.yahoo.labs.samoa.learners.classifiers.rules.common.TargetMean:com.yahoo.labs.samoa.learners.classifiers.rules.common.TargetMean$TargetMeanSerializer 6 | com.yahoo.labs.samoa.learners.classifiers.rules.common.Perceptron:com.yahoo.labs.samoa.learners.classifiers.rules.common.Perceptron$PerceptronSerializer 7 | -------------------------------------------------------------------------------- /samoa-test/README.md: -------------------------------------------------------------------------------- 1 | This module contains a test framework for simplifying regression testing of Samoa algorithms on various platforms. 2 | 3 | The test framework is generic and reusable for multiple platforms. The platform modules that make use of the test framework add a maven dependency to a test-jar artifact of the samoa-test module. This test-jar artifact includes the test framework classes and its dependencies. 4 | 5 | For defining tests, we reuse the code from the test framework but customize tests according to the platform capabilities. 
6 | 7 | For each algorithm to test, we must provide: 8 | 9 | * the task class for the platform 10 | * the algorithm (referring to the provided string templates in this module) 11 | * the input parameters 12 | * the expectations (thresholds or values) 13 | 14 | See existing code in samoa-local, samoa-threads and samoa-storm for some examples. 15 | -------------------------------------------------------------------------------- /samoa-s4/samoa-s4-adapter/src/main/java/samoa/topology/adapter/package-info.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 3 | */ 4 | /** 5 | * @author severien 6 | * 7 | */ 8 | package samoa.topology.adapter; 9 | 10 | /* 11 | * #%L 12 | * SAMOA 13 | * %% 14 | * Copyright (C) 2013 Yahoo! Inc. 15 | * %% 16 | * Licensed under the Apache License, Version 2.0 (the "License"); 17 | * you may not use this file except in compliance with the License. 18 | * You may obtain a copy of the License at 19 | * 20 | * http://www.apache.org/licenses/LICENSE-2.0 21 | * 22 | * Unless required by applicable law or agreed to in writing, software 23 | * distributed under the License is distributed on an "AS IS" BASIS, 24 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 25 | * See the License for the specific language governing permissions and 26 | * limitations under the License. 27 | * #L% 28 | */ 29 | -------------------------------------------------------------------------------- /samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/ClassificationPerformanceEvaluator.java: -------------------------------------------------------------------------------- 1 | package com.yahoo.labs.samoa.evaluation; 2 | 3 | /* 4 | * #%L 5 | * SAMOA 6 | * %% 7 | * Copyright (C) 2013 Yahoo! Inc. 8 | * %% 9 | * Licensed under the Apache License, Version 2.0 (the "License"); 10 | * you may not use this file except in compliance with the License. 
11 | * You may obtain a copy of the License at 12 | * 13 | * http://www.apache.org/licenses/LICENSE-2.0 14 | * 15 | * Unless required by applicable law or agreed to in writing, software 16 | * distributed under the License is distributed on an "AS IS" BASIS, 17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | * See the License for the specific language governing permissions and 19 | * limitations under the License. 20 | * #L% 21 | */ 22 | 23 | public interface ClassificationPerformanceEvaluator extends PerformanceEvaluator { 24 | } 25 | -------------------------------------------------------------------------------- /samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/RegressionPerformanceEvaluator.java: -------------------------------------------------------------------------------- 1 | package com.yahoo.labs.samoa.evaluation; 2 | 3 | /* 4 | * #%L 5 | * SAMOA 6 | * %% 7 | * Copyright (C) 2013 Yahoo! Inc. 8 | * %% 9 | * Licensed under the Apache License, Version 2.0 (the "License"); 10 | * you may not use this file except in compliance with the License. 11 | * You may obtain a copy of the License at 12 | * 13 | * http://www.apache.org/licenses/LICENSE-2.0 14 | * 15 | * Unless required by applicable law or agreed to in writing, software 16 | * distributed under the License is distributed on an "AS IS" BASIS, 17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | * See the License for the specific language governing permissions and 19 | * limitations under the License. 
20 | * #L% 21 | */ 22 | 23 | public interface RegressionPerformanceEvaluator extends PerformanceEvaluator { 24 | 25 | } 26 | -------------------------------------------------------------------------------- /samoa-api/src/main/java/com/yahoo/labs/samoa/learners/RegressionLearner.java: -------------------------------------------------------------------------------- 1 | package com.yahoo.labs.samoa.learners; 2 | 3 | /* 4 | * #%L 5 | * SAMOA 6 | * %% 7 | * Copyright (C) 2013 - 2014 Yahoo! Inc. 8 | * %% 9 | * Licensed under the Apache License, Version 2.0 (the "License"); 10 | * you may not use this file except in compliance with the License. 11 | * You may obtain a copy of the License at 12 | * 13 | * http://www.apache.org/licenses/LICENSE-2.0 14 | * 15 | * Unless required by applicable law or agreed to in writing, software 16 | * distributed under the License is distributed on an "AS IS" BASIS, 17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | * See the License for the specific language governing permissions and 19 | * limitations under the License. 20 | * #L% 21 | */ 22 | 23 | import com.yahoo.labs.samoa.learners.Learner; 24 | 25 | public interface RegressionLearner extends Learner { 26 | 27 | } 28 | -------------------------------------------------------------------------------- /samoa-api/src/main/java/com/yahoo/labs/samoa/learners/ClassificationLearner.java: -------------------------------------------------------------------------------- 1 | package com.yahoo.labs.samoa.learners; 2 | 3 | /* 4 | * #%L 5 | * SAMOA 6 | * %% 7 | * Copyright (C) 2013 - 2014 Yahoo! Inc. 8 | * %% 9 | * Licensed under the Apache License, Version 2.0 (the "License"); 10 | * you may not use this file except in compliance with the License. 
11 | * You may obtain a copy of the License at 12 | * 13 | * http://www.apache.org/licenses/LICENSE-2.0 14 | * 15 | * Unless required by applicable law or agreed to in writing, software 16 | * distributed under the License is distributed on an "AS IS" BASIS, 17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | * See the License for the specific language governing permissions and 19 | * limitations under the License. 20 | * #L% 21 | */ 22 | 23 | import com.yahoo.labs.samoa.learners.Learner; 24 | 25 | public interface ClassificationLearner extends Learner { 26 | 27 | } 28 | -------------------------------------------------------------------------------- /bin/s4-build/gradle-wrapper-1.4.properties: -------------------------------------------------------------------------------- 1 | ### 2 | # #%L 3 | # SAMOA 4 | # %% 5 | # Copyright (C) 2015 Yahoo! Inc. 6 | # %% 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 
18 | # #L% 19 | ### 20 | #Wed Sep 24 11:25:39 CEST 2014 21 | distributionBase=GRADLE_USER_HOME 22 | distributionPath=wrapper/dists 23 | zipStoreBase=GRADLE_USER_HOME 24 | zipStorePath=wrapper/dists 25 | distributionUrl=http\://services.gradle.org/distributions/gradle-1.4-bin.zip 26 | -------------------------------------------------------------------------------- /samoa-test/src/main/assembly/test-jar-with-dependencies.xml: -------------------------------------------------------------------------------- 1 | 4 | test-jar-with-dependencies 5 | 6 | jar 7 | 8 | false 9 | 10 | 11 | / 12 | true 13 | 14 | true 15 | false 16 | true 17 | 18 | 19 | -------------------------------------------------------------------------------- /samoa-api/src/main/java/com/yahoo/labs/samoa/moa/core/Example.java: -------------------------------------------------------------------------------- 1 | 2 | package com.yahoo.labs.samoa.moa.core; 3 | 4 | /* 5 | * #%L 6 | * SAMOA 7 | * %% 8 | * Copyright (C) 2007 University of Waikato, Hamilton, New Zealand 9 | * %% 10 | * Licensed under the Apache License, Version 2.0 (the "License"); 11 | * you may not use this file except in compliance with the License. 12 | * You may obtain a copy of the License at 13 | * 14 | * http://www.apache.org/licenses/LICENSE-2.0 15 | * 16 | * Unless required by applicable law or agreed to in writing, software 17 | * distributed under the License is distributed on an "AS IS" BASIS, 18 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 19 | * See the License for the specific language governing permissions and 20 | * limitations under the License. 
21 | * #L% 22 | */ 23 | 24 | public interface Example< T extends Object> { 25 | 26 | public T getData(); 27 | 28 | public double weight(); 29 | 30 | public void setWeight(double weight); 31 | } 32 | -------------------------------------------------------------------------------- /NOTICE.txt: -------------------------------------------------------------------------------- 1 | SAMOA 2 | Copyright 2013 Yahoo! Inc. 3 | 4 | Licensed under the Apache License, Version 2.0 (the 5 | "License"); you may not use this file except in compliance 6 | with the License. You may obtain a copy of the License at: 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, 11 | software distributed under the License is distributed on 12 | an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 13 | KIND, either express or implied. See the License for the 14 | specific language governing permissions and limitations 15 | under the License. 16 | 17 | Except as specifically stated below, the 3rd party software packages are not distributed as part of 18 | this project, but instead are separately downloaded from the respective provider. 19 | 20 | * MOA version 13.08 (redistributed under the Apache License v2 - http://www.apache.org/licenses/LICENSE-2.0) 21 | Library for data stream mining. Only a small subset of the original library is redistributed with SAMOA. 
22 | http://moa.cms.waikato.ac.nz 23 | -------------------------------------------------------------------------------- /samoa-api/src/main/java/com/yahoo/labs/samoa/moa/streams/clustering/ClusterEventListener.java: -------------------------------------------------------------------------------- 1 | 2 | package com.yahoo.labs.samoa.moa.streams.clustering; 3 | 4 | /* 5 | * #%L 6 | * SAMOA 7 | * %% 8 | * Copyright (C) 2010 RWTH Aachen University, Germany 9 | * %% 10 | * Licensed under the Apache License, Version 2.0 (the "License"); 11 | * you may not use this file except in compliance with the License. 12 | * You may obtain a copy of the License at 13 | * 14 | * http://www.apache.org/licenses/LICENSE-2.0 15 | * 16 | * Unless required by applicable law or agreed to in writing, software 17 | * distributed under the License is distributed on an "AS IS" BASIS, 18 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 19 | * See the License for the specific language governing permissions and 20 | * limitations under the License. 21 | * #L% 22 | */ 23 | 24 | import java.util.EventListener; 25 | 26 | public interface ClusterEventListener extends EventListener { 27 | 28 | public void changeCluster(ClusterEvent e); 29 | 30 | } 31 | 32 | -------------------------------------------------------------------------------- /samoa-api/src/main/java/com/yahoo/labs/samoa/utils/PartitioningScheme.java: -------------------------------------------------------------------------------- 1 | package com.yahoo.labs.samoa.utils; 2 | 3 | /* 4 | * #%L 5 | * SAMOA 6 | * %% 7 | * Copyright (C) 2013 Yahoo! Inc. 8 | * %% 9 | * Licensed under the Apache License, Version 2.0 (the "License"); 10 | * you may not use this file except in compliance with the License. 
11 | * You may obtain a copy of the License at 12 | * 13 | * http://www.apache.org/licenses/LICENSE-2.0 14 | * 15 | * Unless required by applicable law or agreed to in writing, software 16 | * distributed under the License is distributed on an "AS IS" BASIS, 17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | * See the License for the specific language governing permissions and 19 | * limitations under the License. 20 | * #L% 21 | */ 22 | 23 | /** 24 | * Represents the 3 schemes to partition the streams 25 | * @author Anh Thu Vu 26 | * 27 | */ 28 | public enum PartitioningScheme { 29 | SHUFFLE, GROUP_BY_KEY, BROADCAST 30 | } 31 | // TODO: use this enum in S4 32 | // Storm doesn't seem to need this -------------------------------------------------------------------------------- /samoa-api/src/main/java/com/yahoo/labs/samoa/moa/core/ObjectRepository.java: -------------------------------------------------------------------------------- 1 | package com.yahoo.labs.samoa.moa.core; 2 | 3 | /* 4 | * #%L 5 | * SAMOA 6 | * %% 7 | * Copyright (C) 2007 University of Waikato, Hamilton, New Zealand 8 | * %% 9 | * Licensed under the Apache License, Version 2.0 (the "License"); 10 | * you may not use this file except in compliance with the License. 11 | * You may obtain a copy of the License at 12 | * 13 | * http://www.apache.org/licenses/LICENSE-2.0 14 | * 15 | * Unless required by applicable law or agreed to in writing, software 16 | * distributed under the License is distributed on an "AS IS" BASIS, 17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | * See the License for the specific language governing permissions and 19 | * limitations under the License. 20 | * #L% 21 | */ 22 | 23 | /** 24 | * Interface for object repositories. 
25 | * 26 | * @author Richard Kirkby (rkirkby@cs.waikato.ac.nz) 27 | * @version $Revision: 7 $ 28 | */ 29 | public interface ObjectRepository { 30 | 31 | Object getObjectNamed(String string); 32 | } 33 | -------------------------------------------------------------------------------- /samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/rules/core/Predicate.java: -------------------------------------------------------------------------------- 1 | package com.yahoo.labs.samoa.moa.classifiers.rules.core; 2 | 3 | /* 4 | * #%L 5 | * SAMOA 6 | * %% 7 | * Copyright (C) 2013 - 2014 Yahoo! Inc. 8 | * %% 9 | * Licensed under the Apache License, Version 2.0 (the "License"); 10 | * you may not use this file except in compliance with the License. 11 | * You may obtain a copy of the License at 12 | * 13 | * http://www.apache.org/licenses/LICENSE-2.0 14 | * 15 | * Unless required by applicable law or agreed to in writing, software 16 | * distributed under the License is distributed on an "AS IS" BASIS, 17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | * See the License for the specific language governing permissions and 19 | * limitations under the License. 20 | * #L% 21 | */ 22 | 23 | import com.yahoo.labs.samoa.instances.Instance; 24 | 25 | /** 26 | * Interface for a predicate (a feature) in rules. 
27 | * 28 | */ 29 | public interface Predicate { 30 | 31 | public boolean evaluate(Instance instance); 32 | 33 | } 34 | -------------------------------------------------------------------------------- /samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/Regressor.java: -------------------------------------------------------------------------------- 1 | package com.yahoo.labs.samoa.moa.classifiers; 2 | 3 | /* 4 | * #%L 5 | * SAMOA 6 | * %% 7 | * Copyright (C) 2007 University of Waikato, Hamilton, New Zealand 8 | * %% 9 | * Licensed under the Apache License, Version 2.0 (the "License"); 10 | * you may not use this file except in compliance with the License. 11 | * You may obtain a copy of the License at 12 | * 13 | * http://www.apache.org/licenses/LICENSE-2.0 14 | * 15 | * Unless required by applicable law or agreed to in writing, software 16 | * distributed under the License is distributed on an "AS IS" BASIS, 17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | * See the License for the specific language governing permissions and 19 | * limitations under the License. 20 | * #L% 21 | */ 22 | 23 | /** 24 | * Regressor interface for incremental regression models. It is used only in the GUI Regression Tab. 25 | * 26 | * @author Richard Kirkby (rkirkby@cs.waikato.ac.nz) 27 | * @version $Revision: 7 $ 28 | */ 29 | public interface Regressor { 30 | 31 | } 32 | -------------------------------------------------------------------------------- /samoa-storm/src/main/java/com/yahoo/labs/samoa/topology/impl/StormTopologyNode.java: -------------------------------------------------------------------------------- 1 | package com.yahoo.labs.samoa.topology.impl; 2 | 3 | /* 4 | * #%L 5 | * SAMOA 6 | * %% 7 | * Copyright (C) 2013 Yahoo! Inc. 8 | * %% 9 | * Licensed under the Apache License, Version 2.0 (the "License"); 10 | * you may not use this file except in compliance with the License. 
11 | * You may obtain a copy of the License at 12 | * 13 | * http://www.apache.org/licenses/LICENSE-2.0 14 | * 15 | * Unless required by applicable law or agreed to in writing, software 16 | * distributed under the License is distributed on an "AS IS" BASIS, 17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | * See the License for the specific language governing permissions and 19 | * limitations under the License. 20 | * #L% 21 | */ 22 | 23 | /** 24 | * Interface to represent a node in samoa-storm topology. 25 | * @author Arinto Murdopo 26 | * 27 | */ 28 | interface StormTopologyNode { 29 | 30 | void addToTopology(StormTopology topology, int parallelismHint); 31 | StormStream createStream(); 32 | String getId(); 33 | 34 | } 35 | -------------------------------------------------------------------------------- /samoa-api/src/main/java/com/yahoo/labs/samoa/learners/classifiers/rules/common/RulePassiveLearningNode.java: -------------------------------------------------------------------------------- 1 | package com.yahoo.labs.samoa.learners.classifiers.rules.common; 2 | 3 | /* 4 | * #%L 5 | * SAMOA 6 | * %% 7 | * Copyright (C) 2013 - 2014 Yahoo! Inc. 8 | * %% 9 | * Licensed under the Apache License, Version 2.0 (the "License"); 10 | * you may not use this file except in compliance with the License. 11 | * You may obtain a copy of the License at 12 | * 13 | * http://www.apache.org/licenses/LICENSE-2.0 14 | * 15 | * Unless required by applicable law or agreed to in writing, software 16 | * distributed under the License is distributed on an "AS IS" BASIS, 17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | * See the License for the specific language governing permissions and 19 | * limitations under the License. 20 | * #L% 21 | */ 22 | 23 | /** 24 | * Interface for Rule's LearningNode that does not update 25 | * statistics for expanding rule. It only updates statistics for 26 | * computing predictions. 
27 | * 28 | * @author Anh Thu Vu 29 | * 30 | */ 31 | public interface RulePassiveLearningNode { 32 | 33 | } 34 | -------------------------------------------------------------------------------- /samoa-s4/src/main/java/com/yahoo/labs/samoa/topology/impl/SamoaSerializerModule.java: -------------------------------------------------------------------------------- 1 | package com.yahoo.labs.samoa.topology.impl; 2 | 3 | /* 4 | * #%L 5 | * SAMOA 6 | * %% 7 | * Copyright (C) 2013 - 2014 Yahoo! Inc. 8 | * %% 9 | * Licensed under the Apache License, Version 2.0 (the "License"); 10 | * you may not use this file except in compliance with the License. 11 | * You may obtain a copy of the License at 12 | * 13 | * http://www.apache.org/licenses/LICENSE-2.0 14 | * 15 | * Unless required by applicable law or agreed to in writing, software 16 | * distributed under the License is distributed on an "AS IS" BASIS, 17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | * See the License for the specific language governing permissions and 19 | * limitations under the License. 20 | * #L% 21 | */ 22 | 23 | import org.apache.s4.base.SerializerDeserializer; 24 | 25 | import com.google.inject.AbstractModule; 26 | 27 | public class SamoaSerializerModule extends AbstractModule { 28 | 29 | @Override 30 | protected void configure() { 31 | bind(SerializerDeserializer.class).to(SamoaSerializer.class); 32 | 33 | } 34 | 35 | } 36 | -------------------------------------------------------------------------------- /samoa-api/src/main/java/com/yahoo/labs/samoa/core/ContentEvent.java: -------------------------------------------------------------------------------- 1 | package com.yahoo.labs.samoa.core; 2 | 3 | /* 4 | * #%L 5 | * SAMOA 6 | * %% 7 | * Copyright (C) 2013 Yahoo! Inc. 8 | * %% 9 | * Licensed under the Apache License, Version 2.0 (the "License"); 10 | * you may not use this file except in compliance with the License. 
11 | * You may obtain a copy of the License at 12 | * 13 | * http://www.apache.org/licenses/LICENSE-2.0 14 | * 15 | * Unless required by applicable law or agreed to in writing, software 16 | * distributed under the License is distributed on an "AS IS" BASIS, 17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | * See the License for the specific language governing permissions and 19 | * limitations under the License. 20 | * #L% 21 | */ 22 | 23 | /** 24 | * The Interface ContentEvent. 25 | */ 26 | public interface ContentEvent extends java.io.Serializable { 27 | 28 | /** 29 | * Gets the content event key. 30 | * 31 | * @return the key 32 | */ 33 | public String getKey(); 34 | 35 | /** 36 | * Sets the content event key. 37 | * 38 | * @param key string 39 | */ 40 | public void setKey(String key); 41 | 42 | public boolean isLastEvent(); 43 | } 44 | -------------------------------------------------------------------------------- /samoa-api/src/main/java/com/yahoo/labs/samoa/learners/classifiers/rules/common/RuleActiveLearningNode.java: -------------------------------------------------------------------------------- 1 | package com.yahoo.labs.samoa.learners.classifiers.rules.common; 2 | 3 | /* 4 | * #%L 5 | * SAMOA 6 | * %% 7 | * Copyright (C) 2013 - 2014 Yahoo! Inc. 8 | * %% 9 | * Licensed under the Apache License, Version 2.0 (the "License"); 10 | * you may not use this file except in compliance with the License. 11 | * You may obtain a copy of the License at 12 | * 13 | * http://www.apache.org/licenses/LICENSE-2.0 14 | * 15 | * Unless required by applicable law or agreed to in writing, software 16 | * distributed under the License is distributed on an "AS IS" BASIS, 17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | * See the License for the specific language governing permissions and 19 | * limitations under the License. 
20 | * #L% 21 | */ 22 | 23 | /** 24 | * Interface for Rule's LearningNode that updates both statistics 25 | * for expanding rule and computing predictions. 26 | * 27 | * @author Anh Thu Vu 28 | * 29 | */ 30 | public interface RuleActiveLearningNode extends RulePassiveLearningNode { 31 | 32 | public boolean tryToExpand(double splitConfidence, double tieThreshold); 33 | 34 | } 35 | -------------------------------------------------------------------------------- /samoa-api/src/main/java/com/yahoo/labs/samoa/moa/streams/InstanceStream.java: -------------------------------------------------------------------------------- 1 | package com.yahoo.labs.samoa.moa.streams; 2 | 3 | /* 4 | * #%L 5 | * SAMOA 6 | * %% 7 | * Copyright (C) 2007 University of Waikato, Hamilton, New Zealand 8 | * %% 9 | * Licensed under the Apache License, Version 2.0 (the "License"); 10 | * you may not use this file except in compliance with the License. 11 | * You may obtain a copy of the License at 12 | * 13 | * http://www.apache.org/licenses/LICENSE-2.0 14 | * 15 | * Unless required by applicable law or agreed to in writing, software 16 | * distributed under the License is distributed on an "AS IS" BASIS, 17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | * See the License for the specific language governing permissions and 19 | * limitations under the License. 20 | * #L% 21 | */ 22 | 23 | import com.yahoo.labs.samoa.moa.core.Example; 24 | import com.yahoo.labs.samoa.instances.Instance; 25 | 26 | /** 27 | * Interface representing a data stream of instances. 
28 | * 29 | * @author Richard Kirkby (rkirkby@cs.waikato.ac.nz) 30 | * @version $Revision: 7 $ 31 | */ 32 | public interface InstanceStream extends ExampleStream> { 33 | 34 | 35 | 36 | 37 | } 38 | -------------------------------------------------------------------------------- /samoa-local/src/main/resources/log4j.xml: -------------------------------------------------------------------------------- 1 | 2 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: java 2 | 3 | install: 4 | - git clone https://github.com/apache/incubator-s4.git 5 | - cd incubator-s4 6 | - git checkout tags/0.6.0-Final 7 | - cp ../bin/s4-build/gradlew . 8 | - cp ../bin/s4-build/gradle-wrapper-1.4.jar ./lib/ 9 | - cp ../bin/s4-build/gradle-wrapper-1.4.properties ./lib/ 10 | - ./gradlew install 11 | - ./gradlew s4-tools::installApp 12 | - cd .. 
13 | - echo "sonatype-nexus-snapshots${SOSS_USERNAME}${SOSS_PASSWORD}" > ${HOME}/.m2/settings.xml 14 | #- cat ${HOME}/.m2/settings.xml 15 | 16 | script: if [[ "$TRAVIS_SECURE_ENV_VARS" == "true" && "$TRAVIS_PULL_REQUEST" == "false" ]]; then CMD=deploy; else CMD=install; fi; echo $CMD; mvn -B -Pall $CMD 17 | 18 | notifications: 19 | email: 20 | on_success: never 21 | on_failure: change 22 | 23 | env: 24 | global: 25 | - secure: "hSyN3Ys3wDMJtL8jAcfFMh8pnG7B2TaKXc4qDWgE9a73XQ77JB8asCeXtQx/0/rNrJeNLBdSrVcXNAaOXXgGZpftJ0WdIBsyAj+tzpVAf+pcEHPVCgR4PHLkm1/UlyGX//1J+DjkDXnRgNfsD8xjZxTeNFH8xFzAU5YaP0AiLmk=" 26 | - secure: "MJLRFWi1uGZ1s5u/A44u4vDSGXF23H/3GGhofvLliaM4ivkeO9uthErlHgloGSmubEVkJMiThBLveZl01tNYRgn5a08qqyIsf/eShMagJDR7cX6FmbU7qOMOSzaAI84GDtrNuDQqaz2I1nTKfnzDYcTGXrpJMwLmbx30E9D/qaY=" 27 | -------------------------------------------------------------------------------- /samoa-local/src/main/java/com/yahoo/labs/samoa/topology/impl/SimpleEngine.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this template, choose Tools | Templates 3 | * and open the template in the editor. 4 | */ 5 | package com.yahoo.labs.samoa.topology.impl; 6 | 7 | /* 8 | * #%L 9 | * SAMOA 10 | * %% 11 | * Copyright (C) 2013 Yahoo! Inc. 12 | * %% 13 | * Licensed under the Apache License, Version 2.0 (the "License"); 14 | * you may not use this file except in compliance with the License. 15 | * You may obtain a copy of the License at 16 | * 17 | * http://www.apache.org/licenses/LICENSE-2.0 18 | * 19 | * Unless required by applicable law or agreed to in writing, software 20 | * distributed under the License is distributed on an "AS IS" BASIS, 21 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 22 | * See the License for the specific language governing permissions and 23 | * limitations under the License. 
24 | * #L% 25 | */ 26 | 27 | import com.yahoo.labs.samoa.topology.Topology; 28 | 29 | public class SimpleEngine { 30 | 31 | public static void submitTopology(Topology topology) { 32 | SimpleTopology simpleTopology = (SimpleTopology) topology; 33 | simpleTopology.run(); 34 | // runs until completion 35 | } 36 | 37 | } 38 | -------------------------------------------------------------------------------- /samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/splitcriteria/SDRSplitCriterion.java: -------------------------------------------------------------------------------- 1 | package com.yahoo.labs.samoa.moa.classifiers.core.splitcriteria; 2 | 3 | /* 4 | * #%L 5 | * SAMOA 6 | * %% 7 | * Copyright (C) 2012 University of Waikato, Hamilton, New Zealand 8 | * %% 9 | * Licensed under the Apache License, Version 2.0 (the "License"); 10 | * you may not use this file except in compliance with the License. 11 | * You may obtain a copy of the License at 12 | * 13 | * http://www.apache.org/licenses/LICENSE-2.0 14 | * 15 | * Unless required by applicable law or agreed to in writing, software 16 | * distributed under the License is distributed on an "AS IS" BASIS, 17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | * See the License for the specific language governing permissions and 19 | * limitations under the License. 
20 | * #L% 21 | */ 22 | 23 | public class SDRSplitCriterion extends VarianceReductionSplitCriterion { 24 | private static final long serialVersionUID = 1L; 25 | 26 | public static double computeSD(double[] dist) { 27 | int N = (int)dist[0]; 28 | double sum = dist[1]; 29 | double sumSq = dist[2]; 30 | return Math.sqrt((sumSq - ((sum * sum)/N))/N); 31 | } 32 | 33 | } 34 | -------------------------------------------------------------------------------- /samoa-api/src/main/java/com/yahoo/labs/samoa/learners/classifiers/ensemble/BoostingDistributorProcessor.java: -------------------------------------------------------------------------------- 1 | package com.yahoo.labs.samoa.learners.classifiers.ensemble; 2 | 3 | import com.yahoo.labs.samoa.learners.InstanceContentEvent; 4 | 5 | /* 6 | * #%L 7 | * SAMOA 8 | * %% 9 | * Copyright (C) 2013 Yahoo! Inc. 10 | * %% 11 | * Licensed under the Apache License, Version 2.0 (the "License"); 12 | * you may not use this file except in compliance with the License. 13 | * You may obtain a copy of the License at 14 | * 15 | * http://www.apache.org/licenses/LICENSE-2.0 16 | * 17 | * Unless required by applicable law or agreed to in writing, software 18 | * distributed under the License is distributed on an "AS IS" BASIS, 19 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 20 | * See the License for the specific language governing permissions and 21 | * limitations under the License. 22 | * #L% 23 | */ 24 | 25 | 26 | /** 27 | * The Class BoostingDistributorProcessor. 
28 | */ 29 | public class BoostingDistributorProcessor extends BaggingDistributorProcessor{ 30 | 31 | @Override 32 | protected void train(InstanceContentEvent inEvent) { 33 | // Boosting is trained from the prediction combiner, not from the input 34 | } 35 | 36 | } 37 | -------------------------------------------------------------------------------- /samoa-local/src/main/java/com/yahoo/labs/samoa/topology/impl/SimpleEntranceProcessingItem.java: -------------------------------------------------------------------------------- 1 | package com.yahoo.labs.samoa.topology.impl; 2 | 3 | /* 4 | * #%L 5 | * SAMOA 6 | * %% 7 | * Copyright (C) 2013 Yahoo! Inc. 8 | * %% 9 | * Licensed under the Apache License, Version 2.0 (the "License"); 10 | * you may not use this file except in compliance with the License. 11 | * You may obtain a copy of the License at 12 | * 13 | * http://www.apache.org/licenses/LICENSE-2.0 14 | * 15 | * Unless required by applicable law or agreed to in writing, software 16 | * distributed under the License is distributed on an "AS IS" BASIS, 17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | * See the License for the specific language governing permissions and 19 | * limitations under the License. 
20 | * #L% 21 | */ 22 | 23 | import com.yahoo.labs.samoa.core.EntranceProcessor; 24 | import com.yahoo.labs.samoa.topology.LocalEntranceProcessingItem; 25 | 26 | class SimpleEntranceProcessingItem extends LocalEntranceProcessingItem { 27 | public SimpleEntranceProcessingItem(EntranceProcessor processor) { 28 | super(processor); 29 | } 30 | 31 | // The default waiting time when there is no available events is 100ms 32 | // Override waitForNewEvents() to change it 33 | } 34 | -------------------------------------------------------------------------------- /samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/conditionaltests/InstanceConditionalBinaryTest.java: -------------------------------------------------------------------------------- 1 | package com.yahoo.labs.samoa.moa.classifiers.core.conditionaltests; 2 | 3 | /* 4 | * #%L 5 | * SAMOA 6 | * %% 7 | * Copyright (C) 2007 University of Waikato, Hamilton, New Zealand 8 | * %% 9 | * Licensed under the Apache License, Version 2.0 (the "License"); 10 | * you may not use this file except in compliance with the License. 11 | * You may obtain a copy of the License at 12 | * 13 | * http://www.apache.org/licenses/LICENSE-2.0 14 | * 15 | * Unless required by applicable law or agreed to in writing, software 16 | * distributed under the License is distributed on an "AS IS" BASIS, 17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | * See the License for the specific language governing permissions and 19 | * limitations under the License. 20 | * #L% 21 | */ 22 | 23 | /** 24 | * Abstract binary conditional test for instances to use to split nodes in Hoeffding trees. 
25 | * 26 | * @author Richard Kirkby (rkirkby@cs.waikato.ac.nz) 27 | * @version $Revision: 7 $ 28 | */ 29 | public abstract class InstanceConditionalBinaryTest extends InstanceConditionalTest { 30 | 31 | @Override 32 | public int maxBranches() { 33 | return 2; 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /samoa-api/src/main/java/com/yahoo/labs/samoa/topology/IProcessingItem.java: -------------------------------------------------------------------------------- 1 | package com.yahoo.labs.samoa.topology; 2 | 3 | /* 4 | * #%L 5 | * SAMOA 6 | * %% 7 | * Copyright (C) 2013 Yahoo! Inc. 8 | * %% 9 | * Licensed under the Apache License, Version 2.0 (the "License"); 10 | * you may not use this file except in compliance with the License. 11 | * You may obtain a copy of the License at 12 | * 13 | * http://www.apache.org/licenses/LICENSE-2.0 14 | * 15 | * Unless required by applicable law or agreed to in writing, software 16 | * distributed under the License is distributed on an "AS IS" BASIS, 17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | * See the License for the specific language governing permissions and 19 | * limitations under the License. 20 | * #L% 21 | */ 22 | 23 | import com.yahoo.labs.samoa.core.Processor; 24 | 25 | /** 26 | * ProcessingItem interface specific for entrance processing items. 27 | * 28 | * @author severien 29 | * 30 | */ 31 | public interface IProcessingItem { 32 | 33 | /** 34 | * Gets the processing item processor. 35 | * 36 | * @return Processor 37 | */ 38 | public Processor getProcessor(); 39 | 40 | /** 41 | * Sets processing item name. 
42 | * 43 | * @param name 44 | */ 45 | //public void setName(String name); 46 | 47 | } 48 | -------------------------------------------------------------------------------- /RELEASE.txt: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | Release v0.2.0 20 | NaiveBayes classification algorithm. 21 | AMRules regression algorithm. 22 | Samza execution engine. 23 | Multithread execution engine. 24 | HDFS stream source. 25 | 26 | Release v0.1.0 27 | Initial release. 28 | Vertical Hoeffding Tree classification algorithm. 29 | Clustream clustering algorithm. 30 | Adaptive ensembles (Bagging and Boosting). 31 | Local execution engine. 32 | Storm execution engine. 33 | S4 execution engine. 34 | -------------------------------------------------------------------------------- /samoa-api/src/main/java/com/yahoo/labs/samoa/topology/ISubmitter.java: -------------------------------------------------------------------------------- 1 | package com.yahoo.labs.samoa.topology; 2 | 3 | /* 4 | * #%L 5 | * SAMOA 6 | * %% 7 | * Copyright (C) 2013 Yahoo! Inc.
8 | * %% 9 | * Licensed under the Apache License, Version 2.0 (the "License"); 10 | * you may not use this file except in compliance with the License. 11 | * You may obtain a copy of the License at 12 | * 13 | * http://www.apache.org/licenses/LICENSE-2.0 14 | * 15 | * Unless required by applicable law or agreed to in writing, software 16 | * distributed under the License is distributed on an "AS IS" BASIS, 17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | * See the License for the specific language governing permissions and 19 | * limitations under the License. 20 | * #L% 21 | */ 22 | 23 | import com.yahoo.labs.samoa.tasks.Task; 24 | 25 | /** 26 | * Submitter interface for programatically deploying platform specific topologies. 27 | * 28 | * @author severien 29 | * 30 | */ 31 | public interface ISubmitter { 32 | 33 | /** 34 | * Deploy a specific task to a platform. 35 | * 36 | * @param task 37 | */ 38 | public void deployTask(Task task); 39 | 40 | /** 41 | * Sets if the task should run locally or distributed. 42 | * 43 | * @param bool 44 | */ 45 | public void setLocal(boolean bool); 46 | } 47 | -------------------------------------------------------------------------------- /samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/attributeclassobservers/NumericAttributeClassObserver.java: -------------------------------------------------------------------------------- 1 | package com.yahoo.labs.samoa.moa.classifiers.core.attributeclassobservers; 2 | 3 | /* 4 | * #%L 5 | * SAMOA 6 | * %% 7 | * Copyright (C) 2007 University of Waikato, Hamilton, New Zealand 8 | * %% 9 | * Licensed under the Apache License, Version 2.0 (the "License"); 10 | * you may not use this file except in compliance with the License. 
11 | * You may obtain a copy of the License at 12 | * 13 | * http://www.apache.org/licenses/LICENSE-2.0 14 | * 15 | * Unless required by applicable law or agreed to in writing, software 16 | * distributed under the License is distributed on an "AS IS" BASIS, 17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | * See the License for the specific language governing permissions and 19 | * limitations under the License. 20 | * #L% 21 | */ 22 | 23 | /** 24 | * Interface for observing the class data distribution for a numeric attribute. 25 | * This observer monitors the class distribution of a given attribute. 26 | * Used in naive Bayes and decision trees to monitor data statistics on leaves. 27 | * 28 | * @author Richard Kirkby (rkirkby@cs.waikato.ac.nz) 29 | * @version $Revision: 7 $ 30 | */ 31 | public interface NumericAttributeClassObserver extends AttributeClassObserver { 32 | 33 | 34 | } 35 | -------------------------------------------------------------------------------- /samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/attributeclassobservers/DiscreteAttributeClassObserver.java: -------------------------------------------------------------------------------- 1 | package com.yahoo.labs.samoa.moa.classifiers.core.attributeclassobservers; 2 | 3 | /* 4 | * #%L 5 | * SAMOA 6 | * %% 7 | * Copyright (C) 2007 University of Waikato, Hamilton, New Zealand 8 | * %% 9 | * Licensed under the Apache License, Version 2.0 (the "License"); 10 | * you may not use this file except in compliance with the License. 11 | * You may obtain a copy of the License at 12 | * 13 | * http://www.apache.org/licenses/LICENSE-2.0 14 | * 15 | * Unless required by applicable law or agreed to in writing, software 16 | * distributed under the License is distributed on an "AS IS" BASIS, 17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
18 | * See the License for the specific language governing permissions and 19 | * limitations under the License. 20 | * #L% 21 | */ 22 | 23 | /** 24 | * Interface for observing the class data distribution for a discrete (nominal) attribute. 25 | * This observer monitors the class distribution of a given attribute. 26 | * Used in naive Bayes and decision trees to monitor data statistics on leaves. 27 | * 28 | * @author Richard Kirkby (rkirkby@cs.waikato.ac.nz) 29 | * @version $Revision: 7 $ 30 | */ 31 | public interface DiscreteAttributeClassObserver extends AttributeClassObserver { 32 | 33 | 34 | } 35 | -------------------------------------------------------------------------------- /samoa-s4/samoa-s4-adapter/src/main/java/samoa/topology/adapter/S4AdapterApp.java: -------------------------------------------------------------------------------- 1 | package samoa.topology.adapter; 2 | 3 | /* 4 | * #%L 5 | * SAMOA 6 | * %% 7 | * Copyright (C) 2013 Yahoo! Inc. 8 | * %% 9 | * Licensed under the Apache License, Version 2.0 (the "License"); 10 | * you may not use this file except in compliance with the License. 11 | * You may obtain a copy of the License at 12 | * 13 | * http://www.apache.org/licenses/LICENSE-2.0 14 | * 15 | * Unless required by applicable law or agreed to in writing, software 16 | * distributed under the License is distributed on an "AS IS" BASIS, 17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | * See the License for the specific language governing permissions and 19 | * limitations under the License. 
20 | * #L% 21 | */ 22 | 23 | import org.apache.s4.core.adapter.AdapterApp; 24 | 25 | import samoa.sandbox.SourceProcessor; 26 | import samoa.streams.StreamSourceProcessor; 27 | 28 | public class S4AdapterApp extends AdapterApp { 29 | 30 | S4EntranceProcessingItem entrancePI; 31 | StreamSourceProcessor sourceProcessor; 32 | 33 | @Override 34 | protected void onInit() { 35 | entrancePI = new S4EntranceProcessingItem(this); 36 | sourceProcessor = new StreamSourceProcessor(); 37 | entrancePI.setProcessor(sourceProcessor); 38 | } 39 | 40 | @Override 41 | protected void onStart() { 42 | 43 | } 44 | 45 | } 46 | -------------------------------------------------------------------------------- /samoa-instances/pom.xml: -------------------------------------------------------------------------------- 1 | 20 | 21 | 4.0.0 22 | 23 | UTF-8 24 | 25 | 26 | samoa-instances 27 | Instances for SAMOA 28 | 29 | samoa-instances 30 | 31 | com.yahoo.labs.samoa 32 | samoa 33 | 0.3.0-SNAPSHOT 34 | 35 | 36 | -------------------------------------------------------------------------------- /samoa-threads/src/main/java/com/yahoo/labs/samoa/topology/impl/ThreadsEntranceProcessingItem.java: -------------------------------------------------------------------------------- 1 | package com.yahoo.labs.samoa.topology.impl; 2 | 3 | /* 4 | * #%L 5 | * SAMOA 6 | * %% 7 | * Copyright (C) 2013 Yahoo! Inc. 8 | * %% 9 | * Licensed under the Apache License, Version 2.0 (the "License"); 10 | * you may not use this file except in compliance with the License. 11 | * You may obtain a copy of the License at 12 | * 13 | * http://www.apache.org/licenses/LICENSE-2.0 14 | * 15 | * Unless required by applicable law or agreed to in writing, software 16 | * distributed under the License is distributed on an "AS IS" BASIS, 17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | * See the License for the specific language governing permissions and 19 | * limitations under the License. 
20 | * #L% 21 | */ 22 | 23 | import com.yahoo.labs.samoa.core.EntranceProcessor; 24 | import com.yahoo.labs.samoa.topology.LocalEntranceProcessingItem; 25 | 26 | /** 27 | * EntranceProcessingItem for multithreaded engine. 28 | * @author Anh Thu Vu 29 | * 30 | */ 31 | public class ThreadsEntranceProcessingItem extends LocalEntranceProcessingItem { 32 | 33 | public ThreadsEntranceProcessingItem(EntranceProcessor processor) { 34 | super(processor); 35 | } 36 | 37 | // The default waiting time when there is no available events is 100ms 38 | // Override waitForNewEvents() to change it 39 | 40 | } 41 | -------------------------------------------------------------------------------- /samoa-api/src/main/java/com/yahoo/labs/samoa/learners/classifiers/trees/DeleteContentEvent.java: -------------------------------------------------------------------------------- 1 | package com.yahoo.labs.samoa.learners.classifiers.trees; 2 | 3 | /* 4 | * #%L 5 | * SAMOA 6 | * %% 7 | * Copyright (C) 2013 Yahoo! Inc. 8 | * %% 9 | * Licensed under the Apache License, Version 2.0 (the "License"); 10 | * you may not use this file except in compliance with the License. 11 | * You may obtain a copy of the License at 12 | * 13 | * http://www.apache.org/licenses/LICENSE-2.0 14 | * 15 | * Unless required by applicable law or agreed to in writing, software 16 | * distributed under the License is distributed on an "AS IS" BASIS, 17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | * See the License for the specific language governing permissions and 19 | * limitations under the License. 20 | * #L% 21 | */ 22 | 23 | /** 24 | * Delete Content Event is the content event that is sent by Model Aggregator Processor 25 | * to delete unnecessary statistic in Local Statistic Processor. 
26 | * @author Arinto Murdopo 27 | * 28 | */ 29 | final class DeleteContentEvent extends ControlContentEvent { 30 | 31 | private static final long serialVersionUID = -2105250722560863633L; 32 | 33 | public DeleteContentEvent(){ 34 | super(-1); 35 | } 36 | 37 | DeleteContentEvent(long id) { 38 | super(id); } 39 | 40 | @Override 41 | LocStatControl getType() { 42 | return LocStatControl.DELETE; 43 | } 44 | 45 | } 46 | -------------------------------------------------------------------------------- /samoa-api/src/main/java/com/yahoo/labs/samoa/moa/tasks/ResultPreviewListener.java: -------------------------------------------------------------------------------- 1 | package com.yahoo.labs.samoa.moa.tasks; 2 | 3 | /* 4 | * #%L 5 | * SAMOA 6 | * %% 7 | * Copyright (C) 2007 University of Waikato, Hamilton, New Zealand 8 | * %% 9 | * Licensed under the Apache License, Version 2.0 (the "License"); 10 | * you may not use this file except in compliance with the License. 11 | * You may obtain a copy of the License at 12 | * 13 | * http://www.apache.org/licenses/LICENSE-2.0 14 | * 15 | * Unless required by applicable law or agreed to in writing, software 16 | * distributed under the License is distributed on an "AS IS" BASIS, 17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | * See the License for the specific language governing permissions and 19 | * limitations under the License. 20 | * #L% 21 | */ 22 | 23 | /** 24 | * Interface implemented by classes that preview results 25 | * on the Graphical User Interface 26 | * 27 | * @author Richard Kirkby (rkirkby@cs.waikato.ac.nz) 28 | * @version $Revision: 7 $ 29 | */ 30 | public interface ResultPreviewListener { 31 | 32 | /** 33 | * This method is used to receive a signal from 34 | * TaskMonitor that the lastest preview has 35 | * changed. This method is implemented in PreviewPanel 36 | * to change the results that are shown in its panel. 
37 | * 38 | */ 39 | public void latestPreviewChanged(); 40 | } 41 | -------------------------------------------------------------------------------- /bin/samoa-storm.properties: -------------------------------------------------------------------------------- 1 | ### 2 | # #%L 3 | # SAMOA 4 | # %% 5 | # Copyright (C) 2013 Yahoo! Inc. 6 | # %% 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | # #L% 19 | ### 20 | 21 | # SAMOA Storm properties file 22 | # This file contains specific configurations for SAMOA deployment in the Storm platform 23 | # Note that you still need to configure Storm client in your machine, 24 | # including setting up Storm configuration file (~/.storm/storm.yaml) with correct settings 25 | 26 | # samoa.storm.mode corresponds to the execution mode of the Task in Storm 27 | # possible values: 28 | # 1. cluster: the Task will be sent into nimbus. The nimbus is configured by Storm configuration file 29 | # 2. 
local: the Task will be sent using local Storm cluster 30 | samoa.storm.mode=local 31 | 32 | # samoa.storm.numworker corresponds to the number of worker processes allocated in Storm cluster 33 | # possible values: any integer greater than 0 34 | samoa.storm.numworker=4 35 | 36 | -------------------------------------------------------------------------------- /samoa-api/src/main/java/com/yahoo/labs/samoa/moa/core/InstanceExample.java: -------------------------------------------------------------------------------- 1 | package com.yahoo.labs.samoa.moa.core; 2 | 3 | /* 4 | * #%L 5 | * SAMOA 6 | * %% 7 | * Copyright (C) 2007 University of Waikato, Hamilton, New Zealand 8 | * %% 9 | * Licensed under the Apache License, Version 2.0 (the "License"); 10 | * you may not use this file except in compliance with the License. 11 | * You may obtain a copy of the License at 12 | * 13 | * http://www.apache.org/licenses/LICENSE-2.0 14 | * 15 | * Unless required by applicable law or agreed to in writing, software 16 | * distributed under the License is distributed on an "AS IS" BASIS, 17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | * See the License for the specific language governing permissions and 19 | * limitations under the License. 
20 | * #L% 21 | */ 22 | 23 | import com.yahoo.labs.samoa.instances.Instance; 24 | import java.io.Serializable; 25 | 26 | public class InstanceExample implements Example, Serializable { 27 | 28 | public Instance instance; 29 | 30 | public InstanceExample (Instance inst) 31 | { 32 | this.instance = inst; 33 | } 34 | 35 | @Override 36 | public Instance getData() { 37 | return this.instance; 38 | } 39 | 40 | @Override 41 | public double weight() { 42 | return this.instance.weight(); 43 | } 44 | 45 | @Override 46 | public void setWeight(double w) { 47 | this.instance.setWeight(w); 48 | } 49 | 50 | } 51 | -------------------------------------------------------------------------------- /samoa-api/src/main/java/com/yahoo/labs/samoa/moa/streams/clustering/ClusterEvent.java: -------------------------------------------------------------------------------- 1 | 2 | package com.yahoo.labs.samoa.moa.streams.clustering; 3 | 4 | /* 5 | * #%L 6 | * SAMOA 7 | * %% 8 | * Copyright (C) 2010 RWTH Aachen University, Germany 9 | * %% 10 | * Licensed under the Apache License, Version 2.0 (the "License"); 11 | * you may not use this file except in compliance with the License. 12 | * You may obtain a copy of the License at 13 | * 14 | * http://www.apache.org/licenses/LICENSE-2.0 15 | * 16 | * Unless required by applicable law or agreed to in writing, software 17 | * distributed under the License is distributed on an "AS IS" BASIS, 18 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 19 | * See the License for the specific language governing permissions and 20 | * limitations under the License. 
/**
 * Event object that carries a type, a message and a timestamp in addition to
 * the event source inherited from {@code EventObject}.
 */
public class ClusterEvent extends EventObject {

    private String type;
    private String message;
    private long timestamp;

    /**
     * Creates a cluster event.
     *
     * @param source    the event source, passed to {@code EventObject}
     * @param timestamp the timestamp of the event
     * @param type      the type of the event
     * @param message   the message of the event
     */
    public ClusterEvent(Object source, long timestamp, String type, String message) {
        super(source);
        this.timestamp = timestamp;
        this.message = message;
        this.type = type;
    }

    /**
     * @return the type given at construction
     */
    public String getType() {
        return this.type;
    }

    /**
     * @return the message given at construction
     */
    public String getMessage() {
        return this.message;
    }

    /**
     * @return the timestamp given at construction
     */
    public long getTimestamp() {
        return this.timestamp;
    }
}
35 | */ 36 | public EntranceProcessor getProcessor(); 37 | 38 | /** 39 | * Set the single output stream for this EntranceProcessingItem. 40 | * 41 | * @param stream 42 | * the stream 43 | * @return the current instance of the EntranceProcessingItem for fluent interface. 44 | */ 45 | public EntranceProcessingItem setOutputStream(Stream stream); 46 | } -------------------------------------------------------------------------------- /samoa-local/src/test/java/com/yahoo/labs/samoa/topology/impl/SimpleEngineTest.java: -------------------------------------------------------------------------------- 1 | package com.yahoo.labs.samoa.topology.impl; 2 | 3 | /* 4 | * #%L 5 | * SAMOA 6 | * %% 7 | * Copyright (C) 2013 Yahoo! Inc. 8 | * %% 9 | * Licensed under the Apache License, Version 2.0 (the "License"); 10 | * you may not use this file except in compliance with the License. 11 | * You may obtain a copy of the License at 12 | * 13 | * http://www.apache.org/licenses/LICENSE-2.0 14 | * 15 | * Unless required by applicable law or agreed to in writing, software 16 | * distributed under the License is distributed on an "AS IS" BASIS, 17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | * See the License for the specific language governing permissions and 19 | * limitations under the License. 
/**
 * Unit test for SimpleEngine, written with JMockit mocks and JUnit.
 *
 * @author Anh Thu Vu
 *
 */
public class SimpleEngineTest {

    // Instance of the class under test managed by JMockit; the test body only
    // calls the static submitTopology method and never reads this field.
    @Tested private SimpleEngine unused;
    // Mocked topology handed to SimpleEngine.submitTopology.
    @Mocked private SimpleTopology topology;
    // Mocked Runtime, returned by the recorded Runtime.getRuntime() call below.
    @Mocked private Runtime mockedRuntime;

    /**
     * Submits the mocked topology and verifies that its run() method was called.
     */
    @Test
    public void testSubmitTopology() {
        // Record phase: Runtime.getRuntime() yields the mock and exit(0) is
        // recorded as an allowed call.
        // NOTE(review): whether SimpleEngine still touches Runtime cannot be
        // told from this test alone - these expectations may be leftovers.
        new NonStrictExpectations() {
            {
                Runtime.getRuntime();
                result=mockedRuntime;
                mockedRuntime.exit(0);
            }
        };
        SimpleEngine.submitTopology(topology);
        // Verify phase: the engine must have invoked run() on the topology.
        new Verifications() {
            {
                topology.run();
            }
        };
    }

}
24 | * #L% 25 | */ 26 | 27 | import java.io.Serializable; 28 | 29 | /** 30 | * Serializable accessor contract for the attribute values held by an instance (values are exposed as doubles). 31 | * @author abifet 32 | */ 33 | public interface InstanceData extends Serializable{ 34 | 35 | public int numAttributes(); 36 | 37 | public double value(int instAttIndex); 38 | 39 | public boolean isMissing(int instAttIndex); 40 | 41 | public int numValues(); 42 | 43 | public int index(int i); 44 | 45 | public double valueSparse(int i); 46 | 47 | public boolean isMissingSparse(int p1); 48 | 49 | //public double value(Attribute attribute); 50 | 51 | public double[] toDoubleArray(); 52 | 53 | public void setValue(int m_numAttributes, double d); 54 | 55 | } 56 | -------------------------------------------------------------------------------- /bin/samoa-s4.properties: -------------------------------------------------------------------------------- 1 | ### 2 | # #%L 3 | # SAMOA 4 | # %% 5 | # Copyright (C) 2013 Yahoo! Inc. 6 | # %% 7 | # Licensed under the Apache License, Version 2.0 (the "License"); 8 | # you may not use this file except in compliance with the License. 9 | # You may obtain a copy of the License at 10 | # 11 | # http://www.apache.org/licenses/LICENSE-2.0 12 | # 13 | # Unless required by applicable law or agreed to in writing, software 14 | # distributed under the License is distributed on an "AS IS" BASIS, 15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | # See the License for the specific language governing permissions and 17 | # limitations under the License. 18 | # #L% 19 | ### 20 | 21 | # ===================================================== # 22 | # SAMOA S4 properties file # 23 | # ----------------------------------------------------- # 24 | # This file contains specific configuration for # 25 | # the deployment in the S4 platform.
# 26 | # ===================================================== # 27 | 28 | # ZooKeeper server 29 | zookeeper.server=localhost 30 | zookeeper.port=2181 31 | 32 | # Simple HTTP server providing the packaged S4 jar 33 | #http.server.ip=localhost 34 | http.server.port=8000 35 | 36 | # Name of the S4 cluster 37 | cluster.name=cluster 38 | cluster.port=12000 39 | 40 | # Deployment strategy: local or cluster 41 | samoa.deploy.mode=local 42 | 43 | # Directory for storing the results of the algorithms. 44 | results.dir=/tmp/samoa/results 45 | # Directory for storing the evaluation results, if the algorithms are to be evaluated. 46 | evaluation.dir=/tmp/samoa/evaluation 47 | -------------------------------------------------------------------------------- /samoa-api/src/main/java/com/yahoo/labs/samoa/learners/classifiers/rules/common/NonLearningRule.java: -------------------------------------------------------------------------------- 1 | package com.yahoo.labs.samoa.learners.classifiers.rules.common; 2 | 3 | /* 4 | * #%L 5 | * SAMOA 6 | * %% 7 | * Copyright (C) 2013 - 2014 Yahoo! Inc. 8 | * %% 9 | * Licensed under the Apache License, Version 2.0 (the "License"); 10 | * you may not use this file except in compliance with the License. 11 | * You may obtain a copy of the License at 12 | * 13 | * http://www.apache.org/licenses/LICENSE-2.0 14 | * 15 | * Unless required by applicable law or agreed to in writing, software 16 | * distributed under the License is distributed on an "AS IS" BASIS, 17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | * See the License for the specific language governing permissions and 19 | * limitations under the License. 20 | * #L% 21 | */ 22 | 23 | /** 24 | * The most basic rule: inherits from Rule the ID and list of features. 25 | * 26 | * @author Anh Thu Vu 27 | * 28 | */ 29 | /* 30 | * This branch (Non-learning rule) was created for an old implementation.
31 | * Probably should remove Non-Learning and Learning Rule classes, 32 | * merge Rule with LearningRule. 33 | */ 34 | public class NonLearningRule extends Rule { 35 | 36 | /** 37 | * Serialization version identifier. 38 | */ 39 | private static final long serialVersionUID = -1210907339230307784L; 40 | /** Builds a rule that takes over the given rule's nodeList and ruleNumberID by plain assignment (nothing is cloned). */ 41 | public NonLearningRule(ActiveRule rule) { 42 | this.nodeList = rule.nodeList; 43 | this.ruleNumberID = rule.ruleNumberID; 44 | } 45 | 46 | @Override 47 | public void getDescription(StringBuilder sb, int indent) { 48 | // do nothing 49 | } 50 | 51 | } 52 | -------------------------------------------------------------------------------- /samoa-api/src/main/java/com/yahoo/labs/samoa/topology/Stream.java: -------------------------------------------------------------------------------- 1 | package com.yahoo.labs.samoa.topology; 2 | 3 | /* 4 | * #%L 5 | * SAMOA 6 | * %% 7 | * Copyright (C) 2013 Yahoo! Inc. 8 | * %% 9 | * Licensed under the Apache License, Version 2.0 (the "License"); 10 | * you may not use this file except in compliance with the License. 11 | * You may obtain a copy of the License at 12 | * 13 | * http://www.apache.org/licenses/LICENSE-2.0 14 | * 15 | * Unless required by applicable law or agreed to in writing, software 16 | * distributed under the License is distributed on an "AS IS" BASIS, 17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | * See the License for the specific language governing permissions and 19 | * limitations under the License. 20 | * #L% 21 | */ 22 | 23 | import com.yahoo.labs.samoa.core.ContentEvent; 24 | 25 | /** 26 | * Stream interface. 27 | * 28 | * @author severien 29 | * 30 | */ 31 | public interface Stream { 32 | 33 | /** 34 | * Puts events into a platform specific data stream. 35 | * 36 | * @param event the event to put into the stream 37 | */ 38 | public void put(ContentEvent event); 39 | 40 | /** 41 | * Sets the stream id which is represented by a name.
42 | * 43 | * @param stream 44 | */ 45 | //public void setStreamId(String stream); 46 | 47 | 48 | /** 49 | * Gets stream id. 50 | * 51 | * @return the stream id 52 | */ 53 | public String getStreamId(); 54 | 55 | /** 56 | * Sets the batch size. 57 | * 58 | * @param batchsize 59 | * the suggested size for batching messages on this stream 60 | */ 61 | public void setBatchSize(int batchsize); 62 | } -------------------------------------------------------------------------------- /bin/samza-dist/run-job.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ### 4 | # #%L 5 | # SAMOA 6 | # %% 7 | # Copyright (C) 2013 - 2014 Yahoo! Inc. 8 | # %% 9 | # Licensed under the Apache License, Version 2.0 (the "License"); 10 | # you may not use this file except in compliance with the License. 11 | # You may obtain a copy of the License at 12 | # 13 | # http://www.apache.org/licenses/LICENSE-2.0 14 | # 15 | # Unless required by applicable law or agreed to in writing, software 16 | # distributed under the License is distributed on an "AS IS" BASIS, 17 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | # See the License for the specific language governing permissions and 19 | # limitations under the License. 20 | # #L% 21 | ### 22 | # Licensed to the Apache Software Foundation (ASF) under one 23 | # or more contributor license agreements. See the NOTICE file 24 | # distributed with this work for additional information 25 | # regarding copyright ownership. The ASF licenses this file 26 | # to you under the Apache License, Version 2.0 (the 27 | # "License"); you may not use this file except in compliance 28 | # with the License.
You may obtain a copy of the License at 29 | # 30 | # http://www.apache.org/licenses/LICENSE-2.0 31 | # 32 | # Unless required by applicable law or agreed to in writing, 33 | # software distributed under the License is distributed on an 34 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 35 | # KIND, either express or implied. See the License for the 36 | # specific language governing permissions and limitations 37 | # under the License. 38 | # Replace this shell with run-class.sh from the script's own directory; quoting keeps paths and arguments containing spaces intact. 39 | exec "$(dirname "$0")/run-class.sh" org.apache.samza.job.JobRunner "$@" 40 | -------------------------------------------------------------------------------- /bin/run-container.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ### 4 | # #%L 5 | # SAMOA 6 | # %% 7 | # Copyright (C) 2013 - 2014 Yahoo! Inc. 8 | # %% 9 | # Licensed under the Apache License, Version 2.0 (the "License"); 10 | # you may not use this file except in compliance with the License. 11 | # You may obtain a copy of the License at 12 | # 13 | # http://www.apache.org/licenses/LICENSE-2.0 14 | # 15 | # Unless required by applicable law or agreed to in writing, software 16 | # distributed under the License is distributed on an "AS IS" BASIS, 17 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | # See the License for the specific language governing permissions and 19 | # limitations under the License. 20 | # #L% 21 | ### 22 | # Licensed to the Apache Software Foundation (ASF) under one 23 | # or more contributor license agreements. See the NOTICE file 24 | # distributed with this work for additional information 25 | # regarding copyright ownership. The ASF licenses this file 26 | # to you under the Apache License, Version 2.0 (the 27 | # "License"); you may not use this file except in compliance 28 | # with the License.
You may obtain a copy of the License at 29 | # 30 | # http://www.apache.org/licenses/LICENSE-2.0 31 | # 32 | # Unless required by applicable law or agreed to in writing, 33 | # software distributed under the License is distributed on an 34 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 35 | # KIND, either express or implied. See the License for the 36 | # specific language governing permissions and limitations 37 | # under the License. 38 | # Replace this shell with run-class.sh from the script's own directory; quoting keeps paths and arguments containing spaces intact. 39 | exec "$(dirname "$0")/run-class.sh" org.apache.samza.container.SamzaContainer "$@" 40 | -------------------------------------------------------------------------------- /bin/samza-dist/run-am.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ### 4 | # #%L 5 | # SAMOA 6 | # %% 7 | # Copyright (C) 2013 - 2014 Yahoo! Inc. 8 | # %% 9 | # Licensed under the Apache License, Version 2.0 (the "License"); 10 | # you may not use this file except in compliance with the License. 11 | # You may obtain a copy of the License at 12 | # 13 | # http://www.apache.org/licenses/LICENSE-2.0 14 | # 15 | # Unless required by applicable law or agreed to in writing, software 16 | # distributed under the License is distributed on an "AS IS" BASIS, 17 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | # See the License for the specific language governing permissions and 19 | # limitations under the License. 20 | # #L% 21 | ### 22 | # Licensed to the Apache Software Foundation (ASF) under one 23 | # or more contributor license agreements. See the NOTICE file 24 | # distributed with this work for additional information 25 | # regarding copyright ownership. The ASF licenses this file 26 | # to you under the Apache License, Version 2.0 (the 27 | # "License"); you may not use this file except in compliance 28 | # with the License.
You may obtain a copy of the License at 29 | # 30 | # http://www.apache.org/licenses/LICENSE-2.0 31 | # 32 | # Unless required by applicable law or agreed to in writing, 33 | # software distributed under the License is distributed on an 34 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 35 | # KIND, either express or implied. See the License for the 36 | # specific language governing permissions and limitations 37 | # under the License. 38 | # Replace this shell with run-class.sh from the script's own directory; quoting keeps paths and arguments containing spaces intact. 39 | exec "$(dirname "$0")/run-class.sh" org.apache.samza.job.yarn.SamzaAppMaster "$@" 40 | -------------------------------------------------------------------------------- /samoa-instances/src/main/java/com/yahoo/labs/samoa/instances/SparseInstance.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this template, choose Tools | Templates 3 | * and open the template in the editor. 4 | */ 5 | package com.yahoo.labs.samoa.instances; 6 | 7 | /* 8 | * #%L 9 | * SAMOA 10 | * %% 11 | * Copyright (C) 2013 Yahoo! Inc. 12 | * %% 13 | * Licensed under the Apache License, Version 2.0 (the "License"); 14 | * you may not use this file except in compliance with the License. 15 | * You may obtain a copy of the License at 16 | * 17 | * http://www.apache.org/licenses/LICENSE-2.0 18 | * 19 | * Unless required by applicable law or agreed to in writing, software 20 | * distributed under the License is distributed on an "AS IS" BASIS, 21 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 22 | * See the License for the specific language governing permissions and 23 | * limitations under the License.
24 | * #L% 25 | */ 26 | 27 | /** 28 | * Instance type whose constructors all delegate to the matching {@code SingleLabelInstance} constructor. 29 | * @author abifet 30 | */ 31 | public class SparseInstance extends SingleLabelInstance{ 32 | 33 | public SparseInstance(double d, double[] res) { 34 | super(d,res); 35 | } 36 | public SparseInstance(SingleLabelInstance inst) { 37 | super(inst); 38 | } 39 | /** Calls the four-argument superclass constructor with 1 in the weight position, null value and index arrays, and numberAttributes cast to int. */ 40 | public SparseInstance(double numberAttributes) { 41 | //super(1, new double[(int) numberAttributes-1]); 42 | super(1,null,null,(int) numberAttributes); 43 | } 44 | 45 | public SparseInstance(double weight, double[] attributeValues, int[] indexValues, int numberAttributes) { 46 | super(weight,attributeValues,indexValues,numberAttributes); 47 | } 48 | 49 | } 50 | -------------------------------------------------------------------------------- /bin/samza-dist/run-container.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | ### 4 | # #%L 5 | # SAMOA 6 | # %% 7 | # Copyright (C) 2013 - 2014 Yahoo! Inc. 8 | # %% 9 | # Licensed under the Apache License, Version 2.0 (the "License"); 10 | # you may not use this file except in compliance with the License. 11 | # You may obtain a copy of the License at 12 | # 13 | # http://www.apache.org/licenses/LICENSE-2.0 14 | # 15 | # Unless required by applicable law or agreed to in writing, software 16 | # distributed under the License is distributed on an "AS IS" BASIS, 17 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | # See the License for the specific language governing permissions and 19 | # limitations under the License. 20 | # #L% 21 | ### 22 | # Licensed to the Apache Software Foundation (ASF) under one 23 | # or more contributor license agreements. See the NOTICE file 24 | # distributed with this work for additional information 25 | # regarding copyright ownership. The ASF licenses this file 26 | # to you under the Apache License, Version 2.0 (the 27 | # "License"); you may not use this file except in compliance 28 | # with the License.
You may obtain a copy of the License at 29 | # 30 | # http://www.apache.org/licenses/LICENSE-2.0 31 | # 32 | # Unless required by applicable law or agreed to in writing, 33 | # software distributed under the License is distributed on an 34 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 35 | # KIND, either express or implied. See the License for the 36 | # specific language governing permissions and limitations 37 | # under the License. 38 | # Replace this shell with run-class.sh from the script's own directory; quoting keeps paths and arguments containing spaces intact. 39 | exec "$(dirname "$0")/run-class.sh" org.apache.samza.container.SamzaContainer "$@" 40 | -------------------------------------------------------------------------------- /samoa-api/src/main/java/com/yahoo/labs/samoa/examples/HelloWorldDestinationProcessor.java: -------------------------------------------------------------------------------- 1 | package com.yahoo.labs.samoa.examples; 2 | 3 | /* 4 | * #%L 5 | * SAMOA 6 | * %% 7 | * Copyright (C) 2013 - 2014 Yahoo! Inc. 8 | * %% 9 | * Licensed under the Apache License, Version 2.0 (the "License"); 10 | * you may not use this file except in compliance with the License. 11 | * You may obtain a copy of the License at 12 | * 13 | * http://www.apache.org/licenses/LICENSE-2.0 14 | * 15 | * Unless required by applicable law or agreed to in writing, software 16 | * distributed under the License is distributed on an "AS IS" BASIS, 17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | * See the License for the specific language governing permissions and 19 | * limitations under the License. 20 | * #L% 21 | */ 22 | 23 | import com.yahoo.labs.samoa.core.ContentEvent; 24 | import com.yahoo.labs.samoa.core.Processor; 25 | 26 | /** 27 | * Example {@link Processor} that simply prints the received events to standard output.
28 | */ 29 | public class HelloWorldDestinationProcessor implements Processor { 30 | 31 | private static final long serialVersionUID = -6042613438148776446L; 32 | private int processorId; 33 | /** Prints the event prefixed with this processor's id and always returns true. */ 34 | @Override 35 | public boolean process(ContentEvent event) { 36 | System.out.println(processorId + ": " + event); 37 | return true; 38 | } 39 | /** Stores the id that {@link #process} prints. */ 40 | @Override 41 | public void onCreate(int id) { 42 | this.processorId = id; 43 | } 44 | /** Returns a fresh instance; the argument is ignored and the id is not copied. */ 45 | @Override 46 | public Processor newProcessor(Processor p) { 47 | return new HelloWorldDestinationProcessor(); 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /samoa-api/src/main/java/com/yahoo/labs/samoa/learners/AdaptiveLearner.java: -------------------------------------------------------------------------------- 1 | package com.yahoo.labs.samoa.learners; 2 | 3 | /* 4 | * #%L 5 | * SAMOA 6 | * %% 7 | * Copyright (C) 2013 Yahoo! Inc. 8 | * %% 9 | * Licensed under the Apache License, Version 2.0 (the "License"); 10 | * you may not use this file except in compliance with the License. 11 | * You may obtain a copy of the License at 12 | * 13 | * http://www.apache.org/licenses/LICENSE-2.0 14 | * 15 | * Unless required by applicable law or agreed to in writing, software 16 | * distributed under the License is distributed on an "AS IS" BASIS, 17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | * See the License for the specific language governing permissions and 19 | * limitations under the License. 20 | * #L% 21 | */ 22 | 23 | /** 24 | * License 25 | */ 26 | 27 | 28 | import com.yahoo.labs.samoa.moa.classifiers.core.driftdetection.ChangeDetector; 29 | import com.yahoo.labs.samoa.topology.Stream; 30 | 31 | /** 32 | * The Interface Adaptive Learner.
33 | * Initializing a classifier should initialize the PI that connects the classifier with the input stream 34 | * and initialize the result stream so that other PIs can connect to the classification result of this classifier. 35 | */ 36 | 37 | public interface AdaptiveLearner { 38 | 39 | /** 40 | * Gets the change detector item. 41 | * 42 | * @return the change detector item 43 | */ 44 | public ChangeDetector getChangeDetector(); 45 | 46 | /** 47 | * Sets the change detector item. 48 | * 49 | * @param cd the change detector item 50 | */ 51 | public void setChangeDetector(ChangeDetector cd); 52 | 53 | } 54 | -------------------------------------------------------------------------------- /samoa-storm/src/main/java/com/yahoo/labs/samoa/topology/impl/StormTopology.java: -------------------------------------------------------------------------------- 1 | package com.yahoo.labs.samoa.topology.impl; 2 | 3 | /* 4 | * #%L 5 | * SAMOA 6 | * %% 7 | * Copyright (C) 2013 Yahoo! Inc. 8 | * %% 9 | * Licensed under the Apache License, Version 2.0 (the "License"); 10 | * you may not use this file except in compliance with the License. 11 | * You may obtain a copy of the License at 12 | * 13 | * http://www.apache.org/licenses/LICENSE-2.0 14 | * 15 | * Unless required by applicable law or agreed to in writing, software 16 | * distributed under the License is distributed on an "AS IS" BASIS, 17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | * See the License for the specific language governing permissions and 19 | * limitations under the License.
20 | * #L% 21 | */ 22 | 23 | import backtype.storm.topology.TopologyBuilder; 24 | 25 | import com.yahoo.labs.samoa.topology.IProcessingItem; 26 | import com.yahoo.labs.samoa.topology.AbstractTopology; 27 | 28 | /** 29 | * Adaptation of SAMOA topology in samoa-storm 30 | * @author Arinto Murdopo 31 | * 32 | */ 33 | public class StormTopology extends AbstractTopology { 34 | 35 | private TopologyBuilder builder; 36 | /** Creates a topology with the given name and a fresh Storm {@code TopologyBuilder}. */ 37 | public StormTopology(String topologyName){ 38 | super(topologyName); 39 | this.builder = new TopologyBuilder(); 40 | } 41 | /** Adds the item to this Storm topology first, then records it in the superclass; procItem is cast to {@code StormTopologyNode} without a type check. */ 42 | @Override 43 | public void addProcessingItem(IProcessingItem procItem, int parallelismHint){ 44 | StormTopologyNode stormNode = (StormTopologyNode) procItem; 45 | stormNode.addToTopology(this, parallelismHint); 46 | super.addProcessingItem(procItem, parallelismHint); 47 | } 48 | /** Returns the Storm builder created in the constructor. */ 49 | public TopologyBuilder getStormBuilder(){ 50 | return builder; 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /samoa-threads/src/main/java/com/yahoo/labs/samoa/topology/impl/ThreadsProcessingItemInstance.java: -------------------------------------------------------------------------------- 1 | package com.yahoo.labs.samoa.topology.impl; 2 | 3 | /* 4 | * #%L 5 | * SAMOA 6 | * %% 7 | * Copyright (C) 2013 Yahoo! Inc. 8 | * %% 9 | * Licensed under the Apache License, Version 2.0 (the "License"); 10 | * you may not use this file except in compliance with the License. 11 | * You may obtain a copy of the License at 12 | * 13 | * http://www.apache.org/licenses/LICENSE-2.0 14 | * 15 | * Unless required by applicable law or agreed to in writing, software 16 | * distributed under the License is distributed on an "AS IS" BASIS, 17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | * See the License for the specific language governing permissions and 19 | * limitations under the License.
20 | * #L% 21 | */ 22 | 23 | import com.yahoo.labs.samoa.core.ContentEvent; 24 | import com.yahoo.labs.samoa.core.Processor; 25 | 26 | /** 27 | * Lightweight replica of ThreadsProcessingItem. 28 | * ThreadsProcessingItem manages a list of these objects and 29 | * assigns each incoming message to be processed by one of them. 30 | * @author Anh Thu Vu 31 | * 32 | */ 33 | public class ThreadsProcessingItemInstance { 34 | 35 | private Processor processor; 36 | private int threadIndex; 37 | /** Stores the processor to delegate to and the thread index reported by {@link #getThreadIndex()}. */ 38 | public ThreadsProcessingItemInstance(Processor processor, int threadIndex) { 39 | this.processor = processor; 40 | this.threadIndex = threadIndex; 41 | } 42 | 43 | public int getThreadIndex() { 44 | return this.threadIndex; 45 | } 46 | 47 | public Processor getProcessor() { 48 | return this.processor; 49 | } 50 | /** Forwards the event to the wrapped processor; the value returned by {@code process} is discarded. */ 51 | public void processEvent(ContentEvent event) { 52 | this.processor.process(event); 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /samoa-api/src/main/java/com/yahoo/labs/samoa/learners/classifiers/trees/LearningNode.java: -------------------------------------------------------------------------------- 1 | package com.yahoo.labs.samoa.learners.classifiers.trees; 2 | 3 | /* 4 | * #%L 5 | * SAMOA 6 | * %% 7 | * Copyright (C) 2013 Yahoo! Inc. 8 | * %% 9 | * Licensed under the Apache License, Version 2.0 (the "License"); 10 | * you may not use this file except in compliance with the License. 11 | * You may obtain a copy of the License at 12 | * 13 | * http://www.apache.org/licenses/LICENSE-2.0 14 | * 15 | * Unless required by applicable law or agreed to in writing, software 16 | * distributed under the License is distributed on an "AS IS" BASIS, 17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | * See the License for the specific language governing permissions and 19 | * limitations under the License.
20 | * #L% 21 | */ 22 | 23 | import com.yahoo.labs.samoa.instances.Instance; 24 | 25 | /** 26 | * Abstract class that represents a learning node 27 | * @author Arinto Murdopo 28 | * 29 | */ 30 | abstract class LearningNode extends Node { 31 | 32 | private static final long serialVersionUID = 7157319356146764960L; 33 | /** Passes the class observation array straight to the {@code Node} constructor. */ 34 | protected LearningNode(double[] classObservation) { 35 | super(classObservation); 36 | } 37 | 38 | /** 39 | * Method to process the instance for learning 40 | * @param inst The processed instance 41 | * @param proc The model aggregator processor where this learning node exists 42 | */ 43 | abstract void learnFromInstance(Instance inst, ModelAggregatorProcessor proc); 44 | /** A learning node always reports itself as a leaf. */ 45 | @Override 46 | protected boolean isLeaf(){ 47 | return true; 48 | } 49 | /** Wraps this node with the given parent and branch in a {@code FoundNode}; inst is not inspected. */ 50 | @Override 51 | protected FoundNode filterInstanceToLeaf(Instance inst, SplitNode parent, 52 | int parentBranch) { 53 | return new FoundNode(this, parent, parentBranch); 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /samoa-local/src/main/java/com/yahoo/labs/samoa/topology/impl/SimpleTopology.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this template, choose Tools | Templates 3 | * and open the template in the editor. 4 | */ 5 | package com.yahoo.labs.samoa.topology.impl; 6 | 7 | /* 8 | * #%L 9 | * SAMOA 10 | * %% 11 | * Copyright (C) 2013 Yahoo! Inc. 12 | * %% 13 | * Licensed under the Apache License, Version 2.0 (the "License"); 14 | * you may not use this file except in compliance with the License. 15 | * You may obtain a copy of the License at 16 | * 17 | * http://www.apache.org/licenses/LICENSE-2.0 18 | * 19 | * Unless required by applicable law or agreed to in writing, software 20 | * distributed under the License is distributed on an "AS IS" BASIS, 21 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
22 | * See the License for the specific language governing permissions and 23 | * limitations under the License. 24 | * #L% 25 | */ 26 | 27 | import com.yahoo.labs.samoa.topology.AbstractTopology; 28 | /** Topology that is run from exactly one entrance processing item. */ 29 | public class SimpleTopology extends AbstractTopology { 30 | SimpleTopology(String name) { 31 | super(name); 32 | } 33 | /** Calls onCreate(0) on the single entrance PI's processor and then starts it sending events. @throws IllegalStateException if the entrance PI collection is null or does not hold exactly one item */ 34 | public void run() { 35 | if (this.getEntranceProcessingItems() == null) 36 | throw new IllegalStateException("You need to set entrance PI before running the topology."); 37 | if (this.getEntranceProcessingItems().size() != 1) 38 | throw new IllegalStateException("SimpleTopology supports 1 entrance PI only. Number of entrance PIs is "+this.getEntranceProcessingItems().size()); 39 | 40 | SimpleEntranceProcessingItem entrancePi = (SimpleEntranceProcessingItem) this.getEntranceProcessingItems().toArray()[0]; 41 | entrancePi.getProcessor().onCreate(0); // id=0 as it is not used in simple mode 42 | entrancePi.startSendingEvents(); 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /samoa-api/src/main/java/com/yahoo/labs/samoa/core/SerializableInstance.java: -------------------------------------------------------------------------------- 1 | package com.yahoo.labs.samoa.core; 2 | 3 | /* 4 | * #%L 5 | * SAMOA 6 | * %% 7 | * Copyright (C) 2013 Yahoo! Inc. 8 | * %% 9 | * Licensed under the Apache License, Version 2.0 (the "License"); 10 | * you may not use this file except in compliance with the License. 11 | * You may obtain a copy of the License at 12 | * 13 | * http://www.apache.org/licenses/LICENSE-2.0 14 | * 15 | * Unless required by applicable law or agreed to in writing, software 16 | * distributed under the License is distributed on an "AS IS" BASIS, 17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | * See the License for the specific language governing permissions and 19 | * limitations under the License.
20 | * #L% 21 | */ 22 | 23 | import com.yahoo.labs.samoa.instances.DenseInstance; 24 | import com.yahoo.labs.samoa.instances.Instance; 25 | 26 | /** 27 | * License 28 | */ 29 | 30 | //import weka.core.DenseInstance; 31 | //import weka.core.Instance; 32 | 33 | /** 34 | * A {@code DenseInstance} subclass that adds a no-argument constructor. 35 | * This class is needed for Kryo serialization. 36 | */ 37 | public class SerializableInstance extends DenseInstance { 38 | 39 | /** The Constant serialVersionUID. */ 40 | private static final long serialVersionUID = -3659459626274566468L; 41 | 42 | /** 43 | * Instantiates a new serializable instance by calling {@code super(0)}. 44 | */ 45 | public SerializableInstance() { 46 | super(0); 47 | } 48 | 49 | /** 50 | * Instantiates a new serializable instance. 51 | * 52 | * @param arg0 value forwarded unchanged to the {@code DenseInstance(int)} constructor 53 | */ 54 | public SerializableInstance(int arg0) { 55 | super(arg0); 56 | } 57 | 58 | /** 59 | * Instantiates a new serializable instance. 60 | * 61 | * @param inst instance forwarded to the {@code DenseInstance(Instance)} constructor 62 | */ 63 | public SerializableInstance(Instance inst) { 64 | super(inst); 65 | } 66 | 67 | } -------------------------------------------------------------------------------- /samoa-api/src/main/java/com/yahoo/labs/samoa/learners/classifiers/trees/InactiveLearningNode.java: -------------------------------------------------------------------------------- 1 | package com.yahoo.labs.samoa.learners.classifiers.trees; 2 | 3 | /* 4 | * #%L 5 | * SAMOA 6 | * %% 7 | * Copyright (C) 2013 Yahoo! Inc. 8 | * %% 9 | * Licensed under the Apache License, Version 2.0 (the "License"); 10 | * you may not use this file except in compliance with the License. 11 | * You may obtain a copy of the License at 12 | * 13 | * http://www.apache.org/licenses/LICENSE-2.0 14 | * 15 | * Unless required by applicable law or agreed to in writing, software 16 | * distributed under the License is distributed on an "AS IS" BASIS, 17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | * See the License for the specific language governing permissions and 19 | * limitations under the License. 20 | * #L% 21 | */ 22 | 23 | import com.yahoo.labs.samoa.instances.Instance; 24 | 25 | /** 26 | * Class that represents inactive learning node. Inactive learning node is 27 | * a node which only keeps track of the observed class distribution. It does 28 | * not store the statistic for splitting the node. 29 | * 30 | * @author Arinto Murdopo 31 | * 32 | */ 33 | final class InactiveLearningNode extends LearningNode { 34 | 35 | /** 36 | * Serialization version identifier. 37 | */ 38 | private static final long serialVersionUID = -814552382883472302L; 39 | 40 | 41 | InactiveLearningNode(double[] initialClassObservation) { 42 | super(initialClassObservation); 43 | } 44 | /** Adds the instance's weight to the observed-distribution entry indexed by its class value (cast to int); proc is unused. */ 45 | @Override 46 | void learnFromInstance(Instance inst, ModelAggregatorProcessor proc) { 47 | this.observedClassDistribution.addToValue( 48 | (int)inst.classValue(), inst.weight()); 49 | } 50 | /** Returns the array produced by {@code observedClassDistribution.getArrayCopy()}; neither argument is consulted. */ 51 | @Override 52 | double[] getClassVotes(Instance inst, ModelAggregatorProcessor map) { 53 | return this.observedClassDistribution.getArrayCopy(); 54 | } 55 | 56 | } 57 | -------------------------------------------------------------------------------- /samoa-s4/samoa-s4-adapter/pom.xml: -------------------------------------------------------------------------------- 1 | 20 | 22 | 4.0.0 23 | 24 | 31 | 32 | 33 | samoa-s4-adapter 34 | com.yahoo.labs.bcn.samoa 35 | 0.1 36 | samoa-s4-adapter 37 | Adapter module to connect to external stream and also to provide entrance processing items for SAMOA 38 | 39 | 40 | 45 | 46 | samoa-s4 47 | com.yahoo.labs.bcn.samoa 48 | 0.1 49 | 50 | 51 | 52 | -------------------------------------------------------------------------------- /samoa-api/src/main/java/com/yahoo/labs/samoa/learners/classifiers/trees/ControlContentEvent.java: -------------------------------------------------------------------------------- 1 | package com.yahoo.labs.samoa.learners.classifiers.trees; 2 | 3 | /* 4 | * #%L 5 | * SAMOA 6
| * %% 7 | * Copyright (C) 2013 Yahoo! Inc. 8 | * %% 9 | * Licensed under the Apache License, Version 2.0 (the "License"); 10 | * you may not use this file except in compliance with the License. 11 | * You may obtain a copy of the License at 12 | * 13 | * http://www.apache.org/licenses/LICENSE-2.0 14 | * 15 | * Unless required by applicable law or agreed to in writing, software 16 | * distributed under the License is distributed on an "AS IS" BASIS, 17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | * See the License for the specific language governing permissions and 19 | * limitations under the License. 20 | * #L% 21 | */ 22 | 23 | import com.yahoo.labs.samoa.core.ContentEvent; 24 | 25 | /** 26 | * Abstract class to represent ContentEvent to control Local Statistic Processor. 27 | * @author Arinto Murdopo 28 | * 29 | */ 30 | abstract class ControlContentEvent implements ContentEvent { 31 | 32 | /** 33 | * Serialization version identifier. 34 | */ 35 | private static final long serialVersionUID = 5837375639629708363L; 36 | 37 | protected final long learningNodeId; 38 | /** No-argument constructor; sets learningNodeId to -1. */ 39 | public ControlContentEvent(){ 40 | this.learningNodeId = -1; 41 | } 42 | 43 | ControlContentEvent(long id){ 44 | this.learningNodeId = id; 45 | } 46 | /** Control events carry no key: always returns null. */ 47 | @Override 48 | public final String getKey() { 49 | return null; 50 | } 51 | 52 | @Override 53 | public void setKey(String str){ 54 | //Do nothing 55 | } 56 | 57 | @Override 58 | public boolean isLastEvent(){ 59 | return false; 60 | } 61 | 62 | final long getLearningNodeId(){ 63 | return this.learningNodeId; 64 | } 65 | 66 | abstract LocStatControl getType(); 67 | 68 | static enum LocStatControl { 69 | COMPUTE, DELETE 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /samoa-samza/src/main/java/com/yahoo/labs/samoa/topology/impl/SamzaProcessingNode.java: -------------------------------------------------------------------------------- 1 | package com.yahoo.labs.samoa.topology.impl; 2 | 3 | /* 4
| * #%L 5 | * SAMOA 6 | * %% 7 | * Copyright (C) 2013 - 2014 Yahoo! Inc. 8 | * %% 9 | * Licensed under the Apache License, Version 2.0 (the "License"); 10 | * you may not use this file except in compliance with the License. 11 | * You may obtain a copy of the License at 12 | * 13 | * http://www.apache.org/licenses/LICENSE-2.0 14 | * 15 | * Unless required by applicable law or agreed to in writing, software 16 | * distributed under the License is distributed on an "AS IS" BASIS, 17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | * See the License for the specific language governing permissions and 19 | * limitations under the License. 20 | * #L% 21 | */ 22 | 23 | import com.yahoo.labs.samoa.topology.IProcessingItem; 24 | 25 | /** 26 | * Common interface of SamzaEntranceProcessingItem and 27 | * SamzaProcessingItem 28 | * 29 | * @author Anh Thu Vu 30 | */ 31 | public interface SamzaProcessingNode extends IProcessingItem { 32 | /** 33 | * Registers an output stream with this processing item 34 | * 35 | * @param stream 36 | * the output stream 37 | * @return the number of output streams of this processing item 38 | */ 39 | public int addOutputStream(SamzaStream stream); 40 | 41 | /** 42 | * Gets the name/id of this processing item 43 | * 44 | * @return the name/id of this processing item 45 | */ 46 | // TODO: include getName() and setName() in IProcessingItem and/or AbstractEPI/PI 47 | public String getName(); 48 | 49 | /** 50 | * Sets the name/id for this processing item 51 | * @param name 52 | * the name/id of this processing item 53 | */ 54 | // TODO: include getName() and setName() in IProcessingItem and/or AbstractEPI/PI 55 | public void setName(String name); 56 | } -------------------------------------------------------------------------------- /samoa-api/src/main/java/com/yahoo/labs/samoa/core/Globals.java: -------------------------------------------------------------------------------- 1 | package com.yahoo.labs.samoa.core; 2 | 
3 | /* 4 | * #%L 5 | * SAMOA 6 | * %% 7 | * Copyright (C) 2013 Yahoo! Inc. 8 | * %% 9 | * Licensed under the Apache License, Version 2.0 (the "License"); 10 | * you may not use this file except in compliance with the License. 11 | * You may obtain a copy of the License at 12 | * 13 | * http://www.apache.org/licenses/LICENSE-2.0 14 | * 15 | * Unless required by applicable law or agreed to in writing, software 16 | * distributed under the License is distributed on an "AS IS" BASIS, 17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | * See the License for the specific language governing permissions and 19 | * limitations under the License. 20 | * #L% 21 | */ 22 | 23 | /** 24 | * License 25 | */ 26 | 27 | import com.github.javacliparser.StringUtils; 28 | 29 | /** 30 | * Class for storing global information about current version of SAMOA. 31 | * 32 | * @author Albert Bifet 33 | * @version $Revision: 7 $ 34 | */ 35 | public class Globals { 36 | 37 | public static final String workbenchTitle = "SAMOA: Scalable Advanced Massive Online Analysis Platform "; 38 | 39 | public static final String versionString = "0.0.1"; 40 | 41 | public static final String copyrightNotice = "Copyright Yahoo! 
Inc 2013"; 42 | 43 | public static final String webAddress = "http://github.com/yahoo/samoa"; 44 | 45 | public static String getWorkbenchInfoString() { 46 | StringBuilder result = new StringBuilder(); 47 | result.append(workbenchTitle); 48 | StringUtils.appendNewline(result); 49 | result.append("Version: "); 50 | result.append(versionString); 51 | StringUtils.appendNewline(result); 52 | result.append("Copyright: "); 53 | result.append(copyrightNotice); 54 | StringUtils.appendNewline(result); 55 | result.append("Web: "); 56 | result.append(webAddress); 57 | return result.toString(); 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /samoa-threads/src/test/java/com/yahoo/labs/samoa/topology/impl/ThreadsEventRunnableTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * #%L 3 | * SAMOA 4 | * %% 5 | * Copyright (C) 2013 Yahoo! Inc. 6 | * %% 7 | * Licensed under the Apache License, Version 2.0 (the "License"); 8 | * you may not use this file except in compliance with the License. 9 | * You may obtain a copy of the License at 10 | * 11 | * http://www.apache.org/licenses/LICENSE-2.0 12 | * 13 | * Unless required by applicable law or agreed to in writing, software 14 | * distributed under the License is distributed on an "AS IS" BASIS, 15 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 | * See the License for the specific language governing permissions and 17 | * limitations under the License. 
18 | * #L% 19 | */ 20 | package com.yahoo.labs.samoa.topology.impl; 21 | 22 | import static org.junit.Assert.*; 23 | import mockit.Mocked; 24 | import mockit.Tested; 25 | import mockit.Verifications; 26 | 27 | import org.junit.Before; 28 | import org.junit.Test; 29 | 30 | import com.yahoo.labs.samoa.core.ContentEvent; 31 | 32 | /** 33 | * @author Anh Thu Vu 34 | * 35 | */ 36 | public class ThreadsEventRunnableTest { 37 | 38 | @Tested private ThreadsEventRunnable task; 39 | 40 | @Mocked private ThreadsProcessingItemInstance piInstance; 41 | @Mocked private ContentEvent event; 42 | 43 | /** 44 | * @throws java.lang.Exception 45 | */ 46 | @Before 47 | public void setUp() throws Exception { 48 | task = new ThreadsEventRunnable(piInstance, event); 49 | } 50 | 51 | @Test 52 | public void testConstructor() { 53 | assertSame("WorkerProcessingItem is not set correctly.",piInstance,task.getWorkerProcessingItem()); 54 | assertSame("ContentEvent is not set correctly.",event,task.getContentEvent()); 55 | } 56 | 57 | @Test 58 | public void testRun() { 59 | task.run(); 60 | new Verifications () { 61 | { 62 | piInstance.processEvent(event); times=1; 63 | } 64 | }; 65 | } 66 | 67 | } 68 | -------------------------------------------------------------------------------- /samoa-api/src/main/java/com/yahoo/labs/samoa/utils/StreamDestination.java: -------------------------------------------------------------------------------- 1 | package com.yahoo.labs.samoa.utils; 2 | 3 | /* 4 | * #%L 5 | * SAMOA 6 | * %% 7 | * Copyright (C) 2013 Yahoo! Inc. 8 | * %% 9 | * Licensed under the Apache License, Version 2.0 (the "License"); 10 | * you may not use this file except in compliance with the License. 
11 | * You may obtain a copy of the License at 12 | * 13 | * http://www.apache.org/licenses/LICENSE-2.0 14 | * 15 | * Unless required by applicable law or agreed to in writing, software 16 | * distributed under the License is distributed on an "AS IS" BASIS, 17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | * See the License for the specific language governing permissions and 19 | * limitations under the License. 20 | * #L% 21 | */ 22 | 23 | import com.yahoo.labs.samoa.topology.IProcessingItem; 24 | 25 | /** 26 | * Represents one destination for streams. It has the info of: 27 | * the ProcessingItem, parallelismHint, and partitioning scheme. 28 | * Usage: 29 | * - When ProcessingItem connects to a stream, it will pass 30 | * a StreamDestination to the stream. 31 | * - Stream manages a set of StreamDestination. 32 | * - Used in single-threaded and multi-threaded local mode. 33 | * @author Anh Thu Vu 34 | * 35 | */ 36 | public class StreamDestination { 37 | private IProcessingItem pi; 38 | private int parallelism; 39 | private PartitioningScheme type; 40 | 41 | /* 42 | * Constructor 43 | */ 44 | public StreamDestination(IProcessingItem pi, int parallelismHint, PartitioningScheme type) { 45 | this.pi = pi; 46 | this.parallelism = parallelismHint; 47 | this.type = type; 48 | } 49 | 50 | /* 51 | * Getters 52 | */ 53 | public IProcessingItem getProcessingItem() { 54 | return this.pi; 55 | } 56 | 57 | public int getParallelism() { 58 | return this.parallelism; 59 | } 60 | 61 | public PartitioningScheme getPartitioningScheme() { 62 | return this.type; 63 | } 64 | 65 | } 66 | -------------------------------------------------------------------------------- /samoa-api/src/main/java/com/yahoo/labs/samoa/moa/core/FastVector.java: -------------------------------------------------------------------------------- 1 | 2 | /* 3 | * FastVector.java 4 | 5 | * 6 | */ 7 | package com.yahoo.labs.samoa.moa.core; 8 | 9 | /* 10 | * #%L 11 | * SAMOA 12 | * 
%% 13 | * Copyright (C) 1999 - 2012 University of Waikato, Hamilton, New Zealand 14 | * %% 15 | * Licensed under the Apache License, Version 2.0 (the "License"); 16 | * you may not use this file except in compliance with the License. 17 | * You may obtain a copy of the License at 18 | * 19 | * http://www.apache.org/licenses/LICENSE-2.0 20 | * 21 | * Unless required by applicable law or agreed to in writing, software 22 | * distributed under the License is distributed on an "AS IS" BASIS, 23 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 24 | * See the License for the specific language governing permissions and 25 | * limitations under the License. 26 | * #L% 27 | */ 28 | 29 | import java.util.ArrayList; 30 | 31 | /** 32 | * Simple extension of ArrayList. Exists for legacy reasons. 33 | * 34 | * @author Eibe Frank (eibe@cs.waikato.ac.nz) 35 | * @version $Revision: 8034 $ 36 | */ 37 | public class FastVector extends ArrayList { 38 | 39 | /** 40 | * Adds an element to this vector. Increases its capacity if its not large 41 | * enough. 42 | * 43 | * @param element the element to add 44 | */ 45 | public final void addElement(E element) { 46 | add(element); 47 | } 48 | 49 | /** 50 | * Returns the element at the given position. 51 | * 52 | * @param index the element's index 53 | * @return the element with the given index 54 | */ 55 | public final E elementAt(int index) { 56 | return get(index); 57 | } 58 | 59 | /** 60 | * Deletes an element from this vector. 
61 | * 62 | * @param index the index of the element to be deleted 63 | */ 64 | public final void removeElementAt(int index) { 65 | remove(index); 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /samoa-api/src/main/java/com/yahoo/labs/samoa/moa/MOAObject.java: -------------------------------------------------------------------------------- 1 | package com.yahoo.labs.samoa.moa; 2 | 3 | /* 4 | * #%L 5 | * SAMOA 6 | * %% 7 | * Copyright (C) 2007 University of Waikato, Hamilton, New Zealand 8 | * %% 9 | * Licensed under the Apache License, Version 2.0 (the "License"); 10 | * you may not use this file except in compliance with the License. 11 | * You may obtain a copy of the License at 12 | * 13 | * http://www.apache.org/licenses/LICENSE-2.0 14 | * 15 | * Unless required by applicable law or agreed to in writing, software 16 | * distributed under the License is distributed on an "AS IS" BASIS, 17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | * See the License for the specific language governing permissions and 19 | * limitations under the License. 20 | * #L% 21 | */ 22 | 23 | import java.io.Serializable; 24 | 25 | /** 26 | * Interface implemented by classes in MOA, so that all are serializable, 27 | * can produce copies of their objects, and can measure its memory size. 28 | * They also give a string description. 29 | * 30 | * @author Richard Kirkby (rkirkby@cs.waikato.ac.nz) 31 | * @version $Revision: 7 $ 32 | */ 33 | public interface MOAObject extends Serializable { 34 | 35 | /** 36 | * Gets the memory size of this object. 37 | * 38 | * @return the memory size of this object 39 | */ 40 | public int measureByteSize(); 41 | 42 | /** 43 | * This method produces a copy of this object. 44 | * 45 | * @return a copy of this object 46 | */ 47 | public MOAObject copy(); 48 | 49 | /** 50 | * Returns a string representation of this object. 
51 | * Used in AbstractMOAObject.toString 52 | * to give a string representation of the object. 53 | * 54 | * @param sb the stringbuilder to add the description 55 | * @param indent the number of characters to indent 56 | */ 57 | public void getDescription(StringBuilder sb, int indent); 58 | } 59 | -------------------------------------------------------------------------------- /samoa-storm/src/main/java/com/yahoo/labs/samoa/topology/impl/StormBoltStream.java: -------------------------------------------------------------------------------- 1 | package com.yahoo.labs.samoa.topology.impl; 2 | 3 | /* 4 | * #%L 5 | * SAMOA 6 | * %% 7 | * Copyright (C) 2013 Yahoo! Inc. 8 | * %% 9 | * Licensed under the Apache License, Version 2.0 (the "License"); 10 | * you may not use this file except in compliance with the License. 11 | * You may obtain a copy of the License at 12 | * 13 | * http://www.apache.org/licenses/LICENSE-2.0 14 | * 15 | * Unless required by applicable law or agreed to in writing, software 16 | * distributed under the License is distributed on an "AS IS" BASIS, 17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | * See the License for the specific language governing permissions and 19 | * limitations under the License. 20 | * #L% 21 | */ 22 | 23 | import backtype.storm.task.OutputCollector; 24 | import backtype.storm.tuple.Values; 25 | import com.yahoo.labs.samoa.core.ContentEvent; 26 | 27 | /** 28 | * Storm Stream that connects into Bolt. 
It wraps Storm's outputCollector class 29 | * @author Arinto Murdopo 30 | * 31 | */ 32 | class StormBoltStream extends StormStream{ 33 | 34 | /** 35 | * 36 | */ 37 | private static final long serialVersionUID = -5712513402991550847L; 38 | 39 | private OutputCollector outputCollector; 40 | 41 | StormBoltStream(String stormComponentId){ 42 | super(stormComponentId); 43 | } 44 | 45 | @Override 46 | public void put(ContentEvent contentEvent) { 47 | outputCollector.emit(this.outputStreamId, new Values(contentEvent, contentEvent.getKey())); 48 | } 49 | 50 | public void setCollector(OutputCollector outputCollector){ 51 | this.outputCollector = outputCollector; 52 | } 53 | 54 | // @Override 55 | // public void setStreamId(String streamId) { 56 | // // TODO Auto-generated method stub 57 | // //this.outputStreamId = streamId; 58 | // } 59 | 60 | @Override 61 | public String getStreamId() { 62 | // TODO Auto-generated method stub 63 | return null; 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /samoa-storm/src/main/java/com/yahoo/labs/samoa/topology/impl/StormSpoutStream.java: -------------------------------------------------------------------------------- 1 | //package com.yahoo.labs.samoa.topology.impl; 2 | // 3 | ///* 4 | // * #%L 5 | // * SAMOA 6 | // * %% 7 | // * Copyright (C) 2013 Yahoo! Inc. 8 | // * %% 9 | // * Licensed under the Apache License, Version 2.0 (the "License"); 10 | // * you may not use this file except in compliance with the License. 11 | // * You may obtain a copy of the License at 12 | // * 13 | // * http://www.apache.org/licenses/LICENSE-2.0 14 | // * 15 | // * Unless required by applicable law or agreed to in writing, software 16 | // * distributed under the License is distributed on an "AS IS" BASIS, 17 | // * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
18 | // * See the License for the specific language governing permissions and 19 | // * limitations under the License. 20 | // * #L% 21 | // */ 22 | // 23 | //import com.yahoo.labs.samoa.core.ContentEvent; 24 | //import com.yahoo.labs.samoa.topology.impl.StormEntranceProcessingItem.StormEntranceSpout; 25 | // 26 | ///** 27 | // * Storm Stream that connects into Spout. It wraps the spout itself 28 | // * @author Arinto Murdopo 29 | // * 30 | // */ 31 | //final class StormSpoutStream extends StormStream{ 32 | // 33 | // /** 34 | // * 35 | // */ 36 | // private static final long serialVersionUID = -7444653177614988650L; 37 | // 38 | // private StormEntranceSpout spout; 39 | // 40 | // StormSpoutStream(String stormComponentId) { 41 | // super(stormComponentId); 42 | // } 43 | // 44 | // @Override 45 | // public void put(ContentEvent contentEvent) { 46 | // spout.put(this, contentEvent); 47 | // } 48 | // 49 | // void setSpout(StormEntranceSpout spout){ 50 | // this.spout = spout; 51 | // } 52 | // 53 | //// @Override 54 | //// public void setStreamId(String stream) { 55 | //// // TODO Auto-generated method stub 56 | //// 57 | //// } 58 | // 59 | // @Override 60 | // public String getStreamId() { 61 | // // TODO Auto-generated method stub 62 | // return null; 63 | // } 64 | // 65 | //} 66 | -------------------------------------------------------------------------------- /samoa-api/src/main/java/com/yahoo/labs/samoa/learners/Learner.java: -------------------------------------------------------------------------------- 1 | package com.yahoo.labs.samoa.learners; 2 | 3 | /* 4 | * #%L 5 | * SAMOA 6 | * %% 7 | * Copyright (C) 2013 Yahoo! Inc. 8 | * %% 9 | * Licensed under the Apache License, Version 2.0 (the "License"); 10 | * you may not use this file except in compliance with the License. 
11 | * You may obtain a copy of the License at 12 | * 13 | * http://www.apache.org/licenses/LICENSE-2.0 14 | * 15 | * Unless required by applicable law or agreed to in writing, software 16 | * distributed under the License is distributed on an "AS IS" BASIS, 17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | * See the License for the specific language governing permissions and 19 | * limitations under the License. 20 | * #L% 21 | */ 22 | 23 | import com.yahoo.labs.samoa.core.Processor; 24 | import com.yahoo.labs.samoa.instances.Instances; 25 | import com.yahoo.labs.samoa.topology.Stream; 26 | import com.yahoo.labs.samoa.topology.TopologyBuilder; 27 | 28 | import java.io.Serializable; 29 | import java.util.Set; 30 | 31 | /** 32 | * The Interface Classifier. 33 | * Initializing Classifier should initalize PI to connect the Classifier with the input stream 34 | * and initialize result stream so that other PI can connect to the classification result of this classifier 35 | */ 36 | 37 | public interface Learner extends Serializable{ 38 | 39 | /** 40 | * Inits the Learner object. 41 | * 42 | * @param topologyBuilder the topology builder 43 | * @param dataset the dataset 44 | * @param parallelism the parallelism 45 | */ 46 | public void init(TopologyBuilder topologyBuilder, Instances dataset, int parallelism); 47 | 48 | /** 49 | * Gets the input processing item. 
50 | * 51 | * @return the input processing item 52 | */ 53 | public Processor getInputProcessor(); 54 | 55 | 56 | /** 57 | * Gets the result streams 58 | * 59 | * @return the set of result streams 60 | */ 61 | public Set getResultStreams(); 62 | } 63 | -------------------------------------------------------------------------------- /samoa-api/src/main/java/com/yahoo/labs/samoa/tasks/Task.java: -------------------------------------------------------------------------------- 1 | package com.yahoo.labs.samoa.tasks; 2 | 3 | /* 4 | * #%L 5 | * SAMOA 6 | * %% 7 | * Copyright (C) 2013 Yahoo! Inc. 8 | * %% 9 | * Licensed under the Apache License, Version 2.0 (the "License"); 10 | * you may not use this file except in compliance with the License. 11 | * You may obtain a copy of the License at 12 | * 13 | * http://www.apache.org/licenses/LICENSE-2.0 14 | * 15 | * Unless required by applicable law or agreed to in writing, software 16 | * distributed under the License is distributed on an "AS IS" BASIS, 17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | * See the License for the specific language governing permissions and 19 | * limitations under the License. 20 | * #L% 21 | */ 22 | 23 | import com.yahoo.labs.samoa.topology.ComponentFactory; 24 | import com.yahoo.labs.samoa.topology.Topology; 25 | 26 | /** 27 | * Task interface, the mother of all SAMOA tasks! 28 | */ 29 | public interface Task { 30 | 31 | /** 32 | * Initialize this SAMOA task, 33 | * i.e. 
create and connect ProcessingItems and Streams 34 | * and initialize the topology 35 | */ 36 | public void init(); 37 | 38 | /** 39 | * Return the final topology object to be executed in the cluster 40 | * @return topology object to be submitted to be executed in the cluster 41 | */ 42 | public Topology getTopology(); 43 | 44 | // /** 45 | // * Return the entrance processor to start SAMOA topology 46 | // * The logic to start the topology should be implemented here 47 | // * @return entrance processor to start the topology 48 | // */ 49 | // public TopologyStarter getTopologyStarter(); 50 | 51 | /** 52 | * Sets the factory. 53 | * TODO: propose to hide factory from task, 54 | * i.e. Task will only see TopologyBuilder, 55 | * and factory creation will be handled by TopologyBuilder 56 | * 57 | * @param factory the new factory 58 | */ 59 | public void setFactory(ComponentFactory factory) ; 60 | 61 | } -------------------------------------------------------------------------------- /samoa-threads/src/main/java/com/yahoo/labs/samoa/topology/impl/ThreadsEventRunnable.java: -------------------------------------------------------------------------------- 1 | package com.yahoo.labs.samoa.topology.impl; 2 | 3 | /* 4 | * #%L 5 | * SAMOA 6 | * %% 7 | * Copyright (C) 2013 Yahoo! Inc. 8 | * %% 9 | * Licensed under the Apache License, Version 2.0 (the "License"); 10 | * you may not use this file except in compliance with the License. 11 | * You may obtain a copy of the License at 12 | * 13 | * http://www.apache.org/licenses/LICENSE-2.0 14 | * 15 | * Unless required by applicable law or agreed to in writing, software 16 | * distributed under the License is distributed on an "AS IS" BASIS, 17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | * See the License for the specific language governing permissions and 19 | * limitations under the License. 
20 | * #L% 21 | */ 22 | 23 | import com.yahoo.labs.samoa.core.ContentEvent; 24 | 25 | /** 26 | * Runnable class where each object corresponds to a ContentEvent and an assigned PI. 27 | * When a PI receives a ContentEvent, it will create a ThreadsEventRunnable with the received ContentEvent 28 | * and an assigned workerPI. This runnable is then submitted to a thread queue waiting to be executed. 29 | * The worker PI will process the received event when the runnable object is executed/run. 30 | * @author Anh Thu Vu 31 | * 32 | */ 33 | public class ThreadsEventRunnable implements Runnable { 34 | 35 | private ThreadsProcessingItemInstance workerPi; 36 | private ContentEvent event; 37 | 38 | public ThreadsEventRunnable(ThreadsProcessingItemInstance workerPi, ContentEvent event) { 39 | this.workerPi = workerPi; 40 | this.event = event; 41 | } 42 | 43 | public ThreadsProcessingItemInstance getWorkerProcessingItem() { 44 | return this.workerPi; 45 | } 46 | 47 | public ContentEvent getContentEvent() { 48 | return this.event; 49 | } 50 | 51 | @Override 52 | public void run() { 53 | try { 54 | workerPi.processEvent(event); 55 | } 56 | catch (Exception e) { 57 | e.printStackTrace(); 58 | } 59 | } 60 | 61 | } 62 | -------------------------------------------------------------------------------- /samoa-local/src/main/java/com/yahoo/labs/samoa/topology/impl/SimpleComponentFactory.java: -------------------------------------------------------------------------------- 1 | package com.yahoo.labs.samoa.topology.impl; 2 | 3 | /* 4 | * #%L 5 | * SAMOA 6 | * %% 7 | * Copyright (C) 2013 Yahoo! Inc. 8 | * %% 9 | * Licensed under the Apache License, Version 2.0 (the "License"); 10 | * you may not use this file except in compliance with the License. 
11 | * You may obtain a copy of the License at 12 | * 13 | * http://www.apache.org/licenses/LICENSE-2.0 14 | * 15 | * Unless required by applicable law or agreed to in writing, software 16 | * distributed under the License is distributed on an "AS IS" BASIS, 17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | * See the License for the specific language governing permissions and 19 | * limitations under the License. 20 | * #L% 21 | */ 22 | 23 | import com.yahoo.labs.samoa.core.EntranceProcessor; 24 | import com.yahoo.labs.samoa.core.Processor; 25 | import com.yahoo.labs.samoa.topology.ComponentFactory; 26 | import com.yahoo.labs.samoa.topology.EntranceProcessingItem; 27 | import com.yahoo.labs.samoa.topology.IProcessingItem; 28 | import com.yahoo.labs.samoa.topology.ProcessingItem; 29 | import com.yahoo.labs.samoa.topology.Stream; 30 | import com.yahoo.labs.samoa.topology.Topology; 31 | 32 | public class SimpleComponentFactory implements ComponentFactory { 33 | 34 | public ProcessingItem createPi(Processor processor, int paralellism) { 35 | return new SimpleProcessingItem(processor, paralellism); 36 | } 37 | 38 | public ProcessingItem createPi(Processor processor) { 39 | return this.createPi(processor, 1); 40 | } 41 | 42 | public EntranceProcessingItem createEntrancePi(EntranceProcessor processor) { 43 | return new SimpleEntranceProcessingItem(processor); 44 | } 45 | 46 | public Stream createStream(IProcessingItem sourcePi) { 47 | return new SimpleStream(sourcePi); 48 | } 49 | 50 | public Topology createTopology(String topoName) { 51 | return new SimpleTopology(topoName); 52 | } 53 | } -------------------------------------------------------------------------------- /samoa-s4/src/main/java/com/yahoo/labs/samoa/topology/impl/S4Topology.java: -------------------------------------------------------------------------------- 1 | package com.yahoo.labs.samoa.topology.impl; 2 | 3 | /* 4 | * #%L 5 | * SAMOA 6 | * %% 7 | * Copyright (C) 2013 
Yahoo! Inc. 8 | * %% 9 | * Licensed under the Apache License, Version 2.0 (the "License"); 10 | * you may not use this file except in compliance with the License. 11 | * You may obtain a copy of the License at 12 | * 13 | * http://www.apache.org/licenses/LICENSE-2.0 14 | * 15 | * Unless required by applicable law or agreed to in writing, software 16 | * distributed under the License is distributed on an "AS IS" BASIS, 17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | * See the License for the specific language governing permissions and 19 | * limitations under the License. 20 | * #L% 21 | */ 22 | 23 | import com.yahoo.labs.samoa.topology.EntranceProcessingItem; 24 | import com.yahoo.labs.samoa.topology.AbstractTopology; 25 | 26 | public class S4Topology extends AbstractTopology { 27 | 28 | // CASEY: it seems evaluationTask is not used. 29 | // Remove it for now 30 | 31 | // private String _evaluationTask; 32 | 33 | // S4Topology(String topoName, String evalTask) { 34 | // super(topoName); 35 | // } 36 | // 37 | // S4Topology(String topoName) { 38 | // this(topoName, null); 39 | // } 40 | 41 | // @Override 42 | // public void setEvaluationTask(String evalTask) { 43 | // _evaluationTask = evalTask; 44 | // } 45 | // 46 | // @Override 47 | // public String getEvaluationTask() { 48 | // return _evaluationTask; 49 | // } 50 | 51 | S4Topology(String topoName) { 52 | super(topoName); 53 | } 54 | 55 | protected EntranceProcessingItem getEntranceProcessingItem() { 56 | if (this.getEntranceProcessingItems() == null) return null; 57 | if (this.getEntranceProcessingItems().size() < 1) return null; 58 | // TODO: support multiple entrance PIs 59 | return (EntranceProcessingItem)this.getEntranceProcessingItems().toArray()[0]; 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /samoa-api/src/main/java/com/yahoo/labs/samoa/learners/classifiers/rules/distributed/AssignmentContentEvent.java: 
-------------------------------------------------------------------------------- 1 | package com.yahoo.labs.samoa.learners.classifiers.rules.distributed; 2 | 3 | /* 4 | * #%L 5 | * SAMOA 6 | * %% 7 | * Copyright (C) 2013 - 2014 Yahoo! Inc. 8 | * %% 9 | * Licensed under the Apache License, Version 2.0 (the "License"); 10 | * you may not use this file except in compliance with the License. 11 | * You may obtain a copy of the License at 12 | * 13 | * http://www.apache.org/licenses/LICENSE-2.0 14 | * 15 | * Unless required by applicable law or agreed to in writing, software 16 | * distributed under the License is distributed on an "AS IS" BASIS, 17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | * See the License for the specific language governing permissions and 19 | * limitations under the License. 20 | * #L% 21 | */ 22 | 23 | import com.yahoo.labs.samoa.core.ContentEvent; 24 | import com.yahoo.labs.samoa.instances.Instance; 25 | 26 | /** 27 | * Forwarded instances from Model Aggregator to Learners/Default Rule Learner.
28 | * 29 | * @author Anh Thu Vu 30 | * 31 | */ 32 | public class AssignmentContentEvent implements ContentEvent { 33 | 34 | /** 35 | * 36 | */ 37 | private static final long serialVersionUID = 1031695762172836629L; 38 | 39 | private int ruleNumberID; 40 | private Instance instance; 41 | 42 | public AssignmentContentEvent() { 43 | this(0, null); 44 | } 45 | 46 | public AssignmentContentEvent(int ruleID, Instance instance) { 47 | this.ruleNumberID = ruleID; 48 | this.instance = instance; 49 | } 50 | 51 | @Override 52 | public String getKey() { 53 | return Integer.toString(this.ruleNumberID); 54 | } 55 | 56 | @Override 57 | public void setKey(String key) { 58 | // do nothing 59 | } 60 | 61 | @Override 62 | public boolean isLastEvent() { 63 | return false; 64 | } 65 | 66 | public Instance getInstance() { 67 | return this.instance; 68 | } 69 | 70 | public int getRuleNumberID() { 71 | return this.ruleNumberID; 72 | } 73 | 74 | } 75 | -------------------------------------------------------------------------------- /samoa-s4/samoa-s4-adapter/src/main/java/samoa/topology/adapter/S4EntranceProcessingItem.java: -------------------------------------------------------------------------------- 1 | package samoa.topology.adapter; 2 | 3 | /* 4 | * #%L 5 | * SAMOA 6 | * %% 7 | * Copyright (C) 2013 Yahoo! Inc. 8 | * %% 9 | * Licensed under the Apache License, Version 2.0 (the "License"); 10 | * you may not use this file except in compliance with the License. 11 | * You may obtain a copy of the License at 12 | * 13 | * http://www.apache.org/licenses/LICENSE-2.0 14 | * 15 | * Unless required by applicable law or agreed to in writing, software 16 | * distributed under the License is distributed on an "AS IS" BASIS, 17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | * See the License for the specific language governing permissions and 19 | * limitations under the License. 
20 | * #L% 21 | */ 22 | 23 | import org.apache.s4.core.App; 24 | import org.apache.s4.core.ProcessingElement; 25 | 26 | import samoa.core.Processor; 27 | import samoa.topology.EntranceProcessingItem; 28 | import samoa.topology.impl.DoTaskApp; 29 | import weka.core.Instance; 30 | 31 | public class S4EntranceProcessingItem extends ProcessingElement implements EntranceProcessingItem { 32 | 33 | private Processor processor; 34 | //DoTaskApp app; 35 | 36 | 37 | public S4EntranceProcessingItem(App app){ 38 | super(app); 39 | //this.app = (DoTaskApp) app; 40 | this.setSingleton(true); 41 | 42 | } 43 | 44 | @Override 45 | public Processor getProcessor() { 46 | return this.processor; 47 | } 48 | 49 | @Override 50 | public void put(Instance inst) { 51 | // do nothing 52 | //may not needed 53 | 54 | } 55 | 56 | @Override 57 | protected void onCreate() { 58 | 59 | // if (this.processor != null){ 60 | // this.processor = this.processor.newProcessor(this.processor); 61 | // this.processor.onCreate(Integer.parseInt(getId())); 62 | // } 63 | } 64 | 65 | @Override 66 | protected void onRemove() { 67 | //do nothing 68 | 69 | } 70 | 71 | public void setProcessor(Processor processor){ 72 | this.processor = processor; 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /samoa-threads/src/test/java/com/yahoo/labs/samoa/topology/impl/ThreadsProcessingItemInstanceTest.java: -------------------------------------------------------------------------------- 1 | /* 2 | * #%L 3 | * SAMOA 4 | * %% 5 | * Copyright (C) 2013 Yahoo! Inc. 6 | * %% 7 | * Licensed under the Apache License, Version 2.0 (the "License"); 8 | * you may not use this file except in compliance with the License. 
/**
 * Unit tests for {@link ThreadsProcessingItemInstance}: checks that the
 * constructor stores its arguments and that an event handed to
 * {@code processEvent} is passed to the wrapped {@link Processor}.
 *
 * @author Anh Thu Vu
 *
 */
public class ThreadsProcessingItemInstanceTest {

    // NOTE(review): the field is annotated @Tested but is also assigned
    // explicitly in setUp(), so every test runs against the instance built there.
    @Tested private ThreadsProcessingItemInstance piInstance;

    // Mocked collaborators passed to the instance under test.
    @Mocked private Processor processor;
    @Mocked private ContentEvent event;

    // Thread index handed to the constructor and read back in testConstructor().
    private final int threadIndex = 2;

    /**
     * Builds a fresh instance from the mocked processor and the fixed thread index.
     */
    @Before
    public void setUp() throws Exception {
        piInstance = new ThreadsProcessingItemInstance(processor, threadIndex);
    }

    /**
     * The constructor must keep the very same processor object and the given thread index.
     */
    @Test
    public void testConstructor() {
        assertSame("Processor is not set correctly.", processor, piInstance.getProcessor());
        // The trailing 0 is presumably the delta of a floating-point assertEquals
        // overload; with a delta of 0 the comparison is exact. TODO confirm.
        assertEquals("Thread index is not set correctly.", threadIndex, piInstance.getThreadIndex(),0);
    }

    /**
     * Processing one event must result in exactly one call to the processor with that event.
     */
    @Test
    public void testProcessEvent() {
        piInstance.processEvent(event);
        new Verifications() {
            {
                // Expectation: process(event) was invoked once on the mock.
                processor.process(event); times=1;
            }
        };
    }

}
32 | 33 | lib/ 34 | ../samoa-api/target/lib/ 35 | 36 | * 37 | 38 | 39 | 40 | app/ 41 | ../samoa-api/target/ 42 | 43 | samoa-api-*.jar 44 | 45 | 46 | 47 | 48 | 49 | app/ 50 | target/ 51 | 52 | samoa-s4-*.jar 53 | 54 | 55 | 56 | / 57 | target/ 58 | 59 | lib/* 60 | 61 | 62 | 63 | 64 | -------------------------------------------------------------------------------- /samoa-api/src/main/java/com/yahoo/labs/samoa/moa/clusterers/Clusterer.java: -------------------------------------------------------------------------------- 1 | package com.yahoo.labs.samoa.moa.clusterers; 2 | 3 | /* 4 | * #%L 5 | * SAMOA 6 | * %% 7 | * Copyright (C) 2009 University of Waikato, Hamilton, New Zealand 8 | * %% 9 | * Licensed under the Apache License, Version 2.0 (the "License"); 10 | * you may not use this file except in compliance with the License. 11 | * You may obtain a copy of the License at 12 | * 13 | * http://www.apache.org/licenses/LICENSE-2.0 14 | * 15 | * Unless required by applicable law or agreed to in writing, software 16 | * distributed under the License is distributed on an "AS IS" BASIS, 17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | * See the License for the specific language governing permissions and 19 | * limitations under the License. 
20 | * #L% 21 | */ 22 | 23 | import com.yahoo.labs.samoa.moa.MOAObject; 24 | import com.yahoo.labs.samoa.moa.cluster.Clustering; 25 | import com.yahoo.labs.samoa.instances.InstancesHeader; 26 | import com.yahoo.labs.samoa.moa.core.Measurement; 27 | import com.yahoo.labs.samoa.moa.options.OptionHandler; 28 | import com.yahoo.labs.samoa.instances.Instance; 29 | 30 | public interface Clusterer extends MOAObject, OptionHandler { 31 | 32 | public void setModelContext(InstancesHeader ih); 33 | 34 | public InstancesHeader getModelContext(); 35 | 36 | public boolean isRandomizable(); 37 | 38 | public void setRandomSeed(int s); 39 | 40 | public boolean trainingHasStarted(); 41 | 42 | public double trainingWeightSeenByModel(); 43 | 44 | public void resetLearning(); 45 | 46 | public void trainOnInstance(Instance inst); 47 | 48 | public double[] getVotesForInstance(Instance inst); 49 | 50 | public Measurement[] getModelMeasurements(); 51 | 52 | public Clusterer[] getSubClusterers(); 53 | 54 | public Clusterer copy(); 55 | 56 | public Clustering getClusteringResult(); 57 | 58 | public boolean implementsMicroClusterer(); 59 | 60 | public Clustering getMicroClusteringResult(); 61 | 62 | public boolean keepClassLabel(); 63 | 64 | } 65 | -------------------------------------------------------------------------------- /samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/PerformanceEvaluator.java: -------------------------------------------------------------------------------- 1 | package com.yahoo.labs.samoa.evaluation; 2 | 3 | /* 4 | * #%L 5 | * SAMOA 6 | * %% 7 | * Copyright (C) 2013 Yahoo! Inc. 8 | * %% 9 | * Licensed under the Apache License, Version 2.0 (the "License"); 10 | * you may not use this file except in compliance with the License. 
11 | * You may obtain a copy of the License at 12 | * 13 | * http://www.apache.org/licenses/LICENSE-2.0 14 | * 15 | * Unless required by applicable law or agreed to in writing, software 16 | * distributed under the License is distributed on an "AS IS" BASIS, 17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | * See the License for the specific language governing permissions and 19 | * limitations under the License. 20 | * #L% 21 | */ 22 | 23 | import com.yahoo.labs.samoa.moa.MOAObject; 24 | import com.yahoo.labs.samoa.moa.core.Measurement; 25 | 26 | import com.yahoo.labs.samoa.instances.Instance; 27 | 28 | /** 29 | * Interface implemented by learner evaluators to monitor the results of the 30 | * learning process. 31 | * 32 | * @author Richard Kirkby (rkirkby@cs.waikato.ac.nz) 33 | * @version $Revision: 7 $ 34 | */ 35 | public interface PerformanceEvaluator extends MOAObject { 36 | 37 | /** 38 | * Resets this evaluator. It must be similar to starting a new evaluator 39 | * from scratch. 40 | * 41 | */ 42 | public void reset(); 43 | 44 | /** 45 | * Adds a learning result to this evaluator. 46 | * 47 | * @param inst 48 | * the instance to be classified 49 | * @param classVotes 50 | * an array containing the estimated membership probabilities of 51 | * the test instance in each class 52 | * @return an array of measurements monitored in this evaluator 53 | */ 54 | public void addResult(Instance inst, double[] classVotes); 55 | 56 | /** 57 | * Gets the current measurements monitored by this evaluator. 
58 | * 59 | * @return an array of measurements monitored by this evaluator 60 | */ 61 | public Measurement[] getPerformanceMeasurements(); 62 | } 63 | -------------------------------------------------------------------------------- /samoa-api/src/main/java/com/yahoo/labs/samoa/examples/HelloWorldContentEvent.java: -------------------------------------------------------------------------------- 1 | package com.yahoo.labs.samoa.examples; 2 | 3 | /* 4 | * #%L 5 | * SAMOA 6 | * %% 7 | * Copyright (C) 2013 - 2014 Yahoo! Inc. 8 | * %% 9 | * Licensed under the Apache License, Version 2.0 (the "License"); 10 | * you may not use this file except in compliance with the License. 11 | * You may obtain a copy of the License at 12 | * 13 | * http://www.apache.org/licenses/LICENSE-2.0 14 | * 15 | * Unless required by applicable law or agreed to in writing, software 16 | * distributed under the License is distributed on an "AS IS" BASIS, 17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | * See the License for the specific language governing permissions and 19 | * limitations under the License. 20 | * #L% 21 | */ 22 | 23 | import com.yahoo.labs.samoa.core.ContentEvent; 24 | 25 | /** 26 | * Example {@link ContentEvent} that contains a single integer. 
27 | */ 28 | public class HelloWorldContentEvent implements ContentEvent { 29 | 30 | private static final long serialVersionUID = -2406968925730298156L; 31 | private final boolean isLastEvent; 32 | private final int helloWorldData; 33 | 34 | public HelloWorldContentEvent(int helloWorldData, boolean isLastEvent) { 35 | this.isLastEvent = isLastEvent; 36 | this.helloWorldData = helloWorldData; 37 | } 38 | 39 | /* 40 | * No-argument constructor for Kryo 41 | */ 42 | public HelloWorldContentEvent() { 43 | this(0,false); 44 | } 45 | 46 | @Override 47 | public String getKey() { 48 | return null; 49 | } 50 | 51 | @Override 52 | public void setKey(String str) { 53 | // do nothing, it's key-less content event 54 | } 55 | 56 | @Override 57 | public boolean isLastEvent() { 58 | return isLastEvent; 59 | } 60 | 61 | public int getHelloWorldData() { 62 | return helloWorldData; 63 | } 64 | 65 | @Override 66 | public String toString() { 67 | return "HelloWorldContentEvent [helloWorldData=" + helloWorldData + "]"; 68 | } 69 | } -------------------------------------------------------------------------------- /samoa-api/src/main/java/com/yahoo/labs/samoa/core/EntranceProcessor.java: -------------------------------------------------------------------------------- 1 | package com.yahoo.labs.samoa.core; 2 | 3 | /* 4 | * #%L 5 | * SAMOA 6 | * %% 7 | * Copyright (C) 2013 Yahoo! Inc. 8 | * %% 9 | * Licensed under the Apache License, Version 2.0 (the "License"); 10 | * you may not use this file except in compliance with the License. 11 | * You may obtain a copy of the License at 12 | * 13 | * http://www.apache.org/licenses/LICENSE-2.0 14 | * 15 | * Unless required by applicable law or agreed to in writing, software 16 | * distributed under the License is distributed on an "AS IS" BASIS, 17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | * See the License for the specific language governing permissions and 19 | * limitations under the License. 
20 | * #L% 21 | */ 22 | 23 | import java.io.Serializable; 24 | 25 | import com.github.javacliparser.Configurable; 26 | 27 | /** 28 | * An EntranceProcessor is a specific kind of processor dedicated to providing events to inject in the topology. It can be connected to a single output stream. 29 | */ 30 | public interface EntranceProcessor extends Serializable, Configurable, Processor { 31 | 32 | /** 33 | * Initializes the Processor. This method is called once after the topology is set up and before any call to the {@link nextTuple} method. 34 | * 35 | * @param the 36 | * identifier of the processor. 37 | */ 38 | public void onCreate(int id); 39 | 40 | /** 41 | * Checks whether the source stream is finished/exhausted. 42 | */ 43 | public boolean isFinished(); 44 | 45 | /** 46 | * Checks whether a new event is ready to be processed. 47 | * 48 | * @return true if the EntranceProcessor is ready to provide the next event, false otherwise. 49 | */ 50 | public boolean hasNext(); 51 | 52 | /** 53 | * Provides the next tuple to be processed by the topology. This method is the entry point for external events into the topology. 54 | * 55 | * @return the next event to be processed. 56 | */ 57 | public ContentEvent nextEvent(); 58 | 59 | } 60 | -------------------------------------------------------------------------------- /samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/splitcriteria/InfoGainSplitCriterionMultilabel.java: -------------------------------------------------------------------------------- 1 | package com.yahoo.labs.samoa.moa.classifiers.core.splitcriteria; 2 | 3 | /* 4 | * #%L 5 | * SAMOA 6 | * %% 7 | * Copyright (C) 2012 University of Waikato, Hamilton, New Zealand 8 | * %% 9 | * Licensed under the Apache License, Version 2.0 (the "License"); 10 | * you may not use this file except in compliance with the License. 
11 | * You may obtain a copy of the License at 12 | * 13 | * http://www.apache.org/licenses/LICENSE-2.0 14 | * 15 | * Unless required by applicable law or agreed to in writing, software 16 | * distributed under the License is distributed on an "AS IS" BASIS, 17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | * See the License for the specific language governing permissions and 19 | * limitations under the License. 20 | * #L% 21 | */ 22 | 23 | import com.yahoo.labs.samoa.moa.core.Utils; 24 | 25 | /** 26 | * Class for computing splitting criteria using information gain with respect to 27 | * distributions of class values for Multilabel data. The split criterion is 28 | * used as a parameter on decision trees and decision stumps. 29 | * 30 | * @author Richard Kirkby (rkirkby@cs.waikato.ac.nz) 31 | * @author Jesse Read (jesse@tsc.uc3m.es) 32 | * @version $Revision: 1 $ 33 | */ 34 | public class InfoGainSplitCriterionMultilabel extends InfoGainSplitCriterion { 35 | 36 | private static final long serialVersionUID = 1L; 37 | 38 | public static double computeEntropy(double[] dist) { 39 | double entropy = 0.0; 40 | double sum = 0.0; 41 | for (double d : dist) { 42 | sum += d; 43 | } 44 | if (sum > 0.0) { 45 | for (double num : dist) { 46 | double d = num / sum; 47 | if (d > 0.0) { // TODO: how small can d be before log2 overflows? 48 | entropy -= d * Utils.log2(d) + (1 - d) * Utils.log2(1 - d); //Extension to Multilabel 49 | } 50 | } 51 | } 52 | return sum > 0.0 ? entropy : 0.0; 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /samoa-api/src/main/java/com/yahoo/labs/samoa/core/Processor.java: -------------------------------------------------------------------------------- 1 | package com.yahoo.labs.samoa.core; 2 | 3 | /* 4 | * #%L 5 | * SAMOA 6 | * %% 7 | * Copyright (C) 2013 Yahoo! Inc. 
/**
 * The Interface Processor. A processor is {@link Serializable} and
 * {@link Configurable}; it is initialized once, receives events one at a time,
 * and can produce further instances of itself.
 */
public interface Processor extends Serializable, Configurable {

    /**
     * Entry point for the {@link Processor} code. This method is called once for every event received.
     * 
     * @param event
     *            the event to be processed.
     * @return true if successful, false otherwise.
     */
    boolean process(ContentEvent event);

    /**
     * Initializes the Processor.
     * This method is called once after the topology is set up and before any call to the
     * {@link #process(ContentEvent)} method.
     * 
     * @param id
     *            the identifier of the processor.
     */
    void onCreate(int id);

    /**
     * Creates a copy of a processor.
     * This method is used to instantiate multiple instances of the same {@link Processor}.
     * 
     * @param processor
     *            the processor to be copied.
     * 
     * @return a new instance of the {@link Processor}.
     * */
    Processor newProcessor(Processor processor); // FIXME there should be no need for the processor as a parameter
    // TODO can we substitute this with Cloneable?
}
41 | * 42 | * @param preSplitDist the class distribution before the split 43 | * @param postSplitDists the class distribution after the split 44 | * @return value of the merit of splitting 45 | */ 46 | public double getMeritOfSplit(double[] preSplitDist, 47 | double[][] postSplitDists); 48 | 49 | /** 50 | * Computes the range of splitting merit 51 | * 52 | * @param preSplitDist the class distribution before the split 53 | * @return value of the range of splitting merit 54 | */ 55 | public double getRangeOfMerit(double[] preSplitDist); 56 | } 57 | -------------------------------------------------------------------------------- /samoa-api/src/main/java/com/yahoo/labs/samoa/learners/classifiers/rules/common/PassiveRule.java: -------------------------------------------------------------------------------- 1 | package com.yahoo.labs.samoa.learners.classifiers.rules.common; 2 | 3 | /* 4 | * #%L 5 | * SAMOA 6 | * %% 7 | * Copyright (C) 2013 - 2014 Yahoo! Inc. 8 | * %% 9 | * Licensed under the Apache License, Version 2.0 (the "License"); 10 | * you may not use this file except in compliance with the License. 11 | * You may obtain a copy of the License at 12 | * 13 | * http://www.apache.org/licenses/LICENSE-2.0 14 | * 15 | * Unless required by applicable law or agreed to in writing, software 16 | * distributed under the License is distributed on an "AS IS" BASIS, 17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | * See the License for the specific language governing permissions and 19 | * limitations under the License. 20 | * #L% 21 | */ 22 | 23 | import java.util.LinkedList; 24 | 25 | /** 26 | * PassiveRule is a LearningRule that update its LearningNode 27 | * with the received new LearningNode. 
28 | * 29 | * @author Anh Thu Vu 30 | * 31 | */ 32 | public class PassiveRule extends LearningRule { 33 | 34 | /** 35 | * 36 | */ 37 | private static final long serialVersionUID = -5551571895910530275L; 38 | 39 | private RulePassiveRegressionNode learningNode; 40 | 41 | /* 42 | * Constructor to turn an ActiveRule into a PassiveRule 43 | */ 44 | public PassiveRule(ActiveRule rule) { 45 | this.nodeList = new LinkedList<>(); 46 | for (RuleSplitNode node:rule.nodeList) { 47 | this.nodeList.add(node.getACopy()); 48 | } 49 | 50 | this.learningNode = new RulePassiveRegressionNode(rule.getLearningNode()); 51 | this.ruleNumberID = rule.ruleNumberID; 52 | } 53 | 54 | @Override 55 | public RuleRegressionNode getLearningNode() { 56 | return this.learningNode; 57 | } 58 | 59 | @Override 60 | public void setLearningNode(RuleRegressionNode learningNode) { 61 | this.learningNode = (RulePassiveRegressionNode) learningNode; 62 | } 63 | 64 | /* 65 | * MOA GUI 66 | */ 67 | @Override 68 | public void getDescription(StringBuilder sb, int indent) { 69 | // TODO Auto-generated method stub 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/rules/core/voting/UniformWeightedVote.java: -------------------------------------------------------------------------------- 1 | package com.yahoo.labs.samoa.moa.classifiers.rules.core.voting; 2 | 3 | /* 4 | * #%L 5 | * SAMOA 6 | * %% 7 | * Copyright (C) 2013 - 2014 Yahoo! Inc. 8 | * %% 9 | * Licensed under the Apache License, Version 2.0 (the "License"); 10 | * you may not use this file except in compliance with the License. 
11 | * You may obtain a copy of the License at 12 | * 13 | * http://www.apache.org/licenses/LICENSE-2.0 14 | * 15 | * Unless required by applicable law or agreed to in writing, software 16 | * distributed under the License is distributed on an "AS IS" BASIS, 17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | * See the License for the specific language governing permissions and 19 | * limitations under the License. 20 | * #L% 21 | */ 22 | 23 | 24 | /** 25 | * UniformWeightedVote class for weighted votes based on estimates of errors. 26 | * 27 | * @author Joao Duarte (jmduarte@inescporto.pt) 28 | * @version $Revision: 1 $ 29 | */ 30 | public class UniformWeightedVote extends AbstractErrorWeightedVote { 31 | 32 | 33 | private static final long serialVersionUID = 6359349250620616482L; 34 | 35 | public UniformWeightedVote() { 36 | super(); 37 | } 38 | 39 | public UniformWeightedVote(AbstractErrorWeightedVote aewv) { 40 | super(aewv); 41 | } 42 | 43 | @Override 44 | public double[] computeWeightedVote() { 45 | int n=votes.size(); 46 | weights=new double[n]; 47 | double [] weightedVote=null; 48 | if (n>0){ 49 | int d=votes.get(0).length; 50 | weightedVote=new double[d]; 51 | for (int i=0; i getTaskResultType(); 42 | 43 | /** 44 | * This method performs this task, 45 | * when TaskMonitor and ObjectRepository are no needed. 46 | * 47 | * @return an object with the result of this task 48 | */ 49 | public Object doTask(); 50 | 51 | /** 52 | * This method performs this task. 
/**
 * Interface implemented by learner evaluators to monitor
 * the results of the learning process.
 *
 * <p>NOTE(review): {@code E} is used below but not declared in the text as it
 * appears here; presumably a type parameter on the interface was lost when
 * this file was extracted. Confirm against the original source.
 *
 * @author Richard Kirkby (rkirkby@cs.waikato.ac.nz)
 * @version $Revision: 7 $
 */
public interface LearningPerformanceEvaluator extends MOAObject {

    /**
     * Resets this evaluator. It must be similar to
     * starting a new evaluator from scratch.
     *
     */
    public void reset();

    /**
     * Adds a learning result to this evaluator.
     *
     * @param example the example to be classified
     * @param classVotes an array containing the estimated membership
     * probabilities of the test instance in each class
     */
    public void addResult(E example, double[] classVotes);

    /**
     * Gets the current measurements monitored by this evaluator.
     *
     * @return an array of measurements monitored by this evaluator
     */
    public Measurement[] getPerformanceMeasurements();

}
21 | * #L% 22 | */ 23 | 24 | import com.yahoo.labs.samoa.moa.options.AbstractOptionHandler; 25 | import com.github.javacliparser.FloatOption; 26 | import com.github.javacliparser.IntOption; 27 | import com.yahoo.labs.samoa.moa.streams.InstanceStream; 28 | 29 | public abstract class ClusteringStream extends AbstractOptionHandler implements InstanceStream{ 30 | public IntOption decayHorizonOption = new IntOption("decayHorizon", 'h', 31 | "Decay horizon", 1000, 0, Integer.MAX_VALUE); 32 | 33 | public FloatOption decayThresholdOption = new FloatOption("decayThreshold", 't', 34 | "Decay horizon threshold", 0.01, 0, 1); 35 | 36 | public IntOption evaluationFrequencyOption = new IntOption("evaluationFrequency", 'e', 37 | "Evaluation frequency", 1000, 0, Integer.MAX_VALUE); 38 | 39 | public IntOption numAttsOption = new IntOption("numAtts", 'a', 40 | "The number of attributes to generate.", 2, 0, Integer.MAX_VALUE); 41 | 42 | public int getDecayHorizon(){ 43 | return decayHorizonOption.getValue(); 44 | } 45 | 46 | public double getDecayThreshold(){ 47 | return decayThresholdOption.getValue(); 48 | } 49 | 50 | public int getEvaluationFrequency(){ 51 | return evaluationFrequencyOption.getValue(); 52 | } 53 | 54 | 55 | } 56 | -------------------------------------------------------------------------------- /samoa-samza/src/main/java/com/yahoo/labs/samoa/topology/impl/SamzaComponentFactory.java: -------------------------------------------------------------------------------- 1 | package com.yahoo.labs.samoa.topology.impl; 2 | 3 | /* 4 | * #%L 5 | * SAMOA 6 | * %% 7 | * Copyright (C) 2013 - 2014 Yahoo! Inc. 8 | * %% 9 | * Licensed under the Apache License, Version 2.0 (the "License"); 10 | * you may not use this file except in compliance with the License. 
11 | * You may obtain a copy of the License at 12 | * 13 | * http://www.apache.org/licenses/LICENSE-2.0 14 | * 15 | * Unless required by applicable law or agreed to in writing, software 16 | * distributed under the License is distributed on an "AS IS" BASIS, 17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | * See the License for the specific language governing permissions and 19 | * limitations under the License. 20 | * #L% 21 | */ 22 | 23 | import com.yahoo.labs.samoa.core.EntranceProcessor; 24 | import com.yahoo.labs.samoa.core.Processor; 25 | import com.yahoo.labs.samoa.topology.ComponentFactory; 26 | import com.yahoo.labs.samoa.topology.EntranceProcessingItem; 27 | import com.yahoo.labs.samoa.topology.IProcessingItem; 28 | import com.yahoo.labs.samoa.topology.ProcessingItem; 29 | import com.yahoo.labs.samoa.topology.Stream; 30 | import com.yahoo.labs.samoa.topology.Topology; 31 | 32 | /** 33 | * Implementation of SAMOA ComponentFactory for Samza 34 | * 35 | * @author Anh Thu Vu 36 | */ 37 | public class SamzaComponentFactory implements ComponentFactory { 38 | @Override 39 | public ProcessingItem createPi(Processor processor) { 40 | return this.createPi(processor, 1); 41 | } 42 | 43 | @Override 44 | public ProcessingItem createPi(Processor processor, int parallelism) { 45 | return new SamzaProcessingItem(processor, parallelism); 46 | } 47 | 48 | @Override 49 | public EntranceProcessingItem createEntrancePi(EntranceProcessor entranceProcessor) { 50 | return new SamzaEntranceProcessingItem(entranceProcessor); 51 | } 52 | 53 | @Override 54 | public Stream createStream(IProcessingItem sourcePi) { 55 | return new SamzaStream(sourcePi); 56 | } 57 | 58 | @Override 59 | public Topology createTopology(String topoName) { 60 | return new SamzaTopology(topoName); 61 | } 62 | } -------------------------------------------------------------------------------- /samoa-api/src/main/java/com/yahoo/labs/samoa/streams/fs/FileStreamSource.java: 
/**
 * Source of input files for a FileStream (local FS, HDFS, ...). Files are
 * handed out one at a time as {@link InputStream}s.
 *
 * @author Casey
 */
public interface FileStreamSource extends Serializable {

    /**
     * Initializes the source from a file or directory path and a file
     * extension.
     *
     * @param path
     *            file or directory path
     * @param ext
     *            extension used to filter the files of a directory; if null,
     *            every file in the directory is accepted
     */
    void init(String path, String ext);

    /**
     * Resets the source.
     *
     * @throws IOException declared for implementations whose reset can fail on I/O
     */
    void reset() throws IOException;

    /**
     * Moves on to the next file and opens it. Returns null if we are at the
     * last file in the list.
     *
     * @return InputStream for the next file in the list
     */
    InputStream getNextInputStream();

    /**
     * Returns the stream of the current file. The "current pointer" only moves
     * forward through {@link #getNextInputStream()}, so this method returns
     * null as long as that method has never been invoked.
     *
     * @return InputStream for the current file in the list
     */
    InputStream getCurrentInputStream();
}
20 | * #L% 21 | */ 22 | 23 | import com.yahoo.labs.samoa.core.EntranceProcessor; 24 | import com.yahoo.labs.samoa.core.Processor; 25 | import com.yahoo.labs.samoa.topology.ComponentFactory; 26 | import com.yahoo.labs.samoa.topology.EntranceProcessingItem; 27 | import com.yahoo.labs.samoa.topology.IProcessingItem; 28 | import com.yahoo.labs.samoa.topology.ProcessingItem; 29 | import com.yahoo.labs.samoa.topology.Stream; 30 | import com.yahoo.labs.samoa.topology.Topology; 31 | 32 | /** 33 | * ComponentFactory for multithreaded engine 34 | * @author Anh Thu Vu 35 | * 36 | */ 37 | public class ThreadsComponentFactory implements ComponentFactory { 38 | 39 | @Override 40 | public ProcessingItem createPi(Processor processor) { 41 | return this.createPi(processor, 1); 42 | } 43 | 44 | @Override 45 | public ProcessingItem createPi(Processor processor, int paralellism) { 46 | return new ThreadsProcessingItem(processor, paralellism); 47 | } 48 | 49 | @Override 50 | public EntranceProcessingItem createEntrancePi(EntranceProcessor entranceProcessor) { 51 | return new ThreadsEntranceProcessingItem(entranceProcessor); 52 | } 53 | 54 | @Override 55 | public Stream createStream(IProcessingItem sourcePi) { 56 | return new ThreadsStream(sourcePi); 57 | } 58 | 59 | @Override 60 | public Topology createTopology(String topoName) { 61 | return new ThreadsTopology(topoName); 62 | } 63 | 64 | } 65 | -------------------------------------------------------------------------------- /samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/ClusteringResultContentEvent.java: -------------------------------------------------------------------------------- 1 | package com.yahoo.labs.samoa.evaluation; 2 | 3 | /* 4 | * #%L 5 | * SAMOA 6 | * %% 7 | * Copyright (C) 2013 Yahoo! Inc. 8 | * %% 9 | * Licensed under the Apache License, Version 2.0 (the "License"); 10 | * you may not use this file except in compliance with the License. 
11 | * You may obtain a copy of the License at 12 | * 13 | * http://www.apache.org/licenses/LICENSE-2.0 14 | * 15 | * Unless required by applicable law or agreed to in writing, software 16 | * distributed under the License is distributed on an "AS IS" BASIS, 17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | * See the License for the specific language governing permissions and 19 | * limitations under the License. 20 | * #L% 21 | */ 22 | 23 | import com.yahoo.labs.samoa.core.ContentEvent; 24 | import com.yahoo.labs.samoa.moa.cluster.Clustering; 25 | 26 | /** 27 | * License 28 | */ 29 | /** 30 | * The Class Clustering ResultEvent. 31 | */ 32 | final public class ClusteringResultContentEvent implements ContentEvent { 33 | 34 | private static final long serialVersionUID = -7746983521296618922L; 35 | private Clustering clustering; 36 | private final boolean isLast; 37 | private String key = "0"; 38 | 39 | public ClusteringResultContentEvent() { 40 | this.isLast = false; 41 | } 42 | 43 | public ClusteringResultContentEvent(boolean isLast) { 44 | this.isLast = isLast; 45 | } 46 | 47 | /** 48 | * Instantiates a new clustering result event. 
49 | * 50 | * @param clustering the clustering result 51 | * @param isLast is the last result 52 | */ 53 | public ClusteringResultContentEvent(Clustering clustering, boolean isLast) { 54 | this.clustering = clustering; 55 | this.isLast = isLast; 56 | } 57 | 58 | public String getKey() { 59 | return key; 60 | } 61 | 62 | public void setKey(String key) { 63 | this.key = key; 64 | } 65 | 66 | public boolean isLastEvent() { 67 | return this.isLast; 68 | } 69 | 70 | public Clustering getClustering() { 71 | return this.clustering; 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /samoa-instances/src/main/java/com/yahoo/labs/samoa/instances/Instance.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this template, choose Tools | Templates 3 | * and open the template in the editor. 4 | */ 5 | package com.yahoo.labs.samoa.instances; 6 | 7 | /* 8 | * #%L 9 | * SAMOA 10 | * %% 11 | * Copyright (C) 2013 Yahoo! Inc. 12 | * %% 13 | * Licensed under the Apache License, Version 2.0 (the "License"); 14 | * you may not use this file except in compliance with the License. 15 | * You may obtain a copy of the License at 16 | * 17 | * http://www.apache.org/licenses/LICENSE-2.0 18 | * 19 | * Unless required by applicable law or agreed to in writing, software 20 | * distributed under the License is distributed on an "AS IS" BASIS, 21 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 22 | * See the License for the specific language governing permissions and 23 | * limitations under the License. 
24 | * #L% 25 | */ 26 | 27 | import java.io.Serializable; 28 | 29 | /** 30 | * 31 | * @author abifet 32 | */ 33 | 34 | public interface Instance extends Serializable{ 35 | 36 | double weight(); 37 | void setWeight(double weight); 38 | 39 | //Attributes 40 | Attribute attribute(int instAttIndex); 41 | void deleteAttributeAt(int i); 42 | void insertAttributeAt(int i); 43 | int numAttributes(); 44 | public void addSparseValues(int[] indexValues, double[] attributeValues, int numberAttributes); 45 | 46 | 47 | //Values 48 | int numValues(); 49 | String stringValue(int i); 50 | double value(int instAttIndex); 51 | double value(Attribute attribute); 52 | void setValue(int m_numAttributes, double d); 53 | boolean isMissing(int instAttIndex); 54 | int index(int i); 55 | double valueSparse(int i); 56 | boolean isMissingSparse(int p1); 57 | double[] toDoubleArray(); 58 | 59 | //Class 60 | Attribute classAttribute(); 61 | int classIndex(); 62 | boolean classIsMissing(); 63 | double classValue(); 64 | int numClasses(); 65 | void setClassValue(double d); 66 | 67 | Instance copy(); 68 | 69 | //Dataset 70 | void setDataset(Instances dataset); 71 | Instances dataset(); 72 | String toString(); 73 | } 74 | 75 | -------------------------------------------------------------------------------- /samoa-s4/src/main/java/com/yahoo/labs/samoa/topology/impl/S4Event.java: -------------------------------------------------------------------------------- 1 | package com.yahoo.labs.samoa.topology.impl; 2 | 3 | /* 4 | * #%L 5 | * SAMOA 6 | * %% 7 | * Copyright (C) 2013 Yahoo! Inc. 8 | * %% 9 | * Licensed under the Apache License, Version 2.0 (the "License"); 10 | * you may not use this file except in compliance with the License. 
11 | * You may obtain a copy of the License at 12 | * 13 | * http://www.apache.org/licenses/LICENSE-2.0 14 | * 15 | * Unless required by applicable law or agreed to in writing, software 16 | * distributed under the License is distributed on an "AS IS" BASIS, 17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | * See the License for the specific language governing permissions and 19 | * limitations under the License. 20 | * #L% 21 | */ 22 | 23 | /** 24 | * License 25 | */ 26 | 27 | import net.jcip.annotations.Immutable; 28 | 29 | import org.apache.s4.base.Event; 30 | 31 | import com.yahoo.labs.samoa.core.ContentEvent; 32 | 33 | /** 34 | * The Class InstanceEvent. 35 | */ 36 | @Immutable 37 | final public class S4Event extends Event { 38 | 39 | private String key; 40 | 41 | public String getKey() { 42 | return key; 43 | } 44 | 45 | public void setKey(String key) { 46 | this.key = key; 47 | } 48 | 49 | /** The content event. */ 50 | private ContentEvent contentEvent; 51 | 52 | /** 53 | * Instantiates a new instance event. 54 | */ 55 | public S4Event() { 56 | // Needed for serialization of kryo 57 | } 58 | 59 | /** 60 | * Instantiates a new instance event. 61 | * 62 | * @param contentEvent the content event 63 | */ 64 | public S4Event(ContentEvent contentEvent) { 65 | if (contentEvent != null) { 66 | this.contentEvent = contentEvent; 67 | this.key = contentEvent.getKey(); 68 | 69 | } 70 | } 71 | 72 | /** 73 | * Gets the content event. 74 | * 75 | * @return the content event 76 | */ 77 | public ContentEvent getContentEvent() { 78 | return contentEvent; 79 | } 80 | 81 | /** 82 | * Sets the content event. 
83 | * 84 | * @param contentEvent the new content event 85 | */ 86 | public void setContentEvent(ContentEvent contentEvent) { 87 | this.contentEvent = contentEvent; 88 | } 89 | 90 | } 91 | -------------------------------------------------------------------------------- /samoa-api/src/main/java/com/yahoo/labs/samoa/streams/StreamSource.java: -------------------------------------------------------------------------------- 1 | package com.yahoo.labs.samoa.streams; 2 | 3 | /* 4 | * #%L 5 | * SAMOA 6 | * %% 7 | * Copyright (C) 2013 Yahoo! Inc. 8 | * %% 9 | * Licensed under the Apache License, Version 2.0 (the "License"); 10 | * you may not use this file except in compliance with the License. 11 | * You may obtain a copy of the License at 12 | * 13 | * http://www.apache.org/licenses/LICENSE-2.0 14 | * 15 | * Unless required by applicable law or agreed to in writing, software 16 | * distributed under the License is distributed on an "AS IS" BASIS, 17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | * See the License for the specific language governing permissions and 19 | * limitations under the License. 20 | * #L% 21 | */ 22 | 23 | /** 24 | * License 25 | */ 26 | 27 | import com.yahoo.labs.samoa.moa.core.Example; 28 | import com.yahoo.labs.samoa.moa.streams.InstanceStream; 29 | import com.yahoo.labs.samoa.instances.Instance; 30 | 31 | /** 32 | * The Class StreamSource. 33 | */ 34 | public class StreamSource implements java.io.Serializable{ 35 | 36 | /** 37 | * 38 | */ 39 | private static final long serialVersionUID = 3974668694861231236L; 40 | 41 | /** 42 | * Instantiates a new stream source. 43 | * 44 | * @param stream the stream 45 | */ 46 | public StreamSource(InstanceStream stream) { 47 | super(); 48 | this.stream = stream; 49 | } 50 | 51 | /** The stream. */ 52 | protected InstanceStream stream; 53 | 54 | /** 55 | * Gets the stream. 
56 | * 57 | * @return the stream 58 | */ 59 | public InstanceStream getStream() { 60 | return stream; 61 | } 62 | 63 | /** 64 | * Next instance. 65 | * 66 | * @return the instance 67 | */ 68 | public Example nextInstance() { 69 | return stream.nextInstance(); 70 | } 71 | 72 | /** 73 | * Sets the stream. 74 | * 75 | * @param stream the new stream 76 | */ 77 | public void setStream(InstanceStream stream) { 78 | this.stream = stream; 79 | } 80 | 81 | /** 82 | * Checks for more instances. 83 | * 84 | * @return true, if successful 85 | */ 86 | public boolean hasMoreInstances() { 87 | return this.stream.hasMoreInstances(); 88 | } 89 | 90 | } 91 | -------------------------------------------------------------------------------- /samoa-samza/src/main/java/com/yahoo/labs/samoa/topology/impl/SamoaSystemFactory.java: -------------------------------------------------------------------------------- 1 | package com.yahoo.labs.samoa.topology.impl; 2 | 3 | /* 4 | * #%L 5 | * SAMOA 6 | * %% 7 | * Copyright (C) 2013 - 2014 Yahoo! Inc. 8 | * %% 9 | * Licensed under the Apache License, Version 2.0 (the "License"); 10 | * you may not use this file except in compliance with the License. 11 | * You may obtain a copy of the License at 12 | * 13 | * http://www.apache.org/licenses/LICENSE-2.0 14 | * 15 | * Unless required by applicable law or agreed to in writing, software 16 | * distributed under the License is distributed on an "AS IS" BASIS, 17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | * See the License for the specific language governing permissions and 19 | * limitations under the License. 
20 | * #L% 21 | */ 22 | 23 | import org.apache.samza.SamzaException; 24 | import org.apache.samza.config.Config; 25 | import org.apache.samza.metrics.MetricsRegistry; 26 | import org.apache.samza.system.SystemAdmin; 27 | import org.apache.samza.system.SystemConsumer; 28 | import org.apache.samza.system.SystemFactory; 29 | import org.apache.samza.system.SystemProducer; 30 | import org.apache.samza.util.SinglePartitionWithoutOffsetsSystemAdmin; 31 | 32 | import com.yahoo.labs.samoa.topology.impl.SamzaEntranceProcessingItem.SamoaSystemConsumer; 33 | 34 | /** 35 | * Implementation of Samza's SystemFactory 36 | * Samza will use this factory to get our custom consumer 37 | * which gets the events from SAMOA EntranceProcessor 38 | * and feed them to EntranceProcessingItem task 39 | * 40 | * @author Anh Thu Vu 41 | */ 42 | public class SamoaSystemFactory implements SystemFactory { 43 | @Override 44 | public SystemAdmin getAdmin(String systemName, Config config) { 45 | return new SinglePartitionWithoutOffsetsSystemAdmin(); 46 | } 47 | 48 | @Override 49 | public SystemConsumer getConsumer(String systemName, Config config, MetricsRegistry registry) { 50 | return new SamoaSystemConsumer(systemName, config); 51 | } 52 | 53 | @Override 54 | public SystemProducer getProducer(String systemName, Config config, MetricsRegistry registry) { 55 | throw new SamzaException("This implementation is not supposed to produce anything."); 56 | } 57 | } -------------------------------------------------------------------------------- /samoa-api/src/main/java/com/yahoo/labs/samoa/topology/Topology.java: -------------------------------------------------------------------------------- 1 | package com.yahoo.labs.samoa.topology; 2 | 3 | /* 4 | * #%L 5 | * SAMOA 6 | * %% 7 | * Copyright (C) 2013 Yahoo! Inc. 8 | * %% 9 | * Licensed under the Apache License, Version 2.0 (the "License"); 10 | * you may not use this file except in compliance with the License. 
11 | * You may obtain a copy of the License at 12 | * 13 | * http://www.apache.org/licenses/LICENSE-2.0 14 | * 15 | * Unless required by applicable law or agreed to in writing, software 16 | * distributed under the License is distributed on an "AS IS" BASIS, 17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | * See the License for the specific language governing permissions and 19 | * limitations under the License. 20 | * #L% 21 | */ 22 | 23 | public interface Topology { 24 | /* 25 | * Name 26 | */ 27 | /** 28 | * Get the topology's name 29 | * 30 | * @return the name of the topology 31 | */ 32 | public String getTopologyName(); 33 | 34 | /** 35 | * Set the topology's name 36 | * 37 | * @param topologyName 38 | * the name of the topology 39 | */ 40 | public void setTopologyName(String topologyName) ; 41 | 42 | /* 43 | * Entrance Processing Items 44 | */ 45 | /** 46 | * Add an EntranceProcessingItem to this topology 47 | * 48 | * @param epi 49 | * the EntranceProcessingItem to be added 50 | */ 51 | void addEntranceProcessingItem(EntranceProcessingItem epi); 52 | 53 | 54 | /* 55 | * Processing Items 56 | */ 57 | /** 58 | * Add a ProcessingItem to this topology 59 | * with default parallelism level (i.e. 
1) 60 | * 61 | * @param procItem 62 | * the ProcessingItem to be added 63 | */ 64 | void addProcessingItem(IProcessingItem procItem); 65 | 66 | /** 67 | * Add a ProcessingItem to this topology 68 | * with an associated parallelism level 69 | * 70 | * @param procItem 71 | * the ProcessingItem to be added 72 | * @param parallelismHint 73 | * the parallelism level 74 | */ 75 | void addProcessingItem(IProcessingItem procItem, int parallelismHint); 76 | 77 | /* 78 | * Streams 79 | */ 80 | /** 81 | * 82 | * @param stream 83 | */ 84 | void addStream(Stream stream); 85 | } 86 | -------------------------------------------------------------------------------- /samoa-samza/src/main/java/com/yahoo/labs/samoa/topology/impl/SamzaTopology.java: -------------------------------------------------------------------------------- 1 | package com.yahoo.labs.samoa.topology.impl; 2 | 3 | /* 4 | * #%L 5 | * SAMOA 6 | * %% 7 | * Copyright (C) 2013 - 2014 Yahoo! Inc. 8 | * %% 9 | * Licensed under the Apache License, Version 2.0 (the "License"); 10 | * you may not use this file except in compliance with the License. 11 | * You may obtain a copy of the License at 12 | * 13 | * http://www.apache.org/licenses/LICENSE-2.0 14 | * 15 | * Unless required by applicable law or agreed to in writing, software 16 | * distributed under the License is distributed on an "AS IS" BASIS, 17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | * See the License for the specific language governing permissions and 19 | * limitations under the License. 
20 | * #L% 21 | */ 22 | 23 | import java.util.HashSet; 24 | import java.util.Set; 25 | 26 | import com.yahoo.labs.samoa.topology.IProcessingItem; 27 | import com.yahoo.labs.samoa.topology.AbstractTopology; 28 | 29 | /** 30 | * Topology for Samza 31 | * 32 | * @author Anh Thu Vu 33 | */ 34 | public class SamzaTopology extends AbstractTopology { 35 | private int procItemCounter; 36 | 37 | public SamzaTopology(String topoName) { 38 | super(topoName); 39 | procItemCounter = 0; 40 | } 41 | 42 | @Override 43 | public void addProcessingItem(IProcessingItem procItem, int parallelism) { 44 | super.addProcessingItem(procItem, parallelism); 45 | SamzaProcessingNode samzaPi = (SamzaProcessingNode) procItem; 46 | samzaPi.setName(this.getTopologyName()+"-"+Integer.toString(procItemCounter)); 47 | procItemCounter++; 48 | } 49 | 50 | /* 51 | * Gets the set of ProcessingItems, excluding EntrancePIs 52 | * Used by SamzaConfigFactory as the config for EntrancePIs and 53 | * normal PIs are different 54 | */ 55 | public Set getNonEntranceProcessingItems() throws Exception { 56 | Set copiedSet = new HashSet(); 57 | copiedSet.addAll(this.getProcessingItems()); 58 | boolean result = copiedSet.removeAll(this.getEntranceProcessingItems()); 59 | if (!result) { 60 | throw new Exception("Failed extracting the set of non-entrance processing items"); 61 | } 62 | return copiedSet; 63 | } 64 | } -------------------------------------------------------------------------------- /samoa-instances/src/main/java/com/yahoo/labs/samoa/instances/DenseInstance.java: -------------------------------------------------------------------------------- 1 | /* 2 | * To change this template, choose Tools | Templates 3 | * and open the template in the editor. 4 | */ 5 | package com.yahoo.labs.samoa.instances; 6 | 7 | /* 8 | * #%L 9 | * SAMOA 10 | * %% 11 | * Copyright (C) 2013 Yahoo! Inc. 
12 | * %% 13 | * Licensed under the Apache License, Version 2.0 (the "License"); 14 | * you may not use this file except in compliance with the License. 15 | * You may obtain a copy of the License at 16 | * 17 | * http://www.apache.org/licenses/LICENSE-2.0 18 | * 19 | * Unless required by applicable law or agreed to in writing, software 20 | * distributed under the License is distributed on an "AS IS" BASIS, 21 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 22 | * See the License for the specific language governing permissions and 23 | * limitations under the License. 24 | * #L% 25 | */ 26 | 27 | /** 28 | * 29 | * @author abifet 30 | */ 31 | public class DenseInstance extends SingleLabelInstance { 32 | 33 | private static final long serialVersionUID = 280360594027716737L; 34 | 35 | public DenseInstance() { 36 | // necessary for kryo serializer 37 | } 38 | 39 | public DenseInstance(double weight, double[] res) { 40 | super(weight,res); 41 | } 42 | public DenseInstance(SingleLabelInstance inst) { 43 | super(inst); 44 | } 45 | 46 | public DenseInstance(Instance inst) { 47 | super((SingleLabelInstance) inst); 48 | } 49 | public DenseInstance(double numberAttributes) { 50 | super((int) numberAttributes); 51 | //super(1, new double[(int) numberAttributes-1]); 52 | //Add missing values 53 | //for (int i = 0; i < numberAttributes-1; i++) { 54 | // //this.setValue(i, Double.NaN); 55 | //} 56 | 57 | } 58 | 59 | @Override 60 | public String toString() { 61 | StringBuffer text = new StringBuffer(); 62 | 63 | for (int i = 0; i < this.instanceInformation.numAttributes(); i++) { 64 | if (i > 0) 65 | text.append(","); 66 | text.append(this.value(i)); 67 | } 68 | text.append(",").append(this.weight()); 69 | 70 | return text.toString(); 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /samoa-instances/src/main/java/com/yahoo/labs/samoa/instances/SingleClassInstanceData.java: 
-------------------------------------------------------------------------------- 1 | /* 2 | * To change this template, choose Tools | Templates 3 | * and open the template in the editor. 4 | */ 5 | package com.yahoo.labs.samoa.instances; 6 | 7 | /* 8 | * #%L 9 | * SAMOA 10 | * %% 11 | * Copyright (C) 2013 Yahoo! Inc. 12 | * %% 13 | * Licensed under the Apache License, Version 2.0 (the "License"); 14 | * you may not use this file except in compliance with the License. 15 | * You may obtain a copy of the License at 16 | * 17 | * http://www.apache.org/licenses/LICENSE-2.0 18 | * 19 | * Unless required by applicable law or agreed to in writing, software 20 | * distributed under the License is distributed on an "AS IS" BASIS, 21 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 22 | * See the License for the specific language governing permissions and 23 | * limitations under the License. 24 | * #L% 25 | */ 26 | 27 | /** 28 | * 29 | * @author abifet 30 | */ 31 | public class SingleClassInstanceData implements InstanceData { 32 | 33 | protected double classValue; 34 | 35 | @Override 36 | public int numAttributes() { 37 | return 1; 38 | } 39 | 40 | @Override 41 | public double value(int instAttIndex) { 42 | return classValue; 43 | } 44 | 45 | @Override 46 | public boolean isMissing(int indexAttribute) { 47 | return Double.isNaN(this.value(indexAttribute)); 48 | } 49 | 50 | @Override 51 | public int numValues() { 52 | return 1; 53 | } 54 | 55 | @Override 56 | public int index(int i) { 57 | return 0; 58 | } 59 | 60 | @Override 61 | public double valueSparse(int i) { 62 | return value(i); 63 | } 64 | 65 | @Override 66 | public boolean isMissingSparse(int indexAttribute) { 67 | return Double.isNaN(this.value(indexAttribute)); 68 | } 69 | 70 | /*@Override 71 | public double value(Attribute attribute) { 72 | return this.classValue; 73 | }*/ 74 | 75 | @Override 76 | public double[] toDoubleArray() { 77 | double[] array = {this.classValue}; 78 | return 
array; 79 | } 80 | 81 | @Override 82 | public void setValue(int m_numAttributes, double d) { 83 | this.classValue = d; 84 | } 85 | 86 | } 87 | -------------------------------------------------------------------------------- /samoa-api/src/main/java/com/yahoo/labs/samoa/learners/classifiers/rules/distributed/RuleContentEvent.java: -------------------------------------------------------------------------------- 1 | package com.yahoo.labs.samoa.learners.classifiers.rules.distributed; 2 | 3 | /* 4 | * #%L 5 | * SAMOA 6 | * %% 7 | * Copyright (C) 2013 - 2014 Yahoo! Inc. 8 | * %% 9 | * Licensed under the Apache License, Version 2.0 (the "License"); 10 | * you may not use this file except in compliance with the License. 11 | * You may obtain a copy of the License at 12 | * 13 | * http://www.apache.org/licenses/LICENSE-2.0 14 | * 15 | * Unless required by applicable law or agreed to in writing, software 16 | * distributed under the License is distributed on an "AS IS" BASIS, 17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | * See the License for the specific language governing permissions and 19 | * limitations under the License. 20 | * #L% 21 | */ 22 | 23 | import com.yahoo.labs.samoa.core.ContentEvent; 24 | import com.yahoo.labs.samoa.learners.classifiers.rules.common.ActiveRule; 25 | 26 | /** 27 | * New rule from Model Aggregator/Default Rule Learner to Learners 28 | * or removed rule from Learner to Model Aggregators. 
29 | * 30 | * @author Anh Thu Vu 31 | * 32 | */ 33 | public class RuleContentEvent implements ContentEvent { 34 | 35 | 36 | /** 37 | * 38 | */ 39 | private static final long serialVersionUID = -9046390274402894461L; 40 | 41 | private final int ruleNumberID; 42 | private final ActiveRule addingRule; // for removing rule, we only need the rule's ID 43 | private final boolean isRemoving; 44 | 45 | public RuleContentEvent() { 46 | this(0, null, false); 47 | } 48 | 49 | public RuleContentEvent(int ruleID, ActiveRule rule, boolean isRemoving) { 50 | this.ruleNumberID = ruleID; 51 | this.isRemoving = isRemoving; 52 | this.addingRule = rule; 53 | } 54 | 55 | @Override 56 | public String getKey() { 57 | return Integer.toString(this.ruleNumberID); 58 | } 59 | 60 | @Override 61 | public void setKey(String key) { 62 | // do nothing 63 | } 64 | 65 | @Override 66 | public boolean isLastEvent() { 67 | return false; 68 | } 69 | 70 | public int getRuleNumberID() { 71 | return this.ruleNumberID; 72 | } 73 | 74 | public ActiveRule getRule() { 75 | return this.addingRule; 76 | } 77 | 78 | public boolean isRemoving() { 79 | return this.isRemoving; 80 | } 81 | 82 | } 83 | -------------------------------------------------------------------------------- /samoa-api/src/main/java/com/yahoo/labs/samoa/examples/HelloWorldSourceProcessor.java: -------------------------------------------------------------------------------- 1 | package com.yahoo.labs.samoa.examples; 2 | 3 | /* 4 | * #%L 5 | * SAMOA 6 | * %% 7 | * Copyright (C) 2013 - 2014 Yahoo! Inc. 8 | * %% 9 | * Licensed under the Apache License, Version 2.0 (the "License"); 10 | * you may not use this file except in compliance with the License. 
11 | * You may obtain a copy of the License at 12 | * 13 | * http://www.apache.org/licenses/LICENSE-2.0 14 | * 15 | * Unless required by applicable law or agreed to in writing, software 16 | * distributed under the License is distributed on an "AS IS" BASIS, 17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | * See the License for the specific language governing permissions and 19 | * limitations under the License. 20 | * #L% 21 | */ 22 | 23 | import java.util.Random; 24 | 25 | import com.yahoo.labs.samoa.core.ContentEvent; 26 | import com.yahoo.labs.samoa.core.EntranceProcessor; 27 | import com.yahoo.labs.samoa.core.Processor; 28 | 29 | /** 30 | * Example {@link EntranceProcessor} that generates a stream of random integers. 31 | */ 32 | public class HelloWorldSourceProcessor implements EntranceProcessor { 33 | 34 | private static final long serialVersionUID = 6212296305865604747L; 35 | private Random rnd; 36 | private final long maxInst; 37 | private long count; 38 | 39 | public HelloWorldSourceProcessor(long maxInst) { 40 | this.maxInst = maxInst; 41 | } 42 | 43 | @Override 44 | public boolean process(ContentEvent event) { 45 | // do nothing, API will be refined further 46 | return false; 47 | } 48 | 49 | @Override 50 | public void onCreate(int id) { 51 | rnd = new Random(id); 52 | } 53 | 54 | @Override 55 | public Processor newProcessor(Processor p) { 56 | HelloWorldSourceProcessor hwsp = (HelloWorldSourceProcessor) p; 57 | return new HelloWorldSourceProcessor(hwsp.maxInst); 58 | } 59 | 60 | @Override 61 | public boolean isFinished() { 62 | return count >= maxInst; 63 | } 64 | 65 | @Override 66 | public boolean hasNext() { 67 | return count < maxInst; 68 | } 69 | 70 | @Override 71 | public ContentEvent nextEvent() { 72 | count++; 73 | return new HelloWorldContentEvent(rnd.nextInt(), false); 74 | } 75 | } 76 | -------------------------------------------------------------------------------- 
/samoa-api/src/main/java/com/yahoo/labs/samoa/topology/ProcessingItem.java: -------------------------------------------------------------------------------- 1 | package com.yahoo.labs.samoa.topology; 2 | 3 | /* 4 | * #%L 5 | * SAMOA 6 | * %% 7 | * Copyright (C) 2013 Yahoo! Inc. 8 | * %% 9 | * Licensed under the Apache License, Version 2.0 (the "License"); 10 | * you may not use this file except in compliance with the License. 11 | * You may obtain a copy of the License at 12 | * 13 | * http://www.apache.org/licenses/LICENSE-2.0 14 | * 15 | * Unless required by applicable law or agreed to in writing, software 16 | * distributed under the License is distributed on an "AS IS" BASIS, 17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | * See the License for the specific language governing permissions and 19 | * limitations under the License. 20 | * #L% 21 | */ 22 | 23 | /** 24 | * Processing item interface. 25 | * 26 | * @author severien 27 | * 28 | */ 29 | public interface ProcessingItem extends IProcessingItem { 30 | 31 | /** 32 | * Connects this processing item in a round robin fashion. The events will 33 | * be distributed evenly between the instantiated processing items. 34 | * 35 | * @param inputStream 36 | * Stream to connect this processing item. 37 | * @return ProcessingItem 38 | */ 39 | public ProcessingItem connectInputShuffleStream(Stream inputStream); 40 | 41 | /** 42 | * Connects this processing item taking the event key into account. Events 43 | * will be routed to the processing item according to the modulus of its key 44 | * and the paralellism level. Ex.: key = 5 and paralellism = 2, 5 mod 2 = 1. 45 | * Processing item responsible for 1 will receive this event. 46 | * 47 | * @param inputStream 48 | * Stream to connect this processing item. 
49 | * @return ProcessingItem 50 | */ 51 | public ProcessingItem connectInputKeyStream(Stream inputStream); 52 | 53 | /** 54 | * Connects this processing item to the stream in a broadcast fashion. All 55 | * processing items of this type will receive copy of the original event. 56 | * 57 | * @param inputStream 58 | * Stream to connect this processing item. 59 | * @return ProcessingItem 60 | */ 61 | public ProcessingItem connectInputAllStream(Stream inputStream); 62 | 63 | 64 | /** 65 | * Gets processing item parallelism level. 66 | * 67 | * @return int 68 | */ 69 | public int getParallelism(); 70 | } 71 | -------------------------------------------------------------------------------- /samoa-api/src/main/java/com/yahoo/labs/samoa/learners/classifiers/trees/FoundNode.java: -------------------------------------------------------------------------------- 1 | package com.yahoo.labs.samoa.learners.classifiers.trees; 2 | 3 | /* 4 | * #%L 5 | * SAMOA 6 | * %% 7 | * Copyright (C) 2013 Yahoo! Inc. 8 | * %% 9 | * Licensed under the Apache License, Version 2.0 (the "License"); 10 | * you may not use this file except in compliance with the License. 11 | * You may obtain a copy of the License at 12 | * 13 | * http://www.apache.org/licenses/LICENSE-2.0 14 | * 15 | * Unless required by applicable law or agreed to in writing, software 16 | * distributed under the License is distributed on an "AS IS" BASIS, 17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | * See the License for the specific language governing permissions and 19 | * limitations under the License. 20 | * #L% 21 | */ 22 | 23 | /** 24 | * Class that represents the necessary data structure of the node where an instance 25 | * is routed/filtered through the decision tree model. 
26 | * 27 | * @author Arinto Murdopo 28 | * 29 | */ 30 | final class FoundNode implements java.io.Serializable{ 31 | 32 | /** 33 | * 34 | */ 35 | private static final long serialVersionUID = -637695387934143293L; 36 | 37 | private final Node node; 38 | private final SplitNode parent; 39 | private final int parentBranch; 40 | 41 | FoundNode(Node node, SplitNode splitNode, int parentBranch){ 42 | this.node = node; 43 | this.parent = splitNode; 44 | this.parentBranch = parentBranch; 45 | } 46 | 47 | /** 48 | * Method to get the node where an instance is routed/filtered through the decision tree 49 | * model for testing and training. 50 | * 51 | * @return The node where the instance is routed/filtered 52 | */ 53 | Node getNode(){ 54 | return this.node; 55 | } 56 | 57 | /** 58 | * Method to get the parent of the node where an instance is routed/filtered through the decision tree 59 | * model for testing and training 60 | * 61 | * @return The parent of the node 62 | */ 63 | SplitNode getParent(){ 64 | return this.parent; 65 | } 66 | 67 | /** 68 | * Method to get the index of the node (where an instance is routed/filtered through the decision tree 69 | * model for testing and training) in its parent. 70 | * 71 | * @return The index of the node in its parent node. 72 | */ 73 | int getParentBranch(){ 74 | return this.parentBranch; 75 | } 76 | 77 | } -------------------------------------------------------------------------------- /samoa-api/src/main/java/com/yahoo/labs/samoa/moa/cluster/Miniball.java: -------------------------------------------------------------------------------- 1 | package com.yahoo.labs.samoa.moa.cluster; 2 | 3 | /* 4 | * #%L 5 | * SAMOA 6 | * %% 7 | * Copyright (C) 2013 Yahoo! Inc. 8 | * %% 9 | * Licensed under the Apache License, Version 2.0 (the "License"); 10 | * you may not use this file except in compliance with the License. 
11 | * You may obtain a copy of the License at 12 | * 13 | * http://www.apache.org/licenses/LICENSE-2.0 14 | * 15 | * Unless required by applicable law or agreed to in writing, software 16 | * distributed under the License is distributed on an "AS IS" BASIS, 17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | * See the License for the specific language governing permissions and 19 | * limitations under the License. 20 | * #L% 21 | */ 22 | 23 | import com.dreizak.miniball.model.ArrayPointSet; 24 | import com.dreizak.miniball.model.PointSet; 25 | import java.util.ArrayList; 26 | import java.util.List; 27 | 28 | public class Miniball { 29 | 30 | private int dimension; 31 | private com.dreizak.miniball.highdim.Miniball mb; 32 | private PointStorage pointSet; 33 | 34 | public Miniball(int dimension) { 35 | this.dimension = dimension; 36 | } 37 | 38 | void clear() { 39 | this.pointSet = new PointStorage(this.dimension); 40 | } 41 | 42 | void check_in(double[] array) { 43 | this.pointSet.add(array); 44 | } 45 | 46 | double[] center() { 47 | return this.mb.center(); 48 | } 49 | 50 | double radius() { 51 | return this.mb.radius(); 52 | } 53 | 54 | void build() { 55 | this.mb = new com.dreizak.miniball.highdim.Miniball(this.pointSet); 56 | } 57 | 58 | public class PointStorage implements PointSet { 59 | 60 | protected int dimension; 61 | protected List L; 62 | 63 | public PointStorage(int dimension) { 64 | this.dimension = dimension; 65 | this.L = new ArrayList(); 66 | } 67 | 68 | public void add(double[] array) { 69 | this.L.add(array); 70 | } 71 | 72 | public int size() { 73 | return L.size(); 74 | } 75 | 76 | public int dimension() { 77 | return dimension; 78 | } 79 | 80 | public double coord(int point, int coordinate) { 81 | return L.get(point)[coordinate]; 82 | } 83 | } 84 | } 85 | -------------------------------------------------------------------------------- 
/samoa-storm/src/main/java/com/yahoo/labs/samoa/topology/impl/StormStream.java: -------------------------------------------------------------------------------- 1 | package com.yahoo.labs.samoa.topology.impl; 2 | 3 | /* 4 | * #%L 5 | * SAMOA 6 | * %% 7 | * Copyright (C) 2013 Yahoo! Inc. 8 | * %% 9 | * Licensed under the Apache License, Version 2.0 (the "License"); 10 | * you may not use this file except in compliance with the License. 11 | * You may obtain a copy of the License at 12 | * 13 | * http://www.apache.org/licenses/LICENSE-2.0 14 | * 15 | * Unless required by applicable law or agreed to in writing, software 16 | * distributed under the License is distributed on an "AS IS" BASIS, 17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | * See the License for the specific language governing permissions and 19 | * limitations under the License. 20 | * #L% 21 | */ 22 | 23 | import java.util.UUID; 24 | 25 | import com.yahoo.labs.samoa.core.ContentEvent; 26 | import com.yahoo.labs.samoa.topology.Stream; 27 | 28 | /** 29 | * Abstract class to implement Storm Stream 30 | * @author Arinto Murdopo 31 | * 32 | */ 33 | abstract class StormStream implements Stream, java.io.Serializable { 34 | 35 | /** 36 | * 37 | */ 38 | private static final long serialVersionUID = 281835563756514852L; 39 | protected final String outputStreamId; 40 | protected final InputStreamId inputStreamId; 41 | 42 | public StormStream(String stormComponentId){ 43 | this.outputStreamId = UUID.randomUUID().toString(); 44 | this.inputStreamId = new InputStreamId(stormComponentId, this.outputStreamId); 45 | } 46 | 47 | @Override 48 | public abstract void put(ContentEvent contentEvent); 49 | 50 | String getOutputId(){ 51 | return this.outputStreamId; 52 | } 53 | 54 | InputStreamId getInputId(){ 55 | return this.inputStreamId; 56 | } 57 | 58 | final static class InputStreamId implements java.io.Serializable{ 59 | 60 | /** 61 | * 62 | */ 63 | private static final long 
serialVersionUID = -7457995634133691295L; 64 | private final String componentId; 65 | private final String streamId; 66 | 67 | InputStreamId(String componentId, String streamId){ 68 | this.componentId = componentId; 69 | this.streamId = streamId; 70 | } 71 | 72 | String getComponentId(){ 73 | return componentId; 74 | } 75 | 76 | String getStreamId(){ 77 | return streamId; 78 | } 79 | } 80 | 81 | @Override 82 | public void setBatchSize(int batchSize) { 83 | // Ignore batch size 84 | } 85 | } -------------------------------------------------------------------------------- /samoa-threads/src/main/java/com/yahoo/labs/samoa/topology/impl/ThreadsTopology.java: -------------------------------------------------------------------------------- 1 | package com.yahoo.labs.samoa.topology.impl; 2 | 3 | /* 4 | * #%L 5 | * SAMOA 6 | * %% 7 | * Copyright (C) 2013 Yahoo! Inc. 8 | * %% 9 | * Licensed under the Apache License, Version 2.0 (the "License"); 10 | * you may not use this file except in compliance with the License. 11 | * You may obtain a copy of the License at 12 | * 13 | * http://www.apache.org/licenses/LICENSE-2.0 14 | * 15 | * Unless required by applicable law or agreed to in writing, software 16 | * distributed under the License is distributed on an "AS IS" BASIS, 17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | * See the License for the specific language governing permissions and 19 | * limitations under the License. 20 | * #L% 21 | */ 22 | 23 | import com.yahoo.labs.samoa.topology.AbstractTopology; 24 | import com.yahoo.labs.samoa.topology.IProcessingItem; 25 | 26 | /** 27 | * Topology for multithreaded engine. 
28 | * @author Anh Thu Vu 29 | * 30 | */ 31 | public class ThreadsTopology extends AbstractTopology { 32 | ThreadsTopology(String name) { 33 | super(name); 34 | } 35 | 36 | public void run() { 37 | if (this.getEntranceProcessingItems() == null) 38 | throw new IllegalStateException("You need to set entrance PI before running the topology."); 39 | if (this.getEntranceProcessingItems().size() != 1) 40 | throw new IllegalStateException("ThreadsTopology supports 1 entrance PI only. Number of entrance PIs is "+this.getEntranceProcessingItems().size()); 41 | 42 | this.setupProcessingItemInstances(); 43 | ThreadsEntranceProcessingItem entrancePi = (ThreadsEntranceProcessingItem) this.getEntranceProcessingItems().toArray()[0]; 44 | if (entrancePi == null) 45 | throw new IllegalStateException("You need to set entrance PI before running the topology."); 46 | entrancePi.getProcessor().onCreate(0); // id=0 as it is not used in simple mode 47 | entrancePi.startSendingEvents(); 48 | } 49 | 50 | /* 51 | * Tell all the ThreadsProcessingItems to create & init their 52 | * replicas (ThreadsProcessingItemInstance) 53 | */ 54 | private void setupProcessingItemInstances() { 55 | for (IProcessingItem pi:this.getProcessingItems()) { 56 | if (pi instanceof ThreadsProcessingItem) { 57 | ThreadsProcessingItem tpi = (ThreadsProcessingItem) pi; 58 | tpi.setupInstances(); 59 | } 60 | } 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /samoa-api/src/main/java/com/yahoo/labs/samoa/moa/evaluation/LearningEvaluation.java: -------------------------------------------------------------------------------- 1 | package com.yahoo.labs.samoa.moa.evaluation; 2 | 3 | /* 4 | * #%L 5 | * SAMOA 6 | * %% 7 | * Copyright (C) 2007 University of Waikato, Hamilton, New Zealand 8 | * %% 9 | * Licensed under the Apache License, Version 2.0 (the "License"); 10 | * you may not use this file except in compliance with the License. 
11 | * You may obtain a copy of the License at 12 | * 13 | * http://www.apache.org/licenses/LICENSE-2.0 14 | * 15 | * Unless required by applicable law or agreed to in writing, software 16 | * distributed under the License is distributed on an "AS IS" BASIS, 17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | * See the License for the specific language governing permissions and 19 | * limitations under the License. 20 | * #L% 21 | */ 22 | 23 | import com.yahoo.labs.samoa.moa.AbstractMOAObject; 24 | import com.yahoo.labs.samoa.moa.core.Measurement; 25 | import com.yahoo.labs.samoa.moa.learners.Learner; 26 | import java.util.Arrays; 27 | import java.util.LinkedList; 28 | import java.util.List; 29 | 30 | /** 31 | * Class that stores an array of evaluation measurements. 32 | * 33 | * @author Richard Kirkby (rkirkby@cs.waikato.ac.nz) 34 | * @version $Revision: 7 $ 35 | */ 36 | public class LearningEvaluation extends AbstractMOAObject { 37 | 38 | private static final long serialVersionUID = 1L; 39 | 40 | protected Measurement[] measurements; 41 | 42 | public LearningEvaluation(Measurement[] measurements) { 43 | this.measurements = measurements.clone(); 44 | } 45 | 46 | public LearningEvaluation(Measurement[] evaluationMeasurements, 47 | LearningPerformanceEvaluator cpe, Learner model) { 48 | List measurementList = new LinkedList(); 49 | measurementList.addAll(Arrays.asList(evaluationMeasurements)); 50 | measurementList.addAll(Arrays.asList(cpe.getPerformanceMeasurements())); 51 | measurementList.addAll(Arrays.asList(model.getModelMeasurements())); 52 | this.measurements = measurementList.toArray(new Measurement[measurementList.size()]); 53 | } 54 | 55 | public Measurement[] getMeasurements() { 56 | return this.measurements.clone(); 57 | } 58 | 59 | @Override 60 | public void getDescription(StringBuilder sb, int indent) { 61 | Measurement.getMeasurementsDescription(this.measurements, sb, indent); 62 | } 63 | } 64 | 
-------------------------------------------------------------------------------- /samoa-storm/src/test/java/com/yahoo/labs/samoa/AlgosTest.java: -------------------------------------------------------------------------------- 1 | package com.yahoo.labs.samoa; 2 | 3 | /* 4 | * #%L 5 | * SAMOA 6 | * %% 7 | * Copyright (C) 2013 - 2014 Yahoo! Inc. 8 | * %% 9 | * Licensed under the Apache License, Version 2.0 (the "License"); 10 | * you may not use this file except in compliance with the License. 11 | * You may obtain a copy of the License at 12 | * 13 | * http://www.apache.org/licenses/LICENSE-2.0 14 | * 15 | * Unless required by applicable law or agreed to in writing, software 16 | * distributed under the License is distributed on an "AS IS" BASIS, 17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | * See the License for the specific language governing permissions and 19 | * limitations under the License. 20 | * #L% 21 | */ 22 | 23 | import org.junit.Test; 24 | 25 | public class AlgosTest { 26 | 27 | 28 | @Test(timeout = 60000) 29 | public void testVHTWithStorm() throws Exception { 30 | 31 | TestParams vhtConfig = new TestParams.Builder() 32 | .inputInstances(200_000) 33 | .samplingSize(20_000) 34 | .evaluationInstances(200_000) 35 | .classifiedInstances(200_000) 36 | .classificationsCorrect(55f) 37 | .kappaStat(0f) 38 | .kappaTempStat(0f) 39 | .cliStringTemplate(TestParams.Templates.PREQEVAL_VHT_RANDOMTREE) 40 | .resultFilePollTimeout(30) 41 | .prePollWait(15) 42 | .taskClassName(LocalStormDoTask.class.getName()) 43 | .build(); 44 | TestUtils.test(vhtConfig); 45 | 46 | } 47 | 48 | @Test(timeout = 120000) 49 | public void testBaggingWithStorm() throws Exception { 50 | TestParams baggingConfig = new TestParams.Builder() 51 | .inputInstances(200_000) 52 | .samplingSize(20_000) 53 | .evaluationInstances(180_000) 54 | .classifiedInstances(190_000) 55 | .classificationsCorrect(60f) 56 | .kappaStat(0f) 57 | .kappaTempStat(0f) 58 | 
.cliStringTemplate(TestParams.Templates.PREQEVAL_BAGGING_RANDOMTREE) 59 | .resultFilePollTimeout(40) 60 | .prePollWait(20) 61 | .taskClassName(LocalStormDoTask.class.getName()) 62 | .build(); 63 | TestUtils.test(baggingConfig); 64 | 65 | } 66 | 67 | 68 | } 69 | -------------------------------------------------------------------------------- /samoa-api/src/main/java/com/yahoo/labs/samoa/learners/classifiers/rules/common/RuleSplitNode.java: -------------------------------------------------------------------------------- 1 | package com.yahoo.labs.samoa.learners.classifiers.rules.common; 2 | 3 | /* 4 | * #%L 5 | * SAMOA 6 | * %% 7 | * Copyright (C) 2013 - 2014 Yahoo! Inc. 8 | * %% 9 | * Licensed under the Apache License, Version 2.0 (the "License"); 10 | * you may not use this file except in compliance with the License. 11 | * You may obtain a copy of the License at 12 | * 13 | * http://www.apache.org/licenses/LICENSE-2.0 14 | * 15 | * Unless required by applicable law or agreed to in writing, software 16 | * distributed under the License is distributed on an "AS IS" BASIS, 17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | * See the License for the specific language governing permissions and 19 | * limitations under the License. 20 | * #L% 21 | */ 22 | 23 | import com.yahoo.labs.samoa.moa.classifiers.core.conditionaltests.InstanceConditionalTest; 24 | import com.yahoo.labs.samoa.moa.classifiers.rules.core.Predicate; 25 | import com.yahoo.labs.samoa.moa.classifiers.rules.core.conditionaltests.NumericAttributeBinaryRulePredicate; 26 | import com.yahoo.labs.samoa.learners.classifiers.trees.SplitNode; 27 | import com.yahoo.labs.samoa.instances.Instance; 28 | 29 | /** 30 | * Represent a feature of rules (an element of ruleś nodeList). 
31 | * 32 | * @author Anh Thu Vu 33 | * 34 | */ 35 | public class RuleSplitNode extends SplitNode { 36 | 37 | protected double lastTargetMean; 38 | protected int operatorObserver; 39 | 40 | private static final long serialVersionUID = 1L; 41 | 42 | public InstanceConditionalTest getSplitTest() { 43 | return this.splitTest; 44 | } 45 | 46 | /** 47 | * Create a new RuleSplitNode 48 | */ 49 | public RuleSplitNode() { 50 | this(null, new double[0]); 51 | } 52 | public RuleSplitNode(InstanceConditionalTest splitTest, double[] classObservations) { 53 | super(splitTest, classObservations); 54 | } 55 | 56 | public RuleSplitNode getACopy() { 57 | InstanceConditionalTest splitTest = new NumericAttributeBinaryRulePredicate((NumericAttributeBinaryRulePredicate) this.getSplitTest()); 58 | return new RuleSplitNode(splitTest, this.getObservedClassDistribution()); 59 | } 60 | 61 | public boolean evaluate(Instance instance) { 62 | Predicate predicate = (Predicate) this.splitTest; 63 | return predicate.evaluate(instance); 64 | } 65 | 66 | } 67 | -------------------------------------------------------------------------------- /samoa-api/src/main/java/com/yahoo/labs/samoa/learners/classifiers/LocalLearner.java: -------------------------------------------------------------------------------- 1 | package com.yahoo.labs.samoa.learners.classifiers; 2 | 3 | /* 4 | * #%L 5 | * SAMOA 6 | * %% 7 | * Copyright (C) 2013 Yahoo! Inc. 8 | * %% 9 | * Licensed under the Apache License, Version 2.0 (the "License"); 10 | * you may not use this file except in compliance with the License. 11 | * You may obtain a copy of the License at 12 | * 13 | * http://www.apache.org/licenses/LICENSE-2.0 14 | * 15 | * Unless required by applicable law or agreed to in writing, software 16 | * distributed under the License is distributed on an "AS IS" BASIS, 17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
18 | * See the License for the specific language governing permissions and 19 | * limitations under the License. 20 | * #L% 21 | */ 22 | 23 | import java.io.Serializable; 24 | import java.util.Map; 25 | 26 | import com.yahoo.labs.samoa.instances.Instance; 27 | import com.yahoo.labs.samoa.instances.Instances; 28 | 29 | /** 30 | * Learner interface for non-distributed learners. 31 | * 32 | * @author abifet 33 | */ 34 | public interface LocalLearner extends Serializable { 35 | 36 | /** 37 | * Creates a new learner object. 38 | * 39 | * @return the learner 40 | */ 41 | LocalLearner create(); 42 | 43 | /** 44 | * Predicts the class memberships for a given instance. If an instance is 45 | * unclassified, the returned array elements must be all zero. 46 | * 47 | * @param inst 48 | * the instance to be classified 49 | * @return an array containing the estimated membership probabilities of the 50 | * test instance in each class 51 | */ 52 | double[] getVotesForInstance(Instance inst); 53 | 54 | /** 55 | * Resets this classifier. It must be similar to starting a new classifier 56 | * from scratch. 57 | * 58 | */ 59 | void resetLearning(); 60 | 61 | /** 62 | * Trains this classifier incrementally using the given instance. 
63 | * 64 | * @param inst 65 | * the instance to be used for training 66 | */ 67 | void trainOnInstance(Instance inst); 68 | 69 | /** 70 | * Sets where to obtain the information of attributes of Instances 71 | * 72 | * @param dataset 73 | * the dataset that contains the information 74 | */ 75 | @Deprecated 76 | public void setDataset(Instances dataset); 77 | 78 | } 79 | -------------------------------------------------------------------------------- /samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/AttributeSplitSuggestion.java: -------------------------------------------------------------------------------- 1 | package com.yahoo.labs.samoa.moa.classifiers.core; 2 | 3 | /* 4 | * #%L 5 | * SAMOA 6 | * %% 7 | * Copyright (C) 2007 University of Waikato, Hamilton, New Zealand 8 | * %% 9 | * Licensed under the Apache License, Version 2.0 (the "License"); 10 | * you may not use this file except in compliance with the License. 11 | * You may obtain a copy of the License at 12 | * 13 | * http://www.apache.org/licenses/LICENSE-2.0 14 | * 15 | * Unless required by applicable law or agreed to in writing, software 16 | * distributed under the License is distributed on an "AS IS" BASIS, 17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | * See the License for the specific language governing permissions and 19 | * limitations under the License. 20 | * #L% 21 | */ 22 | 23 | import com.yahoo.labs.samoa.moa.AbstractMOAObject; 24 | import com.yahoo.labs.samoa.moa.classifiers.core.conditionaltests.InstanceConditionalTest; 25 | 26 | /** 27 | * Class for computing attribute split suggestions given a split test. 
28 | * 29 | * @author Richard Kirkby (rkirkby@cs.waikato.ac.nz) 30 | * @version $Revision: 7 $ 31 | */ 32 | public class AttributeSplitSuggestion extends AbstractMOAObject implements Comparable { 33 | 34 | private static final long serialVersionUID = 1L; 35 | 36 | public InstanceConditionalTest splitTest; 37 | 38 | public double[][] resultingClassDistributions; 39 | 40 | public double merit; 41 | 42 | public AttributeSplitSuggestion() {} 43 | 44 | public AttributeSplitSuggestion(InstanceConditionalTest splitTest, 45 | double[][] resultingClassDistributions, double merit) { 46 | this.splitTest = splitTest; 47 | this.resultingClassDistributions = resultingClassDistributions.clone(); 48 | this.merit = merit; 49 | } 50 | 51 | public int numSplits() { 52 | return this.resultingClassDistributions.length; 53 | } 54 | 55 | public double[] resultingClassDistributionFromSplit(int splitIndex) { 56 | return this.resultingClassDistributions[splitIndex].clone(); 57 | } 58 | 59 | @Override 60 | public int compareTo(AttributeSplitSuggestion comp) { 61 | return Double.compare(this.merit, comp.merit); 62 | } 63 | 64 | @Override 65 | public void getDescription(StringBuilder sb, int indent) { 66 | // do nothing 67 | } 68 | } 69 | -------------------------------------------------------------------------------- /samoa-samza/src/main/java/com/yahoo/labs/samoa/utils/SerializableSerializer.java: -------------------------------------------------------------------------------- 1 | package com.yahoo.labs.samoa.utils; 2 | 3 | /* 4 | * #%L 5 | * SAMOA 6 | * %% 7 | * Copyright (C) 2013 - 2014 Yahoo! Inc. 8 | * %% 9 | * Licensed under the Apache License, Version 2.0 (the "License"); 10 | * you may not use this file except in compliance with the License. 
11 | * You may obtain a copy of the License at 12 | * 13 | * http://www.apache.org/licenses/LICENSE-2.0 14 | * 15 | * Unless required by applicable law or agreed to in writing, software 16 | * distributed under the License is distributed on an "AS IS" BASIS, 17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | * See the License for the specific language governing permissions and 19 | * limitations under the License. 20 | * #L% 21 | */ 22 | 23 | import java.io.ByteArrayInputStream; 24 | import java.io.ByteArrayOutputStream; 25 | import java.io.IOException; 26 | import java.io.ObjectInputStream; 27 | import java.io.ObjectOutputStream; 28 | 29 | import com.esotericsoftware.kryo.Kryo; 30 | import com.esotericsoftware.kryo.Serializer; 31 | import com.esotericsoftware.kryo.io.Input; 32 | import com.esotericsoftware.kryo.io.Output; 33 | 34 | /** 35 | * Serialize and deserialize objects with Java serialization 36 | * 37 | * @author Anh Thu Vu 38 | */ 39 | public class SerializableSerializer extends Serializer { 40 | @Override 41 | public void write(Kryo kryo, Output output, Object object) { 42 | ByteArrayOutputStream bos = new ByteArrayOutputStream(); 43 | try { 44 | ObjectOutputStream oos = new ObjectOutputStream(bos); 45 | oos.writeObject(object); 46 | oos.flush(); 47 | } catch(IOException e) { 48 | throw new RuntimeException(e); 49 | } 50 | byte[] ser = bos.toByteArray(); 51 | output.writeInt(ser.length); 52 | output.writeBytes(ser); 53 | } 54 | 55 | @SuppressWarnings("rawtypes") 56 | @Override 57 | public Object read(Kryo kryo, Input input, Class c) { 58 | int len = input.readInt(); 59 | byte[] ser = new byte[len]; 60 | input.readBytes(ser); 61 | ByteArrayInputStream bis = new ByteArrayInputStream(ser); 62 | try { 63 | ObjectInputStream ois = new ObjectInputStream(bis); 64 | return ois.readObject(); 65 | } catch(Exception e) { 66 | throw new RuntimeException(e); 67 | } 68 | } 69 | 70 | } 71 | 
-------------------------------------------------------------------------------- /samoa-test/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 6 | samoa 7 | com.yahoo.labs.samoa 8 | 0.3.0-SNAPSHOT 9 | 10 | 4.0.0 11 | 12 | samoa-test 13 | 14 | 15 | org.apache.commons 16 | commons-csv 17 | 1.0 18 | 19 | 20 | commons-io 21 | commons-io 22 | 2.4 23 | 24 | 25 | com.yahoo.labs.samoa 26 | samoa-api 27 | ${project.version} 28 | 29 | 30 | 31 | 32 | 33 | org.apache.maven.plugins 34 | maven-jar-plugin 35 | 2.4 36 | 37 | 38 | 39 | test-jar 40 | 41 | 42 | 43 | 44 | 45 | org.apache.maven.plugins 46 | maven-assembly-plugin 47 | 48 | 49 | src/main/assembly/test-jar-with-dependencies.xml 50 | 51 | 52 | 53 | 54 | package 55 | 56 | single 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | -------------------------------------------------------------------------------- /samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/conditionaltests/NominalAttributeMultiwayTest.java: -------------------------------------------------------------------------------- 1 | package com.yahoo.labs.samoa.moa.classifiers.core.conditionaltests; 2 | 3 | /* 4 | * #%L 5 | * SAMOA 6 | * %% 7 | * Copyright (C) 2007 University of Waikato, Hamilton, New Zealand 8 | * %% 9 | * Licensed under the Apache License, Version 2.0 (the "License"); 10 | * you may not use this file except in compliance with the License. 11 | * You may obtain a copy of the License at 12 | * 13 | * http://www.apache.org/licenses/LICENSE-2.0 14 | * 15 | * Unless required by applicable law or agreed to in writing, software 16 | * distributed under the License is distributed on an "AS IS" BASIS, 17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | * See the License for the specific language governing permissions and 19 | * limitations under the License. 
20 | * #L% 21 | */ 22 | 23 | import com.yahoo.labs.samoa.instances.InstancesHeader; 24 | import com.yahoo.labs.samoa.instances.Instance; 25 | 26 | /** 27 | * Nominal multi way conditional test for instances to use to split nodes in Hoeffding trees. 28 | * 29 | * @author Richard Kirkby (rkirkby@cs.waikato.ac.nz) 30 | * @version $Revision: 7 $ 31 | */ 32 | public class NominalAttributeMultiwayTest extends InstanceConditionalTest { 33 | 34 | private static final long serialVersionUID = 1L; 35 | 36 | protected int attIndex; 37 | 38 | public NominalAttributeMultiwayTest(int attIndex) { 39 | this.attIndex = attIndex; 40 | } 41 | 42 | @Override 43 | public int branchForInstance(Instance inst) { 44 | int instAttIndex = this.attIndex ; //< inst.classIndex() ? this.attIndex 45 | //: this.attIndex + 1; 46 | return inst.isMissing(instAttIndex) ? -1 : (int) inst.value(instAttIndex); 47 | } 48 | 49 | @Override 50 | public String describeConditionForBranch(int branch, InstancesHeader context) { 51 | return InstancesHeader.getAttributeNameString(context, this.attIndex) 52 | + " = " 53 | + InstancesHeader.getNominalValueString(context, this.attIndex, 54 | branch); 55 | } 56 | 57 | @Override 58 | public int maxBranches() { 59 | return -1; 60 | } 61 | 62 | @Override 63 | public void getDescription(StringBuilder sb, int indent) { 64 | // TODO Auto-generated method stub 65 | } 66 | 67 | @Override 68 | public int[] getAttsTestDependsOn() { 69 | return new int[]{this.attIndex}; 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /samoa-api/src/main/java/com/yahoo/labs/samoa/learners/classifiers/rules/distributed/PredicateContentEvent.java: -------------------------------------------------------------------------------- 1 | package com.yahoo.labs.samoa.learners.classifiers.rules.distributed; 2 | 3 | /* 4 | * #%L 5 | * SAMOA 6 | * %% 7 | * Copyright (C) 2013 - 2014 Yahoo! Inc. 
8 | * %% 9 | * Licensed under the Apache License, Version 2.0 (the "License"); 10 | * you may not use this file except in compliance with the License. 11 | * You may obtain a copy of the License at 12 | * 13 | * http://www.apache.org/licenses/LICENSE-2.0 14 | * 15 | * Unless required by applicable law or agreed to in writing, software 16 | * distributed under the License is distributed on an "AS IS" BASIS, 17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | * See the License for the specific language governing permissions and 19 | * limitations under the License. 20 | * #L% 21 | */ 22 | 23 | import com.yahoo.labs.samoa.core.ContentEvent; 24 | import com.yahoo.labs.samoa.learners.classifiers.rules.common.RulePassiveRegressionNode; 25 | import com.yahoo.labs.samoa.learners.classifiers.rules.common.RuleSplitNode; 26 | 27 | /** 28 | * New features (of newly expanded rules) from Learners to Model Aggregators. 29 | * 30 | * @author Anh Thu Vu 31 | * 32 | */ 33 | public class PredicateContentEvent implements ContentEvent { 34 | 35 | /** 36 | * 37 | */ 38 | private static final long serialVersionUID = 7909435830443732451L; 39 | 40 | private int ruleNumberID; 41 | private RuleSplitNode ruleSplitNode; 42 | private RulePassiveRegressionNode learningNode; 43 | 44 | /* 45 | * Constructor 46 | */ 47 | public PredicateContentEvent() { 48 | this(0, null, null); 49 | } 50 | 51 | public PredicateContentEvent (int ruleID, RuleSplitNode ruleSplitNode, RulePassiveRegressionNode learningNode) { 52 | this.ruleNumberID = ruleID; 53 | this.ruleSplitNode = ruleSplitNode; // is this is null: this is for updating learningNode only 54 | this.learningNode = learningNode; 55 | } 56 | 57 | @Override 58 | public String getKey() { 59 | return Integer.toString(this.ruleNumberID); 60 | } 61 | 62 | @Override 63 | public void setKey(String key) { 64 | // do nothing 65 | } 66 | 67 | @Override 68 | public boolean isLastEvent() { 69 | return false; // N/A 70 | } 71 | 72 | 
public int getRuleNumberID() { 73 | return this.ruleNumberID; 74 | } 75 | 76 | public RuleSplitNode getRuleSplitNode() { 77 | return this.ruleSplitNode; 78 | } 79 | 80 | public RulePassiveRegressionNode getLearningNode() { 81 | return this.learningNode; 82 | } 83 | 84 | } 85 | -------------------------------------------------------------------------------- /samoa-api/src/main/java/com/yahoo/labs/samoa/moa/AbstractMOAObject.java: -------------------------------------------------------------------------------- 1 | package com.yahoo.labs.samoa.moa; 2 | 3 | /* 4 | * #%L 5 | * SAMOA 6 | * %% 7 | * Copyright (C) 2007 University of Waikato, Hamilton, New Zealand 8 | * %% 9 | * Licensed under the Apache License, Version 2.0 (the "License"); 10 | * you may not use this file except in compliance with the License. 11 | * You may obtain a copy of the License at 12 | * 13 | * http://www.apache.org/licenses/LICENSE-2.0 14 | * 15 | * Unless required by applicable law or agreed to in writing, software 16 | * distributed under the License is distributed on an "AS IS" BASIS, 17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | * See the License for the specific language governing permissions and 19 | * limitations under the License. 20 | * #L% 21 | */ 22 | 23 | import com.yahoo.labs.samoa.moa.core.SerializeUtils; 24 | //import moa.core.SizeOf; 25 | 26 | /** 27 | * Abstract MOA Object. All classes that are serializable, copiable, 28 | * can measure its size, and can give a description, extend this class. 29 | * 30 | * @author Richard Kirkby (rkirkby@cs.waikato.ac.nz) 31 | * @version $Revision: 7 $ 32 | */ 33 | public abstract class AbstractMOAObject implements MOAObject { 34 | 35 | @Override 36 | public MOAObject copy() { 37 | return copy(this); 38 | } 39 | 40 | @Override 41 | public int measureByteSize() { 42 | return measureByteSize(this); 43 | } 44 | 45 | /** 46 | * Returns a description of the object. 
47 | * 48 | * @return a description of the object 49 | */ 50 | @Override 51 | public String toString() { 52 | StringBuilder sb = new StringBuilder(); 53 | getDescription(sb, 0); 54 | return sb.toString(); 55 | } 56 | 57 | /** 58 | * This method produces a copy of an object. 59 | * 60 | * @param obj object to copy 61 | * @return a copy of the object 62 | */ 63 | public static MOAObject copy(MOAObject obj) { 64 | try { 65 | return (MOAObject) SerializeUtils.copyObject(obj); 66 | } catch (Exception e) { 67 | throw new RuntimeException("Object copy failed.", e); 68 | } 69 | } 70 | 71 | /** 72 | * Gets the memory size of an object. 73 | * 74 | * @param obj object to measure the memory size 75 | * @return the memory size of this object 76 | */ 77 | public static int measureByteSize(MOAObject obj) { 78 | return 0; //(int) SizeOf.fullSizeOf(obj); 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/ClusteringEvaluationContentEvent.java: -------------------------------------------------------------------------------- 1 | package com.yahoo.labs.samoa.evaluation; 2 | 3 | /* 4 | * #%L 5 | * SAMOA 6 | * %% 7 | * Copyright (C) 2013 Yahoo! Inc. 8 | * %% 9 | * Licensed under the Apache License, Version 2.0 (the "License"); 10 | * you may not use this file except in compliance with the License. 11 | * You may obtain a copy of the License at 12 | * 13 | * http://www.apache.org/licenses/LICENSE-2.0 14 | * 15 | * Unless required by applicable law or agreed to in writing, software 16 | * distributed under the License is distributed on an "AS IS" BASIS, 17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 18 | * See the License for the specific language governing permissions and 19 | * limitations under the License. 
20 | * #L% 21 | */ 22 | import com.yahoo.labs.samoa.core.*; 23 | import com.yahoo.labs.samoa.instances.Instance; 24 | import com.yahoo.labs.samoa.moa.cluster.Clustering; 25 | import com.yahoo.labs.samoa.moa.core.DataPoint; 26 | 27 | /** 28 | * License 29 | */ 30 | /** 31 | * The Class Clustering ResultEvent. 32 | */ 33 | final public class ClusteringEvaluationContentEvent implements ContentEvent { 34 | 35 | private static final long serialVersionUID = -7746983521296618922L; 36 | private Clustering gtClustering; 37 | private DataPoint dataPoint; 38 | private final boolean isLast; 39 | private String key = "0"; 40 | 41 | public ClusteringEvaluationContentEvent() { 42 | this.isLast = false; 43 | } 44 | 45 | public ClusteringEvaluationContentEvent(boolean isLast) { 46 | this.isLast = isLast; 47 | } 48 | 49 | /** 50 | * Instantiates a new gtClustering result event. 51 | * 52 | * @param clustering the gtClustering result 53 | * @param instance data point 54 | * @param isLast is the last result 55 | */ 56 | public ClusteringEvaluationContentEvent(Clustering clustering, DataPoint instance, boolean isLast) { 57 | this.gtClustering = clustering; 58 | this.isLast = isLast; 59 | this.dataPoint = instance; 60 | } 61 | 62 | public String getKey() { 63 | return key; 64 | } 65 | 66 | public void setKey(String key) { 67 | this.key = key; 68 | } 69 | 70 | public boolean isLastEvent() { 71 | return this.isLast; 72 | } 73 | 74 | Clustering getGTClustering() { 75 | return this.gtClustering; 76 | } 77 | 78 | DataPoint getDataPoint() { 79 | return this.dataPoint; 80 | } 81 | 82 | } 83 | --------------------------------------------------------------------------------