├── CLA.pdf
├── bin
├── s4-build
│ ├── gradle-wrapper-1.4.jar
│ ├── README.md
│ └── gradle-wrapper-1.4.properties
├── samza-kryo
├── samoa-storm.properties
├── samoa-s4.properties
├── samza-dist
│ ├── run-job.sh
│ ├── run-am.sh
│ └── run-container.sh
└── run-container.sh
├── .gitignore
├── CONTRIBUTING.md
├── samoa-test
├── README.md
├── src
│ └── main
│ │ └── assembly
│ │ └── test-jar-with-dependencies.xml
└── pom.xml
├── samoa-s4
├── samoa-s4-adapter
│ ├── src
│ │ └── main
│ │ │ └── java
│ │ │ └── samoa
│ │ │ └── topology
│ │ │ └── adapter
│ │ │ ├── package-info.java
│ │ │ ├── S4AdapterApp.java
│ │ │ └── S4EntranceProcessingItem.java
│ └── pom.xml
└── src
│ └── main
│ ├── java
│ └── com
│ │ └── yahoo
│ │ └── labs
│ │ └── samoa
│ │ └── topology
│ │ └── impl
│ │ ├── SamoaSerializerModule.java
│ │ ├── S4Topology.java
│ │ └── S4Event.java
│ └── assembly
│ └── samoa-s4.xml
├── samoa-api
└── src
│ └── main
│ └── java
│ └── com
│ └── yahoo
│ └── labs
│ └── samoa
│ ├── evaluation
│ ├── ClassificationPerformanceEvaluator.java
│ ├── RegressionPerformanceEvaluator.java
│ ├── PerformanceEvaluator.java
│ ├── ClusteringResultContentEvent.java
│ └── ClusteringEvaluationContentEvent.java
│ ├── learners
│ ├── RegressionLearner.java
│ ├── ClassificationLearner.java
│ ├── classifiers
│ │ ├── rules
│ │ │ ├── common
│ │ │ │ ├── RulePassiveLearningNode.java
│ │ │ │ ├── RuleActiveLearningNode.java
│ │ │ │ ├── NonLearningRule.java
│ │ │ │ ├── PassiveRule.java
│ │ │ │ └── RuleSplitNode.java
│ │ │ └── distributed
│ │ │ │ ├── AssignmentContentEvent.java
│ │ │ │ ├── RuleContentEvent.java
│ │ │ │ └── PredicateContentEvent.java
│ │ ├── ensemble
│ │ │ └── BoostingDistributorProcessor.java
│ │ ├── trees
│ │ │ ├── DeleteContentEvent.java
│ │ │ ├── LearningNode.java
│ │ │ ├── InactiveLearningNode.java
│ │ │ ├── ControlContentEvent.java
│ │ │ └── FoundNode.java
│ │ └── LocalLearner.java
│ ├── AdaptiveLearner.java
│ └── Learner.java
│ ├── moa
│ ├── core
│ │ ├── Example.java
│ │ ├── ObjectRepository.java
│ │ ├── InstanceExample.java
│ │ └── FastVector.java
│ ├── streams
│ │ ├── clustering
│ │ │ ├── ClusterEventListener.java
│ │ │ ├── ClusterEvent.java
│ │ │ └── ClusteringStream.java
│ │ └── InstanceStream.java
│ ├── classifiers
│ │ ├── rules
│ │ │ └── core
│ │ │ │ ├── Predicate.java
│ │ │ │ └── voting
│ │ │ │ └── UniformWeightedVote.java
│ │ ├── Regressor.java
│ │ └── core
│ │ │ ├── splitcriteria
│ │ │ ├── SDRSplitCriterion.java
│ │ │ ├── InfoGainSplitCriterionMultilabel.java
│ │ │ └── SplitCriterion.java
│ │ │ ├── conditionaltests
│ │ │ ├── InstanceConditionalBinaryTest.java
│ │ │ └── NominalAttributeMultiwayTest.java
│ │ │ ├── attributeclassobservers
│ │ │ ├── NumericAttributeClassObserver.java
│ │ │ └── DiscreteAttributeClassObserver.java
│ │ │ └── AttributeSplitSuggestion.java
│ ├── tasks
│ │ ├── ResultPreviewListener.java
│ │ └── Task.java
│ ├── MOAObject.java
│ ├── clusterers
│ │ └── Clusterer.java
│ ├── evaluation
│ │ ├── LearningPerformanceEvaluator.java
│ │ └── LearningEvaluation.java
│ ├── cluster
│ │ └── Miniball.java
│ └── AbstractMOAObject.java
│ ├── utils
│ ├── PartitioningScheme.java
│ └── StreamDestination.java
│ ├── core
│ ├── ContentEvent.java
│ ├── SerializableInstance.java
│ ├── Globals.java
│ ├── EntranceProcessor.java
│ └── Processor.java
│ ├── topology
│ ├── IProcessingItem.java
│ ├── ISubmitter.java
│ ├── EntranceProcessingItem.java
│ ├── Stream.java
│ ├── Topology.java
│ └── ProcessingItem.java
│ ├── examples
│ ├── HelloWorldDestinationProcessor.java
│ ├── HelloWorldContentEvent.java
│ └── HelloWorldSourceProcessor.java
│ ├── tasks
│ └── Task.java
│ └── streams
│ ├── fs
│ └── FileStreamSource.java
│ └── StreamSource.java
├── NOTICE.txt
├── samoa-storm
└── src
│ ├── main
│ └── java
│ │ └── com
│ │ └── yahoo
│ │ └── labs
│ │ └── samoa
│ │ └── topology
│ │ └── impl
│ │ ├── StormTopologyNode.java
│ │ ├── StormTopology.java
│ │ ├── StormBoltStream.java
│ │ ├── StormSpoutStream.java
│ │ └── StormStream.java
│ └── test
│ └── java
│ └── com
│ └── yahoo
│ └── labs
│ └── samoa
│ └── AlgosTest.java
├── samoa-local
└── src
│ ├── main
│ ├── resources
│ │ └── log4j.xml
│ └── java
│ │ └── com
│ │ └── yahoo
│ │ └── labs
│ │ └── samoa
│ │ └── topology
│ │ └── impl
│ │ ├── SimpleEngine.java
│ │ ├── SimpleEntranceProcessingItem.java
│ │ ├── SimpleTopology.java
│ │ └── SimpleComponentFactory.java
│ └── test
│ └── java
│ └── com
│ └── yahoo
│ └── labs
│ └── samoa
│ └── topology
│ └── impl
│ └── SimpleEngineTest.java
├── .travis.yml
├── RELEASE.txt
├── samoa-instances
├── pom.xml
└── src
│ └── main
│ └── java
│ └── com
│ └── yahoo
│ └── labs
│ └── samoa
│ └── instances
│ ├── InstanceData.java
│ ├── SparseInstance.java
│ ├── Instance.java
│ ├── DenseInstance.java
│ └── SingleClassInstanceData.java
├── samoa-threads
└── src
│ ├── main
│ └── java
│ │ └── com
│ │ └── yahoo
│ │ └── labs
│ │ └── samoa
│ │ └── topology
│ │ └── impl
│ │ ├── ThreadsEntranceProcessingItem.java
│ │ ├── ThreadsProcessingItemInstance.java
│ │ ├── ThreadsEventRunnable.java
│ │ ├── ThreadsComponentFactory.java
│ │ └── ThreadsTopology.java
│ └── test
│ └── java
│ └── com
│ └── yahoo
│ └── labs
│ └── samoa
│ └── topology
│ └── impl
│ ├── ThreadsEventRunnableTest.java
│ └── ThreadsProcessingItemInstanceTest.java
└── samoa-samza
└── src
└── main
└── java
└── com
└── yahoo
└── labs
└── samoa
├── topology
└── impl
│ ├── SamzaProcessingNode.java
│ ├── SamzaComponentFactory.java
│ ├── SamoaSystemFactory.java
│ └── SamzaTopology.java
└── utils
└── SerializableSerializer.java
/CLA.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YahooArchive/samoa/HEAD/CLA.pdf
--------------------------------------------------------------------------------
/bin/s4-build/gradle-wrapper-1.4.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YahooArchive/samoa/HEAD/bin/s4-build/gradle-wrapper-1.4.jar
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | #maven
2 | target/
3 |
4 | #eclipse
5 | .classpath
6 | .project
7 | .settings/
8 |
9 | #DS_Store
10 | .DS_Store
11 |
12 | #intellij
13 | .idea/
14 | *.iml
15 |
--------------------------------------------------------------------------------
/bin/s4-build/README.md:
--------------------------------------------------------------------------------
1 | As a workaround for Travis CI using Gradle 2.1+, which causes issues with the S4 build, this directory contains
2 | pre-generated gradlew scripts and libraries for Gradle 1.4 that can be copied and used during the Travis CI build.
3 |
4 |
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | How to contribute?
2 | =================
3 |
4 | User contribution is essential to keep moving the project forward.
5 |
6 | # Getting Started
7 |
8 | 1. [Fork the repository](https://help.github.com/articles/fork-a-repo) on GitHub
9 |
10 | 2. Sign the [Contributor License Agreement](http://www.clahub.com/agreements/gdfm/samoa)
11 |
12 | 3. Create a feature branch ```git checkout -b branch_name```
13 |
14 | 4. Make your changes (please make commits that are logical units)
15 |
16 | 5. Build and run tests
17 |
18 | 6. Submit a [pull request](https://help.github.com/articles/using-pull-requests)
19 |
--------------------------------------------------------------------------------
/bin/samza-kryo:
--------------------------------------------------------------------------------
1 | com.yahoo.labs.samoa.learners.classifiers.trees.AttributeContentEvent:com.yahoo.labs.samoa.learners.classifiers.trees.AttributeContentEvent$AttributeCEFullPrecSerializer
2 | com.yahoo.labs.samoa.learners.classifiers.trees.ComputeContentEvent:com.yahoo.labs.samoa.learners.classifiers.trees.ComputeContentEvent$ComputeCEFullPrecSerializer
3 | com.yahoo.labs.samoa.moa.classifiers.core.AttributeSplitSuggestion:com.yahoo.labs.samoa.utils.SerializableSerializer
4 |
5 | com.yahoo.labs.samoa.learners.classifiers.rules.common.TargetMean:com.yahoo.labs.samoa.learners.classifiers.rules.common.TargetMean$TargetMeanSerializer
6 | com.yahoo.labs.samoa.learners.classifiers.rules.common.Perceptron:com.yahoo.labs.samoa.learners.classifiers.rules.common.Perceptron$PerceptronSerializer
7 |
--------------------------------------------------------------------------------
/samoa-test/README.md:
--------------------------------------------------------------------------------
1 | This module contains a test framework for simplifying regression testing of Samoa algorithms on various platforms.
2 |
3 | The test framework is generic and reusable for multiple platforms. The platform modules that make use of the test framework add a maven dependency to a test-jar artifact of the samoa-test module. This test-jar artifact includes the test framework classes and its dependencies.
4 |
5 | For defining tests, we reuse the code from the test framework but customize tests according to the platform capabilities.
6 |
7 | For each algorithm to test, we must provide:
8 |
9 | * the task class for the platform
10 | * the algorithm (referring to the provided string templates in this module)
11 | * the input parameters
12 | * the expectations (thresholds or values)
13 |
14 | See existing code in samoa-local, samoa-threads and samoa-storm for some examples.
15 |
--------------------------------------------------------------------------------
/samoa-s4/samoa-s4-adapter/src/main/java/samoa/topology/adapter/package-info.java:
--------------------------------------------------------------------------------
1 | /**
2 | *
3 | */
4 | /**
5 | * @author severien
6 | *
7 | */
8 | package samoa.topology.adapter;
9 |
10 | /*
11 | * #%L
12 | * SAMOA
13 | * %%
14 | * Copyright (C) 2013 Yahoo! Inc.
15 | * %%
16 | * Licensed under the Apache License, Version 2.0 (the "License");
17 | * you may not use this file except in compliance with the License.
18 | * You may obtain a copy of the License at
19 | *
20 | * http://www.apache.org/licenses/LICENSE-2.0
21 | *
22 | * Unless required by applicable law or agreed to in writing, software
23 | * distributed under the License is distributed on an "AS IS" BASIS,
24 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
25 | * See the License for the specific language governing permissions and
26 | * limitations under the License.
27 | * #L%
28 | */
29 |
--------------------------------------------------------------------------------
/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/ClassificationPerformanceEvaluator.java:
--------------------------------------------------------------------------------
1 | package com.yahoo.labs.samoa.evaluation;
2 |
3 | /*
4 | * #%L
5 | * SAMOA
6 | * %%
7 | * Copyright (C) 2013 Yahoo! Inc.
8 | * %%
9 | * Licensed under the Apache License, Version 2.0 (the "License");
10 | * you may not use this file except in compliance with the License.
11 | * You may obtain a copy of the License at
12 | *
13 | * http://www.apache.org/licenses/LICENSE-2.0
14 | *
15 | * Unless required by applicable law or agreed to in writing, software
16 | * distributed under the License is distributed on an "AS IS" BASIS,
17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | * See the License for the specific language governing permissions and
19 | * limitations under the License.
20 | * #L%
21 | */
22 |
23 | public interface ClassificationPerformanceEvaluator extends PerformanceEvaluator {
24 | }
25 |
--------------------------------------------------------------------------------
/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/RegressionPerformanceEvaluator.java:
--------------------------------------------------------------------------------
1 | package com.yahoo.labs.samoa.evaluation;
2 |
3 | /*
4 | * #%L
5 | * SAMOA
6 | * %%
7 | * Copyright (C) 2013 Yahoo! Inc.
8 | * %%
9 | * Licensed under the Apache License, Version 2.0 (the "License");
10 | * you may not use this file except in compliance with the License.
11 | * You may obtain a copy of the License at
12 | *
13 | * http://www.apache.org/licenses/LICENSE-2.0
14 | *
15 | * Unless required by applicable law or agreed to in writing, software
16 | * distributed under the License is distributed on an "AS IS" BASIS,
17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | * See the License for the specific language governing permissions and
19 | * limitations under the License.
20 | * #L%
21 | */
22 |
23 | public interface RegressionPerformanceEvaluator extends PerformanceEvaluator {
24 |
25 | }
26 |
--------------------------------------------------------------------------------
/samoa-api/src/main/java/com/yahoo/labs/samoa/learners/RegressionLearner.java:
--------------------------------------------------------------------------------
1 | package com.yahoo.labs.samoa.learners;
2 |
3 | /*
4 | * #%L
5 | * SAMOA
6 | * %%
7 | * Copyright (C) 2013 - 2014 Yahoo! Inc.
8 | * %%
9 | * Licensed under the Apache License, Version 2.0 (the "License");
10 | * you may not use this file except in compliance with the License.
11 | * You may obtain a copy of the License at
12 | *
13 | * http://www.apache.org/licenses/LICENSE-2.0
14 | *
15 | * Unless required by applicable law or agreed to in writing, software
16 | * distributed under the License is distributed on an "AS IS" BASIS,
17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | * See the License for the specific language governing permissions and
19 | * limitations under the License.
20 | * #L%
21 | */
22 |
23 | import com.yahoo.labs.samoa.learners.Learner;
24 |
25 | public interface RegressionLearner extends Learner {
26 |
27 | }
28 |
--------------------------------------------------------------------------------
/samoa-api/src/main/java/com/yahoo/labs/samoa/learners/ClassificationLearner.java:
--------------------------------------------------------------------------------
1 | package com.yahoo.labs.samoa.learners;
2 |
3 | /*
4 | * #%L
5 | * SAMOA
6 | * %%
7 | * Copyright (C) 2013 - 2014 Yahoo! Inc.
8 | * %%
9 | * Licensed under the Apache License, Version 2.0 (the "License");
10 | * you may not use this file except in compliance with the License.
11 | * You may obtain a copy of the License at
12 | *
13 | * http://www.apache.org/licenses/LICENSE-2.0
14 | *
15 | * Unless required by applicable law or agreed to in writing, software
16 | * distributed under the License is distributed on an "AS IS" BASIS,
17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | * See the License for the specific language governing permissions and
19 | * limitations under the License.
20 | * #L%
21 | */
22 |
23 | import com.yahoo.labs.samoa.learners.Learner;
24 |
25 | public interface ClassificationLearner extends Learner {
26 |
27 | }
28 |
--------------------------------------------------------------------------------
/bin/s4-build/gradle-wrapper-1.4.properties:
--------------------------------------------------------------------------------
1 | ###
2 | # #%L
3 | # SAMOA
4 | # %%
5 | # Copyright (C) 2015 Yahoo! Inc.
6 | # %%
7 | # Licensed under the Apache License, Version 2.0 (the "License");
8 | # you may not use this file except in compliance with the License.
9 | # You may obtain a copy of the License at
10 | #
11 | # http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing, software
14 | # distributed under the License is distributed on an "AS IS" BASIS,
15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 | # See the License for the specific language governing permissions and
17 | # limitations under the License.
18 | # #L%
19 | ###
20 | #Wed Sep 24 11:25:39 CEST 2014
21 | distributionBase=GRADLE_USER_HOME
22 | distributionPath=wrapper/dists
23 | zipStoreBase=GRADLE_USER_HOME
24 | zipStorePath=wrapper/dists
25 | distributionUrl=http\://services.gradle.org/distributions/gradle-1.4-bin.zip
26 |
--------------------------------------------------------------------------------
/samoa-test/src/main/assembly/test-jar-with-dependencies.xml:
--------------------------------------------------------------------------------
1 |
4 | test-jar-with-dependencies
5 |
6 | jar
7 |
8 | false
9 |
10 |
11 | /
12 | true
13 |
14 | true
15 | false
16 | true
17 |
18 |
19 |
--------------------------------------------------------------------------------
/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/core/Example.java:
--------------------------------------------------------------------------------
1 |
2 | package com.yahoo.labs.samoa.moa.core;
3 |
4 | /*
5 | * #%L
6 | * SAMOA
7 | * %%
8 | * Copyright (C) 2007 University of Waikato, Hamilton, New Zealand
9 | * %%
10 | * Licensed under the Apache License, Version 2.0 (the "License");
11 | * you may not use this file except in compliance with the License.
12 | * You may obtain a copy of the License at
13 | *
14 | * http://www.apache.org/licenses/LICENSE-2.0
15 | *
16 | * Unless required by applicable law or agreed to in writing, software
17 | * distributed under the License is distributed on an "AS IS" BASIS,
18 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19 | * See the License for the specific language governing permissions and
20 | * limitations under the License.
21 | * #L%
22 | */
23 |
24 | public interface Example< T extends Object> {
25 |
26 | public T getData();
27 |
28 | public double weight();
29 |
30 | public void setWeight(double weight);
31 | }
32 |
--------------------------------------------------------------------------------
/NOTICE.txt:
--------------------------------------------------------------------------------
1 | SAMOA
2 | Copyright 2013 Yahoo! Inc.
3 |
4 | Licensed under the Apache License, Version 2.0 (the
5 | "License"); you may not use this file except in compliance
6 | with the License. You may obtain a copy of the License at:
7 |
8 | http://www.apache.org/licenses/LICENSE-2.0
9 |
10 | Unless required by applicable law or agreed to in writing,
11 | software distributed under the License is distributed on
12 | an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
13 | KIND, either express or implied. See the License for the
14 | specific language governing permissions and limitations
15 | under the License.
16 |
17 | Except as specifically stated below, the 3rd party software packages are not distributed as part of
18 | this project, but instead are separately downloaded from the respective provider.
19 |
20 | * MOA version 13.08 (redistributed under the Apache License v2 - http://www.apache.org/licenses/LICENSE-2.0)
21 | Library for data stream mining. Only a small subset of the original library is redistributed with SAMOA.
22 | http://moa.cms.waikato.ac.nz
23 |
--------------------------------------------------------------------------------
/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/streams/clustering/ClusterEventListener.java:
--------------------------------------------------------------------------------
1 |
2 | package com.yahoo.labs.samoa.moa.streams.clustering;
3 |
4 | /*
5 | * #%L
6 | * SAMOA
7 | * %%
8 | * Copyright (C) 2010 RWTH Aachen University, Germany
9 | * %%
10 | * Licensed under the Apache License, Version 2.0 (the "License");
11 | * you may not use this file except in compliance with the License.
12 | * You may obtain a copy of the License at
13 | *
14 | * http://www.apache.org/licenses/LICENSE-2.0
15 | *
16 | * Unless required by applicable law or agreed to in writing, software
17 | * distributed under the License is distributed on an "AS IS" BASIS,
18 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19 | * See the License for the specific language governing permissions and
20 | * limitations under the License.
21 | * #L%
22 | */
23 |
24 | import java.util.EventListener;
25 |
26 | public interface ClusterEventListener extends EventListener {
27 |
28 | public void changeCluster(ClusterEvent e);
29 |
30 | }
31 |
32 |
--------------------------------------------------------------------------------
/samoa-api/src/main/java/com/yahoo/labs/samoa/utils/PartitioningScheme.java:
--------------------------------------------------------------------------------
1 | package com.yahoo.labs.samoa.utils;
2 |
3 | /*
4 | * #%L
5 | * SAMOA
6 | * %%
7 | * Copyright (C) 2013 Yahoo! Inc.
8 | * %%
9 | * Licensed under the Apache License, Version 2.0 (the "License");
10 | * you may not use this file except in compliance with the License.
11 | * You may obtain a copy of the License at
12 | *
13 | * http://www.apache.org/licenses/LICENSE-2.0
14 | *
15 | * Unless required by applicable law or agreed to in writing, software
16 | * distributed under the License is distributed on an "AS IS" BASIS,
17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | * See the License for the specific language governing permissions and
19 | * limitations under the License.
20 | * #L%
21 | */
22 |
23 | /**
24 | * Represents the 3 schemes to partition the streams
25 | * @author Anh Thu Vu
26 | *
27 | */
28 | public enum PartitioningScheme {
29 | SHUFFLE, GROUP_BY_KEY, BROADCAST
30 | }
31 | // TODO: use this enum in S4
32 | // Storm doesn't seem to need this
--------------------------------------------------------------------------------
/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/core/ObjectRepository.java:
--------------------------------------------------------------------------------
1 | package com.yahoo.labs.samoa.moa.core;
2 |
3 | /*
4 | * #%L
5 | * SAMOA
6 | * %%
7 | * Copyright (C) 2007 University of Waikato, Hamilton, New Zealand
8 | * %%
9 | * Licensed under the Apache License, Version 2.0 (the "License");
10 | * you may not use this file except in compliance with the License.
11 | * You may obtain a copy of the License at
12 | *
13 | * http://www.apache.org/licenses/LICENSE-2.0
14 | *
15 | * Unless required by applicable law or agreed to in writing, software
16 | * distributed under the License is distributed on an "AS IS" BASIS,
17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | * See the License for the specific language governing permissions and
19 | * limitations under the License.
20 | * #L%
21 | */
22 |
23 | /**
24 | * Interface for object repositories.
25 | *
26 | * @author Richard Kirkby (rkirkby@cs.waikato.ac.nz)
27 | * @version $Revision: 7 $
28 | */
29 | public interface ObjectRepository {
30 |
31 | Object getObjectNamed(String string);
32 | }
33 |
--------------------------------------------------------------------------------
/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/rules/core/Predicate.java:
--------------------------------------------------------------------------------
1 | package com.yahoo.labs.samoa.moa.classifiers.rules.core;
2 |
3 | /*
4 | * #%L
5 | * SAMOA
6 | * %%
7 | * Copyright (C) 2013 - 2014 Yahoo! Inc.
8 | * %%
9 | * Licensed under the Apache License, Version 2.0 (the "License");
10 | * you may not use this file except in compliance with the License.
11 | * You may obtain a copy of the License at
12 | *
13 | * http://www.apache.org/licenses/LICENSE-2.0
14 | *
15 | * Unless required by applicable law or agreed to in writing, software
16 | * distributed under the License is distributed on an "AS IS" BASIS,
17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | * See the License for the specific language governing permissions and
19 | * limitations under the License.
20 | * #L%
21 | */
22 |
23 | import com.yahoo.labs.samoa.instances.Instance;
24 |
25 | /**
26 | * Interface for a predicate (a feature) in rules.
27 | *
28 | */
29 | public interface Predicate {
30 |
31 | public boolean evaluate(Instance instance);
32 |
33 | }
34 |
--------------------------------------------------------------------------------
/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/Regressor.java:
--------------------------------------------------------------------------------
1 | package com.yahoo.labs.samoa.moa.classifiers;
2 |
3 | /*
4 | * #%L
5 | * SAMOA
6 | * %%
7 | * Copyright (C) 2007 University of Waikato, Hamilton, New Zealand
8 | * %%
9 | * Licensed under the Apache License, Version 2.0 (the "License");
10 | * you may not use this file except in compliance with the License.
11 | * You may obtain a copy of the License at
12 | *
13 | * http://www.apache.org/licenses/LICENSE-2.0
14 | *
15 | * Unless required by applicable law or agreed to in writing, software
16 | * distributed under the License is distributed on an "AS IS" BASIS,
17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | * See the License for the specific language governing permissions and
19 | * limitations under the License.
20 | * #L%
21 | */
22 |
23 | /**
24 | * Regressor interface for incremental regression models. It is used only in the GUI Regression Tab.
25 | *
26 | * @author Richard Kirkby (rkirkby@cs.waikato.ac.nz)
27 | * @version $Revision: 7 $
28 | */
29 | public interface Regressor {
30 |
31 | }
32 |
--------------------------------------------------------------------------------
/samoa-storm/src/main/java/com/yahoo/labs/samoa/topology/impl/StormTopologyNode.java:
--------------------------------------------------------------------------------
1 | package com.yahoo.labs.samoa.topology.impl;
2 |
3 | /*
4 | * #%L
5 | * SAMOA
6 | * %%
7 | * Copyright (C) 2013 Yahoo! Inc.
8 | * %%
9 | * Licensed under the Apache License, Version 2.0 (the "License");
10 | * you may not use this file except in compliance with the License.
11 | * You may obtain a copy of the License at
12 | *
13 | * http://www.apache.org/licenses/LICENSE-2.0
14 | *
15 | * Unless required by applicable law or agreed to in writing, software
16 | * distributed under the License is distributed on an "AS IS" BASIS,
17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | * See the License for the specific language governing permissions and
19 | * limitations under the License.
20 | * #L%
21 | */
22 |
23 | /**
24 | * Interface to represent a node in samoa-storm topology.
25 | * @author Arinto Murdopo
26 | *
27 | */
28 | interface StormTopologyNode {
29 |
30 | void addToTopology(StormTopology topology, int parallelismHint);
31 | StormStream createStream();
32 | String getId();
33 |
34 | }
35 |
--------------------------------------------------------------------------------
/samoa-api/src/main/java/com/yahoo/labs/samoa/learners/classifiers/rules/common/RulePassiveLearningNode.java:
--------------------------------------------------------------------------------
1 | package com.yahoo.labs.samoa.learners.classifiers.rules.common;
2 |
3 | /*
4 | * #%L
5 | * SAMOA
6 | * %%
7 | * Copyright (C) 2013 - 2014 Yahoo! Inc.
8 | * %%
9 | * Licensed under the Apache License, Version 2.0 (the "License");
10 | * you may not use this file except in compliance with the License.
11 | * You may obtain a copy of the License at
12 | *
13 | * http://www.apache.org/licenses/LICENSE-2.0
14 | *
15 | * Unless required by applicable law or agreed to in writing, software
16 | * distributed under the License is distributed on an "AS IS" BASIS,
17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | * See the License for the specific language governing permissions and
19 | * limitations under the License.
20 | * #L%
21 | */
22 |
23 | /**
24 | * Interface for Rule's LearningNode that does not update
25 | * statistics for expanding rule. It only updates statistics for
26 | * computing predictions.
27 | *
28 | * @author Anh Thu Vu
29 | *
30 | */
31 | public interface RulePassiveLearningNode {
32 |
33 | }
34 |
--------------------------------------------------------------------------------
/samoa-s4/src/main/java/com/yahoo/labs/samoa/topology/impl/SamoaSerializerModule.java:
--------------------------------------------------------------------------------
1 | package com.yahoo.labs.samoa.topology.impl;
2 |
3 | /*
4 | * #%L
5 | * SAMOA
6 | * %%
7 | * Copyright (C) 2013 - 2014 Yahoo! Inc.
8 | * %%
9 | * Licensed under the Apache License, Version 2.0 (the "License");
10 | * you may not use this file except in compliance with the License.
11 | * You may obtain a copy of the License at
12 | *
13 | * http://www.apache.org/licenses/LICENSE-2.0
14 | *
15 | * Unless required by applicable law or agreed to in writing, software
16 | * distributed under the License is distributed on an "AS IS" BASIS,
17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | * See the License for the specific language governing permissions and
19 | * limitations under the License.
20 | * #L%
21 | */
22 |
23 | import org.apache.s4.base.SerializerDeserializer;
24 |
25 | import com.google.inject.AbstractModule;
26 |
27 | public class SamoaSerializerModule extends AbstractModule {
28 |
29 | @Override
30 | protected void configure() {
31 | bind(SerializerDeserializer.class).to(SamoaSerializer.class);
32 |
33 | }
34 |
35 | }
36 |
--------------------------------------------------------------------------------
/samoa-api/src/main/java/com/yahoo/labs/samoa/core/ContentEvent.java:
--------------------------------------------------------------------------------
1 | package com.yahoo.labs.samoa.core;
2 |
3 | /*
4 | * #%L
5 | * SAMOA
6 | * %%
7 | * Copyright (C) 2013 Yahoo! Inc.
8 | * %%
9 | * Licensed under the Apache License, Version 2.0 (the "License");
10 | * you may not use this file except in compliance with the License.
11 | * You may obtain a copy of the License at
12 | *
13 | * http://www.apache.org/licenses/LICENSE-2.0
14 | *
15 | * Unless required by applicable law or agreed to in writing, software
16 | * distributed under the License is distributed on an "AS IS" BASIS,
17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | * See the License for the specific language governing permissions and
19 | * limitations under the License.
20 | * #L%
21 | */
22 |
23 | /**
24 | * The Interface ContentEvent.
25 | */
26 | public interface ContentEvent extends java.io.Serializable {
27 |
28 | /**
29 | * Gets the content event key.
30 | *
31 | * @return the key
32 | */
33 | public String getKey();
34 |
35 | /**
36 | * Sets the content event key.
37 | *
38 | * @param key string
39 | */
40 | public void setKey(String key);
41 |
42 | public boolean isLastEvent();
43 | }
44 |
--------------------------------------------------------------------------------
/samoa-api/src/main/java/com/yahoo/labs/samoa/learners/classifiers/rules/common/RuleActiveLearningNode.java:
--------------------------------------------------------------------------------
1 | package com.yahoo.labs.samoa.learners.classifiers.rules.common;
2 |
3 | /*
4 | * #%L
5 | * SAMOA
6 | * %%
7 | * Copyright (C) 2013 - 2014 Yahoo! Inc.
8 | * %%
9 | * Licensed under the Apache License, Version 2.0 (the "License");
10 | * you may not use this file except in compliance with the License.
11 | * You may obtain a copy of the License at
12 | *
13 | * http://www.apache.org/licenses/LICENSE-2.0
14 | *
15 | * Unless required by applicable law or agreed to in writing, software
16 | * distributed under the License is distributed on an "AS IS" BASIS,
17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | * See the License for the specific language governing permissions and
19 | * limitations under the License.
20 | * #L%
21 | */
22 |
23 | /**
24 | * Interface for Rule's LearningNode that updates both statistics
25 | * for expanding rule and computing predictions.
26 | *
27 | * @author Anh Thu Vu
28 | *
29 | */
30 | public interface RuleActiveLearningNode extends RulePassiveLearningNode {
31 |
32 | public boolean tryToExpand(double splitConfidence, double tieThreshold);
33 |
34 | }
35 |
--------------------------------------------------------------------------------
/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/streams/InstanceStream.java:
--------------------------------------------------------------------------------
1 | package com.yahoo.labs.samoa.moa.streams;
2 |
3 | /*
4 | * #%L
5 | * SAMOA
6 | * %%
7 | * Copyright (C) 2007 University of Waikato, Hamilton, New Zealand
8 | * %%
9 | * Licensed under the Apache License, Version 2.0 (the "License");
10 | * you may not use this file except in compliance with the License.
11 | * You may obtain a copy of the License at
12 | *
13 | * http://www.apache.org/licenses/LICENSE-2.0
14 | *
15 | * Unless required by applicable law or agreed to in writing, software
16 | * distributed under the License is distributed on an "AS IS" BASIS,
17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | * See the License for the specific language governing permissions and
19 | * limitations under the License.
20 | * #L%
21 | */
22 |
23 | import com.yahoo.labs.samoa.moa.core.Example;
24 | import com.yahoo.labs.samoa.instances.Instance;
25 |
26 | /**
27 | * Interface representing a data stream of instances.
28 | *
29 | * @author Richard Kirkby (rkirkby@cs.waikato.ac.nz)
30 | * @version $Revision: 7 $
31 | */
32 | public interface InstanceStream extends ExampleStream> {
33 |
34 |
35 |
36 |
37 | }
38 |
--------------------------------------------------------------------------------
/samoa-local/src/main/resources/log4j.xml:
--------------------------------------------------------------------------------
1 |
2 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: java
2 |
3 | install:
4 | - git clone https://github.com/apache/incubator-s4.git
5 | - cd incubator-s4
6 | - git checkout tags/0.6.0-Final
7 | - cp ../bin/s4-build/gradlew .
8 | - cp ../bin/s4-build/gradle-wrapper-1.4.jar ./lib/
9 | - cp ../bin/s4-build/gradle-wrapper-1.4.properties ./lib/
10 | - ./gradlew install
11 | - ./gradlew s4-tools::installApp
12 | - cd ..
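13 | - echo "<settings><servers><server><id>sonatype-nexus-snapshots</id><username>${SOSS_USERNAME}</username><password>${SOSS_PASSWORD}</password></server></servers></settings>" > ${HOME}/.m2/settings.xml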
14 | #- cat ${HOME}/.m2/settings.xml
15 |
16 | script: if [[ "$TRAVIS_SECURE_ENV_VARS" == "true" && "$TRAVIS_PULL_REQUEST" == "false" ]]; then CMD=deploy; else CMD=install; fi; echo $CMD; mvn -B -Pall $CMD
17 |
18 | notifications:
19 | email:
20 | on_success: never
21 | on_failure: change
22 |
23 | env:
24 | global:
25 | - secure: "hSyN3Ys3wDMJtL8jAcfFMh8pnG7B2TaKXc4qDWgE9a73XQ77JB8asCeXtQx/0/rNrJeNLBdSrVcXNAaOXXgGZpftJ0WdIBsyAj+tzpVAf+pcEHPVCgR4PHLkm1/UlyGX//1J+DjkDXnRgNfsD8xjZxTeNFH8xFzAU5YaP0AiLmk="
26 | - secure: "MJLRFWi1uGZ1s5u/A44u4vDSGXF23H/3GGhofvLliaM4ivkeO9uthErlHgloGSmubEVkJMiThBLveZl01tNYRgn5a08qqyIsf/eShMagJDR7cX6FmbU7qOMOSzaAI84GDtrNuDQqaz2I1nTKfnzDYcTGXrpJMwLmbx30E9D/qaY="
27 |
--------------------------------------------------------------------------------
/samoa-local/src/main/java/com/yahoo/labs/samoa/topology/impl/SimpleEngine.java:
--------------------------------------------------------------------------------
1 | /*
2 | * To change this template, choose Tools | Templates
3 | * and open the template in the editor.
4 | */
5 | package com.yahoo.labs.samoa.topology.impl;
6 |
7 | /*
8 | * #%L
9 | * SAMOA
10 | * %%
11 | * Copyright (C) 2013 Yahoo! Inc.
12 | * %%
13 | * Licensed under the Apache License, Version 2.0 (the "License");
14 | * you may not use this file except in compliance with the License.
15 | * You may obtain a copy of the License at
16 | *
17 | * http://www.apache.org/licenses/LICENSE-2.0
18 | *
19 | * Unless required by applicable law or agreed to in writing, software
20 | * distributed under the License is distributed on an "AS IS" BASIS,
21 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
22 | * See the License for the specific language governing permissions and
23 | * limitations under the License.
24 | * #L%
25 | */
26 |
27 | import com.yahoo.labs.samoa.topology.Topology;
28 |
29 | public class SimpleEngine {
30 |
31 | public static void submitTopology(Topology topology) {
32 | SimpleTopology simpleTopology = (SimpleTopology) topology;
33 | simpleTopology.run();
34 | // runs until completion
35 | }
36 |
37 | }
38 |
--------------------------------------------------------------------------------
/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/splitcriteria/SDRSplitCriterion.java:
--------------------------------------------------------------------------------
1 | package com.yahoo.labs.samoa.moa.classifiers.core.splitcriteria;
2 |
3 | /*
4 | * #%L
5 | * SAMOA
6 | * %%
7 | * Copyright (C) 2012 University of Waikato, Hamilton, New Zealand
8 | * %%
9 | * Licensed under the Apache License, Version 2.0 (the "License");
10 | * you may not use this file except in compliance with the License.
11 | * You may obtain a copy of the License at
12 | *
13 | * http://www.apache.org/licenses/LICENSE-2.0
14 | *
15 | * Unless required by applicable law or agreed to in writing, software
16 | * distributed under the License is distributed on an "AS IS" BASIS,
17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | * See the License for the specific language governing permissions and
19 | * limitations under the License.
20 | * #L%
21 | */
22 |
23 | public class SDRSplitCriterion extends VarianceReductionSplitCriterion {
24 | private static final long serialVersionUID = 1L;
25 |
26 | public static double computeSD(double[] dist) {
27 | int N = (int)dist[0];
28 | double sum = dist[1];
29 | double sumSq = dist[2];
30 | return Math.sqrt((sumSq - ((sum * sum)/N))/N);
31 | }
32 |
33 | }
34 |
--------------------------------------------------------------------------------
/samoa-api/src/main/java/com/yahoo/labs/samoa/learners/classifiers/ensemble/BoostingDistributorProcessor.java:
--------------------------------------------------------------------------------
1 | package com.yahoo.labs.samoa.learners.classifiers.ensemble;
2 |
3 | import com.yahoo.labs.samoa.learners.InstanceContentEvent;
4 |
5 | /*
6 | * #%L
7 | * SAMOA
8 | * %%
9 | * Copyright (C) 2013 Yahoo! Inc.
10 | * %%
11 | * Licensed under the Apache License, Version 2.0 (the "License");
12 | * you may not use this file except in compliance with the License.
13 | * You may obtain a copy of the License at
14 | *
15 | * http://www.apache.org/licenses/LICENSE-2.0
16 | *
17 | * Unless required by applicable law or agreed to in writing, software
18 | * distributed under the License is distributed on an "AS IS" BASIS,
19 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
20 | * See the License for the specific language governing permissions and
21 | * limitations under the License.
22 | * #L%
23 | */
24 |
25 |
26 | /**
27 | * The Class BoostingDistributorProcessor.
28 | */
29 | public class BoostingDistributorProcessor extends BaggingDistributorProcessor{
30 |
31 | @Override
32 | protected void train(InstanceContentEvent inEvent) {
33 | // Boosting is trained from the prediction combiner, not from the input
34 | }
35 |
36 | }
37 |
--------------------------------------------------------------------------------
/samoa-local/src/main/java/com/yahoo/labs/samoa/topology/impl/SimpleEntranceProcessingItem.java:
--------------------------------------------------------------------------------
1 | package com.yahoo.labs.samoa.topology.impl;
2 |
3 | /*
4 | * #%L
5 | * SAMOA
6 | * %%
7 | * Copyright (C) 2013 Yahoo! Inc.
8 | * %%
9 | * Licensed under the Apache License, Version 2.0 (the "License");
10 | * you may not use this file except in compliance with the License.
11 | * You may obtain a copy of the License at
12 | *
13 | * http://www.apache.org/licenses/LICENSE-2.0
14 | *
15 | * Unless required by applicable law or agreed to in writing, software
16 | * distributed under the License is distributed on an "AS IS" BASIS,
17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | * See the License for the specific language governing permissions and
19 | * limitations under the License.
20 | * #L%
21 | */
22 |
23 | import com.yahoo.labs.samoa.core.EntranceProcessor;
24 | import com.yahoo.labs.samoa.topology.LocalEntranceProcessingItem;
25 |
26 | class SimpleEntranceProcessingItem extends LocalEntranceProcessingItem {
27 | public SimpleEntranceProcessingItem(EntranceProcessor processor) {
28 | super(processor);
29 | }
30 |
31 | // The default waiting time when there is no available events is 100ms
32 | // Override waitForNewEvents() to change it
33 | }
34 |
--------------------------------------------------------------------------------
/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/conditionaltests/InstanceConditionalBinaryTest.java:
--------------------------------------------------------------------------------
1 | package com.yahoo.labs.samoa.moa.classifiers.core.conditionaltests;
2 |
3 | /*
4 | * #%L
5 | * SAMOA
6 | * %%
7 | * Copyright (C) 2007 University of Waikato, Hamilton, New Zealand
8 | * %%
9 | * Licensed under the Apache License, Version 2.0 (the "License");
10 | * you may not use this file except in compliance with the License.
11 | * You may obtain a copy of the License at
12 | *
13 | * http://www.apache.org/licenses/LICENSE-2.0
14 | *
15 | * Unless required by applicable law or agreed to in writing, software
16 | * distributed under the License is distributed on an "AS IS" BASIS,
17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | * See the License for the specific language governing permissions and
19 | * limitations under the License.
20 | * #L%
21 | */
22 |
23 | /**
24 | * Abstract binary conditional test for instances to use to split nodes in Hoeffding trees.
25 | *
26 | * @author Richard Kirkby (rkirkby@cs.waikato.ac.nz)
27 | * @version $Revision: 7 $
28 | */
29 | public abstract class InstanceConditionalBinaryTest extends InstanceConditionalTest {
30 |
31 | @Override
32 | public int maxBranches() {
33 | return 2;
34 | }
35 | }
36 |
--------------------------------------------------------------------------------
/samoa-api/src/main/java/com/yahoo/labs/samoa/topology/IProcessingItem.java:
--------------------------------------------------------------------------------
1 | package com.yahoo.labs.samoa.topology;
2 |
3 | /*
4 | * #%L
5 | * SAMOA
6 | * %%
7 | * Copyright (C) 2013 Yahoo! Inc.
8 | * %%
9 | * Licensed under the Apache License, Version 2.0 (the "License");
10 | * you may not use this file except in compliance with the License.
11 | * You may obtain a copy of the License at
12 | *
13 | * http://www.apache.org/licenses/LICENSE-2.0
14 | *
15 | * Unless required by applicable law or agreed to in writing, software
16 | * distributed under the License is distributed on an "AS IS" BASIS,
17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | * See the License for the specific language governing permissions and
19 | * limitations under the License.
20 | * #L%
21 | */
22 |
23 | import com.yahoo.labs.samoa.core.Processor;
24 |
25 | /**
26 | * ProcessingItem interface specific for entrance processing items.
27 | *
28 | * @author severien
29 | *
30 | */
31 | public interface IProcessingItem {
32 |
33 | /**
34 | * Gets the processing item processor.
35 | *
36 | * @return Processor
37 | */
38 | public Processor getProcessor();
39 |
40 | /**
41 | * Sets processing item name.
42 | *
43 | * @param name
44 | */
45 | //public void setName(String name);
46 |
47 | }
48 |
--------------------------------------------------------------------------------
/RELEASE.txt:
--------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing, software
13 | * distributed under the License is distributed on an "AS IS" BASIS,
14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | * See the License for the specific language governing permissions and
16 | * limitations under the License.
17 | */
18 |
19 | Release v0.2.0
20 | NaiveBayes classification algorithm.
21 | AMRules regression algorithm.
22 | Samza execution engine.
23 | Multithread execution engine.
24 | HDFS stream source.
25 |
26 | Release v0.1.0
27 | Initial release.
28 | Vertical Hoeffding Tree classification algorithm.
29 | Clustream clustering algorithm.
30 | Adaptive ensembles (Bagging and Boosting).
31 | Local execution engine.
32 | Storm execution engine.
33 | S4 execution engine.
34 |
--------------------------------------------------------------------------------
/samoa-api/src/main/java/com/yahoo/labs/samoa/topology/ISubmitter.java:
--------------------------------------------------------------------------------
1 | package com.yahoo.labs.samoa.topology;
2 |
3 | /*
4 | * #%L
5 | * SAMOA
6 | * %%
7 | * Copyright (C) 2013 Yahoo! Inc.
8 | * %%
9 | * Licensed under the Apache License, Version 2.0 (the "License");
10 | * you may not use this file except in compliance with the License.
11 | * You may obtain a copy of the License at
12 | *
13 | * http://www.apache.org/licenses/LICENSE-2.0
14 | *
15 | * Unless required by applicable law or agreed to in writing, software
16 | * distributed under the License is distributed on an "AS IS" BASIS,
17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | * See the License for the specific language governing permissions and
19 | * limitations under the License.
20 | * #L%
21 | */
22 |
23 | import com.yahoo.labs.samoa.tasks.Task;
24 |
25 | /**
26 | * Submitter interface for programatically deploying platform specific topologies.
27 | *
28 | * @author severien
29 | *
30 | */
31 | public interface ISubmitter {
32 |
33 | /**
34 | * Deploy a specific task to a platform.
35 | *
36 | * @param task
37 | */
38 | public void deployTask(Task task);
39 |
40 | /**
41 | * Sets if the task should run locally or distributed.
42 | *
43 | * @param bool
44 | */
45 | public void setLocal(boolean bool);
46 | }
47 |
--------------------------------------------------------------------------------
/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/attributeclassobservers/NumericAttributeClassObserver.java:
--------------------------------------------------------------------------------
1 | package com.yahoo.labs.samoa.moa.classifiers.core.attributeclassobservers;
2 |
3 | /*
4 | * #%L
5 | * SAMOA
6 | * %%
7 | * Copyright (C) 2007 University of Waikato, Hamilton, New Zealand
8 | * %%
9 | * Licensed under the Apache License, Version 2.0 (the "License");
10 | * you may not use this file except in compliance with the License.
11 | * You may obtain a copy of the License at
12 | *
13 | * http://www.apache.org/licenses/LICENSE-2.0
14 | *
15 | * Unless required by applicable law or agreed to in writing, software
16 | * distributed under the License is distributed on an "AS IS" BASIS,
17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | * See the License for the specific language governing permissions and
19 | * limitations under the License.
20 | * #L%
21 | */
22 |
23 | /**
24 | * Interface for observing the class data distribution for a numeric attribute.
25 | * This observer monitors the class distribution of a given attribute.
26 | * Used in naive Bayes and decision trees to monitor data statistics on leaves.
27 | *
28 | * @author Richard Kirkby (rkirkby@cs.waikato.ac.nz)
29 | * @version $Revision: 7 $
30 | */
31 | public interface NumericAttributeClassObserver extends AttributeClassObserver {
32 |
33 |
34 | }
35 |
--------------------------------------------------------------------------------
/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/attributeclassobservers/DiscreteAttributeClassObserver.java:
--------------------------------------------------------------------------------
1 | package com.yahoo.labs.samoa.moa.classifiers.core.attributeclassobservers;
2 |
3 | /*
4 | * #%L
5 | * SAMOA
6 | * %%
7 | * Copyright (C) 2007 University of Waikato, Hamilton, New Zealand
8 | * %%
9 | * Licensed under the Apache License, Version 2.0 (the "License");
10 | * you may not use this file except in compliance with the License.
11 | * You may obtain a copy of the License at
12 | *
13 | * http://www.apache.org/licenses/LICENSE-2.0
14 | *
15 | * Unless required by applicable law or agreed to in writing, software
16 | * distributed under the License is distributed on an "AS IS" BASIS,
17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | * See the License for the specific language governing permissions and
19 | * limitations under the License.
20 | * #L%
21 | */
22 |
23 | /**
24 | * Interface for observing the class data distribution for a discrete (nominal) attribute.
25 | * This observer monitors the class distribution of a given attribute.
26 | * Used in naive Bayes and decision trees to monitor data statistics on leaves.
27 | *
28 | * @author Richard Kirkby (rkirkby@cs.waikato.ac.nz)
29 | * @version $Revision: 7 $
30 | */
31 | public interface DiscreteAttributeClassObserver extends AttributeClassObserver {
32 |
33 |
34 | }
35 |
--------------------------------------------------------------------------------
/samoa-s4/samoa-s4-adapter/src/main/java/samoa/topology/adapter/S4AdapterApp.java:
--------------------------------------------------------------------------------
1 | package samoa.topology.adapter;
2 |
3 | /*
4 | * #%L
5 | * SAMOA
6 | * %%
7 | * Copyright (C) 2013 Yahoo! Inc.
8 | * %%
9 | * Licensed under the Apache License, Version 2.0 (the "License");
10 | * you may not use this file except in compliance with the License.
11 | * You may obtain a copy of the License at
12 | *
13 | * http://www.apache.org/licenses/LICENSE-2.0
14 | *
15 | * Unless required by applicable law or agreed to in writing, software
16 | * distributed under the License is distributed on an "AS IS" BASIS,
17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | * See the License for the specific language governing permissions and
19 | * limitations under the License.
20 | * #L%
21 | */
22 |
23 | import org.apache.s4.core.adapter.AdapterApp;
24 |
25 | import samoa.sandbox.SourceProcessor;
26 | import samoa.streams.StreamSourceProcessor;
27 |
28 | public class S4AdapterApp extends AdapterApp {
29 |
30 | S4EntranceProcessingItem entrancePI;
31 | StreamSourceProcessor sourceProcessor;
32 |
33 | @Override
34 | protected void onInit() {
35 | entrancePI = new S4EntranceProcessingItem(this);
36 | sourceProcessor = new StreamSourceProcessor();
37 | entrancePI.setProcessor(sourceProcessor);
38 | }
39 |
40 | @Override
41 | protected void onStart() {
42 |
43 | }
44 |
45 | }
46 |
--------------------------------------------------------------------------------
/samoa-instances/pom.xml:
--------------------------------------------------------------------------------
1 |
20 |
21 | 4.0.0
22 |
23 | UTF-8
24 |
25 |
26 | samoa-instances
27 | Instances for SAMOA
28 |
29 | samoa-instances
30 |
31 | com.yahoo.labs.samoa
32 | samoa
33 | 0.3.0-SNAPSHOT
34 |
35 |
36 |
--------------------------------------------------------------------------------
/samoa-threads/src/main/java/com/yahoo/labs/samoa/topology/impl/ThreadsEntranceProcessingItem.java:
--------------------------------------------------------------------------------
1 | package com.yahoo.labs.samoa.topology.impl;
2 |
3 | /*
4 | * #%L
5 | * SAMOA
6 | * %%
7 | * Copyright (C) 2013 Yahoo! Inc.
8 | * %%
9 | * Licensed under the Apache License, Version 2.0 (the "License");
10 | * you may not use this file except in compliance with the License.
11 | * You may obtain a copy of the License at
12 | *
13 | * http://www.apache.org/licenses/LICENSE-2.0
14 | *
15 | * Unless required by applicable law or agreed to in writing, software
16 | * distributed under the License is distributed on an "AS IS" BASIS,
17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | * See the License for the specific language governing permissions and
19 | * limitations under the License.
20 | * #L%
21 | */
22 |
23 | import com.yahoo.labs.samoa.core.EntranceProcessor;
24 | import com.yahoo.labs.samoa.topology.LocalEntranceProcessingItem;
25 |
26 | /**
27 | * EntranceProcessingItem for multithreaded engine.
28 | * @author Anh Thu Vu
29 | *
30 | */
31 | public class ThreadsEntranceProcessingItem extends LocalEntranceProcessingItem {
32 |
33 | public ThreadsEntranceProcessingItem(EntranceProcessor processor) {
34 | super(processor);
35 | }
36 |
37 | // The default waiting time when there is no available events is 100ms
38 | // Override waitForNewEvents() to change it
39 |
40 | }
41 |
--------------------------------------------------------------------------------
/samoa-api/src/main/java/com/yahoo/labs/samoa/learners/classifiers/trees/DeleteContentEvent.java:
--------------------------------------------------------------------------------
1 | package com.yahoo.labs.samoa.learners.classifiers.trees;
2 |
3 | /*
4 | * #%L
5 | * SAMOA
6 | * %%
7 | * Copyright (C) 2013 Yahoo! Inc.
8 | * %%
9 | * Licensed under the Apache License, Version 2.0 (the "License");
10 | * you may not use this file except in compliance with the License.
11 | * You may obtain a copy of the License at
12 | *
13 | * http://www.apache.org/licenses/LICENSE-2.0
14 | *
15 | * Unless required by applicable law or agreed to in writing, software
16 | * distributed under the License is distributed on an "AS IS" BASIS,
17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | * See the License for the specific language governing permissions and
19 | * limitations under the License.
20 | * #L%
21 | */
22 |
23 | /**
24 | * Delete Content Event is the content event that is sent by Model Aggregator Processor
25 | * to delete unnecessary statistic in Local Statistic Processor.
26 | * @author Arinto Murdopo
27 | *
28 | */
29 | final class DeleteContentEvent extends ControlContentEvent {
30 |
31 | private static final long serialVersionUID = -2105250722560863633L;
32 |
33 | public DeleteContentEvent(){
34 | super(-1);
35 | }
36 |
37 | DeleteContentEvent(long id) {
38 | super(id); }
39 |
40 | @Override
41 | LocStatControl getType() {
42 | return LocStatControl.DELETE;
43 | }
44 |
45 | }
46 |
--------------------------------------------------------------------------------
/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/tasks/ResultPreviewListener.java:
--------------------------------------------------------------------------------
1 | package com.yahoo.labs.samoa.moa.tasks;
2 |
3 | /*
4 | * #%L
5 | * SAMOA
6 | * %%
7 | * Copyright (C) 2007 University of Waikato, Hamilton, New Zealand
8 | * %%
9 | * Licensed under the Apache License, Version 2.0 (the "License");
10 | * you may not use this file except in compliance with the License.
11 | * You may obtain a copy of the License at
12 | *
13 | * http://www.apache.org/licenses/LICENSE-2.0
14 | *
15 | * Unless required by applicable law or agreed to in writing, software
16 | * distributed under the License is distributed on an "AS IS" BASIS,
17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | * See the License for the specific language governing permissions and
19 | * limitations under the License.
20 | * #L%
21 | */
22 |
/**
 * Interface implemented by classes that preview results on the graphical
 * user interface.
 *
 * @author Richard Kirkby (rkirkby@cs.waikato.ac.nz)
 * @version $Revision: 7 $
 */
public interface ResultPreviewListener {

    /**
     * Receives the signal from TaskMonitor that the latest preview has
     * changed. PreviewPanel implements this method to change the results
     * that are shown in its panel.
     */
    void latestPreviewChanged();
}
41 |
--------------------------------------------------------------------------------
/bin/samoa-storm.properties:
--------------------------------------------------------------------------------
1 | ###
2 | # #%L
3 | # SAMOA
4 | # %%
5 | # Copyright (C) 2013 Yahoo! Inc.
6 | # %%
7 | # Licensed under the Apache License, Version 2.0 (the "License");
8 | # you may not use this file except in compliance with the License.
9 | # You may obtain a copy of the License at
10 | #
11 | # http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing, software
14 | # distributed under the License is distributed on an "AS IS" BASIS,
15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 | # See the License for the specific language governing permissions and
17 | # limitations under the License.
18 | # #L%
19 | ###
20 |
21 | # SAMOA Storm properties file
22 | # This file contains specific configurations for SAMOA deployment in the Storm platform
23 | # Note that you still need to configure Storm client in your machine,
24 | # including setting up Storm configuration file (~/.storm/storm.yaml) with correct settings
25 |
26 | # samoa.storm.mode corresponds to the execution mode of the Task in Storm
27 | # possible values:
28 | # 1. cluster: the Task will be sent into nimbus. The nimbus is configured by Storm configuration file
29 | # 2. local: the Task will be sent using local Storm cluster
30 | samoa.storm.mode=local
31 |
32 | # samoa.storm.numworker corresponds to the number of worker processes allocated in Storm cluster
33 | # possible values: any integer greater than 0
34 | samoa.storm.numworker=4
35 |
36 |
--------------------------------------------------------------------------------
/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/core/InstanceExample.java:
--------------------------------------------------------------------------------
1 | package com.yahoo.labs.samoa.moa.core;
2 |
3 | /*
4 | * #%L
5 | * SAMOA
6 | * %%
7 | * Copyright (C) 2007 University of Waikato, Hamilton, New Zealand
8 | * %%
9 | * Licensed under the Apache License, Version 2.0 (the "License");
10 | * you may not use this file except in compliance with the License.
11 | * You may obtain a copy of the License at
12 | *
13 | * http://www.apache.org/licenses/LICENSE-2.0
14 | *
15 | * Unless required by applicable law or agreed to in writing, software
16 | * distributed under the License is distributed on an "AS IS" BASIS,
17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | * See the License for the specific language governing permissions and
19 | * limitations under the License.
20 | * #L%
21 | */
22 |
23 | import com.yahoo.labs.samoa.instances.Instance;
24 | import java.io.Serializable;
25 |
26 | public class InstanceExample implements Example, Serializable {
27 |
28 | public Instance instance;
29 |
30 | public InstanceExample (Instance inst)
31 | {
32 | this.instance = inst;
33 | }
34 |
35 | @Override
36 | public Instance getData() {
37 | return this.instance;
38 | }
39 |
40 | @Override
41 | public double weight() {
42 | return this.instance.weight();
43 | }
44 |
45 | @Override
46 | public void setWeight(double w) {
47 | this.instance.setWeight(w);
48 | }
49 |
50 | }
51 |
--------------------------------------------------------------------------------
/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/streams/clustering/ClusterEvent.java:
--------------------------------------------------------------------------------
1 |
2 | package com.yahoo.labs.samoa.moa.streams.clustering;
3 |
4 | /*
5 | * #%L
6 | * SAMOA
7 | * %%
8 | * Copyright (C) 2010 RWTH Aachen University, Germany
9 | * %%
10 | * Licensed under the Apache License, Version 2.0 (the "License");
11 | * you may not use this file except in compliance with the License.
12 | * You may obtain a copy of the License at
13 | *
14 | * http://www.apache.org/licenses/LICENSE-2.0
15 | *
16 | * Unless required by applicable law or agreed to in writing, software
17 | * distributed under the License is distributed on an "AS IS" BASIS,
18 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19 | * See the License for the specific language governing permissions and
20 | * limitations under the License.
21 | * #L%
22 | */
23 |
24 | import java.util.EventObject;
25 |
/**
 * Event object that carries a type, a message and a timestamp in addition to
 * the event source.
 */
public class ClusterEvent extends EventObject {

    private String type;
    private String message;
    private long timestamp;

    /**
     * Creates a new event.
     *
     * @param source the object on which the event occurred; rejected by
     *               {@link EventObject} with an
     *               {@link IllegalArgumentException} when {@code null}
     * @param timestamp the timestamp of the event
     * @param type the type of the event
     * @param message the message of the event
     */
    public ClusterEvent(Object source, long timestamp, String type, String message) {
        super(source);
        this.timestamp = timestamp;
        this.type = type;
        this.message = message;
    }

    /**
     * @return the message given at construction
     */
    public String getMessage() {
        return this.message;
    }

    /**
     * @return the timestamp given at construction
     */
    public long getTimestamp() {
        return this.timestamp;
    }

    /**
     * @return the type given at construction
     */
    public String getType() {
        return this.type;
    }
}
51 |
--------------------------------------------------------------------------------
/samoa-api/src/main/java/com/yahoo/labs/samoa/topology/EntranceProcessingItem.java:
--------------------------------------------------------------------------------
1 | package com.yahoo.labs.samoa.topology;
2 |
3 | /*
4 | * #%L
5 | * SAMOA
6 | * %%
7 | * Copyright (C) 2013 Yahoo! Inc.
8 | * %%
9 | * Licensed under the Apache License, Version 2.0 (the "License");
10 | * you may not use this file except in compliance with the License.
11 | * You may obtain a copy of the License at
12 | *
13 | * http://www.apache.org/licenses/LICENSE-2.0
14 | *
15 | * Unless required by applicable law or agreed to in writing, software
16 | * distributed under the License is distributed on an "AS IS" BASIS,
17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | * See the License for the specific language governing permissions and
19 | * limitations under the License.
20 | * #L%
21 | */
22 |
23 | import com.yahoo.labs.samoa.core.EntranceProcessor;
24 |
25 | /**
26 | * Entrance processing item interface.
27 | */
28 | public interface EntranceProcessingItem extends IProcessingItem {
29 |
30 | @Override
31 | /**
32 | * Gets the processing item processor.
33 | *
34 | * @return the embedded EntranceProcessor.
35 | */
36 | public EntranceProcessor getProcessor();
37 |
38 | /**
39 | * Set the single output stream for this EntranceProcessingItem.
40 | *
41 | * @param stream
42 | * the stream
43 | * @return the current instance of the EntranceProcessingItem for fluent interface.
44 | */
45 | public EntranceProcessingItem setOutputStream(Stream stream);
46 | }
--------------------------------------------------------------------------------
/samoa-local/src/test/java/com/yahoo/labs/samoa/topology/impl/SimpleEngineTest.java:
--------------------------------------------------------------------------------
1 | package com.yahoo.labs.samoa.topology.impl;
2 |
3 | /*
4 | * #%L
5 | * SAMOA
6 | * %%
7 | * Copyright (C) 2013 Yahoo! Inc.
8 | * %%
9 | * Licensed under the Apache License, Version 2.0 (the "License");
10 | * you may not use this file except in compliance with the License.
11 | * You may obtain a copy of the License at
12 | *
13 | * http://www.apache.org/licenses/LICENSE-2.0
14 | *
15 | * Unless required by applicable law or agreed to in writing, software
16 | * distributed under the License is distributed on an "AS IS" BASIS,
17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | * See the License for the specific language governing permissions and
19 | * limitations under the License.
20 | * #L%
21 | */
22 |
23 | import mockit.Mocked;
24 | import mockit.NonStrictExpectations;
25 | import mockit.Tested;
26 | import mockit.Verifications;
27 |
28 | import org.junit.Test;
29 |
30 | /**
31 | * @author Anh Thu Vu
32 | *
33 | */
34 | public class SimpleEngineTest {
35 |
36 | @Tested private SimpleEngine unused;
37 | @Mocked private SimpleTopology topology;
38 | @Mocked private Runtime mockedRuntime;
39 |
40 | @Test
41 | public void testSubmitTopology() {
42 | new NonStrictExpectations() {
43 | {
44 | Runtime.getRuntime();
45 | result=mockedRuntime;
46 | mockedRuntime.exit(0);
47 | }
48 | };
49 | SimpleEngine.submitTopology(topology);
50 | new Verifications() {
51 | {
52 | topology.run();
53 | }
54 | };
55 | }
56 |
57 | }
58 |
--------------------------------------------------------------------------------
/samoa-instances/src/main/java/com/yahoo/labs/samoa/instances/InstanceData.java:
--------------------------------------------------------------------------------
1 | /*
2 | * To change this template, choose Tools | Templates
3 | * and open the template in the editor.
4 | */
5 | package com.yahoo.labs.samoa.instances;
6 |
7 | /*
8 | * #%L
9 | * SAMOA
10 | * %%
11 | * Copyright (C) 2013 Yahoo! Inc.
12 | * %%
13 | * Licensed under the Apache License, Version 2.0 (the "License");
14 | * you may not use this file except in compliance with the License.
15 | * You may obtain a copy of the License at
16 | *
17 | * http://www.apache.org/licenses/LICENSE-2.0
18 | *
19 | * Unless required by applicable law or agreed to in writing, software
20 | * distributed under the License is distributed on an "AS IS" BASIS,
21 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
22 | * See the License for the specific language governing permissions and
23 | * limitations under the License.
24 | * #L%
25 | */
26 |
27 | import java.io.Serializable;
28 |
/**
 * Serializable storage contract for the attribute values of an instance.
 * <p>
 * Two groups of accessors are declared: {@link #value(int)} / {@link #isMissing(int)}, which take
 * an {@code instAttIndex}, and {@link #index(int)} / {@link #valueSparse(int)} /
 * {@link #isMissingSparse(int)}, which take a plain position.
 * NOTE(review): presumably the second group addresses the stored (possibly sparse) entries while
 * the first addresses attributes - confirm against the implementing classes.
 *
 * @author abifet
 */
public interface InstanceData extends Serializable {

  /**
   * @return the number of attributes
   */
  int numAttributes();

  /**
   * @param instAttIndex
   *          the attribute index to read
   * @return the value held for that index
   */
  double value(int instAttIndex);

  /**
   * @param instAttIndex
   *          the attribute index to test
   * @return whether the value for that index is missing
   */
  boolean isMissing(int instAttIndex);

  /**
   * @return the number of values (presumably the number of stored entries - TODO confirm)
   */
  int numValues();

  /**
   * @param i
   *          a position (presumably among the stored entries - TODO confirm)
   * @return the index associated with that position
   */
  int index(int i);

  /**
   * @param i
   *          a position (presumably among the stored entries - TODO confirm)
   * @return the value at that position
   */
  double valueSparse(int i);

  /**
   * @param p1
   *          a position (presumably among the stored entries - TODO confirm)
   * @return whether the value at that position is missing
   */
  boolean isMissingSparse(int p1);

  // Disabled accessor, kept for reference:
  //public double value(Attribute attribute);

  /**
   * @return the values as a double array
   */
  double[] toDoubleArray();

  /**
   * Stores a value.
   *
   * @param m_numAttributes
   *          where to store the value (presumably an attribute index despite the name - TODO
   *          confirm)
   * @param d
   *          the value to store
   */
  void setValue(int m_numAttributes, double d);

}
56 |
--------------------------------------------------------------------------------
/bin/samoa-s4.properties:
--------------------------------------------------------------------------------
1 | ###
2 | # #%L
3 | # SAMOA
4 | # %%
5 | # Copyright (C) 2013 Yahoo! Inc.
6 | # %%
7 | # Licensed under the Apache License, Version 2.0 (the "License");
8 | # you may not use this file except in compliance with the License.
9 | # You may obtain a copy of the License at
10 | #
11 | # http://www.apache.org/licenses/LICENSE-2.0
12 | #
13 | # Unless required by applicable law or agreed to in writing, software
14 | # distributed under the License is distributed on an "AS IS" BASIS,
15 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 | # See the License for the specific language governing permissions and
17 | # limitations under the License.
18 | # #L%
19 | ###
20 |
21 | # ===================================================== #
22 | # SAMOA S4 properties file #
23 | # ----------------------------------------------------- #
24 | # This file contains specific configuration for #
25 | # the deployment in the S4 platform. #
26 | # ===================================================== #
27 |
28 | # Zookeeper Server
29 | zookeeper.server=localhost
30 | zookeeper.port=2181
31 |
32 | # Simple HTTP Server providing the packaged S4 jar
33 | #http.server.ip=localhost
34 | http.server.port=8000
35 |
36 | # Name of the S4 cluster
37 | cluster.name=cluster
38 | cluster.port=12000
39 |
40 | # Deployment strategy: local or cluster
41 | samoa.deploy.mode=local
42 |
43 | # Directory for storing the results of the algorithms.
44 | results.dir=/tmp/samoa/results
45 | # Directory for storing the evaluation results, if the algorithms are to be evaluated.
46 | evaluation.dir=/tmp/samoa/evaluation
47 |
--------------------------------------------------------------------------------
/samoa-api/src/main/java/com/yahoo/labs/samoa/learners/classifiers/rules/common/NonLearningRule.java:
--------------------------------------------------------------------------------
1 | package com.yahoo.labs.samoa.learners.classifiers.rules.common;
2 |
3 | /*
4 | * #%L
5 | * SAMOA
6 | * %%
7 | * Copyright (C) 2013 - 2014 Yahoo! Inc.
8 | * %%
9 | * Licensed under the Apache License, Version 2.0 (the "License");
10 | * you may not use this file except in compliance with the License.
11 | * You may obtain a copy of the License at
12 | *
13 | * http://www.apache.org/licenses/LICENSE-2.0
14 | *
15 | * Unless required by applicable law or agreed to in writing, software
16 | * distributed under the License is distributed on an "AS IS" BASIS,
17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | * See the License for the specific language governing permissions and
19 | * limitations under the License.
20 | * #L%
21 | */
22 |
23 | /**
24 | * The most basic rule: inherit from Rule the ID and list of features.
25 | *
26 | * @author Anh Thu Vu
27 | *
28 | */
29 | /*
30 | * This branch (Non-learning rule) was created for an old implementation.
31 | * Probably should remove None-Learning and Learning Rule classes,
32 | * merge Rule with LearningRule.
33 | */
34 | public class NonLearningRule extends Rule {
35 |
36 | /**
37 | *
38 | */
39 | private static final long serialVersionUID = -1210907339230307784L;
40 |
41 | public NonLearningRule(ActiveRule rule) {
42 | this.nodeList = rule.nodeList;
43 | this.ruleNumberID = rule.ruleNumberID;
44 | }
45 |
46 | @Override
47 | public void getDescription(StringBuilder sb, int indent) {
48 | // do nothing
49 | }
50 |
51 | }
52 |
--------------------------------------------------------------------------------
/samoa-api/src/main/java/com/yahoo/labs/samoa/topology/Stream.java:
--------------------------------------------------------------------------------
1 | package com.yahoo.labs.samoa.topology;
2 |
3 | /*
4 | * #%L
5 | * SAMOA
6 | * %%
7 | * Copyright (C) 2013 Yahoo! Inc.
8 | * %%
9 | * Licensed under the Apache License, Version 2.0 (the "License");
10 | * you may not use this file except in compliance with the License.
11 | * You may obtain a copy of the License at
12 | *
13 | * http://www.apache.org/licenses/LICENSE-2.0
14 | *
15 | * Unless required by applicable law or agreed to in writing, software
16 | * distributed under the License is distributed on an "AS IS" BASIS,
17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | * See the License for the specific language governing permissions and
19 | * limitations under the License.
20 | * #L%
21 | */
22 |
23 | import com.yahoo.labs.samoa.core.ContentEvent;
24 |
25 | /**
26 | * Stream interface.
27 | *
28 | * @author severien
29 | *
30 | */
31 | public interface Stream {
32 |
33 | /**
34 | * Puts events into a platform specific data stream.
35 | *
36 | * @param event
37 | */
38 | public void put(ContentEvent event);
39 |
40 | /**
41 | * Sets the stream id which is represented by a name.
42 | *
43 | * @param stream
44 | */
45 | //public void setStreamId(String stream);
46 |
47 |
48 | /**
49 | * Gets stream id.
50 | *
51 | * @return id
52 | */
53 | public String getStreamId();
54 |
55 | /**
56 | * Set batch size
57 | *
58 | * @param batchSize
59 | * the suggested size for batching messages on this stream
60 | */
61 | public void setBatchSize(int batchsize);
62 | }
--------------------------------------------------------------------------------
/bin/samza-dist/run-job.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | ###
4 | # #%L
5 | # SAMOA
6 | # %%
7 | # Copyright (C) 2013 - 2014 Yahoo! Inc.
8 | # %%
9 | # Licensed under the Apache License, Version 2.0 (the "License");
10 | # you may not use this file except in compliance with the License.
11 | # You may obtain a copy of the License at
12 | #
13 | # http://www.apache.org/licenses/LICENSE-2.0
14 | #
15 | # Unless required by applicable law or agreed to in writing, software
16 | # distributed under the License is distributed on an "AS IS" BASIS,
17 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | # See the License for the specific language governing permissions and
19 | # limitations under the License.
20 | # #L%
21 | ###
22 | # Licensed to the Apache Software Foundation (ASF) under one
23 | # or more contributor license agreements. See the NOTICE file
24 | # distributed with this work for additional information
25 | # regarding copyright ownership. The ASF licenses this file
26 | # to you under the Apache License, Version 2.0 (the
27 | # "License"); you may not use this file except in compliance
28 | # with the License. You may obtain a copy of the License at
29 | #
30 | # http://www.apache.org/licenses/LICENSE-2.0
31 | #
32 | # Unless required by applicable law or agreed to in writing,
33 | # software distributed under the License is distributed on an
34 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
35 | # KIND, either express or implied. See the License for the
36 | # specific language governing permissions and limitations
37 | # under the License.
38 |
# Quote the script directory and "$@" so that an install path or job arguments containing
# whitespace or glob characters reach run-class.sh unchanged.
exec "$(dirname "$0")/run-class.sh" org.apache.samza.job.JobRunner "$@"
40 |
--------------------------------------------------------------------------------
/bin/run-container.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | ###
4 | # #%L
5 | # SAMOA
6 | # %%
7 | # Copyright (C) 2013 - 2014 Yahoo! Inc.
8 | # %%
9 | # Licensed under the Apache License, Version 2.0 (the "License");
10 | # you may not use this file except in compliance with the License.
11 | # You may obtain a copy of the License at
12 | #
13 | # http://www.apache.org/licenses/LICENSE-2.0
14 | #
15 | # Unless required by applicable law or agreed to in writing, software
16 | # distributed under the License is distributed on an "AS IS" BASIS,
17 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | # See the License for the specific language governing permissions and
19 | # limitations under the License.
20 | # #L%
21 | ###
22 | # Licensed to the Apache Software Foundation (ASF) under one
23 | # or more contributor license agreements. See the NOTICE file
24 | # distributed with this work for additional information
25 | # regarding copyright ownership. The ASF licenses this file
26 | # to you under the Apache License, Version 2.0 (the
27 | # "License"); you may not use this file except in compliance
28 | # with the License. You may obtain a copy of the License at
29 | #
30 | # http://www.apache.org/licenses/LICENSE-2.0
31 | #
32 | # Unless required by applicable law or agreed to in writing,
33 | # software distributed under the License is distributed on an
34 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
35 | # KIND, either express or implied. See the License for the
36 | # specific language governing permissions and limitations
37 | # under the License.
38 |
# Quote the script directory and "$@" so that an install path or container arguments containing
# whitespace or glob characters reach run-class.sh unchanged.
exec "$(dirname "$0")/run-class.sh" org.apache.samza.container.SamzaContainer "$@"
40 |
--------------------------------------------------------------------------------
/bin/samza-dist/run-am.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | ###
4 | # #%L
5 | # SAMOA
6 | # %%
7 | # Copyright (C) 2013 - 2014 Yahoo! Inc.
8 | # %%
9 | # Licensed under the Apache License, Version 2.0 (the "License");
10 | # you may not use this file except in compliance with the License.
11 | # You may obtain a copy of the License at
12 | #
13 | # http://www.apache.org/licenses/LICENSE-2.0
14 | #
15 | # Unless required by applicable law or agreed to in writing, software
16 | # distributed under the License is distributed on an "AS IS" BASIS,
17 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | # See the License for the specific language governing permissions and
19 | # limitations under the License.
20 | # #L%
21 | ###
22 | # Licensed to the Apache Software Foundation (ASF) under one
23 | # or more contributor license agreements. See the NOTICE file
24 | # distributed with this work for additional information
25 | # regarding copyright ownership. The ASF licenses this file
26 | # to you under the Apache License, Version 2.0 (the
27 | # "License"); you may not use this file except in compliance
28 | # with the License. You may obtain a copy of the License at
29 | #
30 | # http://www.apache.org/licenses/LICENSE-2.0
31 | #
32 | # Unless required by applicable law or agreed to in writing,
33 | # software distributed under the License is distributed on an
34 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
35 | # KIND, either express or implied. See the License for the
36 | # specific language governing permissions and limitations
37 | # under the License.
38 |
# Quote the script directory and "$@" so that an install path or application-master arguments
# containing whitespace or glob characters reach run-class.sh unchanged.
exec "$(dirname "$0")/run-class.sh" org.apache.samza.job.yarn.SamzaAppMaster "$@"
40 |
--------------------------------------------------------------------------------
/samoa-instances/src/main/java/com/yahoo/labs/samoa/instances/SparseInstance.java:
--------------------------------------------------------------------------------
1 | /*
2 | * To change this template, choose Tools | Templates
3 | * and open the template in the editor.
4 | */
5 | package com.yahoo.labs.samoa.instances;
6 |
7 | /*
8 | * #%L
9 | * SAMOA
10 | * %%
11 | * Copyright (C) 2013 Yahoo! Inc.
12 | * %%
13 | * Licensed under the Apache License, Version 2.0 (the "License");
14 | * you may not use this file except in compliance with the License.
15 | * You may obtain a copy of the License at
16 | *
17 | * http://www.apache.org/licenses/LICENSE-2.0
18 | *
19 | * Unless required by applicable law or agreed to in writing, software
20 | * distributed under the License is distributed on an "AS IS" BASIS,
21 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
22 | * See the License for the specific language governing permissions and
23 | * limitations under the License.
24 | * #L%
25 | */
26 |
27 | /**
28 | *
29 | * @author abifet
30 | */
31 | public class SparseInstance extends SingleLabelInstance{
32 |
33 | public SparseInstance(double d, double[] res) {
34 | super(d,res);
35 | }
36 | public SparseInstance(SingleLabelInstance inst) {
37 | super(inst);
38 | }
39 |
40 | public SparseInstance(double numberAttributes) {
41 | //super(1, new double[(int) numberAttributes-1]);
42 | super(1,null,null,(int) numberAttributes);
43 | }
44 |
45 | public SparseInstance(double weight, double[] attributeValues, int[] indexValues, int numberAttributes) {
46 | super(weight,attributeValues,indexValues,numberAttributes);
47 | }
48 |
49 | }
50 |
--------------------------------------------------------------------------------
/bin/samza-dist/run-container.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | ###
4 | # #%L
5 | # SAMOA
6 | # %%
7 | # Copyright (C) 2013 - 2014 Yahoo! Inc.
8 | # %%
9 | # Licensed under the Apache License, Version 2.0 (the "License");
10 | # you may not use this file except in compliance with the License.
11 | # You may obtain a copy of the License at
12 | #
13 | # http://www.apache.org/licenses/LICENSE-2.0
14 | #
15 | # Unless required by applicable law or agreed to in writing, software
16 | # distributed under the License is distributed on an "AS IS" BASIS,
17 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | # See the License for the specific language governing permissions and
19 | # limitations under the License.
20 | # #L%
21 | ###
22 | # Licensed to the Apache Software Foundation (ASF) under one
23 | # or more contributor license agreements. See the NOTICE file
24 | # distributed with this work for additional information
25 | # regarding copyright ownership. The ASF licenses this file
26 | # to you under the Apache License, Version 2.0 (the
27 | # "License"); you may not use this file except in compliance
28 | # with the License. You may obtain a copy of the License at
29 | #
30 | # http://www.apache.org/licenses/LICENSE-2.0
31 | #
32 | # Unless required by applicable law or agreed to in writing,
33 | # software distributed under the License is distributed on an
34 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
35 | # KIND, either express or implied. See the License for the
36 | # specific language governing permissions and limitations
37 | # under the License.
38 |
# Quote the script directory and "$@" so that an install path or container arguments containing
# whitespace or glob characters reach run-class.sh unchanged.
exec "$(dirname "$0")/run-class.sh" org.apache.samza.container.SamzaContainer "$@"
40 |
--------------------------------------------------------------------------------
/samoa-api/src/main/java/com/yahoo/labs/samoa/examples/HelloWorldDestinationProcessor.java:
--------------------------------------------------------------------------------
1 | package com.yahoo.labs.samoa.examples;
2 |
3 | /*
4 | * #%L
5 | * SAMOA
6 | * %%
7 | * Copyright (C) 2013 - 2014 Yahoo! Inc.
8 | * %%
9 | * Licensed under the Apache License, Version 2.0 (the "License");
10 | * you may not use this file except in compliance with the License.
11 | * You may obtain a copy of the License at
12 | *
13 | * http://www.apache.org/licenses/LICENSE-2.0
14 | *
15 | * Unless required by applicable law or agreed to in writing, software
16 | * distributed under the License is distributed on an "AS IS" BASIS,
17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | * See the License for the specific language governing permissions and
19 | * limitations under the License.
20 | * #L%
21 | */
22 |
23 | import com.yahoo.labs.samoa.core.ContentEvent;
24 | import com.yahoo.labs.samoa.core.Processor;
25 |
26 | /**
27 | * Example {@link Processor} that simply prints the received events to standard output.
28 | */
29 | public class HelloWorldDestinationProcessor implements Processor {
30 |
31 | private static final long serialVersionUID = -6042613438148776446L;
32 | private int processorId;
33 |
34 | @Override
35 | public boolean process(ContentEvent event) {
36 | System.out.println(processorId + ": " + event);
37 | return true;
38 | }
39 |
40 | @Override
41 | public void onCreate(int id) {
42 | this.processorId = id;
43 | }
44 |
45 | @Override
46 | public Processor newProcessor(Processor p) {
47 | return new HelloWorldDestinationProcessor();
48 | }
49 | }
50 |
--------------------------------------------------------------------------------
/samoa-api/src/main/java/com/yahoo/labs/samoa/learners/AdaptiveLearner.java:
--------------------------------------------------------------------------------
1 | package com.yahoo.labs.samoa.learners;
2 |
3 | /*
4 | * #%L
5 | * SAMOA
6 | * %%
7 | * Copyright (C) 2013 Yahoo! Inc.
8 | * %%
9 | * Licensed under the Apache License, Version 2.0 (the "License");
10 | * you may not use this file except in compliance with the License.
11 | * You may obtain a copy of the License at
12 | *
13 | * http://www.apache.org/licenses/LICENSE-2.0
14 | *
15 | * Unless required by applicable law or agreed to in writing, software
16 | * distributed under the License is distributed on an "AS IS" BASIS,
17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | * See the License for the specific language governing permissions and
19 | * limitations under the License.
20 | * #L%
21 | */
22 |
23 | /**
24 | * License
25 | */
26 |
27 |
28 | import com.yahoo.labs.samoa.moa.classifiers.core.driftdetection.ChangeDetector;
29 | import com.yahoo.labs.samoa.topology.Stream;
30 |
31 | /**
32 | * The Interface Adaptive Learner.
33 | * Initializing Classifier should initalize PI to connect the Classifier with the input stream
34 | * and initialize result stream so that other PI can connect to the classification result of this classifier
35 | */
36 |
37 | public interface AdaptiveLearner {
38 |
39 | /**
40 | * Gets the change detector item.
41 | *
42 | * @return the change detector item
43 | */
44 | public ChangeDetector getChangeDetector();
45 |
46 | /**
47 | * Sets the change detector item.
48 | *
49 | * @param cd the change detector item
50 | */
51 | public void setChangeDetector(ChangeDetector cd);
52 |
53 | }
54 |
--------------------------------------------------------------------------------
/samoa-storm/src/main/java/com/yahoo/labs/samoa/topology/impl/StormTopology.java:
--------------------------------------------------------------------------------
1 | package com.yahoo.labs.samoa.topology.impl;
2 |
3 | /*
4 | * #%L
5 | * SAMOA
6 | * %%
7 | * Copyright (C) 2013 Yahoo! Inc.
8 | * %%
9 | * Licensed under the Apache License, Version 2.0 (the "License");
10 | * you may not use this file except in compliance with the License.
11 | * You may obtain a copy of the License at
12 | *
13 | * http://www.apache.org/licenses/LICENSE-2.0
14 | *
15 | * Unless required by applicable law or agreed to in writing, software
16 | * distributed under the License is distributed on an "AS IS" BASIS,
17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | * See the License for the specific language governing permissions and
19 | * limitations under the License.
20 | * #L%
21 | */
22 |
23 | import backtype.storm.topology.TopologyBuilder;
24 |
25 | import com.yahoo.labs.samoa.topology.IProcessingItem;
26 | import com.yahoo.labs.samoa.topology.AbstractTopology;
27 |
28 | /**
29 | * Adaptation of SAMOA topology in samoa-storm
30 | * @author Arinto Murdopo
31 | *
32 | */
33 | public class StormTopology extends AbstractTopology {
34 |
35 | private TopologyBuilder builder;
36 |
37 | public StormTopology(String topologyName){
38 | super(topologyName);
39 | this.builder = new TopologyBuilder();
40 | }
41 |
42 | @Override
43 | public void addProcessingItem(IProcessingItem procItem, int parallelismHint){
44 | StormTopologyNode stormNode = (StormTopologyNode) procItem;
45 | stormNode.addToTopology(this, parallelismHint);
46 | super.addProcessingItem(procItem, parallelismHint);
47 | }
48 |
49 | public TopologyBuilder getStormBuilder(){
50 | return builder;
51 | }
52 | }
53 |
--------------------------------------------------------------------------------
/samoa-threads/src/main/java/com/yahoo/labs/samoa/topology/impl/ThreadsProcessingItemInstance.java:
--------------------------------------------------------------------------------
1 | package com.yahoo.labs.samoa.topology.impl;
2 |
3 | /*
4 | * #%L
5 | * SAMOA
6 | * %%
7 | * Copyright (C) 2013 Yahoo! Inc.
8 | * %%
9 | * Licensed under the Apache License, Version 2.0 (the "License");
10 | * you may not use this file except in compliance with the License.
11 | * You may obtain a copy of the License at
12 | *
13 | * http://www.apache.org/licenses/LICENSE-2.0
14 | *
15 | * Unless required by applicable law or agreed to in writing, software
16 | * distributed under the License is distributed on an "AS IS" BASIS,
17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | * See the License for the specific language governing permissions and
19 | * limitations under the License.
20 | * #L%
21 | */
22 |
23 | import com.yahoo.labs.samoa.core.ContentEvent;
24 | import com.yahoo.labs.samoa.core.Processor;
25 |
26 | /**
27 | * Lightweight replicas of ThreadProcessingItem.
28 | * ThreadsProcessingItem manages a list of these objects and
29 | * assigns each incoming message to be processed by one of them.
30 | * @author Anh Thu Vu
31 | *
32 | */
33 | public class ThreadsProcessingItemInstance {
34 |
35 | private Processor processor;
36 | private int threadIndex;
37 |
38 | public ThreadsProcessingItemInstance(Processor processor, int threadIndex) {
39 | this.processor = processor;
40 | this.threadIndex = threadIndex;
41 | }
42 |
43 | public int getThreadIndex() {
44 | return this.threadIndex;
45 | }
46 |
47 | public Processor getProcessor() {
48 | return this.processor;
49 | }
50 |
51 | public void processEvent(ContentEvent event) {
52 | this.processor.process(event);
53 | }
54 | }
55 |
--------------------------------------------------------------------------------
/samoa-api/src/main/java/com/yahoo/labs/samoa/learners/classifiers/trees/LearningNode.java:
--------------------------------------------------------------------------------
1 | package com.yahoo.labs.samoa.learners.classifiers.trees;
2 |
3 | /*
4 | * #%L
5 | * SAMOA
6 | * %%
7 | * Copyright (C) 2013 Yahoo! Inc.
8 | * %%
9 | * Licensed under the Apache License, Version 2.0 (the "License");
10 | * you may not use this file except in compliance with the License.
11 | * You may obtain a copy of the License at
12 | *
13 | * http://www.apache.org/licenses/LICENSE-2.0
14 | *
15 | * Unless required by applicable law or agreed to in writing, software
16 | * distributed under the License is distributed on an "AS IS" BASIS,
17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | * See the License for the specific language governing permissions and
19 | * limitations under the License.
20 | * #L%
21 | */
22 |
23 | import com.yahoo.labs.samoa.instances.Instance;
24 |
25 | /**
26 | * Abstract class that represents a learning node
27 | * @author Arinto Murdopo
28 | *
29 | */
30 | abstract class LearningNode extends Node {
31 |
32 | private static final long serialVersionUID = 7157319356146764960L;
33 |
34 | protected LearningNode(double[] classObservation) {
35 | super(classObservation);
36 | }
37 |
38 | /**
39 | * Method to process the instance for learning
40 | * @param inst The processed instance
41 | * @param proc The model aggregator processor where this learning node exists
42 | */
43 | abstract void learnFromInstance(Instance inst, ModelAggregatorProcessor proc);
44 |
45 | @Override
46 | protected boolean isLeaf(){
47 | return true;
48 | }
49 |
50 | @Override
51 | protected FoundNode filterInstanceToLeaf(Instance inst, SplitNode parent,
52 | int parentBranch) {
53 | return new FoundNode(this, parent, parentBranch);
54 | }
55 | }
56 |
--------------------------------------------------------------------------------
/samoa-local/src/main/java/com/yahoo/labs/samoa/topology/impl/SimpleTopology.java:
--------------------------------------------------------------------------------
1 | /*
2 | * To change this template, choose Tools | Templates
3 | * and open the template in the editor.
4 | */
5 | package com.yahoo.labs.samoa.topology.impl;
6 |
7 | /*
8 | * #%L
9 | * SAMOA
10 | * %%
11 | * Copyright (C) 2013 Yahoo! Inc.
12 | * %%
13 | * Licensed under the Apache License, Version 2.0 (the "License");
14 | * you may not use this file except in compliance with the License.
15 | * You may obtain a copy of the License at
16 | *
17 | * http://www.apache.org/licenses/LICENSE-2.0
18 | *
19 | * Unless required by applicable law or agreed to in writing, software
20 | * distributed under the License is distributed on an "AS IS" BASIS,
21 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
22 | * See the License for the specific language governing permissions and
23 | * limitations under the License.
24 | * #L%
25 | */
26 |
27 | import com.yahoo.labs.samoa.topology.AbstractTopology;
28 |
29 | public class SimpleTopology extends AbstractTopology {
30 | SimpleTopology(String name) {
31 | super(name);
32 | }
33 |
34 | public void run() {
35 | if (this.getEntranceProcessingItems() == null)
36 | throw new IllegalStateException("You need to set entrance PI before running the topology.");
37 | if (this.getEntranceProcessingItems().size() != 1)
38 | throw new IllegalStateException("SimpleTopology supports 1 entrance PI only. Number of entrance PIs is "+this.getEntranceProcessingItems().size());
39 |
40 | SimpleEntranceProcessingItem entrancePi = (SimpleEntranceProcessingItem) this.getEntranceProcessingItems().toArray()[0];
41 | entrancePi.getProcessor().onCreate(0); // id=0 as it is not used in simple mode
42 | entrancePi.startSendingEvents();
43 | }
44 | }
45 |
--------------------------------------------------------------------------------
/samoa-api/src/main/java/com/yahoo/labs/samoa/core/SerializableInstance.java:
--------------------------------------------------------------------------------
1 | package com.yahoo.labs.samoa.core;
2 |
3 | /*
4 | * #%L
5 | * SAMOA
6 | * %%
7 | * Copyright (C) 2013 Yahoo! Inc.
8 | * %%
9 | * Licensed under the Apache License, Version 2.0 (the "License");
10 | * you may not use this file except in compliance with the License.
11 | * You may obtain a copy of the License at
12 | *
13 | * http://www.apache.org/licenses/LICENSE-2.0
14 | *
15 | * Unless required by applicable law or agreed to in writing, software
16 | * distributed under the License is distributed on an "AS IS" BASIS,
17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | * See the License for the specific language governing permissions and
19 | * limitations under the License.
20 | * #L%
21 | */
22 |
23 | import com.yahoo.labs.samoa.instances.DenseInstance;
24 | import com.yahoo.labs.samoa.instances.Instance;
25 |
26 | /**
27 | * License
28 | */
29 |
30 | //import weka.core.DenseInstance;
31 | //import weka.core.Instance;
32 |
33 | /**
34 | * The Class SerializableInstance.
35 | * This class is needed for serialization of kryo
36 | */
37 | public class SerializableInstance extends DenseInstance {
38 |
39 | /** The Constant serialVersionUID. */
40 | private static final long serialVersionUID = -3659459626274566468L;
41 |
42 | /**
43 | * Instantiates a new serializable instance.
44 | */
45 | public SerializableInstance() {
46 | super(0);
47 | }
48 |
49 | /**
50 | * Instantiates a new serializable instance.
51 | *
52 | * @param arg0 the arg0
53 | */
54 | public SerializableInstance(int arg0) {
55 | super(arg0);
56 | }
57 |
58 | /**
59 | * Instantiates a new serializable instance.
60 | *
61 | * @param inst the inst
62 | */
63 | public SerializableInstance(Instance inst) {
64 | super(inst);
65 | }
66 |
67 | }
--------------------------------------------------------------------------------
/samoa-api/src/main/java/com/yahoo/labs/samoa/learners/classifiers/trees/InactiveLearningNode.java:
--------------------------------------------------------------------------------
1 | package com.yahoo.labs.samoa.learners.classifiers.trees;
2 |
3 | /*
4 | * #%L
5 | * SAMOA
6 | * %%
7 | * Copyright (C) 2013 Yahoo! Inc.
8 | * %%
9 | * Licensed under the Apache License, Version 2.0 (the "License");
10 | * you may not use this file except in compliance with the License.
11 | * You may obtain a copy of the License at
12 | *
13 | * http://www.apache.org/licenses/LICENSE-2.0
14 | *
15 | * Unless required by applicable law or agreed to in writing, software
16 | * distributed under the License is distributed on an "AS IS" BASIS,
17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | * See the License for the specific language governing permissions and
19 | * limitations under the License.
20 | * #L%
21 | */
22 |
23 | import com.yahoo.labs.samoa.instances.Instance;
24 |
25 | /**
26 | * Class that represents inactive learning node. Inactive learning node is
27 | * a node which only keeps track of the observed class distribution. It does
28 | * not store the statistic for splitting the node.
29 | *
30 | * @author Arinto Murdopo
31 | *
32 | */
33 | final class InactiveLearningNode extends LearningNode {
34 |
35 | /**
36 | * Serialization version identifier.
37 | */
38 | private static final long serialVersionUID = -814552382883472302L;
39 |
40 | // Passes the initial class observation array straight to the LearningNode constructor.
41 | InactiveLearningNode(double[] initialClassObservation) {
42 | super(initialClassObservation);
43 | }
44 | // Calls addToValue with the class value (cast to int) and the instance weight; proc is not used.
45 | @Override
46 | void learnFromInstance(Instance inst, ModelAggregatorProcessor proc) {
47 | this.observedClassDistribution.addToValue(
48 | (int)inst.classValue(), inst.weight());
49 | }
50 | // Returns observedClassDistribution.getArrayCopy(); neither argument is consulted.
51 | @Override
52 | double[] getClassVotes(Instance inst, ModelAggregatorProcessor map) {
53 | return this.observedClassDistribution.getArrayCopy();
54 | }
55 |
56 | }
57 |
--------------------------------------------------------------------------------
/samoa-s4/samoa-s4-adapter/pom.xml:
--------------------------------------------------------------------------------
1 |
20 |
22 | 4.0.0
23 |
24 |
31 |
32 |
33 | samoa-s4-adapter
34 | com.yahoo.labs.bcn.samoa
35 | 0.1
36 | samoa-s4-adapter
37 | Adapter module to connect to external stream and also to provide entrance processing items for SAMOA
38 |
39 |
40 |
45 |
46 | samoa-s4
47 | com.yahoo.labs.bcn.samoa
48 | 0.1
49 |
50 |
51 |
52 |
--------------------------------------------------------------------------------
/samoa-api/src/main/java/com/yahoo/labs/samoa/learners/classifiers/trees/ControlContentEvent.java:
--------------------------------------------------------------------------------
1 | package com.yahoo.labs.samoa.learners.classifiers.trees;
2 |
3 | /*
4 | * #%L
5 | * SAMOA
6 | * %%
7 | * Copyright (C) 2013 Yahoo! Inc.
8 | * %%
9 | * Licensed under the Apache License, Version 2.0 (the "License");
10 | * you may not use this file except in compliance with the License.
11 | * You may obtain a copy of the License at
12 | *
13 | * http://www.apache.org/licenses/LICENSE-2.0
14 | *
15 | * Unless required by applicable law or agreed to in writing, software
16 | * distributed under the License is distributed on an "AS IS" BASIS,
17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | * See the License for the specific language governing permissions and
19 | * limitations under the License.
20 | * #L%
21 | */
22 |
23 | import com.yahoo.labs.samoa.core.ContentEvent;
24 |
25 | /**
26 | * Abstract class to represent ContentEvent to control Local Statistic Processor.
27 | * @author Arinto Murdopo
28 | *
29 | */
30 | abstract class ControlContentEvent implements ContentEvent {
31 |
32 | /**
33 | * Serialization version identifier.
34 | */
35 | private static final long serialVersionUID = 5837375639629708363L;
36 |
37 | protected final long learningNodeId;
38 | // No-argument constructor: the learning node id is set to -1.
39 | public ControlContentEvent(){
40 | this.learningNodeId = -1;
41 | }
42 | // Stores the given learning node id.
43 | ControlContentEvent(long id){
44 | this.learningNodeId = id;
45 | }
46 | // Always returns null; declared final, so subclasses cannot override it.
47 | @Override
48 | public final String getKey() {
49 | return null;
50 | }
51 |
52 | @Override
53 | public void setKey(String str){
54 | //Do nothing
55 | }
56 | // Returns false in this base class; not final, so a subclass may override it.
57 | @Override
58 | public boolean isLastEvent(){
59 | return false;
60 | }
61 |
62 | final long getLearningNodeId(){
63 | return this.learningNodeId;
64 | }
65 | // Each concrete event supplies its own LocStatControl value.
66 | abstract LocStatControl getType();
67 | // The control kinds declared for these events.
68 | static enum LocStatControl {
69 | COMPUTE, DELETE
70 | }
71 | }
72 |
--------------------------------------------------------------------------------
/samoa-samza/src/main/java/com/yahoo/labs/samoa/topology/impl/SamzaProcessingNode.java:
--------------------------------------------------------------------------------
1 | package com.yahoo.labs.samoa.topology.impl;
2 |
3 | /*
4 | * #%L
5 | * SAMOA
6 | * %%
7 | * Copyright (C) 2013 - 2014 Yahoo! Inc.
8 | * %%
9 | * Licensed under the Apache License, Version 2.0 (the "License");
10 | * you may not use this file except in compliance with the License.
11 | * You may obtain a copy of the License at
12 | *
13 | * http://www.apache.org/licenses/LICENSE-2.0
14 | *
15 | * Unless required by applicable law or agreed to in writing, software
16 | * distributed under the License is distributed on an "AS IS" BASIS,
17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | * See the License for the specific language governing permissions and
19 | * limitations under the License.
20 | * #L%
21 | */
22 |
23 | import com.yahoo.labs.samoa.topology.IProcessingItem;
24 |
25 | /**
26 | * Common interface of SamzaEntranceProcessingItem and
27 | * SamzaProcessingItem
28 | *
29 | * @author Anh Thu Vu
30 | */
31 | public interface SamzaProcessingNode extends IProcessingItem {
32 |     /**
33 |      * Adds an output stream to this processing item.
34 |      *
35 |      * @param stream
36 |      *            the output stream to register
37 |      * @return how many output streams this processing item has
38 |      */
39 |     int addOutputStream(SamzaStream stream);
40 |
41 |     /**
42 |      * Returns the name/id of this processing item.
43 |      *
44 |      * @return the name/id of this processing item
45 |      */
46 |     // TODO: include getName() and setName() in IProcessingItem and/or AbstractEPI/PI
47 |     String getName();
48 |
49 |     /**
50 |      * Assigns the name/id of this processing item.
51 |      * @param name
52 |      *            the name/id to use for this processing item
53 |      */
54 |     // TODO: include getName() and setName() in IProcessingItem and/or AbstractEPI/PI
55 |     void setName(String name);
56 | }
--------------------------------------------------------------------------------
/samoa-api/src/main/java/com/yahoo/labs/samoa/core/Globals.java:
--------------------------------------------------------------------------------
1 | package com.yahoo.labs.samoa.core;
2 |
3 | /*
4 | * #%L
5 | * SAMOA
6 | * %%
7 | * Copyright (C) 2013 Yahoo! Inc.
8 | * %%
9 | * Licensed under the Apache License, Version 2.0 (the "License");
10 | * you may not use this file except in compliance with the License.
11 | * You may obtain a copy of the License at
12 | *
13 | * http://www.apache.org/licenses/LICENSE-2.0
14 | *
15 | * Unless required by applicable law or agreed to in writing, software
16 | * distributed under the License is distributed on an "AS IS" BASIS,
17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | * See the License for the specific language governing permissions and
19 | * limitations under the License.
20 | * #L%
21 | */
22 |
23 | /**
24 | * License
25 | */
26 |
27 | import com.github.javacliparser.StringUtils;
28 |
29 | /**
30 | * Class for storing global information about current version of SAMOA.
31 | *
32 | * @author Albert Bifet
33 | * @version $Revision: 7 $
34 | */
35 | public class Globals {
36 |     /** Title placed first in the info string (the literal ends with a space). */
37 |     public static final String workbenchTitle = "SAMOA: Scalable Advanced Massive Online Analysis Platform ";
38 |     /** Version text placed after "Version: ". */
39 |     public static final String versionString = "0.0.1";
40 |     /** Copyright text placed after "Copyright: ". */
41 |     public static final String copyrightNotice = "Copyright Yahoo! Inc 2013";
42 |     /** Web address placed after "Web: ". */
43 |     public static final String webAddress = "http://github.com/yahoo/samoa";
44 |     /** Assembles title, version, copyright and web address into one string. */
45 |     public static String getWorkbenchInfoString() {
46 |         StringBuilder info = new StringBuilder();
47 |         // Title.
48 |         info.append(workbenchTitle);
49 |         StringUtils.appendNewline(info);
50 |         // Version.
51 |         info.append("Version: ").append(versionString);
52 |         StringUtils.appendNewline(info);
53 |         // Copyright.
54 |         info.append("Copyright: ").append(copyrightNotice);
55 |         StringUtils.appendNewline(info);
56 |         info.append("Web: ").append(webAddress); // nothing is appended after the address
57 |         return info.toString();
58 |     }
59 | }
60 |
--------------------------------------------------------------------------------
/samoa-threads/src/test/java/com/yahoo/labs/samoa/topology/impl/ThreadsEventRunnableTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * #%L
3 | * SAMOA
4 | * %%
5 | * Copyright (C) 2013 Yahoo! Inc.
6 | * %%
7 | * Licensed under the Apache License, Version 2.0 (the "License");
8 | * you may not use this file except in compliance with the License.
9 | * You may obtain a copy of the License at
10 | *
11 | * http://www.apache.org/licenses/LICENSE-2.0
12 | *
13 | * Unless required by applicable law or agreed to in writing, software
14 | * distributed under the License is distributed on an "AS IS" BASIS,
15 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 | * See the License for the specific language governing permissions and
17 | * limitations under the License.
18 | * #L%
19 | */
20 | package com.yahoo.labs.samoa.topology.impl;
21 |
22 | import static org.junit.Assert.*;
23 | import mockit.Mocked;
24 | import mockit.Tested;
25 | import mockit.Verifications;
26 |
27 | import org.junit.Before;
28 | import org.junit.Test;
29 |
30 | import com.yahoo.labs.samoa.core.ContentEvent;
31 |
32 | /**
33 | * @author Anh Thu Vu
34 | *
35 | */
36 | public class ThreadsEventRunnableTest {
37 |
38 | @Tested private ThreadsEventRunnable task;
39 |
40 | @Mocked private ThreadsProcessingItemInstance piInstance;
41 | @Mocked private ContentEvent event;
42 |
43 | /** Builds the runnable under test from the mocked PI instance and the mocked event.
44 | * @throws java.lang.Exception if set-up fails
45 | */
46 | @Before
47 | public void setUp() throws Exception {
48 | task = new ThreadsEventRunnable(piInstance, event);
49 | }
50 | // The getters must return the very objects given to the constructor (identity check).
51 | @Test
52 | public void testConstructor() {
53 | assertSame("WorkerProcessingItem is not set correctly.",piInstance,task.getWorkerProcessingItem());
54 | assertSame("ContentEvent is not set correctly.",event,task.getContentEvent());
55 | }
56 | // run() must pass the event to the PI instance exactly once.
57 | @Test
58 | public void testRun() {
59 | task.run();
60 | new Verifications () {
61 | {
62 | piInstance.processEvent(event); times=1;
63 | }
64 | };
65 | }
66 |
67 | }
68 |
--------------------------------------------------------------------------------
/samoa-api/src/main/java/com/yahoo/labs/samoa/utils/StreamDestination.java:
--------------------------------------------------------------------------------
1 | package com.yahoo.labs.samoa.utils;
2 |
3 | /*
4 | * #%L
5 | * SAMOA
6 | * %%
7 | * Copyright (C) 2013 Yahoo! Inc.
8 | * %%
9 | * Licensed under the Apache License, Version 2.0 (the "License");
10 | * you may not use this file except in compliance with the License.
11 | * You may obtain a copy of the License at
12 | *
13 | * http://www.apache.org/licenses/LICENSE-2.0
14 | *
15 | * Unless required by applicable law or agreed to in writing, software
16 | * distributed under the License is distributed on an "AS IS" BASIS,
17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | * See the License for the specific language governing permissions and
19 | * limitations under the License.
20 | * #L%
21 | */
22 |
23 | import com.yahoo.labs.samoa.topology.IProcessingItem;
24 |
25 | /**
26 | * Represents one destination for streams. It has the info of:
27 | * the ProcessingItem, parallelismHint, and partitioning scheme.
28 | * Usage:
29 | * - When ProcessingItem connects to a stream, it will pass
30 | * a StreamDestination to the stream.
31 | * - Stream manages a set of StreamDestination.
32 | * - Used in single-threaded and multi-threaded local mode.
33 | * @author Anh Thu Vu
34 | *
35 | */
36 | public class StreamDestination {
37 |     private IProcessingItem pi;
38 |     private int parallelism;
39 |     private PartitioningScheme type;
40 |
41 |     /**
42 |      * Bundles a processing item with its parallelism hint and partitioning scheme.
43 |      */
44 |     public StreamDestination(IProcessingItem pi, int parallelismHint, PartitioningScheme type) {
45 |         this.type = type;
46 |         this.parallelism = parallelismHint;
47 |         this.pi = pi;
48 |     }
49 |
50 |     /** @return the processing item given to the constructor */
51 |     public IProcessingItem getProcessingItem() {
52 |         return pi;
53 |     }
54 |
55 |     /** @return the parallelism hint given to the constructor */
56 |     public int getParallelism() {
57 |         return parallelism;
58 |     }
59 |
60 |     /** @return the partitioning scheme given to the constructor */
61 |     public PartitioningScheme getPartitioningScheme() {
62 |         return type;
63 |     }
64 |
65 | }
66 |
--------------------------------------------------------------------------------
/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/core/FastVector.java:
--------------------------------------------------------------------------------
1 |
2 | /*
3 | * FastVector.java
4 |
5 | *
6 | */
7 | package com.yahoo.labs.samoa.moa.core;
8 |
9 | /*
10 | * #%L
11 | * SAMOA
12 | * %%
13 | * Copyright (C) 1999 - 2012 University of Waikato, Hamilton, New Zealand
14 | * %%
15 | * Licensed under the Apache License, Version 2.0 (the "License");
16 | * you may not use this file except in compliance with the License.
17 | * You may obtain a copy of the License at
18 | *
19 | * http://www.apache.org/licenses/LICENSE-2.0
20 | *
21 | * Unless required by applicable law or agreed to in writing, software
22 | * distributed under the License is distributed on an "AS IS" BASIS,
23 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
24 | * See the License for the specific language governing permissions and
25 | * limitations under the License.
26 | * #L%
27 | */
28 |
29 | import java.util.ArrayList;
30 |
31 | /**
32 | * Simple extension of ArrayList. Exists for legacy reasons.
33 | *
34 | * @author Eibe Frank (eibe@cs.waikato.ac.nz)
35 | * @version $Revision: 8034 $
36 | */
37 | public class FastVector<E> extends ArrayList<E> {
38 |
39 |     /**
40 |      * Adds an element to this vector. Increases its capacity if it is not large
41 |      * enough.
42 |      *
43 |      * @param element the element to add
44 |      */
45 |     public final void addElement(E element) {
46 |         add(element);
47 |     }
48 |
49 |     /**
50 |      * Returns the element at the given position; delegates to {@code get(int)}.
51 |      *
52 |      * @param index the element's index
53 |      * @return the element with the given index
54 |      */
55 |     public final E elementAt(int index) {
56 |         return get(index);
57 |     }
58 |
59 |     /**
60 |      * Deletes an element from this vector; delegates to {@code remove(int)}.
61 |      *
62 |      * @param index the index of the element to be deleted
63 |      */
64 |     public final void removeElementAt(int index) {
65 |         remove(index);
66 |     }
67 | }
68 |
--------------------------------------------------------------------------------
/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/MOAObject.java:
--------------------------------------------------------------------------------
1 | package com.yahoo.labs.samoa.moa;
2 |
3 | /*
4 | * #%L
5 | * SAMOA
6 | * %%
7 | * Copyright (C) 2007 University of Waikato, Hamilton, New Zealand
8 | * %%
9 | * Licensed under the Apache License, Version 2.0 (the "License");
10 | * you may not use this file except in compliance with the License.
11 | * You may obtain a copy of the License at
12 | *
13 | * http://www.apache.org/licenses/LICENSE-2.0
14 | *
15 | * Unless required by applicable law or agreed to in writing, software
16 | * distributed under the License is distributed on an "AS IS" BASIS,
17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | * See the License for the specific language governing permissions and
19 | * limitations under the License.
20 | * #L%
21 | */
22 |
23 | import java.io.Serializable;
24 |
25 | /**
26 | * Interface implemented by classes in MOA, so that all are serializable,
27 | * can produce copies of their objects, and can measure their memory size.
28 | * They also give a string description.
29 | *
30 | * @author Richard Kirkby (rkirkby@cs.waikato.ac.nz)
31 | * @version $Revision: 7 $
32 | */
33 | public interface MOAObject extends Serializable {
34 |
35 |     /**
36 |      * Reports the memory size of this object.
37 |      *
38 |      * @return the memory size of this object
39 |      */
40 |     int measureByteSize();
41 |
42 |     /**
43 |      * Produces a copy of this object.
44 |      *
45 |      * @return a copy of this object
46 |      */
47 |     MOAObject copy();
48 |
49 |     /**
50 |      * Adds a string description of this object to the given builder.
51 |      * AbstractMOAObject.toString uses this method
52 |      * to obtain the string representation of the object.
53 |      *
54 |      * @param sb the stringbuilder that receives the description
55 |      * @param indent the number of characters to indent
56 |      */
57 |     void getDescription(StringBuilder sb, int indent);
58 | }
59 |
--------------------------------------------------------------------------------
/samoa-storm/src/main/java/com/yahoo/labs/samoa/topology/impl/StormBoltStream.java:
--------------------------------------------------------------------------------
1 | package com.yahoo.labs.samoa.topology.impl;
2 |
3 | /*
4 | * #%L
5 | * SAMOA
6 | * %%
7 | * Copyright (C) 2013 Yahoo! Inc.
8 | * %%
9 | * Licensed under the Apache License, Version 2.0 (the "License");
10 | * you may not use this file except in compliance with the License.
11 | * You may obtain a copy of the License at
12 | *
13 | * http://www.apache.org/licenses/LICENSE-2.0
14 | *
15 | * Unless required by applicable law or agreed to in writing, software
16 | * distributed under the License is distributed on an "AS IS" BASIS,
17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | * See the License for the specific language governing permissions and
19 | * limitations under the License.
20 | * #L%
21 | */
22 |
23 | import backtype.storm.task.OutputCollector;
24 | import backtype.storm.tuple.Values;
25 | import com.yahoo.labs.samoa.core.ContentEvent;
26 |
27 | /**
28 | * Storm Stream that connects into Bolt. It wraps Storm's outputCollector class
29 | * @author Arinto Murdopo
30 | *
31 | */
32 | class StormBoltStream extends StormStream{
33 |
34 | /**
35 | * Serialization version identifier.
36 | */
37 | private static final long serialVersionUID = -5712513402991550847L;
38 | // Not set by the constructor; assigned only through setCollector(...).
39 | private OutputCollector outputCollector;
40 |
41 | StormBoltStream(String stormComponentId){
42 | super(stormComponentId);
43 | }
44 | // Emits (contentEvent, key) on outputStreamId. NOTE(review): outputCollector is null until setCollector is called.
45 | @Override
46 | public void put(ContentEvent contentEvent) {
47 | outputCollector.emit(this.outputStreamId, new Values(contentEvent, contentEvent.getKey()));
48 | }
49 | // Supplies the collector that put(...) emits through.
50 | public void setCollector(OutputCollector outputCollector){
51 | this.outputCollector = outputCollector;
52 | }
53 |
54 | // @Override
55 | // public void setStreamId(String streamId) {
56 | // // TODO Auto-generated method stub
57 | // //this.outputStreamId = streamId;
58 | // }
59 | // NOTE(review): always returns null although put(...) reads this.outputStreamId - confirm the intended value.
60 | @Override
61 | public String getStreamId() {
62 | // TODO Auto-generated method stub
63 | return null;
64 | }
65 | }
66 |
--------------------------------------------------------------------------------
/samoa-storm/src/main/java/com/yahoo/labs/samoa/topology/impl/StormSpoutStream.java:
--------------------------------------------------------------------------------
1 | //package com.yahoo.labs.samoa.topology.impl;
2 | //
3 | ///*
4 | // * #%L
5 | // * SAMOA
6 | // * %%
7 | // * Copyright (C) 2013 Yahoo! Inc.
8 | // * %%
9 | // * Licensed under the Apache License, Version 2.0 (the "License");
10 | // * you may not use this file except in compliance with the License.
11 | // * You may obtain a copy of the License at
12 | // *
13 | // * http://www.apache.org/licenses/LICENSE-2.0
14 | // *
15 | // * Unless required by applicable law or agreed to in writing, software
16 | // * distributed under the License is distributed on an "AS IS" BASIS,
17 | // * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | // * See the License for the specific language governing permissions and
19 | // * limitations under the License.
20 | // * #L%
21 | // */
22 | //
23 | //import com.yahoo.labs.samoa.core.ContentEvent;
24 | //import com.yahoo.labs.samoa.topology.impl.StormEntranceProcessingItem.StormEntranceSpout;
25 | //
26 | ///**
27 | // * Storm Stream that connects into Spout. It wraps the spout itself
28 | // * @author Arinto Murdopo
29 | // *
30 | // */
31 | //final class StormSpoutStream extends StormStream{
32 | //
33 | // /**
34 | // *
35 | // */
36 | // private static final long serialVersionUID = -7444653177614988650L;
37 | //
38 | // private StormEntranceSpout spout;
39 | //
40 | // StormSpoutStream(String stormComponentId) {
41 | // super(stormComponentId);
42 | // }
43 | //
44 | // @Override
45 | // public void put(ContentEvent contentEvent) {
46 | // spout.put(this, contentEvent);
47 | // }
48 | //
49 | // void setSpout(StormEntranceSpout spout){
50 | // this.spout = spout;
51 | // }
52 | //
53 | //// @Override
54 | //// public void setStreamId(String stream) {
55 | //// // TODO Auto-generated method stub
56 | ////
57 | //// }
58 | //
59 | // @Override
60 | // public String getStreamId() {
61 | // // TODO Auto-generated method stub
62 | // return null;
63 | // }
64 | //
65 | //}
66 |
--------------------------------------------------------------------------------
/samoa-api/src/main/java/com/yahoo/labs/samoa/learners/Learner.java:
--------------------------------------------------------------------------------
1 | package com.yahoo.labs.samoa.learners;
2 |
3 | /*
4 | * #%L
5 | * SAMOA
6 | * %%
7 | * Copyright (C) 2013 Yahoo! Inc.
8 | * %%
9 | * Licensed under the Apache License, Version 2.0 (the "License");
10 | * you may not use this file except in compliance with the License.
11 | * You may obtain a copy of the License at
12 | *
13 | * http://www.apache.org/licenses/LICENSE-2.0
14 | *
15 | * Unless required by applicable law or agreed to in writing, software
16 | * distributed under the License is distributed on an "AS IS" BASIS,
17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | * See the License for the specific language governing permissions and
19 | * limitations under the License.
20 | * #L%
21 | */
22 |
23 | import com.yahoo.labs.samoa.core.Processor;
24 | import com.yahoo.labs.samoa.instances.Instances;
25 | import com.yahoo.labs.samoa.topology.Stream;
26 | import com.yahoo.labs.samoa.topology.TopologyBuilder;
27 |
28 | import java.io.Serializable;
29 | import java.util.Set;
30 |
31 | /**
32 | * The Learner interface.
33 | * Initializing a Learner should initialize the PI that connects the Learner to the input stream
34 | * and initialize the result streams so that other PIs can connect to the results of this Learner.
35 | */
36 |
37 | public interface Learner extends Serializable{
38 |
39 |     /**
40 |      * Initializes the Learner object.
41 |      *
42 |      * @param topologyBuilder the topology builder
43 |      * @param dataset the dataset
44 |      * @param parallelism the parallelism
45 |      */
46 |     public void init(TopologyBuilder topologyBuilder, Instances dataset, int parallelism);
47 |
48 |     /**
49 |      * Gets the input processor.
50 |      *
51 |      * @return the input processor
52 |      */
53 |     public Processor getInputProcessor();
54 |
55 |
56 |     /**
57 |      * Gets the result streams, typed as {@code Stream} rather than as a raw set.
58 |      *
59 |      * @return the set of result streams
60 |      */
61 |     public Set<Stream> getResultStreams();
62 | }
63 |
--------------------------------------------------------------------------------
/samoa-api/src/main/java/com/yahoo/labs/samoa/tasks/Task.java:
--------------------------------------------------------------------------------
1 | package com.yahoo.labs.samoa.tasks;
2 |
3 | /*
4 | * #%L
5 | * SAMOA
6 | * %%
7 | * Copyright (C) 2013 Yahoo! Inc.
8 | * %%
9 | * Licensed under the Apache License, Version 2.0 (the "License");
10 | * you may not use this file except in compliance with the License.
11 | * You may obtain a copy of the License at
12 | *
13 | * http://www.apache.org/licenses/LICENSE-2.0
14 | *
15 | * Unless required by applicable law or agreed to in writing, software
16 | * distributed under the License is distributed on an "AS IS" BASIS,
17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | * See the License for the specific language governing permissions and
19 | * limitations under the License.
20 | * #L%
21 | */
22 |
23 | import com.yahoo.labs.samoa.topology.ComponentFactory;
24 | import com.yahoo.labs.samoa.topology.Topology;
25 |
26 | /**
27 | * Task interface, the mother of all SAMOA tasks!
28 | */
29 | public interface Task {
30 |
31 |     /**
32 |      * Initializes this SAMOA task:
33 |      * creates and connects the ProcessingItems and Streams,
34 |      * then initializes the topology.
35 |      */
36 |     void init();
37 |
38 |     /**
39 |      * Returns the final topology object that will be executed in the cluster.
40 |      * @return the topology object to submit for execution in the cluster
41 |      */
42 |     Topology getTopology();
43 |
44 | // /**
45 | // * Return the entrance processor to start SAMOA topology
46 | // * The logic to start the topology should be implemented here
47 | // * @return entrance processor to start the topology
48 | // */
49 | // public TopologyStarter getTopologyStarter();
50 |
51 |     /**
52 |      * Sets the component factory.
53 |      * TODO: propose to hide factory from task,
54 |      * i.e. Task will only see TopologyBuilder,
55 |      * and factory creation will be handled by TopologyBuilder
56 |      *
57 |      * @param factory the factory to use from now on
58 |      */
59 |     void setFactory(ComponentFactory factory);
60 |
61 | }
--------------------------------------------------------------------------------
/samoa-threads/src/main/java/com/yahoo/labs/samoa/topology/impl/ThreadsEventRunnable.java:
--------------------------------------------------------------------------------
1 | package com.yahoo.labs.samoa.topology.impl;
2 |
3 | /*
4 | * #%L
5 | * SAMOA
6 | * %%
7 | * Copyright (C) 2013 Yahoo! Inc.
8 | * %%
9 | * Licensed under the Apache License, Version 2.0 (the "License");
10 | * you may not use this file except in compliance with the License.
11 | * You may obtain a copy of the License at
12 | *
13 | * http://www.apache.org/licenses/LICENSE-2.0
14 | *
15 | * Unless required by applicable law or agreed to in writing, software
16 | * distributed under the License is distributed on an "AS IS" BASIS,
17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | * See the License for the specific language governing permissions and
19 | * limitations under the License.
20 | * #L%
21 | */
22 |
23 | import com.yahoo.labs.samoa.core.ContentEvent;
24 |
25 | /**
26 | * Runnable class where each object corresponds to a ContentEvent and an assigned PI.
27 | * When a PI receives a ContentEvent, it will create a ThreadsEventRunnable with the received ContentEvent
28 | * and an assigned workerPI. This runnable is then submitted to a thread queue waiting to be executed.
29 | * The worker PI will process the received event when the runnable object is executed/run.
30 | * @author Anh Thu Vu
31 | *
32 | */
33 | public class ThreadsEventRunnable implements Runnable {
34 |
35 |     private ThreadsProcessingItemInstance workerPi;
36 |     private ContentEvent event;
37 |     // Pairs one event with the worker PI that will process it.
38 |     public ThreadsEventRunnable(ThreadsProcessingItemInstance workerPi, ContentEvent event) {
39 |         this.event = event;
40 |         this.workerPi = workerPi;
41 |     }
42 |     // Accessors return the objects given to the constructor.
43 |     public ThreadsProcessingItemInstance getWorkerProcessingItem() {
44 |         return workerPi;
45 |     }
46 |
47 |     public ContentEvent getContentEvent() {
48 |         return event;
49 |     }
50 |
51 |     @Override
52 |     public void run() {
53 |         // An exception raised by the worker is printed and not rethrown.
54 |         try {
55 |             this.workerPi.processEvent(this.event);
56 |         } catch (Exception failure) {
57 |             failure.printStackTrace();
58 |         }
59 |     }
60 |
61 | }
62 |
--------------------------------------------------------------------------------
/samoa-local/src/main/java/com/yahoo/labs/samoa/topology/impl/SimpleComponentFactory.java:
--------------------------------------------------------------------------------
1 | package com.yahoo.labs.samoa.topology.impl;
2 |
3 | /*
4 | * #%L
5 | * SAMOA
6 | * %%
7 | * Copyright (C) 2013 Yahoo! Inc.
8 | * %%
9 | * Licensed under the Apache License, Version 2.0 (the "License");
10 | * you may not use this file except in compliance with the License.
11 | * You may obtain a copy of the License at
12 | *
13 | * http://www.apache.org/licenses/LICENSE-2.0
14 | *
15 | * Unless required by applicable law or agreed to in writing, software
16 | * distributed under the License is distributed on an "AS IS" BASIS,
17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | * See the License for the specific language governing permissions and
19 | * limitations under the License.
20 | * #L%
21 | */
22 |
23 | import com.yahoo.labs.samoa.core.EntranceProcessor;
24 | import com.yahoo.labs.samoa.core.Processor;
25 | import com.yahoo.labs.samoa.topology.ComponentFactory;
26 | import com.yahoo.labs.samoa.topology.EntranceProcessingItem;
27 | import com.yahoo.labs.samoa.topology.IProcessingItem;
28 | import com.yahoo.labs.samoa.topology.ProcessingItem;
29 | import com.yahoo.labs.samoa.topology.Stream;
30 | import com.yahoo.labs.samoa.topology.Topology;
31 |
32 | public class SimpleComponentFactory implements ComponentFactory {
33 | // Wraps the processor in a SimpleProcessingItem with the given parallelism.
34 | public ProcessingItem createPi(Processor processor, int paralellism) {
35 | return new SimpleProcessingItem(processor, paralellism);
36 | }
37 | // Delegates to the two-argument overload with a parallelism of 1.
38 | public ProcessingItem createPi(Processor processor) {
39 | return this.createPi(processor, 1);
40 | }
41 | // Wraps the entrance processor in a SimpleEntranceProcessingItem.
42 | public EntranceProcessingItem createEntrancePi(EntranceProcessor processor) {
43 | return new SimpleEntranceProcessingItem(processor);
44 | }
45 | // Creates a SimpleStream whose source is the given processing item.
46 | public Stream createStream(IProcessingItem sourcePi) {
47 | return new SimpleStream(sourcePi);
48 | }
49 | // Creates a SimpleTopology with the given name.
50 | public Topology createTopology(String topoName) {
51 | return new SimpleTopology(topoName);
52 | }
53 | }
--------------------------------------------------------------------------------
/samoa-s4/src/main/java/com/yahoo/labs/samoa/topology/impl/S4Topology.java:
--------------------------------------------------------------------------------
1 | package com.yahoo.labs.samoa.topology.impl;
2 |
3 | /*
4 | * #%L
5 | * SAMOA
6 | * %%
7 | * Copyright (C) 2013 Yahoo! Inc.
8 | * %%
9 | * Licensed under the Apache License, Version 2.0 (the "License");
10 | * you may not use this file except in compliance with the License.
11 | * You may obtain a copy of the License at
12 | *
13 | * http://www.apache.org/licenses/LICENSE-2.0
14 | *
15 | * Unless required by applicable law or agreed to in writing, software
16 | * distributed under the License is distributed on an "AS IS" BASIS,
17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | * See the License for the specific language governing permissions and
19 | * limitations under the License.
20 | * #L%
21 | */
22 |
23 | import com.yahoo.labs.samoa.topology.EntranceProcessingItem;
24 | import com.yahoo.labs.samoa.topology.AbstractTopology;
25 |
26 | public class S4Topology extends AbstractTopology {
27 |
28 | // CASEY: it seems evaluationTask is not used.
29 | // Remove it for now
30 |
31 | // private String _evaluationTask;
32 |
33 | // S4Topology(String topoName, String evalTask) {
34 | // super(topoName);
35 | // }
36 | //
37 | // S4Topology(String topoName) {
38 | // this(topoName, null);
39 | // }
40 |
41 | // @Override
42 | // public void setEvaluationTask(String evalTask) {
43 | // _evaluationTask = evalTask;
44 | // }
45 | //
46 | // @Override
47 | // public String getEvaluationTask() {
48 | // return _evaluationTask;
49 | // }
50 |
51 | S4Topology(String topoName) {
52 | super(topoName);
53 | }
54 |
55 | protected EntranceProcessingItem getEntranceProcessingItem() {
56 | if (this.getEntranceProcessingItems() == null) return null;
57 | if (this.getEntranceProcessingItems().size() < 1) return null;
58 | // TODO: support multiple entrance PIs
59 | return (EntranceProcessingItem)this.getEntranceProcessingItems().toArray()[0];
60 | }
61 | }
62 |
--------------------------------------------------------------------------------
/samoa-api/src/main/java/com/yahoo/labs/samoa/learners/classifiers/rules/distributed/AssignmentContentEvent.java:
--------------------------------------------------------------------------------
1 | package com.yahoo.labs.samoa.learners.classifiers.rules.distributed;
2 |
3 | /*
4 | * #%L
5 | * SAMOA
6 | * %%
7 | * Copyright (C) 2013 - 2014 Yahoo! Inc.
8 | * %%
9 | * Licensed under the Apache License, Version 2.0 (the "License");
10 | * you may not use this file except in compliance with the License.
11 | * You may obtain a copy of the License at
12 | *
13 | * http://www.apache.org/licenses/LICENSE-2.0
14 | *
15 | * Unless required by applicable law or agreed to in writing, software
16 | * distributed under the License is distributed on an "AS IS" BASIS,
17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | * See the License for the specific language governing permissions and
19 | * limitations under the License.
20 | * #L%
21 | */
22 |
23 | import com.yahoo.labs.samoa.core.ContentEvent;
24 | import com.yahoo.labs.samoa.instances.Instance;
25 |
26 | /**
27 | * Forwarded instances from Model Aggregator to Learners/Default Rule Learner.
28 | *
29 | * @author Anh Thu Vu
30 | *
31 | */
32 | public class AssignmentContentEvent implements ContentEvent {
33 |     private static final long serialVersionUID = 1031695762172836629L;
34 |
35 |     private int ruleNumberID;
36 |     private Instance instance;
37 |
38 |     /** Creates an event with rule ID 0 and a null instance (the field defaults). */
39 |     public AssignmentContentEvent() {
40 |     }
41 |
42 |     /** Creates an event that carries {@code instance} together with the rule ID {@code ruleID}. */
43 |     public AssignmentContentEvent(int ruleID, Instance instance) {
44 |         this.instance = instance;
45 |         this.ruleNumberID = ruleID;
46 |     }
47 |
48 |     /** Returns the rule ID as a decimal string. */
49 |     @Override
50 |     public String getKey() {
51 |         return String.valueOf(ruleNumberID);
52 |     }
53 |
54 |     /** Does nothing: the key always comes from the rule ID, so {@code key} is ignored. */
55 |     @Override
56 |     public void setKey(String key) {
57 |     }
58 |
59 |     /** Always returns {@code false}. */
60 |     @Override
61 |     public boolean isLastEvent() {
62 |         return false;
63 |     }
64 |
65 |     /** Returns the instance carried by this event. */
66 |     public Instance getInstance() {
67 |         return instance;
68 |     }
69 |
70 |     /** Returns the rule ID carried by this event. */
71 |     public int getRuleNumberID() {
72 |         return ruleNumberID;
73 |     }
74 | }
75 |
--------------------------------------------------------------------------------
/samoa-s4/samoa-s4-adapter/src/main/java/samoa/topology/adapter/S4EntranceProcessingItem.java:
--------------------------------------------------------------------------------
1 | package samoa.topology.adapter;
2 |
3 | /*
4 | * #%L
5 | * SAMOA
6 | * %%
7 | * Copyright (C) 2013 Yahoo! Inc.
8 | * %%
9 | * Licensed under the Apache License, Version 2.0 (the "License");
10 | * you may not use this file except in compliance with the License.
11 | * You may obtain a copy of the License at
12 | *
13 | * http://www.apache.org/licenses/LICENSE-2.0
14 | *
15 | * Unless required by applicable law or agreed to in writing, software
16 | * distributed under the License is distributed on an "AS IS" BASIS,
17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | * See the License for the specific language governing permissions and
19 | * limitations under the License.
20 | * #L%
21 | */
22 |
23 | import org.apache.s4.core.App;
24 | import org.apache.s4.core.ProcessingElement;
25 |
26 | import samoa.core.Processor;
27 | import samoa.topology.EntranceProcessingItem;
28 | import samoa.topology.impl.DoTaskApp;
29 | import weka.core.Instance;
30 |
31 | public class S4EntranceProcessingItem extends ProcessingElement implements EntranceProcessingItem {
32 |
33 |     private Processor processor;
34 |
35 |     /**
36 |      * Builds the processing element for {@code app} and flags it as a singleton.
37 |      *
38 |      * @param app
39 |      *            application passed on to the {@code ProcessingElement} constructor
40 |      */
41 |     public S4EntranceProcessingItem(App app) {
42 |         super(app);
43 |         setSingleton(true);
44 |     }
45 |
46 |     /** Stores the processor later returned by {@link #getProcessor()}. */
47 |     public void setProcessor(Processor processor) {
48 |         this.processor = processor;
49 |     }
50 |
51 |     @Override
52 |     public Processor getProcessor() {
53 |         return processor;
54 |     }
55 |
56 |     /** No-op: the instance is ignored (this hook may not be needed at all). */
57 |     @Override
58 |     public void put(Instance inst) {
59 |     }
60 |
61 |     /**
62 |      * No-op. Re-creating the processor through {@code newProcessor} and calling
63 |      * its {@code onCreate(Integer.parseInt(getId()))} here is currently disabled.
64 |      */
65 |     @Override
66 |     protected void onCreate() {
67 |     }
68 |
69 |     /** No-op. */
70 |     @Override
71 |     protected void onRemove() {
72 |     }
73 |
74 | }
75 |
--------------------------------------------------------------------------------
/samoa-threads/src/test/java/com/yahoo/labs/samoa/topology/impl/ThreadsProcessingItemInstanceTest.java:
--------------------------------------------------------------------------------
1 | /*
2 | * #%L
3 | * SAMOA
4 | * %%
5 | * Copyright (C) 2013 Yahoo! Inc.
6 | * %%
7 | * Licensed under the Apache License, Version 2.0 (the "License");
8 | * you may not use this file except in compliance with the License.
9 | * You may obtain a copy of the License at
10 | *
11 | * http://www.apache.org/licenses/LICENSE-2.0
12 | *
13 | * Unless required by applicable law or agreed to in writing, software
14 | * distributed under the License is distributed on an "AS IS" BASIS,
15 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 | * See the License for the specific language governing permissions and
17 | * limitations under the License.
18 | * #L%
19 | */
20 | package com.yahoo.labs.samoa.topology.impl;
21 |
22 | import static org.junit.Assert.*;
23 | import mockit.Mocked;
24 | import mockit.Tested;
25 | import mockit.Verifications;
26 |
27 | import org.junit.Before;
28 | import org.junit.Test;
29 |
30 | import com.yahoo.labs.samoa.core.ContentEvent;
31 | import com.yahoo.labs.samoa.core.Processor;
32 |
33 | /**
34 | * @author Anh Thu Vu
35 | *
36 | */
37 | public class ThreadsProcessingItemInstanceTest {
38 | // Instance under test; setUp() replaces it with an explicitly constructed one.
39 | @Tested private ThreadsProcessingItemInstance piInstance;
40 | // JMockit mocks: the processor is given to the constructor, the event to processEvent().
41 | @Mocked private Processor processor;
42 | @Mocked private ContentEvent event;
43 | // Thread index passed to the constructor in setUp().
44 | private final int threadIndex = 2;
45 | // Builds the instance under test from the mocked processor and threadIndex before each test.
46 | @Before
47 | public void setUp() throws Exception {
48 | piInstance = new ThreadsProcessingItemInstance(processor, threadIndex);
49 | }
50 | // Both constructor arguments must be readable back through the getters.
51 | @Test
52 | public void testConstructor() {
53 | assertSame("Processor is not set correctly.", processor, piInstance.getProcessor());
54 | assertEquals("Thread index is not set correctly.", threadIndex, piInstance.getThreadIndex(),0);
55 | }
56 | // processEvent(event) must result in exactly one processor.process(event) call (times=1).
57 | @Test
58 | public void testProcessEvent() {
59 | piInstance.processEvent(event);
60 | new Verifications() {
61 | {
62 | processor.process(event); times=1;
63 | }
64 | };
65 | }
66 |
67 | }
68 |
--------------------------------------------------------------------------------
/samoa-s4/src/main/assembly/samoa-s4.xml:
--------------------------------------------------------------------------------
1 |
20 |
24 | dist
25 |
26 | jar
27 |
28 | false
29 |
30 |
31 |
32 |
33 | lib/
34 | ../samoa-api/target/lib/
35 |
36 | *
37 |
38 |
39 |
40 | app/
41 | ../samoa-api/target/
42 |
43 | samoa-api-*.jar
44 |
45 |
46 |
47 |
48 |
49 | app/
50 | target/
51 |
52 | samoa-s4-*.jar
53 |
54 |
55 |
56 | /
57 | target/
58 |
59 | lib/*
60 |
61 |
62 |
63 |
64 |
--------------------------------------------------------------------------------
/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/clusterers/Clusterer.java:
--------------------------------------------------------------------------------
1 | package com.yahoo.labs.samoa.moa.clusterers;
2 |
3 | /*
4 | * #%L
5 | * SAMOA
6 | * %%
7 | * Copyright (C) 2009 University of Waikato, Hamilton, New Zealand
8 | * %%
9 | * Licensed under the Apache License, Version 2.0 (the "License");
10 | * you may not use this file except in compliance with the License.
11 | * You may obtain a copy of the License at
12 | *
13 | * http://www.apache.org/licenses/LICENSE-2.0
14 | *
15 | * Unless required by applicable law or agreed to in writing, software
16 | * distributed under the License is distributed on an "AS IS" BASIS,
17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | * See the License for the specific language governing permissions and
19 | * limitations under the License.
20 | * #L%
21 | */
22 |
23 | import com.yahoo.labs.samoa.moa.MOAObject;
24 | import com.yahoo.labs.samoa.moa.cluster.Clustering;
25 | import com.yahoo.labs.samoa.instances.InstancesHeader;
26 | import com.yahoo.labs.samoa.moa.core.Measurement;
27 | import com.yahoo.labs.samoa.moa.options.OptionHandler;
28 | import com.yahoo.labs.samoa.instances.Instance;
29 |
30 | public interface Clusterer extends MOAObject, OptionHandler {
31 |     // Interface members are implicitly public; the redundant modifier is left out.
32 |     void setModelContext(InstancesHeader ih);
33 |
34 |     InstancesHeader getModelContext();
35 |
36 |     boolean isRandomizable();
37 |
38 |     void setRandomSeed(int s);
39 |     // --- training ---
40 |     boolean trainingHasStarted();
41 |
42 |     double trainingWeightSeenByModel();
43 |
44 |     void resetLearning();
45 |
46 |     void trainOnInstance(Instance inst);
47 |
48 |     double[] getVotesForInstance(Instance inst);
49 |     // --- model inspection and copying ---
50 |     Measurement[] getModelMeasurements();
51 |
52 |     Clusterer[] getSubClusterers();
53 |
54 |     Clusterer copy();
55 |     // --- clustering results ---
56 |     Clustering getClusteringResult();
57 |
58 |     boolean implementsMicroClusterer();
59 |
60 |     Clustering getMicroClusteringResult();
61 |
62 |     boolean keepClassLabel();
63 |
64 | }
65 |
--------------------------------------------------------------------------------
/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/PerformanceEvaluator.java:
--------------------------------------------------------------------------------
1 | package com.yahoo.labs.samoa.evaluation;
2 |
3 | /*
4 | * #%L
5 | * SAMOA
6 | * %%
7 | * Copyright (C) 2013 Yahoo! Inc.
8 | * %%
9 | * Licensed under the Apache License, Version 2.0 (the "License");
10 | * you may not use this file except in compliance with the License.
11 | * You may obtain a copy of the License at
12 | *
13 | * http://www.apache.org/licenses/LICENSE-2.0
14 | *
15 | * Unless required by applicable law or agreed to in writing, software
16 | * distributed under the License is distributed on an "AS IS" BASIS,
17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | * See the License for the specific language governing permissions and
19 | * limitations under the License.
20 | * #L%
21 | */
22 |
23 | import com.yahoo.labs.samoa.moa.MOAObject;
24 | import com.yahoo.labs.samoa.moa.core.Measurement;
25 |
26 | import com.yahoo.labs.samoa.instances.Instance;
27 |
28 | /**
29 | * Interface implemented by learner evaluators to monitor the results of the
30 | * learning process.
31 | *
32 | * @author Richard Kirkby (rkirkby@cs.waikato.ac.nz)
33 | * @version $Revision: 7 $
34 | */
35 | public interface PerformanceEvaluator extends MOAObject {
36 |
37 |     /**
38 |      * Resets this evaluator. The result must be similar to starting a new
39 |      * evaluator from scratch.
40 |      *
41 |      */
42 |     void reset();
43 |
44 |     /**
45 |      * Adds a learning result to this evaluator. Nothing is returned; the
46 |      * measurements are read through {@link #getPerformanceMeasurements()}.
47 |      *
48 |      * @param inst
49 |      *            the instance to be classified
50 |      * @param classVotes
51 |      *            an array containing the estimated membership probabilities of
52 |      *            the test instance in each class
53 |      */
54 |     void addResult(Instance inst, double[] classVotes);
55 |
56 |     /**
57 |      * Gets the current measurements monitored by this evaluator.
58 |      *
59 |      * @return an array of measurements monitored by this evaluator
60 |      */
61 |     Measurement[] getPerformanceMeasurements();
62 | }
63 |
--------------------------------------------------------------------------------
/samoa-api/src/main/java/com/yahoo/labs/samoa/examples/HelloWorldContentEvent.java:
--------------------------------------------------------------------------------
1 | package com.yahoo.labs.samoa.examples;
2 |
3 | /*
4 | * #%L
5 | * SAMOA
6 | * %%
7 | * Copyright (C) 2013 - 2014 Yahoo! Inc.
8 | * %%
9 | * Licensed under the Apache License, Version 2.0 (the "License");
10 | * you may not use this file except in compliance with the License.
11 | * You may obtain a copy of the License at
12 | *
13 | * http://www.apache.org/licenses/LICENSE-2.0
14 | *
15 | * Unless required by applicable law or agreed to in writing, software
16 | * distributed under the License is distributed on an "AS IS" BASIS,
17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | * See the License for the specific language governing permissions and
19 | * limitations under the License.
20 | * #L%
21 | */
22 |
23 | import com.yahoo.labs.samoa.core.ContentEvent;
24 |
25 | /**
26 | * Example {@link ContentEvent} that contains a single integer.
27 | */
28 | public class HelloWorldContentEvent implements ContentEvent {
29 |
30 | private static final long serialVersionUID = -2406968925730298156L;
31 | private final boolean isLastEvent;
32 | private final int helloWorldData;
33 |
34 | public HelloWorldContentEvent(int helloWorldData, boolean isLastEvent) {
35 | this.isLastEvent = isLastEvent;
36 | this.helloWorldData = helloWorldData;
37 | }
38 |
39 | /*
40 | * No-argument constructor for Kryo
41 | */
42 | public HelloWorldContentEvent() {
43 | this(0,false);
44 | }
45 |
46 | @Override
47 | public String getKey() {
48 | return null;
49 | }
50 |
51 | @Override
52 | public void setKey(String str) {
53 | // do nothing, it's key-less content event
54 | }
55 |
56 | @Override
57 | public boolean isLastEvent() {
58 | return isLastEvent;
59 | }
60 |
61 | public int getHelloWorldData() {
62 | return helloWorldData;
63 | }
64 |
65 | @Override
66 | public String toString() {
67 | return "HelloWorldContentEvent [helloWorldData=" + helloWorldData + "]";
68 | }
69 | }
--------------------------------------------------------------------------------
/samoa-api/src/main/java/com/yahoo/labs/samoa/core/EntranceProcessor.java:
--------------------------------------------------------------------------------
1 | package com.yahoo.labs.samoa.core;
2 |
3 | /*
4 | * #%L
5 | * SAMOA
6 | * %%
7 | * Copyright (C) 2013 Yahoo! Inc.
8 | * %%
9 | * Licensed under the Apache License, Version 2.0 (the "License");
10 | * you may not use this file except in compliance with the License.
11 | * You may obtain a copy of the License at
12 | *
13 | * http://www.apache.org/licenses/LICENSE-2.0
14 | *
15 | * Unless required by applicable law or agreed to in writing, software
16 | * distributed under the License is distributed on an "AS IS" BASIS,
17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | * See the License for the specific language governing permissions and
19 | * limitations under the License.
20 | * #L%
21 | */
22 |
23 | import java.io.Serializable;
24 |
25 | import com.github.javacliparser.Configurable;
26 |
27 | /**
28 | * An EntranceProcessor is a specific kind of processor dedicated to providing events to inject in the topology. It can be connected to a single output stream.
29 | */
30 | public interface EntranceProcessor extends Serializable, Configurable, Processor {
31 |
32 |     /**
33 |      * Initializes the processor. Called once, after the topology is set up and before any call to {@link #nextEvent()}.
34 |      *
35 |      * @param id
36 |      *            the identifier of the processor.
37 |      */
38 |     void onCreate(int id);
39 |
40 |     /**
41 |      * Tells whether the source stream is finished/exhausted.
42 |      */
43 |     boolean isFinished();
44 |
45 |     /**
46 |      * Tells whether a new event is ready to be processed.
47 |      *
48 |      * @return true if the EntranceProcessor is ready to provide the next event, false otherwise.
49 |      */
50 |     boolean hasNext();
51 |
52 |     /**
53 |      * Supplies the next event to be processed by the topology. This method is the entry point for external events into the topology.
54 |      *
55 |      * @return the next event to be processed.
56 |      */
57 |     ContentEvent nextEvent();
58 |
59 | }
60 |
--------------------------------------------------------------------------------
/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/splitcriteria/InfoGainSplitCriterionMultilabel.java:
--------------------------------------------------------------------------------
1 | package com.yahoo.labs.samoa.moa.classifiers.core.splitcriteria;
2 |
3 | /*
4 | * #%L
5 | * SAMOA
6 | * %%
7 | * Copyright (C) 2012 University of Waikato, Hamilton, New Zealand
8 | * %%
9 | * Licensed under the Apache License, Version 2.0 (the "License");
10 | * you may not use this file except in compliance with the License.
11 | * You may obtain a copy of the License at
12 | *
13 | * http://www.apache.org/licenses/LICENSE-2.0
14 | *
15 | * Unless required by applicable law or agreed to in writing, software
16 | * distributed under the License is distributed on an "AS IS" BASIS,
17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | * See the License for the specific language governing permissions and
19 | * limitations under the License.
20 | * #L%
21 | */
22 |
23 | import com.yahoo.labs.samoa.moa.core.Utils;
24 |
25 | /**
26 | * Class for computing splitting criteria using information gain with respect to
27 | * distributions of class values for Multilabel data. The split criterion is
28 | * used as a parameter on decision trees and decision stumps.
29 | *
30 | * @author Richard Kirkby (rkirkby@cs.waikato.ac.nz)
31 | * @author Jesse Read (jesse@tsc.uc3m.es)
32 | * @version $Revision: 1 $
33 | */
34 | public class InfoGainSplitCriterionMultilabel extends InfoGainSplitCriterion {
35 |
36 |     private static final long serialVersionUID = 1L;
37 |
38 |     /**
39 |      * Computes the multilabel entropy of a weight distribution: every entry is
40 |      * divided by the total weight to give p, and the two-outcome entropy
41 |      * {@code -(p * log2(p) + (1 - p) * log2(1 - p))} of each positive p is summed.
42 |      * <p>
43 |      * The {@code (1 - p)} term is left out once {@code p} reaches 1. Evaluating it
44 |      * there would be {@code 0 * log2(0)}, which (assuming {@code Utils.log2}
45 |      * behaves like {@code Math.log} at zero) is NaN and would poison the result.
46 |      *
47 |      * @param dist
48 |      *            the weights of the distribution
49 |      * @return the summed entropy, or 0.0 when the total weight is not positive
50 |      */
51 |     public static double computeEntropy(double[] dist) {
52 |         double sum = 0.0;
53 |         for (double weight : dist) {
54 |             sum += weight;
55 |         }
56 |         if (!(sum > 0.0)) {
57 |             // Nothing to normalise by (this also covers a NaN total).
58 |             return 0.0;
59 |         }
60 |         double entropy = 0.0;
61 |         for (double weight : dist) {
62 |             double p = weight / sum;
63 |             if (p > 0.0) { // TODO: how small can p be before log2 overflows?
64 |                 double q = 1.0 - p;
65 |                 // Extension to Multilabel: complement term, skipped when q is not positive.
66 |                 entropy -= p * Utils.log2(p) + (q > 0.0 ? q * Utils.log2(q) : 0.0);
67 |             }
68 |         }
69 |         return entropy;
70 |     }
71 | }
55 |
--------------------------------------------------------------------------------
/samoa-api/src/main/java/com/yahoo/labs/samoa/core/Processor.java:
--------------------------------------------------------------------------------
1 | package com.yahoo.labs.samoa.core;
2 |
3 | /*
4 | * #%L
5 | * SAMOA
6 | * %%
7 | * Copyright (C) 2013 Yahoo! Inc.
8 | * %%
9 | * Licensed under the Apache License, Version 2.0 (the "License");
10 | * you may not use this file except in compliance with the License.
11 | * You may obtain a copy of the License at
12 | *
13 | * http://www.apache.org/licenses/LICENSE-2.0
14 | *
15 | * Unless required by applicable law or agreed to in writing, software
16 | * distributed under the License is distributed on an "AS IS" BASIS,
17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | * See the License for the specific language governing permissions and
19 | * limitations under the License.
20 | * #L%
21 | */
22 |
23 | import java.io.Serializable;
24 |
25 | import com.github.javacliparser.Configurable;
26 |
27 | /**
28 | * The Interface Processor.
29 | */
30 | public interface Processor extends Serializable, Configurable {
31 |
32 | /**
33 | * Entry point for the {@link Processor} code. This method is called once for every event received.
34 | *
35 | * @param event
36 | * the event to be processed.
37 | * @return true if successful, false otherwise.
38 | */
39 | boolean process(ContentEvent event);
40 |
41 | /**
42 | * Initializes the Processor.
43 | * This method is called once after the topology is set up and before any call to the {@link #process(ContentEvent)} method.
44 | *
45 | * @param id
46 | * the identifier of the processor.
47 | */
48 | void onCreate(int id);
49 |
50 | /**
51 | * Creates a copy of a processor.
52 | * This method is used to instantiate multiple instances of the same {@link Processor}.
53 | *
54 | * @param processor
55 | * the processor to be copied.
56 | *
57 | * @return a new instance of the {@link Processor}.
58 | * */
59 | Processor newProcessor(Processor processor); // FIXME there should be no need for the processor as a parameter
60 | // TODO can we substitute this with Cloneable?
61 | }
62 |
--------------------------------------------------------------------------------
/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/splitcriteria/SplitCriterion.java:
--------------------------------------------------------------------------------
1 | package com.yahoo.labs.samoa.moa.classifiers.core.splitcriteria;
2 |
3 | /*
4 | * #%L
5 | * SAMOA
6 | * %%
7 | * Copyright (C) 2007 University of Waikato, Hamilton, New Zealand
8 | * %%
9 | * Licensed under the Apache License, Version 2.0 (the "License");
10 | * you may not use this file except in compliance with the License.
11 | * You may obtain a copy of the License at
12 | *
13 | * http://www.apache.org/licenses/LICENSE-2.0
14 | *
15 | * Unless required by applicable law or agreed to in writing, software
16 | * distributed under the License is distributed on an "AS IS" BASIS,
17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | * See the License for the specific language governing permissions and
19 | * limitations under the License.
20 | * #L%
21 | */
22 |
23 | import com.yahoo.labs.samoa.moa.options.OptionHandler;
24 |
25 | /**
26 | * Interface for computing splitting criteria
27 | * with respect to distributions of class values.
28 | * The split criterion is used as a parameter on
29 | * decision trees and decision stumps.
30 | * The two split criteria most used are
31 | * Information Gain and Gini.
32 | *
33 | * @author Richard Kirkby (rkirkby@cs.waikato.ac.nz)
34 | * @version $Revision: 7 $
35 | */
36 | public interface SplitCriterion extends OptionHandler {
37 |
38 |     /**
39 |      * Computes the merit of a split from the class
40 |      * distribution before the split and the distributions after it.
41 |      *
42 |      * @param preSplitDist the class distribution before the split
43 |      * @param postSplitDists the class distribution after the split
44 |      * @return value of the merit of splitting
45 |      */
46 |     double getMeritOfSplit(double[] preSplitDist,
47 |             double[][] postSplitDists);
48 |
49 |     /**
50 |      * Computes the range of the splitting merit.
51 |      *
52 |      * @param preSplitDist the class distribution before the split
53 |      * @return value of the range of splitting merit
54 |      */
55 |     double getRangeOfMerit(double[] preSplitDist);
56 | }
57 |
--------------------------------------------------------------------------------
/samoa-api/src/main/java/com/yahoo/labs/samoa/learners/classifiers/rules/common/PassiveRule.java:
--------------------------------------------------------------------------------
1 | package com.yahoo.labs.samoa.learners.classifiers.rules.common;
2 |
3 | /*
4 | * #%L
5 | * SAMOA
6 | * %%
7 | * Copyright (C) 2013 - 2014 Yahoo! Inc.
8 | * %%
9 | * Licensed under the Apache License, Version 2.0 (the "License");
10 | * you may not use this file except in compliance with the License.
11 | * You may obtain a copy of the License at
12 | *
13 | * http://www.apache.org/licenses/LICENSE-2.0
14 | *
15 | * Unless required by applicable law or agreed to in writing, software
16 | * distributed under the License is distributed on an "AS IS" BASIS,
17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | * See the License for the specific language governing permissions and
19 | * limitations under the License.
20 | * #L%
21 | */
22 |
23 | import java.util.LinkedList;
24 |
25 | /**
26 | * PassiveRule is a LearningRule that updates its LearningNode
27 | * with the received new LearningNode.
28 | *
29 | * @author Anh Thu Vu
30 | *
31 | */
32 | public class PassiveRule extends LearningRule {
33 |
34 |     private static final long serialVersionUID = -5551571895910530275L;
35 |
36 |     private RulePassiveRegressionNode learningNode;
37 |
38 |     /**
39 |      * Turns an ActiveRule into a PassiveRule: every split node of {@code rule}
40 |      * is copied with {@code getACopy()}, a new {@code RulePassiveRegressionNode}
41 |      * is built from the rule's learning node, and the rule number is taken over.
42 |      *
43 |      * @param rule
44 |      *            the active rule to convert
45 |      */
46 |     public PassiveRule(ActiveRule rule) {
47 |         this.nodeList = new LinkedList<>();
48 |         for (RuleSplitNode splitNode : rule.nodeList) {
49 |             this.nodeList.add(splitNode.getACopy());
50 |         }
51 |         learningNode = new RulePassiveRegressionNode(rule.getLearningNode());
52 |         ruleNumberID = rule.ruleNumberID;
53 |     }
54 |
55 |     @Override
56 |     public RuleRegressionNode getLearningNode() {
57 |         return learningNode;
58 |     }
59 |
60 |     /** Stores {@code learningNode} after casting it to {@code RulePassiveRegressionNode}. */
61 |     @Override
62 |     public void setLearningNode(RuleRegressionNode learningNode) {
63 |         this.learningNode = (RulePassiveRegressionNode) learningNode;
64 |     }
65 |
66 |     /** MOA GUI hook; not implemented yet, so nothing is appended to {@code sb}. */
67 |     @Override
68 |     public void getDescription(StringBuilder sb, int indent) {
69 |         // TODO Auto-generated method stub
70 |     }
71 | }
72 |
--------------------------------------------------------------------------------
/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/rules/core/voting/UniformWeightedVote.java:
--------------------------------------------------------------------------------
1 | package com.yahoo.labs.samoa.moa.classifiers.rules.core.voting;
2 |
3 | /*
4 | * #%L
5 | * SAMOA
6 | * %%
7 | * Copyright (C) 2013 - 2014 Yahoo! Inc.
8 | * %%
9 | * Licensed under the Apache License, Version 2.0 (the "License");
10 | * you may not use this file except in compliance with the License.
11 | * You may obtain a copy of the License at
12 | *
13 | * http://www.apache.org/licenses/LICENSE-2.0
14 | *
15 | * Unless required by applicable law or agreed to in writing, software
16 | * distributed under the License is distributed on an "AS IS" BASIS,
17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | * See the License for the specific language governing permissions and
19 | * limitations under the License.
20 | * #L%
21 | */
22 |
23 |
24 | /**
25 | * UniformWeightedVote class for weighted votes based on estimates of errors.
26 | *
27 | * @author Joao Duarte (jmduarte@inescporto.pt)
28 | * @version $Revision: 1 $
29 | */
30 | public class UniformWeightedVote extends AbstractErrorWeightedVote {
31 |
32 |
33 | private static final long serialVersionUID = 6359349250620616482L;
34 |
35 | public UniformWeightedVote() {
36 | super();
37 | }
38 |
39 | public UniformWeightedVote(AbstractErrorWeightedVote aewv) {
40 | super(aewv);
41 | }
42 |
43 | @Override
44 | public double[] computeWeightedVote() {
45 | int n=votes.size();
46 | weights=new double[n];
47 | double [] weightedVote=null;
48 | if (n>0){
49 | int d=votes.get(0).length;
50 | weightedVote=new double[d];
51 | for (int i=0; i getTaskResultType();
42 |
43 | /**
44 | * This method performs this task,
45 | * when TaskMonitor and ObjectRepository are not needed.
46 | *
47 | * @return an object with the result of this task
48 | */
49 | public Object doTask();
50 |
51 | /**
52 | * This method performs this task.
53 | * AbstractTask implements this method so all
54 | * its extensions only need to implement doTaskImpl
55 | *
56 | * @param monitor the TaskMonitor to use
57 | * @param repository the ObjectRepository to use
58 | * @return an object with the result of this task
59 | */
60 | public Object doTask(TaskMonitor monitor, ObjectRepository repository);
61 | }
62 |
--------------------------------------------------------------------------------
/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/evaluation/LearningPerformanceEvaluator.java:
--------------------------------------------------------------------------------
1 | package com.yahoo.labs.samoa.moa.evaluation;
2 |
3 | /*
4 | * #%L
5 | * SAMOA
6 | * %%
7 | * Copyright (C) 2009 University of Waikato, Hamilton, New Zealand
8 | * %%
9 | * Licensed under the Apache License, Version 2.0 (the "License");
10 | * you may not use this file except in compliance with the License.
11 | * You may obtain a copy of the License at
12 | *
13 | * http://www.apache.org/licenses/LICENSE-2.0
14 | *
15 | * Unless required by applicable law or agreed to in writing, software
16 | * distributed under the License is distributed on an "AS IS" BASIS,
17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | * See the License for the specific language governing permissions and
19 | * limitations under the License.
20 | * #L%
21 | */
22 |
23 |
24 | import com.yahoo.labs.samoa.moa.MOAObject;
25 | import com.yahoo.labs.samoa.moa.core.Example;
26 | import com.yahoo.labs.samoa.moa.core.Measurement;
27 |
28 | /**
29 | * Interface implemented by learner evaluators to monitor
30 | * the results of the learning process.
31 | *
32 | * @author Richard Kirkby (rkirkby@cs.waikato.ac.nz)
33 | * @version $Revision: 7 $
34 | */
35 | public interface LearningPerformanceEvaluator<E extends Example> extends MOAObject {
36 |
37 |     // NOTE(review): the type parameter E was missing although addResult uses it. Its bound
38 |     // is presumably Example (imported, otherwise unused) - confirm against the implementers.
39 |
40 |     /**
41 |      * Resets this evaluator. It must be similar to
42 |      * starting a new evaluator from scratch.
43 |      */
44 |     public void reset();
45 |
46 |     /**
47 |      * Adds a learning result to this evaluator. Nothing is returned; the
48 |      * measurements are read through {@link #getPerformanceMeasurements()}.
49 |      *
50 |      * @param example the example to be classified
51 |      * @param classVotes an array containing the estimated membership
52 |      * probabilities of the test instance in each class
53 |      */
54 |     public void addResult(E example, double[] classVotes);
55 |
56 |     /**
57 |      * Gets the current measurements monitored by this evaluator.
58 |      *
59 |      * @return an array of measurements monitored by this evaluator
60 |      */
61 |     public Measurement[] getPerformanceMeasurements();
62 |
63 | }
62 |
--------------------------------------------------------------------------------
/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/streams/clustering/ClusteringStream.java:
--------------------------------------------------------------------------------
1 |
2 | package com.yahoo.labs.samoa.moa.streams.clustering;
3 |
4 | /*
5 | * #%L
6 | * SAMOA
7 | * %%
8 | * Copyright (C) 2010 RWTH Aachen University, Germany
9 | * %%
10 | * Licensed under the Apache License, Version 2.0 (the "License");
11 | * you may not use this file except in compliance with the License.
12 | * You may obtain a copy of the License at
13 | *
14 | * http://www.apache.org/licenses/LICENSE-2.0
15 | *
16 | * Unless required by applicable law or agreed to in writing, software
17 | * distributed under the License is distributed on an "AS IS" BASIS,
18 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
19 | * See the License for the specific language governing permissions and
20 | * limitations under the License.
21 | * #L%
22 | */
23 |
24 | import com.yahoo.labs.samoa.moa.options.AbstractOptionHandler;
25 | import com.github.javacliparser.FloatOption;
26 | import com.github.javacliparser.IntOption;
27 | import com.yahoo.labs.samoa.moa.streams.InstanceStream;
28 |
29 | public abstract class ClusteringStream extends AbstractOptionHandler implements InstanceStream{
30 | public IntOption decayHorizonOption = new IntOption("decayHorizon", 'h',
31 | "Decay horizon", 1000, 0, Integer.MAX_VALUE);
32 | // NOTE(review): the trailing numbers of each option are presumably (default, min, max) - confirm against javacliparser.
33 | public FloatOption decayThresholdOption = new FloatOption("decayThreshold", 't',
34 | "Decay horizon threshold", 0.01, 0, 1);
35 |
36 | public IntOption evaluationFrequencyOption = new IntOption("evaluationFrequency", 'e',
37 | "Evaluation frequency", 1000, 0, Integer.MAX_VALUE);
38 |
39 | public IntOption numAttsOption = new IntOption("numAtts", 'a',
40 | "The number of attributes to generate.", 2, 0, Integer.MAX_VALUE);
41 | // Each getter below returns the current value of the matching option via getValue().
42 | public int getDecayHorizon(){
43 | return decayHorizonOption.getValue();
44 | }
45 |
46 | public double getDecayThreshold(){
47 | return decayThresholdOption.getValue();
48 | }
49 |
50 | public int getEvaluationFrequency(){
51 | return evaluationFrequencyOption.getValue();
52 | }
53 | // numAttsOption has no getter in this class.
54 |
55 | }
56 |
--------------------------------------------------------------------------------
/samoa-samza/src/main/java/com/yahoo/labs/samoa/topology/impl/SamzaComponentFactory.java:
--------------------------------------------------------------------------------
1 | package com.yahoo.labs.samoa.topology.impl;
2 |
3 | /*
4 | * #%L
5 | * SAMOA
6 | * %%
7 | * Copyright (C) 2013 - 2014 Yahoo! Inc.
8 | * %%
9 | * Licensed under the Apache License, Version 2.0 (the "License");
10 | * you may not use this file except in compliance with the License.
11 | * You may obtain a copy of the License at
12 | *
13 | * http://www.apache.org/licenses/LICENSE-2.0
14 | *
15 | * Unless required by applicable law or agreed to in writing, software
16 | * distributed under the License is distributed on an "AS IS" BASIS,
17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | * See the License for the specific language governing permissions and
19 | * limitations under the License.
20 | * #L%
21 | */
22 |
23 | import com.yahoo.labs.samoa.core.EntranceProcessor;
24 | import com.yahoo.labs.samoa.core.Processor;
25 | import com.yahoo.labs.samoa.topology.ComponentFactory;
26 | import com.yahoo.labs.samoa.topology.EntranceProcessingItem;
27 | import com.yahoo.labs.samoa.topology.IProcessingItem;
28 | import com.yahoo.labs.samoa.topology.ProcessingItem;
29 | import com.yahoo.labs.samoa.topology.Stream;
30 | import com.yahoo.labs.samoa.topology.Topology;
31 |
32 | /**
33 | * Implementation of SAMOA ComponentFactory for Samza
34 | *
35 | * @author Anh Thu Vu
36 | */
37 | public class SamzaComponentFactory implements ComponentFactory {
38 | @Override
39 | public ProcessingItem createPi(Processor processor) {
40 | return this.createPi(processor, 1);
41 | }
42 |
43 | @Override
44 | public ProcessingItem createPi(Processor processor, int parallelism) {
45 | return new SamzaProcessingItem(processor, parallelism);
46 | }
47 |
48 | @Override
49 | public EntranceProcessingItem createEntrancePi(EntranceProcessor entranceProcessor) {
50 | return new SamzaEntranceProcessingItem(entranceProcessor);
51 | }
52 |
53 | @Override
54 | public Stream createStream(IProcessingItem sourcePi) {
55 | return new SamzaStream(sourcePi);
56 | }
57 |
58 | @Override
59 | public Topology createTopology(String topoName) {
60 | return new SamzaTopology(topoName);
61 | }
62 | }
--------------------------------------------------------------------------------
/samoa-api/src/main/java/com/yahoo/labs/samoa/streams/fs/FileStreamSource.java:
--------------------------------------------------------------------------------
1 | package com.yahoo.labs.samoa.streams.fs;
2 |
3 | /*
4 | * #%L
5 | * SAMOA
6 | * %%
7 | * Copyright (C) 2013 - 2014 Yahoo! Inc.
8 | * %%
9 | * Licensed under the Apache License, Version 2.0 (the "License");
10 | * you may not use this file except in compliance with the License.
11 | * You may obtain a copy of the License at
12 | *
13 | * http://www.apache.org/licenses/LICENSE-2.0
14 | *
15 | * Unless required by applicable law or agreed to in writing, software
16 | * distributed under the License is distributed on an "AS IS" BASIS,
17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | * See the License for the specific language governing permissions and
19 | * limitations under the License.
20 | * #L%
21 | */
22 |
23 | import java.io.InputStream;
24 | import java.io.IOException;
25 | import java.io.Serializable;
26 |
/**
 * Source of input files for a FileStream (local file system, HDFS, ...).
 *
 * <p>A source is initialised with a path and an optional extension filter and then
 * hands out one {@link InputStream} per file.
 *
 * @author Casey
 */
public interface FileStreamSource extends Serializable {

    /**
     * Initialises the source.
     *
     * @param path file or directory path
     * @param ext  file extension used to filter the files of a directory;
     *             {@code null} accepts every file in the directory
     */
    void init(String path, String ext);

    /**
     * Resets the source.
     *
     * @throws IOException if the source cannot be reset
     */
    void reset() throws IOException;

    /**
     * Moves on to the next file and returns its stream.
     *
     * @return the {@link InputStream} of the next file in the list, or {@code null}
     *         when the last file has already been reached
     */
    InputStream getNextInputStream();

    /**
     * Returns the stream of the current file. The "current pointer" only advances
     * through {@link #getNextInputStream()}, so this method returns {@code null}
     * as long as {@code getNextInputStream()} has never been invoked.
     *
     * @return the {@link InputStream} of the current file in the list
     */
    InputStream getCurrentInputStream();
}
68 |
--------------------------------------------------------------------------------
/samoa-threads/src/main/java/com/yahoo/labs/samoa/topology/impl/ThreadsComponentFactory.java:
--------------------------------------------------------------------------------
1 | package com.yahoo.labs.samoa.topology.impl;
2 |
3 | /*
4 | * #%L
5 | * SAMOA
6 | * %%
7 | * Copyright (C) 2013 Yahoo! Inc.
8 | * %%
9 | * Licensed under the Apache License, Version 2.0 (the "License");
10 | * you may not use this file except in compliance with the License.
11 | * You may obtain a copy of the License at
12 | *
13 | * http://www.apache.org/licenses/LICENSE-2.0
14 | *
15 | * Unless required by applicable law or agreed to in writing, software
16 | * distributed under the License is distributed on an "AS IS" BASIS,
17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | * See the License for the specific language governing permissions and
19 | * limitations under the License.
20 | * #L%
21 | */
22 |
23 | import com.yahoo.labs.samoa.core.EntranceProcessor;
24 | import com.yahoo.labs.samoa.core.Processor;
25 | import com.yahoo.labs.samoa.topology.ComponentFactory;
26 | import com.yahoo.labs.samoa.topology.EntranceProcessingItem;
27 | import com.yahoo.labs.samoa.topology.IProcessingItem;
28 | import com.yahoo.labs.samoa.topology.ProcessingItem;
29 | import com.yahoo.labs.samoa.topology.Stream;
30 | import com.yahoo.labs.samoa.topology.Topology;
31 |
32 | /**
33 | * ComponentFactory for multithreaded engine
34 | * @author Anh Thu Vu
35 | *
36 | */
37 | public class ThreadsComponentFactory implements ComponentFactory {
38 |
39 | @Override
40 | public ProcessingItem createPi(Processor processor) {
41 | return this.createPi(processor, 1);
42 | }
43 |
44 | @Override
45 | public ProcessingItem createPi(Processor processor, int paralellism) {
46 | return new ThreadsProcessingItem(processor, paralellism);
47 | }
48 |
49 | @Override
50 | public EntranceProcessingItem createEntrancePi(EntranceProcessor entranceProcessor) {
51 | return new ThreadsEntranceProcessingItem(entranceProcessor);
52 | }
53 |
54 | @Override
55 | public Stream createStream(IProcessingItem sourcePi) {
56 | return new ThreadsStream(sourcePi);
57 | }
58 |
59 | @Override
60 | public Topology createTopology(String topoName) {
61 | return new ThreadsTopology(topoName);
62 | }
63 |
64 | }
65 |
--------------------------------------------------------------------------------
/samoa-api/src/main/java/com/yahoo/labs/samoa/evaluation/ClusteringResultContentEvent.java:
--------------------------------------------------------------------------------
1 | package com.yahoo.labs.samoa.evaluation;
2 |
3 | /*
4 | * #%L
5 | * SAMOA
6 | * %%
7 | * Copyright (C) 2013 Yahoo! Inc.
8 | * %%
9 | * Licensed under the Apache License, Version 2.0 (the "License");
10 | * you may not use this file except in compliance with the License.
11 | * You may obtain a copy of the License at
12 | *
13 | * http://www.apache.org/licenses/LICENSE-2.0
14 | *
15 | * Unless required by applicable law or agreed to in writing, software
16 | * distributed under the License is distributed on an "AS IS" BASIS,
17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | * See the License for the specific language governing permissions and
19 | * limitations under the License.
20 | * #L%
21 | */
22 |
23 | import com.yahoo.labs.samoa.core.ContentEvent;
24 | import com.yahoo.labs.samoa.moa.cluster.Clustering;
25 |
26 | /**
27 | * License
28 | */
29 | /**
30 | * The Class Clustering ResultEvent.
31 | */
32 | final public class ClusteringResultContentEvent implements ContentEvent {
33 |
34 | private static final long serialVersionUID = -7746983521296618922L;
35 | private Clustering clustering;
36 | private final boolean isLast;
37 | private String key = "0";
38 |
39 | public ClusteringResultContentEvent() {
40 | this.isLast = false;
41 | }
42 |
43 | public ClusteringResultContentEvent(boolean isLast) {
44 | this.isLast = isLast;
45 | }
46 |
47 | /**
48 | * Instantiates a new clustering result event.
49 | *
50 | * @param clustering the clustering result
51 | * @param isLast is the last result
52 | */
53 | public ClusteringResultContentEvent(Clustering clustering, boolean isLast) {
54 | this.clustering = clustering;
55 | this.isLast = isLast;
56 | }
57 |
58 | public String getKey() {
59 | return key;
60 | }
61 |
62 | public void setKey(String key) {
63 | this.key = key;
64 | }
65 |
66 | public boolean isLastEvent() {
67 | return this.isLast;
68 | }
69 |
70 | public Clustering getClustering() {
71 | return this.clustering;
72 | }
73 | }
74 |
--------------------------------------------------------------------------------
/samoa-instances/src/main/java/com/yahoo/labs/samoa/instances/Instance.java:
--------------------------------------------------------------------------------
1 | /*
2 | * To change this template, choose Tools | Templates
3 | * and open the template in the editor.
4 | */
5 | package com.yahoo.labs.samoa.instances;
6 |
7 | /*
8 | * #%L
9 | * SAMOA
10 | * %%
11 | * Copyright (C) 2013 Yahoo! Inc.
12 | * %%
13 | * Licensed under the Apache License, Version 2.0 (the "License");
14 | * you may not use this file except in compliance with the License.
15 | * You may obtain a copy of the License at
16 | *
17 | * http://www.apache.org/licenses/LICENSE-2.0
18 | *
19 | * Unless required by applicable law or agreed to in writing, software
20 | * distributed under the License is distributed on an "AS IS" BASIS,
21 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
22 | * See the License for the specific language governing permissions and
23 | * limitations under the License.
24 | * #L%
25 | */
26 |
27 | import java.io.Serializable;
28 |
29 | /**
30 | *
31 | * @author abifet
32 | */
33 |
34 | public interface Instance extends Serializable{
35 |
36 | double weight();
37 | void setWeight(double weight);
38 |
39 | //Attributes
40 | Attribute attribute(int instAttIndex);
41 | void deleteAttributeAt(int i);
42 | void insertAttributeAt(int i);
43 | int numAttributes();
44 | public void addSparseValues(int[] indexValues, double[] attributeValues, int numberAttributes);
45 |
46 |
47 | //Values
48 | int numValues();
49 | String stringValue(int i);
50 | double value(int instAttIndex);
51 | double value(Attribute attribute);
52 | void setValue(int m_numAttributes, double d);
53 | boolean isMissing(int instAttIndex);
54 | int index(int i);
55 | double valueSparse(int i);
56 | boolean isMissingSparse(int p1);
57 | double[] toDoubleArray();
58 |
59 | //Class
60 | Attribute classAttribute();
61 | int classIndex();
62 | boolean classIsMissing();
63 | double classValue();
64 | int numClasses();
65 | void setClassValue(double d);
66 |
67 | Instance copy();
68 |
69 | //Dataset
70 | void setDataset(Instances dataset);
71 | Instances dataset();
72 | String toString();
73 | }
74 |
75 |
--------------------------------------------------------------------------------
/samoa-s4/src/main/java/com/yahoo/labs/samoa/topology/impl/S4Event.java:
--------------------------------------------------------------------------------
1 | package com.yahoo.labs.samoa.topology.impl;
2 |
3 | /*
4 | * #%L
5 | * SAMOA
6 | * %%
7 | * Copyright (C) 2013 Yahoo! Inc.
8 | * %%
9 | * Licensed under the Apache License, Version 2.0 (the "License");
10 | * you may not use this file except in compliance with the License.
11 | * You may obtain a copy of the License at
12 | *
13 | * http://www.apache.org/licenses/LICENSE-2.0
14 | *
15 | * Unless required by applicable law or agreed to in writing, software
16 | * distributed under the License is distributed on an "AS IS" BASIS,
17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | * See the License for the specific language governing permissions and
19 | * limitations under the License.
20 | * #L%
21 | */
22 |
23 | /**
24 | * License
25 | */
26 |
27 | import net.jcip.annotations.Immutable;
28 |
29 | import org.apache.s4.base.Event;
30 |
31 | import com.yahoo.labs.samoa.core.ContentEvent;
32 |
33 | /**
34 | * The Class InstanceEvent.
35 | */
36 | @Immutable
37 | final public class S4Event extends Event {
38 |
39 | private String key;
40 |
41 | public String getKey() {
42 | return key;
43 | }
44 |
45 | public void setKey(String key) {
46 | this.key = key;
47 | }
48 |
49 | /** The content event. */
50 | private ContentEvent contentEvent;
51 |
52 | /**
53 | * Instantiates a new instance event.
54 | */
55 | public S4Event() {
56 | // Needed for serialization of kryo
57 | }
58 |
59 | /**
60 | * Instantiates a new instance event.
61 | *
62 | * @param contentEvent the content event
63 | */
64 | public S4Event(ContentEvent contentEvent) {
65 | if (contentEvent != null) {
66 | this.contentEvent = contentEvent;
67 | this.key = contentEvent.getKey();
68 |
69 | }
70 | }
71 |
72 | /**
73 | * Gets the content event.
74 | *
75 | * @return the content event
76 | */
77 | public ContentEvent getContentEvent() {
78 | return contentEvent;
79 | }
80 |
81 | /**
82 | * Sets the content event.
83 | *
84 | * @param contentEvent the new content event
85 | */
86 | public void setContentEvent(ContentEvent contentEvent) {
87 | this.contentEvent = contentEvent;
88 | }
89 |
90 | }
91 |
--------------------------------------------------------------------------------
/samoa-api/src/main/java/com/yahoo/labs/samoa/streams/StreamSource.java:
--------------------------------------------------------------------------------
1 | package com.yahoo.labs.samoa.streams;
2 |
3 | /*
4 | * #%L
5 | * SAMOA
6 | * %%
7 | * Copyright (C) 2013 Yahoo! Inc.
8 | * %%
9 | * Licensed under the Apache License, Version 2.0 (the "License");
10 | * you may not use this file except in compliance with the License.
11 | * You may obtain a copy of the License at
12 | *
13 | * http://www.apache.org/licenses/LICENSE-2.0
14 | *
15 | * Unless required by applicable law or agreed to in writing, software
16 | * distributed under the License is distributed on an "AS IS" BASIS,
17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | * See the License for the specific language governing permissions and
19 | * limitations under the License.
20 | * #L%
21 | */
22 |
23 | /**
24 | * License
25 | */
26 |
27 | import com.yahoo.labs.samoa.moa.core.Example;
28 | import com.yahoo.labs.samoa.moa.streams.InstanceStream;
29 | import com.yahoo.labs.samoa.instances.Instance;
30 |
31 | /**
32 | * The Class StreamSource.
33 | */
34 | public class StreamSource implements java.io.Serializable{
35 |
36 | /**
37 | *
38 | */
39 | private static final long serialVersionUID = 3974668694861231236L;
40 |
41 | /**
42 | * Instantiates a new stream source.
43 | *
44 | * @param stream the stream
45 | */
46 | public StreamSource(InstanceStream stream) {
47 | super();
48 | this.stream = stream;
49 | }
50 |
51 | /** The stream. */
52 | protected InstanceStream stream;
53 |
54 | /**
55 | * Gets the stream.
56 | *
57 | * @return the stream
58 | */
59 | public InstanceStream getStream() {
60 | return stream;
61 | }
62 |
63 | /**
64 | * Next instance.
65 | *
66 | * @return the instance
67 | */
68 | public Example nextInstance() {
69 | return stream.nextInstance();
70 | }
71 |
72 | /**
73 | * Sets the stream.
74 | *
75 | * @param stream the new stream
76 | */
77 | public void setStream(InstanceStream stream) {
78 | this.stream = stream;
79 | }
80 |
81 | /**
82 | * Checks for more instances.
83 | *
84 | * @return true, if successful
85 | */
86 | public boolean hasMoreInstances() {
87 | return this.stream.hasMoreInstances();
88 | }
89 |
90 | }
91 |
--------------------------------------------------------------------------------
/samoa-samza/src/main/java/com/yahoo/labs/samoa/topology/impl/SamoaSystemFactory.java:
--------------------------------------------------------------------------------
1 | package com.yahoo.labs.samoa.topology.impl;
2 |
3 | /*
4 | * #%L
5 | * SAMOA
6 | * %%
7 | * Copyright (C) 2013 - 2014 Yahoo! Inc.
8 | * %%
9 | * Licensed under the Apache License, Version 2.0 (the "License");
10 | * you may not use this file except in compliance with the License.
11 | * You may obtain a copy of the License at
12 | *
13 | * http://www.apache.org/licenses/LICENSE-2.0
14 | *
15 | * Unless required by applicable law or agreed to in writing, software
16 | * distributed under the License is distributed on an "AS IS" BASIS,
17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | * See the License for the specific language governing permissions and
19 | * limitations under the License.
20 | * #L%
21 | */
22 |
23 | import org.apache.samza.SamzaException;
24 | import org.apache.samza.config.Config;
25 | import org.apache.samza.metrics.MetricsRegistry;
26 | import org.apache.samza.system.SystemAdmin;
27 | import org.apache.samza.system.SystemConsumer;
28 | import org.apache.samza.system.SystemFactory;
29 | import org.apache.samza.system.SystemProducer;
30 | import org.apache.samza.util.SinglePartitionWithoutOffsetsSystemAdmin;
31 |
32 | import com.yahoo.labs.samoa.topology.impl.SamzaEntranceProcessingItem.SamoaSystemConsumer;
33 |
34 | /**
35 | * Implementation of Samza's SystemFactory
36 | * Samza will use this factory to get our custom consumer
37 | * which gets the events from SAMOA EntranceProcessor
38 | * and feed them to EntranceProcessingItem task
39 | *
40 | * @author Anh Thu Vu
41 | */
42 | public class SamoaSystemFactory implements SystemFactory {
43 | @Override
44 | public SystemAdmin getAdmin(String systemName, Config config) {
45 | return new SinglePartitionWithoutOffsetsSystemAdmin();
46 | }
47 |
48 | @Override
49 | public SystemConsumer getConsumer(String systemName, Config config, MetricsRegistry registry) {
50 | return new SamoaSystemConsumer(systemName, config);
51 | }
52 |
53 | @Override
54 | public SystemProducer getProducer(String systemName, Config config, MetricsRegistry registry) {
55 | throw new SamzaException("This implementation is not supposed to produce anything.");
56 | }
57 | }
--------------------------------------------------------------------------------
/samoa-api/src/main/java/com/yahoo/labs/samoa/topology/Topology.java:
--------------------------------------------------------------------------------
1 | package com.yahoo.labs.samoa.topology;
2 |
3 | /*
4 | * #%L
5 | * SAMOA
6 | * %%
7 | * Copyright (C) 2013 Yahoo! Inc.
8 | * %%
9 | * Licensed under the Apache License, Version 2.0 (the "License");
10 | * you may not use this file except in compliance with the License.
11 | * You may obtain a copy of the License at
12 | *
13 | * http://www.apache.org/licenses/LICENSE-2.0
14 | *
15 | * Unless required by applicable law or agreed to in writing, software
16 | * distributed under the License is distributed on an "AS IS" BASIS,
17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | * See the License for the specific language governing permissions and
19 | * limitations under the License.
20 | * #L%
21 | */
22 |
23 | public interface Topology {
24 | /*
25 | * Name
26 | */
27 | /**
28 | * Get the topology's name
29 | *
30 | * @return the name of the topology
31 | */
32 | public String getTopologyName();
33 |
34 | /**
35 | * Set the topology's name
36 | *
37 | * @param topologyName
38 | * the name of the topology
39 | */
40 | public void setTopologyName(String topologyName) ;
41 |
42 | /*
43 | * Entrance Processing Items
44 | */
45 | /**
46 | * Add an EntranceProcessingItem to this topology
47 | *
48 | * @param epi
49 | * the EntranceProcessingItem to be added
50 | */
51 | void addEntranceProcessingItem(EntranceProcessingItem epi);
52 |
53 |
54 | /*
55 | * Processing Items
56 | */
57 | /**
58 | * Add a ProcessingItem to this topology
59 | * with default parallelism level (i.e. 1)
60 | *
61 | * @param procItem
62 | * the ProcessingItem to be added
63 | */
64 | void addProcessingItem(IProcessingItem procItem);
65 |
66 | /**
67 | * Add a ProcessingItem to this topology
68 | * with an associated parallelism level
69 | *
70 | * @param procItem
71 | * the ProcessingItem to be added
72 | * @param parallelismHint
73 | * the parallelism level
74 | */
75 | void addProcessingItem(IProcessingItem procItem, int parallelismHint);
76 |
77 | /*
78 | * Streams
79 | */
80 | /**
81 | *
82 | * @param stream
83 | */
84 | void addStream(Stream stream);
85 | }
86 |
--------------------------------------------------------------------------------
/samoa-samza/src/main/java/com/yahoo/labs/samoa/topology/impl/SamzaTopology.java:
--------------------------------------------------------------------------------
1 | package com.yahoo.labs.samoa.topology.impl;
2 |
3 | /*
4 | * #%L
5 | * SAMOA
6 | * %%
7 | * Copyright (C) 2013 - 2014 Yahoo! Inc.
8 | * %%
9 | * Licensed under the Apache License, Version 2.0 (the "License");
10 | * you may not use this file except in compliance with the License.
11 | * You may obtain a copy of the License at
12 | *
13 | * http://www.apache.org/licenses/LICENSE-2.0
14 | *
15 | * Unless required by applicable law or agreed to in writing, software
16 | * distributed under the License is distributed on an "AS IS" BASIS,
17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | * See the License for the specific language governing permissions and
19 | * limitations under the License.
20 | * #L%
21 | */
22 |
23 | import java.util.HashSet;
24 | import java.util.Set;
25 |
26 | import com.yahoo.labs.samoa.topology.IProcessingItem;
27 | import com.yahoo.labs.samoa.topology.AbstractTopology;
28 |
29 | /**
30 | * Topology for Samza
31 | *
32 | * @author Anh Thu Vu
33 | */
34 | public class SamzaTopology extends AbstractTopology {
35 | private int procItemCounter;
36 |
37 | public SamzaTopology(String topoName) {
38 | super(topoName);
39 | procItemCounter = 0;
40 | }
41 |
42 | @Override
43 | public void addProcessingItem(IProcessingItem procItem, int parallelism) {
44 | super.addProcessingItem(procItem, parallelism);
45 | SamzaProcessingNode samzaPi = (SamzaProcessingNode) procItem;
46 | samzaPi.setName(this.getTopologyName()+"-"+Integer.toString(procItemCounter));
47 | procItemCounter++;
48 | }
49 |
50 | /*
51 | * Gets the set of ProcessingItems, excluding EntrancePIs
52 | * Used by SamzaConfigFactory as the config for EntrancePIs and
53 | * normal PIs are different
54 | */
55 | public Set getNonEntranceProcessingItems() throws Exception {
56 | Set copiedSet = new HashSet();
57 | copiedSet.addAll(this.getProcessingItems());
58 | boolean result = copiedSet.removeAll(this.getEntranceProcessingItems());
59 | if (!result) {
60 | throw new Exception("Failed extracting the set of non-entrance processing items");
61 | }
62 | return copiedSet;
63 | }
64 | }
--------------------------------------------------------------------------------
/samoa-instances/src/main/java/com/yahoo/labs/samoa/instances/DenseInstance.java:
--------------------------------------------------------------------------------
1 | /*
2 | * To change this template, choose Tools | Templates
3 | * and open the template in the editor.
4 | */
5 | package com.yahoo.labs.samoa.instances;
6 |
7 | /*
8 | * #%L
9 | * SAMOA
10 | * %%
11 | * Copyright (C) 2013 Yahoo! Inc.
12 | * %%
13 | * Licensed under the Apache License, Version 2.0 (the "License");
14 | * you may not use this file except in compliance with the License.
15 | * You may obtain a copy of the License at
16 | *
17 | * http://www.apache.org/licenses/LICENSE-2.0
18 | *
19 | * Unless required by applicable law or agreed to in writing, software
20 | * distributed under the License is distributed on an "AS IS" BASIS,
21 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
22 | * See the License for the specific language governing permissions and
23 | * limitations under the License.
24 | * #L%
25 | */
26 |
27 | /**
28 | *
29 | * @author abifet
30 | */
31 | public class DenseInstance extends SingleLabelInstance {
32 |
33 | private static final long serialVersionUID = 280360594027716737L;
34 |
35 | public DenseInstance() {
36 | // necessary for kryo serializer
37 | }
38 |
39 | public DenseInstance(double weight, double[] res) {
40 | super(weight,res);
41 | }
42 | public DenseInstance(SingleLabelInstance inst) {
43 | super(inst);
44 | }
45 |
46 | public DenseInstance(Instance inst) {
47 | super((SingleLabelInstance) inst);
48 | }
49 | public DenseInstance(double numberAttributes) {
50 | super((int) numberAttributes);
51 | //super(1, new double[(int) numberAttributes-1]);
52 | //Add missing values
53 | //for (int i = 0; i < numberAttributes-1; i++) {
54 | // //this.setValue(i, Double.NaN);
55 | //}
56 |
57 | }
58 |
59 | @Override
60 | public String toString() {
61 | StringBuffer text = new StringBuffer();
62 |
63 | for (int i = 0; i < this.instanceInformation.numAttributes(); i++) {
64 | if (i > 0)
65 | text.append(",");
66 | text.append(this.value(i));
67 | }
68 | text.append(",").append(this.weight());
69 |
70 | return text.toString();
71 | }
72 | }
73 |
--------------------------------------------------------------------------------
/samoa-instances/src/main/java/com/yahoo/labs/samoa/instances/SingleClassInstanceData.java:
--------------------------------------------------------------------------------
1 | /*
2 | * To change this template, choose Tools | Templates
3 | * and open the template in the editor.
4 | */
5 | package com.yahoo.labs.samoa.instances;
6 |
7 | /*
8 | * #%L
9 | * SAMOA
10 | * %%
11 | * Copyright (C) 2013 Yahoo! Inc.
12 | * %%
13 | * Licensed under the Apache License, Version 2.0 (the "License");
14 | * you may not use this file except in compliance with the License.
15 | * You may obtain a copy of the License at
16 | *
17 | * http://www.apache.org/licenses/LICENSE-2.0
18 | *
19 | * Unless required by applicable law or agreed to in writing, software
20 | * distributed under the License is distributed on an "AS IS" BASIS,
21 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
22 | * See the License for the specific language governing permissions and
23 | * limitations under the License.
24 | * #L%
25 | */
26 |
27 | /**
28 | *
29 | * @author abifet
30 | */
31 | public class SingleClassInstanceData implements InstanceData {
32 |
33 | protected double classValue;
34 |
35 | @Override
36 | public int numAttributes() {
37 | return 1;
38 | }
39 |
40 | @Override
41 | public double value(int instAttIndex) {
42 | return classValue;
43 | }
44 |
45 | @Override
46 | public boolean isMissing(int indexAttribute) {
47 | return Double.isNaN(this.value(indexAttribute));
48 | }
49 |
50 | @Override
51 | public int numValues() {
52 | return 1;
53 | }
54 |
55 | @Override
56 | public int index(int i) {
57 | return 0;
58 | }
59 |
60 | @Override
61 | public double valueSparse(int i) {
62 | return value(i);
63 | }
64 |
65 | @Override
66 | public boolean isMissingSparse(int indexAttribute) {
67 | return Double.isNaN(this.value(indexAttribute));
68 | }
69 |
70 | /*@Override
71 | public double value(Attribute attribute) {
72 | return this.classValue;
73 | }*/
74 |
75 | @Override
76 | public double[] toDoubleArray() {
77 | double[] array = {this.classValue};
78 | return array;
79 | }
80 |
81 | @Override
82 | public void setValue(int m_numAttributes, double d) {
83 | this.classValue = d;
84 | }
85 |
86 | }
87 |
--------------------------------------------------------------------------------
/samoa-api/src/main/java/com/yahoo/labs/samoa/learners/classifiers/rules/distributed/RuleContentEvent.java:
--------------------------------------------------------------------------------
1 | package com.yahoo.labs.samoa.learners.classifiers.rules.distributed;
2 |
3 | /*
4 | * #%L
5 | * SAMOA
6 | * %%
7 | * Copyright (C) 2013 - 2014 Yahoo! Inc.
8 | * %%
9 | * Licensed under the Apache License, Version 2.0 (the "License");
10 | * you may not use this file except in compliance with the License.
11 | * You may obtain a copy of the License at
12 | *
13 | * http://www.apache.org/licenses/LICENSE-2.0
14 | *
15 | * Unless required by applicable law or agreed to in writing, software
16 | * distributed under the License is distributed on an "AS IS" BASIS,
17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | * See the License for the specific language governing permissions and
19 | * limitations under the License.
20 | * #L%
21 | */
22 |
23 | import com.yahoo.labs.samoa.core.ContentEvent;
24 | import com.yahoo.labs.samoa.learners.classifiers.rules.common.ActiveRule;
25 |
26 | /**
27 | * New rule from Model Aggregator/Default Rule Learner to Learners
28 | * or removed rule from Learner to Model Aggregators.
29 | *
30 | * @author Anh Thu Vu
31 | *
32 | */
33 | public class RuleContentEvent implements ContentEvent {
34 |
35 |
36 | /**
37 | *
38 | */
39 | private static final long serialVersionUID = -9046390274402894461L;
40 |
41 | private final int ruleNumberID;
42 | private final ActiveRule addingRule; // for removing rule, we only need the rule's ID
43 | private final boolean isRemoving;
44 |
45 | public RuleContentEvent() {
46 | this(0, null, false);
47 | }
48 |
49 | public RuleContentEvent(int ruleID, ActiveRule rule, boolean isRemoving) {
50 | this.ruleNumberID = ruleID;
51 | this.isRemoving = isRemoving;
52 | this.addingRule = rule;
53 | }
54 |
55 | @Override
56 | public String getKey() {
57 | return Integer.toString(this.ruleNumberID);
58 | }
59 |
60 | @Override
61 | public void setKey(String key) {
62 | // do nothing
63 | }
64 |
65 | @Override
66 | public boolean isLastEvent() {
67 | return false;
68 | }
69 |
70 | public int getRuleNumberID() {
71 | return this.ruleNumberID;
72 | }
73 |
74 | public ActiveRule getRule() {
75 | return this.addingRule;
76 | }
77 |
78 | public boolean isRemoving() {
79 | return this.isRemoving;
80 | }
81 |
82 | }
83 |
--------------------------------------------------------------------------------
/samoa-api/src/main/java/com/yahoo/labs/samoa/examples/HelloWorldSourceProcessor.java:
--------------------------------------------------------------------------------
1 | package com.yahoo.labs.samoa.examples;
2 |
3 | /*
4 | * #%L
5 | * SAMOA
6 | * %%
7 | * Copyright (C) 2013 - 2014 Yahoo! Inc.
8 | * %%
9 | * Licensed under the Apache License, Version 2.0 (the "License");
10 | * you may not use this file except in compliance with the License.
11 | * You may obtain a copy of the License at
12 | *
13 | * http://www.apache.org/licenses/LICENSE-2.0
14 | *
15 | * Unless required by applicable law or agreed to in writing, software
16 | * distributed under the License is distributed on an "AS IS" BASIS,
17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | * See the License for the specific language governing permissions and
19 | * limitations under the License.
20 | * #L%
21 | */
22 |
23 | import java.util.Random;
24 |
25 | import com.yahoo.labs.samoa.core.ContentEvent;
26 | import com.yahoo.labs.samoa.core.EntranceProcessor;
27 | import com.yahoo.labs.samoa.core.Processor;
28 |
29 | /**
30 | * Example {@link EntranceProcessor} that generates a stream of random integers.
31 | */
32 | public class HelloWorldSourceProcessor implements EntranceProcessor {
33 |
34 | private static final long serialVersionUID = 6212296305865604747L;
35 | private Random rnd;
36 | private final long maxInst;
37 | private long count;
38 |
39 | public HelloWorldSourceProcessor(long maxInst) {
40 | this.maxInst = maxInst;
41 | }
42 |
43 | @Override
44 | public boolean process(ContentEvent event) {
45 | // do nothing, API will be refined further
46 | return false;
47 | }
48 |
49 | @Override
50 | public void onCreate(int id) {
51 | rnd = new Random(id);
52 | }
53 |
54 | @Override
55 | public Processor newProcessor(Processor p) {
56 | HelloWorldSourceProcessor hwsp = (HelloWorldSourceProcessor) p;
57 | return new HelloWorldSourceProcessor(hwsp.maxInst);
58 | }
59 |
60 | @Override
61 | public boolean isFinished() {
62 | return count >= maxInst;
63 | }
64 |
65 | @Override
66 | public boolean hasNext() {
67 | return count < maxInst;
68 | }
69 |
70 | @Override
71 | public ContentEvent nextEvent() {
72 | count++;
73 | return new HelloWorldContentEvent(rnd.nextInt(), false);
74 | }
75 | }
76 |
--------------------------------------------------------------------------------
/samoa-api/src/main/java/com/yahoo/labs/samoa/topology/ProcessingItem.java:
--------------------------------------------------------------------------------
1 | package com.yahoo.labs.samoa.topology;
2 |
3 | /*
4 | * #%L
5 | * SAMOA
6 | * %%
7 | * Copyright (C) 2013 Yahoo! Inc.
8 | * %%
9 | * Licensed under the Apache License, Version 2.0 (the "License");
10 | * you may not use this file except in compliance with the License.
11 | * You may obtain a copy of the License at
12 | *
13 | * http://www.apache.org/licenses/LICENSE-2.0
14 | *
15 | * Unless required by applicable law or agreed to in writing, software
16 | * distributed under the License is distributed on an "AS IS" BASIS,
17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | * See the License for the specific language governing permissions and
19 | * limitations under the License.
20 | * #L%
21 | */
22 |
23 | /**
24 | * Processing item interface.
25 | *
26 | * @author severien
27 | *
28 | */
29 | public interface ProcessingItem extends IProcessingItem {
30 |
31 | /**
32 | * Connects this processing item in a round robin fashion. The events will
33 | * be distributed evenly between the instantiated processing items.
34 | *
35 | * @param inputStream
36 | * Stream to connect this processing item.
37 | * @return ProcessingItem
38 | */
39 | public ProcessingItem connectInputShuffleStream(Stream inputStream);
40 |
41 | /**
42 | * Connects this processing item taking the event key into account. Events
43 | * will be routed to the processing item according to the modulus of its key
44 | * and the paralellism level. Ex.: key = 5 and paralellism = 2, 5 mod 2 = 1.
45 | * Processing item responsible for 1 will receive this event.
46 | *
47 | * @param inputStream
48 | * Stream to connect this processing item.
49 | * @return ProcessingItem
50 | */
51 | public ProcessingItem connectInputKeyStream(Stream inputStream);
52 |
53 | /**
54 | * Connects this processing item to the stream in a broadcast fashion. All
55 | * processing items of this type will receive copy of the original event.
56 | *
57 | * @param inputStream
58 | * Stream to connect this processing item.
59 | * @return ProcessingItem
60 | */
61 | public ProcessingItem connectInputAllStream(Stream inputStream);
62 |
63 |
64 | /**
65 | * Gets processing item parallelism level.
66 | *
67 | * @return int
68 | */
69 | public int getParallelism();
70 | }
71 |
--------------------------------------------------------------------------------
/samoa-api/src/main/java/com/yahoo/labs/samoa/learners/classifiers/trees/FoundNode.java:
--------------------------------------------------------------------------------
1 | package com.yahoo.labs.samoa.learners.classifiers.trees;
2 |
3 | /*
4 | * #%L
5 | * SAMOA
6 | * %%
7 | * Copyright (C) 2013 Yahoo! Inc.
8 | * %%
9 | * Licensed under the Apache License, Version 2.0 (the "License");
10 | * you may not use this file except in compliance with the License.
11 | * You may obtain a copy of the License at
12 | *
13 | * http://www.apache.org/licenses/LICENSE-2.0
14 | *
15 | * Unless required by applicable law or agreed to in writing, software
16 | * distributed under the License is distributed on an "AS IS" BASIS,
17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | * See the License for the specific language governing permissions and
19 | * limitations under the License.
20 | * #L%
21 | */
22 |
23 | /**
24 | * Class that represents the necessary data structure of the node where an instance
25 | * is routed/filtered through the decision tree model.
26 | *
27 | * @author Arinto Murdopo
28 | *
29 | */
30 | final class FoundNode implements java.io.Serializable{
31 |
32 | /**
33 | *
34 | */
35 | private static final long serialVersionUID = -637695387934143293L;
36 |
37 | private final Node node;
38 | private final SplitNode parent;
39 | private final int parentBranch;
40 |
41 | FoundNode(Node node, SplitNode splitNode, int parentBranch){
42 | this.node = node;
43 | this.parent = splitNode;
44 | this.parentBranch = parentBranch;
45 | }
46 |
47 | /**
48 | * Method to get the node where an instance is routed/filtered through the decision tree
49 | * model for testing and training.
50 | *
51 | * @return The node where the instance is routed/filtered
52 | */
53 | Node getNode(){
54 | return this.node;
55 | }
56 |
57 | /**
58 | * Method to get the parent of the node where an instance is routed/filtered through the decision tree
59 | * model for testing and training
60 | *
61 | * @return The parent of the node
62 | */
63 | SplitNode getParent(){
64 | return this.parent;
65 | }
66 |
67 | /**
68 | * Method to get the index of the node (where an instance is routed/filtered through the decision tree
69 | * model for testing and training) in its parent.
70 | *
71 | * @return The index of the node in its parent node.
72 | */
73 | int getParentBranch(){
74 | return this.parentBranch;
75 | }
76 |
77 | }
--------------------------------------------------------------------------------
/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/cluster/Miniball.java:
--------------------------------------------------------------------------------
1 | package com.yahoo.labs.samoa.moa.cluster;
2 |
3 | /*
4 | * #%L
5 | * SAMOA
6 | * %%
7 | * Copyright (C) 2013 Yahoo! Inc.
8 | * %%
9 | * Licensed under the Apache License, Version 2.0 (the "License");
10 | * you may not use this file except in compliance with the License.
11 | * You may obtain a copy of the License at
12 | *
13 | * http://www.apache.org/licenses/LICENSE-2.0
14 | *
15 | * Unless required by applicable law or agreed to in writing, software
16 | * distributed under the License is distributed on an "AS IS" BASIS,
17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | * See the License for the specific language governing permissions and
19 | * limitations under the License.
20 | * #L%
21 | */
22 |
23 | import com.dreizak.miniball.model.ArrayPointSet;
24 | import com.dreizak.miniball.model.PointSet;
25 | import java.util.ArrayList;
26 | import java.util.List;
27 |
28 | public class Miniball {
29 |
30 | private int dimension;
31 | private com.dreizak.miniball.highdim.Miniball mb;
32 | private PointStorage pointSet;
33 |
34 | public Miniball(int dimension) {
35 | this.dimension = dimension;
36 | }
37 |
38 | void clear() {
39 | this.pointSet = new PointStorage(this.dimension);
40 | }
41 |
42 | void check_in(double[] array) {
43 | this.pointSet.add(array);
44 | }
45 |
46 | double[] center() {
47 | return this.mb.center();
48 | }
49 |
50 | double radius() {
51 | return this.mb.radius();
52 | }
53 |
54 | void build() {
55 | this.mb = new com.dreizak.miniball.highdim.Miniball(this.pointSet);
56 | }
57 |
58 | public class PointStorage implements PointSet {
59 |
60 | protected int dimension;
61 | protected List L;
62 |
63 | public PointStorage(int dimension) {
64 | this.dimension = dimension;
65 | this.L = new ArrayList();
66 | }
67 |
68 | public void add(double[] array) {
69 | this.L.add(array);
70 | }
71 |
72 | public int size() {
73 | return L.size();
74 | }
75 |
76 | public int dimension() {
77 | return dimension;
78 | }
79 |
80 | public double coord(int point, int coordinate) {
81 | return L.get(point)[coordinate];
82 | }
83 | }
84 | }
85 |
--------------------------------------------------------------------------------
/samoa-storm/src/main/java/com/yahoo/labs/samoa/topology/impl/StormStream.java:
--------------------------------------------------------------------------------
1 | package com.yahoo.labs.samoa.topology.impl;
2 |
3 | /*
4 | * #%L
5 | * SAMOA
6 | * %%
7 | * Copyright (C) 2013 Yahoo! Inc.
8 | * %%
9 | * Licensed under the Apache License, Version 2.0 (the "License");
10 | * you may not use this file except in compliance with the License.
11 | * You may obtain a copy of the License at
12 | *
13 | * http://www.apache.org/licenses/LICENSE-2.0
14 | *
15 | * Unless required by applicable law or agreed to in writing, software
16 | * distributed under the License is distributed on an "AS IS" BASIS,
17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | * See the License for the specific language governing permissions and
19 | * limitations under the License.
20 | * #L%
21 | */
22 |
23 | import java.util.UUID;
24 |
25 | import com.yahoo.labs.samoa.core.ContentEvent;
26 | import com.yahoo.labs.samoa.topology.Stream;
27 |
28 | /**
29 | * Abstract class to implement Storm Stream
30 | * @author Arinto Murdopo
31 | *
32 | */
33 | abstract class StormStream implements Stream, java.io.Serializable {
34 |
35 | /**
36 | *
37 | */
38 | private static final long serialVersionUID = 281835563756514852L;
39 | protected final String outputStreamId;
40 | protected final InputStreamId inputStreamId;
41 |
42 | public StormStream(String stormComponentId){
43 | this.outputStreamId = UUID.randomUUID().toString();
44 | this.inputStreamId = new InputStreamId(stormComponentId, this.outputStreamId);
45 | }
46 |
47 | @Override
48 | public abstract void put(ContentEvent contentEvent);
49 |
50 | String getOutputId(){
51 | return this.outputStreamId;
52 | }
53 |
54 | InputStreamId getInputId(){
55 | return this.inputStreamId;
56 | }
57 |
58 | final static class InputStreamId implements java.io.Serializable{
59 |
60 | /**
61 | *
62 | */
63 | private static final long serialVersionUID = -7457995634133691295L;
64 | private final String componentId;
65 | private final String streamId;
66 |
67 | InputStreamId(String componentId, String streamId){
68 | this.componentId = componentId;
69 | this.streamId = streamId;
70 | }
71 |
72 | String getComponentId(){
73 | return componentId;
74 | }
75 |
76 | String getStreamId(){
77 | return streamId;
78 | }
79 | }
80 |
81 | @Override
82 | public void setBatchSize(int batchSize) {
83 | // Ignore batch size
84 | }
85 | }
--------------------------------------------------------------------------------
/samoa-threads/src/main/java/com/yahoo/labs/samoa/topology/impl/ThreadsTopology.java:
--------------------------------------------------------------------------------
1 | package com.yahoo.labs.samoa.topology.impl;
2 |
3 | /*
4 | * #%L
5 | * SAMOA
6 | * %%
7 | * Copyright (C) 2013 Yahoo! Inc.
8 | * %%
9 | * Licensed under the Apache License, Version 2.0 (the "License");
10 | * you may not use this file except in compliance with the License.
11 | * You may obtain a copy of the License at
12 | *
13 | * http://www.apache.org/licenses/LICENSE-2.0
14 | *
15 | * Unless required by applicable law or agreed to in writing, software
16 | * distributed under the License is distributed on an "AS IS" BASIS,
17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | * See the License for the specific language governing permissions and
19 | * limitations under the License.
20 | * #L%
21 | */
22 |
23 | import com.yahoo.labs.samoa.topology.AbstractTopology;
24 | import com.yahoo.labs.samoa.topology.IProcessingItem;
25 |
26 | /**
27 | * Topology for multithreaded engine.
28 | * @author Anh Thu Vu
29 | *
30 | */
31 | public class ThreadsTopology extends AbstractTopology {
32 | ThreadsTopology(String name) {
33 | super(name);
34 | }
35 |
36 | public void run() {
37 | if (this.getEntranceProcessingItems() == null)
38 | throw new IllegalStateException("You need to set entrance PI before running the topology.");
39 | if (this.getEntranceProcessingItems().size() != 1)
40 | throw new IllegalStateException("ThreadsTopology supports 1 entrance PI only. Number of entrance PIs is "+this.getEntranceProcessingItems().size());
41 |
42 | this.setupProcessingItemInstances();
43 | ThreadsEntranceProcessingItem entrancePi = (ThreadsEntranceProcessingItem) this.getEntranceProcessingItems().toArray()[0];
44 | if (entrancePi == null)
45 | throw new IllegalStateException("You need to set entrance PI before running the topology.");
46 | entrancePi.getProcessor().onCreate(0); // id=0 as it is not used in simple mode
47 | entrancePi.startSendingEvents();
48 | }
49 |
50 | /*
51 | * Tell all the ThreadsProcessingItems to create & init their
52 | * replicas (ThreadsProcessingItemInstance)
53 | */
54 | private void setupProcessingItemInstances() {
55 | for (IProcessingItem pi:this.getProcessingItems()) {
56 | if (pi instanceof ThreadsProcessingItem) {
57 | ThreadsProcessingItem tpi = (ThreadsProcessingItem) pi;
58 | tpi.setupInstances();
59 | }
60 | }
61 | }
62 | }
63 |
--------------------------------------------------------------------------------
/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/evaluation/LearningEvaluation.java:
--------------------------------------------------------------------------------
1 | package com.yahoo.labs.samoa.moa.evaluation;
2 |
3 | /*
4 | * #%L
5 | * SAMOA
6 | * %%
7 | * Copyright (C) 2007 University of Waikato, Hamilton, New Zealand
8 | * %%
9 | * Licensed under the Apache License, Version 2.0 (the "License");
10 | * you may not use this file except in compliance with the License.
11 | * You may obtain a copy of the License at
12 | *
13 | * http://www.apache.org/licenses/LICENSE-2.0
14 | *
15 | * Unless required by applicable law or agreed to in writing, software
16 | * distributed under the License is distributed on an "AS IS" BASIS,
17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | * See the License for the specific language governing permissions and
19 | * limitations under the License.
20 | * #L%
21 | */
22 |
23 | import com.yahoo.labs.samoa.moa.AbstractMOAObject;
24 | import com.yahoo.labs.samoa.moa.core.Measurement;
25 | import com.yahoo.labs.samoa.moa.learners.Learner;
26 | import java.util.Arrays;
27 | import java.util.LinkedList;
28 | import java.util.List;
29 |
30 | /**
31 | * Class that stores an array of evaluation measurements.
32 | *
33 | * @author Richard Kirkby (rkirkby@cs.waikato.ac.nz)
34 | * @version $Revision: 7 $
35 | */
36 | public class LearningEvaluation extends AbstractMOAObject {
37 |
38 | private static final long serialVersionUID = 1L;
39 |
40 | protected Measurement[] measurements;
41 |
42 | public LearningEvaluation(Measurement[] measurements) {
43 | this.measurements = measurements.clone();
44 | }
45 |
46 | public LearningEvaluation(Measurement[] evaluationMeasurements,
47 | LearningPerformanceEvaluator cpe, Learner model) {
48 | List measurementList = new LinkedList();
49 | measurementList.addAll(Arrays.asList(evaluationMeasurements));
50 | measurementList.addAll(Arrays.asList(cpe.getPerformanceMeasurements()));
51 | measurementList.addAll(Arrays.asList(model.getModelMeasurements()));
52 | this.measurements = measurementList.toArray(new Measurement[measurementList.size()]);
53 | }
54 |
55 | public Measurement[] getMeasurements() {
56 | return this.measurements.clone();
57 | }
58 |
59 | @Override
60 | public void getDescription(StringBuilder sb, int indent) {
61 | Measurement.getMeasurementsDescription(this.measurements, sb, indent);
62 | }
63 | }
64 |
--------------------------------------------------------------------------------
/samoa-storm/src/test/java/com/yahoo/labs/samoa/AlgosTest.java:
--------------------------------------------------------------------------------
1 | package com.yahoo.labs.samoa;
2 |
3 | /*
4 | * #%L
5 | * SAMOA
6 | * %%
7 | * Copyright (C) 2013 - 2014 Yahoo! Inc.
8 | * %%
9 | * Licensed under the Apache License, Version 2.0 (the "License");
10 | * you may not use this file except in compliance with the License.
11 | * You may obtain a copy of the License at
12 | *
13 | * http://www.apache.org/licenses/LICENSE-2.0
14 | *
15 | * Unless required by applicable law or agreed to in writing, software
16 | * distributed under the License is distributed on an "AS IS" BASIS,
17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | * See the License for the specific language governing permissions and
19 | * limitations under the License.
20 | * #L%
21 | */
22 |
23 | import org.junit.Test;
24 |
25 | public class AlgosTest {
26 |
27 |
28 | @Test(timeout = 60000)
29 | public void testVHTWithStorm() throws Exception {
30 |
31 | TestParams vhtConfig = new TestParams.Builder()
32 | .inputInstances(200_000)
33 | .samplingSize(20_000)
34 | .evaluationInstances(200_000)
35 | .classifiedInstances(200_000)
36 | .classificationsCorrect(55f)
37 | .kappaStat(0f)
38 | .kappaTempStat(0f)
39 | .cliStringTemplate(TestParams.Templates.PREQEVAL_VHT_RANDOMTREE)
40 | .resultFilePollTimeout(30)
41 | .prePollWait(15)
42 | .taskClassName(LocalStormDoTask.class.getName())
43 | .build();
44 | TestUtils.test(vhtConfig);
45 |
46 | }
47 |
48 | @Test(timeout = 120000)
49 | public void testBaggingWithStorm() throws Exception {
50 | TestParams baggingConfig = new TestParams.Builder()
51 | .inputInstances(200_000)
52 | .samplingSize(20_000)
53 | .evaluationInstances(180_000)
54 | .classifiedInstances(190_000)
55 | .classificationsCorrect(60f)
56 | .kappaStat(0f)
57 | .kappaTempStat(0f)
58 | .cliStringTemplate(TestParams.Templates.PREQEVAL_BAGGING_RANDOMTREE)
59 | .resultFilePollTimeout(40)
60 | .prePollWait(20)
61 | .taskClassName(LocalStormDoTask.class.getName())
62 | .build();
63 | TestUtils.test(baggingConfig);
64 |
65 | }
66 |
67 |
68 | }
69 |
--------------------------------------------------------------------------------
/samoa-api/src/main/java/com/yahoo/labs/samoa/learners/classifiers/rules/common/RuleSplitNode.java:
--------------------------------------------------------------------------------
1 | package com.yahoo.labs.samoa.learners.classifiers.rules.common;
2 |
3 | /*
4 | * #%L
5 | * SAMOA
6 | * %%
7 | * Copyright (C) 2013 - 2014 Yahoo! Inc.
8 | * %%
9 | * Licensed under the Apache License, Version 2.0 (the "License");
10 | * you may not use this file except in compliance with the License.
11 | * You may obtain a copy of the License at
12 | *
13 | * http://www.apache.org/licenses/LICENSE-2.0
14 | *
15 | * Unless required by applicable law or agreed to in writing, software
16 | * distributed under the License is distributed on an "AS IS" BASIS,
17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | * See the License for the specific language governing permissions and
19 | * limitations under the License.
20 | * #L%
21 | */
22 |
23 | import com.yahoo.labs.samoa.moa.classifiers.core.conditionaltests.InstanceConditionalTest;
24 | import com.yahoo.labs.samoa.moa.classifiers.rules.core.Predicate;
25 | import com.yahoo.labs.samoa.moa.classifiers.rules.core.conditionaltests.NumericAttributeBinaryRulePredicate;
26 | import com.yahoo.labs.samoa.learners.classifiers.trees.SplitNode;
27 | import com.yahoo.labs.samoa.instances.Instance;
28 |
29 | /**
30 | * Represent a feature of rules (an element of ruleś nodeList).
31 | *
32 | * @author Anh Thu Vu
33 | *
34 | */
35 | public class RuleSplitNode extends SplitNode {
36 |
37 | protected double lastTargetMean;
38 | protected int operatorObserver;
39 |
40 | private static final long serialVersionUID = 1L;
41 |
42 | public InstanceConditionalTest getSplitTest() {
43 | return this.splitTest;
44 | }
45 |
46 | /**
47 | * Create a new RuleSplitNode
48 | */
49 | public RuleSplitNode() {
50 | this(null, new double[0]);
51 | }
52 | public RuleSplitNode(InstanceConditionalTest splitTest, double[] classObservations) {
53 | super(splitTest, classObservations);
54 | }
55 |
56 | public RuleSplitNode getACopy() {
57 | InstanceConditionalTest splitTest = new NumericAttributeBinaryRulePredicate((NumericAttributeBinaryRulePredicate) this.getSplitTest());
58 | return new RuleSplitNode(splitTest, this.getObservedClassDistribution());
59 | }
60 |
61 | public boolean evaluate(Instance instance) {
62 | Predicate predicate = (Predicate) this.splitTest;
63 | return predicate.evaluate(instance);
64 | }
65 |
66 | }
67 |
--------------------------------------------------------------------------------
/samoa-api/src/main/java/com/yahoo/labs/samoa/learners/classifiers/LocalLearner.java:
--------------------------------------------------------------------------------
1 | package com.yahoo.labs.samoa.learners.classifiers;
2 |
3 | /*
4 | * #%L
5 | * SAMOA
6 | * %%
7 | * Copyright (C) 2013 Yahoo! Inc.
8 | * %%
9 | * Licensed under the Apache License, Version 2.0 (the "License");
10 | * you may not use this file except in compliance with the License.
11 | * You may obtain a copy of the License at
12 | *
13 | * http://www.apache.org/licenses/LICENSE-2.0
14 | *
15 | * Unless required by applicable law or agreed to in writing, software
16 | * distributed under the License is distributed on an "AS IS" BASIS,
17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | * See the License for the specific language governing permissions and
19 | * limitations under the License.
20 | * #L%
21 | */
22 |
23 | import java.io.Serializable;
24 | import java.util.Map;
25 |
26 | import com.yahoo.labs.samoa.instances.Instance;
27 | import com.yahoo.labs.samoa.instances.Instances;
28 |
29 | /**
30 | * Learner interface for non-distributed learners.
31 | *
32 | * @author abifet
33 | */
34 | public interface LocalLearner extends Serializable {
35 |
36 | /**
37 | * Creates a new learner object.
38 | *
39 | * @return the learner
40 | */
41 | LocalLearner create();
42 |
43 | /**
44 | * Predicts the class memberships for a given instance. If an instance is
45 | * unclassified, the returned array elements must be all zero.
46 | *
47 | * @param inst
48 | * the instance to be classified
49 | * @return an array containing the estimated membership probabilities of the
50 | * test instance in each class
51 | */
52 | double[] getVotesForInstance(Instance inst);
53 |
54 | /**
55 | * Resets this classifier. It must be similar to starting a new classifier
56 | * from scratch.
57 | *
58 | */
59 | void resetLearning();
60 |
61 | /**
62 | * Trains this classifier incrementally using the given instance.
63 | *
64 | * @param inst
65 | * the instance to be used for training
66 | */
67 | void trainOnInstance(Instance inst);
68 |
69 | /**
70 | * Sets where to obtain the information of attributes of Instances
71 | *
72 | * @param dataset
73 | * the dataset that contains the information
74 | */
75 | @Deprecated
76 | public void setDataset(Instances dataset);
77 |
78 | }
79 |
--------------------------------------------------------------------------------
/samoa-api/src/main/java/com/yahoo/labs/samoa/moa/classifiers/core/AttributeSplitSuggestion.java:
--------------------------------------------------------------------------------
1 | package com.yahoo.labs.samoa.moa.classifiers.core;
2 |
3 | /*
4 | * #%L
5 | * SAMOA
6 | * %%
7 | * Copyright (C) 2007 University of Waikato, Hamilton, New Zealand
8 | * %%
9 | * Licensed under the Apache License, Version 2.0 (the "License");
10 | * you may not use this file except in compliance with the License.
11 | * You may obtain a copy of the License at
12 | *
13 | * http://www.apache.org/licenses/LICENSE-2.0
14 | *
15 | * Unless required by applicable law or agreed to in writing, software
16 | * distributed under the License is distributed on an "AS IS" BASIS,
17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | * See the License for the specific language governing permissions and
19 | * limitations under the License.
20 | * #L%
21 | */
22 |
23 | import com.yahoo.labs.samoa.moa.AbstractMOAObject;
24 | import com.yahoo.labs.samoa.moa.classifiers.core.conditionaltests.InstanceConditionalTest;
25 |
26 | /**
27 | * Class for computing attribute split suggestions given a split test.
28 | *
29 | * @author Richard Kirkby (rkirkby@cs.waikato.ac.nz)
30 | * @version $Revision: 7 $
31 | */
32 | public class AttributeSplitSuggestion extends AbstractMOAObject implements Comparable {
33 |
34 | private static final long serialVersionUID = 1L;
35 |
36 | public InstanceConditionalTest splitTest;
37 |
38 | public double[][] resultingClassDistributions;
39 |
40 | public double merit;
41 |
42 | public AttributeSplitSuggestion() {}
43 |
44 | public AttributeSplitSuggestion(InstanceConditionalTest splitTest,
45 | double[][] resultingClassDistributions, double merit) {
46 | this.splitTest = splitTest;
47 | this.resultingClassDistributions = resultingClassDistributions.clone();
48 | this.merit = merit;
49 | }
50 |
51 | public int numSplits() {
52 | return this.resultingClassDistributions.length;
53 | }
54 |
55 | public double[] resultingClassDistributionFromSplit(int splitIndex) {
56 | return this.resultingClassDistributions[splitIndex].clone();
57 | }
58 |
59 | @Override
60 | public int compareTo(AttributeSplitSuggestion comp) {
61 | return Double.compare(this.merit, comp.merit);
62 | }
63 |
64 | @Override
65 | public void getDescription(StringBuilder sb, int indent) {
66 | // do nothing
67 | }
68 | }
69 |
--------------------------------------------------------------------------------
/samoa-samza/src/main/java/com/yahoo/labs/samoa/utils/SerializableSerializer.java:
--------------------------------------------------------------------------------
1 | package com.yahoo.labs.samoa.utils;
2 |
3 | /*
4 | * #%L
5 | * SAMOA
6 | * %%
7 | * Copyright (C) 2013 - 2014 Yahoo! Inc.
8 | * %%
9 | * Licensed under the Apache License, Version 2.0 (the "License");
10 | * you may not use this file except in compliance with the License.
11 | * You may obtain a copy of the License at
12 | *
13 | * http://www.apache.org/licenses/LICENSE-2.0
14 | *
15 | * Unless required by applicable law or agreed to in writing, software
16 | * distributed under the License is distributed on an "AS IS" BASIS,
17 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
18 | * See the License for the specific language governing permissions and
19 | * limitations under the License.
20 | * #L%
21 | */
22 |
23 | import java.io.ByteArrayInputStream;
24 | import java.io.ByteArrayOutputStream;
25 | import java.io.IOException;
26 | import java.io.ObjectInputStream;
27 | import java.io.ObjectOutputStream;
28 |
29 | import com.esotericsoftware.kryo.Kryo;
30 | import com.esotericsoftware.kryo.Serializer;
31 | import com.esotericsoftware.kryo.io.Input;
32 | import com.esotericsoftware.kryo.io.Output;
33 |
34 | /**
35 | * Serialize and deserialize objects with Java serialization
36 | *
37 | * @author Anh Thu Vu
38 | */
39 | public class SerializableSerializer extends Serializer