├── .github └── workflows │ └── manual.yml ├── .gitignore ├── CODEOWNERS ├── README.md ├── Vagrantfile ├── default.json ├── lesson1 ├── stage1 │ ├── pom.xml │ └── src │ │ └── jvm │ │ └── udacity │ │ └── storm │ │ ├── ExclamationTopology.java │ │ └── ReporterExclamationTopology.java ├── stage2 │ ├── pom.xml │ └── src │ │ └── jvm │ │ └── udacity │ │ └── storm │ │ ├── ExclamationTopology.java │ │ ├── ReporterExclamationTopology.java │ │ └── spout │ │ └── RandomSentenceSpout.java └── stage3 │ ├── pom.xml │ └── src │ └── jvm │ └── udacity │ └── storm │ ├── ExclamationTopology.java │ ├── ReporterExclamationTopology.java │ └── spout │ └── RandomSentenceSpout.java ├── lesson2 ├── stage1 │ ├── pom.xml │ └── src │ │ └── jvm │ │ └── udacity │ │ └── storm │ │ ├── WordCountTopology.java │ │ └── spout │ │ └── RandomSentenceSpout.java ├── stage2 │ ├── pom.xml │ └── src │ │ └── jvm │ │ └── udacity │ │ └── storm │ │ ├── WordCountTopology.java │ │ └── spout │ │ └── RandomSentenceSpout.java ├── stage3 │ ├── pom.xml │ └── src │ │ └── jvm │ │ └── udacity │ │ └── storm │ │ ├── SentenceCountTopology.java │ │ ├── WordCountTopology.java │ │ └── spout │ │ └── RandomSentenceSpout.java ├── stage4 │ ├── pom.xml │ └── src │ │ └── jvm │ │ └── udacity │ │ └── storm │ │ ├── SentenceCountTopology.java │ │ ├── SentenceWordCountTopology.java │ │ ├── WordCountTopology.java │ │ └── spout │ │ └── RandomSentenceSpout.java ├── stage5 │ ├── pom.xml │ └── src │ │ └── jvm │ │ └── udacity │ │ └── storm │ │ └── TweetTopology.java ├── stage6 │ ├── pom.xml │ └── src │ │ └── jvm │ │ └── udacity │ │ └── storm │ │ ├── CountBolt.java │ │ ├── ParseTweetBolt.java │ │ ├── ReportBolt.java │ │ ├── TweetSpout.java │ │ └── TweetTopology.java └── stage7 │ ├── pom.xml │ └── src │ └── jvm │ └── udacity │ └── storm │ ├── CountBolt.java │ ├── ParseTweetBolt.java │ ├── ReportBolt.java │ ├── RollingCountBolt.java │ ├── TweetSpout.java │ ├── TweetTopology.java │ └── tools │ ├── NthLastModifiedTimeTracker.java │ ├── 
SlidingWindowCounter.java │ ├── SlotBasedCounter.java │ └── TupleHelpers.java ├── lesson3 ├── stage1 │ ├── pom.xml │ └── src │ │ └── jvm │ │ └── udacity │ │ └── storm │ │ ├── CountBolt.java │ │ ├── ParseTweetBolt.java │ │ ├── ReportBolt.java │ │ ├── TweetSpout.java │ │ └── TweetTopology.java ├── stage2 │ ├── pom.xml │ └── src │ │ └── jvm │ │ └── udacity │ │ └── storm │ │ ├── CountBolt.java │ │ ├── ParseTweetBolt.java │ │ ├── ReportBolt.java │ │ ├── SplitSentence.java │ │ ├── TweetSpout.java │ │ ├── TweetTopology.java │ │ └── resources │ │ ├── splitsentence.py │ │ └── storm.py ├── stage3 │ ├── pom.xml │ └── src │ │ └── jvm │ │ └── udacity │ │ └── storm │ │ ├── CountBolt.java │ │ ├── ParseTweetBolt.java │ │ ├── ReportBolt.java │ │ ├── SplitSentence.java │ │ ├── TweetSpout.java │ │ ├── TweetTopology.java │ │ ├── URLBolt.java │ │ └── resources │ │ ├── splitsentence.py │ │ ├── storm.py │ │ └── urltext.py ├── stage4 │ ├── pom.xml │ └── src │ │ └── jvm │ │ └── udacity │ │ └── storm │ │ ├── CountBolt.java │ │ ├── ParseTweetBolt.java │ │ ├── ReportBolt.java │ │ ├── RollingCountBolt.java │ │ ├── TweetSpout.java │ │ ├── TweetTopology.java │ │ └── tools │ │ ├── NthLastModifiedTimeTracker.java │ │ ├── SlidingWindowCounter.java │ │ ├── SlotBasedCounter.java │ │ └── TupleHelpers.java ├── stage5 │ ├── pom.xml │ └── src │ │ └── jvm │ │ └── udacity │ │ └── storm │ │ ├── AbstractRankerBolt.java │ │ ├── CountBolt.java │ │ ├── IntermediateRankingsBolt.java │ │ ├── ParseTweetBolt.java │ │ ├── ReportBolt.java │ │ ├── RollingCountBolt.java │ │ ├── TopNTweetTopology.java │ │ ├── TotalRankingsBolt.java │ │ ├── TweetSpout.java │ │ ├── spout │ │ └── RandomSentenceSpout.java │ │ └── tools │ │ ├── NthLastModifiedTimeTracker.java │ │ ├── Rankable.java │ │ ├── RankableObjectWithFields.java │ │ ├── Rankings.java │ │ ├── SlidingWindowCounter.java │ │ ├── SlotBasedCounter.java │ │ └── TupleHelpers.java ├── stage6 │ ├── pom.xml │ └── src │ │ └── jvm │ │ └── udacity │ │ └── storm │ │ ├── 
ExclamationTopology.java │ │ ├── ReportBolt.java │ │ └── spout │ │ ├── MyLikesSpout.java │ │ └── MyNamesSpout.java └── stage7 │ ├── pom.xml │ └── src │ └── jvm │ └── udacity │ └── storm │ ├── ExclamationTopology.java │ ├── ReportBolt.java │ └── spout │ ├── MyLikesSpout.java │ └── MyNamesSpout.java ├── lesson4 └── TeamAwesome │ ├── FinalProject │ ├── 2014_Gaz_counties_national.txt │ ├── geoinfo.csv │ ├── pom.xml │ └── src │ │ ├── MyPropFile.properties │ │ └── jvm │ │ ├── geocode │ │ ├── GeoName.java │ │ ├── ReverseGeoCode.java │ │ └── kdtree │ │ │ ├── KDNode.java │ │ │ ├── KDNodeComparator.java │ │ │ └── KDTree.java │ │ ├── geoinfo.csv │ │ └── udacity │ │ └── storm │ │ ├── CountBolt.java │ │ ├── InfoBolt.java │ │ ├── ParseTweetBolt.java │ │ ├── ReportBolt.java │ │ ├── TopNTweetTopology.java │ │ ├── TopWords.java │ │ ├── TweetSpout.java │ │ ├── spout │ │ └── RandomSentenceSpout.java │ │ └── tools │ │ ├── CountiesLookup.java │ │ ├── NthLastModifiedTimeTracker.java │ │ ├── Rankable.java │ │ ├── RankableObjectWithFields.java │ │ ├── Rankings.java │ │ ├── SentimentAnalyzer.java │ │ ├── SlidingWindowCounter.java │ │ ├── SlotBasedCounter.java │ │ ├── TupleHelpers.java │ │ └── ValueComparator.java │ ├── README.md │ └── viz │ ├── README.md │ ├── app.py │ ├── d3 │ ├── LICENSE │ ├── d3.js │ ├── d3.min.js │ └── d3.zip │ ├── dump.rdb │ ├── rt-provision-32.sh │ ├── static │ ├── Twitter_logo_white.png │ ├── Udacity-logoRobot.png │ ├── app-cloud.js │ ├── app-map.js │ ├── app.js │ ├── countyLookup.js │ ├── d3.layout.cloud.js │ ├── datamaps.js │ ├── unemployment.tsv │ ├── us - Copy.json │ └── us.json │ └── templates │ ├── basic.html │ ├── cloud.html │ └── map.html ├── provision.sh └── viz ├── README.md ├── app.py ├── d3 ├── LICENSE ├── d3.js ├── d3.min.js └── d3.zip ├── dump.rdb ├── rt-provision-32.sh ├── static ├── Twitter_logo_white.png ├── Udacity-logoRobot.png ├── app-cloud.js ├── app.js └── d3.layout.cloud.js └── templates ├── basic.html └── cloud.html 
/.github/workflows/manual.yml: -------------------------------------------------------------------------------- 1 | # Workflow to ensure whenever a Github PR is submitted, 2 | # a JIRA ticket gets created automatically. 3 | name: Manual Workflow 4 | 5 | # Controls when the action will run. 6 | on: 7 | # Triggers the workflow on pull request events but only for the master branch 8 | pull_request_target: 9 | types: [opened, reopened] 10 | 11 | # Allows you to run this workflow manually from the Actions tab 12 | workflow_dispatch: 13 | 14 | jobs: 15 | test-transition-issue: 16 | name: Convert Github Issue to Jira Issue 17 | runs-on: ubuntu-latest 18 | steps: 19 | - name: Checkout 20 | uses: actions/checkout@master 21 | 22 | - name: Login 23 | uses: atlassian/gajira-login@master 24 | env: 25 | JIRA_BASE_URL: ${{ secrets.JIRA_BASE_URL }} 26 | JIRA_USER_EMAIL: ${{ secrets.JIRA_USER_EMAIL }} 27 | JIRA_API_TOKEN: ${{ secrets.JIRA_API_TOKEN }} 28 | 29 | - name: Create NEW JIRA ticket 30 | id: create 31 | uses: atlassian/gajira-create@master 32 | with: 33 | project: CONUPDATE 34 | issuetype: Task 35 | summary: | 36 | Github PR [Assign the ND component] | Repo: ${{ github.repository }} | PR# ${{github.event.number}} 37 | description: | 38 | Repo link: https://github.com/${{ github.repository }} 39 | PR no. ${{ github.event.pull_request.number }} 40 | PR title: ${{ github.event.pull_request.title }} 41 | PR description: ${{ github.event.pull_request.description }} 42 | In addition, please resolve other issues, if any. 
43 | fields: '{"components": [{"name":"Github PR"}], "customfield_16449":"https://classroom.udacity.com/", "customfield_16450":"Resolve the PR", "labels": ["github"], "priority":{"id": "4"}}' 44 | 45 | - name: Log created issue 46 | run: echo "Issue ${{ steps.create.outputs.issue }} was created" 47 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | .Python 10 | env/ 11 | bin/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | eggs/ 16 | lib/ 17 | lib64/ 18 | parts/ 19 | sdist/ 20 | var/ 21 | *.egg-info/ 22 | .installed.cfg 23 | *.egg 24 | 25 | # Installer logs 26 | pip-log.txt 27 | pip-delete-this-directory.txt 28 | 29 | # Unit test / coverage reports 30 | htmlcov/ 31 | .tox/ 32 | .coverage 33 | .cache 34 | nosetests.xml 35 | coverage.xml 36 | 37 | # Translations 38 | *.mo 39 | 40 | # Mr Developer 41 | .mr.developer.cfg 42 | .project 43 | .pydevproject 44 | 45 | # Rope 46 | .ropeproject 47 | 48 | # Django stuff: 49 | *.log 50 | *.pot 51 | 52 | # Sphinx documentation 53 | docs/_build/ 54 | 55 | # vagrant bookkeeping 56 | .vagrant/ 57 | 58 | # packer stuff 59 | packer_virtualbox-ovf_virtualbox.box 60 | 61 | # maven builds 62 | **/target/* 63 | 64 | .credentials 65 | -------------------------------------------------------------------------------- /CODEOWNERS: -------------------------------------------------------------------------------- 1 | * @udacity/active-public-content -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Udacity and Twitter bring you Real-Time Analytics with Apache Storm 2 | ===== 3 | 4 | Join the course for free: 5 | www.udacity.com/course/ud381 6 | 
-------------------------------------------------------------------------------- /Vagrantfile: -------------------------------------------------------------------------------- 1 | # -*- mode: ruby -*- 2 | # vi: set ft=ruby : 3 | 4 | # Vagrantfile API/syntax version. Don't touch unless you know what you're doing! 5 | VAGRANTFILE_API_VERSION = "2" 6 | 7 | Vagrant.configure(VAGRANTFILE_API_VERSION) do |config| 8 | config.vm.box = "udacity/ud381" 9 | config.vm.network :forwarded_port, guest: 5000, host: 5000 10 | end 11 | -------------------------------------------------------------------------------- /default.json: -------------------------------------------------------------------------------- 1 | { 2 | "variables": { 3 | "home": "{{env `HOME`}}", 4 | "cloud_token": "{{env `VAGRANT_CLOUD_TOKEN`}}", 5 | "version": "{{env `BOX_VERSION`}}" 6 | }, 7 | "builders": [ 8 | { 9 | "type": "virtualbox-ovf", 10 | "source_path": "{{user `home`}}/.vagrant.d/boxes/box-cutter-VAGRANTSLASH-ubuntu1404-i386/1.0.2/virtualbox/box.ovf", 11 | "ssh_username": "vagrant", 12 | "ssh_password": "vagrant", 13 | "headless": true, 14 | "vboxmanage": [ 15 | ["modifyvm", "{{.Name}}", "--memory", "2048"], 16 | ["modifyvm", "{{.Name}}", "--cpus", "2"] 17 | ], 18 | "ssh_wait_timeout": "30s", 19 | "shutdown_command": "sudo shutdown -h now" 20 | } 21 | ], 22 | "provisioners": [ 23 | { 24 | "type": "shell", 25 | "script": "provision.sh" 26 | } 27 | ], 28 | "post-processors": [ 29 | [{ 30 | "type": "vagrant" 31 | }, 32 | { 33 | "type": "vagrant-cloud", 34 | "box_tag": "udacity/ud381", 35 | "access_token": "{{user `cloud_token`}}", 36 | "version": "{{user `version`}}" 37 | }] 38 | ] 39 | } 40 | -------------------------------------------------------------------------------- /lesson1/stage1/src/jvm/udacity/storm/ExclamationTopology.java: -------------------------------------------------------------------------------- 1 | package udacity.storm; 2 | 3 | import backtype.storm.Config; 4 | import 
backtype.storm.LocalCluster; 5 | import backtype.storm.StormSubmitter; 6 | import backtype.storm.task.OutputCollector; 7 | import backtype.storm.task.TopologyContext; 8 | import backtype.storm.testing.TestWordSpout; 9 | import backtype.storm.topology.OutputFieldsDeclarer; 10 | import backtype.storm.topology.TopologyBuilder; 11 | import backtype.storm.topology.base.BaseRichBolt; 12 | import backtype.storm.tuple.Fields; 13 | import backtype.storm.tuple.Tuple; 14 | import backtype.storm.tuple.Values; 15 | import backtype.storm.utils.Utils; 16 | 17 | import java.util.Map; 18 | 19 | /** 20 | * This is a basic example of a Storm topology. 21 | */ 22 | 23 | /** 24 | * This is a basic example of a storm topology. 25 | * 26 | * This topology demonstrates how to add three exclamation marks '!!!' 27 | * to each word emitted 28 | * 29 | * This is an example for Udacity Real Time Analytics Course - ud381 30 | * 31 | */ 32 | public class ExclamationTopology { 33 | 34 | /** 35 | * A bolt that adds the exclamation marks '!!!' 
to word 36 | */ 37 | public static class ExclamationBolt extends BaseRichBolt 38 | { 39 | // To output tuples from this bolt to the next stage bolts, if any 40 | OutputCollector _collector; 41 | 42 | @Override 43 | public void prepare( 44 | Map map, 45 | TopologyContext topologyContext, 46 | OutputCollector collector) 47 | { 48 | // save the output collector for emitting tuples 49 | _collector = collector; 50 | } 51 | 52 | @Override 53 | public void execute(Tuple tuple) 54 | { 55 | // get the column word from tuple 56 | String word = tuple.getString(0); 57 | 58 | // build the word with the exclamation marks appended 59 | StringBuilder exclamatedWord = new StringBuilder(); 60 | exclamatedWord.append(word).append("!!!"); 61 | 62 | // emit the word with exclamations 63 | _collector.emit(tuple, new Values(exclamatedWord.toString())); 64 | } 65 | 66 | @Override 67 | public void declareOutputFields(OutputFieldsDeclarer declarer) 68 | { 69 | // tell storm the schema of the output tuple for this spout 70 | 71 | // tuple consists of a single column called 'exclamated-word' 72 | declarer.declare(new Fields("exclamated-word")); 73 | } 74 | } 75 | 76 | public static void main(String[] args) throws Exception 77 | { 78 | // create the topology 79 | TopologyBuilder builder = new TopologyBuilder(); 80 | 81 | // attach the word spout to the topology - parallelism of 10 82 | builder.setSpout("word", new TestWordSpout(), 10); 83 | 84 | // attach the exclamation bolt to the topology - parallelism of 3 85 | builder.setBolt("exclaim1", new ExclamationBolt(), 3).shuffleGrouping("word"); 86 | 87 | // attach another exclamation bolt to the topology - parallelism of 2 88 | builder.setBolt("exclaim2", new ExclamationBolt(), 2).shuffleGrouping("exclaim1"); 89 | 90 | // create the default config object 91 | Config conf = new Config(); 92 | 93 | // set the config in debugging mode 94 | conf.setDebug(true); 95 | 96 | if (args != null && args.length > 0) { 97 | 98 | // run it in a live cluster 99 
| 100 | // set the number of workers for running all spout and bolt tasks 101 | conf.setNumWorkers(3); 102 | 103 | // create the topology and submit with config 104 | StormSubmitter.submitTopology(args[0], conf, builder.createTopology()); 105 | 106 | } else { 107 | 108 | // run it in a simulated local cluster 109 | 110 | // create the local cluster instance 111 | LocalCluster cluster = new LocalCluster(); 112 | 113 | // submit the topology to the local cluster 114 | cluster.submitTopology("exclamation", conf, builder.createTopology()); 115 | 116 | // let the topology run for 20 seconds. note topologies never terminate! 117 | Thread.sleep(20000); 118 | 119 | // kill the topology 120 | cluster.killTopology("exclamation"); 121 | 122 | // we are done, so shutdown the local cluster 123 | cluster.shutdown(); 124 | } 125 | } 126 | } 127 | -------------------------------------------------------------------------------- /lesson1/stage2/src/jvm/udacity/storm/ExclamationTopology.java: -------------------------------------------------------------------------------- 1 | package udacity.storm; 2 | 3 | import backtype.storm.Config; 4 | import backtype.storm.LocalCluster; 5 | import backtype.storm.StormSubmitter; 6 | import backtype.storm.task.OutputCollector; 7 | import backtype.storm.task.TopologyContext; 8 | import backtype.storm.testing.TestWordSpout; 9 | import backtype.storm.topology.OutputFieldsDeclarer; 10 | import backtype.storm.topology.TopologyBuilder; 11 | import backtype.storm.topology.base.BaseRichBolt; 12 | import backtype.storm.tuple.Fields; 13 | import backtype.storm.tuple.Tuple; 14 | import backtype.storm.tuple.Values; 15 | import backtype.storm.utils.Utils; 16 | 17 | import java.util.Map; 18 | 19 | /** 20 | * This is a basic example of a Storm topology. 21 | */ 22 | 23 | /** 24 | * This is a basic example of a storm topology. 25 | * 26 | * This topology demonstrates how to add three exclamation marks '!!!' 
27 | * to each word emitted 28 | * 29 | * This is an example for Udacity Real Time Analytics Course - ud381 30 | * 31 | */ 32 | public class ExclamationTopology { 33 | 34 | /** 35 | * A bolt that adds the exclamation marks '!!!' to word 36 | */ 37 | public static class ExclamationBolt extends BaseRichBolt 38 | { 39 | // To output tuples from this bolt to the next stage bolts, if any 40 | OutputCollector _collector; 41 | 42 | @Override 43 | public void prepare( 44 | Map map, 45 | TopologyContext topologyContext, 46 | OutputCollector collector) 47 | { 48 | // save the output collector for emitting tuples 49 | _collector = collector; 50 | } 51 | 52 | @Override 53 | public void execute(Tuple tuple) 54 | { 55 | // get the column word from tuple 56 | String word = tuple.getString(0); 57 | 58 | // build the word with the exclamation marks appended 59 | StringBuilder exclamatedWord = new StringBuilder(); 60 | exclamatedWord.append(word).append("!!!"); 61 | 62 | // emit the word with exclamations 63 | _collector.emit(tuple, new Values(exclamatedWord.toString())); 64 | } 65 | 66 | @Override 67 | public void declareOutputFields(OutputFieldsDeclarer declarer) 68 | { 69 | // tell storm the schema of the output tuple for this spout 70 | 71 | // tuple consists of a single column called 'exclamated-word' 72 | declarer.declare(new Fields("exclamated-word")); 73 | } 74 | } 75 | 76 | public static void main(String[] args) throws Exception 77 | { 78 | // create the topology 79 | TopologyBuilder builder = new TopologyBuilder(); 80 | 81 | // attach the word spout to the topology - parallelism of 10 82 | builder.setSpout("word", new TestWordSpout(), 10); 83 | 84 | // attach the exclamation bolt to the topology - parallelism of 3 85 | builder.setBolt("exclaim1", new ExclamationBolt(), 3).shuffleGrouping("word"); 86 | 87 | // attach another exclamation bolt to the topology - parallelism of 2 88 | builder.setBolt("exclaim2", new ExclamationBolt(), 2).shuffleGrouping("exclaim1"); 89 | 90 | 
// create the default config object 91 | Config conf = new Config(); 92 | 93 | // set the config in debugging mode 94 | conf.setDebug(true); 95 | 96 | if (args != null && args.length > 0) { 97 | 98 | // run it in a live cluster 99 | 100 | // set the number of workers for running all spout and bolt tasks 101 | conf.setNumWorkers(3); 102 | 103 | // create the topology and submit with config 104 | StormSubmitter.submitTopology(args[0], conf, builder.createTopology()); 105 | 106 | } else { 107 | 108 | // run it in a simulated local cluster 109 | 110 | // create the local cluster instance 111 | LocalCluster cluster = new LocalCluster(); 112 | 113 | // submit the topology to the local cluster 114 | cluster.submitTopology("exclamation", conf, builder.createTopology()); 115 | 116 | // let the topology run for 30 seconds. note topologies never terminate! 117 | Thread.sleep(30000); 118 | 119 | // kill the topology 120 | cluster.killTopology("exclamation"); 121 | 122 | // we are done, so shutdown the local cluster 123 | cluster.shutdown(); 124 | } 125 | } 126 | } 127 | -------------------------------------------------------------------------------- /lesson1/stage2/src/jvm/udacity/storm/spout/RandomSentenceSpout.java: -------------------------------------------------------------------------------- 1 | package udacity.storm.spout; 2 | 3 | import backtype.storm.spout.SpoutOutputCollector; 4 | import backtype.storm.task.TopologyContext; 5 | import backtype.storm.topology.OutputFieldsDeclarer; 6 | import backtype.storm.topology.base.BaseRichSpout; 7 | import backtype.storm.tuple.Fields; 8 | import backtype.storm.tuple.Values; 9 | import backtype.storm.utils.Utils; 10 | 11 | import java.util.Map; 12 | import java.util.Random; 13 | 14 | public class RandomSentenceSpout extends BaseRichSpout { 15 | SpoutOutputCollector _collector; 16 | Random _rand; 17 | 18 | 19 | @Override 20 | public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) { 21 | _collector = 
collector; 22 | _rand = new Random(); 23 | } 24 | 25 | @Override 26 | public void nextTuple() { 27 | Utils.sleep(100); 28 | String[] sentences = new String[]{ 29 | "the cow jumped over the moon", 30 | "an apple a day keeps the doctor away", 31 | "four score and seven years ago", 32 | "snow white and the seven dwarfs", 33 | "i am at two with nature" 34 | }; 35 | String sentence = sentences[_rand.nextInt(sentences.length)]; 36 | _collector.emit(new Values(sentence)); 37 | } 38 | 39 | @Override 40 | public void declareOutputFields(OutputFieldsDeclarer declarer) { 41 | declarer.declare(new Fields("sentence")); 42 | } 43 | 44 | } 45 | -------------------------------------------------------------------------------- /lesson1/stage3/src/jvm/udacity/storm/ExclamationTopology.java: -------------------------------------------------------------------------------- 1 | package udacity.storm; 2 | 3 | import backtype.storm.Config; 4 | import backtype.storm.LocalCluster; 5 | import backtype.storm.StormSubmitter; 6 | import backtype.storm.task.OutputCollector; 7 | import backtype.storm.task.TopologyContext; 8 | import backtype.storm.testing.TestWordSpout; 9 | import backtype.storm.topology.OutputFieldsDeclarer; 10 | import backtype.storm.topology.TopologyBuilder; 11 | import backtype.storm.topology.base.BaseRichBolt; 12 | import backtype.storm.tuple.Fields; 13 | import backtype.storm.tuple.Tuple; 14 | import backtype.storm.tuple.Values; 15 | import backtype.storm.utils.Utils; 16 | 17 | import java.util.Map; 18 | 19 | /** 20 | * This is a basic example of a Storm topology. 21 | */ 22 | 23 | /** 24 | * This is a basic example of a storm topology. 25 | * 26 | * This topology demonstrates how to add three exclamation marks '!!!' 27 | * to each word emitted 28 | * 29 | * This is an example for Udacity Real Time Analytics Course - ud381 30 | * 31 | */ 32 | public class ExclamationTopology { 33 | 34 | /** 35 | * A bolt that adds the exclamation marks '!!!' 
to word 36 | */ 37 | public static class ExclamationBolt extends BaseRichBolt 38 | { 39 | // To output tuples from this bolt to the next stage bolts, if any 40 | OutputCollector _collector; 41 | 42 | @Override 43 | public void prepare( 44 | Map map, 45 | TopologyContext topologyContext, 46 | OutputCollector collector) 47 | { 48 | // save the output collector for emitting tuples 49 | _collector = collector; 50 | } 51 | 52 | @Override 53 | public void execute(Tuple tuple) 54 | { 55 | // get the column word from tuple 56 | String word = tuple.getString(0); 57 | 58 | // build the word with the exclamation marks appended 59 | StringBuilder exclamatedWord = new StringBuilder(); 60 | exclamatedWord.append(word).append("!!!"); 61 | 62 | // emit the word with exclamations 63 | _collector.emit(tuple, new Values(exclamatedWord.toString())); 64 | } 65 | 66 | @Override 67 | public void declareOutputFields(OutputFieldsDeclarer declarer) 68 | { 69 | // tell storm the schema of the output tuple for this spout 70 | 71 | // tuple consists of a single column called 'exclamated-word' 72 | declarer.declare(new Fields("exclamated-word")); 73 | } 74 | } 75 | 76 | public static void main(String[] args) throws Exception 77 | { 78 | // create the topology 79 | TopologyBuilder builder = new TopologyBuilder(); 80 | 81 | // attach the word spout to the topology - parallelism of 10 82 | builder.setSpout("word", new TestWordSpout(), 10); 83 | 84 | // attach the exclamation bolt to the topology - parallelism of 3 85 | builder.setBolt("exclaim1", new ExclamationBolt(), 3).shuffleGrouping("word"); 86 | 87 | // attach another exclamation bolt to the topology - parallelism of 2 88 | builder.setBolt("exclaim2", new ExclamationBolt(), 2).shuffleGrouping("exclaim1"); 89 | 90 | // create the default config object 91 | Config conf = new Config(); 92 | 93 | // set the config in debugging mode 94 | conf.setDebug(true); 95 | 96 | if (args != null && args.length > 0) { 97 | 98 | // run it in a live cluster 99 
| 100 | // set the number of workers for running all spout and bolt tasks 101 | conf.setNumWorkers(3); 102 | 103 | // create the topology and submit with config 104 | StormSubmitter.submitTopology(args[0], conf, builder.createTopology()); 105 | 106 | } else { 107 | 108 | // run it in a simulated local cluster 109 | 110 | // create the local cluster instance 111 | LocalCluster cluster = new LocalCluster(); 112 | 113 | // submit the topology to the local cluster 114 | cluster.submitTopology("exclamation", conf, builder.createTopology()); 115 | 116 | // let the topology run for 30 seconds. note topologies never terminate! 117 | Thread.sleep(30000); 118 | 119 | // kill the topology 120 | cluster.killTopology("exclamation"); 121 | 122 | // we are done, so shutdown the local cluster 123 | cluster.shutdown(); 124 | } 125 | } 126 | } 127 | -------------------------------------------------------------------------------- /lesson1/stage3/src/jvm/udacity/storm/spout/RandomSentenceSpout.java: -------------------------------------------------------------------------------- 1 | package udacity.storm.spout; 2 | 3 | import backtype.storm.spout.SpoutOutputCollector; 4 | import backtype.storm.task.TopologyContext; 5 | import backtype.storm.topology.OutputFieldsDeclarer; 6 | import backtype.storm.topology.base.BaseRichSpout; 7 | import backtype.storm.tuple.Fields; 8 | import backtype.storm.tuple.Values; 9 | import backtype.storm.utils.Utils; 10 | 11 | import java.util.Map; 12 | import java.util.Random; 13 | 14 | public class RandomSentenceSpout extends BaseRichSpout { 15 | SpoutOutputCollector _collector; 16 | Random _rand; 17 | 18 | 19 | @Override 20 | public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) { 21 | _collector = collector; 22 | _rand = new Random(); 23 | } 24 | 25 | @Override 26 | public void nextTuple() { 27 | Utils.sleep(100); 28 | String[] sentences = new String[]{ 29 | "the cow jumped over the moon", 30 | "an apple a day keeps the doctor 
away", 31 | "four score and seven years ago", 32 | "snow white and the seven dwarfs", 33 | "i am at two with nature" 34 | }; 35 | String sentence = sentences[_rand.nextInt(sentences.length)]; 36 | _collector.emit(new Values(sentence)); 37 | } 38 | 39 | @Override 40 | public void declareOutputFields(OutputFieldsDeclarer declarer) { 41 | declarer.declare(new Fields("sentence")); 42 | } 43 | 44 | } 45 | -------------------------------------------------------------------------------- /lesson2/stage1/src/jvm/udacity/storm/spout/RandomSentenceSpout.java: -------------------------------------------------------------------------------- 1 | package udacity.storm.spout; 2 | 3 | import backtype.storm.spout.SpoutOutputCollector; 4 | import backtype.storm.task.TopologyContext; 5 | import backtype.storm.topology.OutputFieldsDeclarer; 6 | import backtype.storm.topology.base.BaseRichSpout; 7 | import backtype.storm.tuple.Fields; 8 | import backtype.storm.tuple.Values; 9 | import backtype.storm.utils.Utils; 10 | 11 | import java.util.Map; 12 | import java.util.Random; 13 | 14 | public class RandomSentenceSpout extends BaseRichSpout { 15 | SpoutOutputCollector _collector; 16 | Random _rand; 17 | 18 | 19 | @Override 20 | public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) { 21 | _collector = collector; 22 | _rand = new Random(); 23 | } 24 | 25 | @Override 26 | public void nextTuple() { 27 | Utils.sleep(100); 28 | String[] sentences = new String[]{ 29 | "the cow jumped over the moon", 30 | "an apple a day keeps the doctor away", 31 | "four score and seven years ago", 32 | "snow white and the seven dwarfs", 33 | "i am at two with nature" 34 | }; 35 | String sentence = sentences[_rand.nextInt(sentences.length)]; 36 | _collector.emit(new Values(sentence)); 37 | } 38 | 39 | @Override 40 | public void declareOutputFields(OutputFieldsDeclarer declarer) { 41 | declarer.declare(new Fields("sentence")); 42 | } 43 | 44 | } 45 | 
-------------------------------------------------------------------------------- /lesson2/stage2/src/jvm/udacity/storm/spout/RandomSentenceSpout.java: -------------------------------------------------------------------------------- 1 | package udacity.storm.spout; 2 | 3 | import backtype.storm.spout.SpoutOutputCollector; 4 | import backtype.storm.task.TopologyContext; 5 | import backtype.storm.topology.OutputFieldsDeclarer; 6 | import backtype.storm.topology.base.BaseRichSpout; 7 | import backtype.storm.tuple.Fields; 8 | import backtype.storm.tuple.Values; 9 | import backtype.storm.utils.Utils; 10 | 11 | import java.util.Map; 12 | import java.util.Random; 13 | 14 | public class RandomSentenceSpout extends BaseRichSpout { 15 | SpoutOutputCollector _collector; 16 | Random _rand; 17 | 18 | 19 | @Override 20 | public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) { 21 | _collector = collector; 22 | _rand = new Random(); 23 | } 24 | 25 | @Override 26 | public void nextTuple() { 27 | Utils.sleep(100); 28 | String[] sentences = new String[]{ 29 | "the cow jumped over the moon", 30 | "an apple a day keeps the doctor away", 31 | "four score and seven years ago", 32 | "snow white and the seven dwarfs", 33 | "i am at two with nature" }; 34 | String sentence = sentences[_rand.nextInt(sentences.length)]; 35 | _collector.emit(new Values(sentence)); 36 | } 37 | 38 | @Override 39 | public void declareOutputFields(OutputFieldsDeclarer declarer) { 40 | declarer.declare(new Fields("sentence")); 41 | } 42 | 43 | } 44 | -------------------------------------------------------------------------------- /lesson2/stage3/src/jvm/udacity/storm/spout/RandomSentenceSpout.java: -------------------------------------------------------------------------------- 1 | package udacity.storm.spout; 2 | 3 | import backtype.storm.spout.SpoutOutputCollector; 4 | import backtype.storm.task.TopologyContext; 5 | import backtype.storm.topology.OutputFieldsDeclarer; 6 | import 
backtype.storm.topology.base.BaseRichSpout; 7 | import backtype.storm.tuple.Fields; 8 | import backtype.storm.tuple.Values; 9 | import backtype.storm.utils.Utils; 10 | 11 | import java.util.Map; 12 | import java.util.Random; 13 | 14 | public class RandomSentenceSpout extends BaseRichSpout { 15 | SpoutOutputCollector _collector; 16 | Random _rand; 17 | 18 | 19 | @Override 20 | public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) { 21 | _collector = collector; 22 | _rand = new Random(); 23 | } 24 | 25 | @Override 26 | public void nextTuple() { 27 | Utils.sleep(100); 28 | String[] sentences = new String[]{ 29 | "the cow jumped over the moon", 30 | "an apple a day keeps the doctor away", 31 | "four score and seven years ago", 32 | "snow white and the seven dwarfs", 33 | "i am at two with nature" 34 | }; 35 | String sentence = sentences[_rand.nextInt(sentences.length)]; 36 | _collector.emit(new Values(sentence)); 37 | } 38 | 39 | @Override 40 | public void declareOutputFields(OutputFieldsDeclarer declarer) { 41 | declarer.declare(new Fields("sentence")); 42 | } 43 | 44 | } 45 | -------------------------------------------------------------------------------- /lesson2/stage4/src/jvm/udacity/storm/spout/RandomSentenceSpout.java: -------------------------------------------------------------------------------- 1 | package udacity.storm.spout; 2 | 3 | import backtype.storm.spout.SpoutOutputCollector; 4 | import backtype.storm.task.TopologyContext; 5 | import backtype.storm.topology.OutputFieldsDeclarer; 6 | import backtype.storm.topology.base.BaseRichSpout; 7 | import backtype.storm.tuple.Fields; 8 | import backtype.storm.tuple.Values; 9 | import backtype.storm.utils.Utils; 10 | 11 | import java.util.Map; 12 | import java.util.Random; 13 | 14 | public class RandomSentenceSpout extends BaseRichSpout { 15 | SpoutOutputCollector _collector; 16 | Random _rand; 17 | 18 | 19 | @Override 20 | public void open(Map conf, TopologyContext context, 
SpoutOutputCollector collector) { 21 | _collector = collector; 22 | _rand = new Random(); 23 | } 24 | 25 | @Override 26 | public void nextTuple() { 27 | Utils.sleep(100); 28 | String[] sentences = new String[]{ 29 | "the cow jumped over the moon", 30 | "an apple a day keeps the doctor away", 31 | "four score and seven years ago", 32 | "snow white and the seven dwarfs", 33 | "i am at two with nature" 34 | }; 35 | String sentence = sentences[_rand.nextInt(sentences.length)]; 36 | _collector.emit(new Values(sentence)); 37 | } 38 | 39 | @Override 40 | public void declareOutputFields(OutputFieldsDeclarer declarer) { 41 | declarer.declare(new Fields("sentence")); 42 | } 43 | 44 | } 45 | -------------------------------------------------------------------------------- /lesson2/stage6/src/jvm/udacity/storm/CountBolt.java: -------------------------------------------------------------------------------- 1 | package udacity.storm; 2 | 3 | import backtype.storm.Config; 4 | import backtype.storm.LocalCluster; 5 | import backtype.storm.StormSubmitter; 6 | import backtype.storm.spout.SpoutOutputCollector; 7 | import backtype.storm.task.OutputCollector; 8 | import backtype.storm.task.TopologyContext; 9 | import backtype.storm.testing.TestWordSpout; 10 | import backtype.storm.topology.OutputFieldsDeclarer; 11 | import backtype.storm.topology.TopologyBuilder; 12 | import backtype.storm.topology.base.BaseRichSpout; 13 | import backtype.storm.topology.base.BaseRichBolt; 14 | import backtype.storm.tuple.Fields; 15 | import backtype.storm.tuple.Tuple; 16 | import backtype.storm.tuple.Values; 17 | import backtype.storm.utils.Utils; 18 | 19 | import java.util.HashMap; 20 | import java.util.Map; 21 | 22 | /** 23 | * A bolt that counts the words that it receives 24 | */ 25 | public class CountBolt extends BaseRichBolt 26 | { 27 | // To output tuples from this bolt to the next stage bolts, if any 28 | private OutputCollector collector; 29 | 30 | // Map to store the count of the words 31 | 
private Map countMap; 32 | 33 | @Override 34 | public void prepare( 35 | Map map, 36 | TopologyContext topologyContext, 37 | OutputCollector outputCollector) 38 | { 39 | 40 | // save the collector for emitting tuples 41 | collector = outputCollector; 42 | 43 | // create and initialize the map 44 | countMap = new HashMap(); 45 | } 46 | 47 | @Override 48 | public void execute(Tuple tuple) 49 | { 50 | // get the word from the 1st column of incoming tuple 51 | String word = tuple.getString(0); 52 | 53 | // check if the word is present in the map 54 | if (countMap.get(word) == null) { 55 | 56 | // not present, add the word with a count of 1 57 | countMap.put(word, 1); 58 | } else { 59 | 60 | // already there, hence get the count 61 | Integer val = countMap.get(word); 62 | 63 | // increment the count and save it to the map 64 | countMap.put(word, ++val); 65 | } 66 | 67 | // emit the word and count 68 | collector.emit(new Values(word, countMap.get(word))); 69 | } 70 | 71 | @Override 72 | public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) 73 | { 74 | // tell storm the schema of the output tuple for this spout 75 | // tuple consists of a two columns called 'word' and 'count' 76 | 77 | // declare the first column 'word', second column 'count' 78 | outputFieldsDeclarer.declare(new Fields("word","count")); 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /lesson2/stage6/src/jvm/udacity/storm/ParseTweetBolt.java: -------------------------------------------------------------------------------- 1 | package udacity.storm; 2 | 3 | import backtype.storm.Config; 4 | import backtype.storm.LocalCluster; 5 | import backtype.storm.StormSubmitter; 6 | import backtype.storm.spout.SpoutOutputCollector; 7 | import backtype.storm.task.OutputCollector; 8 | import backtype.storm.task.TopologyContext; 9 | import backtype.storm.testing.TestWordSpout; 10 | import backtype.storm.topology.OutputFieldsDeclarer; 11 | import 
backtype.storm.topology.TopologyBuilder; 12 | import backtype.storm.topology.base.BaseRichSpout; 13 | import backtype.storm.topology.base.BaseRichBolt; 14 | import backtype.storm.tuple.Fields; 15 | import backtype.storm.tuple.Tuple; 16 | import backtype.storm.tuple.Values; 17 | import backtype.storm.utils.Utils; 18 | 19 | import java.util.Map; 20 | 21 | /** 22 | * A bolt that parses the tweet into words 23 | */ 24 | public class ParseTweetBolt extends BaseRichBolt 25 | { 26 | // To output tuples from this bolt to the count bolt 27 | OutputCollector collector; 28 | 29 | @Override 30 | public void prepare( 31 | Map map, 32 | TopologyContext topologyContext, 33 | OutputCollector outputCollector) 34 | { 35 | // save the output collector for emitting tuples 36 | collector = outputCollector; 37 | } 38 | 39 | @Override 40 | public void execute(Tuple tuple) 41 | { 42 | // get the 1st column 'tweet' from tuple 43 | String tweet = tuple.getString(0); 44 | 45 | // provide the delimiters for splitting the tweet 46 | String delims = "[ .,?!]+"; 47 | 48 | // now split the tweet into tokens 49 | String[] tokens = tweet.split(delims); 50 | 51 | // for each token/word, emit it 52 | for (String token: tokens) { 53 | collector.emit(new Values(token)); 54 | } 55 | } 56 | 57 | @Override 58 | public void declareOutputFields(OutputFieldsDeclarer declarer) 59 | { 60 | // tell storm the schema of the output tuple for this spout 61 | // tuple consists of a single column called 'tweet-word' 62 | declarer.declare(new Fields("tweet-word")); 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /lesson2/stage6/src/jvm/udacity/storm/ReportBolt.java: -------------------------------------------------------------------------------- 1 | package udacity.storm; 2 | 3 | import backtype.storm.Config; 4 | import backtype.storm.LocalCluster; 5 | import backtype.storm.StormSubmitter; 6 | import backtype.storm.spout.SpoutOutputCollector; 7 | import 
backtype.storm.task.OutputCollector; 8 | import backtype.storm.task.TopologyContext; 9 | import backtype.storm.testing.TestWordSpout; 10 | import backtype.storm.topology.OutputFieldsDeclarer; 11 | import backtype.storm.topology.TopologyBuilder; 12 | import backtype.storm.topology.base.BaseRichSpout; 13 | import backtype.storm.topology.base.BaseRichBolt; 14 | import backtype.storm.tuple.Fields; 15 | import backtype.storm.tuple.Tuple; 16 | import backtype.storm.tuple.Values; 17 | import backtype.storm.utils.Utils; 18 | 19 | import java.util.Map; 20 | 21 | import com.lambdaworks.redis.RedisClient; 22 | import com.lambdaworks.redis.RedisConnection; 23 | 24 | /** 25 | * A bolt that prints the word and count to redis 26 | */ 27 | public class ReportBolt extends BaseRichBolt 28 | { 29 | // place holder to keep the connection to redis 30 | transient RedisConnection redis; 31 | 32 | @Override 33 | public void prepare( 34 | Map map, 35 | TopologyContext topologyContext, 36 | OutputCollector outputCollector) 37 | { 38 | // instantiate a redis connection 39 | RedisClient client = new RedisClient("localhost",6379); 40 | 41 | // initiate the actual connection 42 | redis = client.connect(); 43 | } 44 | 45 | @Override 46 | public void execute(Tuple tuple) 47 | { 48 | // access the first column 'word' 49 | String word = tuple.getStringByField("word"); 50 | 51 | // access the second column 'count' 52 | Integer count = tuple.getIntegerByField("count"); 53 | 54 | // publish the word count to redis using word as the key 55 | redis.publish("WordCountTopology", word + "|" + Long.toString(count)); 56 | } 57 | 58 | public void declareOutputFields(OutputFieldsDeclarer declarer) 59 | { 60 | // nothing to add - since it is the final bolt 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /lesson2/stage6/src/jvm/udacity/storm/TweetTopology.java: -------------------------------------------------------------------------------- 1 | package 
udacity.storm; 2 | 3 | import backtype.storm.Config; 4 | import backtype.storm.LocalCluster; 5 | import backtype.storm.StormSubmitter; 6 | import backtype.storm.spout.SpoutOutputCollector; 7 | import backtype.storm.task.OutputCollector; 8 | import backtype.storm.task.TopologyContext; 9 | import backtype.storm.testing.TestWordSpout; 10 | import backtype.storm.topology.OutputFieldsDeclarer; 11 | import backtype.storm.topology.TopologyBuilder; 12 | import backtype.storm.topology.base.BaseRichSpout; 13 | import backtype.storm.topology.base.BaseRichBolt; 14 | import backtype.storm.tuple.Fields; 15 | import backtype.storm.tuple.Tuple; 16 | import backtype.storm.tuple.Values; 17 | import backtype.storm.utils.Utils; 18 | 19 | class TweetTopology 20 | { 21 | public static void main(String[] args) throws Exception 22 | { 23 | // create the topology 24 | TopologyBuilder builder = new TopologyBuilder(); 25 | 26 | /* 27 | * In order to create the spout, you need to get twitter credentials 28 | * If you need to use Twitter firehose/Tweet stream for your idea, 29 | * create a set of credentials by following the instructions at 30 | * 31 | * https://dev.twitter.com/discussions/631 32 | * 33 | */ 34 | 35 | // now create the tweet spout with the credentials 36 | TweetSpout tweetSpout = new TweetSpout( 37 | "[Your customer key]", 38 | "[Your secret key]", 39 | "[Your access token]", 40 | "[Your access secret]" 41 | ); 42 | 43 | //********************************************************************* 44 | // Complete the Topology. 45 | // Part 0: attach the tweet spout to the topology - parallelism of 1 46 | // Part 1: // attach the parse tweet bolt, parallelism of 10 (what grouping is needed?) 47 | // Part 2: // attach the count bolt, parallelism of 15 (what grouping is needed?) 48 | // Part 3: attach the report bolt, parallelism of 1 (what grouping is needed?) 49 | // Submit and run the topology. 
50 | 51 | 52 | //********************************************************************* 53 | 54 | // create the default config object 55 | Config conf = new Config(); 56 | 57 | // set the config in debugging mode 58 | conf.setDebug(true); 59 | 60 | if (args != null && args.length > 0) { 61 | 62 | // run it in a live cluster 63 | 64 | // set the number of workers for running all spout and bolt tasks 65 | conf.setNumWorkers(3); 66 | 67 | // create the topology and submit with config 68 | StormSubmitter.submitTopology(args[0], conf, builder.createTopology()); 69 | 70 | } else { 71 | 72 | // run it in a simulated local cluster 73 | 74 | // set the number of threads to run - similar to setting number of workers in live cluster 75 | conf.setMaxTaskParallelism(3); 76 | 77 | // create the local cluster instance 78 | LocalCluster cluster = new LocalCluster(); 79 | 80 | // submit the topology to the local cluster 81 | cluster.submitTopology("tweet-word-count", conf, builder.createTopology()); 82 | 83 | // let the topology run for 300 seconds. note topologies never terminate! 
84 | Utils.sleep(300000); 85 | 86 | // now kill the topology 87 | cluster.killTopology("tweet-word-count"); 88 | 89 | // we are done, so shutdown the local cluster 90 | cluster.shutdown(); 91 | } 92 | } 93 | } 94 | -------------------------------------------------------------------------------- /lesson2/stage7/src/jvm/udacity/storm/CountBolt.java: -------------------------------------------------------------------------------- 1 | package udacity.storm; 2 | 3 | import backtype.storm.Config; 4 | import backtype.storm.LocalCluster; 5 | import backtype.storm.StormSubmitter; 6 | import backtype.storm.spout.SpoutOutputCollector; 7 | import backtype.storm.task.OutputCollector; 8 | import backtype.storm.task.TopologyContext; 9 | import backtype.storm.testing.TestWordSpout; 10 | import backtype.storm.topology.OutputFieldsDeclarer; 11 | import backtype.storm.topology.TopologyBuilder; 12 | import backtype.storm.topology.base.BaseRichSpout; 13 | import backtype.storm.topology.base.BaseRichBolt; 14 | import backtype.storm.tuple.Fields; 15 | import backtype.storm.tuple.Tuple; 16 | import backtype.storm.tuple.Values; 17 | import backtype.storm.utils.Utils; 18 | 19 | import java.util.HashMap; 20 | import java.util.Map; 21 | 22 | /** 23 | * A bolt that counts the words that it receives 24 | */ 25 | public class CountBolt extends BaseRichBolt 26 | { 27 | // To output tuples from this bolt to the next stage bolts, if any 28 | private OutputCollector collector; 29 | 30 | // Map to store the count of the words 31 | private Map countMap; 32 | 33 | @Override 34 | public void prepare( 35 | Map map, 36 | TopologyContext topologyContext, 37 | OutputCollector outputCollector) 38 | { 39 | 40 | // save the collector for emitting tuples 41 | collector = outputCollector; 42 | 43 | // create and initialize the map 44 | countMap = new HashMap(); 45 | } 46 | 47 | @Override 48 | public void execute(Tuple tuple) 49 | { 50 | // get the word from the 1st column of incoming tuple 51 | String word = 
tuple.getString(0); 52 | 53 | // check if the word is present in the map 54 | if (countMap.get(word) == null) { 55 | 56 | // not present, add the word with a count of 1 57 | countMap.put(word, 1); 58 | } else { 59 | 60 | // already there, hence get the count 61 | Integer val = countMap.get(word); 62 | 63 | // increment the count and save it to the map 64 | countMap.put(word, ++val); 65 | } 66 | 67 | // emit the word and count 68 | collector.emit(new Values(word, countMap.get(word))); 69 | } 70 | 71 | @Override 72 | public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) 73 | { 74 | // tell storm the schema of the output tuple for this spout 75 | // tuple consists of a two columns called 'word' and 'count' 76 | 77 | // declare the first column 'word', second column 'count' 78 | outputFieldsDeclarer.declare(new Fields("word","count")); 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /lesson2/stage7/src/jvm/udacity/storm/ParseTweetBolt.java: -------------------------------------------------------------------------------- 1 | package udacity.storm; 2 | 3 | import backtype.storm.Config; 4 | import backtype.storm.LocalCluster; 5 | import backtype.storm.StormSubmitter; 6 | import backtype.storm.spout.SpoutOutputCollector; 7 | import backtype.storm.task.OutputCollector; 8 | import backtype.storm.task.TopologyContext; 9 | import backtype.storm.testing.TestWordSpout; 10 | import backtype.storm.topology.OutputFieldsDeclarer; 11 | import backtype.storm.topology.TopologyBuilder; 12 | import backtype.storm.topology.base.BaseRichSpout; 13 | import backtype.storm.topology.base.BaseRichBolt; 14 | import backtype.storm.tuple.Fields; 15 | import backtype.storm.tuple.Tuple; 16 | import backtype.storm.tuple.Values; 17 | import backtype.storm.utils.Utils; 18 | 19 | import java.util.Map; 20 | 21 | /** 22 | * A bolt that parses the tweet into words 23 | */ 24 | public class ParseTweetBolt extends BaseRichBolt 25 | { 26 
| // To output tuples from this bolt to the count bolt 27 | OutputCollector collector; 28 | 29 | @Override 30 | public void prepare( 31 | Map map, 32 | TopologyContext topologyContext, 33 | OutputCollector outputCollector) 34 | { 35 | // save the output collector for emitting tuples 36 | collector = outputCollector; 37 | } 38 | 39 | @Override 40 | public void execute(Tuple tuple) 41 | { 42 | // get the 1st column 'tweet' from tuple 43 | String tweet = tuple.getString(0); 44 | 45 | // provide the delimiters for splitting the tweet 46 | String delims = "[ .,?!]+"; 47 | 48 | // now split the tweet into tokens 49 | String[] tokens = tweet.split(delims); 50 | 51 | // for each token/word, emit it 52 | for (String token: tokens) { 53 | collector.emit(new Values(token)); 54 | } 55 | } 56 | 57 | @Override 58 | public void declareOutputFields(OutputFieldsDeclarer declarer) 59 | { 60 | // tell storm the schema of the output tuple for this spout 61 | // tuple consists of a single column called 'tweet-word' 62 | declarer.declare(new Fields("tweet-word")); 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /lesson2/stage7/src/jvm/udacity/storm/ReportBolt.java: -------------------------------------------------------------------------------- 1 | package udacity.storm; 2 | 3 | import backtype.storm.Config; 4 | import backtype.storm.LocalCluster; 5 | import backtype.storm.StormSubmitter; 6 | import backtype.storm.spout.SpoutOutputCollector; 7 | import backtype.storm.task.OutputCollector; 8 | import backtype.storm.task.TopologyContext; 9 | import backtype.storm.testing.TestWordSpout; 10 | import backtype.storm.topology.OutputFieldsDeclarer; 11 | import backtype.storm.topology.TopologyBuilder; 12 | import backtype.storm.topology.base.BaseRichSpout; 13 | import backtype.storm.topology.base.BaseRichBolt; 14 | import backtype.storm.tuple.Fields; 15 | import backtype.storm.tuple.Tuple; 16 | import backtype.storm.tuple.Values; 17 | import 
backtype.storm.utils.Utils; 18 | 19 | import java.util.Map; 20 | 21 | import com.lambdaworks.redis.RedisClient; 22 | import com.lambdaworks.redis.RedisConnection; 23 | 24 | /** 25 | * A bolt that prints the word and count to redis 26 | */ 27 | public class ReportBolt extends BaseRichBolt 28 | { 29 | // place holder to keep the connection to redis 30 | transient RedisConnection redis; 31 | 32 | @Override 33 | public void prepare( 34 | Map map, 35 | TopologyContext topologyContext, 36 | OutputCollector outputCollector) 37 | { 38 | // instantiate a redis connection 39 | RedisClient client = new RedisClient("localhost",6379); 40 | 41 | // initiate the actual connection 42 | redis = client.connect(); 43 | } 44 | 45 | @Override 46 | public void execute(Tuple tuple) 47 | { 48 | // access the first column 'word' 49 | String word = tuple.getStringByField("word"); 50 | 51 | // access the second column 'count' 52 | Integer count = tuple.getIntegerByField("count"); 53 | 54 | // publish the word count to redis using word as the key 55 | redis.publish("WordCountTopology", word + "|" + Long.toString(count)); 56 | } 57 | 58 | public void declareOutputFields(OutputFieldsDeclarer declarer) 59 | { 60 | // nothing to add - since it is the final bolt 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /lesson2/stage7/src/jvm/udacity/storm/TweetTopology.java: -------------------------------------------------------------------------------- 1 | package udacity.storm; 2 | 3 | import backtype.storm.Config; 4 | import backtype.storm.LocalCluster; 5 | import backtype.storm.StormSubmitter; 6 | import backtype.storm.spout.SpoutOutputCollector; 7 | import backtype.storm.task.OutputCollector; 8 | import backtype.storm.task.TopologyContext; 9 | import backtype.storm.testing.TestWordSpout; 10 | import backtype.storm.topology.OutputFieldsDeclarer; 11 | import backtype.storm.topology.TopologyBuilder; 12 | import 
backtype.storm.topology.base.BaseRichSpout; 13 | import backtype.storm.topology.base.BaseRichBolt; 14 | import backtype.storm.tuple.Fields; 15 | import backtype.storm.tuple.Tuple; 16 | import backtype.storm.tuple.Values; 17 | import backtype.storm.utils.Utils; 18 | 19 | class TweetTopology 20 | { 21 | public static void main(String[] args) throws Exception 22 | { 23 | // create the topology 24 | TopologyBuilder builder = new TopologyBuilder(); 25 | 26 | /* 27 | * In order to create the spout, you need to get twitter credentials 28 | * If you need to use Twitter firehose/Tweet stream for your idea, 29 | * create a set of credentials by following the instructions at 30 | * 31 | * https://dev.twitter.com/discussions/631 32 | * 33 | */ 34 | 35 | // now create the tweet spout with the credentials 36 | TweetSpout tweetSpout = new TweetSpout( 37 | "[Your customer key]", 38 | "[Your secret key]", 39 | "[Your access token]", 40 | "[Your access secret]" 41 | ); 42 | 43 | // attach the tweet spout to the topology - parallelism of 1 44 | builder.setSpout("tweet-spout", tweetSpout, 1); 45 | 46 | // attach the parse tweet bolt using shuffle grouping 47 | builder.setBolt("parse-tweet-bolt", new ParseTweetBolt(), 10).shuffleGrouping("tweet-spout"); 48 | 49 | // attach the count bolt using fields grouping - parallelism of 15 50 | //builder.setBolt("count-bolt", new CountBolt(), 15).fieldsGrouping("parse-tweet-bolt", new Fields("tweet-word")); 51 | 52 | // attach rolling count bolt using fields grouping - parallelism of 5 53 | builder.setBolt("rolling-count-bolt", new RollingCountBolt(30, 10), 1).fieldsGrouping("parse-tweet-bolt", new Fields("tweet-word")); 54 | 55 | // attach the report bolt using global grouping - parallelism of 1 56 | builder.setBolt("report-bolt", new ReportBolt(), 1).globalGrouping("rolling-count-bolt"); 57 | 58 | // create the default config object 59 | Config conf = new Config(); 60 | 61 | // set the config in debugging mode 62 | conf.setDebug(true); 63 | 64 
| if (args != null && args.length > 0) { 65 | 66 | // run it in a live cluster 67 | 68 | // set the number of workers for running all spout and bolt tasks 69 | conf.setNumWorkers(3); 70 | 71 | // create the topology and submit with config 72 | StormSubmitter.submitTopology(args[0], conf, builder.createTopology()); 73 | 74 | } else { 75 | 76 | // run it in a simulated local cluster 77 | 78 | // set the number of threads to run - similar to setting number of workers in live cluster 79 | conf.setMaxTaskParallelism(3); 80 | 81 | // create the local cluster instance 82 | LocalCluster cluster = new LocalCluster(); 83 | 84 | // submit the topology to the local cluster 85 | cluster.submitTopology("tweet-word-count", conf, builder.createTopology()); 86 | 87 | // let the topology run for 300 seconds. note topologies never terminate! 88 | Utils.sleep(300000); 89 | 90 | // now kill the topology 91 | cluster.killTopology("tweet-word-count"); 92 | 93 | // we are done, so shutdown the local cluster 94 | cluster.shutdown(); 95 | } 96 | } 97 | } 98 | -------------------------------------------------------------------------------- /lesson2/stage7/src/jvm/udacity/storm/tools/NthLastModifiedTimeTracker.java: -------------------------------------------------------------------------------- 1 | package udacity.storm.tools; 2 | 3 | import backtype.storm.utils.Time; 4 | import org.apache.commons.collections.buffer.CircularFifoBuffer; 5 | 6 | /** 7 | * This class tracks the time-since-last-modify of a "thing" in a rolling fashion. 8 | *

9 | * For example, create a 5-slot tracker to track the five most recent time-since-last-modify. 10 | *

11 | * You must manually "mark" that the "something" that you want to track -- in terms of modification times -- has just 12 | * been modified. 13 | */ 14 | public class NthLastModifiedTimeTracker { 15 | 16 | private static final int MILLIS_IN_SEC = 1000; 17 | 18 | private final CircularFifoBuffer lastModifiedTimesMillis; 19 | 20 | public NthLastModifiedTimeTracker(int numTimesToTrack) { 21 | if (numTimesToTrack < 1) { 22 | throw new IllegalArgumentException( 23 | "numTimesToTrack must be greater than zero (you requested " + numTimesToTrack + ")"); 24 | } 25 | lastModifiedTimesMillis = new CircularFifoBuffer(numTimesToTrack); 26 | initLastModifiedTimesMillis(); 27 | } 28 | 29 | private void initLastModifiedTimesMillis() { 30 | long nowCached = now(); 31 | for (int i = 0; i < lastModifiedTimesMillis.maxSize(); i++) { 32 | lastModifiedTimesMillis.add(Long.valueOf(nowCached)); 33 | } 34 | } 35 | 36 | private long now() { 37 | return Time.currentTimeMillis(); 38 | } 39 | 40 | public int secondsSinceOldestModification() { 41 | long modifiedTimeMillis = ((Long) lastModifiedTimesMillis.get()).longValue(); 42 | return (int) ((now() - modifiedTimeMillis) / MILLIS_IN_SEC); 43 | } 44 | 45 | public void markAsModified() { 46 | updateLastModifiedTime(); 47 | } 48 | 49 | private void updateLastModifiedTime() { 50 | lastModifiedTimesMillis.add(now()); 51 | } 52 | 53 | } 54 | -------------------------------------------------------------------------------- /lesson2/stage7/src/jvm/udacity/storm/tools/SlotBasedCounter.java: -------------------------------------------------------------------------------- 1 | //package storm.starter.tools; 2 | package udacity.storm.tools; 3 | 4 | import java.io.Serializable; 5 | import java.util.HashMap; 6 | import java.util.HashSet; 7 | import java.util.Map; 8 | import java.util.Set; 9 | 10 | /** 11 | * This class provides per-slot counts of the occurrences of objects. 12 | *

13 | * It can be used, for instance, as a building block for implementing sliding window counting of objects. 14 | * 15 | * @param The type of those objects we want to count. 16 | */ 17 | public final class SlotBasedCounter implements Serializable { 18 | 19 | private static final long serialVersionUID = 4858185737378394432L; 20 | 21 | private final Map objToCounts = new HashMap(); 22 | private final int numSlots; 23 | 24 | public SlotBasedCounter(int numSlots) { 25 | if (numSlots <= 0) { 26 | throw new IllegalArgumentException("Number of slots must be greater than zero (you requested " + numSlots + ")"); 27 | } 28 | this.numSlots = numSlots; 29 | } 30 | 31 | public void incrementCount(T obj, int slot) { 32 | long[] counts = objToCounts.get(obj); 33 | if (counts == null) { 34 | counts = new long[this.numSlots]; 35 | objToCounts.put(obj, counts); 36 | } 37 | counts[slot]++; 38 | } 39 | 40 | public long getCount(T obj, int slot) { 41 | long[] counts = objToCounts.get(obj); 42 | if (counts == null) { 43 | return 0; 44 | } 45 | else { 46 | return counts[slot]; 47 | } 48 | } 49 | 50 | public Map getCounts() { 51 | Map result = new HashMap(); 52 | for (T obj : objToCounts.keySet()) { 53 | result.put(obj, computeTotalCount(obj)); 54 | } 55 | return result; 56 | } 57 | 58 | private long computeTotalCount(T obj) { 59 | long[] curr = objToCounts.get(obj); 60 | long total = 0; 61 | for (long l : curr) { 62 | total += l; 63 | } 64 | return total; 65 | } 66 | 67 | /** 68 | * Reset the slot count of any tracked objects to zero for the given slot. 
69 | * 70 | * @param slot 71 | */ 72 | public void wipeSlot(int slot) { 73 | for (T obj : objToCounts.keySet()) { 74 | resetSlotCountToZero(obj, slot); 75 | } 76 | } 77 | 78 | private void resetSlotCountToZero(T obj, int slot) { 79 | long[] counts = objToCounts.get(obj); 80 | counts[slot] = 0; 81 | } 82 | 83 | private boolean shouldBeRemovedFromCounter(T obj) { 84 | return computeTotalCount(obj) == 0; 85 | } 86 | 87 | /** 88 | * Remove any object from the counter whose total count is zero (to free up memory). 89 | */ 90 | public void wipeZeros() { 91 | Set objToBeRemoved = new HashSet(); 92 | for (T obj : objToCounts.keySet()) { 93 | if (shouldBeRemovedFromCounter(obj)) { 94 | objToBeRemoved.add(obj); 95 | } 96 | } 97 | for (T obj : objToBeRemoved) { 98 | objToCounts.remove(obj); 99 | } 100 | } 101 | 102 | } 103 | -------------------------------------------------------------------------------- /lesson2/stage7/src/jvm/udacity/storm/tools/TupleHelpers.java: -------------------------------------------------------------------------------- 1 | //package storm.starter.util; 2 | package udacity.storm.tools; 3 | 4 | import backtype.storm.Constants; 5 | import backtype.storm.tuple.Tuple; 6 | 7 | public final class TupleHelpers { 8 | 9 | private TupleHelpers() { 10 | } 11 | 12 | public static boolean isTickTuple(Tuple tuple) { 13 | return tuple.getSourceComponent().equals(Constants.SYSTEM_COMPONENT_ID) && tuple.getSourceStreamId().equals( 14 | Constants.SYSTEM_TICK_STREAM_ID); 15 | } 16 | 17 | } 18 | -------------------------------------------------------------------------------- /lesson3/stage1/src/jvm/udacity/storm/CountBolt.java: -------------------------------------------------------------------------------- 1 | package udacity.storm; 2 | 3 | import backtype.storm.Config; 4 | import backtype.storm.LocalCluster; 5 | import backtype.storm.StormSubmitter; 6 | import backtype.storm.spout.SpoutOutputCollector; 7 | import backtype.storm.task.OutputCollector; 8 | import 
backtype.storm.task.TopologyContext; 9 | import backtype.storm.testing.TestWordSpout; 10 | import backtype.storm.topology.OutputFieldsDeclarer; 11 | import backtype.storm.topology.TopologyBuilder; 12 | import backtype.storm.topology.base.BaseRichSpout; 13 | import backtype.storm.topology.base.BaseRichBolt; 14 | import backtype.storm.tuple.Fields; 15 | import backtype.storm.tuple.Tuple; 16 | import backtype.storm.tuple.Values; 17 | import backtype.storm.utils.Utils; 18 | 19 | import java.util.HashMap; 20 | import java.util.Map; 21 | 22 | /** 23 | * A bolt that counts the words that it receives 24 | */ 25 | public class CountBolt extends BaseRichBolt 26 | { 27 | // To output tuples from this bolt to the next stage bolts, if any 28 | private OutputCollector collector; 29 | 30 | // Map to store the count of the words 31 | private Map countMap; 32 | 33 | @Override 34 | public void prepare( 35 | Map map, 36 | TopologyContext topologyContext, 37 | OutputCollector outputCollector) 38 | { 39 | 40 | // save the collector for emitting tuples 41 | collector = outputCollector; 42 | 43 | // create and initialize the map 44 | countMap = new HashMap(); 45 | } 46 | 47 | @Override 48 | public void execute(Tuple tuple) 49 | { 50 | // get the word from the 1st column of incoming tuple 51 | String word = tuple.getString(0); 52 | 53 | // check if the word is present in the map 54 | if (countMap.get(word) == null) { 55 | 56 | // not present, add the word with a count of 1 57 | countMap.put(word, 1); 58 | } else { 59 | 60 | // already there, hence get the count 61 | Integer val = countMap.get(word); 62 | 63 | // increment the count and save it to the map 64 | countMap.put(word, ++val); 65 | } 66 | 67 | // emit the word and count 68 | collector.emit(new Values(word, countMap.get(word))); 69 | } 70 | 71 | @Override 72 | public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) 73 | { 74 | // tell storm the schema of the output tuple for this spout 75 | // tuple consists of 
a two columns called 'word' and 'count' 76 | 77 | // declare the first column 'word', second column 'count' 78 | outputFieldsDeclarer.declare(new Fields("word","count")); 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /lesson3/stage1/src/jvm/udacity/storm/ParseTweetBolt.java: -------------------------------------------------------------------------------- 1 | package udacity.storm; 2 | 3 | import backtype.storm.Config; 4 | import backtype.storm.LocalCluster; 5 | import backtype.storm.StormSubmitter; 6 | import backtype.storm.spout.SpoutOutputCollector; 7 | import backtype.storm.task.OutputCollector; 8 | import backtype.storm.task.TopologyContext; 9 | import backtype.storm.testing.TestWordSpout; 10 | import backtype.storm.topology.OutputFieldsDeclarer; 11 | import backtype.storm.topology.TopologyBuilder; 12 | import backtype.storm.topology.base.BaseRichSpout; 13 | import backtype.storm.topology.base.BaseRichBolt; 14 | import backtype.storm.tuple.Fields; 15 | import backtype.storm.tuple.Tuple; 16 | import backtype.storm.tuple.Values; 17 | import backtype.storm.utils.Utils; 18 | 19 | import java.util.Map; 20 | 21 | /** 22 | * A bolt that parses the tweet into words 23 | */ 24 | public class ParseTweetBolt extends BaseRichBolt 25 | { 26 | // To output tuples from this bolt to the count bolt 27 | OutputCollector collector; 28 | 29 | @Override 30 | public void prepare( 31 | Map map, 32 | TopologyContext topologyContext, 33 | OutputCollector outputCollector) 34 | { 35 | // save the output collector for emitting tuples 36 | collector = outputCollector; 37 | } 38 | 39 | @Override 40 | public void execute(Tuple tuple) 41 | { 42 | // get the 1st column 'tweet' from tuple 43 | String tweet = tuple.getString(0); 44 | 45 | // provide the delimiters for splitting the tweet 46 | String delims = "[ .,?!]+"; 47 | 48 | // now split the tweet into tokens 49 | String[] tokens = tweet.split(delims); 50 | 51 | // for each token/word, 
emit it 52 | for (String token: tokens) { 53 | collector.emit(new Values(token)); 54 | } 55 | } 56 | 57 | @Override 58 | public void declareOutputFields(OutputFieldsDeclarer declarer) 59 | { 60 | // tell storm the schema of the output tuple for this spout 61 | // tuple consists of a single column called 'tweet-word' 62 | declarer.declare(new Fields("tweet-word")); 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /lesson3/stage1/src/jvm/udacity/storm/ReportBolt.java: -------------------------------------------------------------------------------- 1 | package udacity.storm; 2 | 3 | import backtype.storm.Config; 4 | import backtype.storm.LocalCluster; 5 | import backtype.storm.StormSubmitter; 6 | import backtype.storm.spout.SpoutOutputCollector; 7 | import backtype.storm.task.OutputCollector; 8 | import backtype.storm.task.TopologyContext; 9 | import backtype.storm.testing.TestWordSpout; 10 | import backtype.storm.topology.OutputFieldsDeclarer; 11 | import backtype.storm.topology.TopologyBuilder; 12 | import backtype.storm.topology.base.BaseRichSpout; 13 | import backtype.storm.topology.base.BaseRichBolt; 14 | import backtype.storm.tuple.Fields; 15 | import backtype.storm.tuple.Tuple; 16 | import backtype.storm.tuple.Values; 17 | import backtype.storm.utils.Utils; 18 | 19 | import java.util.Map; 20 | 21 | import com.lambdaworks.redis.RedisClient; 22 | import com.lambdaworks.redis.RedisConnection; 23 | 24 | /** 25 | * A bolt that prints the word and count to redis 26 | */ 27 | public class ReportBolt extends BaseRichBolt 28 | { 29 | // place holder to keep the connection to redis 30 | transient RedisConnection redis; 31 | 32 | @Override 33 | public void prepare( 34 | Map map, 35 | TopologyContext topologyContext, 36 | OutputCollector outputCollector) 37 | { 38 | // instantiate a redis connection 39 | RedisClient client = new RedisClient("localhost",6379); 40 | 41 | // initiate the actual connection 42 | redis = 
client.connect(); 43 | } 44 | 45 | @Override 46 | public void execute(Tuple tuple) 47 | { 48 | // access the first column 'word' 49 | String word = tuple.getStringByField("word"); 50 | 51 | // access the second column 'count' 52 | Integer count = tuple.getIntegerByField("count"); 53 | 54 | // publish the word count to redis using word as the key 55 | redis.publish("WordCountTopology", word + "|" + Long.toString(count)); 56 | } 57 | 58 | public void declareOutputFields(OutputFieldsDeclarer declarer) 59 | { 60 | // nothing to add - since it is the final bolt 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /lesson3/stage1/src/jvm/udacity/storm/TweetTopology.java: -------------------------------------------------------------------------------- 1 | package udacity.storm; 2 | 3 | import backtype.storm.Config; 4 | import backtype.storm.LocalCluster; 5 | import backtype.storm.StormSubmitter; 6 | import backtype.storm.spout.SpoutOutputCollector; 7 | import backtype.storm.task.OutputCollector; 8 | import backtype.storm.task.TopologyContext; 9 | import backtype.storm.testing.TestWordSpout; 10 | import backtype.storm.topology.OutputFieldsDeclarer; 11 | import backtype.storm.topology.TopologyBuilder; 12 | import backtype.storm.topology.base.BaseRichSpout; 13 | import backtype.storm.topology.base.BaseRichBolt; 14 | import backtype.storm.tuple.Fields; 15 | import backtype.storm.tuple.Tuple; 16 | import backtype.storm.tuple.Values; 17 | import backtype.storm.utils.Utils; 18 | 19 | // NOTE - you must install the python Beautiful Soup module in Ubuntu 20 | // before this code will run. In your virtual machine, run: 21 | // 22 | // sudo apt-get install python-bs4 23 | // 24 | // see Lesson 4 for details on adding this provision to your Vagrantfile 25 | 26 | // Lesson 3 Stage 1 is taken from Lesson 2 Stage 6 to provide a basic starting point. 27 | // Copy, paste and uncomment the following resources module in the POM.xml file. 
28 | // This defines the src/jvm/udacity/storm/resources location needed for python shell 29 | 30 | // 31 | // 32 | // 33 | // src/jvm/udacity/storm 34 | // 35 | // 36 | // 37 | 38 | // 39 | 40 | class TweetTopology 41 | { 42 | public static void main(String[] args) throws Exception 43 | { 44 | // create the topology 45 | TopologyBuilder builder = new TopologyBuilder(); 46 | 47 | /* 48 | * In order to create the spout, you need to get twitter credentials 49 | * If you need to use Twitter firehose/Tweet stream for your idea, 50 | * create a set of credentials by following the instructions at 51 | * 52 | * https://dev.twitter.com/discussions/631 53 | * 54 | */ 55 | 56 | // now create the tweet spout with the credentials 57 | TweetSpout tweetSpout = new TweetSpout( 58 | "[Your customer key]", 59 | "[Your secret key]", 60 | "[Your access token]", 61 | "[Your access secret]" 62 | ); 63 | 64 | // attach the tweet spout to the topology - parallelism of 1 65 | builder.setSpout("tweet-spout", tweetSpout, 1); 66 | 67 | // attach the parse tweet bolt using shuffle grouping 68 | builder.setBolt("parse-tweet-bolt", new ParseTweetBolt(), 10).shuffleGrouping("tweet-spout"); 69 | 70 | // attach the count bolt using fields grouping - parallelism of 15 71 | builder.setBolt("count-bolt", new CountBolt(), 15).fieldsGrouping("parse-tweet-bolt", new Fields("tweet-word")); 72 | 73 | // attach the report bolt using global grouping - parallelism of 1 74 | builder.setBolt("report-bolt", new ReportBolt(), 1).globalGrouping("count-bolt"); 75 | 76 | // create the default config object 77 | Config conf = new Config(); 78 | 79 | // set the config in debugging mode 80 | conf.setDebug(true); 81 | 82 | if (args != null && args.length > 0) { 83 | 84 | // run it in a live cluster 85 | 86 | // set the number of workers for running all spout and bolt tasks 87 | conf.setNumWorkers(3); 88 | 89 | // create the topology and submit with config 90 | StormSubmitter.submitTopology(args[0], conf, 
builder.createTopology()); 91 | 92 | } else { 93 | 94 | // run it in a simulated local cluster 95 | 96 | // set the number of threads to run - similar to setting number of workers in live cluster 97 | conf.setMaxTaskParallelism(3); 98 | 99 | // create the local cluster instance 100 | LocalCluster cluster = new LocalCluster(); 101 | 102 | // submit the topology to the local cluster 103 | cluster.submitTopology("tweet-word-count", conf, builder.createTopology()); 104 | 105 | // let the topology run for 30 seconds. note topologies never terminate! 106 | Utils.sleep(30000); 107 | 108 | // now kill the topology 109 | cluster.killTopology("tweet-word-count"); 110 | 111 | // we are done, so shutdown the local cluster 112 | cluster.shutdown(); 113 | } 114 | } 115 | } 116 | -------------------------------------------------------------------------------- /lesson3/stage2/src/jvm/udacity/storm/CountBolt.java: -------------------------------------------------------------------------------- 1 | package udacity.storm; 2 | 3 | import backtype.storm.Config; 4 | import backtype.storm.LocalCluster; 5 | import backtype.storm.StormSubmitter; 6 | import backtype.storm.spout.SpoutOutputCollector; 7 | import backtype.storm.task.OutputCollector; 8 | import backtype.storm.task.TopologyContext; 9 | import backtype.storm.testing.TestWordSpout; 10 | import backtype.storm.topology.OutputFieldsDeclarer; 11 | import backtype.storm.topology.TopologyBuilder; 12 | import backtype.storm.topology.base.BaseRichSpout; 13 | import backtype.storm.topology.base.BaseRichBolt; 14 | import backtype.storm.tuple.Fields; 15 | import backtype.storm.tuple.Tuple; 16 | import backtype.storm.tuple.Values; 17 | import backtype.storm.utils.Utils; 18 | 19 | import java.util.HashMap; 20 | import java.util.Map; 21 | 22 | /** 23 | * A bolt that counts the words that it receives 24 | */ 25 | public class CountBolt extends BaseRichBolt 26 | { 27 | // To output tuples from this bolt to the next stage bolts, if any 28 | 
private OutputCollector collector; 29 | 30 | // Map to store the count of the words 31 | private Map countMap; 32 | 33 | @Override 34 | public void prepare( 35 | Map map, 36 | TopologyContext topologyContext, 37 | OutputCollector outputCollector) 38 | { 39 | 40 | // save the collector for emitting tuples 41 | collector = outputCollector; 42 | 43 | // create and initialize the map 44 | countMap = new HashMap(); 45 | } 46 | 47 | @Override 48 | public void execute(Tuple tuple) 49 | { 50 | // get the word from the 1st column of incoming tuple 51 | String word = tuple.getString(0); 52 | 53 | // check if the word is present in the map 54 | if (countMap.get(word) == null) { 55 | 56 | // not present, add the word with a count of 1 57 | countMap.put(word, 1); 58 | } else { 59 | 60 | // already there, hence get the count 61 | Integer val = countMap.get(word); 62 | 63 | // increment the count and save it to the map 64 | countMap.put(word, ++val); 65 | } 66 | 67 | // emit the word and count 68 | collector.emit(new Values(word, countMap.get(word))); 69 | } 70 | 71 | @Override 72 | public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) 73 | { 74 | // tell storm the schema of the output tuple for this spout 75 | // tuple consists of a two columns called 'word' and 'count' 76 | 77 | // declare the first column 'word', second column 'count' 78 | outputFieldsDeclarer.declare(new Fields("word","count")); 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /lesson3/stage2/src/jvm/udacity/storm/ParseTweetBolt.java: -------------------------------------------------------------------------------- 1 | package udacity.storm; 2 | 3 | import backtype.storm.Config; 4 | import backtype.storm.LocalCluster; 5 | import backtype.storm.StormSubmitter; 6 | import backtype.storm.spout.SpoutOutputCollector; 7 | import backtype.storm.task.OutputCollector; 8 | import backtype.storm.task.TopologyContext; 9 | import 
backtype.storm.testing.TestWordSpout; 10 | import backtype.storm.topology.OutputFieldsDeclarer; 11 | import backtype.storm.topology.TopologyBuilder; 12 | import backtype.storm.topology.base.BaseRichSpout; 13 | import backtype.storm.topology.base.BaseRichBolt; 14 | import backtype.storm.tuple.Fields; 15 | import backtype.storm.tuple.Tuple; 16 | import backtype.storm.tuple.Values; 17 | import backtype.storm.utils.Utils; 18 | 19 | import java.util.Map; 20 | 21 | /** 22 | * A bolt that parses the tweet into words 23 | */ 24 | public class ParseTweetBolt extends BaseRichBolt 25 | { 26 | // To output tuples from this bolt to the count bolt 27 | OutputCollector collector; 28 | 29 | @Override 30 | public void prepare( 31 | Map map, 32 | TopologyContext topologyContext, 33 | OutputCollector outputCollector) 34 | { 35 | // save the output collector for emitting tuples 36 | collector = outputCollector; 37 | } 38 | 39 | @Override 40 | public void execute(Tuple tuple) 41 | { 42 | // get the 1st column 'tweet' from tuple 43 | String tweet = tuple.getString(0); 44 | 45 | // provide the delimiters for splitting the tweet 46 | String delims = "[ .,?!]+"; 47 | 48 | // now split the tweet into tokens 49 | String[] tokens = tweet.split(delims); 50 | 51 | // for each token/word, emit it 52 | for (String token: tokens) { 53 | collector.emit(new Values(token)); 54 | } 55 | } 56 | 57 | @Override 58 | public void declareOutputFields(OutputFieldsDeclarer declarer) 59 | { 60 | // tell storm the schema of the output tuple for this spout 61 | // tuple consists of a single column called 'tweet-word' 62 | declarer.declare(new Fields("tweet-word")); 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /lesson3/stage2/src/jvm/udacity/storm/ReportBolt.java: -------------------------------------------------------------------------------- 1 | package udacity.storm; 2 | 3 | import backtype.storm.Config; 4 | import backtype.storm.LocalCluster; 5 | import 
backtype.storm.StormSubmitter; 6 | import backtype.storm.spout.SpoutOutputCollector; 7 | import backtype.storm.task.OutputCollector; 8 | import backtype.storm.task.TopologyContext; 9 | import backtype.storm.testing.TestWordSpout; 10 | import backtype.storm.topology.OutputFieldsDeclarer; 11 | import backtype.storm.topology.TopologyBuilder; 12 | import backtype.storm.topology.base.BaseRichSpout; 13 | import backtype.storm.topology.base.BaseRichBolt; 14 | import backtype.storm.tuple.Fields; 15 | import backtype.storm.tuple.Tuple; 16 | import backtype.storm.tuple.Values; 17 | import backtype.storm.utils.Utils; 18 | 19 | import java.util.Map; 20 | 21 | import com.lambdaworks.redis.RedisClient; 22 | import com.lambdaworks.redis.RedisConnection; 23 | 24 | /** 25 | * A bolt that prints the word and count to redis 26 | */ 27 | public class ReportBolt extends BaseRichBolt 28 | { 29 | // place holder to keep the connection to redis 30 | transient RedisConnection redis; 31 | 32 | @Override 33 | public void prepare( 34 | Map map, 35 | TopologyContext topologyContext, 36 | OutputCollector outputCollector) 37 | { 38 | // instantiate a redis connection 39 | RedisClient client = new RedisClient("localhost",6379); 40 | 41 | // initiate the actual connection 42 | redis = client.connect(); 43 | } 44 | 45 | @Override 46 | public void execute(Tuple tuple) 47 | { 48 | // access the first column 'word' 49 | String word = tuple.getStringByField("word"); 50 | 51 | // access the second column 'count' 52 | Integer count = tuple.getIntegerByField("count"); 53 | 54 | // publish the word count to redis using word as the key 55 | redis.publish("WordCountTopology", word + "|" + Long.toString(count)); 56 | } 57 | 58 | public void declareOutputFields(OutputFieldsDeclarer declarer) 59 | { 60 | // nothing to add - since it is the final bolt 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /lesson3/stage2/src/jvm/udacity/storm/SplitSentence.java: 
-------------------------------------------------------------------------------- 1 | package udacity.storm; 2 | 3 | import backtype.storm.Config; 4 | import backtype.storm.LocalCluster; 5 | import backtype.storm.StormSubmitter; 6 | import backtype.storm.task.ShellBolt; 7 | import backtype.storm.topology.BasicOutputCollector; 8 | import backtype.storm.topology.IRichBolt; 9 | import backtype.storm.topology.OutputFieldsDeclarer; 10 | import backtype.storm.topology.TopologyBuilder; 11 | import backtype.storm.topology.base.BaseBasicBolt; 12 | import backtype.storm.tuple.Fields; 13 | import backtype.storm.tuple.Tuple; 14 | import backtype.storm.tuple.Values; 15 | //import storm.starter.spout.RandomSentenceSpout; 16 | 17 | import java.util.HashMap; 18 | import java.util.Map; 19 | 20 | /** 21 | * A multilang bolt that splits each incoming sentence into words by delegating to splitsentence.py through Storm's ShellBolt protocol; emits one 'word' tuple per word 22 | */ 23 | 24 | // https://github.com/apache/storm/blob/master/examples/storm-starter/src/jvm/storm/starter/WordCountTopology.java 25 | 26 | public class SplitSentence extends ShellBolt implements IRichBolt { 27 | 28 | public SplitSentence() { 29 | super("python", "splitsentence.py"); 30 | } 31 | 32 | @Override 33 | public void declareOutputFields(OutputFieldsDeclarer declarer) { 34 | declarer.declare(new Fields("word")); 35 | } 36 | 37 | @Override 38 | public Map getComponentConfiguration() { 39 | return null; 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /lesson3/stage2/src/jvm/udacity/storm/TweetTopology.java: -------------------------------------------------------------------------------- 1 | package udacity.storm; 2 | 3 | import backtype.storm.Config; 4 | import backtype.storm.LocalCluster; 5 | import backtype.storm.StormSubmitter; 6 | import backtype.storm.spout.SpoutOutputCollector; 7 | import backtype.storm.task.OutputCollector; 8 | import backtype.storm.task.TopologyContext; 9 | import backtype.storm.testing.TestWordSpout; 10 | import 
backtype.storm.topology.OutputFieldsDeclarer; 11 | import backtype.storm.topology.TopologyBuilder; 12 | import backtype.storm.topology.base.BaseRichSpout; 13 | import backtype.storm.topology.base.BaseRichBolt; 14 | import backtype.storm.tuple.Fields; 15 | import backtype.storm.tuple.Tuple; 16 | import backtype.storm.tuple.Values; 17 | import backtype.storm.utils.Utils; 18 | 19 | // NOTE - you must install the python Beautiful Soup module in Ubuntu 20 | // before this code will run. In your virtual machine, run: 21 | // 22 | // sudo apt-get install python-bs4 23 | // 24 | // see Lesson 4 for details on adding this provision to your Vagrantfile 25 | 26 | class TweetTopology 27 | { 28 | public static void main(String[] args) throws Exception 29 | { 30 | // create the topology 31 | TopologyBuilder builder = new TopologyBuilder(); 32 | 33 | /* 34 | * In order to create the spout, you need to get twitter credentials 35 | * If you need to use Twitter firehose/Tweet stream for your idea, 36 | * create a set of credentials by following the instructions at 37 | * 38 | * https://dev.twitter.com/discussions/631 39 | * 40 | */ 41 | 42 | // now create the tweet spout with the credentials 43 | TweetSpout tweetSpout = new TweetSpout( 44 | "[Your customer key]", 45 | "[Your secret key]", 46 | "[Your access token]", 47 | "[Your access secret]" 48 | ); 49 | 50 | // attach the tweet spout to the topology - parallelism of 1 51 | builder.setSpout("tweet-spout", tweetSpout, 1); 52 | 53 | // attach the parse tweet bolt using shuffle grouping 54 | //builder.setBolt("parse-tweet-bolt", new ParseTweetBolt(), 10).shuffleGrouping("tweet-spout"); 55 | 56 | //************* replace Java ParseTweetBolt with Java/Python SplitSentence 57 | builder.setBolt("python-split-sentence", new SplitSentence(), 10).shuffleGrouping("tweet-spout"); 58 | 59 | // attach the count bolt using fields grouping - parallelism of 15 60 | //builder.setBolt("count-bolt", new CountBolt(), 
15).fieldsGrouping("parse-tweet-bolt", new Fields("tweet-word")); 61 | 62 | //************* replace Java "parse-tweet-bolt" with Java/Python "python-split-sentence" 63 | builder.setBolt("count-bolt", new CountBolt(), 15).fieldsGrouping("python-split-sentence", new Fields("word")); 64 | 65 | // attach the report bolt using global grouping - parallelism of 1 66 | builder.setBolt("report-bolt", new ReportBolt(), 1).globalGrouping("count-bolt"); 67 | 68 | // create the default config object 69 | Config conf = new Config(); 70 | 71 | // set the config in debugging mode 72 | conf.setDebug(true); 73 | 74 | if (args != null && args.length > 0) { 75 | 76 | // run it in a live cluster 77 | 78 | // set the number of workers for running all spout and bolt tasks 79 | conf.setNumWorkers(3); 80 | 81 | // create the topology and submit with config 82 | StormSubmitter.submitTopology(args[0], conf, builder.createTopology()); 83 | 84 | } else { 85 | 86 | // run it in a simulated local cluster 87 | 88 | // set the number of threads to run - similar to setting number of workers in live cluster 89 | conf.setMaxTaskParallelism(3); 90 | 91 | // create the local cluster instance 92 | LocalCluster cluster = new LocalCluster(); 93 | 94 | // submit the topology to the local cluster 95 | cluster.submitTopology("tweet-word-count", conf, builder.createTopology()); 96 | 97 | // let the topology run for 1000*30 seconds. note topologies never terminate! 
98 | Utils.sleep(1000*30000); 99 | 100 | // now kill the topology 101 | cluster.killTopology("tweet-word-count"); 102 | 103 | // we are done, so shutdown the local cluster 104 | cluster.shutdown(); 105 | } 106 | } 107 | } 108 | -------------------------------------------------------------------------------- /lesson3/stage2/src/jvm/udacity/storm/resources/splitsentence.py: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | 17 | # https://github.com/apache/storm/blob/master/examples/storm-starter/multilang/resources/splitsentence.py 18 | 19 | import storm 20 | 21 | class SplitSentenceBolt(storm.BasicBolt): 22 | def process(self, tup): 23 | #TO DO: Add check for empty values 24 | words = tup.values[0].split(" ") 25 | for word in words: 26 | storm.emit([word]) 27 | 28 | SplitSentenceBolt().run() 29 | -------------------------------------------------------------------------------- /lesson3/stage3/src/jvm/udacity/storm/CountBolt.java: -------------------------------------------------------------------------------- 1 | package udacity.storm; 2 | 3 | import backtype.storm.Config; 4 | import backtype.storm.LocalCluster; 5 | import backtype.storm.StormSubmitter; 6 | import backtype.storm.spout.SpoutOutputCollector; 7 | import backtype.storm.task.OutputCollector; 8 | import backtype.storm.task.TopologyContext; 9 | import backtype.storm.testing.TestWordSpout; 10 | import backtype.storm.topology.OutputFieldsDeclarer; 11 | import backtype.storm.topology.TopologyBuilder; 12 | import backtype.storm.topology.base.BaseRichSpout; 13 | import backtype.storm.topology.base.BaseRichBolt; 14 | import backtype.storm.tuple.Fields; 15 | import backtype.storm.tuple.Tuple; 16 | import backtype.storm.tuple.Values; 17 | import backtype.storm.utils.Utils; 18 | 19 | import java.util.HashMap; 20 | import java.util.Map; 21 | 22 | /** 23 | * A bolt that counts the words that it receives 24 | */ 25 | public class CountBolt extends BaseRichBolt 26 | { 27 | // To output tuples from this bolt to the next stage bolts, if any 28 | private OutputCollector collector; 29 | 30 | // Map to store the count of the words 31 | private Map countMap; 32 | 33 | @Override 34 | public void prepare( 35 | Map map, 36 | TopologyContext topologyContext, 37 | OutputCollector outputCollector) 38 | { 39 | 40 | // save the collector for emitting tuples 41 | collector = outputCollector; 42 | 43 | // create and initialize the map 44 | 
countMap = new HashMap(); 45 | } 46 | 47 | @Override 48 | public void execute(Tuple tuple) 49 | { 50 | // get the word from the 1st column of incoming tuple 51 | String word = tuple.getString(0); 52 | 53 | // check if the word is present in the map 54 | if (countMap.get(word) == null) { 55 | 56 | // not present, add the word with a count of 1 57 | countMap.put(word, 1); 58 | } else { 59 | 60 | // already there, hence get the count 61 | Integer val = countMap.get(word); 62 | 63 | // increment the count and save it to the map 64 | countMap.put(word, ++val); 65 | } 66 | 67 | // emit the word and count 68 | collector.emit(new Values(word, countMap.get(word))); 69 | } 70 | 71 | @Override 72 | public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) 73 | { 74 | // tell storm the schema of the output tuple for this spout 75 | // tuple consists of a two columns called 'word' and 'count' 76 | 77 | // declare the first column 'word', second column 'count' 78 | outputFieldsDeclarer.declare(new Fields("word","count")); 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /lesson3/stage3/src/jvm/udacity/storm/ParseTweetBolt.java: -------------------------------------------------------------------------------- 1 | package udacity.storm; 2 | 3 | import backtype.storm.Config; 4 | import backtype.storm.LocalCluster; 5 | import backtype.storm.StormSubmitter; 6 | import backtype.storm.spout.SpoutOutputCollector; 7 | import backtype.storm.task.OutputCollector; 8 | import backtype.storm.task.TopologyContext; 9 | import backtype.storm.testing.TestWordSpout; 10 | import backtype.storm.topology.OutputFieldsDeclarer; 11 | import backtype.storm.topology.TopologyBuilder; 12 | import backtype.storm.topology.base.BaseRichSpout; 13 | import backtype.storm.topology.base.BaseRichBolt; 14 | import backtype.storm.tuple.Fields; 15 | import backtype.storm.tuple.Tuple; 16 | import backtype.storm.tuple.Values; 17 | import 
backtype.storm.utils.Utils; 18 | 19 | import java.util.Map; 20 | 21 | /** 22 | * A bolt that parses the tweet into words 23 | */ 24 | public class ParseTweetBolt extends BaseRichBolt 25 | { 26 | // To output tuples from this bolt to the count bolt 27 | OutputCollector collector; 28 | 29 | @Override 30 | public void prepare( 31 | Map map, 32 | TopologyContext topologyContext, 33 | OutputCollector outputCollector) 34 | { 35 | // save the output collector for emitting tuples 36 | collector = outputCollector; 37 | } 38 | 39 | @Override 40 | public void execute(Tuple tuple) 41 | { 42 | // get the 1st column 'tweet' from tuple 43 | String tweet = tuple.getString(0); 44 | 45 | // provide the delimiters for splitting the tweet 46 | String delims = "[ .,?!]+"; 47 | 48 | // now split the tweet into tokens 49 | String[] tokens = tweet.split(delims); 50 | 51 | // for each token/word, emit it 52 | for (String token: tokens) { 53 | collector.emit(new Values(token)); 54 | } 55 | } 56 | 57 | @Override 58 | public void declareOutputFields(OutputFieldsDeclarer declarer) 59 | { 60 | // tell storm the schema of the output tuple for this spout 61 | // tuple consists of a single column called 'tweet-word' 62 | declarer.declare(new Fields("tweet-word")); 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /lesson3/stage3/src/jvm/udacity/storm/ReportBolt.java: -------------------------------------------------------------------------------- 1 | package udacity.storm; 2 | 3 | import backtype.storm.Config; 4 | import backtype.storm.LocalCluster; 5 | import backtype.storm.StormSubmitter; 6 | import backtype.storm.spout.SpoutOutputCollector; 7 | import backtype.storm.task.OutputCollector; 8 | import backtype.storm.task.TopologyContext; 9 | import backtype.storm.testing.TestWordSpout; 10 | import backtype.storm.topology.OutputFieldsDeclarer; 11 | import backtype.storm.topology.TopologyBuilder; 12 | import 
backtype.storm.topology.base.BaseRichSpout; 13 | import backtype.storm.topology.base.BaseRichBolt; 14 | import backtype.storm.tuple.Fields; 15 | import backtype.storm.tuple.Tuple; 16 | import backtype.storm.tuple.Values; 17 | import backtype.storm.utils.Utils; 18 | 19 | import java.util.Map; 20 | 21 | import com.lambdaworks.redis.RedisClient; 22 | import com.lambdaworks.redis.RedisConnection; 23 | 24 | /** 25 | * A bolt that prints the word and count to redis 26 | */ 27 | public class ReportBolt extends BaseRichBolt 28 | { 29 | // place holder to keep the connection to redis 30 | transient RedisConnection redis; 31 | 32 | @Override 33 | public void prepare( 34 | Map map, 35 | TopologyContext topologyContext, 36 | OutputCollector outputCollector) 37 | { 38 | // instantiate a redis connection 39 | RedisClient client = new RedisClient("localhost",6379); 40 | 41 | // initiate the actual connection 42 | redis = client.connect(); 43 | } 44 | 45 | @Override 46 | public void execute(Tuple tuple) 47 | { 48 | // access the first column 'word' 49 | String word = tuple.getStringByField("word"); 50 | 51 | // access the second column 'count' 52 | Integer count = tuple.getIntegerByField("count"); 53 | 54 | // publish the word count to redis using word as the key 55 | redis.publish("WordCountTopology", word + "|" + Long.toString(count)); 56 | } 57 | 58 | public void declareOutputFields(OutputFieldsDeclarer declarer) 59 | { 60 | // nothing to add - since it is the final bolt 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /lesson3/stage3/src/jvm/udacity/storm/SplitSentence.java: -------------------------------------------------------------------------------- 1 | package udacity.storm; 2 | 3 | import backtype.storm.Config; 4 | import backtype.storm.LocalCluster; 5 | import backtype.storm.StormSubmitter; 6 | import backtype.storm.task.ShellBolt; 7 | import backtype.storm.topology.BasicOutputCollector; 8 | import 
backtype.storm.topology.IRichBolt; 9 | import backtype.storm.topology.OutputFieldsDeclarer; 10 | import backtype.storm.topology.TopologyBuilder; 11 | import backtype.storm.topology.base.BaseBasicBolt; 12 | import backtype.storm.tuple.Fields; 13 | import backtype.storm.tuple.Tuple; 14 | import backtype.storm.tuple.Values; 15 | //import storm.starter.spout.RandomSentenceSpout; 16 | 17 | import java.util.HashMap; 18 | import java.util.Map; 19 | 20 | /** 21 | * A bolt that parses the tweet into words 22 | */ 23 | 24 | // https://github.com/apache/storm/blob/master/examples/storm-starter/src/jvm/storm/starter/WordCountTopology.java 25 | 26 | public class SplitSentence extends ShellBolt implements IRichBolt { 27 | 28 | public SplitSentence() { 29 | super("python", "splitsentence.py"); 30 | } 31 | 32 | @Override 33 | public void declareOutputFields(OutputFieldsDeclarer declarer) { 34 | declarer.declare(new Fields("word")); 35 | } 36 | 37 | @Override 38 | public Map getComponentConfiguration() { 39 | return null; 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /lesson3/stage3/src/jvm/udacity/storm/TweetTopology.java: -------------------------------------------------------------------------------- 1 | package udacity.storm; 2 | 3 | import backtype.storm.Config; 4 | import backtype.storm.LocalCluster; 5 | import backtype.storm.StormSubmitter; 6 | import backtype.storm.spout.SpoutOutputCollector; 7 | import backtype.storm.task.OutputCollector; 8 | import backtype.storm.task.TopologyContext; 9 | import backtype.storm.testing.TestWordSpout; 10 | import backtype.storm.topology.OutputFieldsDeclarer; 11 | import backtype.storm.topology.TopologyBuilder; 12 | import backtype.storm.topology.base.BaseRichSpout; 13 | import backtype.storm.topology.base.BaseRichBolt; 14 | import backtype.storm.tuple.Fields; 15 | import backtype.storm.tuple.Tuple; 16 | import backtype.storm.tuple.Values; 17 | import backtype.storm.utils.Utils; 18 | 19 | 
// NOTE - you must install the python Beautiful Soup module in Ubuntu 20 | // before this code will run. In your virtual machine, run: 21 | // 22 | // sudo apt-get install python-bs4 23 | // 24 | // see Lesson 4 for details on adding this provision to your Vagrantfile 25 | 26 | 27 | 28 | class TweetTopology 29 | { 30 | public static void main(String[] args) throws Exception 31 | { 32 | // create the topology 33 | TopologyBuilder builder = new TopologyBuilder(); 34 | 35 | /* 36 | * In order to create the spout, you need to get twitter credentials 37 | * If you need to use Twitter firehose/Tweet stream for your idea, 38 | * create a set of credentials by following the instructions at 39 | * 40 | * https://dev.twitter.com/discussions/631 41 | * 42 | */ 43 | 44 | // now create the tweet spout with the credentials 45 | TweetSpout tweetSpout = new TweetSpout( 46 | "[Your customer key]", 47 | "[Your secret key]", 48 | "[Your access token]", 49 | "[Your access secret]" 50 | ); 51 | 52 | // attach the tweet spout to the topology - parallelism of 1 53 | builder.setSpout("tweet-spout", tweetSpout, 1); 54 | 55 | // attach the parse tweet bolt using shuffle grouping 56 | //builder.setBolt("parse-tweet-bolt", new ParseTweetBolt(), 10).shuffleGrouping("tweet-spout"); 57 | 58 | //************* replace with URLBolt emitting text using shuffle grouping 59 | builder.setBolt("python-URL-bolt", new URLBolt(), 10).shuffleGrouping("tweet-spout"); 60 | 61 | 62 | //************* replace Java ParseTweetBolt with Java/Python SplitSentence 63 | builder.setBolt("python-split-sentence", new SplitSentence(), 10).shuffleGrouping("python-URL-bolt"); 64 | 65 | // attach the count bolt using fields grouping - parallelism of 15 66 | //builder.setBolt("count-bolt", new CountBolt(), 15).fieldsGrouping("parse-tweet-bolt", new Fields("tweet-word")); 67 | 68 | //************* replace Java "parse-tweet-bolt" with Java/Python "python-split-sentence" 69 | builder.setBolt("count-bolt", new CountBolt(), 
15).fieldsGrouping("python-split-sentence", new Fields("word")); 70 | 71 | // attach the report bolt using global grouping - parallelism of 1 72 | builder.setBolt("report-bolt", new ReportBolt(), 1).globalGrouping("count-bolt"); 73 | 74 | // create the default config object 75 | Config conf = new Config(); 76 | 77 | // set the config in debugging mode 78 | conf.setDebug(true); 79 | 80 | if (args != null && args.length > 0) { 81 | 82 | // run it in a live cluster 83 | 84 | // set the number of workers for running all spout and bolt tasks 85 | conf.setNumWorkers(3); 86 | 87 | // create the topology and submit with config 88 | StormSubmitter.submitTopology(args[0], conf, builder.createTopology()); 89 | 90 | } else { 91 | 92 | // run it in a simulated local cluster 93 | 94 | // set the number of threads to run - similar to setting number of workers in live cluster 95 | conf.setMaxTaskParallelism(3); 96 | 97 | // create the local cluster instance 98 | LocalCluster cluster = new LocalCluster(); 99 | 100 | // submit the topology to the local cluster 101 | cluster.submitTopology("tweet-word-count", conf, builder.createTopology()); 102 | 103 | // let the topology run for 1000*30 seconds. note topologies never terminate! 
104 | Utils.sleep(1000*30000); 105 | 106 | // now kill the topology 107 | cluster.killTopology("tweet-word-count"); 108 | 109 | // we are done, so shutdown the local cluster 110 | cluster.shutdown(); 111 | } 112 | } 113 | } 114 | -------------------------------------------------------------------------------- /lesson3/stage3/src/jvm/udacity/storm/URLBolt.java: -------------------------------------------------------------------------------- 1 | package udacity.storm; 2 | 3 | import backtype.storm.Config; 4 | import backtype.storm.LocalCluster; 5 | import backtype.storm.StormSubmitter; 6 | import backtype.storm.task.ShellBolt; 7 | import backtype.storm.topology.BasicOutputCollector; 8 | import backtype.storm.topology.IRichBolt; 9 | import backtype.storm.topology.OutputFieldsDeclarer; 10 | import backtype.storm.topology.TopologyBuilder; 11 | import backtype.storm.topology.base.BaseBasicBolt; 12 | import backtype.storm.tuple.Fields; 13 | import backtype.storm.tuple.Tuple; 14 | import backtype.storm.tuple.Values; 15 | //import storm.starter.spout.RandomSentenceSpout; 16 | 17 | import java.util.HashMap; 18 | import java.util.Map; 19 | 20 | /** 21 | * A bolt that parses the tweet into words 22 | */ 23 | 24 | // https://github.com/apache/storm/blob/master/examples/storm-starter/src/jvm/storm/starter/WordCountTopology.java 25 | 26 | public class URLBolt extends ShellBolt implements IRichBolt { 27 | 28 | public URLBolt() { 29 | super("python", "urltext.py"); 30 | } 31 | 32 | @Override 33 | public void declareOutputFields(OutputFieldsDeclarer declarer) { 34 | declarer.declare(new Fields("text")); 35 | } 36 | 37 | @Override 38 | public Map getComponentConfiguration() { 39 | return null; 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /lesson3/stage3/src/jvm/udacity/storm/resources/splitsentence.py: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation 
(ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # https://github.com/apache/storm/blob/master/examples/storm-starter/multilang/resources/splitsentence.py 18 | 19 | import storm 20 | 21 | class SplitSentenceBolt(storm.BasicBolt): 22 | def process(self, tup): 23 | #added to check for empty values 24 | if tup.values[0]: 25 | words = tup.values[0].split(" ") 26 | if words: 27 | for word in words: 28 | storm.emit([word]) 29 | 30 | SplitSentenceBolt().run() 31 | -------------------------------------------------------------------------------- /lesson3/stage3/src/jvm/udacity/storm/resources/urltext.py: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. 
You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # https://github.com/apache/storm/blob/master/examples/storm-starter/multilang/resources/splitsentence.py 18 | 19 | import storm 20 | import urllib2 21 | from bs4 import BeautifulSoup 22 | 23 | class URLBolt(storm.BasicBolt): 24 | def process(self, tup): 25 | url = tup.values[0] 26 | # python urllib2 27 | try: 28 | html = urllib2.urlopen(url).read() 29 | 30 | # using BeautifulSoup, "Making the Soup" 31 | soup = BeautifulSoup(html) 32 | # return title and paragraph tags 33 | urlText = soup.findAll({'title' : True, 'p' : True}) 34 | 35 | #emit tuple if string exists 36 | if urlText: 37 | [storm.emit([t.string]) for t in urlText] 38 | except: 39 | pass 40 | 41 | URLBolt().run() 42 | -------------------------------------------------------------------------------- /lesson3/stage4/src/jvm/udacity/storm/CountBolt.java: -------------------------------------------------------------------------------- 1 | package udacity.storm; 2 | 3 | import backtype.storm.Config; 4 | import backtype.storm.LocalCluster; 5 | import backtype.storm.StormSubmitter; 6 | import backtype.storm.spout.SpoutOutputCollector; 7 | import backtype.storm.task.OutputCollector; 8 | import backtype.storm.task.TopologyContext; 9 | import backtype.storm.testing.TestWordSpout; 10 | import backtype.storm.topology.OutputFieldsDeclarer; 11 | import backtype.storm.topology.TopologyBuilder; 12 | import backtype.storm.topology.base.BaseRichSpout; 13 | import backtype.storm.topology.base.BaseRichBolt; 14 | import backtype.storm.tuple.Fields; 15 | import 
backtype.storm.tuple.Tuple; 16 | import backtype.storm.tuple.Values; 17 | import backtype.storm.utils.Utils; 18 | 19 | import java.util.HashMap; 20 | import java.util.Map; 21 | 22 | /** 23 | * A bolt that counts the words that it receives 24 | */ 25 | public class CountBolt extends BaseRichBolt 26 | { 27 | // To output tuples from this bolt to the next stage bolts, if any 28 | private OutputCollector collector; 29 | 30 | // Map to store the count of the words 31 | private Map countMap; 32 | 33 | @Override 34 | public void prepare( 35 | Map map, 36 | TopologyContext topologyContext, 37 | OutputCollector outputCollector) 38 | { 39 | 40 | // save the collector for emitting tuples 41 | collector = outputCollector; 42 | 43 | // create and initialize the map 44 | countMap = new HashMap(); 45 | } 46 | 47 | @Override 48 | public void execute(Tuple tuple) 49 | { 50 | // get the word from the 1st column of incoming tuple 51 | String word = tuple.getString(0); 52 | 53 | // check if the word is present in the map 54 | if (countMap.get(word) == null) { 55 | 56 | // not present, add the word with a count of 1 57 | countMap.put(word, 1); 58 | } else { 59 | 60 | // already there, hence get the count 61 | Integer val = countMap.get(word); 62 | 63 | // increment the count and save it to the map 64 | countMap.put(word, ++val); 65 | } 66 | 67 | // emit the word and count 68 | collector.emit(new Values(word, countMap.get(word))); 69 | } 70 | 71 | @Override 72 | public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) 73 | { 74 | // tell storm the schema of the output tuple for this spout 75 | // tuple consists of a two columns called 'word' and 'count' 76 | 77 | // declare the first column 'word', second column 'count' 78 | outputFieldsDeclarer.declare(new Fields("word","count")); 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /lesson3/stage4/src/jvm/udacity/storm/ParseTweetBolt.java: 
-------------------------------------------------------------------------------- 1 | package udacity.storm; 2 | 3 | import backtype.storm.Config; 4 | import backtype.storm.LocalCluster; 5 | import backtype.storm.StormSubmitter; 6 | import backtype.storm.spout.SpoutOutputCollector; 7 | import backtype.storm.task.OutputCollector; 8 | import backtype.storm.task.TopologyContext; 9 | import backtype.storm.testing.TestWordSpout; 10 | import backtype.storm.topology.OutputFieldsDeclarer; 11 | import backtype.storm.topology.TopologyBuilder; 12 | import backtype.storm.topology.base.BaseRichSpout; 13 | import backtype.storm.topology.base.BaseRichBolt; 14 | import backtype.storm.tuple.Fields; 15 | import backtype.storm.tuple.Tuple; 16 | import backtype.storm.tuple.Values; 17 | import backtype.storm.utils.Utils; 18 | 19 | import java.util.Map; 20 | 21 | /** 22 | * A bolt that parses the tweet into words 23 | */ 24 | public class ParseTweetBolt extends BaseRichBolt 25 | { 26 | // To output tuples from this bolt to the count bolt 27 | OutputCollector collector; 28 | 29 | @Override 30 | public void prepare( 31 | Map map, 32 | TopologyContext topologyContext, 33 | OutputCollector outputCollector) 34 | { 35 | // save the output collector for emitting tuples 36 | collector = outputCollector; 37 | } 38 | 39 | @Override 40 | public void execute(Tuple tuple) 41 | { 42 | // get the 1st column 'tweet' from tuple 43 | String tweet = tuple.getString(0); 44 | 45 | // provide the delimiters for splitting the tweet 46 | String delims = "[ .,?!]+"; 47 | 48 | // now split the tweet into tokens 49 | String[] tokens = tweet.split(delims); 50 | 51 | // for each token/word, emit it 52 | for (String token: tokens) { 53 | collector.emit(new Values(token)); 54 | } 55 | } 56 | 57 | @Override 58 | public void declareOutputFields(OutputFieldsDeclarer declarer) 59 | { 60 | // tell storm the schema of the output tuple for this spout 61 | // tuple consists of a single column called 'tweet-word' 62 | 
declarer.declare(new Fields("tweet-word")); 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /lesson3/stage4/src/jvm/udacity/storm/ReportBolt.java: -------------------------------------------------------------------------------- 1 | package udacity.storm; 2 | 3 | import backtype.storm.Config; 4 | import backtype.storm.LocalCluster; 5 | import backtype.storm.StormSubmitter; 6 | import backtype.storm.spout.SpoutOutputCollector; 7 | import backtype.storm.task.OutputCollector; 8 | import backtype.storm.task.TopologyContext; 9 | import backtype.storm.testing.TestWordSpout; 10 | import backtype.storm.topology.OutputFieldsDeclarer; 11 | import backtype.storm.topology.TopologyBuilder; 12 | import backtype.storm.topology.base.BaseRichSpout; 13 | import backtype.storm.topology.base.BaseRichBolt; 14 | import backtype.storm.tuple.Fields; 15 | import backtype.storm.tuple.Tuple; 16 | import backtype.storm.tuple.Values; 17 | import backtype.storm.utils.Utils; 18 | 19 | import java.util.Map; 20 | 21 | import com.lambdaworks.redis.RedisClient; 22 | import com.lambdaworks.redis.RedisConnection; 23 | 24 | /** 25 | * A bolt that prints the word and count to redis 26 | */ 27 | public class ReportBolt extends BaseRichBolt 28 | { 29 | // place holder to keep the connection to redis 30 | transient RedisConnection redis; 31 | 32 | @Override 33 | public void prepare( 34 | Map map, 35 | TopologyContext topologyContext, 36 | OutputCollector outputCollector) 37 | { 38 | // instantiate a redis connection 39 | RedisClient client = new RedisClient("localhost",6379); 40 | 41 | // initiate the actual connection 42 | redis = client.connect(); 43 | } 44 | 45 | @Override 46 | public void execute(Tuple tuple) 47 | { 48 | // access the first column 'word' 49 | String word = tuple.getStringByField("word"); 50 | 51 | // access the second column 'count' 52 | Integer count = tuple.getIntegerByField("count"); 53 | 54 | // publish the word count to redis 
using word as the key 55 | redis.publish("WordCountTopology", word + "|" + Long.toString(count)); 56 | } 57 | 58 | public void declareOutputFields(OutputFieldsDeclarer declarer) 59 | { 60 | // nothing to add - since it is the final bolt 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /lesson3/stage4/src/jvm/udacity/storm/TweetTopology.java: -------------------------------------------------------------------------------- 1 | package udacity.storm; 2 | 3 | import backtype.storm.Config; 4 | import backtype.storm.LocalCluster; 5 | import backtype.storm.StormSubmitter; 6 | import backtype.storm.spout.SpoutOutputCollector; 7 | import backtype.storm.task.OutputCollector; 8 | import backtype.storm.task.TopologyContext; 9 | import backtype.storm.testing.TestWordSpout; 10 | import backtype.storm.topology.OutputFieldsDeclarer; 11 | import backtype.storm.topology.TopologyBuilder; 12 | import backtype.storm.topology.base.BaseRichSpout; 13 | import backtype.storm.topology.base.BaseRichBolt; 14 | import backtype.storm.tuple.Fields; 15 | import backtype.storm.tuple.Tuple; 16 | import backtype.storm.tuple.Values; 17 | import backtype.storm.utils.Utils; 18 | 19 | class TweetTopology 20 | { 21 | public static void main(String[] args) throws Exception 22 | { 23 | // create the topology 24 | TopologyBuilder builder = new TopologyBuilder(); 25 | 26 | /* 27 | * In order to create the spout, you need to get twitter credentials 28 | * If you need to use Twitter firehose/Tweet stream for your idea, 29 | * create a set of credentials by following the instructions at 30 | * 31 | * https://dev.twitter.com/discussions/631 32 | * 33 | */ 34 | 35 | // now create the tweet spout with the credentials 36 | TweetSpout tweetSpout = new TweetSpout( 37 | "[Your customer key]", 38 | "[Your secret key]", 39 | "[Your access token]", 40 | "[Your access secret]" 41 | ); 42 | 43 | // attach the tweet spout to the topology - parallelism of 1 44 | 
builder.setSpout("tweet-spout", tweetSpout, 1); 45 | 46 | // attach the parse tweet bolt using shuffle grouping 47 | builder.setBolt("parse-tweet-bolt", new ParseTweetBolt(), 10).shuffleGrouping("tweet-spout"); 48 | 49 | // attach the count bolt using fields grouping - parallelism of 15 50 | //builder.setBolt("count-bolt", new CountBolt(), 15).fieldsGrouping("parse-tweet-bolt", new Fields("tweet-word")); 51 | 52 | // attach rolling count bolt using fields grouping - parallelism of 5 53 | builder.setBolt("rolling-count-bolt", new RollingCountBolt(30, 10), 1).fieldsGrouping("parse-tweet-bolt", new Fields("tweet-word")); 54 | 55 | // attach the report bolt using global grouping - parallelism of 1 56 | builder.setBolt("report-bolt", new ReportBolt(), 1).globalGrouping("rolling-count-bolt"); 57 | 58 | // create the default config object 59 | Config conf = new Config(); 60 | 61 | // set the config in debugging mode 62 | conf.setDebug(true); 63 | 64 | if (args != null && args.length > 0) { 65 | 66 | // run it in a live cluster 67 | 68 | // set the number of workers for running all spout and bolt tasks 69 | conf.setNumWorkers(3); 70 | 71 | // create the topology and submit with config 72 | StormSubmitter.submitTopology(args[0], conf, builder.createTopology()); 73 | 74 | } else { 75 | 76 | // run it in a simulated local cluster 77 | 78 | // set the number of threads to run - similar to setting number of workers in live cluster 79 | conf.setMaxTaskParallelism(3); 80 | 81 | // create the local cluster instance 82 | LocalCluster cluster = new LocalCluster(); 83 | 84 | // submit the topology to the local cluster 85 | cluster.submitTopology("tweet-word-count", conf, builder.createTopology()); 86 | 87 | // let the topology run for 300 seconds. note topologies never terminate! 
88 | Utils.sleep(300000); 89 | 90 | // now kill the topology 91 | cluster.killTopology("tweet-word-count"); 92 | 93 | // we are done, so shutdown the local cluster 94 | cluster.shutdown(); 95 | } 96 | } 97 | } 98 | -------------------------------------------------------------------------------- /lesson3/stage4/src/jvm/udacity/storm/tools/NthLastModifiedTimeTracker.java: -------------------------------------------------------------------------------- 1 | package udacity.storm.tools; 2 | 3 | import backtype.storm.utils.Time; 4 | import org.apache.commons.collections.buffer.CircularFifoBuffer; 5 | 6 | /** 7 | * This class tracks the time-since-last-modify of a "thing" in a rolling fashion. 8 | *

9 | * For example, create a 5-slot tracker to track the five most recent time-since-last-modify. 10 | *

11 | * You must manually "mark" that the "something" that you want to track -- in terms of modification times -- has just 12 | * been modified. 13 | */ 14 | public class NthLastModifiedTimeTracker { 15 | 16 | private static final int MILLIS_IN_SEC = 1000; 17 | 18 | private final CircularFifoBuffer lastModifiedTimesMillis; 19 | 20 | public NthLastModifiedTimeTracker(int numTimesToTrack) { 21 | if (numTimesToTrack < 1) { 22 | throw new IllegalArgumentException( 23 | "numTimesToTrack must be greater than zero (you requested " + numTimesToTrack + ")"); 24 | } 25 | lastModifiedTimesMillis = new CircularFifoBuffer(numTimesToTrack); 26 | initLastModifiedTimesMillis(); 27 | } 28 | 29 | private void initLastModifiedTimesMillis() { 30 | long nowCached = now(); 31 | for (int i = 0; i < lastModifiedTimesMillis.maxSize(); i++) { 32 | lastModifiedTimesMillis.add(Long.valueOf(nowCached)); 33 | } 34 | } 35 | 36 | private long now() { 37 | return Time.currentTimeMillis(); 38 | } 39 | 40 | public int secondsSinceOldestModification() { 41 | long modifiedTimeMillis = ((Long) lastModifiedTimesMillis.get()).longValue(); 42 | return (int) ((now() - modifiedTimeMillis) / MILLIS_IN_SEC); 43 | } 44 | 45 | public void markAsModified() { 46 | updateLastModifiedTime(); 47 | } 48 | 49 | private void updateLastModifiedTime() { 50 | lastModifiedTimesMillis.add(now()); 51 | } 52 | 53 | } 54 | -------------------------------------------------------------------------------- /lesson3/stage4/src/jvm/udacity/storm/tools/SlotBasedCounter.java: -------------------------------------------------------------------------------- 1 | //package storm.starter.tools; 2 | package udacity.storm.tools; 3 | 4 | import java.io.Serializable; 5 | import java.util.HashMap; 6 | import java.util.HashSet; 7 | import java.util.Map; 8 | import java.util.Set; 9 | 10 | /** 11 | * This class provides per-slot counts of the occurrences of objects. 12 | *

13 | * It can be used, for instance, as a building block for implementing sliding window counting of objects. 14 | * 15 | * @param The type of those objects we want to count. 16 | */ 17 | public final class SlotBasedCounter implements Serializable { 18 | 19 | private static final long serialVersionUID = 4858185737378394432L; 20 | 21 | private final Map objToCounts = new HashMap(); 22 | private final int numSlots; 23 | 24 | public SlotBasedCounter(int numSlots) { 25 | if (numSlots <= 0) { 26 | throw new IllegalArgumentException("Number of slots must be greater than zero (you requested " + numSlots + ")"); 27 | } 28 | this.numSlots = numSlots; 29 | } 30 | 31 | public void incrementCount(T obj, int slot) { 32 | long[] counts = objToCounts.get(obj); 33 | if (counts == null) { 34 | counts = new long[this.numSlots]; 35 | objToCounts.put(obj, counts); 36 | } 37 | counts[slot]++; 38 | } 39 | 40 | public long getCount(T obj, int slot) { 41 | long[] counts = objToCounts.get(obj); 42 | if (counts == null) { 43 | return 0; 44 | } 45 | else { 46 | return counts[slot]; 47 | } 48 | } 49 | 50 | public Map getCounts() { 51 | Map result = new HashMap(); 52 | for (T obj : objToCounts.keySet()) { 53 | result.put(obj, computeTotalCount(obj)); 54 | } 55 | return result; 56 | } 57 | 58 | private long computeTotalCount(T obj) { 59 | long[] curr = objToCounts.get(obj); 60 | long total = 0; 61 | for (long l : curr) { 62 | total += l; 63 | } 64 | return total; 65 | } 66 | 67 | /** 68 | * Reset the slot count of any tracked objects to zero for the given slot. 
69 | * 70 | * @param slot 71 | */ 72 | public void wipeSlot(int slot) { 73 | for (T obj : objToCounts.keySet()) { 74 | resetSlotCountToZero(obj, slot); 75 | } 76 | } 77 | 78 | private void resetSlotCountToZero(T obj, int slot) { 79 | long[] counts = objToCounts.get(obj); 80 | counts[slot] = 0; 81 | } 82 | 83 | private boolean shouldBeRemovedFromCounter(T obj) { 84 | return computeTotalCount(obj) == 0; 85 | } 86 | 87 | /** 88 | * Remove any object from the counter whose total count is zero (to free up memory). 89 | */ 90 | public void wipeZeros() { 91 | Set objToBeRemoved = new HashSet(); 92 | for (T obj : objToCounts.keySet()) { 93 | if (shouldBeRemovedFromCounter(obj)) { 94 | objToBeRemoved.add(obj); 95 | } 96 | } 97 | for (T obj : objToBeRemoved) { 98 | objToCounts.remove(obj); 99 | } 100 | } 101 | 102 | } 103 | -------------------------------------------------------------------------------- /lesson3/stage4/src/jvm/udacity/storm/tools/TupleHelpers.java: -------------------------------------------------------------------------------- 1 | //package storm.starter.util; 2 | package udacity.storm.tools; 3 | 4 | import backtype.storm.Constants; 5 | import backtype.storm.tuple.Tuple; 6 | 7 | public final class TupleHelpers { 8 | 9 | private TupleHelpers() { 10 | } 11 | 12 | public static boolean isTickTuple(Tuple tuple) { 13 | return tuple.getSourceComponent().equals(Constants.SYSTEM_COMPONENT_ID) && tuple.getSourceStreamId().equals( 14 | Constants.SYSTEM_TICK_STREAM_ID); 15 | } 16 | 17 | } 18 | -------------------------------------------------------------------------------- /lesson3/stage5/src/jvm/udacity/storm/AbstractRankerBolt.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. 
The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package udacity.storm; 19 | 20 | import backtype.storm.Config; 21 | import backtype.storm.topology.BasicOutputCollector; 22 | import backtype.storm.topology.OutputFieldsDeclarer; 23 | import backtype.storm.topology.base.BaseBasicBolt; 24 | import backtype.storm.tuple.Fields; 25 | import backtype.storm.tuple.Tuple; 26 | import backtype.storm.tuple.Values; 27 | import org.apache.log4j.Logger; 28 | //import storm.starter.tools.Rankings; 29 | //import storm.starter.util.TupleHelpers; 30 | 31 | import udacity.storm.tools.Rankings; 32 | import udacity.storm.tools.TupleHelpers; 33 | 34 | import java.util.HashMap; 35 | import java.util.Map; 36 | 37 | /** 38 | * This abstract bolt provides the basic behavior of bolts that rank objects according to their count. 39 | *

40 | * It uses a template method design pattern for {@link AbstractRankerBolt#execute(Tuple, BasicOutputCollector)} to allow 41 | * actual bolt implementations to specify how incoming tuples are processed, i.e. how the objects embedded within those 42 | * tuples are retrieved and counted. 43 | */ 44 | public abstract class AbstractRankerBolt extends BaseBasicBolt { 45 | 46 | private static final long serialVersionUID = 4931640198501530202L; 47 | private static final int DEFAULT_EMIT_FREQUENCY_IN_SECONDS = 2; 48 | private static final int DEFAULT_COUNT = 10; 49 | 50 | private final int emitFrequencyInSeconds; 51 | private final int count; 52 | private final Rankings rankings; 53 | 54 | public AbstractRankerBolt() { 55 | this(DEFAULT_COUNT, DEFAULT_EMIT_FREQUENCY_IN_SECONDS); 56 | } 57 | 58 | public AbstractRankerBolt(int topN) { 59 | this(topN, DEFAULT_EMIT_FREQUENCY_IN_SECONDS); 60 | } 61 | 62 | public AbstractRankerBolt(int topN, int emitFrequencyInSeconds) { 63 | if (topN < 1) { 64 | throw new IllegalArgumentException("topN must be >= 1 (you requested " + topN + ")"); 65 | } 66 | if (emitFrequencyInSeconds < 1) { 67 | throw new IllegalArgumentException( 68 | "The emit frequency must be >= 1 seconds (you requested " + emitFrequencyInSeconds + " seconds)"); 69 | } 70 | count = topN; 71 | this.emitFrequencyInSeconds = emitFrequencyInSeconds; 72 | rankings = new Rankings(count); 73 | } 74 | 75 | protected Rankings getRankings() { 76 | return rankings; 77 | } 78 | 79 | /** 80 | * This method functions as a template method (design pattern). 
81 | */ 82 | @Override 83 | public final void execute(Tuple tuple, BasicOutputCollector collector) { 84 | if (TupleHelpers.isTickTuple(tuple)) { 85 | getLogger().debug("Received tick tuple, triggering emit of current rankings"); 86 | emitRankings(collector); 87 | } 88 | else { 89 | updateRankingsWithTuple(tuple); 90 | } 91 | } 92 | 93 | abstract void updateRankingsWithTuple(Tuple tuple); 94 | 95 | private void emitRankings(BasicOutputCollector collector) { 96 | collector.emit(new Values(rankings.copy())); 97 | getLogger().debug("Rankings: " + rankings); 98 | } 99 | 100 | @Override 101 | public void declareOutputFields(OutputFieldsDeclarer declarer) { 102 | declarer.declare(new Fields("rankings")); 103 | } 104 | 105 | @Override 106 | public Map getComponentConfiguration() { 107 | Map conf = new HashMap(); 108 | conf.put(Config.TOPOLOGY_TICK_TUPLE_FREQ_SECS, emitFrequencyInSeconds); 109 | return conf; 110 | } 111 | 112 | abstract Logger getLogger(); 113 | } 114 | -------------------------------------------------------------------------------- /lesson3/stage5/src/jvm/udacity/storm/CountBolt.java: -------------------------------------------------------------------------------- 1 | package udacity.storm; 2 | 3 | import backtype.storm.Config; 4 | import backtype.storm.LocalCluster; 5 | import backtype.storm.StormSubmitter; 6 | import backtype.storm.spout.SpoutOutputCollector; 7 | import backtype.storm.task.OutputCollector; 8 | import backtype.storm.task.TopologyContext; 9 | import backtype.storm.testing.TestWordSpout; 10 | import backtype.storm.topology.OutputFieldsDeclarer; 11 | import backtype.storm.topology.TopologyBuilder; 12 | import backtype.storm.topology.base.BaseRichSpout; 13 | import backtype.storm.topology.base.BaseRichBolt; 14 | import backtype.storm.tuple.Fields; 15 | import backtype.storm.tuple.Tuple; 16 | import backtype.storm.tuple.Values; 17 | import backtype.storm.utils.Utils; 18 | 19 | import java.util.HashMap; 20 | import java.util.Map; 21 | 22 | /** 
23 | * A bolt that counts the words that it receives 24 | */ 25 | public class CountBolt extends BaseRichBolt 26 | { 27 | // To output tuples from this bolt to the next stage bolts, if any 28 | private OutputCollector collector; 29 | 30 | // Map to store the count of the words 31 | private Map countMap; 32 | 33 | @Override 34 | public void prepare( 35 | Map map, 36 | TopologyContext topologyContext, 37 | OutputCollector outputCollector) 38 | { 39 | 40 | // save the collector for emitting tuples 41 | collector = outputCollector; 42 | 43 | // create and initialize the map 44 | countMap = new HashMap(); 45 | } 46 | 47 | @Override 48 | public void execute(Tuple tuple) 49 | { 50 | // get the word from the 1st column of incoming tuple 51 | String word = tuple.getString(0); 52 | 53 | // check if the word is present in the map 54 | if (countMap.get(word) == null) { 55 | 56 | // not present, add the word with a count of 1 57 | countMap.put(word, 1L); 58 | } else { 59 | 60 | // already there, hence get the count 61 | Long val = countMap.get(word); 62 | 63 | // increment the count and save it to the map 64 | countMap.put(word, ++val); 65 | } 66 | 67 | // emit the word and count 68 | collector.emit(new Values(word, countMap.get(word))); 69 | } 70 | 71 | @Override 72 | public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) 73 | { 74 | // tell storm the schema of the output tuple for this spout 75 | // tuple consists of a two columns called 'word' and 'count' 76 | 77 | // declare the first column 'word', second column 'count' 78 | outputFieldsDeclarer.declare(new Fields("word","count")); 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /lesson3/stage5/src/jvm/udacity/storm/IntermediateRankingsBolt.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. 
See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package udacity.storm; 19 | 20 | import backtype.storm.tuple.Tuple; 21 | import org.apache.log4j.Logger; 22 | //import storm.starter.tools.Rankable; 23 | //import storm.starter.tools.RankableObjectWithFields; 24 | 25 | import udacity.storm.tools.Rankable; 26 | import udacity.storm.tools.RankableObjectWithFields; 27 | 28 | /** 29 | * This bolt ranks incoming objects by their count. 30 | *

31 | * It assumes the input tuples to adhere to the following format: (object, object_count, additionalField1, 32 | * additionalField2, ..., additionalFieldN). 33 | */ 34 | public final class IntermediateRankingsBolt extends AbstractRankerBolt { 35 | 36 | private static final long serialVersionUID = -1369800530256637409L; 37 | private static final Logger LOG = Logger.getLogger(IntermediateRankingsBolt.class); 38 | 39 | public IntermediateRankingsBolt() { 40 | super(); 41 | } 42 | 43 | public IntermediateRankingsBolt(int topN) { 44 | super(topN); 45 | } 46 | 47 | public IntermediateRankingsBolt(int topN, int emitFrequencyInSeconds) { 48 | super(topN, emitFrequencyInSeconds); 49 | } 50 | 51 | @Override 52 | void updateRankingsWithTuple(Tuple tuple) { 53 | Rankable rankable = RankableObjectWithFields.from(tuple); 54 | super.getRankings().updateWith(rankable); 55 | } 56 | 57 | @Override 58 | Logger getLogger() { 59 | return LOG; 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /lesson3/stage5/src/jvm/udacity/storm/ParseTweetBolt.java: -------------------------------------------------------------------------------- 1 | package udacity.storm; 2 | 3 | import backtype.storm.Config; 4 | import backtype.storm.LocalCluster; 5 | import backtype.storm.StormSubmitter; 6 | import backtype.storm.spout.SpoutOutputCollector; 7 | import backtype.storm.task.OutputCollector; 8 | import backtype.storm.task.TopologyContext; 9 | import backtype.storm.testing.TestWordSpout; 10 | import backtype.storm.topology.OutputFieldsDeclarer; 11 | import backtype.storm.topology.TopologyBuilder; 12 | import backtype.storm.topology.base.BaseRichSpout; 13 | import backtype.storm.topology.base.BaseRichBolt; 14 | import backtype.storm.tuple.Fields; 15 | import backtype.storm.tuple.Tuple; 16 | import backtype.storm.tuple.Values; 17 | import backtype.storm.utils.Utils; 18 | 19 | import java.util.Map; 20 | import java.util.Arrays; 21 | 22 | /** 23 | * A bolt 
that parses the tweet into words.
 *
 * Splits each incoming tweet on whitespace/punctuation and emits only
 * tokens that start with '#', are longer than 3 characters, and are not
 * in the stop-word list.
 */
public class ParseTweetBolt extends BaseRichBolt
{
  // To output tuples from this bolt to the count bolt
  OutputCollector collector;

  // Stop words to ignore. NOTE(review): entries of length <= 3 (e.g.
  // "rt", "to") can never match because execute() only tests tokens
  // longer than 3 characters; duplicates here are harmless.
  private String[] skipWords = {"rt", "to", "me","la","on","that","que",
    "followers","watch","know","not","have","like","I'm","new","good","do",
    "more","es","te","followers","Followers","las","you","and","de","my","is",
    "en","una","in","for","this","go","en","all","no","don't","up","are",
    "http","http:","https","https:","http://","https://","with","just","your",
    "para","want","your","you're","really","video","it's","when","they","their","much",
    "would","what","them","todo","FOLLOW","retweet","RETWEET","even","right","like",
    "bien","Like","will","Will","pero","Pero","can't","were","Can't","Were","TWITTER",
    "make","take","This","from","about","como","esta","follows","followed"};

  // Set view of skipWords built once, so execute() does an O(1)
  // membership test instead of allocating and scanning a List per token
  private java.util.Set<String> skipSet =
      new java.util.HashSet<String>(Arrays.asList(skipWords));

  @Override
  public void prepare(
      Map map,
      TopologyContext topologyContext,
      OutputCollector outputCollector)
  {
    // save the output collector for emitting tuples
    collector = outputCollector;
  }

  @Override
  public void execute(Tuple tuple)
  {
    // get the 1st column 'tweet' from tuple
    String tweet = tuple.getString(0);

    // split the tweet into tokens on spaces and basic punctuation
    String delims = "[ .,?!]+";
    String[] tokens = tweet.split(delims);

    // emit only hashtag tokens longer than 3 chars that are not stop words
    for (String token : tokens) {
      if (token.length() > 3 && !skipSet.contains(token) && token.startsWith("#")) {
        collector.emit(new Values(token));
      }
    }
  }

  @Override
  public void declareOutputFields(OutputFieldsDeclarer declarer)
  {
    // tell storm the schema of the output tuple for this spout
    // tuple consists of
a single column called 'tweet-word' 78 | declarer.declare(new Fields("tweet-word")); 79 | } 80 | 81 | } 82 | -------------------------------------------------------------------------------- /lesson3/stage5/src/jvm/udacity/storm/ReportBolt.java: -------------------------------------------------------------------------------- 1 | package udacity.storm; 2 | 3 | import backtype.storm.Config; 4 | import backtype.storm.LocalCluster; 5 | import backtype.storm.StormSubmitter; 6 | import backtype.storm.spout.SpoutOutputCollector; 7 | import backtype.storm.task.OutputCollector; 8 | import backtype.storm.task.TopologyContext; 9 | import backtype.storm.testing.TestWordSpout; 10 | import backtype.storm.topology.OutputFieldsDeclarer; 11 | import backtype.storm.topology.TopologyBuilder; 12 | import backtype.storm.topology.base.BaseRichSpout; 13 | import backtype.storm.topology.base.BaseRichBolt; 14 | import backtype.storm.tuple.Fields; 15 | import backtype.storm.tuple.Tuple; 16 | import backtype.storm.tuple.Values; 17 | import backtype.storm.utils.Utils; 18 | 19 | import java.util.Map; 20 | 21 | import com.lambdaworks.redis.RedisClient; 22 | import com.lambdaworks.redis.RedisConnection; 23 | 24 | import udacity.storm.tools.*; 25 | import udacity.storm.tools.Rankings; 26 | import com.google.common.collect.ImmutableList; 27 | import com.google.common.collect.Lists; 28 | 29 | /** 30 | * A bolt that prints the word and count to redis 31 | */ 32 | public class ReportBolt extends BaseRichBolt 33 | { 34 | // place holder to keep the connection to redis 35 | transient RedisConnection redis; 36 | 37 | @Override 38 | public void prepare( 39 | Map map, 40 | TopologyContext topologyContext, 41 | OutputCollector outputCollector) 42 | { 43 | // instantiate a redis connection 44 | RedisClient client = new RedisClient("localhost",6379); 45 | 46 | // initiate the actual connection 47 | redis = client.connect(); 48 | } 49 | 50 | @Override 51 | public void execute(Tuple tuple) 52 | { 53 | Rankings 
rankableList = (Rankings) tuple.getValue(0); 54 | 55 | for (Rankable r: rankableList.getRankings()){ 56 | String word = r.getObject().toString(); 57 | Long count = r.getCount(); 58 | redis.publish("WordCountTopology", word + "|" + Long.toString(count)); 59 | } 60 | 61 | // access the first column 'word' 62 | //String word = tuple.getStringByField("word"); 63 | 64 | // access the second column 'count' 65 | //String word = rankedWords.toString(); 66 | //Integer count = tuple.getIntegerByField("count"); 67 | //Long count = new Long(100); 68 | 69 | // publish the word count to redis using word as the key 70 | //redis.publish("WordCountTopology", word + ":" + Long.toString(count)); 71 | } 72 | 73 | public void declareOutputFields(OutputFieldsDeclarer declarer) 74 | { 75 | // nothing to add - since it is the final bolt 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /lesson3/stage5/src/jvm/udacity/storm/TopNTweetTopology.java: -------------------------------------------------------------------------------- 1 | package udacity.storm; 2 | 3 | import backtype.storm.Config; 4 | import backtype.storm.LocalCluster; 5 | import backtype.storm.StormSubmitter; 6 | import backtype.storm.spout.SpoutOutputCollector; 7 | import backtype.storm.task.OutputCollector; 8 | import backtype.storm.task.TopologyContext; 9 | import backtype.storm.testing.TestWordSpout; 10 | import backtype.storm.topology.OutputFieldsDeclarer; 11 | import backtype.storm.topology.TopologyBuilder; 12 | import backtype.storm.topology.base.BaseRichSpout; 13 | import backtype.storm.topology.base.BaseRichBolt; 14 | import backtype.storm.tuple.Fields; 15 | import backtype.storm.tuple.Tuple; 16 | import backtype.storm.tuple.Values; 17 | import backtype.storm.utils.Utils; 18 | 19 | import udacity.storm.spout.RandomSentenceSpout; 20 | 21 | class TopNTweetTopology 22 | { 23 | public static void main(String[] args) throws Exception 24 | { 25 | //Variable TOP_N number of 
words 26 | int TOP_N = 10; 27 | // create the topology 28 | TopologyBuilder builder = new TopologyBuilder(); 29 | 30 | /* 31 | * In order to create the spout, you need to get twitter credentials 32 | * If you need to use Twitter firehose/Tweet stream for your idea, 33 | * create a set of credentials by following the instructions at 34 | * 35 | * https://dev.twitter.com/discussions/631 36 | * 37 | */ 38 | 39 | // now create the tweet spout with the credentials 40 | TweetSpout tweetSpout = new TweetSpout( 41 | "[Your customer key]", 42 | "[Your secret key]", 43 | "[Your access token]", 44 | "[Your access secret]" 45 | 46 | ); 47 | 48 | // attach the tweet spout to the topology - parallelism of 1 49 | builder.setSpout("tweet-spout", tweetSpout, 1); 50 | 51 | // attach the Random Sentence Spout to the topology - parallelism of 1 52 | //builder.setSpout("random-sentence-spout", new RandomSentenceSpout(), 1); 53 | 54 | // attach the parse tweet bolt using shuffle grouping 55 | builder.setBolt("parse-tweet-bolt", new ParseTweetBolt(), 10).shuffleGrouping("tweet-spout"); 56 | //builder.setBolt("parse-tweet-bolt", new ParseTweetBolt(), 10).shuffleGrouping("random-sentence-spout"); 57 | 58 | // attach the count bolt using fields grouping - parallelism of 15 59 | builder.setBolt("count-bolt", new CountBolt(), 15).fieldsGrouping("parse-tweet-bolt", new Fields("tweet-word")); 60 | 61 | // attach rolling count bolt using fields grouping - parallelism of 5 62 | // TEST 63 | //builder.setBolt("rolling-count-bolt", new RollingCountBolt(30, 10), 1).fieldsGrouping("parse-tweet-bolt", new Fields("tweet-word")); 64 | 65 | //from incubator-storm/.../storm/starter/RollingTopWords.java 66 | //builder.setBolt("intermediate-ranker", new IntermediateRankingsBolt(TOP_N), 4).fieldsGrouping("rolling-count-bolt", new Fields("obj")); 67 | 68 | builder.setBolt("intermediate-ranker", new IntermediateRankingsBolt(TOP_N), 4).fieldsGrouping("count-bolt", new Fields("word")); 69 | 
builder.setBolt("total-ranker", new TotalRankingsBolt(TOP_N)).globalGrouping("intermediate-ranker"); 70 | 71 | // attach the report bolt using global grouping - parallelism of 1 72 | builder.setBolt("report-bolt", new ReportBolt(), 1).globalGrouping("total-ranker"); 73 | 74 | // create the default config object 75 | Config conf = new Config(); 76 | 77 | // set the config in debugging mode 78 | conf.setDebug(true); 79 | 80 | if (args != null && args.length > 0) { 81 | 82 | // run it in a live cluster 83 | 84 | // set the number of workers for running all spout and bolt tasks 85 | conf.setNumWorkers(3); 86 | 87 | // create the topology and submit with config 88 | StormSubmitter.submitTopology(args[0], conf, builder.createTopology()); 89 | 90 | } else { 91 | 92 | // run it in a simulated local cluster 93 | 94 | // set the number of threads to run - similar to setting number of workers in live cluster 95 | conf.setMaxTaskParallelism(3); 96 | 97 | // create the local cluster instance 98 | LocalCluster cluster = new LocalCluster(); 99 | 100 | // submit the topology to the local cluster 101 | cluster.submitTopology("tweet-word-count", conf, builder.createTopology()); 102 | 103 | // let the topology run for 300 seconds. note topologies never terminate! 104 | Utils.sleep(300000); 105 | 106 | // now kill the topology 107 | cluster.killTopology("tweet-word-count"); 108 | 109 | // we are done, so shutdown the local cluster 110 | cluster.shutdown(); 111 | } 112 | } 113 | } 114 | -------------------------------------------------------------------------------- /lesson3/stage5/src/jvm/udacity/storm/TotalRankingsBolt.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. 
The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package udacity.storm; 19 | 20 | import backtype.storm.tuple.Tuple; 21 | import org.apache.log4j.Logger; 22 | //import storm.starter.tools.Rankings; 23 | 24 | import udacity.storm.tools.Rankings; 25 | 26 | /** 27 | * This bolt merges incoming {@link Rankings}. 28 | *

29 | * It can be used to merge intermediate rankings generated by {@link IntermediateRankingsBolt} into a final, 30 | * consolidated ranking. To do so, configure this bolt with a globalGrouping on {@link IntermediateRankingsBolt}. 31 | */ 32 | public final class TotalRankingsBolt extends AbstractRankerBolt { 33 | 34 | private static final long serialVersionUID = -8447525895532302198L; 35 | private static final Logger LOG = Logger.getLogger(TotalRankingsBolt.class); 36 | 37 | public TotalRankingsBolt() { 38 | super(); 39 | } 40 | 41 | public TotalRankingsBolt(int topN) { 42 | super(topN); 43 | } 44 | 45 | public TotalRankingsBolt(int topN, int emitFrequencyInSeconds) { 46 | super(topN, emitFrequencyInSeconds); 47 | } 48 | 49 | @Override 50 | void updateRankingsWithTuple(Tuple tuple) { 51 | Rankings rankingsToBeMerged = (Rankings) tuple.getValue(0); 52 | super.getRankings().updateWith(rankingsToBeMerged); 53 | super.getRankings().pruneZeroCounts(); 54 | } 55 | 56 | @Override 57 | Logger getLogger() { 58 | return LOG; 59 | } 60 | 61 | } 62 | -------------------------------------------------------------------------------- /lesson3/stage5/src/jvm/udacity/storm/spout/RandomSentenceSpout.java: -------------------------------------------------------------------------------- 1 | package udacity.storm.spout; 2 | 3 | import backtype.storm.spout.SpoutOutputCollector; 4 | import backtype.storm.task.TopologyContext; 5 | import backtype.storm.topology.OutputFieldsDeclarer; 6 | import backtype.storm.topology.base.BaseRichSpout; 7 | import backtype.storm.tuple.Fields; 8 | import backtype.storm.tuple.Values; 9 | import backtype.storm.utils.Utils; 10 | 11 | import java.util.Map; 12 | import java.util.Random; 13 | 14 | public class RandomSentenceSpout extends BaseRichSpout { 15 | SpoutOutputCollector _collector; 16 | Random _rand; 17 | 18 | 19 | @Override 20 | public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) { 21 | _collector = collector; 22 | _rand 
= new Random(); 23 | } 24 | 25 | @Override 26 | public void nextTuple() { 27 | Utils.sleep(100); 28 | String[] sentences = new String[]{ 29 | "the cow jumped over the moon", 30 | "an apple a day keeps the doctor away", 31 | "four score and seven years ago", 32 | "snow white and the seven dwarfs", 33 | "i am at two with nature" 34 | }; 35 | String sentence = sentences[_rand.nextInt(sentences.length)]; 36 | _collector.emit(new Values(sentence)); 37 | } 38 | 39 | @Override 40 | public void declareOutputFields(OutputFieldsDeclarer declarer) { 41 | declarer.declare(new Fields("sentence")); 42 | } 43 | 44 | } 45 | -------------------------------------------------------------------------------- /lesson3/stage5/src/jvm/udacity/storm/tools/NthLastModifiedTimeTracker.java: -------------------------------------------------------------------------------- 1 | package udacity.storm.tools; 2 | 3 | import backtype.storm.utils.Time; 4 | import org.apache.commons.collections.buffer.CircularFifoBuffer; 5 | 6 | /** 7 | * This class tracks the time-since-last-modify of a "thing" in a rolling fashion. 8 | *

9 | * For example, create a 5-slot tracker to track the five most recent time-since-last-modify. 10 | *

11 | * You must manually "mark" that the "something" that you want to track -- in terms of modification times -- has just 12 | * been modified. 13 | */ 14 | public class NthLastModifiedTimeTracker { 15 | 16 | private static final int MILLIS_IN_SEC = 1000; 17 | 18 | private final CircularFifoBuffer lastModifiedTimesMillis; 19 | 20 | public NthLastModifiedTimeTracker(int numTimesToTrack) { 21 | if (numTimesToTrack < 1) { 22 | throw new IllegalArgumentException( 23 | "numTimesToTrack must be greater than zero (you requested " + numTimesToTrack + ")"); 24 | } 25 | lastModifiedTimesMillis = new CircularFifoBuffer(numTimesToTrack); 26 | initLastModifiedTimesMillis(); 27 | } 28 | 29 | private void initLastModifiedTimesMillis() { 30 | long nowCached = now(); 31 | for (int i = 0; i < lastModifiedTimesMillis.maxSize(); i++) { 32 | lastModifiedTimesMillis.add(Long.valueOf(nowCached)); 33 | } 34 | } 35 | 36 | private long now() { 37 | return Time.currentTimeMillis(); 38 | } 39 | 40 | public int secondsSinceOldestModification() { 41 | long modifiedTimeMillis = ((Long) lastModifiedTimesMillis.get()).longValue(); 42 | return (int) ((now() - modifiedTimeMillis) / MILLIS_IN_SEC); 43 | } 44 | 45 | public void markAsModified() { 46 | updateLastModifiedTime(); 47 | } 48 | 49 | private void updateLastModifiedTime() { 50 | lastModifiedTimesMillis.add(now()); 51 | } 52 | 53 | } 54 | -------------------------------------------------------------------------------- /lesson3/stage5/src/jvm/udacity/storm/tools/Rankable.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. 
The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package udacity.storm.tools; 19 | 20 | public interface Rankable extends Comparable { 21 | 22 | Object getObject(); 23 | 24 | long getCount(); 25 | 26 | /** 27 | * Note: We do not defensively copy the object wrapped by the Rankable. It is passed as is. 28 | * 29 | * @return a defensive copy 30 | */ 31 | Rankable copy(); 32 | } 33 | -------------------------------------------------------------------------------- /lesson3/stage5/src/jvm/udacity/storm/tools/SlotBasedCounter.java: -------------------------------------------------------------------------------- 1 | //package storm.starter.tools; 2 | package udacity.storm.tools; 3 | 4 | import java.io.Serializable; 5 | import java.util.HashMap; 6 | import java.util.HashSet; 7 | import java.util.Map; 8 | import java.util.Set; 9 | 10 | /** 11 | * This class provides per-slot counts of the occurrences of objects. 12 | *

13 | * It can be used, for instance, as a building block for implementing sliding window counting of objects. 14 | * 15 | * @param The type of those objects we want to count. 16 | */ 17 | public final class SlotBasedCounter implements Serializable { 18 | 19 | private static final long serialVersionUID = 4858185737378394432L; 20 | 21 | private final Map objToCounts = new HashMap(); 22 | private final int numSlots; 23 | 24 | public SlotBasedCounter(int numSlots) { 25 | if (numSlots <= 0) { 26 | throw new IllegalArgumentException("Number of slots must be greater than zero (you requested " + numSlots + ")"); 27 | } 28 | this.numSlots = numSlots; 29 | } 30 | 31 | public void incrementCount(T obj, int slot) { 32 | long[] counts = objToCounts.get(obj); 33 | if (counts == null) { 34 | counts = new long[this.numSlots]; 35 | objToCounts.put(obj, counts); 36 | } 37 | counts[slot]++; 38 | } 39 | 40 | public long getCount(T obj, int slot) { 41 | long[] counts = objToCounts.get(obj); 42 | if (counts == null) { 43 | return 0; 44 | } 45 | else { 46 | return counts[slot]; 47 | } 48 | } 49 | 50 | public Map getCounts() { 51 | Map result = new HashMap(); 52 | for (T obj : objToCounts.keySet()) { 53 | result.put(obj, computeTotalCount(obj)); 54 | } 55 | return result; 56 | } 57 | 58 | private long computeTotalCount(T obj) { 59 | long[] curr = objToCounts.get(obj); 60 | long total = 0; 61 | for (long l : curr) { 62 | total += l; 63 | } 64 | return total; 65 | } 66 | 67 | /** 68 | * Reset the slot count of any tracked objects to zero for the given slot. 
69 | * 70 | * @param slot 71 | */ 72 | public void wipeSlot(int slot) { 73 | for (T obj : objToCounts.keySet()) { 74 | resetSlotCountToZero(obj, slot); 75 | } 76 | } 77 | 78 | private void resetSlotCountToZero(T obj, int slot) { 79 | long[] counts = objToCounts.get(obj); 80 | counts[slot] = 0; 81 | } 82 | 83 | private boolean shouldBeRemovedFromCounter(T obj) { 84 | return computeTotalCount(obj) == 0; 85 | } 86 | 87 | /** 88 | * Remove any object from the counter whose total count is zero (to free up memory). 89 | */ 90 | public void wipeZeros() { 91 | Set objToBeRemoved = new HashSet(); 92 | for (T obj : objToCounts.keySet()) { 93 | if (shouldBeRemovedFromCounter(obj)) { 94 | objToBeRemoved.add(obj); 95 | } 96 | } 97 | for (T obj : objToBeRemoved) { 98 | objToCounts.remove(obj); 99 | } 100 | } 101 | 102 | } 103 | -------------------------------------------------------------------------------- /lesson3/stage5/src/jvm/udacity/storm/tools/TupleHelpers.java: -------------------------------------------------------------------------------- 1 | //package storm.starter.util; 2 | package udacity.storm.tools; 3 | 4 | import backtype.storm.Constants; 5 | import backtype.storm.tuple.Tuple; 6 | 7 | public final class TupleHelpers { 8 | 9 | private TupleHelpers() { 10 | } 11 | 12 | public static boolean isTickTuple(Tuple tuple) { 13 | return tuple.getSourceComponent().equals(Constants.SYSTEM_COMPONENT_ID) && tuple.getSourceStreamId().equals( 14 | Constants.SYSTEM_TICK_STREAM_ID); 15 | } 16 | 17 | } 18 | -------------------------------------------------------------------------------- /lesson3/stage6/src/jvm/udacity/storm/ExclamationTopology.java: -------------------------------------------------------------------------------- 1 | package udacity.storm; 2 | 3 | import backtype.storm.Config; 4 | import backtype.storm.LocalCluster; 5 | import backtype.storm.StormSubmitter; 6 | import backtype.storm.task.OutputCollector; 7 | import backtype.storm.task.TopologyContext; 8 | import 
backtype.storm.testing.TestWordSpout; 9 | import backtype.storm.topology.OutputFieldsDeclarer; 10 | import backtype.storm.topology.TopologyBuilder; 11 | import backtype.storm.topology.base.BaseRichBolt; 12 | import backtype.storm.tuple.Fields; 13 | import backtype.storm.tuple.Tuple; 14 | import backtype.storm.tuple.Values; 15 | import backtype.storm.utils.Utils; 16 | 17 | import java.util.HashMap; 18 | import java.util.Map; 19 | 20 | //******* Import MyLikesSpout and MyNamesSpout 21 | 22 | 23 | 24 | /** 25 | * This is a basic example of a storm topology. 26 | * 27 | * This topology demonstrates how to add three exclamation marks '!!!' 28 | * to each word emitted 29 | * 30 | * This is an example for Udacity Real Time Analytics Course - ud381 31 | * 32 | */ 33 | public class ExclamationTopology { 34 | 35 | /** 36 | * A bolt that adds the exclamation marks '!!!' to word 37 | */ 38 | public static class ExclamationBolt extends BaseRichBolt 39 | { 40 | // To output tuples from this bolt to the next stage bolts, if any 41 | OutputCollector _collector; 42 | 43 | @Override 44 | public void prepare( 45 | Map map, 46 | TopologyContext topologyContext, 47 | OutputCollector collector) 48 | { 49 | // save the output collector for emitting tuples 50 | _collector = collector; 51 | } 52 | 53 | @Override 54 | public void execute(Tuple tuple) 55 | { 56 | //**** ADD COMPONENT ID 57 | 58 | /* 59 | * Use component id to modify behavior 60 | */ 61 | 62 | // get the column word from tuple 63 | String word = tuple.getString(0); 64 | 65 | // build the word with the exclamation marks appended 66 | StringBuilder exclamatedWord = new StringBuilder(); 67 | exclamatedWord.append(word).append("!!!"); 68 | 69 | // emit the word with exclamations 70 | _collector.emit(tuple, new Values(exclamatedWord.toString())); 71 | } 72 | 73 | @Override 74 | public void declareOutputFields(OutputFieldsDeclarer declarer) 75 | { 76 | // tell storm the schema of the output tuple for this spout 77 | 78 | // tuple 
consists of a single column called 'exclamated-word' 79 | declarer.declare(new Fields("exclamated-word")); 80 | } 81 | } 82 | 83 | public static void main(String[] args) throws Exception 84 | { 85 | // create the topology 86 | TopologyBuilder builder = new TopologyBuilder(); 87 | 88 | // attach the word spout to the topology - parallelism of 10 89 | builder.setSpout("word", new TestWordSpout(), 10); 90 | 91 | // attach the exclamation bolt to the topology - parallelism of 3 92 | builder.setBolt("exclaim1", new ExclamationBolt(), 3).shuffleGrouping("word"); 93 | 94 | // attach another exclamation bolt to the topology - parallelism of 2 95 | builder.setBolt("exclaim2", new ExclamationBolt(), 2).shuffleGrouping("exclaim1"); 96 | 97 | // create the default config object 98 | Config conf = new Config(); 99 | 100 | // set the config in debugging mode 101 | conf.setDebug(true); 102 | 103 | if (args != null && args.length > 0) { 104 | 105 | // run it in a live cluster 106 | 107 | // set the number of workers for running all spout and bolt tasks 108 | conf.setNumWorkers(3); 109 | 110 | // create the topology and submit with config 111 | StormSubmitter.submitTopology(args[0], conf, builder.createTopology()); 112 | 113 | } else { 114 | 115 | // run it in a simulated local cluster 116 | 117 | // create the local cluster instance 118 | LocalCluster cluster = new LocalCluster(); 119 | 120 | // submit the topology to the local cluster 121 | cluster.submitTopology("exclamation", conf, builder.createTopology()); 122 | 123 | // let the topology run for 30 seconds. note topologies never terminate! 
124 | Thread.sleep(30000); 125 | 126 | // kill the topology 127 | cluster.killTopology("exclamation"); 128 | 129 | // we are done, so shutdown the local cluster 130 | cluster.shutdown(); 131 | } 132 | } 133 | } 134 | -------------------------------------------------------------------------------- /lesson3/stage6/src/jvm/udacity/storm/ReportBolt.java: -------------------------------------------------------------------------------- 1 | package udacity.storm; 2 | 3 | import backtype.storm.Config; 4 | import backtype.storm.LocalCluster; 5 | import backtype.storm.StormSubmitter; 6 | import backtype.storm.spout.SpoutOutputCollector; 7 | import backtype.storm.task.OutputCollector; 8 | import backtype.storm.task.TopologyContext; 9 | import backtype.storm.testing.TestWordSpout; 10 | import backtype.storm.topology.OutputFieldsDeclarer; 11 | import backtype.storm.topology.TopologyBuilder; 12 | import backtype.storm.topology.base.BaseRichSpout; 13 | import backtype.storm.topology.base.BaseRichBolt; 14 | import backtype.storm.tuple.Fields; 15 | import backtype.storm.tuple.Tuple; 16 | import backtype.storm.tuple.Values; 17 | import backtype.storm.utils.Utils; 18 | 19 | import java.util.Map; 20 | 21 | import com.lambdaworks.redis.RedisClient; 22 | import com.lambdaworks.redis.RedisConnection; 23 | 24 | /** 25 | * A bolt that prints the word and count to redis 26 | */ 27 | public class ReportBolt extends BaseRichBolt 28 | { 29 | // place holder to keep the connection to redis 30 | transient RedisConnection redis; 31 | 32 | @Override 33 | public void prepare( 34 | Map map, 35 | TopologyContext topologyContext, 36 | OutputCollector outputCollector) 37 | { 38 | // instantiate a redis connection 39 | RedisClient client = new RedisClient("localhost",6379); 40 | 41 | // initiate the actual connection 42 | redis = client.connect(); 43 | } 44 | 45 | @Override 46 | public void execute(Tuple tuple) 47 | { 48 | // access the first column 'word' 49 | String word = 
tuple.getStringByField("word"); 50 | 51 | // access the second column 'count' 52 | Integer count = tuple.getIntegerByField("count"); 53 | //Integer count = 30; 54 | 55 | // publish the word count to redis using word as the key 56 | redis.publish("WordCountTopology", word + "|" + Long.toString(count)); 57 | } 58 | 59 | public void declareOutputFields(OutputFieldsDeclarer declarer) 60 | { 61 | // nothing to add - since it is the final bolt 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /lesson3/stage6/src/jvm/udacity/storm/spout/MyLikesSpout.java: -------------------------------------------------------------------------------- 1 | package udacity.storm.spout; 2 | 3 | import backtype.storm.spout.SpoutOutputCollector; 4 | import backtype.storm.task.TopologyContext; 5 | import backtype.storm.topology.OutputFieldsDeclarer; 6 | import backtype.storm.topology.base.BaseRichSpout; 7 | import backtype.storm.tuple.Fields; 8 | import backtype.storm.tuple.Values; 9 | import backtype.storm.utils.Utils; 10 | 11 | import java.util.Map; 12 | import java.util.Random; 13 | 14 | public class MyLikesSpout extends BaseRichSpout { 15 | SpoutOutputCollector _collector; 16 | Random _rand; 17 | 18 | 19 | @Override 20 | public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) { 21 | _collector = collector; 22 | _rand = new Random(); 23 | } 24 | 25 | @Override 26 | public void nextTuple() { 27 | Utils.sleep(100); 28 | String[] pairs = new String[]{ 29 | "Lewis # Udacity", 30 | "Taylor # Cinematography", 31 | "Justine # Dogs", 32 | "Liz # Soccer", 33 | "Kim # Art" 34 | }; 35 | String pair = pairs[_rand.nextInt(pairs.length)]; 36 | String name = pair.split("#")[0].trim(); 37 | String favorite = pair.split("#")[1].trim(); 38 | //** TO DO: update emit and declareOutputFields to 39 | //** emit "name" and "favorite" instead of "pair" 40 | _collector.emit(new Values(pair)); 41 | } 42 | 43 | @Override 44 | public void 
declareOutputFields(OutputFieldsDeclarer declarer) {
    declarer.declare(new Fields("pair"));
  }

}
--------------------------------------------------------------------------------
/lesson3/stage6/src/jvm/udacity/storm/spout/MyNamesSpout.java:
--------------------------------------------------------------------------------
package udacity.storm.spout;

import backtype.storm.spout.SpoutOutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichSpout;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Values;
import backtype.storm.utils.Utils;

import java.util.Map;
import java.util.Random;

/**
 * A spout that repeatedly emits one uniformly random first name per tuple.
 */
public class MyNamesSpout extends BaseRichSpout {

  // Fixed pool of names drawn from at random in nextTuple()
  private static final String[] NAMES = {
      "Taylor",
      "Justine",
      "Liz",
      "Kim",
      "Lewis"
  };

  SpoutOutputCollector _collector;
  Random _rand;

  @Override
  public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
    // keep the collector and seed the random source once per task
    _collector = collector;
    _rand = new Random();
  }

  @Override
  public void nextTuple() {
    // throttle emission, then emit one randomly chosen name
    Utils.sleep(100);
    _collector.emit(new Values(NAMES[_rand.nextInt(NAMES.length)]));
  }

  @Override
  public void declareOutputFields(OutputFieldsDeclarer declarer) {
    // single-column output schema: 'name'
    declarer.declare(new Fields("name"));
  }

}
--------------------------------------------------------------------------------
/lesson3/stage7/src/jvm/udacity/storm/ReportBolt.java:
--------------------------------------------------------------------------------
package udacity.storm;

import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.StormSubmitter;
import backtype.storm.spout.SpoutOutputCollector;
import backtype.storm.task.OutputCollector;
import
package udacity.storm;

import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.StormSubmitter;
import backtype.storm.spout.SpoutOutputCollector;
import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.testing.TestWordSpout;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.TopologyBuilder;
import backtype.storm.topology.base.BaseRichSpout;
import backtype.storm.topology.base.BaseRichBolt;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;
import backtype.storm.utils.Utils;

import java.util.Map;

import com.lambdaworks.redis.RedisClient;
import com.lambdaworks.redis.RedisConnection;

/**
 * A bolt that publishes each (word, count) tuple it receives to a Redis
 * channel as "word|count".
 */
public class ReportBolt extends BaseRichBolt
{
    // connection to redis; transient because the bolt is serialized to workers
    // and the connection must be (re)created in prepare() on each worker
    transient RedisConnection redis;

    @Override
    public void prepare(
        Map map,
        TopologyContext topologyContext,
        OutputCollector outputCollector)
    {
        // instantiate a redis client against the local broker
        RedisClient client = new RedisClient("localhost", 6379);

        // initiate the actual connection
        redis = client.connect();
    }

    @Override
    public void execute(Tuple tuple)
    {
        // read both columns by field name, matching the upstream bolt's
        // declared schema ("word", "count"); the previous revision hard-coded
        // count = 30 and read the word positionally, which published a bogus
        // constant count for every word
        String word = tuple.getStringByField("word");
        Integer count = tuple.getIntegerByField("count");

        // publish the word count to redis using word as the key
        redis.publish("WordCountTopology", word + "|" + Long.toString(count));
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer)
    {
        // nothing to add - since it is the final bolt
    }
}
package udacity.storm.spout;

import backtype.storm.spout.SpoutOutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichSpout;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Values;
import backtype.storm.utils.Utils;

import java.util.Map;
import java.util.Random;

/**
 * A spout that emits a random (name, favorite) pair roughly every 100 ms,
 * parsed out of "name # favorite" source strings.
 */
public class MyLikesSpout extends BaseRichSpout {

    // source data: each entry is "<name> # <favorite>"
    private static final String[] PAIRS = {
        "Lewis # Udacity",
        "Taylor # Cinematography",
        "Justine # Dogs",
        "Liz # Soccer",
        "Kim # Art"
    };

    SpoutOutputCollector _collector;
    Random _rand;

    @Override
    public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
        // keep the collector so nextTuple() can emit through it
        _collector = collector;
        _rand = new Random();
    }

    @Override
    public void nextTuple() {
        // throttle emission so the topology is not flooded
        Utils.sleep(100);
        String pair = PAIRS[_rand.nextInt(PAIRS.length)];

        // split "name # favorite" on the '#' separator and trim padding
        String[] parts = pair.split("#");
        String name = parts[0].trim();
        String favorite = parts[1].trim();

        _collector.emit(new Values(name, favorite));
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        // two-column output schema: "name", "favorite"
        declarer.declare(new Fields("name", "favorite"));
    }
}
package udacity.storm.spout;

import backtype.storm.spout.SpoutOutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichSpout;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Values;
import backtype.storm.utils.Utils;

import java.util.Map;
import java.util.Random;

/**
 * Emits a single randomly selected name (field "name") about 10x per second.
 */
public class MyNamesSpout extends BaseRichSpout {
    SpoutOutputCollector _collector;
    Random _rand;

    @Override
    public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
        // remember the collector; it is the only way this spout can emit
        _collector = collector;
        _rand = new Random();
    }

    @Override
    public void nextTuple() {
        // pace the stream at ~10 tuples/second
        Utils.sleep(100);

        final String[] names = {
            "Taylor",
            "Justine",
            "Liz",
            "Kim",
            "Lewis"
        };
        int idx = _rand.nextInt(names.length);
        _collector.emit(new Values(names[idx]));
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        // tuples carry exactly one field, "name"
        declarer.declare(new Fields("name"));
    }
}
/*
 * The MIT License (MIT) - Copyright (c) 2014 Daniel Glasson
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
 * associated documentation files (the "Software"), to deal in the Software without restriction,
 * including without limitation the rights to use, copy, modify, merge, publish, distribute,
 * sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all copies or
 * substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
 * NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

package geocode;

import geocode.kdtree.KDNodeComparator;
import static java.lang.Math.cos;
import static java.lang.Math.sin;
import static java.lang.Math.toRadians;

import java.util.Comparator;

/**
 * Created by Daniel Glasson on 18/05/2014.
 * One place-name record parsed from a gazetteer file; its (lat, lon) is
 * projected onto the unit sphere so KD-tree distances are well-behaved.
 * Generic type parameters were restored (they were stripped in transcription).
 */
public class GeoName extends KDNodeComparator<GeoName> {
    public String name;
    public String geoid;
    public boolean majorPlace; // Major or minor place
    public double latitude;
    public double longitude;
    public double point[] = new double[3]; // The 3D coordinates of the point
    public String country;

    /**
     * Parse one comma-separated gazetteer row.
     * Columns used (0-based): 1 = id, 3 = name, 8 = latitude, 9 = longitude.
     */
    GeoName(String data) {
        String[] names = data.split(",");
        name = names[3];
        geoid = "c" + names[1];
        // upstream filtered on names[6].equals("P"); this data set keeps every row
        majorPlace = true;
        latitude = Double.parseDouble(names[8]);
        longitude = Double.parseDouble(names[9]);
        setPoint();
        // NOTE(review): names[8] is the latitude column per the lines above, so
        // 'country' receives the latitude string here - this looks like a wrong
        // index; confirm against the CSV schema before changing it.
        country = names[8];
    }

    /** Synthetic record used as the query point of a nearest-place search. */
    GeoName(Double latitude, Double longitude) {
        name = country = "Search";
        this.latitude = latitude;
        this.longitude = longitude;
        setPoint();
    }

    /** Project (latitude, longitude) onto the unit sphere as (x, y, z). */
    private void setPoint() {
        point[0] = cos(toRadians(latitude)) * cos(toRadians(longitude));
        point[1] = cos(toRadians(latitude)) * sin(toRadians(longitude));
        point[2] = sin(toRadians(latitude));
    }

    @Override
    public String toString() {
        return name;
    }

    @Override
    protected Double squaredDistance(GeoName other) {
        double x = this.point[0] - other.point[0];
        double y = this.point[1] - other.point[1];
        double z = this.point[2] - other.point[2];
        return (x * x) + (y * y) + (z * z);
    }

    @Override
    protected Double axisSquaredDistance(GeoName other, Integer axis) {
        double distance = point[axis] - other.point[axis];
        return distance * distance;
    }

    @Override
    protected Comparator<GeoName> getComparator(Integer axis) {
        return GeoNameComparator.values()[axis];
    }

    /** One comparator per axis of the 3D point; ordinal == axis index. */
    protected static enum GeoNameComparator implements Comparator<GeoName> {
        x {
            @Override
            public int compare(GeoName a, GeoName b) {
                return Double.compare(a.point[0], b.point[0]);
            }
        },
        y {
            @Override
            public int compare(GeoName a, GeoName b) {
                return Double.compare(a.point[1], b.point[1]);
            }
        },
        z {
            @Override
            public int compare(GeoName a, GeoName b) {
                return Double.compare(a.point[2], b.point[2]);
            }
        };
    }
}
/*
 * The MIT License (MIT) - Copyright (c) 2014 Daniel Glasson
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
 * associated documentation files (the "Software"), to deal in the Software without restriction,
 * including without limitation the rights to use, copy, modify, merge, publish, distribute,
 * sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all copies or
 * substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
 * NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

package geocode;

import geocode.kdtree.KDTree;
import java.io.*;
import java.util.ArrayList;

/**
 * Created by Daniel Glasson on 18/05/2014.
 * Uses a KD-tree to quickly find the nearest known place to a coordinate.
 *
 * ReverseGeoCode reverseGeoCode = new ReverseGeoCode(new FileInputStream("c:\\AU.txt"), true);
 * System.out.println("Nearest to -23.456, 123.456 is " + geocode.nearestPlace(-23.456, 123.456));
 */
public class ReverseGeoCode {
    KDTree<GeoName> kdTree;

    // Get placenames from http://download.geonames.org/export/dump/
    public ReverseGeoCode(InputStream placenames, Boolean majorOnly) throws IOException {
        ArrayList<GeoName> arPlaceNames = new ArrayList<GeoName>();
        BufferedReader in = new BufferedReader(new InputStreamReader(placenames));
        try {
            // first line is a header row - skip it
            in.readLine();
            String str;
            while ((str = in.readLine()) != null) {
                GeoName newPlace = new GeoName(str);
                if (!majorOnly || newPlace.majorPlace) {
                    // reuse the already-parsed record; the previous revision
                    // constructed (and parsed) a second GeoName from the same line
                    arPlaceNames.add(newPlace);
                }
            }
        } finally {
            // close on both the success and failure paths (was catch-close-rethrow)
            in.close();
        }
        kdTree = new KDTree<GeoName>(arPlaceNames);
    }

    /** Return the nearest loaded place to the given coordinate. */
    public GeoName nearestPlace(double latitude, double longitude) {
        return kdTree.findNearest(new GeoName(latitude, longitude));
    }
}
including without limitation the rights 11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | copies of the Software, and to permit persons to whom the Software is 13 | furnished to do so, subject to the following conditions: 14 | 15 | The above copyright notice and this permission notice shall be included in 16 | all copies or substantial portions of the Software. 17 | 18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 24 | THE SOFTWARE. 25 | */ 26 | 27 | package geocode.kdtree; 28 | 29 | /** 30 | * 31 | * @author Daniel Glasson 32 | */ 33 | public class KDNode> { 34 | KDNode left; 35 | KDNode right; 36 | T location; 37 | 38 | public KDNode( KDNode left, KDNode right, T location ) { 39 | this.left = left; 40 | this.right = right; 41 | this.location = location; 42 | } 43 | } -------------------------------------------------------------------------------- /lesson4/TeamAwesome/FinalProject/src/jvm/geocode/kdtree/KDNodeComparator.java: -------------------------------------------------------------------------------- 1 | /* 2 | The MIT License (MIT) 3 | [OSI Approved License] 4 | The MIT License (MIT) 5 | 6 | Copyright (c) 2014 Daniel Glasson 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy 9 | of this software and associated documentation files (the "Software"), to deal 10 | in the Software without restriction, including without limitation the rights 11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | copies of the Software, and to permit persons to whom 
/*
 * The MIT License (MIT) - Copyright (c) 2014 Daniel Glasson
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
 * associated documentation files (the "Software"), to deal in the Software without restriction,
 * including without limitation the rights to use, copy, modify, merge, publish, distribute,
 * sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all copies or
 * substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
 * NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

package geocode.kdtree;

import java.util.Comparator;

/**
 * @author Daniel Glasson
 * Make the user return a comparator for each axis.
 * Squared distances should be an optimisation.
 * The type parameter was restored (stripped in transcription - the methods
 * reference T but the class declared no type variable).
 */
public abstract class KDNodeComparator<T> {
    // This should return a comparator for whatever axis is passed in
    protected abstract Comparator<T> getComparator(Integer axis);

    // Return squared distance between current and other
    protected abstract Double squaredDistance(T other);

    // Return squared distance along one axis only
    protected abstract Double axisSquaredDistance(T other, Integer axis);
}
/*
 * The MIT License (MIT) - Copyright (c) 2014 Daniel Glasson
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
 * associated documentation files (the "Software"), to deal in the Software without restriction,
 * including without limitation the rights to use, copy, modify, merge, publish, distribute,
 * sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all copies or
 * substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
 * NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

package geocode.kdtree;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;

/**
 * @author Daniel Glasson
 * A KD-Tree implementation to quickly find nearest points.
 * Currently implements createKDTree and findNearest as that's all that's required here.
 * Generic type parameters restored (stripped in transcription).
 */
public class KDTree<T extends KDNodeComparator<T>> {
    private KDNode<T> root;

    public KDTree(List<T> items) {
        root = createKDTree(items, 0);
    }

    public T findNearest(T search) {
        return findNearest(root, search, 0).location;
    }

    // Only ever goes to log2(items.length) depth so lack of tail recursion is a non-issue
    private KDNode<T> createKDTree(List<T> items, int depth) {
        if (items.isEmpty()) {
            return null;
        }
        // sort along this depth's axis, then split at the median element
        Collections.sort(items, items.get(0).getComparator(depth % 3));
        int currentIndex = items.size() / 2;
        return new KDNode<T>(
            createKDTree(items.subList(0, currentIndex), depth + 1),
            createKDTree(items.subList(currentIndex + 1, items.size()), depth + 1),
            items.get(currentIndex));
    }

    private KDNode<T> findNearest(KDNode<T> currentNode, T search, int depth) {
        // descend toward the side of the splitting plane the query falls on
        int direction = search.getComparator(depth % 3).compare(search, currentNode.location);
        KDNode<T> next = (direction < 0) ? currentNode.left : currentNode.right;
        KDNode<T> other = (direction < 0) ? currentNode.right : currentNode.left;
        KDNode<T> best = (next == null) ? currentNode : findNearest(next, search, depth + 1); // Go to a leaf
        if (currentNode.location.squaredDistance(search) < best.location.squaredDistance(search)) {
            best = currentNode; // Set best as required
        }
        // only search the far side if the splitting plane is closer than the current best
        if (other != null) {
            if (currentNode.location.axisSquaredDistance(search, depth % 3) < best.location.squaredDistance(search)) {
                KDNode<T> possibleBest = findNearest(other, search, depth + 1);
                if (possibleBest.location.squaredDistance(search) < best.location.squaredDistance(search)) {
                    best = possibleBest;
                }
            }
        }
        return best; // Work back up
    }
}
package udacity.storm;

import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.StormSubmitter;
import backtype.storm.spout.SpoutOutputCollector;
import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.testing.TestWordSpout;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.TopologyBuilder;
import backtype.storm.topology.base.BaseRichSpout;
import backtype.storm.topology.base.BaseRichBolt;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;
import backtype.storm.utils.Utils;

import java.util.HashMap;
import java.util.Map;

/**
 * A bolt that keeps a running count per word and emits (word, count)
 * after every update.
 */
public class CountBolt extends BaseRichBolt
{
    // To output tuples from this bolt to the next stage bolts, if any
    private OutputCollector collector;

    // running count per word (generic parameters restored; the raw
    // Map/HashMap in the previous revision was a transcription artifact)
    private Map<String, Integer> countMap;

    @Override
    public void prepare(
        Map map,
        TopologyContext topologyContext,
        OutputCollector outputCollector)
    {
        // save the collector for emitting tuples
        collector = outputCollector;

        // create and initialize the map
        countMap = new HashMap<String, Integer>();
    }

    @Override
    public void execute(Tuple tuple)
    {
        // get the word from the 1st column of incoming tuple
        String word = tuple.getString(0);

        // bump the count, starting at 1 for a first sighting
        Integer current = countMap.get(word);
        Integer updated = (current == null) ? 1 : current + 1;
        countMap.put(word, updated);

        // emit the word and its new count
        collector.emit(new Values(word, updated));
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer)
    {
        // tuple schema is two columns: 'word' then 'count'
        outputFieldsDeclarer.declare(new Fields("word", "count"));
    }
}
column 'count' 78 | outputFieldsDeclarer.declare(new Fields("word","count")); 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /lesson4/TeamAwesome/FinalProject/src/jvm/udacity/storm/TopNTweetTopology.java: -------------------------------------------------------------------------------- 1 | package udacity.storm; 2 | 3 | import backtype.storm.Config; 4 | import backtype.storm.LocalCluster; 5 | import backtype.storm.StormSubmitter; 6 | import backtype.storm.topology.TopologyBuilder; 7 | import backtype.storm.tuple.Fields; 8 | import backtype.storm.utils.Utils; 9 | 10 | class TopNTweetTopology 11 | { 12 | public static void main(String[] args) throws Exception 13 | { 14 | //Variable TOP_N number of words 15 | int TOP_N = 5; 16 | // create the topology 17 | TopologyBuilder builder = new TopologyBuilder(); 18 | 19 | /* 20 | * In order to create the spout, you need to get twitter credentials 21 | * If you need to use Twitter firehose/Tweet stream for your idea, 22 | * create a set of credentials by following the instructions at 23 | * 24 | * https://dev.twitter.com/discussions/631 25 | * 26 | */ 27 | // now create the tweet spout with the credentials 28 | // credential 29 | TweetSpout tweetSpout = new TweetSpout( 30 | "", 31 | "", 32 | "", 33 | "" 34 | ); 35 | 36 | // attach the tweet spout to the topology - parallelism of 1 37 | builder.setSpout("tweet-spout", tweetSpout, 1); 38 | 39 | // attach the parse tweet bolt using shuffle grouping 40 | builder.setBolt("parse-tweet-bolt", new ParseTweetBolt(), 10).shuffleGrouping("tweet-spout"); 41 | builder.setBolt("infoBolt", new InfoBolt(), 10).fieldsGrouping("parse-tweet-bolt", new Fields("county_id")); 42 | builder.setBolt("top-words", new TopWords(), 10).fieldsGrouping("infoBolt", new Fields("county_id")); 43 | builder.setBolt("report-bolt", new ReportBolt(), 1).globalGrouping("top-words"); 44 | 45 | // attach rolling count bolt using fields grouping - parallelism of 5 46 
| //builder.setBolt("rolling-count-bolt", new RollingCountBolt(1000, 10), 1).fieldsGrouping("parse-tweet-bolt", new Fields("tweet-word")); 47 | 48 | //from incubator-storm/.../storm/starter/RollingTopWords.java 49 | //builder.setBolt("intermediate-ranker", new IntermediateRankingsBolt(TOP_N, 10), 2).fieldsGrouping("rolling-count-bolt", new Fields("obj")); 50 | //builder.setBolt("total-ranker", new TotalRankingsBolt(TOP_N, 2)).globalGrouping("intermediate-ranker"); 51 | 52 | /* 53 | * total-ranker bolt output is broadcast (allGrouping) to all the top-tweets bolt instances so 54 | * that every one of them have access to the top hashtags 55 | * tweet-spout tweet stream will be distributed randomly to the top-tweets bolt instances 56 | */ 57 | //builder.setBolt("top-tweets", new TweetsWithTopHashtagsBolt(), 4) 58 | // .allGrouping("total-ranker") 59 | // .shuffleGrouping("tweet-spout"); 60 | 61 | // attach the report bolt using global grouping - parallelism of 1 62 | //builder.setBolt("report-bolt", new ReportBolt(), 1).globalGrouping("top-tweets"); 63 | 64 | // create the default config object 65 | Config conf = new Config(); 66 | 67 | // set the config in debugging mode 68 | conf.setDebug(true); 69 | 70 | if (args != null && args.length > 0) { 71 | 72 | // run it in a live cluster 73 | 74 | // set the number of workers for running all spout and bolt tasks 75 | conf.setNumWorkers(3); 76 | 77 | // create the topology and submit with config 78 | StormSubmitter.submitTopology(args[0], conf, builder.createTopology()); 79 | 80 | } else { 81 | 82 | // run it in a simulated local cluster 83 | 84 | // set the number of threads to run - similar to setting number of workers in live cluster 85 | conf.setMaxTaskParallelism(4); 86 | 87 | // create the local cluster instance 88 | LocalCluster cluster = new LocalCluster(); 89 | 90 | // submit the topology to the local cluster 91 | cluster.submitTopology("tweet-word-count", conf, builder.createTopology()); 92 | 93 | // let the 
package udacity.storm.spout;

import backtype.storm.spout.SpoutOutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichSpout;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Values;
import backtype.storm.utils.Utils;

import java.util.Map;
import java.util.Random;

/**
 * A spout that emits one randomly chosen sentence (field "sentence")
 * roughly every 100 ms.
 */
public class RandomSentenceSpout extends BaseRichSpout {

    // fixed pool of sample sentences
    private static final String[] SENTENCES = {
        "the cow jumped over the moon",
        "an apple a day keeps the doctor away",
        "four score and seven years ago",
        "snow white and the seven dwarfs",
        "i am at two with nature"
    };

    SpoutOutputCollector _collector;
    Random _rand;

    @Override
    public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
        // keep the collector so nextTuple() can emit through it
        _collector = collector;
        _rand = new Random();
    }

    @Override
    public void nextTuple() {
        // pace the stream at ~10 tuples/second
        Utils.sleep(100);
        String chosen = SENTENCES[_rand.nextInt(SENTENCES.length)];
        _collector.emit(new Values(chosen));
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        // single-column output schema: "sentence"
        declarer.declare(new Fields("sentence"));
    }
}
/lesson4/TeamAwesome/FinalProject/src/jvm/udacity/storm/tools/NthLastModifiedTimeTracker.java: -------------------------------------------------------------------------------- 1 | package udacity.storm.tools; 2 | 3 | import backtype.storm.utils.Time; 4 | import org.apache.commons.collections.buffer.CircularFifoBuffer; 5 | 6 | /** 7 | * This class tracks the time-since-last-modify of a "thing" in a rolling fashion. 8 | *

9 | * For example, create a 5-slot tracker to track the five most recent time-since-last-modify. 10 | *

11 | * You must manually "mark" that the "something" that you want to track -- in terms of modification times -- has just 12 | * been modified. 13 | */ 14 | public class NthLastModifiedTimeTracker { 15 | 16 | private static final int MILLIS_IN_SEC = 1000; 17 | 18 | private final CircularFifoBuffer lastModifiedTimesMillis; 19 | 20 | public NthLastModifiedTimeTracker(int numTimesToTrack) { 21 | if (numTimesToTrack < 1) { 22 | throw new IllegalArgumentException( 23 | "numTimesToTrack must be greater than zero (you requested " + numTimesToTrack + ")"); 24 | } 25 | lastModifiedTimesMillis = new CircularFifoBuffer(numTimesToTrack); 26 | initLastModifiedTimesMillis(); 27 | } 28 | 29 | private void initLastModifiedTimesMillis() { 30 | long nowCached = now(); 31 | for (int i = 0; i < lastModifiedTimesMillis.maxSize(); i++) { 32 | lastModifiedTimesMillis.add(Long.valueOf(nowCached)); 33 | } 34 | } 35 | 36 | private long now() { 37 | return Time.currentTimeMillis(); 38 | } 39 | 40 | public int secondsSinceOldestModification() { 41 | long modifiedTimeMillis = ((Long) lastModifiedTimesMillis.get()).longValue(); 42 | return (int) ((now() - modifiedTimeMillis) / MILLIS_IN_SEC); 43 | } 44 | 45 | public void markAsModified() { 46 | updateLastModifiedTime(); 47 | } 48 | 49 | private void updateLastModifiedTime() { 50 | lastModifiedTimesMillis.add(now()); 51 | } 52 | 53 | } 54 | -------------------------------------------------------------------------------- /lesson4/TeamAwesome/FinalProject/src/jvm/udacity/storm/tools/Rankable.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. 
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more contributor license
 * agreements. See the NOTICE file distributed with this work for additional information regarding
 * copyright ownership. The ASF licenses this file to you under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with the License. You may
 * obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the
 * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
 * either express or implied. See the License for the specific language governing permissions and
 * limitations under the License.
 */
package udacity.storm.tools;

/**
 * Something that can be ranked by an associated count.
 * The Comparable type argument was restored (stripped in transcription).
 */
public interface Rankable extends Comparable<Rankable> {

    /** The object being ranked (e.g. a word or hashtag). */
    Object getObject();

    /** The count used to rank the object. */
    long getCount();

    /**
     * Note: the object wrapped by the Rankable is NOT defensively copied; it
     * is shared between the original and the copy.
     *
     * @return a new Rankable with the same object reference and count
     */
    Rankable copy();
}
int mainSentiment = 0; 25 | if (tweet != null && tweet.length() > 0) { 26 | int longest = 0; 27 | Annotation annotation = pipeline.process(tweet); 28 | for (CoreMap sentence : annotation 29 | .get(CoreAnnotations.SentencesAnnotation.class)) { 30 | Tree tree = sentence 31 | .get(SentimentCoreAnnotations.AnnotatedTree.class); 32 | int sentiment = RNNCoreAnnotations.getPredictedClass(tree); 33 | String partText = sentence.toString(); 34 | if (partText.length() > longest) { 35 | mainSentiment = sentiment; 36 | longest = partText.length(); 37 | } 38 | 39 | } 40 | } 41 | return mainSentiment; 42 | } 43 | } -------------------------------------------------------------------------------- /lesson4/TeamAwesome/FinalProject/src/jvm/udacity/storm/tools/SlotBasedCounter.java: -------------------------------------------------------------------------------- 1 | //package storm.starter.tools; 2 | package udacity.storm.tools; 3 | 4 | import java.io.Serializable; 5 | import java.util.HashMap; 6 | import java.util.HashSet; 7 | import java.util.Map; 8 | import java.util.Set; 9 | 10 | /** 11 | * This class provides per-slot counts of the occurrences of objects. 12 | *

13 | * It can be used, for instance, as a building block for implementing sliding window counting of objects. 14 | * 15 | * @param The type of those objects we want to count. 16 | */ 17 | public final class SlotBasedCounter implements Serializable { 18 | 19 | private static final long serialVersionUID = 4858185737378394432L; 20 | 21 | private final Map objToCounts = new HashMap(); 22 | private final int numSlots; 23 | 24 | public SlotBasedCounter(int numSlots) { 25 | if (numSlots <= 0) { 26 | throw new IllegalArgumentException("Number of slots must be greater than zero (you requested " + numSlots + ")"); 27 | } 28 | this.numSlots = numSlots; 29 | } 30 | 31 | public void incrementCount(T obj, int slot) { 32 | long[] counts = objToCounts.get(obj); 33 | if (counts == null) { 34 | counts = new long[this.numSlots]; 35 | objToCounts.put(obj, counts); 36 | } 37 | counts[slot]++; 38 | } 39 | 40 | public long getCount(T obj, int slot) { 41 | long[] counts = objToCounts.get(obj); 42 | if (counts == null) { 43 | return 0; 44 | } 45 | else { 46 | return counts[slot]; 47 | } 48 | } 49 | 50 | public Map getCounts() { 51 | Map result = new HashMap(); 52 | for (T obj : objToCounts.keySet()) { 53 | result.put(obj, computeTotalCount(obj)); 54 | } 55 | return result; 56 | } 57 | 58 | private long computeTotalCount(T obj) { 59 | long[] curr = objToCounts.get(obj); 60 | long total = 0; 61 | for (long l : curr) { 62 | total += l; 63 | } 64 | return total; 65 | } 66 | 67 | /** 68 | * Reset the slot count of any tracked objects to zero for the given slot. 
69 | * 70 | * @param slot 71 | */ 72 | public void wipeSlot(int slot) { 73 | for (T obj : objToCounts.keySet()) { 74 | resetSlotCountToZero(obj, slot); 75 | } 76 | } 77 | 78 | private void resetSlotCountToZero(T obj, int slot) { 79 | long[] counts = objToCounts.get(obj); 80 | counts[slot] = 0; 81 | } 82 | 83 | private boolean shouldBeRemovedFromCounter(T obj) { 84 | return computeTotalCount(obj) == 0; 85 | } 86 | 87 | /** 88 | * Remove any object from the counter whose total count is zero (to free up memory). 89 | */ 90 | public void wipeZeros() { 91 | Set objToBeRemoved = new HashSet(); 92 | for (T obj : objToCounts.keySet()) { 93 | if (shouldBeRemovedFromCounter(obj)) { 94 | objToBeRemoved.add(obj); 95 | } 96 | } 97 | for (T obj : objToBeRemoved) { 98 | objToCounts.remove(obj); 99 | } 100 | } 101 | 102 | } 103 | -------------------------------------------------------------------------------- /lesson4/TeamAwesome/FinalProject/src/jvm/udacity/storm/tools/TupleHelpers.java: -------------------------------------------------------------------------------- 1 | //package storm.starter.util; 2 | package udacity.storm.tools; 3 | 4 | import backtype.storm.Constants; 5 | import backtype.storm.tuple.Tuple; 6 | 7 | public final class TupleHelpers { 8 | 9 | private TupleHelpers() { 10 | } 11 | 12 | public static boolean isTickTuple(Tuple tuple) { 13 | return tuple.getSourceComponent().equals(Constants.SYSTEM_COMPONENT_ID) && tuple.getSourceStreamId().equals( 14 | Constants.SYSTEM_TICK_STREAM_ID); 15 | } 16 | 17 | } 18 | -------------------------------------------------------------------------------- /lesson4/TeamAwesome/FinalProject/src/jvm/udacity/storm/tools/ValueComparator.java: -------------------------------------------------------------------------------- 1 | package udacity.storm.tools; 2 | 3 | import java.util.Comparator; 4 | import java.util.Map; 5 | 6 | public class ValueComparator implements Comparator { 7 | 8 | private static final long serialVersionUID = 
-1549827195410578903L; 9 | Map base; 10 | public ValueComparator(Map base) { 11 | this.base = base; 12 | } 13 | 14 | // Note: this comparator imposes orderings that are inconsistent with equals. 15 | public int compare(String a, String b) { 16 | if (base.get(a) >= base.get(b)) { 17 | return -1; 18 | } else { 19 | return 1; 20 | } // returning 0 would merge keys 21 | } 22 | } -------------------------------------------------------------------------------- /lesson4/TeamAwesome/README.md: -------------------------------------------------------------------------------- 1 | ud381 2 | ===== 3 | 4 | Real-Time Analytics with Storm 5 | -------------------------------------------------------------------------------- /lesson4/TeamAwesome/viz/README.md: -------------------------------------------------------------------------------- 1 | Real-Time-Analytics 2 | =================== 3 | 4 | A demo showing Flask, Redis pubsub, and HTML5 server side events. Forked from Cheng-Han. 5 | -------------------------------------------------------------------------------- /lesson4/TeamAwesome/viz/app.py: -------------------------------------------------------------------------------- 1 | from flask import Flask, render_template, Response 2 | 3 | import redis 4 | 5 | app = Flask(__name__) 6 | r = redis.StrictRedis(host='127.0.0.1', port=6379, db=0) 7 | 8 | 9 | def event_stream(): 10 | pubsub = r.pubsub() 11 | pubsub.subscribe('WordCountTopology') 12 | for message in pubsub.listen(): 13 | print message 14 | yield 'data: %s\n\n' % message['data'] 15 | 16 | 17 | @app.route('/') 18 | def show_homepage(): 19 | #Word Cloud = cloud.html and app-cloud.js 20 | return render_template("cloud.html") 21 | 22 | @app.route('/basic') 23 | def show_basic(): 24 | #Basic d3 view = basic.html and app.js 25 | return render_template("basic.html") 26 | 27 | @app.route('/map') 28 | def show_map(): 29 | #Basic d3 view = basic.html and app.js 30 | return render_template("map.html") 31 | 32 | @app.route('/stream') 33 
| def stream(): 34 | return Response(event_stream(), mimetype="text/event-stream") 35 | 36 | 37 | if __name__ == '__main__': 38 | app.run(threaded=True, 39 | host='0.0.0.0' 40 | ) 41 | -------------------------------------------------------------------------------- /lesson4/TeamAwesome/viz/d3/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2010-2014, Michael Bostock 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | * Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 10 | * Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | * The name Michael Bostock may not be used to endorse or promote products 15 | derived from this software without specific prior written permission. 16 | 17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 18 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 20 | DISCLAIMED. IN NO EVENT SHALL MICHAEL BOSTOCK BE LIABLE FOR ANY DIRECT, 21 | INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 22 | BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 24 | OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 25 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 26 | EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
27 | -------------------------------------------------------------------------------- /lesson4/TeamAwesome/viz/d3/d3.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/udacity/ud381/7a0258191719c79d48e267eefce2e1a7921cd559/lesson4/TeamAwesome/viz/d3/d3.zip -------------------------------------------------------------------------------- /lesson4/TeamAwesome/viz/dump.rdb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/udacity/ud381/7a0258191719c79d48e267eefce2e1a7921cd559/lesson4/TeamAwesome/viz/dump.rdb -------------------------------------------------------------------------------- /lesson4/TeamAwesome/viz/rt-provision-32.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | echo "Real-Time Provisioning...." 4 | 5 | echo "Java JDK..." 6 | sudo apt-get install default-jdk -y 7 | 8 | echo "Storm..." 9 | #sudo wget http://apache.spinellicreations.com/incubator/storm/apache-storm-0.9.1-incubating/apache-storm-0.9.1-incubating.zip 10 | #sudo unzip -o /media/sf_VirtualBoxUbuntuShared/apache-storm-0.9.1-incubating.zip 11 | sudo wget http://www.trieuvan.com/apache/incubator/storm/apache-storm-0.9.2-incubating/apache-storm-0.9.2-incubating.zip 12 | sudo unzip -o $(pwd)/apache-storm-0.9.2-incubating.zip 13 | # use storm.0.9.2 for now...confirming with Twitter 14 | sudo ln -s $(pwd)/apache-storm-0.9.2-incubating/ /usr/share/storm 15 | sudo ln -s /usr/share/storm/bin/storm /usr/bin/storm 16 | 17 | echo "Lein..." 18 | sudo wget https://raw.githubusercontent.com/technomancy/leiningen/stable/bin/lein 19 | sudo mv lein /usr/bin 20 | sudo chmod 755 /usr/bin/lein 21 | lein 22 | 23 | echo "Kafka..." 24 | #sudo wget http://www.motorlogy.com/apache/kafka/0.8.1.1/kafka_2.9.2-0.8.1.1.tgz 25 | #sudo tar -xvzf kafka_2.9.2-0.8.1.1.tgz 26 | 27 | echo "Sublime..." 
28 | #sudo wget http://c758482.r82.cf2.rackcdn.com/sublime_text_3_build_3047_x64.tar.bz2 29 | #sudo tar vxjf sublime_text_3_build_3047_x64.tar.bz2 30 | #sudo mv sublime_text_3 /opt/ 31 | #sudo ln -s /opt/sublime_text_3/sublime_text /usr/bin/sublime 32 | 33 | sudo wget http://c758482.r82.cf2.rackcdn.com/sublime_text_3_build_3047.tar.bz2 34 | sudo tar vxjf Sublime\ Text\ 2.tar.bz2 35 | sudo mv Sublime\ Text\ 2 /opt/ 36 | sudo ln -s /opt/Sublime\ Text\ 2/sublime_text /usr/bin/sublime 37 | 38 | echo "Maven run..." 39 | #cd /vagrant/storm-hack 40 | #mvn -f m4-pom.xml clean 41 | #mvn -f m4-pom.xml compile 42 | #mvn -f m4-pom.xml package 43 | #mvn -f m4-pom.xml clean 44 | #cd /vagrant 45 | 46 | echo "IntelliJ..." 47 | sudo wget http://download-cf.jetbrains.com/idea/ideaIC-13.1.3.tar.gz 48 | sudo tar -xvzf ideaIC-13.1.3.tar.gz 49 | #sudo ln -s idea-IC-135.909/bin/idea.sh /usr/bin/idea 50 | 51 | echo "Git..." 52 | sudo apt-get install git-core -y 53 | 54 | echo "Redis (Python)..." 55 | sudo pip install redis 56 | 57 | echo "MongoDB...removed" 58 | #sudo apt-key adv --keyserver keyserver.ubuntu.com --recv 7F0CEB10 59 | #sudo echo "deb http://downloads-distro.mongodb.org/repo/ubuntu-upstart dist 10gen" | sudo tee -a /etc/apt/sources.list.d/10gen.list 60 | #sudo apt-get -y update 61 | #sudo apt-get -y install mongodb-10gen 62 | 63 | echo "Nodejs...(puppet attempt failed uy_nodejs-32...removed" 64 | #sudo wget http://nodejs.org/dist/v0.10.29/node-v0.10.29-linux-x86.tar.gz 65 | #sudo tar -xvzf node-v0.10.29-linux-x86 66 | #sudo /vagrant/node-v0.10.29-linux-x86 67 | #linking doesn't work.... 68 | #sudo apt-add-repository ppa:chris-lea/node.js -y 69 | #sudo apt-get update -y 70 | #sudo apt-get install nodejs -y 71 | 72 | echo "Adding from VagrantFile...." 
73 | sudo ufw disable 74 | 75 | sudo apt-get update -y 76 | 77 | sudo apt-get install maven -y 78 | 79 | sudo apt-get install vim -y 80 | 81 | sudo apt-get --yes install zookeeper zookeeperd -y 82 | 83 | sudo apt-get install redis-server -y 84 | 85 | sudo apt-get install python-software-properties -y 86 | 87 | sudo apt-get install python-pip -y 88 | 89 | sudo pip install flask 90 | 91 | sudo pip install redis 92 | 93 | -------------------------------------------------------------------------------- /lesson4/TeamAwesome/viz/static/Twitter_logo_white.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/udacity/ud381/7a0258191719c79d48e267eefce2e1a7921cd559/lesson4/TeamAwesome/viz/static/Twitter_logo_white.png -------------------------------------------------------------------------------- /lesson4/TeamAwesome/viz/static/Udacity-logoRobot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/udacity/ud381/7a0258191719c79d48e267eefce2e1a7921cd559/lesson4/TeamAwesome/viz/static/Udacity-logoRobot.png -------------------------------------------------------------------------------- /lesson4/TeamAwesome/viz/static/app-cloud.js: -------------------------------------------------------------------------------- 1 | var source = new EventSource('/stream'); 2 | var hash = {}; 3 | var width = 1200; 4 | var height = 700; 5 | 6 | //update hash (associative array) with incoming word and count 7 | source.onmessage = function (event) { 8 | word = event.data.split(":")[0]; 9 | count = event.data.split(":")[1]; 10 | if(!skip(word)){ 11 | hash[word]=count; 12 | } 13 | }; 14 | 15 | //update function for visualization 16 | var updateViz = function(){ 17 | //print console message 18 | console.log("cloudArray-1" + JSON.stringify(d3.entries(hash))); 19 | 20 | var frequency_list = d3.entries(hash); 21 | 22 | d3.layout.cloud().size([800, 300]) 23 | 
.words(frequency_list) 24 | .rotate(0) 25 | .fontSize(function(d) { return d.value; }) 26 | .on("end", draw) 27 | .start(); 28 | }; 29 | 30 | // run updateViz at #7000 milliseconds, or 7 second 31 | window.setInterval(updateViz, 7000); 32 | 33 | //clean list, can be added to word skipping bolt 34 | var skipList = ["https","follow","1","2","please","following","followers","fucking","RT","the","at","a"]; 35 | 36 | var skip = function(tWord){ 37 | for(var i=0; i 2 | 3 | 4 | 5 | Page Title 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 34 | 35 | 36 |

Udacity and Twitter bring you Real-Time Analytics with Storm

37 |

38 |  Smiley face 39 |  Twitter logo 40 |   Basic d3!!!

41 |
42 | 43 | 49 | 50 | -------------------------------------------------------------------------------- /lesson4/TeamAwesome/viz/templates/map.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 26 | 27 | 28 | 29 | 30 | 31 | 32 |
33 | 129 | 130 | 131 | -------------------------------------------------------------------------------- /provision.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -i 2 | 3 | # The following are documented (and stolen from) here: 4 | # http://redsymbol.net/articles/unofficial-bash-strict-mode/ 5 | # 6 | # In case that link dies, here's the simple version: 7 | # 1) -e means if there's an error, stop execution. 8 | # 2) -u means if we reference an undefined variable, blow up. 9 | # 3) -o pipefail means that if a step in a pipe fails, the whole pipe fails, which in combination with 1) means 10 | # that the script as a whole fails. 11 | 12 | set -euo pipefail 13 | 14 | sudo apt-get update -y 15 | 16 | sudo apt-get -y install default-jdk maven vim zookeeper zookeeperd redis-server \ 17 | python-software-properties python-pip python tree 18 | 19 | sudo pip install flask redis 20 | 21 | echo "Storm..." 22 | # TODO maybe make this use the best mirror always? 23 | sudo mkdir /opt/storm 24 | cd /opt/storm 25 | sudo wget http://mirror.cogentco.com/pub/apache/incubator/storm/apache-storm-0.9.2-incubating/apache-storm-0.9.2-incubating.tar.gz 26 | sudo tar xvzf apache-storm-0.9.2-incubating.tar.gz 27 | sudo rm apache-storm-0.9.2-incubating.tar.gz 28 | sudo chmod +x /opt/storm/apache-storm-0.9.2-incubating/bin/storm 29 | sudo ln -s /opt/storm/apache-storm-0.9.2-incubating/bin/storm /usr/bin/storm 30 | -------------------------------------------------------------------------------- /viz/README.md: -------------------------------------------------------------------------------- 1 | Real-Time-Analytics 2 | =================== 3 | 4 | A demo showing Flask, Redis pubsub, and HTML5 server side events. Forked from Cheng-Han. 
5 | -------------------------------------------------------------------------------- /viz/app.py: -------------------------------------------------------------------------------- 1 | from flask import Flask, render_template, Response 2 | 3 | import redis 4 | 5 | app = Flask(__name__) 6 | r = redis.StrictRedis(host='127.0.0.1', port=6379, db=0) 7 | 8 | 9 | def event_stream(): 10 | pubsub = r.pubsub() 11 | pubsub.subscribe('WordCountTopology') 12 | for message in pubsub.listen(): 13 | print message 14 | yield 'data: %s\n\n' % message['data'] 15 | 16 | 17 | @app.route('/') 18 | def show_homepage(): 19 | #Word Cloud = cloud.html and app-cloud.js 20 | return render_template("cloud.html") 21 | 22 | @app.route('/basic') 23 | def show_basic(): 24 | #Basic d3 view = basic.html and app.js 25 | return render_template("basic.html") 26 | 27 | @app.route('/stream') 28 | def stream(): 29 | return Response(event_stream(), mimetype="text/event-stream") 30 | 31 | 32 | if __name__ == '__main__': 33 | app.run(threaded=True, 34 | host='0.0.0.0' 35 | ) 36 | -------------------------------------------------------------------------------- /viz/d3/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2010-2014, Michael Bostock 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | * Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 10 | * Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | * The name Michael Bostock may not be used to endorse or promote products 15 | derived from this software without specific prior written permission. 
16 | 17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 18 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 20 | DISCLAIMED. IN NO EVENT SHALL MICHAEL BOSTOCK BE LIABLE FOR ANY DIRECT, 21 | INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 22 | BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 24 | OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 25 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 26 | EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 | -------------------------------------------------------------------------------- /viz/d3/d3.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/udacity/ud381/7a0258191719c79d48e267eefce2e1a7921cd559/viz/d3/d3.zip -------------------------------------------------------------------------------- /viz/dump.rdb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/udacity/ud381/7a0258191719c79d48e267eefce2e1a7921cd559/viz/dump.rdb -------------------------------------------------------------------------------- /viz/rt-provision-32.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | echo "Real-Time Provisioning...." 4 | 5 | echo "Java JDK..." 6 | sudo apt-get install default-jdk -y 7 | 8 | echo "Storm..." 
9 | #sudo wget http://apache.spinellicreations.com/incubator/storm/apache-storm-0.9.1-incubating/apache-storm-0.9.1-incubating.zip 10 | #sudo unzip -o /media/sf_VirtualBoxUbuntuShared/apache-storm-0.9.1-incubating.zip 11 | sudo wget http://www.trieuvan.com/apache/incubator/storm/apache-storm-0.9.2-incubating/apache-storm-0.9.2-incubating.zip 12 | sudo unzip -o $(pwd)/apache-storm-0.9.2-incubating.zip 13 | # use storm.0.9.2 for now...confirming with Twitter 14 | sudo ln -s $(pwd)/apache-storm-0.9.2-incubating/ /usr/share/storm 15 | sudo ln -s /usr/share/storm/bin/storm /usr/bin/storm 16 | 17 | echo "Lein..." 18 | sudo wget https://raw.githubusercontent.com/technomancy/leiningen/stable/bin/lein 19 | sudo mv lein /usr/bin 20 | sudo chmod 755 /usr/bin/lein 21 | lein 22 | 23 | echo "Kafka..." 24 | #sudo wget http://www.motorlogy.com/apache/kafka/0.8.1.1/kafka_2.9.2-0.8.1.1.tgz 25 | #sudo tar -xvzf kafka_2.9.2-0.8.1.1.tgz 26 | 27 | echo "Sublime..." 28 | #sudo wget http://c758482.r82.cf2.rackcdn.com/sublime_text_3_build_3047_x64.tar.bz2 29 | #sudo tar vxjf sublime_text_3_build_3047_x64.tar.bz2 30 | #sudo mv sublime_text_3 /opt/ 31 | #sudo ln -s /opt/sublime_text_3/sublime_text /usr/bin/sublime 32 | 33 | sudo wget http://c758482.r82.cf2.rackcdn.com/sublime_text_3_build_3047.tar.bz2 34 | sudo tar vxjf Sublime\ Text\ 2.tar.bz2 35 | sudo mv Sublime\ Text\ 2 /opt/ 36 | sudo ln -s /opt/Sublime\ Text\ 2/sublime_text /usr/bin/sublime 37 | 38 | echo "Maven run..." 39 | #cd /vagrant/storm-hack 40 | #mvn -f m4-pom.xml clean 41 | #mvn -f m4-pom.xml compile 42 | #mvn -f m4-pom.xml package 43 | #mvn -f m4-pom.xml clean 44 | #cd /vagrant 45 | 46 | echo "IntelliJ..." 47 | sudo wget http://download-cf.jetbrains.com/idea/ideaIC-13.1.3.tar.gz 48 | sudo tar -xvzf ideaIC-13.1.3.tar.gz 49 | #sudo ln -s idea-IC-135.909/bin/idea.sh /usr/bin/idea 50 | 51 | echo "Git..." 52 | sudo apt-get install git-core -y 53 | 54 | echo "Redis (Python)..." 
55 | sudo pip install redis 56 | 57 | echo "MongoDB...removed" 58 | #sudo apt-key adv --keyserver keyserver.ubuntu.com --recv 7F0CEB10 59 | #sudo echo "deb http://downloads-distro.mongodb.org/repo/ubuntu-upstart dist 10gen" | sudo tee -a /etc/apt/sources.list.d/10gen.list 60 | #sudo apt-get -y update 61 | #sudo apt-get -y install mongodb-10gen 62 | 63 | echo "Nodejs...(puppet attempt failed uy_nodejs-32...removed" 64 | #sudo wget http://nodejs.org/dist/v0.10.29/node-v0.10.29-linux-x86.tar.gz 65 | #sudo tar -xvzf node-v0.10.29-linux-x86 66 | #sudo /vagrant/node-v0.10.29-linux-x86 67 | #linking doesn't work.... 68 | #sudo apt-add-repository ppa:chris-lea/node.js -y 69 | #sudo apt-get update -y 70 | #sudo apt-get install nodejs -y 71 | 72 | echo "Adding from VagrantFile...." 73 | sudo ufw disable 74 | 75 | sudo apt-get update -y 76 | 77 | sudo apt-get install maven -y 78 | 79 | sudo apt-get install vim -y 80 | 81 | sudo apt-get --yes install zookeeper zookeeperd -y 82 | 83 | sudo apt-get install redis-server -y 84 | 85 | sudo apt-get install python-software-properties -y 86 | 87 | sudo apt-get install python-pip -y 88 | 89 | sudo pip install flask 90 | 91 | sudo pip install redis 92 | 93 | -------------------------------------------------------------------------------- /viz/static/Twitter_logo_white.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/udacity/ud381/7a0258191719c79d48e267eefce2e1a7921cd559/viz/static/Twitter_logo_white.png -------------------------------------------------------------------------------- /viz/static/Udacity-logoRobot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/udacity/ud381/7a0258191719c79d48e267eefce2e1a7921cd559/viz/static/Udacity-logoRobot.png -------------------------------------------------------------------------------- /viz/static/app-cloud.js: 
-------------------------------------------------------------------------------- 1 | // D3 Word Cloud Implementation by Eric Coopey: 2 | // http://bl.ocks.org/ericcoopey/6382449 3 | 4 | var source = new EventSource('/stream'); 5 | var hash = {}; 6 | var width = 1200; 7 | var height = 700; 8 | 9 | //update hash (associative array) with incoming word and count 10 | source.onmessage = function (event) { 11 | word = event.data.split("|")[0]; 12 | count = event.data.split("|")[1]; 13 | if(!skip(word)){ 14 | hash[word]=count; 15 | } 16 | }; 17 | 18 | //update function for visualization 19 | var updateViz = function(){ 20 | //print console message 21 | console.log("cloudArray-1" + JSON.stringify(d3.entries(hash))); 22 | 23 | var frequency_list = d3.entries(hash); 24 | 25 | d3.layout.cloud().size([800, 300]) 26 | .words(frequency_list) 27 | .rotate(0) 28 | .fontSize(function(d) { return d.value; }) 29 | .on("end", draw) 30 | .start(); 31 | }; 32 | 33 | // run updateViz at #7000 milliseconds, or 7 second 34 | window.setInterval(updateViz, 7000); 35 | 36 | //clean list, can be added to word skipping bolt 37 | var skipList = ["https","follow","1","2","please","following","followers","fucking","RT","the","at","a"]; 38 | 39 | var skip = function(tWord){ 40 | for(var i=0; i 2 | 3 | 4 | 5 | Page Title 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 34 | 35 | 36 |

Udacity and Twitter bring you Real-Time Analytics with Storm

37 |

38 |  Smiley face 39 |  Twitter logo 40 |   Basic d3!!!

41 |
42 | 43 | 49 | 50 | --------------------------------------------------------------------------------