├── .github └── workflows │ └── manual.yml ├── .gitignore ├── CODEOWNERS ├── README.md ├── Vagrantfile ├── default.json ├── lesson1 ├── stage1 │ ├── pom.xml │ └── src │ │ └── jvm │ │ └── udacity │ │ └── storm │ │ ├── ExclamationTopology.java │ │ └── ReporterExclamationTopology.java ├── stage2 │ ├── pom.xml │ └── src │ │ └── jvm │ │ └── udacity │ │ └── storm │ │ ├── ExclamationTopology.java │ │ ├── ReporterExclamationTopology.java │ │ └── spout │ │ └── RandomSentenceSpout.java └── stage3 │ ├── pom.xml │ └── src │ └── jvm │ └── udacity │ └── storm │ ├── ExclamationTopology.java │ ├── ReporterExclamationTopology.java │ └── spout │ └── RandomSentenceSpout.java ├── lesson2 ├── stage1 │ ├── pom.xml │ └── src │ │ └── jvm │ │ └── udacity │ │ └── storm │ │ ├── WordCountTopology.java │ │ └── spout │ │ └── RandomSentenceSpout.java ├── stage2 │ ├── pom.xml │ └── src │ │ └── jvm │ │ └── udacity │ │ └── storm │ │ ├── WordCountTopology.java │ │ └── spout │ │ └── RandomSentenceSpout.java ├── stage3 │ ├── pom.xml │ └── src │ │ └── jvm │ │ └── udacity │ │ └── storm │ │ ├── SentenceCountTopology.java │ │ ├── WordCountTopology.java │ │ └── spout │ │ └── RandomSentenceSpout.java ├── stage4 │ ├── pom.xml │ └── src │ │ └── jvm │ │ └── udacity │ │ └── storm │ │ ├── SentenceCountTopology.java │ │ ├── SentenceWordCountTopology.java │ │ ├── WordCountTopology.java │ │ └── spout │ │ └── RandomSentenceSpout.java ├── stage5 │ ├── pom.xml │ └── src │ │ └── jvm │ │ └── udacity │ │ └── storm │ │ └── TweetTopology.java ├── stage6 │ ├── pom.xml │ └── src │ │ └── jvm │ │ └── udacity │ │ └── storm │ │ ├── CountBolt.java │ │ ├── ParseTweetBolt.java │ │ ├── ReportBolt.java │ │ ├── TweetSpout.java │ │ └── TweetTopology.java └── stage7 │ ├── pom.xml │ └── src │ └── jvm │ └── udacity │ └── storm │ ├── CountBolt.java │ ├── ParseTweetBolt.java │ ├── ReportBolt.java │ ├── RollingCountBolt.java │ ├── TweetSpout.java │ ├── TweetTopology.java │ └── tools │ ├── NthLastModifiedTimeTracker.java │ ├── 
SlidingWindowCounter.java │ ├── SlotBasedCounter.java │ └── TupleHelpers.java ├── lesson3 ├── stage1 │ ├── pom.xml │ └── src │ │ └── jvm │ │ └── udacity │ │ └── storm │ │ ├── CountBolt.java │ │ ├── ParseTweetBolt.java │ │ ├── ReportBolt.java │ │ ├── TweetSpout.java │ │ └── TweetTopology.java ├── stage2 │ ├── pom.xml │ └── src │ │ └── jvm │ │ └── udacity │ │ └── storm │ │ ├── CountBolt.java │ │ ├── ParseTweetBolt.java │ │ ├── ReportBolt.java │ │ ├── SplitSentence.java │ │ ├── TweetSpout.java │ │ ├── TweetTopology.java │ │ └── resources │ │ ├── splitsentence.py │ │ └── storm.py ├── stage3 │ ├── pom.xml │ └── src │ │ └── jvm │ │ └── udacity │ │ └── storm │ │ ├── CountBolt.java │ │ ├── ParseTweetBolt.java │ │ ├── ReportBolt.java │ │ ├── SplitSentence.java │ │ ├── TweetSpout.java │ │ ├── TweetTopology.java │ │ ├── URLBolt.java │ │ └── resources │ │ ├── splitsentence.py │ │ ├── storm.py │ │ └── urltext.py ├── stage4 │ ├── pom.xml │ └── src │ │ └── jvm │ │ └── udacity │ │ └── storm │ │ ├── CountBolt.java │ │ ├── ParseTweetBolt.java │ │ ├── ReportBolt.java │ │ ├── RollingCountBolt.java │ │ ├── TweetSpout.java │ │ ├── TweetTopology.java │ │ └── tools │ │ ├── NthLastModifiedTimeTracker.java │ │ ├── SlidingWindowCounter.java │ │ ├── SlotBasedCounter.java │ │ └── TupleHelpers.java ├── stage5 │ ├── pom.xml │ └── src │ │ └── jvm │ │ └── udacity │ │ └── storm │ │ ├── AbstractRankerBolt.java │ │ ├── CountBolt.java │ │ ├── IntermediateRankingsBolt.java │ │ ├── ParseTweetBolt.java │ │ ├── ReportBolt.java │ │ ├── RollingCountBolt.java │ │ ├── TopNTweetTopology.java │ │ ├── TotalRankingsBolt.java │ │ ├── TweetSpout.java │ │ ├── spout │ │ └── RandomSentenceSpout.java │ │ └── tools │ │ ├── NthLastModifiedTimeTracker.java │ │ ├── Rankable.java │ │ ├── RankableObjectWithFields.java │ │ ├── Rankings.java │ │ ├── SlidingWindowCounter.java │ │ ├── SlotBasedCounter.java │ │ └── TupleHelpers.java ├── stage6 │ ├── pom.xml │ └── src │ │ └── jvm │ │ └── udacity │ │ └── storm │ │ ├── 
ExclamationTopology.java │ │ ├── ReportBolt.java │ │ └── spout │ │ ├── MyLikesSpout.java │ │ └── MyNamesSpout.java └── stage7 │ ├── pom.xml │ └── src │ └── jvm │ └── udacity │ └── storm │ ├── ExclamationTopology.java │ ├── ReportBolt.java │ └── spout │ ├── MyLikesSpout.java │ └── MyNamesSpout.java ├── lesson4 └── TeamAwesome │ ├── FinalProject │ ├── 2014_Gaz_counties_national.txt │ ├── geoinfo.csv │ ├── pom.xml │ └── src │ │ ├── MyPropFile.properties │ │ └── jvm │ │ ├── geocode │ │ ├── GeoName.java │ │ ├── ReverseGeoCode.java │ │ └── kdtree │ │ │ ├── KDNode.java │ │ │ ├── KDNodeComparator.java │ │ │ └── KDTree.java │ │ ├── geoinfo.csv │ │ └── udacity │ │ └── storm │ │ ├── CountBolt.java │ │ ├── InfoBolt.java │ │ ├── ParseTweetBolt.java │ │ ├── ReportBolt.java │ │ ├── TopNTweetTopology.java │ │ ├── TopWords.java │ │ ├── TweetSpout.java │ │ ├── spout │ │ └── RandomSentenceSpout.java │ │ └── tools │ │ ├── CountiesLookup.java │ │ ├── NthLastModifiedTimeTracker.java │ │ ├── Rankable.java │ │ ├── RankableObjectWithFields.java │ │ ├── Rankings.java │ │ ├── SentimentAnalyzer.java │ │ ├── SlidingWindowCounter.java │ │ ├── SlotBasedCounter.java │ │ ├── TupleHelpers.java │ │ └── ValueComparator.java │ ├── README.md │ └── viz │ ├── README.md │ ├── app.py │ ├── d3 │ ├── LICENSE │ ├── d3.js │ ├── d3.min.js │ └── d3.zip │ ├── dump.rdb │ ├── rt-provision-32.sh │ ├── static │ ├── Twitter_logo_white.png │ ├── Udacity-logoRobot.png │ ├── app-cloud.js │ ├── app-map.js │ ├── app.js │ ├── countyLookup.js │ ├── d3.layout.cloud.js │ ├── datamaps.js │ ├── unemployment.tsv │ ├── us - Copy.json │ └── us.json │ └── templates │ ├── basic.html │ ├── cloud.html │ └── map.html ├── provision.sh └── viz ├── README.md ├── app.py ├── d3 ├── LICENSE ├── d3.js ├── d3.min.js └── d3.zip ├── dump.rdb ├── rt-provision-32.sh ├── static ├── Twitter_logo_white.png ├── Udacity-logoRobot.png ├── app-cloud.js ├── app.js └── d3.layout.cloud.js └── templates ├── basic.html └── cloud.html 
/.github/workflows/manual.yml: -------------------------------------------------------------------------------- 1 | # Workflow to ensure whenever a Github PR is submitted, 2 | # a JIRA ticket gets created automatically. 3 | name: Manual Workflow 4 | 5 | # Controls when the action will run. 6 | on: 7 | # Triggers the workflow on pull request events but only for the master branch 8 | pull_request_target: 9 | types: [opened, reopened] 10 | 11 | # Allows you to run this workflow manually from the Actions tab 12 | workflow_dispatch: 13 | 14 | jobs: 15 | test-transition-issue: 16 | name: Convert Github Issue to Jira Issue 17 | runs-on: ubuntu-latest 18 | steps: 19 | - name: Checkout 20 | uses: actions/checkout@master 21 | 22 | - name: Login 23 | uses: atlassian/gajira-login@master 24 | env: 25 | JIRA_BASE_URL: ${{ secrets.JIRA_BASE_URL }} 26 | JIRA_USER_EMAIL: ${{ secrets.JIRA_USER_EMAIL }} 27 | JIRA_API_TOKEN: ${{ secrets.JIRA_API_TOKEN }} 28 | 29 | - name: Create NEW JIRA ticket 30 | id: create 31 | uses: atlassian/gajira-create@master 32 | with: 33 | project: CONUPDATE 34 | issuetype: Task 35 | summary: | 36 | Github PR [Assign the ND component] | Repo: ${{ github.repository }} | PR# ${{github.event.number}} 37 | description: | 38 | Repo link: https://github.com/${{ github.repository }} 39 | PR no. ${{ github.event.pull_request.number }} 40 | PR title: ${{ github.event.pull_request.title }} 41 | PR description: ${{ github.event.pull_request.description }} 42 | In addition, please resolve other issues, if any. 
43 | fields: '{"components": [{"name":"Github PR"}], "customfield_16449":"https://classroom.udacity.com/", "customfield_16450":"Resolve the PR", "labels": ["github"], "priority":{"id": "4"}}' 44 | 45 | - name: Log created issue 46 | run: echo "Issue ${{ steps.create.outputs.issue }} was created" 47 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | .Python 10 | env/ 11 | bin/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | eggs/ 16 | lib/ 17 | lib64/ 18 | parts/ 19 | sdist/ 20 | var/ 21 | *.egg-info/ 22 | .installed.cfg 23 | *.egg 24 | 25 | # Installer logs 26 | pip-log.txt 27 | pip-delete-this-directory.txt 28 | 29 | # Unit test / coverage reports 30 | htmlcov/ 31 | .tox/ 32 | .coverage 33 | .cache 34 | nosetests.xml 35 | coverage.xml 36 | 37 | # Translations 38 | *.mo 39 | 40 | # Mr Developer 41 | .mr.developer.cfg 42 | .project 43 | .pydevproject 44 | 45 | # Rope 46 | .ropeproject 47 | 48 | # Django stuff: 49 | *.log 50 | *.pot 51 | 52 | # Sphinx documentation 53 | docs/_build/ 54 | 55 | # vagrant bookkeeping 56 | .vagrant/ 57 | 58 | # packer stuff 59 | packer_virtualbox-ovf_virtualbox.box 60 | 61 | # maven builds 62 | **/target/* 63 | 64 | .credentials 65 | -------------------------------------------------------------------------------- /CODEOWNERS: -------------------------------------------------------------------------------- 1 | * @udacity/active-public-content -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Udacity and Twitter bring you Real-Time Analytics with Apache Storm 2 | ===== 3 | 4 | Join the course for free: 5 | www.udacity.com/course/ud381 6 | 
-------------------------------------------------------------------------------- /Vagrantfile: -------------------------------------------------------------------------------- 1 | # -*- mode: ruby -*- 2 | # vi: set ft=ruby : 3 | 4 | # Vagrantfile API/syntax version. Don't touch unless you know what you're doing! 5 | VAGRANTFILE_API_VERSION = "2" 6 | 7 | Vagrant.configure(VAGRANTFILE_API_VERSION) do |config| 8 | config.vm.box = "udacity/ud381" 9 | config.vm.network :forwarded_port, guest: 5000, host: 5000 10 | end 11 | -------------------------------------------------------------------------------- /default.json: -------------------------------------------------------------------------------- 1 | { 2 | "variables": { 3 | "home": "{{env `HOME`}}", 4 | "cloud_token": "{{env `VAGRANT_CLOUD_TOKEN`}}", 5 | "version": "{{env `BOX_VERSION`}}" 6 | }, 7 | "builders": [ 8 | { 9 | "type": "virtualbox-ovf", 10 | "source_path": "{{user `home`}}/.vagrant.d/boxes/box-cutter-VAGRANTSLASH-ubuntu1404-i386/1.0.2/virtualbox/box.ovf", 11 | "ssh_username": "vagrant", 12 | "ssh_password": "vagrant", 13 | "headless": true, 14 | "vboxmanage": [ 15 | ["modifyvm", "{{.Name}}", "--memory", "2048"], 16 | ["modifyvm", "{{.Name}}", "--cpus", "2"] 17 | ], 18 | "ssh_wait_timeout": "30s", 19 | "shutdown_command": "sudo shutdown -h now" 20 | } 21 | ], 22 | "provisioners": [ 23 | { 24 | "type": "shell", 25 | "script": "provision.sh" 26 | } 27 | ], 28 | "post-processors": [ 29 | [{ 30 | "type": "vagrant" 31 | }, 32 | { 33 | "type": "vagrant-cloud", 34 | "box_tag": "udacity/ud381", 35 | "access_token": "{{user `cloud_token`}}", 36 | "version": "{{user `version`}}" 37 | }] 38 | ] 39 | } 40 | -------------------------------------------------------------------------------- /lesson1/stage1/src/jvm/udacity/storm/ExclamationTopology.java: -------------------------------------------------------------------------------- 1 | package udacity.storm; 2 | 3 | import backtype.storm.Config; 4 | import 
backtype.storm.LocalCluster; 5 | import backtype.storm.StormSubmitter; 6 | import backtype.storm.task.OutputCollector; 7 | import backtype.storm.task.TopologyContext; 8 | import backtype.storm.testing.TestWordSpout; 9 | import backtype.storm.topology.OutputFieldsDeclarer; 10 | import backtype.storm.topology.TopologyBuilder; 11 | import backtype.storm.topology.base.BaseRichBolt; 12 | import backtype.storm.tuple.Fields; 13 | import backtype.storm.tuple.Tuple; 14 | import backtype.storm.tuple.Values; 15 | import backtype.storm.utils.Utils; 16 | 17 | import java.util.Map; 18 | 19 | /** 20 | * This is a basic example of a Storm topology. 21 | */ 22 | 23 | /** 24 | * This is a basic example of a storm topology. 25 | * 26 | * This topology demonstrates how to add three exclamation marks '!!!' 27 | * to each word emitted 28 | * 29 | * This is an example for Udacity Real Time Analytics Course - ud381 30 | * 31 | */ 32 | public class ExclamationTopology { 33 | 34 | /** 35 | * A bolt that adds the exclamation marks '!!!' 
to word 36 | */ 37 | public static class ExclamationBolt extends BaseRichBolt 38 | { 39 | // To output tuples from this bolt to the next stage bolts, if any 40 | OutputCollector _collector; 41 | 42 | @Override 43 | public void prepare( 44 | Map map, 45 | TopologyContext topologyContext, 46 | OutputCollector collector) 47 | { 48 | // save the output collector for emitting tuples 49 | _collector = collector; 50 | } 51 | 52 | @Override 53 | public void execute(Tuple tuple) 54 | { 55 | // get the column word from tuple 56 | String word = tuple.getString(0); 57 | 58 | // build the word with the exclamation marks appended 59 | StringBuilder exclamatedWord = new StringBuilder(); 60 | exclamatedWord.append(word).append("!!!"); 61 | 62 | // emit the word with exclamations 63 | _collector.emit(tuple, new Values(exclamatedWord.toString())); 64 | } 65 | 66 | @Override 67 | public void declareOutputFields(OutputFieldsDeclarer declarer) 68 | { 69 | // tell storm the schema of the output tuple for this spout 70 | 71 | // tuple consists of a single column called 'exclamated-word' 72 | declarer.declare(new Fields("exclamated-word")); 73 | } 74 | } 75 | 76 | public static void main(String[] args) throws Exception 77 | { 78 | // create the topology 79 | TopologyBuilder builder = new TopologyBuilder(); 80 | 81 | // attach the word spout to the topology - parallelism of 10 82 | builder.setSpout("word", new TestWordSpout(), 10); 83 | 84 | // attach the exclamation bolt to the topology - parallelism of 3 85 | builder.setBolt("exclaim1", new ExclamationBolt(), 3).shuffleGrouping("word"); 86 | 87 | // attach another exclamation bolt to the topology - parallelism of 2 88 | builder.setBolt("exclaim2", new ExclamationBolt(), 2).shuffleGrouping("exclaim1"); 89 | 90 | // create the default config object 91 | Config conf = new Config(); 92 | 93 | // set the config in debugging mode 94 | conf.setDebug(true); 95 | 96 | if (args != null && args.length > 0) { 97 | 98 | // run it in a live cluster 99 
| 100 | // set the number of workers for running all spout and bolt tasks 101 | conf.setNumWorkers(3); 102 | 103 | // create the topology and submit with config 104 | StormSubmitter.submitTopology(args[0], conf, builder.createTopology()); 105 | 106 | } else { 107 | 108 | // run it in a simulated local cluster 109 | 110 | // create the local cluster instance 111 | LocalCluster cluster = new LocalCluster(); 112 | 113 | // submit the topology to the local cluster 114 | cluster.submitTopology("exclamation", conf, builder.createTopology()); 115 | 116 | // let the topology run for 20 seconds. note topologies never terminate! 117 | Thread.sleep(20000); 118 | 119 | // kill the topology 120 | cluster.killTopology("exclamation"); 121 | 122 | // we are done, so shutdown the local cluster 123 | cluster.shutdown(); 124 | } 125 | } 126 | } 127 | -------------------------------------------------------------------------------- /lesson1/stage2/src/jvm/udacity/storm/ExclamationTopology.java: -------------------------------------------------------------------------------- 1 | package udacity.storm; 2 | 3 | import backtype.storm.Config; 4 | import backtype.storm.LocalCluster; 5 | import backtype.storm.StormSubmitter; 6 | import backtype.storm.task.OutputCollector; 7 | import backtype.storm.task.TopologyContext; 8 | import backtype.storm.testing.TestWordSpout; 9 | import backtype.storm.topology.OutputFieldsDeclarer; 10 | import backtype.storm.topology.TopologyBuilder; 11 | import backtype.storm.topology.base.BaseRichBolt; 12 | import backtype.storm.tuple.Fields; 13 | import backtype.storm.tuple.Tuple; 14 | import backtype.storm.tuple.Values; 15 | import backtype.storm.utils.Utils; 16 | 17 | import java.util.Map; 18 | 19 | /** 20 | * This is a basic example of a Storm topology. 21 | */ 22 | 23 | /** 24 | * This is a basic example of a storm topology. 25 | * 26 | * This topology demonstrates how to add three exclamation marks '!!!' 
27 | * to each word emitted 28 | * 29 | * This is an example for Udacity Real Time Analytics Course - ud381 30 | * 31 | */ 32 | public class ExclamationTopology { 33 | 34 | /** 35 | * A bolt that adds the exclamation marks '!!!' to word 36 | */ 37 | public static class ExclamationBolt extends BaseRichBolt 38 | { 39 | // To output tuples from this bolt to the next stage bolts, if any 40 | OutputCollector _collector; 41 | 42 | @Override 43 | public void prepare( 44 | Map map, 45 | TopologyContext topologyContext, 46 | OutputCollector collector) 47 | { 48 | // save the output collector for emitting tuples 49 | _collector = collector; 50 | } 51 | 52 | @Override 53 | public void execute(Tuple tuple) 54 | { 55 | // get the column word from tuple 56 | String word = tuple.getString(0); 57 | 58 | // build the word with the exclamation marks appended 59 | StringBuilder exclamatedWord = new StringBuilder(); 60 | exclamatedWord.append(word).append("!!!"); 61 | 62 | // emit the word with exclamations 63 | _collector.emit(tuple, new Values(exclamatedWord.toString())); 64 | } 65 | 66 | @Override 67 | public void declareOutputFields(OutputFieldsDeclarer declarer) 68 | { 69 | // tell storm the schema of the output tuple for this spout 70 | 71 | // tuple consists of a single column called 'exclamated-word' 72 | declarer.declare(new Fields("exclamated-word")); 73 | } 74 | } 75 | 76 | public static void main(String[] args) throws Exception 77 | { 78 | // create the topology 79 | TopologyBuilder builder = new TopologyBuilder(); 80 | 81 | // attach the word spout to the topology - parallelism of 10 82 | builder.setSpout("word", new TestWordSpout(), 10); 83 | 84 | // attach the exclamation bolt to the topology - parallelism of 3 85 | builder.setBolt("exclaim1", new ExclamationBolt(), 3).shuffleGrouping("word"); 86 | 87 | // attach another exclamation bolt to the topology - parallelism of 2 88 | builder.setBolt("exclaim2", new ExclamationBolt(), 2).shuffleGrouping("exclaim1"); 89 | 90 | 
// create the default config object 91 | Config conf = new Config(); 92 | 93 | // set the config in debugging mode 94 | conf.setDebug(true); 95 | 96 | if (args != null && args.length > 0) { 97 | 98 | // run it in a live cluster 99 | 100 | // set the number of workers for running all spout and bolt tasks 101 | conf.setNumWorkers(3); 102 | 103 | // create the topology and submit with config 104 | StormSubmitter.submitTopology(args[0], conf, builder.createTopology()); 105 | 106 | } else { 107 | 108 | // run it in a simulated local cluster 109 | 110 | // create the local cluster instance 111 | LocalCluster cluster = new LocalCluster(); 112 | 113 | // submit the topology to the local cluster 114 | cluster.submitTopology("exclamation", conf, builder.createTopology()); 115 | 116 | // let the topology run for 30 seconds. note topologies never terminate! 117 | Thread.sleep(30000); 118 | 119 | // kill the topology 120 | cluster.killTopology("exclamation"); 121 | 122 | // we are done, so shutdown the local cluster 123 | cluster.shutdown(); 124 | } 125 | } 126 | } 127 | -------------------------------------------------------------------------------- /lesson1/stage2/src/jvm/udacity/storm/spout/RandomSentenceSpout.java: -------------------------------------------------------------------------------- 1 | package udacity.storm.spout; 2 | 3 | import backtype.storm.spout.SpoutOutputCollector; 4 | import backtype.storm.task.TopologyContext; 5 | import backtype.storm.topology.OutputFieldsDeclarer; 6 | import backtype.storm.topology.base.BaseRichSpout; 7 | import backtype.storm.tuple.Fields; 8 | import backtype.storm.tuple.Values; 9 | import backtype.storm.utils.Utils; 10 | 11 | import java.util.Map; 12 | import java.util.Random; 13 | 14 | public class RandomSentenceSpout extends BaseRichSpout { 15 | SpoutOutputCollector _collector; 16 | Random _rand; 17 | 18 | 19 | @Override 20 | public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) { 21 | _collector = 
collector; 22 | _rand = new Random(); 23 | } 24 | 25 | @Override 26 | public void nextTuple() { 27 | Utils.sleep(100); 28 | String[] sentences = new String[]{ 29 | "the cow jumped over the moon", 30 | "an apple a day keeps the doctor away", 31 | "four score and seven years ago", 32 | "snow white and the seven dwarfs", 33 | "i am at two with nature" 34 | }; 35 | String sentence = sentences[_rand.nextInt(sentences.length)]; 36 | _collector.emit(new Values(sentence)); 37 | } 38 | 39 | @Override 40 | public void declareOutputFields(OutputFieldsDeclarer declarer) { 41 | declarer.declare(new Fields("sentence")); 42 | } 43 | 44 | } 45 | -------------------------------------------------------------------------------- /lesson1/stage3/src/jvm/udacity/storm/ExclamationTopology.java: -------------------------------------------------------------------------------- 1 | package udacity.storm; 2 | 3 | import backtype.storm.Config; 4 | import backtype.storm.LocalCluster; 5 | import backtype.storm.StormSubmitter; 6 | import backtype.storm.task.OutputCollector; 7 | import backtype.storm.task.TopologyContext; 8 | import backtype.storm.testing.TestWordSpout; 9 | import backtype.storm.topology.OutputFieldsDeclarer; 10 | import backtype.storm.topology.TopologyBuilder; 11 | import backtype.storm.topology.base.BaseRichBolt; 12 | import backtype.storm.tuple.Fields; 13 | import backtype.storm.tuple.Tuple; 14 | import backtype.storm.tuple.Values; 15 | import backtype.storm.utils.Utils; 16 | 17 | import java.util.Map; 18 | 19 | /** 20 | * This is a basic example of a Storm topology. 21 | */ 22 | 23 | /** 24 | * This is a basic example of a storm topology. 25 | * 26 | * This topology demonstrates how to add three exclamation marks '!!!' 27 | * to each word emitted 28 | * 29 | * This is an example for Udacity Real Time Analytics Course - ud381 30 | * 31 | */ 32 | public class ExclamationTopology { 33 | 34 | /** 35 | * A bolt that adds the exclamation marks '!!!' 
to word 36 | */ 37 | public static class ExclamationBolt extends BaseRichBolt 38 | { 39 | // To output tuples from this bolt to the next stage bolts, if any 40 | OutputCollector _collector; 41 | 42 | @Override 43 | public void prepare( 44 | Map map, 45 | TopologyContext topologyContext, 46 | OutputCollector collector) 47 | { 48 | // save the output collector for emitting tuples 49 | _collector = collector; 50 | } 51 | 52 | @Override 53 | public void execute(Tuple tuple) 54 | { 55 | // get the column word from tuple 56 | String word = tuple.getString(0); 57 | 58 | // build the word with the exclamation marks appended 59 | StringBuilder exclamatedWord = new StringBuilder(); 60 | exclamatedWord.append(word).append("!!!"); 61 | 62 | // emit the word with exclamations 63 | _collector.emit(tuple, new Values(exclamatedWord.toString())); 64 | } 65 | 66 | @Override 67 | public void declareOutputFields(OutputFieldsDeclarer declarer) 68 | { 69 | // tell storm the schema of the output tuple for this spout 70 | 71 | // tuple consists of a single column called 'exclamated-word' 72 | declarer.declare(new Fields("exclamated-word")); 73 | } 74 | } 75 | 76 | public static void main(String[] args) throws Exception 77 | { 78 | // create the topology 79 | TopologyBuilder builder = new TopologyBuilder(); 80 | 81 | // attach the word spout to the topology - parallelism of 10 82 | builder.setSpout("word", new TestWordSpout(), 10); 83 | 84 | // attach the exclamation bolt to the topology - parallelism of 3 85 | builder.setBolt("exclaim1", new ExclamationBolt(), 3).shuffleGrouping("word"); 86 | 87 | // attach another exclamation bolt to the topology - parallelism of 2 88 | builder.setBolt("exclaim2", new ExclamationBolt(), 2).shuffleGrouping("exclaim1"); 89 | 90 | // create the default config object 91 | Config conf = new Config(); 92 | 93 | // set the config in debugging mode 94 | conf.setDebug(true); 95 | 96 | if (args != null && args.length > 0) { 97 | 98 | // run it in a live cluster 99 
| 100 | // set the number of workers for running all spout and bolt tasks 101 | conf.setNumWorkers(3); 102 | 103 | // create the topology and submit with config 104 | StormSubmitter.submitTopology(args[0], conf, builder.createTopology()); 105 | 106 | } else { 107 | 108 | // run it in a simulated local cluster 109 | 110 | // create the local cluster instance 111 | LocalCluster cluster = new LocalCluster(); 112 | 113 | // submit the topology to the local cluster 114 | cluster.submitTopology("exclamation", conf, builder.createTopology()); 115 | 116 | // let the topology run for 30 seconds. note topologies never terminate! 117 | Thread.sleep(30000); 118 | 119 | // kill the topology 120 | cluster.killTopology("exclamation"); 121 | 122 | // we are done, so shutdown the local cluster 123 | cluster.shutdown(); 124 | } 125 | } 126 | } 127 | -------------------------------------------------------------------------------- /lesson1/stage3/src/jvm/udacity/storm/spout/RandomSentenceSpout.java: -------------------------------------------------------------------------------- 1 | package udacity.storm.spout; 2 | 3 | import backtype.storm.spout.SpoutOutputCollector; 4 | import backtype.storm.task.TopologyContext; 5 | import backtype.storm.topology.OutputFieldsDeclarer; 6 | import backtype.storm.topology.base.BaseRichSpout; 7 | import backtype.storm.tuple.Fields; 8 | import backtype.storm.tuple.Values; 9 | import backtype.storm.utils.Utils; 10 | 11 | import java.util.Map; 12 | import java.util.Random; 13 | 14 | public class RandomSentenceSpout extends BaseRichSpout { 15 | SpoutOutputCollector _collector; 16 | Random _rand; 17 | 18 | 19 | @Override 20 | public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) { 21 | _collector = collector; 22 | _rand = new Random(); 23 | } 24 | 25 | @Override 26 | public void nextTuple() { 27 | Utils.sleep(100); 28 | String[] sentences = new String[]{ 29 | "the cow jumped over the moon", 30 | "an apple a day keeps the doctor 
away", 31 | "four score and seven years ago", 32 | "snow white and the seven dwarfs", 33 | "i am at two with nature" 34 | }; 35 | String sentence = sentences[_rand.nextInt(sentences.length)]; 36 | _collector.emit(new Values(sentence)); 37 | } 38 | 39 | @Override 40 | public void declareOutputFields(OutputFieldsDeclarer declarer) { 41 | declarer.declare(new Fields("sentence")); 42 | } 43 | 44 | } 45 | -------------------------------------------------------------------------------- /lesson2/stage1/src/jvm/udacity/storm/spout/RandomSentenceSpout.java: -------------------------------------------------------------------------------- 1 | package udacity.storm.spout; 2 | 3 | import backtype.storm.spout.SpoutOutputCollector; 4 | import backtype.storm.task.TopologyContext; 5 | import backtype.storm.topology.OutputFieldsDeclarer; 6 | import backtype.storm.topology.base.BaseRichSpout; 7 | import backtype.storm.tuple.Fields; 8 | import backtype.storm.tuple.Values; 9 | import backtype.storm.utils.Utils; 10 | 11 | import java.util.Map; 12 | import java.util.Random; 13 | 14 | public class RandomSentenceSpout extends BaseRichSpout { 15 | SpoutOutputCollector _collector; 16 | Random _rand; 17 | 18 | 19 | @Override 20 | public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) { 21 | _collector = collector; 22 | _rand = new Random(); 23 | } 24 | 25 | @Override 26 | public void nextTuple() { 27 | Utils.sleep(100); 28 | String[] sentences = new String[]{ 29 | "the cow jumped over the moon", 30 | "an apple a day keeps the doctor away", 31 | "four score and seven years ago", 32 | "snow white and the seven dwarfs", 33 | "i am at two with nature" 34 | }; 35 | String sentence = sentences[_rand.nextInt(sentences.length)]; 36 | _collector.emit(new Values(sentence)); 37 | } 38 | 39 | @Override 40 | public void declareOutputFields(OutputFieldsDeclarer declarer) { 41 | declarer.declare(new Fields("sentence")); 42 | } 43 | 44 | } 45 | 
-------------------------------------------------------------------------------- /lesson2/stage2/src/jvm/udacity/storm/spout/RandomSentenceSpout.java: -------------------------------------------------------------------------------- 1 | package udacity.storm.spout; 2 | 3 | import backtype.storm.spout.SpoutOutputCollector; 4 | import backtype.storm.task.TopologyContext; 5 | import backtype.storm.topology.OutputFieldsDeclarer; 6 | import backtype.storm.topology.base.BaseRichSpout; 7 | import backtype.storm.tuple.Fields; 8 | import backtype.storm.tuple.Values; 9 | import backtype.storm.utils.Utils; 10 | 11 | import java.util.Map; 12 | import java.util.Random; 13 | 14 | public class RandomSentenceSpout extends BaseRichSpout { 15 | SpoutOutputCollector _collector; 16 | Random _rand; 17 | 18 | 19 | @Override 20 | public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) { 21 | _collector = collector; 22 | _rand = new Random(); 23 | } 24 | 25 | @Override 26 | public void nextTuple() { 27 | Utils.sleep(100); 28 | String[] sentences = new String[]{ 29 | "the cow jumped over the moon", 30 | "an apple a day keeps the doctor away", 31 | "four score and seven years ago", 32 | "snow white and the seven dwarfs", 33 | "i am at two with nature" }; 34 | String sentence = sentences[_rand.nextInt(sentences.length)]; 35 | _collector.emit(new Values(sentence)); 36 | } 37 | 38 | @Override 39 | public void declareOutputFields(OutputFieldsDeclarer declarer) { 40 | declarer.declare(new Fields("sentence")); 41 | } 42 | 43 | } 44 | -------------------------------------------------------------------------------- /lesson2/stage3/src/jvm/udacity/storm/spout/RandomSentenceSpout.java: -------------------------------------------------------------------------------- 1 | package udacity.storm.spout; 2 | 3 | import backtype.storm.spout.SpoutOutputCollector; 4 | import backtype.storm.task.TopologyContext; 5 | import backtype.storm.topology.OutputFieldsDeclarer; 6 | import 
backtype.storm.topology.base.BaseRichSpout; 7 | import backtype.storm.tuple.Fields; 8 | import backtype.storm.tuple.Values; 9 | import backtype.storm.utils.Utils; 10 | 11 | import java.util.Map; 12 | import java.util.Random; 13 | 14 | public class RandomSentenceSpout extends BaseRichSpout { 15 | SpoutOutputCollector _collector; 16 | Random _rand; 17 | 18 | 19 | @Override 20 | public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) { 21 | _collector = collector; 22 | _rand = new Random(); 23 | } 24 | 25 | @Override 26 | public void nextTuple() { 27 | Utils.sleep(100); 28 | String[] sentences = new String[]{ 29 | "the cow jumped over the moon", 30 | "an apple a day keeps the doctor away", 31 | "four score and seven years ago", 32 | "snow white and the seven dwarfs", 33 | "i am at two with nature" 34 | }; 35 | String sentence = sentences[_rand.nextInt(sentences.length)]; 36 | _collector.emit(new Values(sentence)); 37 | } 38 | 39 | @Override 40 | public void declareOutputFields(OutputFieldsDeclarer declarer) { 41 | declarer.declare(new Fields("sentence")); 42 | } 43 | 44 | } 45 | -------------------------------------------------------------------------------- /lesson2/stage4/src/jvm/udacity/storm/spout/RandomSentenceSpout.java: -------------------------------------------------------------------------------- 1 | package udacity.storm.spout; 2 | 3 | import backtype.storm.spout.SpoutOutputCollector; 4 | import backtype.storm.task.TopologyContext; 5 | import backtype.storm.topology.OutputFieldsDeclarer; 6 | import backtype.storm.topology.base.BaseRichSpout; 7 | import backtype.storm.tuple.Fields; 8 | import backtype.storm.tuple.Values; 9 | import backtype.storm.utils.Utils; 10 | 11 | import java.util.Map; 12 | import java.util.Random; 13 | 14 | public class RandomSentenceSpout extends BaseRichSpout { 15 | SpoutOutputCollector _collector; 16 | Random _rand; 17 | 18 | 19 | @Override 20 | public void open(Map conf, TopologyContext context, 
SpoutOutputCollector collector) { 21 | _collector = collector; 22 | _rand = new Random(); 23 | } 24 | 25 | @Override 26 | public void nextTuple() { 27 | Utils.sleep(100); 28 | String[] sentences = new String[]{ 29 | "the cow jumped over the moon", 30 | "an apple a day keeps the doctor away", 31 | "four score and seven years ago", 32 | "snow white and the seven dwarfs", 33 | "i am at two with nature" 34 | }; 35 | String sentence = sentences[_rand.nextInt(sentences.length)]; 36 | _collector.emit(new Values(sentence)); 37 | } 38 | 39 | @Override 40 | public void declareOutputFields(OutputFieldsDeclarer declarer) { 41 | declarer.declare(new Fields("sentence")); 42 | } 43 | 44 | } 45 | -------------------------------------------------------------------------------- /lesson2/stage6/src/jvm/udacity/storm/CountBolt.java: -------------------------------------------------------------------------------- 1 | package udacity.storm; 2 | 3 | import backtype.storm.Config; 4 | import backtype.storm.LocalCluster; 5 | import backtype.storm.StormSubmitter; 6 | import backtype.storm.spout.SpoutOutputCollector; 7 | import backtype.storm.task.OutputCollector; 8 | import backtype.storm.task.TopologyContext; 9 | import backtype.storm.testing.TestWordSpout; 10 | import backtype.storm.topology.OutputFieldsDeclarer; 11 | import backtype.storm.topology.TopologyBuilder; 12 | import backtype.storm.topology.base.BaseRichSpout; 13 | import backtype.storm.topology.base.BaseRichBolt; 14 | import backtype.storm.tuple.Fields; 15 | import backtype.storm.tuple.Tuple; 16 | import backtype.storm.tuple.Values; 17 | import backtype.storm.utils.Utils; 18 | 19 | import java.util.HashMap; 20 | import java.util.Map; 21 | 22 | /** 23 | * A bolt that counts the words that it receives 24 | */ 25 | public class CountBolt extends BaseRichBolt 26 | { 27 | // To output tuples from this bolt to the next stage bolts, if any 28 | private OutputCollector collector; 29 | 30 | // Map to store the count of the words 31 | 
private Map countMap; 32 | 33 | @Override 34 | public void prepare( 35 | Map map, 36 | TopologyContext topologyContext, 37 | OutputCollector outputCollector) 38 | { 39 | 40 | // save the collector for emitting tuples 41 | collector = outputCollector; 42 | 43 | // create and initialize the map 44 | countMap = new HashMap(); 45 | } 46 | 47 | @Override 48 | public void execute(Tuple tuple) 49 | { 50 | // get the word from the 1st column of incoming tuple 51 | String word = tuple.getString(0); 52 | 53 | // check if the word is present in the map 54 | if (countMap.get(word) == null) { 55 | 56 | // not present, add the word with a count of 1 57 | countMap.put(word, 1); 58 | } else { 59 | 60 | // already there, hence get the count 61 | Integer val = countMap.get(word); 62 | 63 | // increment the count and save it to the map 64 | countMap.put(word, ++val); 65 | } 66 | 67 | // emit the word and count 68 | collector.emit(new Values(word, countMap.get(word))); 69 | } 70 | 71 | @Override 72 | public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) 73 | { 74 | // tell storm the schema of the output tuple for this spout 75 | // tuple consists of a two columns called 'word' and 'count' 76 | 77 | // declare the first column 'word', second column 'count' 78 | outputFieldsDeclarer.declare(new Fields("word","count")); 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /lesson2/stage6/src/jvm/udacity/storm/ParseTweetBolt.java: -------------------------------------------------------------------------------- 1 | package udacity.storm; 2 | 3 | import backtype.storm.Config; 4 | import backtype.storm.LocalCluster; 5 | import backtype.storm.StormSubmitter; 6 | import backtype.storm.spout.SpoutOutputCollector; 7 | import backtype.storm.task.OutputCollector; 8 | import backtype.storm.task.TopologyContext; 9 | import backtype.storm.testing.TestWordSpout; 10 | import backtype.storm.topology.OutputFieldsDeclarer; 11 | import 
backtype.storm.topology.TopologyBuilder; 12 | import backtype.storm.topology.base.BaseRichSpout; 13 | import backtype.storm.topology.base.BaseRichBolt; 14 | import backtype.storm.tuple.Fields; 15 | import backtype.storm.tuple.Tuple; 16 | import backtype.storm.tuple.Values; 17 | import backtype.storm.utils.Utils; 18 | 19 | import java.util.Map; 20 | 21 | /** 22 | * A bolt that parses the tweet into words 23 | */ 24 | public class ParseTweetBolt extends BaseRichBolt 25 | { 26 | // To output tuples from this bolt to the count bolt 27 | OutputCollector collector; 28 | 29 | @Override 30 | public void prepare( 31 | Map map, 32 | TopologyContext topologyContext, 33 | OutputCollector outputCollector) 34 | { 35 | // save the output collector for emitting tuples 36 | collector = outputCollector; 37 | } 38 | 39 | @Override 40 | public void execute(Tuple tuple) 41 | { 42 | // get the 1st column 'tweet' from tuple 43 | String tweet = tuple.getString(0); 44 | 45 | // provide the delimiters for splitting the tweet 46 | String delims = "[ .,?!]+"; 47 | 48 | // now split the tweet into tokens 49 | String[] tokens = tweet.split(delims); 50 | 51 | // for each token/word, emit it 52 | for (String token: tokens) { 53 | collector.emit(new Values(token)); 54 | } 55 | } 56 | 57 | @Override 58 | public void declareOutputFields(OutputFieldsDeclarer declarer) 59 | { 60 | // tell storm the schema of the output tuple for this spout 61 | // tuple consists of a single column called 'tweet-word' 62 | declarer.declare(new Fields("tweet-word")); 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /lesson2/stage6/src/jvm/udacity/storm/ReportBolt.java: -------------------------------------------------------------------------------- 1 | package udacity.storm; 2 | 3 | import backtype.storm.Config; 4 | import backtype.storm.LocalCluster; 5 | import backtype.storm.StormSubmitter; 6 | import backtype.storm.spout.SpoutOutputCollector; 7 | import 
backtype.storm.task.OutputCollector; 8 | import backtype.storm.task.TopologyContext; 9 | import backtype.storm.testing.TestWordSpout; 10 | import backtype.storm.topology.OutputFieldsDeclarer; 11 | import backtype.storm.topology.TopologyBuilder; 12 | import backtype.storm.topology.base.BaseRichSpout; 13 | import backtype.storm.topology.base.BaseRichBolt; 14 | import backtype.storm.tuple.Fields; 15 | import backtype.storm.tuple.Tuple; 16 | import backtype.storm.tuple.Values; 17 | import backtype.storm.utils.Utils; 18 | 19 | import java.util.Map; 20 | 21 | import com.lambdaworks.redis.RedisClient; 22 | import com.lambdaworks.redis.RedisConnection; 23 | 24 | /** 25 | * A bolt that prints the word and count to redis 26 | */ 27 | public class ReportBolt extends BaseRichBolt 28 | { 29 | // place holder to keep the connection to redis 30 | transient RedisConnection redis; 31 | 32 | @Override 33 | public void prepare( 34 | Map map, 35 | TopologyContext topologyContext, 36 | OutputCollector outputCollector) 37 | { 38 | // instantiate a redis connection 39 | RedisClient client = new RedisClient("localhost",6379); 40 | 41 | // initiate the actual connection 42 | redis = client.connect(); 43 | } 44 | 45 | @Override 46 | public void execute(Tuple tuple) 47 | { 48 | // access the first column 'word' 49 | String word = tuple.getStringByField("word"); 50 | 51 | // access the second column 'count' 52 | Integer count = tuple.getIntegerByField("count"); 53 | 54 | // publish the word count to redis using word as the key 55 | redis.publish("WordCountTopology", word + "|" + Long.toString(count)); 56 | } 57 | 58 | public void declareOutputFields(OutputFieldsDeclarer declarer) 59 | { 60 | // nothing to add - since it is the final bolt 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /lesson2/stage6/src/jvm/udacity/storm/TweetTopology.java: -------------------------------------------------------------------------------- 1 | package 
udacity.storm; 2 | 3 | import backtype.storm.Config; 4 | import backtype.storm.LocalCluster; 5 | import backtype.storm.StormSubmitter; 6 | import backtype.storm.spout.SpoutOutputCollector; 7 | import backtype.storm.task.OutputCollector; 8 | import backtype.storm.task.TopologyContext; 9 | import backtype.storm.testing.TestWordSpout; 10 | import backtype.storm.topology.OutputFieldsDeclarer; 11 | import backtype.storm.topology.TopologyBuilder; 12 | import backtype.storm.topology.base.BaseRichSpout; 13 | import backtype.storm.topology.base.BaseRichBolt; 14 | import backtype.storm.tuple.Fields; 15 | import backtype.storm.tuple.Tuple; 16 | import backtype.storm.tuple.Values; 17 | import backtype.storm.utils.Utils; 18 | 19 | class TweetTopology 20 | { 21 | public static void main(String[] args) throws Exception 22 | { 23 | // create the topology 24 | TopologyBuilder builder = new TopologyBuilder(); 25 | 26 | /* 27 | * In order to create the spout, you need to get twitter credentials 28 | * If you need to use Twitter firehose/Tweet stream for your idea, 29 | * create a set of credentials by following the instructions at 30 | * 31 | * https://dev.twitter.com/discussions/631 32 | * 33 | */ 34 | 35 | // now create the tweet spout with the credentials 36 | TweetSpout tweetSpout = new TweetSpout( 37 | "[Your customer key]", 38 | "[Your secret key]", 39 | "[Your access token]", 40 | "[Your access secret]" 41 | ); 42 | 43 | //********************************************************************* 44 | // Complete the Topology. 45 | // Part 0: attach the tweet spout to the topology - parallelism of 1 46 | // Part 1: // attach the parse tweet bolt, parallelism of 10 (what grouping is needed?) 47 | // Part 2: // attach the count bolt, parallelism of 15 (what grouping is needed?) 48 | // Part 3: attach the report bolt, parallelism of 1 (what grouping is needed?) 49 | // Submit and run the topology. 
50 | 51 | 52 | //********************************************************************* 53 | 54 | // create the default config object 55 | Config conf = new Config(); 56 | 57 | // set the config in debugging mode 58 | conf.setDebug(true); 59 | 60 | if (args != null && args.length > 0) { 61 | 62 | // run it in a live cluster 63 | 64 | // set the number of workers for running all spout and bolt tasks 65 | conf.setNumWorkers(3); 66 | 67 | // create the topology and submit with config 68 | StormSubmitter.submitTopology(args[0], conf, builder.createTopology()); 69 | 70 | } else { 71 | 72 | // run it in a simulated local cluster 73 | 74 | // set the number of threads to run - similar to setting number of workers in live cluster 75 | conf.setMaxTaskParallelism(3); 76 | 77 | // create the local cluster instance 78 | LocalCluster cluster = new LocalCluster(); 79 | 80 | // submit the topology to the local cluster 81 | cluster.submitTopology("tweet-word-count", conf, builder.createTopology()); 82 | 83 | // let the topology run for 300 seconds. note topologies never terminate! 
84 | Utils.sleep(300000); 85 | 86 | // now kill the topology 87 | cluster.killTopology("tweet-word-count"); 88 | 89 | // we are done, so shutdown the local cluster 90 | cluster.shutdown(); 91 | } 92 | } 93 | } 94 | -------------------------------------------------------------------------------- /lesson2/stage7/src/jvm/udacity/storm/CountBolt.java: -------------------------------------------------------------------------------- 1 | package udacity.storm; 2 | 3 | import backtype.storm.Config; 4 | import backtype.storm.LocalCluster; 5 | import backtype.storm.StormSubmitter; 6 | import backtype.storm.spout.SpoutOutputCollector; 7 | import backtype.storm.task.OutputCollector; 8 | import backtype.storm.task.TopologyContext; 9 | import backtype.storm.testing.TestWordSpout; 10 | import backtype.storm.topology.OutputFieldsDeclarer; 11 | import backtype.storm.topology.TopologyBuilder; 12 | import backtype.storm.topology.base.BaseRichSpout; 13 | import backtype.storm.topology.base.BaseRichBolt; 14 | import backtype.storm.tuple.Fields; 15 | import backtype.storm.tuple.Tuple; 16 | import backtype.storm.tuple.Values; 17 | import backtype.storm.utils.Utils; 18 | 19 | import java.util.HashMap; 20 | import java.util.Map; 21 | 22 | /** 23 | * A bolt that counts the words that it receives 24 | */ 25 | public class CountBolt extends BaseRichBolt 26 | { 27 | // To output tuples from this bolt to the next stage bolts, if any 28 | private OutputCollector collector; 29 | 30 | // Map to store the count of the words 31 | private Map countMap; 32 | 33 | @Override 34 | public void prepare( 35 | Map map, 36 | TopologyContext topologyContext, 37 | OutputCollector outputCollector) 38 | { 39 | 40 | // save the collector for emitting tuples 41 | collector = outputCollector; 42 | 43 | // create and initialize the map 44 | countMap = new HashMap(); 45 | } 46 | 47 | @Override 48 | public void execute(Tuple tuple) 49 | { 50 | // get the word from the 1st column of incoming tuple 51 | String word = 
tuple.getString(0); 52 | 53 | // check if the word is present in the map 54 | if (countMap.get(word) == null) { 55 | 56 | // not present, add the word with a count of 1 57 | countMap.put(word, 1); 58 | } else { 59 | 60 | // already there, hence get the count 61 | Integer val = countMap.get(word); 62 | 63 | // increment the count and save it to the map 64 | countMap.put(word, ++val); 65 | } 66 | 67 | // emit the word and count 68 | collector.emit(new Values(word, countMap.get(word))); 69 | } 70 | 71 | @Override 72 | public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) 73 | { 74 | // tell storm the schema of the output tuple for this spout 75 | // tuple consists of a two columns called 'word' and 'count' 76 | 77 | // declare the first column 'word', second column 'count' 78 | outputFieldsDeclarer.declare(new Fields("word","count")); 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /lesson2/stage7/src/jvm/udacity/storm/ParseTweetBolt.java: -------------------------------------------------------------------------------- 1 | package udacity.storm; 2 | 3 | import backtype.storm.Config; 4 | import backtype.storm.LocalCluster; 5 | import backtype.storm.StormSubmitter; 6 | import backtype.storm.spout.SpoutOutputCollector; 7 | import backtype.storm.task.OutputCollector; 8 | import backtype.storm.task.TopologyContext; 9 | import backtype.storm.testing.TestWordSpout; 10 | import backtype.storm.topology.OutputFieldsDeclarer; 11 | import backtype.storm.topology.TopologyBuilder; 12 | import backtype.storm.topology.base.BaseRichSpout; 13 | import backtype.storm.topology.base.BaseRichBolt; 14 | import backtype.storm.tuple.Fields; 15 | import backtype.storm.tuple.Tuple; 16 | import backtype.storm.tuple.Values; 17 | import backtype.storm.utils.Utils; 18 | 19 | import java.util.Map; 20 | 21 | /** 22 | * A bolt that parses the tweet into words 23 | */ 24 | public class ParseTweetBolt extends BaseRichBolt 25 | { 26 
| // To output tuples from this bolt to the count bolt 27 | OutputCollector collector; 28 | 29 | @Override 30 | public void prepare( 31 | Map map, 32 | TopologyContext topologyContext, 33 | OutputCollector outputCollector) 34 | { 35 | // save the output collector for emitting tuples 36 | collector = outputCollector; 37 | } 38 | 39 | @Override 40 | public void execute(Tuple tuple) 41 | { 42 | // get the 1st column 'tweet' from tuple 43 | String tweet = tuple.getString(0); 44 | 45 | // provide the delimiters for splitting the tweet 46 | String delims = "[ .,?!]+"; 47 | 48 | // now split the tweet into tokens 49 | String[] tokens = tweet.split(delims); 50 | 51 | // for each token/word, emit it 52 | for (String token: tokens) { 53 | collector.emit(new Values(token)); 54 | } 55 | } 56 | 57 | @Override 58 | public void declareOutputFields(OutputFieldsDeclarer declarer) 59 | { 60 | // tell storm the schema of the output tuple for this spout 61 | // tuple consists of a single column called 'tweet-word' 62 | declarer.declare(new Fields("tweet-word")); 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /lesson2/stage7/src/jvm/udacity/storm/ReportBolt.java: -------------------------------------------------------------------------------- 1 | package udacity.storm; 2 | 3 | import backtype.storm.Config; 4 | import backtype.storm.LocalCluster; 5 | import backtype.storm.StormSubmitter; 6 | import backtype.storm.spout.SpoutOutputCollector; 7 | import backtype.storm.task.OutputCollector; 8 | import backtype.storm.task.TopologyContext; 9 | import backtype.storm.testing.TestWordSpout; 10 | import backtype.storm.topology.OutputFieldsDeclarer; 11 | import backtype.storm.topology.TopologyBuilder; 12 | import backtype.storm.topology.base.BaseRichSpout; 13 | import backtype.storm.topology.base.BaseRichBolt; 14 | import backtype.storm.tuple.Fields; 15 | import backtype.storm.tuple.Tuple; 16 | import backtype.storm.tuple.Values; 17 | import 
backtype.storm.utils.Utils; 18 | 19 | import java.util.Map; 20 | 21 | import com.lambdaworks.redis.RedisClient; 22 | import com.lambdaworks.redis.RedisConnection; 23 | 24 | /** 25 | * A bolt that prints the word and count to redis 26 | */ 27 | public class ReportBolt extends BaseRichBolt 28 | { 29 | // place holder to keep the connection to redis 30 | transient RedisConnection redis; 31 | 32 | @Override 33 | public void prepare( 34 | Map map, 35 | TopologyContext topologyContext, 36 | OutputCollector outputCollector) 37 | { 38 | // instantiate a redis connection 39 | RedisClient client = new RedisClient("localhost",6379); 40 | 41 | // initiate the actual connection 42 | redis = client.connect(); 43 | } 44 | 45 | @Override 46 | public void execute(Tuple tuple) 47 | { 48 | // access the first column 'word' 49 | String word = tuple.getStringByField("word"); 50 | 51 | // access the second column 'count' 52 | Integer count = tuple.getIntegerByField("count"); 53 | 54 | // publish the word count to redis using word as the key 55 | redis.publish("WordCountTopology", word + "|" + Long.toString(count)); 56 | } 57 | 58 | public void declareOutputFields(OutputFieldsDeclarer declarer) 59 | { 60 | // nothing to add - since it is the final bolt 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /lesson2/stage7/src/jvm/udacity/storm/TweetTopology.java: -------------------------------------------------------------------------------- 1 | package udacity.storm; 2 | 3 | import backtype.storm.Config; 4 | import backtype.storm.LocalCluster; 5 | import backtype.storm.StormSubmitter; 6 | import backtype.storm.spout.SpoutOutputCollector; 7 | import backtype.storm.task.OutputCollector; 8 | import backtype.storm.task.TopologyContext; 9 | import backtype.storm.testing.TestWordSpout; 10 | import backtype.storm.topology.OutputFieldsDeclarer; 11 | import backtype.storm.topology.TopologyBuilder; 12 | import 
backtype.storm.topology.base.BaseRichSpout; 13 | import backtype.storm.topology.base.BaseRichBolt; 14 | import backtype.storm.tuple.Fields; 15 | import backtype.storm.tuple.Tuple; 16 | import backtype.storm.tuple.Values; 17 | import backtype.storm.utils.Utils; 18 | 19 | class TweetTopology 20 | { 21 | public static void main(String[] args) throws Exception 22 | { 23 | // create the topology 24 | TopologyBuilder builder = new TopologyBuilder(); 25 | 26 | /* 27 | * In order to create the spout, you need to get twitter credentials 28 | * If you need to use Twitter firehose/Tweet stream for your idea, 29 | * create a set of credentials by following the instructions at 30 | * 31 | * https://dev.twitter.com/discussions/631 32 | * 33 | */ 34 | 35 | // now create the tweet spout with the credentials 36 | TweetSpout tweetSpout = new TweetSpout( 37 | "[Your customer key]", 38 | "[Your secret key]", 39 | "[Your access token]", 40 | "[Your access secret]" 41 | ); 42 | 43 | // attach the tweet spout to the topology - parallelism of 1 44 | builder.setSpout("tweet-spout", tweetSpout, 1); 45 | 46 | // attach the parse tweet bolt using shuffle grouping 47 | builder.setBolt("parse-tweet-bolt", new ParseTweetBolt(), 10).shuffleGrouping("tweet-spout"); 48 | 49 | // attach the count bolt using fields grouping - parallelism of 15 50 | //builder.setBolt("count-bolt", new CountBolt(), 15).fieldsGrouping("parse-tweet-bolt", new Fields("tweet-word")); 51 | 52 | // attach rolling count bolt using fields grouping - parallelism of 5 53 | builder.setBolt("rolling-count-bolt", new RollingCountBolt(30, 10), 1).fieldsGrouping("parse-tweet-bolt", new Fields("tweet-word")); 54 | 55 | // attach the report bolt using global grouping - parallelism of 1 56 | builder.setBolt("report-bolt", new ReportBolt(), 1).globalGrouping("rolling-count-bolt"); 57 | 58 | // create the default config object 59 | Config conf = new Config(); 60 | 61 | // set the config in debugging mode 62 | conf.setDebug(true); 63 | 64 
| if (args != null && args.length > 0) { 65 | 66 | // run it in a live cluster 67 | 68 | // set the number of workers for running all spout and bolt tasks 69 | conf.setNumWorkers(3); 70 | 71 | // create the topology and submit with config 72 | StormSubmitter.submitTopology(args[0], conf, builder.createTopology()); 73 | 74 | } else { 75 | 76 | // run it in a simulated local cluster 77 | 78 | // set the number of threads to run - similar to setting number of workers in live cluster 79 | conf.setMaxTaskParallelism(3); 80 | 81 | // create the local cluster instance 82 | LocalCluster cluster = new LocalCluster(); 83 | 84 | // submit the topology to the local cluster 85 | cluster.submitTopology("tweet-word-count", conf, builder.createTopology()); 86 | 87 | // let the topology run for 300 seconds. note topologies never terminate! 88 | Utils.sleep(300000); 89 | 90 | // now kill the topology 91 | cluster.killTopology("tweet-word-count"); 92 | 93 | // we are done, so shutdown the local cluster 94 | cluster.shutdown(); 95 | } 96 | } 97 | } 98 | -------------------------------------------------------------------------------- /lesson2/stage7/src/jvm/udacity/storm/tools/NthLastModifiedTimeTracker.java: -------------------------------------------------------------------------------- 1 | package udacity.storm.tools; 2 | 3 | import backtype.storm.utils.Time; 4 | import org.apache.commons.collections.buffer.CircularFifoBuffer; 5 | 6 | /** 7 | * This class tracks the time-since-last-modify of a "thing" in a rolling fashion. 8 | *

9 | * For example, create a 5-slot tracker to track the five most recent time-since-last-modify. 10 | *

11 | * You must manually "mark" that the "something" that you want to track -- in terms of modification times -- has just 12 | * been modified. 13 | */ 14 | public class NthLastModifiedTimeTracker { 15 | 16 | private static final int MILLIS_IN_SEC = 1000; 17 | 18 | private final CircularFifoBuffer lastModifiedTimesMillis; 19 | 20 | public NthLastModifiedTimeTracker(int numTimesToTrack) { 21 | if (numTimesToTrack < 1) { 22 | throw new IllegalArgumentException( 23 | "numTimesToTrack must be greater than zero (you requested " + numTimesToTrack + ")"); 24 | } 25 | lastModifiedTimesMillis = new CircularFifoBuffer(numTimesToTrack); 26 | initLastModifiedTimesMillis(); 27 | } 28 | 29 | private void initLastModifiedTimesMillis() { 30 | long nowCached = now(); 31 | for (int i = 0; i < lastModifiedTimesMillis.maxSize(); i++) { 32 | lastModifiedTimesMillis.add(Long.valueOf(nowCached)); 33 | } 34 | } 35 | 36 | private long now() { 37 | return Time.currentTimeMillis(); 38 | } 39 | 40 | public int secondsSinceOldestModification() { 41 | long modifiedTimeMillis = ((Long) lastModifiedTimesMillis.get()).longValue(); 42 | return (int) ((now() - modifiedTimeMillis) / MILLIS_IN_SEC); 43 | } 44 | 45 | public void markAsModified() { 46 | updateLastModifiedTime(); 47 | } 48 | 49 | private void updateLastModifiedTime() { 50 | lastModifiedTimesMillis.add(now()); 51 | } 52 | 53 | } 54 | -------------------------------------------------------------------------------- /lesson2/stage7/src/jvm/udacity/storm/tools/SlotBasedCounter.java: -------------------------------------------------------------------------------- 1 | //package storm.starter.tools; 2 | package udacity.storm.tools; 3 | 4 | import java.io.Serializable; 5 | import java.util.HashMap; 6 | import java.util.HashSet; 7 | import java.util.Map; 8 | import java.util.Set; 9 | 10 | /** 11 | * This class provides per-slot counts of the occurrences of objects. 12 | *

13 | * It can be used, for instance, as a building block for implementing sliding window counting of objects. 14 | * 15 | * @param The type of those objects we want to count. 16 | */ 17 | public final class SlotBasedCounter implements Serializable { 18 | 19 | private static final long serialVersionUID = 4858185737378394432L; 20 | 21 | private final Map objToCounts = new HashMap(); 22 | private final int numSlots; 23 | 24 | public SlotBasedCounter(int numSlots) { 25 | if (numSlots <= 0) { 26 | throw new IllegalArgumentException("Number of slots must be greater than zero (you requested " + numSlots + ")"); 27 | } 28 | this.numSlots = numSlots; 29 | } 30 | 31 | public void incrementCount(T obj, int slot) { 32 | long[] counts = objToCounts.get(obj); 33 | if (counts == null) { 34 | counts = new long[this.numSlots]; 35 | objToCounts.put(obj, counts); 36 | } 37 | counts[slot]++; 38 | } 39 | 40 | public long getCount(T obj, int slot) { 41 | long[] counts = objToCounts.get(obj); 42 | if (counts == null) { 43 | return 0; 44 | } 45 | else { 46 | return counts[slot]; 47 | } 48 | } 49 | 50 | public Map getCounts() { 51 | Map result = new HashMap(); 52 | for (T obj : objToCounts.keySet()) { 53 | result.put(obj, computeTotalCount(obj)); 54 | } 55 | return result; 56 | } 57 | 58 | private long computeTotalCount(T obj) { 59 | long[] curr = objToCounts.get(obj); 60 | long total = 0; 61 | for (long l : curr) { 62 | total += l; 63 | } 64 | return total; 65 | } 66 | 67 | /** 68 | * Reset the slot count of any tracked objects to zero for the given slot. 
69 | * 70 | * @param slot 71 | */ 72 | public void wipeSlot(int slot) { 73 | for (T obj : objToCounts.keySet()) { 74 | resetSlotCountToZero(obj, slot); 75 | } 76 | } 77 | 78 | private void resetSlotCountToZero(T obj, int slot) { 79 | long[] counts = objToCounts.get(obj); 80 | counts[slot] = 0; 81 | } 82 | 83 | private boolean shouldBeRemovedFromCounter(T obj) { 84 | return computeTotalCount(obj) == 0; 85 | } 86 | 87 | /** 88 | * Remove any object from the counter whose total count is zero (to free up memory). 89 | */ 90 | public void wipeZeros() { 91 | Set objToBeRemoved = new HashSet(); 92 | for (T obj : objToCounts.keySet()) { 93 | if (shouldBeRemovedFromCounter(obj)) { 94 | objToBeRemoved.add(obj); 95 | } 96 | } 97 | for (T obj : objToBeRemoved) { 98 | objToCounts.remove(obj); 99 | } 100 | } 101 | 102 | } 103 | -------------------------------------------------------------------------------- /lesson2/stage7/src/jvm/udacity/storm/tools/TupleHelpers.java: -------------------------------------------------------------------------------- 1 | //package storm.starter.util; 2 | package udacity.storm.tools; 3 | 4 | import backtype.storm.Constants; 5 | import backtype.storm.tuple.Tuple; 6 | 7 | public final class TupleHelpers { 8 | 9 | private TupleHelpers() { 10 | } 11 | 12 | public static boolean isTickTuple(Tuple tuple) { 13 | return tuple.getSourceComponent().equals(Constants.SYSTEM_COMPONENT_ID) && tuple.getSourceStreamId().equals( 14 | Constants.SYSTEM_TICK_STREAM_ID); 15 | } 16 | 17 | } 18 | -------------------------------------------------------------------------------- /lesson3/stage1/src/jvm/udacity/storm/CountBolt.java: -------------------------------------------------------------------------------- 1 | package udacity.storm; 2 | 3 | import backtype.storm.Config; 4 | import backtype.storm.LocalCluster; 5 | import backtype.storm.StormSubmitter; 6 | import backtype.storm.spout.SpoutOutputCollector; 7 | import backtype.storm.task.OutputCollector; 8 | import 
backtype.storm.task.TopologyContext; 9 | import backtype.storm.testing.TestWordSpout; 10 | import backtype.storm.topology.OutputFieldsDeclarer; 11 | import backtype.storm.topology.TopologyBuilder; 12 | import backtype.storm.topology.base.BaseRichSpout; 13 | import backtype.storm.topology.base.BaseRichBolt; 14 | import backtype.storm.tuple.Fields; 15 | import backtype.storm.tuple.Tuple; 16 | import backtype.storm.tuple.Values; 17 | import backtype.storm.utils.Utils; 18 | 19 | import java.util.HashMap; 20 | import java.util.Map; 21 | 22 | /** 23 | * A bolt that counts the words that it receives 24 | */ 25 | public class CountBolt extends BaseRichBolt 26 | { 27 | // To output tuples from this bolt to the next stage bolts, if any 28 | private OutputCollector collector; 29 | 30 | // Map to store the count of the words 31 | private Map countMap; 32 | 33 | @Override 34 | public void prepare( 35 | Map map, 36 | TopologyContext topologyContext, 37 | OutputCollector outputCollector) 38 | { 39 | 40 | // save the collector for emitting tuples 41 | collector = outputCollector; 42 | 43 | // create and initialize the map 44 | countMap = new HashMap(); 45 | } 46 | 47 | @Override 48 | public void execute(Tuple tuple) 49 | { 50 | // get the word from the 1st column of incoming tuple 51 | String word = tuple.getString(0); 52 | 53 | // check if the word is present in the map 54 | if (countMap.get(word) == null) { 55 | 56 | // not present, add the word with a count of 1 57 | countMap.put(word, 1); 58 | } else { 59 | 60 | // already there, hence get the count 61 | Integer val = countMap.get(word); 62 | 63 | // increment the count and save it to the map 64 | countMap.put(word, ++val); 65 | } 66 | 67 | // emit the word and count 68 | collector.emit(new Values(word, countMap.get(word))); 69 | } 70 | 71 | @Override 72 | public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) 73 | { 74 | // tell storm the schema of the output tuple for this spout 75 | // tuple consists of 
a two columns called 'word' and 'count' 76 | 77 | // declare the first column 'word', second column 'count' 78 | outputFieldsDeclarer.declare(new Fields("word","count")); 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /lesson3/stage1/src/jvm/udacity/storm/ParseTweetBolt.java: -------------------------------------------------------------------------------- 1 | package udacity.storm; 2 | 3 | import backtype.storm.Config; 4 | import backtype.storm.LocalCluster; 5 | import backtype.storm.StormSubmitter; 6 | import backtype.storm.spout.SpoutOutputCollector; 7 | import backtype.storm.task.OutputCollector; 8 | import backtype.storm.task.TopologyContext; 9 | import backtype.storm.testing.TestWordSpout; 10 | import backtype.storm.topology.OutputFieldsDeclarer; 11 | import backtype.storm.topology.TopologyBuilder; 12 | import backtype.storm.topology.base.BaseRichSpout; 13 | import backtype.storm.topology.base.BaseRichBolt; 14 | import backtype.storm.tuple.Fields; 15 | import backtype.storm.tuple.Tuple; 16 | import backtype.storm.tuple.Values; 17 | import backtype.storm.utils.Utils; 18 | 19 | import java.util.Map; 20 | 21 | /** 22 | * A bolt that parses the tweet into words 23 | */ 24 | public class ParseTweetBolt extends BaseRichBolt 25 | { 26 | // To output tuples from this bolt to the count bolt 27 | OutputCollector collector; 28 | 29 | @Override 30 | public void prepare( 31 | Map map, 32 | TopologyContext topologyContext, 33 | OutputCollector outputCollector) 34 | { 35 | // save the output collector for emitting tuples 36 | collector = outputCollector; 37 | } 38 | 39 | @Override 40 | public void execute(Tuple tuple) 41 | { 42 | // get the 1st column 'tweet' from tuple 43 | String tweet = tuple.getString(0); 44 | 45 | // provide the delimiters for splitting the tweet 46 | String delims = "[ .,?!]+"; 47 | 48 | // now split the tweet into tokens 49 | String[] tokens = tweet.split(delims); 50 | 51 | // for each token/word, 
emit it 52 | for (String token: tokens) { 53 | collector.emit(new Values(token)); 54 | } 55 | } 56 | 57 | @Override 58 | public void declareOutputFields(OutputFieldsDeclarer declarer) 59 | { 60 | // tell storm the schema of the output tuple for this spout 61 | // tuple consists of a single column called 'tweet-word' 62 | declarer.declare(new Fields("tweet-word")); 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /lesson3/stage1/src/jvm/udacity/storm/ReportBolt.java: -------------------------------------------------------------------------------- 1 | package udacity.storm; 2 | 3 | import backtype.storm.Config; 4 | import backtype.storm.LocalCluster; 5 | import backtype.storm.StormSubmitter; 6 | import backtype.storm.spout.SpoutOutputCollector; 7 | import backtype.storm.task.OutputCollector; 8 | import backtype.storm.task.TopologyContext; 9 | import backtype.storm.testing.TestWordSpout; 10 | import backtype.storm.topology.OutputFieldsDeclarer; 11 | import backtype.storm.topology.TopologyBuilder; 12 | import backtype.storm.topology.base.BaseRichSpout; 13 | import backtype.storm.topology.base.BaseRichBolt; 14 | import backtype.storm.tuple.Fields; 15 | import backtype.storm.tuple.Tuple; 16 | import backtype.storm.tuple.Values; 17 | import backtype.storm.utils.Utils; 18 | 19 | import java.util.Map; 20 | 21 | import com.lambdaworks.redis.RedisClient; 22 | import com.lambdaworks.redis.RedisConnection; 23 | 24 | /** 25 | * A bolt that prints the word and count to redis 26 | */ 27 | public class ReportBolt extends BaseRichBolt 28 | { 29 | // place holder to keep the connection to redis 30 | transient RedisConnection redis; 31 | 32 | @Override 33 | public void prepare( 34 | Map map, 35 | TopologyContext topologyContext, 36 | OutputCollector outputCollector) 37 | { 38 | // instantiate a redis connection 39 | RedisClient client = new RedisClient("localhost",6379); 40 | 41 | // initiate the actual connection 42 | redis = 
client.connect(); 43 | } 44 | 45 | @Override 46 | public void execute(Tuple tuple) 47 | { 48 | // access the first column 'word' 49 | String word = tuple.getStringByField("word"); 50 | 51 | // access the second column 'count' 52 | Integer count = tuple.getIntegerByField("count"); 53 | 54 | // publish the word count to redis using word as the key 55 | redis.publish("WordCountTopology", word + "|" + Long.toString(count)); 56 | } 57 | 58 | public void declareOutputFields(OutputFieldsDeclarer declarer) 59 | { 60 | // nothing to add - since it is the final bolt 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /lesson3/stage1/src/jvm/udacity/storm/TweetTopology.java: -------------------------------------------------------------------------------- 1 | package udacity.storm; 2 | 3 | import backtype.storm.Config; 4 | import backtype.storm.LocalCluster; 5 | import backtype.storm.StormSubmitter; 6 | import backtype.storm.spout.SpoutOutputCollector; 7 | import backtype.storm.task.OutputCollector; 8 | import backtype.storm.task.TopologyContext; 9 | import backtype.storm.testing.TestWordSpout; 10 | import backtype.storm.topology.OutputFieldsDeclarer; 11 | import backtype.storm.topology.TopologyBuilder; 12 | import backtype.storm.topology.base.BaseRichSpout; 13 | import backtype.storm.topology.base.BaseRichBolt; 14 | import backtype.storm.tuple.Fields; 15 | import backtype.storm.tuple.Tuple; 16 | import backtype.storm.tuple.Values; 17 | import backtype.storm.utils.Utils; 18 | 19 | // NOTE - you must install the python Beautiful Soup module in Ubuntu 20 | // before this code will run. In your virtual machine, run: 21 | // 22 | // sudo apt-get install python-bs4 23 | // 24 | // see Lesson 4 for details on adding this provision to your Vagrantfile 25 | 26 | // Lesson 3 Stage 1 is taken from Lesson 2 Stage 6 to provide a basic starting point. 27 | // Copy, paste and uncomment the following resources module in the POM.xml file. 
28 | // This defines the src/jvm/udacity/storm/resources location needed for python shell 29 | 30 | // 31 | // 32 | // 33 | // src/jvm/udacity/storm 34 | // 35 | // 36 | // 37 | 38 | // 39 | 40 | class TweetTopology 41 | { 42 | public static void main(String[] args) throws Exception 43 | { 44 | // create the topology 45 | TopologyBuilder builder = new TopologyBuilder(); 46 | 47 | /* 48 | * In order to create the spout, you need to get twitter credentials 49 | * If you need to use Twitter firehose/Tweet stream for your idea, 50 | * create a set of credentials by following the instructions at 51 | * 52 | * https://dev.twitter.com/discussions/631 53 | * 54 | */ 55 | 56 | // now create the tweet spout with the credentials 57 | TweetSpout tweetSpout = new TweetSpout( 58 | "[Your customer key]", 59 | "[Your secret key]", 60 | "[Your access token]", 61 | "[Your access secret]" 62 | ); 63 | 64 | // attach the tweet spout to the topology - parallelism of 1 65 | builder.setSpout("tweet-spout", tweetSpout, 1); 66 | 67 | // attach the parse tweet bolt using shuffle grouping 68 | builder.setBolt("parse-tweet-bolt", new ParseTweetBolt(), 10).shuffleGrouping("tweet-spout"); 69 | 70 | // attach the count bolt using fields grouping - parallelism of 15 71 | builder.setBolt("count-bolt", new CountBolt(), 15).fieldsGrouping("parse-tweet-bolt", new Fields("tweet-word")); 72 | 73 | // attach the report bolt using global grouping - parallelism of 1 74 | builder.setBolt("report-bolt", new ReportBolt(), 1).globalGrouping("count-bolt"); 75 | 76 | // create the default config object 77 | Config conf = new Config(); 78 | 79 | // set the config in debugging mode 80 | conf.setDebug(true); 81 | 82 | if (args != null && args.length > 0) { 83 | 84 | // run it in a live cluster 85 | 86 | // set the number of workers for running all spout and bolt tasks 87 | conf.setNumWorkers(3); 88 | 89 | // create the topology and submit with config 90 | StormSubmitter.submitTopology(args[0], conf, 
builder.createTopology()); 91 | 92 | } else { 93 | 94 | // run it in a simulated local cluster 95 | 96 | // set the number of threads to run - similar to setting number of workers in live cluster 97 | conf.setMaxTaskParallelism(3); 98 | 99 | // create the local cluster instance 100 | LocalCluster cluster = new LocalCluster(); 101 | 102 | // submit the topology to the local cluster 103 | cluster.submitTopology("tweet-word-count", conf, builder.createTopology()); 104 | 105 | // let the topology run for 30 seconds. note topologies never terminate! 106 | Utils.sleep(30000); 107 | 108 | // now kill the topology 109 | cluster.killTopology("tweet-word-count"); 110 | 111 | // we are done, so shutdown the local cluster 112 | cluster.shutdown(); 113 | } 114 | } 115 | } 116 | -------------------------------------------------------------------------------- /lesson3/stage2/src/jvm/udacity/storm/CountBolt.java: -------------------------------------------------------------------------------- 1 | package udacity.storm; 2 | 3 | import backtype.storm.Config; 4 | import backtype.storm.LocalCluster; 5 | import backtype.storm.StormSubmitter; 6 | import backtype.storm.spout.SpoutOutputCollector; 7 | import backtype.storm.task.OutputCollector; 8 | import backtype.storm.task.TopologyContext; 9 | import backtype.storm.testing.TestWordSpout; 10 | import backtype.storm.topology.OutputFieldsDeclarer; 11 | import backtype.storm.topology.TopologyBuilder; 12 | import backtype.storm.topology.base.BaseRichSpout; 13 | import backtype.storm.topology.base.BaseRichBolt; 14 | import backtype.storm.tuple.Fields; 15 | import backtype.storm.tuple.Tuple; 16 | import backtype.storm.tuple.Values; 17 | import backtype.storm.utils.Utils; 18 | 19 | import java.util.HashMap; 20 | import java.util.Map; 21 | 22 | /** 23 | * A bolt that counts the words that it receives 24 | */ 25 | public class CountBolt extends BaseRichBolt 26 | { 27 | // To output tuples from this bolt to the next stage bolts, if any 28 | 
private OutputCollector collector; 29 | 30 | // Map to store the count of the words 31 | private Map countMap; 32 | 33 | @Override 34 | public void prepare( 35 | Map map, 36 | TopologyContext topologyContext, 37 | OutputCollector outputCollector) 38 | { 39 | 40 | // save the collector for emitting tuples 41 | collector = outputCollector; 42 | 43 | // create and initialize the map 44 | countMap = new HashMap(); 45 | } 46 | 47 | @Override 48 | public void execute(Tuple tuple) 49 | { 50 | // get the word from the 1st column of incoming tuple 51 | String word = tuple.getString(0); 52 | 53 | // check if the word is present in the map 54 | if (countMap.get(word) == null) { 55 | 56 | // not present, add the word with a count of 1 57 | countMap.put(word, 1); 58 | } else { 59 | 60 | // already there, hence get the count 61 | Integer val = countMap.get(word); 62 | 63 | // increment the count and save it to the map 64 | countMap.put(word, ++val); 65 | } 66 | 67 | // emit the word and count 68 | collector.emit(new Values(word, countMap.get(word))); 69 | } 70 | 71 | @Override 72 | public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) 73 | { 74 | // tell storm the schema of the output tuple for this spout 75 | // tuple consists of a two columns called 'word' and 'count' 76 | 77 | // declare the first column 'word', second column 'count' 78 | outputFieldsDeclarer.declare(new Fields("word","count")); 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /lesson3/stage2/src/jvm/udacity/storm/ParseTweetBolt.java: -------------------------------------------------------------------------------- 1 | package udacity.storm; 2 | 3 | import backtype.storm.Config; 4 | import backtype.storm.LocalCluster; 5 | import backtype.storm.StormSubmitter; 6 | import backtype.storm.spout.SpoutOutputCollector; 7 | import backtype.storm.task.OutputCollector; 8 | import backtype.storm.task.TopologyContext; 9 | import 
backtype.storm.testing.TestWordSpout; 10 | import backtype.storm.topology.OutputFieldsDeclarer; 11 | import backtype.storm.topology.TopologyBuilder; 12 | import backtype.storm.topology.base.BaseRichSpout; 13 | import backtype.storm.topology.base.BaseRichBolt; 14 | import backtype.storm.tuple.Fields; 15 | import backtype.storm.tuple.Tuple; 16 | import backtype.storm.tuple.Values; 17 | import backtype.storm.utils.Utils; 18 | 19 | import java.util.Map; 20 | 21 | /** 22 | * A bolt that parses the tweet into words 23 | */ 24 | public class ParseTweetBolt extends BaseRichBolt 25 | { 26 | // To output tuples from this bolt to the count bolt 27 | OutputCollector collector; 28 | 29 | @Override 30 | public void prepare( 31 | Map map, 32 | TopologyContext topologyContext, 33 | OutputCollector outputCollector) 34 | { 35 | // save the output collector for emitting tuples 36 | collector = outputCollector; 37 | } 38 | 39 | @Override 40 | public void execute(Tuple tuple) 41 | { 42 | // get the 1st column 'tweet' from tuple 43 | String tweet = tuple.getString(0); 44 | 45 | // provide the delimiters for splitting the tweet 46 | String delims = "[ .,?!]+"; 47 | 48 | // now split the tweet into tokens 49 | String[] tokens = tweet.split(delims); 50 | 51 | // for each token/word, emit it 52 | for (String token: tokens) { 53 | collector.emit(new Values(token)); 54 | } 55 | } 56 | 57 | @Override 58 | public void declareOutputFields(OutputFieldsDeclarer declarer) 59 | { 60 | // tell storm the schema of the output tuple for this spout 61 | // tuple consists of a single column called 'tweet-word' 62 | declarer.declare(new Fields("tweet-word")); 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /lesson3/stage2/src/jvm/udacity/storm/ReportBolt.java: -------------------------------------------------------------------------------- 1 | package udacity.storm; 2 | 3 | import backtype.storm.Config; 4 | import backtype.storm.LocalCluster; 5 | import 
backtype.storm.StormSubmitter; 6 | import backtype.storm.spout.SpoutOutputCollector; 7 | import backtype.storm.task.OutputCollector; 8 | import backtype.storm.task.TopologyContext; 9 | import backtype.storm.testing.TestWordSpout; 10 | import backtype.storm.topology.OutputFieldsDeclarer; 11 | import backtype.storm.topology.TopologyBuilder; 12 | import backtype.storm.topology.base.BaseRichSpout; 13 | import backtype.storm.topology.base.BaseRichBolt; 14 | import backtype.storm.tuple.Fields; 15 | import backtype.storm.tuple.Tuple; 16 | import backtype.storm.tuple.Values; 17 | import backtype.storm.utils.Utils; 18 | 19 | import java.util.Map; 20 | 21 | import com.lambdaworks.redis.RedisClient; 22 | import com.lambdaworks.redis.RedisConnection; 23 | 24 | /** 25 | * A bolt that prints the word and count to redis 26 | */ 27 | public class ReportBolt extends BaseRichBolt 28 | { 29 | // place holder to keep the connection to redis 30 | transient RedisConnection redis; 31 | 32 | @Override 33 | public void prepare( 34 | Map map, 35 | TopologyContext topologyContext, 36 | OutputCollector outputCollector) 37 | { 38 | // instantiate a redis connection 39 | RedisClient client = new RedisClient("localhost",6379); 40 | 41 | // initiate the actual connection 42 | redis = client.connect(); 43 | } 44 | 45 | @Override 46 | public void execute(Tuple tuple) 47 | { 48 | // access the first column 'word' 49 | String word = tuple.getStringByField("word"); 50 | 51 | // access the second column 'count' 52 | Integer count = tuple.getIntegerByField("count"); 53 | 54 | // publish the word count to redis using word as the key 55 | redis.publish("WordCountTopology", word + "|" + Long.toString(count)); 56 | } 57 | 58 | public void declareOutputFields(OutputFieldsDeclarer declarer) 59 | { 60 | // nothing to add - since it is the final bolt 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /lesson3/stage2/src/jvm/udacity/storm/SplitSentence.java: 
-------------------------------------------------------------------------------- 1 | package udacity.storm; 2 | 3 | import backtype.storm.Config; 4 | import backtype.storm.LocalCluster; 5 | import backtype.storm.StormSubmitter; 6 | import backtype.storm.task.ShellBolt; 7 | import backtype.storm.topology.BasicOutputCollector; 8 | import backtype.storm.topology.IRichBolt; 9 | import backtype.storm.topology.OutputFieldsDeclarer; 10 | import backtype.storm.topology.TopologyBuilder; 11 | import backtype.storm.topology.base.BaseBasicBolt; 12 | import backtype.storm.tuple.Fields; 13 | import backtype.storm.tuple.Tuple; 14 | import backtype.storm.tuple.Values; 15 | //import storm.starter.spout.RandomSentenceSpout; 16 | 17 | import java.util.HashMap; 18 | import java.util.Map; 19 | 20 | /** 21 | * A multilang bolt that splits each incoming sentence into words by delegating to splitsentence.py through Storm's ShellBolt protocol; emits one 'word' tuple per word 22 | */ 23 | 24 | // https://github.com/apache/storm/blob/master/examples/storm-starter/src/jvm/storm/starter/WordCountTopology.java 25 | 26 | public class SplitSentence extends ShellBolt implements IRichBolt { 27 | 28 | public SplitSentence() { 29 | super("python", "splitsentence.py"); 30 | } 31 | 32 | @Override 33 | public void declareOutputFields(OutputFieldsDeclarer declarer) { 34 | declarer.declare(new Fields("word")); 35 | } 36 | 37 | @Override 38 | public Map getComponentConfiguration() { 39 | return null; 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /lesson3/stage2/src/jvm/udacity/storm/TweetTopology.java: -------------------------------------------------------------------------------- 1 | package udacity.storm; 2 | 3 | import backtype.storm.Config; 4 | import backtype.storm.LocalCluster; 5 | import backtype.storm.StormSubmitter; 6 | import backtype.storm.spout.SpoutOutputCollector; 7 | import backtype.storm.task.OutputCollector; 8 | import backtype.storm.task.TopologyContext; 9 | import backtype.storm.testing.TestWordSpout; 10 | import 
backtype.storm.topology.OutputFieldsDeclarer; 11 | import backtype.storm.topology.TopologyBuilder; 12 | import backtype.storm.topology.base.BaseRichSpout; 13 | import backtype.storm.topology.base.BaseRichBolt; 14 | import backtype.storm.tuple.Fields; 15 | import backtype.storm.tuple.Tuple; 16 | import backtype.storm.tuple.Values; 17 | import backtype.storm.utils.Utils; 18 | 19 | // NOTE - you must install the python Beautiful Soup module in Ubuntu 20 | // before this code will run. In your virtual machine, run: 21 | // 22 | // sudo apt-get install python-bs4 23 | // 24 | // see Lesson 4 for details on adding this provision to your Vagrantfile 25 | 26 | class TweetTopology 27 | { 28 | public static void main(String[] args) throws Exception 29 | { 30 | // create the topology 31 | TopologyBuilder builder = new TopologyBuilder(); 32 | 33 | /* 34 | * In order to create the spout, you need to get twitter credentials 35 | * If you need to use Twitter firehose/Tweet stream for your idea, 36 | * create a set of credentials by following the instructions at 37 | * 38 | * https://dev.twitter.com/discussions/631 39 | * 40 | */ 41 | 42 | // now create the tweet spout with the credentials 43 | TweetSpout tweetSpout = new TweetSpout( 44 | "[Your customer key]", 45 | "[Your secret key]", 46 | "[Your access token]", 47 | "[Your access secret]" 48 | ); 49 | 50 | // attach the tweet spout to the topology - parallelism of 1 51 | builder.setSpout("tweet-spout", tweetSpout, 1); 52 | 53 | // attach the parse tweet bolt using shuffle grouping 54 | //builder.setBolt("parse-tweet-bolt", new ParseTweetBolt(), 10).shuffleGrouping("tweet-spout"); 55 | 56 | //************* replace Java ParseTweetBolt with Java/Python SplitSentence 57 | builder.setBolt("python-split-sentence", new SplitSentence(), 10).shuffleGrouping("tweet-spout"); 58 | 59 | // attach the count bolt using fields grouping - parallelism of 15 60 | //builder.setBolt("count-bolt", new CountBolt(), 
15).fieldsGrouping("parse-tweet-bolt", new Fields("tweet-word")); 61 | 62 | //************* replace Java "parse-tweet-bolt" with Java/Python "python-split-sentence" 63 | builder.setBolt("count-bolt", new CountBolt(), 15).fieldsGrouping("python-split-sentence", new Fields("word")); 64 | 65 | // attach the report bolt using global grouping - parallelism of 1 66 | builder.setBolt("report-bolt", new ReportBolt(), 1).globalGrouping("count-bolt"); 67 | 68 | // create the default config object 69 | Config conf = new Config(); 70 | 71 | // set the config in debugging mode 72 | conf.setDebug(true); 73 | 74 | if (args != null && args.length > 0) { 75 | 76 | // run it in a live cluster 77 | 78 | // set the number of workers for running all spout and bolt tasks 79 | conf.setNumWorkers(3); 80 | 81 | // create the topology and submit with config 82 | StormSubmitter.submitTopology(args[0], conf, builder.createTopology()); 83 | 84 | } else { 85 | 86 | // run it in a simulated local cluster 87 | 88 | // set the number of threads to run - similar to setting number of workers in live cluster 89 | conf.setMaxTaskParallelism(3); 90 | 91 | // create the local cluster instance 92 | LocalCluster cluster = new LocalCluster(); 93 | 94 | // submit the topology to the local cluster 95 | cluster.submitTopology("tweet-word-count", conf, builder.createTopology()); 96 | 97 | // let the topology run for 1000*30 seconds. note topologies never terminate! 
98 | Utils.sleep(1000*30000); 99 | 100 | // now kill the topology 101 | cluster.killTopology("tweet-word-count"); 102 | 103 | // we are done, so shutdown the local cluster 104 | cluster.shutdown(); 105 | } 106 | } 107 | } 108 | -------------------------------------------------------------------------------- /lesson3/stage2/src/jvm/udacity/storm/resources/splitsentence.py: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | 17 | # https://github.com/apache/storm/blob/master/examples/storm-starter/multilang/resources/splitsentence.py 18 | 19 | import storm 20 | 21 | class SplitSentenceBolt(storm.BasicBolt): 22 | def process(self, tup): 23 | #TO DO: Add check for empty values 24 | words = tup.values[0].split(" ") 25 | for word in words: 26 | storm.emit([word]) 27 | 28 | SplitSentenceBolt().run() 29 | -------------------------------------------------------------------------------- /lesson3/stage3/src/jvm/udacity/storm/CountBolt.java: -------------------------------------------------------------------------------- 1 | package udacity.storm; 2 | 3 | import backtype.storm.Config; 4 | import backtype.storm.LocalCluster; 5 | import backtype.storm.StormSubmitter; 6 | import backtype.storm.spout.SpoutOutputCollector; 7 | import backtype.storm.task.OutputCollector; 8 | import backtype.storm.task.TopologyContext; 9 | import backtype.storm.testing.TestWordSpout; 10 | import backtype.storm.topology.OutputFieldsDeclarer; 11 | import backtype.storm.topology.TopologyBuilder; 12 | import backtype.storm.topology.base.BaseRichSpout; 13 | import backtype.storm.topology.base.BaseRichBolt; 14 | import backtype.storm.tuple.Fields; 15 | import backtype.storm.tuple.Tuple; 16 | import backtype.storm.tuple.Values; 17 | import backtype.storm.utils.Utils; 18 | 19 | import java.util.HashMap; 20 | import java.util.Map; 21 | 22 | /** 23 | * A bolt that counts the words that it receives 24 | */ 25 | public class CountBolt extends BaseRichBolt 26 | { 27 | // To output tuples from this bolt to the next stage bolts, if any 28 | private OutputCollector collector; 29 | 30 | // Map to store the count of the words 31 | private Map countMap; 32 | 33 | @Override 34 | public void prepare( 35 | Map map, 36 | TopologyContext topologyContext, 37 | OutputCollector outputCollector) 38 | { 39 | 40 | // save the collector for emitting tuples 41 | collector = outputCollector; 42 | 43 | // create and initialize the map 44 | 
countMap = new HashMap(); 45 | } 46 | 47 | @Override 48 | public void execute(Tuple tuple) 49 | { 50 | // get the word from the 1st column of incoming tuple 51 | String word = tuple.getString(0); 52 | 53 | // check if the word is present in the map 54 | if (countMap.get(word) == null) { 55 | 56 | // not present, add the word with a count of 1 57 | countMap.put(word, 1); 58 | } else { 59 | 60 | // already there, hence get the count 61 | Integer val = countMap.get(word); 62 | 63 | // increment the count and save it to the map 64 | countMap.put(word, ++val); 65 | } 66 | 67 | // emit the word and count 68 | collector.emit(new Values(word, countMap.get(word))); 69 | } 70 | 71 | @Override 72 | public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) 73 | { 74 | // tell storm the schema of the output tuple for this spout 75 | // tuple consists of a two columns called 'word' and 'count' 76 | 77 | // declare the first column 'word', second column 'count' 78 | outputFieldsDeclarer.declare(new Fields("word","count")); 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /lesson3/stage3/src/jvm/udacity/storm/ParseTweetBolt.java: -------------------------------------------------------------------------------- 1 | package udacity.storm; 2 | 3 | import backtype.storm.Config; 4 | import backtype.storm.LocalCluster; 5 | import backtype.storm.StormSubmitter; 6 | import backtype.storm.spout.SpoutOutputCollector; 7 | import backtype.storm.task.OutputCollector; 8 | import backtype.storm.task.TopologyContext; 9 | import backtype.storm.testing.TestWordSpout; 10 | import backtype.storm.topology.OutputFieldsDeclarer; 11 | import backtype.storm.topology.TopologyBuilder; 12 | import backtype.storm.topology.base.BaseRichSpout; 13 | import backtype.storm.topology.base.BaseRichBolt; 14 | import backtype.storm.tuple.Fields; 15 | import backtype.storm.tuple.Tuple; 16 | import backtype.storm.tuple.Values; 17 | import 
backtype.storm.utils.Utils; 18 | 19 | import java.util.Map; 20 | 21 | /** 22 | * A bolt that parses the tweet into words 23 | */ 24 | public class ParseTweetBolt extends BaseRichBolt 25 | { 26 | // To output tuples from this bolt to the count bolt 27 | OutputCollector collector; 28 | 29 | @Override 30 | public void prepare( 31 | Map map, 32 | TopologyContext topologyContext, 33 | OutputCollector outputCollector) 34 | { 35 | // save the output collector for emitting tuples 36 | collector = outputCollector; 37 | } 38 | 39 | @Override 40 | public void execute(Tuple tuple) 41 | { 42 | // get the 1st column 'tweet' from tuple 43 | String tweet = tuple.getString(0); 44 | 45 | // provide the delimiters for splitting the tweet 46 | String delims = "[ .,?!]+"; 47 | 48 | // now split the tweet into tokens 49 | String[] tokens = tweet.split(delims); 50 | 51 | // for each token/word, emit it 52 | for (String token: tokens) { 53 | collector.emit(new Values(token)); 54 | } 55 | } 56 | 57 | @Override 58 | public void declareOutputFields(OutputFieldsDeclarer declarer) 59 | { 60 | // tell storm the schema of the output tuple for this spout 61 | // tuple consists of a single column called 'tweet-word' 62 | declarer.declare(new Fields("tweet-word")); 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /lesson3/stage3/src/jvm/udacity/storm/ReportBolt.java: -------------------------------------------------------------------------------- 1 | package udacity.storm; 2 | 3 | import backtype.storm.Config; 4 | import backtype.storm.LocalCluster; 5 | import backtype.storm.StormSubmitter; 6 | import backtype.storm.spout.SpoutOutputCollector; 7 | import backtype.storm.task.OutputCollector; 8 | import backtype.storm.task.TopologyContext; 9 | import backtype.storm.testing.TestWordSpout; 10 | import backtype.storm.topology.OutputFieldsDeclarer; 11 | import backtype.storm.topology.TopologyBuilder; 12 | import 
backtype.storm.topology.base.BaseRichSpout; 13 | import backtype.storm.topology.base.BaseRichBolt; 14 | import backtype.storm.tuple.Fields; 15 | import backtype.storm.tuple.Tuple; 16 | import backtype.storm.tuple.Values; 17 | import backtype.storm.utils.Utils; 18 | 19 | import java.util.Map; 20 | 21 | import com.lambdaworks.redis.RedisClient; 22 | import com.lambdaworks.redis.RedisConnection; 23 | 24 | /** 25 | * A bolt that prints the word and count to redis 26 | */ 27 | public class ReportBolt extends BaseRichBolt 28 | { 29 | // place holder to keep the connection to redis 30 | transient RedisConnection redis; 31 | 32 | @Override 33 | public void prepare( 34 | Map map, 35 | TopologyContext topologyContext, 36 | OutputCollector outputCollector) 37 | { 38 | // instantiate a redis connection 39 | RedisClient client = new RedisClient("localhost",6379); 40 | 41 | // initiate the actual connection 42 | redis = client.connect(); 43 | } 44 | 45 | @Override 46 | public void execute(Tuple tuple) 47 | { 48 | // access the first column 'word' 49 | String word = tuple.getStringByField("word"); 50 | 51 | // access the second column 'count' 52 | Integer count = tuple.getIntegerByField("count"); 53 | 54 | // publish the word count to redis using word as the key 55 | redis.publish("WordCountTopology", word + "|" + Long.toString(count)); 56 | } 57 | 58 | public void declareOutputFields(OutputFieldsDeclarer declarer) 59 | { 60 | // nothing to add - since it is the final bolt 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /lesson3/stage3/src/jvm/udacity/storm/SplitSentence.java: -------------------------------------------------------------------------------- 1 | package udacity.storm; 2 | 3 | import backtype.storm.Config; 4 | import backtype.storm.LocalCluster; 5 | import backtype.storm.StormSubmitter; 6 | import backtype.storm.task.ShellBolt; 7 | import backtype.storm.topology.BasicOutputCollector; 8 | import 
backtype.storm.topology.IRichBolt; 9 | import backtype.storm.topology.OutputFieldsDeclarer; 10 | import backtype.storm.topology.TopologyBuilder; 11 | import backtype.storm.topology.base.BaseBasicBolt; 12 | import backtype.storm.tuple.Fields; 13 | import backtype.storm.tuple.Tuple; 14 | import backtype.storm.tuple.Values; 15 | //import storm.starter.spout.RandomSentenceSpout; 16 | 17 | import java.util.HashMap; 18 | import java.util.Map; 19 | 20 | /** 21 | * A bolt that parses the tweet into words 22 | */ 23 | 24 | // https://github.com/apache/storm/blob/master/examples/storm-starter/src/jvm/storm/starter/WordCountTopology.java 25 | 26 | public class SplitSentence extends ShellBolt implements IRichBolt { 27 | 28 | public SplitSentence() { 29 | super("python", "splitsentence.py"); 30 | } 31 | 32 | @Override 33 | public void declareOutputFields(OutputFieldsDeclarer declarer) { 34 | declarer.declare(new Fields("word")); 35 | } 36 | 37 | @Override 38 | public Map getComponentConfiguration() { 39 | return null; 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /lesson3/stage3/src/jvm/udacity/storm/TweetTopology.java: -------------------------------------------------------------------------------- 1 | package udacity.storm; 2 | 3 | import backtype.storm.Config; 4 | import backtype.storm.LocalCluster; 5 | import backtype.storm.StormSubmitter; 6 | import backtype.storm.spout.SpoutOutputCollector; 7 | import backtype.storm.task.OutputCollector; 8 | import backtype.storm.task.TopologyContext; 9 | import backtype.storm.testing.TestWordSpout; 10 | import backtype.storm.topology.OutputFieldsDeclarer; 11 | import backtype.storm.topology.TopologyBuilder; 12 | import backtype.storm.topology.base.BaseRichSpout; 13 | import backtype.storm.topology.base.BaseRichBolt; 14 | import backtype.storm.tuple.Fields; 15 | import backtype.storm.tuple.Tuple; 16 | import backtype.storm.tuple.Values; 17 | import backtype.storm.utils.Utils; 18 | 19 | 
// NOTE - you must install the python Beautiful Soup module in Ubuntu 20 | // before this code will run. In your virtual machine, run: 21 | // 22 | // sudo apt-get install python-bs4 23 | // 24 | // see Lesson 4 for details on adding this provision to your Vagrantfile 25 | 26 | 27 | 28 | class TweetTopology 29 | { 30 | public static void main(String[] args) throws Exception 31 | { 32 | // create the topology 33 | TopologyBuilder builder = new TopologyBuilder(); 34 | 35 | /* 36 | * In order to create the spout, you need to get twitter credentials 37 | * If you need to use Twitter firehose/Tweet stream for your idea, 38 | * create a set of credentials by following the instructions at 39 | * 40 | * https://dev.twitter.com/discussions/631 41 | * 42 | */ 43 | 44 | // now create the tweet spout with the credentials 45 | TweetSpout tweetSpout = new TweetSpout( 46 | "[Your customer key]", 47 | "[Your secret key]", 48 | "[Your access token]", 49 | "[Your access secret]" 50 | ); 51 | 52 | // attach the tweet spout to the topology - parallelism of 1 53 | builder.setSpout("tweet-spout", tweetSpout, 1); 54 | 55 | // attach the parse tweet bolt using shuffle grouping 56 | //builder.setBolt("parse-tweet-bolt", new ParseTweetBolt(), 10).shuffleGrouping("tweet-spout"); 57 | 58 | //************* replace with URLBolt emitting text using shuffle grouping 59 | builder.setBolt("python-URL-bolt", new URLBolt(), 10).shuffleGrouping("tweet-spout"); 60 | 61 | 62 | //************* replace Java ParseTweetBolt with Java/Python SplitSentence 63 | builder.setBolt("python-split-sentence", new SplitSentence(), 10).shuffleGrouping("python-URL-bolt"); 64 | 65 | // attach the count bolt using fields grouping - parallelism of 15 66 | //builder.setBolt("count-bolt", new CountBolt(), 15).fieldsGrouping("parse-tweet-bolt", new Fields("tweet-word")); 67 | 68 | //************* replace Java "parse-tweet-bolt" with Java/Python "python-split-sentence" 69 | builder.setBolt("count-bolt", new CountBolt(), 
15).fieldsGrouping("python-split-sentence", new Fields("word")); 70 | 71 | // attach the report bolt using global grouping - parallelism of 1 72 | builder.setBolt("report-bolt", new ReportBolt(), 1).globalGrouping("count-bolt"); 73 | 74 | // create the default config object 75 | Config conf = new Config(); 76 | 77 | // set the config in debugging mode 78 | conf.setDebug(true); 79 | 80 | if (args != null && args.length > 0) { 81 | 82 | // run it in a live cluster 83 | 84 | // set the number of workers for running all spout and bolt tasks 85 | conf.setNumWorkers(3); 86 | 87 | // create the topology and submit with config 88 | StormSubmitter.submitTopology(args[0], conf, builder.createTopology()); 89 | 90 | } else { 91 | 92 | // run it in a simulated local cluster 93 | 94 | // set the number of threads to run - similar to setting number of workers in live cluster 95 | conf.setMaxTaskParallelism(3); 96 | 97 | // create the local cluster instance 98 | LocalCluster cluster = new LocalCluster(); 99 | 100 | // submit the topology to the local cluster 101 | cluster.submitTopology("tweet-word-count", conf, builder.createTopology()); 102 | 103 | // let the topology run for 1000*30 seconds. note topologies never terminate! 
104 | Utils.sleep(1000*30000); 105 | 106 | // now kill the topology 107 | cluster.killTopology("tweet-word-count"); 108 | 109 | // we are done, so shutdown the local cluster 110 | cluster.shutdown(); 111 | } 112 | } 113 | } 114 | -------------------------------------------------------------------------------- /lesson3/stage3/src/jvm/udacity/storm/URLBolt.java: -------------------------------------------------------------------------------- 1 | package udacity.storm; 2 | 3 | import backtype.storm.Config; 4 | import backtype.storm.LocalCluster; 5 | import backtype.storm.StormSubmitter; 6 | import backtype.storm.task.ShellBolt; 7 | import backtype.storm.topology.BasicOutputCollector; 8 | import backtype.storm.topology.IRichBolt; 9 | import backtype.storm.topology.OutputFieldsDeclarer; 10 | import backtype.storm.topology.TopologyBuilder; 11 | import backtype.storm.topology.base.BaseBasicBolt; 12 | import backtype.storm.tuple.Fields; 13 | import backtype.storm.tuple.Tuple; 14 | import backtype.storm.tuple.Values; 15 | //import storm.starter.spout.RandomSentenceSpout; 16 | 17 | import java.util.HashMap; 18 | import java.util.Map; 19 | 20 | /** 21 | * A bolt that parses the tweet into words 22 | */ 23 | 24 | // https://github.com/apache/storm/blob/master/examples/storm-starter/src/jvm/storm/starter/WordCountTopology.java 25 | 26 | public class URLBolt extends ShellBolt implements IRichBolt { 27 | 28 | public URLBolt() { 29 | super("python", "urltext.py"); 30 | } 31 | 32 | @Override 33 | public void declareOutputFields(OutputFieldsDeclarer declarer) { 34 | declarer.declare(new Fields("text")); 35 | } 36 | 37 | @Override 38 | public Map getComponentConfiguration() { 39 | return null; 40 | } 41 | } 42 | -------------------------------------------------------------------------------- /lesson3/stage3/src/jvm/udacity/storm/resources/splitsentence.py: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation 
(ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # https://github.com/apache/storm/blob/master/examples/storm-starter/multilang/resources/splitsentence.py 18 | 19 | import storm 20 | 21 | class SplitSentenceBolt(storm.BasicBolt): 22 | def process(self, tup): 23 | #added to check for empty values 24 | if tup.values[0]: 25 | words = tup.values[0].split(" ") 26 | if words: 27 | for word in words: 28 | storm.emit([word]) 29 | 30 | SplitSentenceBolt().run() 31 | -------------------------------------------------------------------------------- /lesson3/stage3/src/jvm/udacity/storm/resources/urltext.py: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one 2 | # or more contributor license agreements. See the NOTICE file 3 | # distributed with this work for additional information 4 | # regarding copyright ownership. The ASF licenses this file 5 | # to you under the Apache License, Version 2.0 (the 6 | # "License"); you may not use this file except in compliance 7 | # with the License. 
You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # https://github.com/apache/storm/blob/master/examples/storm-starter/multilang/resources/splitsentence.py 18 | 19 | import storm 20 | import urllib2 21 | from bs4 import BeautifulSoup 22 | 23 | class URLBolt(storm.BasicBolt): 24 | def process(self, tup): 25 | url = tup.values[0] 26 | # python urllib2 27 | try: 28 | html = urllib2.urlopen(url).read() 29 | 30 | # using BeautifulSoup, "Making the Soup" 31 | soup = BeautifulSoup(html) 32 | # return title and paragraph tags 33 | urlText = soup.findAll({'title' : True, 'p' : True}) 34 | 35 | #emit tuple if string exists 36 | if urlText: 37 | [storm.emit([t.string]) for t in urlText] 38 | except: 39 | pass 40 | 41 | URLBolt().run() 42 | -------------------------------------------------------------------------------- /lesson3/stage4/src/jvm/udacity/storm/CountBolt.java: -------------------------------------------------------------------------------- 1 | package udacity.storm; 2 | 3 | import backtype.storm.Config; 4 | import backtype.storm.LocalCluster; 5 | import backtype.storm.StormSubmitter; 6 | import backtype.storm.spout.SpoutOutputCollector; 7 | import backtype.storm.task.OutputCollector; 8 | import backtype.storm.task.TopologyContext; 9 | import backtype.storm.testing.TestWordSpout; 10 | import backtype.storm.topology.OutputFieldsDeclarer; 11 | import backtype.storm.topology.TopologyBuilder; 12 | import backtype.storm.topology.base.BaseRichSpout; 13 | import backtype.storm.topology.base.BaseRichBolt; 14 | import backtype.storm.tuple.Fields; 15 | import 
backtype.storm.tuple.Tuple; 16 | import backtype.storm.tuple.Values; 17 | import backtype.storm.utils.Utils; 18 | 19 | import java.util.HashMap; 20 | import java.util.Map; 21 | 22 | /** 23 | * A bolt that counts the words that it receives 24 | */ 25 | public class CountBolt extends BaseRichBolt 26 | { 27 | // To output tuples from this bolt to the next stage bolts, if any 28 | private OutputCollector collector; 29 | 30 | // Map to store the count of the words 31 | private Map countMap; 32 | 33 | @Override 34 | public void prepare( 35 | Map map, 36 | TopologyContext topologyContext, 37 | OutputCollector outputCollector) 38 | { 39 | 40 | // save the collector for emitting tuples 41 | collector = outputCollector; 42 | 43 | // create and initialize the map 44 | countMap = new HashMap(); 45 | } 46 | 47 | @Override 48 | public void execute(Tuple tuple) 49 | { 50 | // get the word from the 1st column of incoming tuple 51 | String word = tuple.getString(0); 52 | 53 | // check if the word is present in the map 54 | if (countMap.get(word) == null) { 55 | 56 | // not present, add the word with a count of 1 57 | countMap.put(word, 1); 58 | } else { 59 | 60 | // already there, hence get the count 61 | Integer val = countMap.get(word); 62 | 63 | // increment the count and save it to the map 64 | countMap.put(word, ++val); 65 | } 66 | 67 | // emit the word and count 68 | collector.emit(new Values(word, countMap.get(word))); 69 | } 70 | 71 | @Override 72 | public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) 73 | { 74 | // tell storm the schema of the output tuple for this spout 75 | // tuple consists of a two columns called 'word' and 'count' 76 | 77 | // declare the first column 'word', second column 'count' 78 | outputFieldsDeclarer.declare(new Fields("word","count")); 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /lesson3/stage4/src/jvm/udacity/storm/ParseTweetBolt.java: 
-------------------------------------------------------------------------------- 1 | package udacity.storm; 2 | 3 | import backtype.storm.Config; 4 | import backtype.storm.LocalCluster; 5 | import backtype.storm.StormSubmitter; 6 | import backtype.storm.spout.SpoutOutputCollector; 7 | import backtype.storm.task.OutputCollector; 8 | import backtype.storm.task.TopologyContext; 9 | import backtype.storm.testing.TestWordSpout; 10 | import backtype.storm.topology.OutputFieldsDeclarer; 11 | import backtype.storm.topology.TopologyBuilder; 12 | import backtype.storm.topology.base.BaseRichSpout; 13 | import backtype.storm.topology.base.BaseRichBolt; 14 | import backtype.storm.tuple.Fields; 15 | import backtype.storm.tuple.Tuple; 16 | import backtype.storm.tuple.Values; 17 | import backtype.storm.utils.Utils; 18 | 19 | import java.util.Map; 20 | 21 | /** 22 | * A bolt that parses the tweet into words 23 | */ 24 | public class ParseTweetBolt extends BaseRichBolt 25 | { 26 | // To output tuples from this bolt to the count bolt 27 | OutputCollector collector; 28 | 29 | @Override 30 | public void prepare( 31 | Map map, 32 | TopologyContext topologyContext, 33 | OutputCollector outputCollector) 34 | { 35 | // save the output collector for emitting tuples 36 | collector = outputCollector; 37 | } 38 | 39 | @Override 40 | public void execute(Tuple tuple) 41 | { 42 | // get the 1st column 'tweet' from tuple 43 | String tweet = tuple.getString(0); 44 | 45 | // provide the delimiters for splitting the tweet 46 | String delims = "[ .,?!]+"; 47 | 48 | // now split the tweet into tokens 49 | String[] tokens = tweet.split(delims); 50 | 51 | // for each token/word, emit it 52 | for (String token: tokens) { 53 | collector.emit(new Values(token)); 54 | } 55 | } 56 | 57 | @Override 58 | public void declareOutputFields(OutputFieldsDeclarer declarer) 59 | { 60 | // tell storm the schema of the output tuple for this spout 61 | // tuple consists of a single column called 'tweet-word' 62 | 
declarer.declare(new Fields("tweet-word")); 63 | } 64 | } 65 | -------------------------------------------------------------------------------- /lesson3/stage4/src/jvm/udacity/storm/ReportBolt.java: -------------------------------------------------------------------------------- 1 | package udacity.storm; 2 | 3 | import backtype.storm.Config; 4 | import backtype.storm.LocalCluster; 5 | import backtype.storm.StormSubmitter; 6 | import backtype.storm.spout.SpoutOutputCollector; 7 | import backtype.storm.task.OutputCollector; 8 | import backtype.storm.task.TopologyContext; 9 | import backtype.storm.testing.TestWordSpout; 10 | import backtype.storm.topology.OutputFieldsDeclarer; 11 | import backtype.storm.topology.TopologyBuilder; 12 | import backtype.storm.topology.base.BaseRichSpout; 13 | import backtype.storm.topology.base.BaseRichBolt; 14 | import backtype.storm.tuple.Fields; 15 | import backtype.storm.tuple.Tuple; 16 | import backtype.storm.tuple.Values; 17 | import backtype.storm.utils.Utils; 18 | 19 | import java.util.Map; 20 | 21 | import com.lambdaworks.redis.RedisClient; 22 | import com.lambdaworks.redis.RedisConnection; 23 | 24 | /** 25 | * A bolt that prints the word and count to redis 26 | */ 27 | public class ReportBolt extends BaseRichBolt 28 | { 29 | // place holder to keep the connection to redis 30 | transient RedisConnection redis; 31 | 32 | @Override 33 | public void prepare( 34 | Map map, 35 | TopologyContext topologyContext, 36 | OutputCollector outputCollector) 37 | { 38 | // instantiate a redis connection 39 | RedisClient client = new RedisClient("localhost",6379); 40 | 41 | // initiate the actual connection 42 | redis = client.connect(); 43 | } 44 | 45 | @Override 46 | public void execute(Tuple tuple) 47 | { 48 | // access the first column 'word' 49 | String word = tuple.getStringByField("word"); 50 | 51 | // access the second column 'count' 52 | Integer count = tuple.getIntegerByField("count"); 53 | 54 | // publish the word count to redis 
using word as the key 55 | redis.publish("WordCountTopology", word + "|" + Long.toString(count)); 56 | } 57 | 58 | public void declareOutputFields(OutputFieldsDeclarer declarer) 59 | { 60 | // nothing to add - since it is the final bolt 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /lesson3/stage4/src/jvm/udacity/storm/TweetTopology.java: -------------------------------------------------------------------------------- 1 | package udacity.storm; 2 | 3 | import backtype.storm.Config; 4 | import backtype.storm.LocalCluster; 5 | import backtype.storm.StormSubmitter; 6 | import backtype.storm.spout.SpoutOutputCollector; 7 | import backtype.storm.task.OutputCollector; 8 | import backtype.storm.task.TopologyContext; 9 | import backtype.storm.testing.TestWordSpout; 10 | import backtype.storm.topology.OutputFieldsDeclarer; 11 | import backtype.storm.topology.TopologyBuilder; 12 | import backtype.storm.topology.base.BaseRichSpout; 13 | import backtype.storm.topology.base.BaseRichBolt; 14 | import backtype.storm.tuple.Fields; 15 | import backtype.storm.tuple.Tuple; 16 | import backtype.storm.tuple.Values; 17 | import backtype.storm.utils.Utils; 18 | 19 | class TweetTopology 20 | { 21 | public static void main(String[] args) throws Exception 22 | { 23 | // create the topology 24 | TopologyBuilder builder = new TopologyBuilder(); 25 | 26 | /* 27 | * In order to create the spout, you need to get twitter credentials 28 | * If you need to use Twitter firehose/Tweet stream for your idea, 29 | * create a set of credentials by following the instructions at 30 | * 31 | * https://dev.twitter.com/discussions/631 32 | * 33 | */ 34 | 35 | // now create the tweet spout with the credentials 36 | TweetSpout tweetSpout = new TweetSpout( 37 | "[Your customer key]", 38 | "[Your secret key]", 39 | "[Your access token]", 40 | "[Your access secret]" 41 | ); 42 | 43 | // attach the tweet spout to the topology - parallelism of 1 44 | 
builder.setSpout("tweet-spout", tweetSpout, 1); 45 | 46 | // attach the parse tweet bolt using shuffle grouping 47 | builder.setBolt("parse-tweet-bolt", new ParseTweetBolt(), 10).shuffleGrouping("tweet-spout"); 48 | 49 | // attach the count bolt using fields grouping - parallelism of 15 50 | //builder.setBolt("count-bolt", new CountBolt(), 15).fieldsGrouping("parse-tweet-bolt", new Fields("tweet-word")); 51 | 52 | // attach rolling count bolt using fields grouping - parallelism of 5 53 | builder.setBolt("rolling-count-bolt", new RollingCountBolt(30, 10), 1).fieldsGrouping("parse-tweet-bolt", new Fields("tweet-word")); 54 | 55 | // attach the report bolt using global grouping - parallelism of 1 56 | builder.setBolt("report-bolt", new ReportBolt(), 1).globalGrouping("rolling-count-bolt"); 57 | 58 | // create the default config object 59 | Config conf = new Config(); 60 | 61 | // set the config in debugging mode 62 | conf.setDebug(true); 63 | 64 | if (args != null && args.length > 0) { 65 | 66 | // run it in a live cluster 67 | 68 | // set the number of workers for running all spout and bolt tasks 69 | conf.setNumWorkers(3); 70 | 71 | // create the topology and submit with config 72 | StormSubmitter.submitTopology(args[0], conf, builder.createTopology()); 73 | 74 | } else { 75 | 76 | // run it in a simulated local cluster 77 | 78 | // set the number of threads to run - similar to setting number of workers in live cluster 79 | conf.setMaxTaskParallelism(3); 80 | 81 | // create the local cluster instance 82 | LocalCluster cluster = new LocalCluster(); 83 | 84 | // submit the topology to the local cluster 85 | cluster.submitTopology("tweet-word-count", conf, builder.createTopology()); 86 | 87 | // let the topology run for 300 seconds. note topologies never terminate! 
88 | Utils.sleep(300000); 89 | 90 | // now kill the topology 91 | cluster.killTopology("tweet-word-count"); 92 | 93 | // we are done, so shutdown the local cluster 94 | cluster.shutdown(); 95 | } 96 | } 97 | } 98 | -------------------------------------------------------------------------------- /lesson3/stage4/src/jvm/udacity/storm/tools/NthLastModifiedTimeTracker.java: -------------------------------------------------------------------------------- 1 | package udacity.storm.tools; 2 | 3 | import backtype.storm.utils.Time; 4 | import org.apache.commons.collections.buffer.CircularFifoBuffer; 5 | 6 | /** 7 | * This class tracks the time-since-last-modify of a "thing" in a rolling fashion. 8 | *

9 | * For example, create a 5-slot tracker to track the five most recent time-since-last-modify. 10 | *

11 | * You must manually "mark" that the "something" that you want to track -- in terms of modification times -- has just 12 | * been modified. 13 | */ 14 | public class NthLastModifiedTimeTracker { 15 | 16 | private static final int MILLIS_IN_SEC = 1000; 17 | 18 | private final CircularFifoBuffer lastModifiedTimesMillis; 19 | 20 | public NthLastModifiedTimeTracker(int numTimesToTrack) { 21 | if (numTimesToTrack < 1) { 22 | throw new IllegalArgumentException( 23 | "numTimesToTrack must be greater than zero (you requested " + numTimesToTrack + ")"); 24 | } 25 | lastModifiedTimesMillis = new CircularFifoBuffer(numTimesToTrack); 26 | initLastModifiedTimesMillis(); 27 | } 28 | 29 | private void initLastModifiedTimesMillis() { 30 | long nowCached = now(); 31 | for (int i = 0; i < lastModifiedTimesMillis.maxSize(); i++) { 32 | lastModifiedTimesMillis.add(Long.valueOf(nowCached)); 33 | } 34 | } 35 | 36 | private long now() { 37 | return Time.currentTimeMillis(); 38 | } 39 | 40 | public int secondsSinceOldestModification() { 41 | long modifiedTimeMillis = ((Long) lastModifiedTimesMillis.get()).longValue(); 42 | return (int) ((now() - modifiedTimeMillis) / MILLIS_IN_SEC); 43 | } 44 | 45 | public void markAsModified() { 46 | updateLastModifiedTime(); 47 | } 48 | 49 | private void updateLastModifiedTime() { 50 | lastModifiedTimesMillis.add(now()); 51 | } 52 | 53 | } 54 | -------------------------------------------------------------------------------- /lesson3/stage4/src/jvm/udacity/storm/tools/SlotBasedCounter.java: -------------------------------------------------------------------------------- 1 | //package storm.starter.tools; 2 | package udacity.storm.tools; 3 | 4 | import java.io.Serializable; 5 | import java.util.HashMap; 6 | import java.util.HashSet; 7 | import java.util.Map; 8 | import java.util.Set; 9 | 10 | /** 11 | * This class provides per-slot counts of the occurrences of objects. 12 | *

13 | * It can be used, for instance, as a building block for implementing sliding window counting of objects. 14 | * 15 | * @param The type of those objects we want to count. 16 | */ 17 | public final class SlotBasedCounter implements Serializable { 18 | 19 | private static final long serialVersionUID = 4858185737378394432L; 20 | 21 | private final Map objToCounts = new HashMap(); 22 | private final int numSlots; 23 | 24 | public SlotBasedCounter(int numSlots) { 25 | if (numSlots <= 0) { 26 | throw new IllegalArgumentException("Number of slots must be greater than zero (you requested " + numSlots + ")"); 27 | } 28 | this.numSlots = numSlots; 29 | } 30 | 31 | public void incrementCount(T obj, int slot) { 32 | long[] counts = objToCounts.get(obj); 33 | if (counts == null) { 34 | counts = new long[this.numSlots]; 35 | objToCounts.put(obj, counts); 36 | } 37 | counts[slot]++; 38 | } 39 | 40 | public long getCount(T obj, int slot) { 41 | long[] counts = objToCounts.get(obj); 42 | if (counts == null) { 43 | return 0; 44 | } 45 | else { 46 | return counts[slot]; 47 | } 48 | } 49 | 50 | public Map getCounts() { 51 | Map result = new HashMap(); 52 | for (T obj : objToCounts.keySet()) { 53 | result.put(obj, computeTotalCount(obj)); 54 | } 55 | return result; 56 | } 57 | 58 | private long computeTotalCount(T obj) { 59 | long[] curr = objToCounts.get(obj); 60 | long total = 0; 61 | for (long l : curr) { 62 | total += l; 63 | } 64 | return total; 65 | } 66 | 67 | /** 68 | * Reset the slot count of any tracked objects to zero for the given slot. 
69 | * 70 | * @param slot 71 | */ 72 | public void wipeSlot(int slot) { 73 | for (T obj : objToCounts.keySet()) { 74 | resetSlotCountToZero(obj, slot); 75 | } 76 | } 77 | 78 | private void resetSlotCountToZero(T obj, int slot) { 79 | long[] counts = objToCounts.get(obj); 80 | counts[slot] = 0; 81 | } 82 | 83 | private boolean shouldBeRemovedFromCounter(T obj) { 84 | return computeTotalCount(obj) == 0; 85 | } 86 | 87 | /** 88 | * Remove any object from the counter whose total count is zero (to free up memory). 89 | */ 90 | public void wipeZeros() { 91 | Set objToBeRemoved = new HashSet(); 92 | for (T obj : objToCounts.keySet()) { 93 | if (shouldBeRemovedFromCounter(obj)) { 94 | objToBeRemoved.add(obj); 95 | } 96 | } 97 | for (T obj : objToBeRemoved) { 98 | objToCounts.remove(obj); 99 | } 100 | } 101 | 102 | } 103 | -------------------------------------------------------------------------------- /lesson3/stage4/src/jvm/udacity/storm/tools/TupleHelpers.java: -------------------------------------------------------------------------------- 1 | //package storm.starter.util; 2 | package udacity.storm.tools; 3 | 4 | import backtype.storm.Constants; 5 | import backtype.storm.tuple.Tuple; 6 | 7 | public final class TupleHelpers { 8 | 9 | private TupleHelpers() { 10 | } 11 | 12 | public static boolean isTickTuple(Tuple tuple) { 13 | return tuple.getSourceComponent().equals(Constants.SYSTEM_COMPONENT_ID) && tuple.getSourceStreamId().equals( 14 | Constants.SYSTEM_TICK_STREAM_ID); 15 | } 16 | 17 | } 18 | -------------------------------------------------------------------------------- /lesson3/stage5/src/jvm/udacity/storm/AbstractRankerBolt.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. 
The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package udacity.storm; 19 | 20 | import backtype.storm.Config; 21 | import backtype.storm.topology.BasicOutputCollector; 22 | import backtype.storm.topology.OutputFieldsDeclarer; 23 | import backtype.storm.topology.base.BaseBasicBolt; 24 | import backtype.storm.tuple.Fields; 25 | import backtype.storm.tuple.Tuple; 26 | import backtype.storm.tuple.Values; 27 | import org.apache.log4j.Logger; 28 | //import storm.starter.tools.Rankings; 29 | //import storm.starter.util.TupleHelpers; 30 | 31 | import udacity.storm.tools.Rankings; 32 | import udacity.storm.tools.TupleHelpers; 33 | 34 | import java.util.HashMap; 35 | import java.util.Map; 36 | 37 | /** 38 | * This abstract bolt provides the basic behavior of bolts that rank objects according to their count. 39 | *

40 | * It uses a template method design pattern for {@link AbstractRankerBolt#execute(Tuple, BasicOutputCollector)} to allow 41 | * actual bolt implementations to specify how incoming tuples are processed, i.e. how the objects embedded within those 42 | * tuples are retrieved and counted. 43 | */ 44 | public abstract class AbstractRankerBolt extends BaseBasicBolt { 45 | 46 | private static final long serialVersionUID = 4931640198501530202L; 47 | private static final int DEFAULT_EMIT_FREQUENCY_IN_SECONDS = 2; 48 | private static final int DEFAULT_COUNT = 10; 49 | 50 | private final int emitFrequencyInSeconds; 51 | private final int count; 52 | private final Rankings rankings; 53 | 54 | public AbstractRankerBolt() { 55 | this(DEFAULT_COUNT, DEFAULT_EMIT_FREQUENCY_IN_SECONDS); 56 | } 57 | 58 | public AbstractRankerBolt(int topN) { 59 | this(topN, DEFAULT_EMIT_FREQUENCY_IN_SECONDS); 60 | } 61 | 62 | public AbstractRankerBolt(int topN, int emitFrequencyInSeconds) { 63 | if (topN < 1) { 64 | throw new IllegalArgumentException("topN must be >= 1 (you requested " + topN + ")"); 65 | } 66 | if (emitFrequencyInSeconds < 1) { 67 | throw new IllegalArgumentException( 68 | "The emit frequency must be >= 1 seconds (you requested " + emitFrequencyInSeconds + " seconds)"); 69 | } 70 | count = topN; 71 | this.emitFrequencyInSeconds = emitFrequencyInSeconds; 72 | rankings = new Rankings(count); 73 | } 74 | 75 | protected Rankings getRankings() { 76 | return rankings; 77 | } 78 | 79 | /** 80 | * This method functions as a template method (design pattern). 
81 | */ 82 | @Override 83 | public final void execute(Tuple tuple, BasicOutputCollector collector) { 84 | if (TupleHelpers.isTickTuple(tuple)) { 85 | getLogger().debug("Received tick tuple, triggering emit of current rankings"); 86 | emitRankings(collector); 87 | } 88 | else { 89 | updateRankingsWithTuple(tuple); 90 | } 91 | } 92 | 93 | abstract void updateRankingsWithTuple(Tuple tuple); 94 | 95 | private void emitRankings(BasicOutputCollector collector) { 96 | collector.emit(new Values(rankings.copy())); 97 | getLogger().debug("Rankings: " + rankings); 98 | } 99 | 100 | @Override 101 | public void declareOutputFields(OutputFieldsDeclarer declarer) { 102 | declarer.declare(new Fields("rankings")); 103 | } 104 | 105 | @Override 106 | public Map getComponentConfiguration() { 107 | Map conf = new HashMap(); 108 | conf.put(Config.TOPOLOGY_TICK_TUPLE_FREQ_SECS, emitFrequencyInSeconds); 109 | return conf; 110 | } 111 | 112 | abstract Logger getLogger(); 113 | } 114 | -------------------------------------------------------------------------------- /lesson3/stage5/src/jvm/udacity/storm/CountBolt.java: -------------------------------------------------------------------------------- 1 | package udacity.storm; 2 | 3 | import backtype.storm.Config; 4 | import backtype.storm.LocalCluster; 5 | import backtype.storm.StormSubmitter; 6 | import backtype.storm.spout.SpoutOutputCollector; 7 | import backtype.storm.task.OutputCollector; 8 | import backtype.storm.task.TopologyContext; 9 | import backtype.storm.testing.TestWordSpout; 10 | import backtype.storm.topology.OutputFieldsDeclarer; 11 | import backtype.storm.topology.TopologyBuilder; 12 | import backtype.storm.topology.base.BaseRichSpout; 13 | import backtype.storm.topology.base.BaseRichBolt; 14 | import backtype.storm.tuple.Fields; 15 | import backtype.storm.tuple.Tuple; 16 | import backtype.storm.tuple.Values; 17 | import backtype.storm.utils.Utils; 18 | 19 | import java.util.HashMap; 20 | import java.util.Map; 21 | 22 | /** 
23 | * A bolt that counts the words that it receives 24 | */ 25 | public class CountBolt extends BaseRichBolt 26 | { 27 | // To output tuples from this bolt to the next stage bolts, if any 28 | private OutputCollector collector; 29 | 30 | // Map to store the count of the words 31 | private Map countMap; 32 | 33 | @Override 34 | public void prepare( 35 | Map map, 36 | TopologyContext topologyContext, 37 | OutputCollector outputCollector) 38 | { 39 | 40 | // save the collector for emitting tuples 41 | collector = outputCollector; 42 | 43 | // create and initialize the map 44 | countMap = new HashMap(); 45 | } 46 | 47 | @Override 48 | public void execute(Tuple tuple) 49 | { 50 | // get the word from the 1st column of incoming tuple 51 | String word = tuple.getString(0); 52 | 53 | // check if the word is present in the map 54 | if (countMap.get(word) == null) { 55 | 56 | // not present, add the word with a count of 1 57 | countMap.put(word, 1L); 58 | } else { 59 | 60 | // already there, hence get the count 61 | Long val = countMap.get(word); 62 | 63 | // increment the count and save it to the map 64 | countMap.put(word, ++val); 65 | } 66 | 67 | // emit the word and count 68 | collector.emit(new Values(word, countMap.get(word))); 69 | } 70 | 71 | @Override 72 | public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer) 73 | { 74 | // tell storm the schema of the output tuple for this spout 75 | // tuple consists of a two columns called 'word' and 'count' 76 | 77 | // declare the first column 'word', second column 'count' 78 | outputFieldsDeclarer.declare(new Fields("word","count")); 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /lesson3/stage5/src/jvm/udacity/storm/IntermediateRankingsBolt.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. 
See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package udacity.storm; 19 | 20 | import backtype.storm.tuple.Tuple; 21 | import org.apache.log4j.Logger; 22 | //import storm.starter.tools.Rankable; 23 | //import storm.starter.tools.RankableObjectWithFields; 24 | 25 | import udacity.storm.tools.Rankable; 26 | import udacity.storm.tools.RankableObjectWithFields; 27 | 28 | /** 29 | * This bolt ranks incoming objects by their count. 30 | *

31 | * It assumes the input tuples to adhere to the following format: (object, object_count, additionalField1, 32 | * additionalField2, ..., additionalFieldN). 33 | */ 34 | public final class IntermediateRankingsBolt extends AbstractRankerBolt { 35 | 36 | private static final long serialVersionUID = -1369800530256637409L; 37 | private static final Logger LOG = Logger.getLogger(IntermediateRankingsBolt.class); 38 | 39 | public IntermediateRankingsBolt() { 40 | super(); 41 | } 42 | 43 | public IntermediateRankingsBolt(int topN) { 44 | super(topN); 45 | } 46 | 47 | public IntermediateRankingsBolt(int topN, int emitFrequencyInSeconds) { 48 | super(topN, emitFrequencyInSeconds); 49 | } 50 | 51 | @Override 52 | void updateRankingsWithTuple(Tuple tuple) { 53 | Rankable rankable = RankableObjectWithFields.from(tuple); 54 | super.getRankings().updateWith(rankable); 55 | } 56 | 57 | @Override 58 | Logger getLogger() { 59 | return LOG; 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /lesson3/stage5/src/jvm/udacity/storm/ParseTweetBolt.java: -------------------------------------------------------------------------------- 1 | package udacity.storm; 2 | 3 | import backtype.storm.Config; 4 | import backtype.storm.LocalCluster; 5 | import backtype.storm.StormSubmitter; 6 | import backtype.storm.spout.SpoutOutputCollector; 7 | import backtype.storm.task.OutputCollector; 8 | import backtype.storm.task.TopologyContext; 9 | import backtype.storm.testing.TestWordSpout; 10 | import backtype.storm.topology.OutputFieldsDeclarer; 11 | import backtype.storm.topology.TopologyBuilder; 12 | import backtype.storm.topology.base.BaseRichSpout; 13 | import backtype.storm.topology.base.BaseRichBolt; 14 | import backtype.storm.tuple.Fields; 15 | import backtype.storm.tuple.Tuple; 16 | import backtype.storm.tuple.Values; 17 | import backtype.storm.utils.Utils; 18 | 19 | import java.util.Map; 20 | import java.util.Arrays; 21 | 22 | /** 23 | * A bolt 
that parses the tweet into words.
 *
 * Splits each incoming tweet on whitespace/punctuation and emits only
 * tokens that start with '#', are longer than 3 characters, and are not
 * in the stop-word list.
 */
public class ParseTweetBolt extends BaseRichBolt
{
  // To output tuples from this bolt to the count bolt
  OutputCollector collector;

  // Stop words to ignore. NOTE(review): entries of length <= 3 (e.g.
  // "rt", "to") can never match because execute() only tests tokens
  // longer than 3 characters; duplicates here are harmless.
  private String[] skipWords = {"rt", "to", "me","la","on","that","que",
    "followers","watch","know","not","have","like","I'm","new","good","do",
    "more","es","te","followers","Followers","las","you","and","de","my","is",
    "en","una","in","for","this","go","en","all","no","don't","up","are",
    "http","http:","https","https:","http://","https://","with","just","your",
    "para","want","your","you're","really","video","it's","when","they","their","much",
    "would","what","them","todo","FOLLOW","retweet","RETWEET","even","right","like",
    "bien","Like","will","Will","pero","Pero","can't","were","Can't","Were","TWITTER",
    "make","take","This","from","about","como","esta","follows","followed"};

  // Set view of skipWords built once, so execute() does an O(1)
  // membership test instead of allocating and scanning a List per token
  private java.util.Set<String> skipSet =
      new java.util.HashSet<String>(Arrays.asList(skipWords));

  @Override
  public void prepare(
      Map map,
      TopologyContext topologyContext,
      OutputCollector outputCollector)
  {
    // save the output collector for emitting tuples
    collector = outputCollector;
  }

  @Override
  public void execute(Tuple tuple)
  {
    // get the 1st column 'tweet' from tuple
    String tweet = tuple.getString(0);

    // split the tweet into tokens on spaces and basic punctuation
    String delims = "[ .,?!]+";
    String[] tokens = tweet.split(delims);

    // emit only hashtag tokens longer than 3 chars that are not stop words
    for (String token : tokens) {
      if (token.length() > 3 && !skipSet.contains(token) && token.startsWith("#")) {
        collector.emit(new Values(token));
      }
    }
  }

  @Override
  public void declareOutputFields(OutputFieldsDeclarer declarer)
  {
    // tell storm the schema of the output tuple for this spout
    // tuple consists of
a single column called 'tweet-word' 78 | declarer.declare(new Fields("tweet-word")); 79 | } 80 | 81 | } 82 | -------------------------------------------------------------------------------- /lesson3/stage5/src/jvm/udacity/storm/ReportBolt.java: -------------------------------------------------------------------------------- 1 | package udacity.storm; 2 | 3 | import backtype.storm.Config; 4 | import backtype.storm.LocalCluster; 5 | import backtype.storm.StormSubmitter; 6 | import backtype.storm.spout.SpoutOutputCollector; 7 | import backtype.storm.task.OutputCollector; 8 | import backtype.storm.task.TopologyContext; 9 | import backtype.storm.testing.TestWordSpout; 10 | import backtype.storm.topology.OutputFieldsDeclarer; 11 | import backtype.storm.topology.TopologyBuilder; 12 | import backtype.storm.topology.base.BaseRichSpout; 13 | import backtype.storm.topology.base.BaseRichBolt; 14 | import backtype.storm.tuple.Fields; 15 | import backtype.storm.tuple.Tuple; 16 | import backtype.storm.tuple.Values; 17 | import backtype.storm.utils.Utils; 18 | 19 | import java.util.Map; 20 | 21 | import com.lambdaworks.redis.RedisClient; 22 | import com.lambdaworks.redis.RedisConnection; 23 | 24 | import udacity.storm.tools.*; 25 | import udacity.storm.tools.Rankings; 26 | import com.google.common.collect.ImmutableList; 27 | import com.google.common.collect.Lists; 28 | 29 | /** 30 | * A bolt that prints the word and count to redis 31 | */ 32 | public class ReportBolt extends BaseRichBolt 33 | { 34 | // place holder to keep the connection to redis 35 | transient RedisConnection redis; 36 | 37 | @Override 38 | public void prepare( 39 | Map map, 40 | TopologyContext topologyContext, 41 | OutputCollector outputCollector) 42 | { 43 | // instantiate a redis connection 44 | RedisClient client = new RedisClient("localhost",6379); 45 | 46 | // initiate the actual connection 47 | redis = client.connect(); 48 | } 49 | 50 | @Override 51 | public void execute(Tuple tuple) 52 | { 53 | Rankings 
rankableList = (Rankings) tuple.getValue(0); 54 | 55 | for (Rankable r: rankableList.getRankings()){ 56 | String word = r.getObject().toString(); 57 | Long count = r.getCount(); 58 | redis.publish("WordCountTopology", word + "|" + Long.toString(count)); 59 | } 60 | 61 | // access the first column 'word' 62 | //String word = tuple.getStringByField("word"); 63 | 64 | // access the second column 'count' 65 | //String word = rankedWords.toString(); 66 | //Integer count = tuple.getIntegerByField("count"); 67 | //Long count = new Long(100); 68 | 69 | // publish the word count to redis using word as the key 70 | //redis.publish("WordCountTopology", word + ":" + Long.toString(count)); 71 | } 72 | 73 | public void declareOutputFields(OutputFieldsDeclarer declarer) 74 | { 75 | // nothing to add - since it is the final bolt 76 | } 77 | } 78 | -------------------------------------------------------------------------------- /lesson3/stage5/src/jvm/udacity/storm/TopNTweetTopology.java: -------------------------------------------------------------------------------- 1 | package udacity.storm; 2 | 3 | import backtype.storm.Config; 4 | import backtype.storm.LocalCluster; 5 | import backtype.storm.StormSubmitter; 6 | import backtype.storm.spout.SpoutOutputCollector; 7 | import backtype.storm.task.OutputCollector; 8 | import backtype.storm.task.TopologyContext; 9 | import backtype.storm.testing.TestWordSpout; 10 | import backtype.storm.topology.OutputFieldsDeclarer; 11 | import backtype.storm.topology.TopologyBuilder; 12 | import backtype.storm.topology.base.BaseRichSpout; 13 | import backtype.storm.topology.base.BaseRichBolt; 14 | import backtype.storm.tuple.Fields; 15 | import backtype.storm.tuple.Tuple; 16 | import backtype.storm.tuple.Values; 17 | import backtype.storm.utils.Utils; 18 | 19 | import udacity.storm.spout.RandomSentenceSpout; 20 | 21 | class TopNTweetTopology 22 | { 23 | public static void main(String[] args) throws Exception 24 | { 25 | //Variable TOP_N number of 
words 26 | int TOP_N = 10; 27 | // create the topology 28 | TopologyBuilder builder = new TopologyBuilder(); 29 | 30 | /* 31 | * In order to create the spout, you need to get twitter credentials 32 | * If you need to use Twitter firehose/Tweet stream for your idea, 33 | * create a set of credentials by following the instructions at 34 | * 35 | * https://dev.twitter.com/discussions/631 36 | * 37 | */ 38 | 39 | // now create the tweet spout with the credentials 40 | TweetSpout tweetSpout = new TweetSpout( 41 | "[Your customer key]", 42 | "[Your secret key]", 43 | "[Your access token]", 44 | "[Your access secret]" 45 | 46 | ); 47 | 48 | // attach the tweet spout to the topology - parallelism of 1 49 | builder.setSpout("tweet-spout", tweetSpout, 1); 50 | 51 | // attach the Random Sentence Spout to the topology - parallelism of 1 52 | //builder.setSpout("random-sentence-spout", new RandomSentenceSpout(), 1); 53 | 54 | // attach the parse tweet bolt using shuffle grouping 55 | builder.setBolt("parse-tweet-bolt", new ParseTweetBolt(), 10).shuffleGrouping("tweet-spout"); 56 | //builder.setBolt("parse-tweet-bolt", new ParseTweetBolt(), 10).shuffleGrouping("random-sentence-spout"); 57 | 58 | // attach the count bolt using fields grouping - parallelism of 15 59 | builder.setBolt("count-bolt", new CountBolt(), 15).fieldsGrouping("parse-tweet-bolt", new Fields("tweet-word")); 60 | 61 | // attach rolling count bolt using fields grouping - parallelism of 5 62 | // TEST 63 | //builder.setBolt("rolling-count-bolt", new RollingCountBolt(30, 10), 1).fieldsGrouping("parse-tweet-bolt", new Fields("tweet-word")); 64 | 65 | //from incubator-storm/.../storm/starter/RollingTopWords.java 66 | //builder.setBolt("intermediate-ranker", new IntermediateRankingsBolt(TOP_N), 4).fieldsGrouping("rolling-count-bolt", new Fields("obj")); 67 | 68 | builder.setBolt("intermediate-ranker", new IntermediateRankingsBolt(TOP_N), 4).fieldsGrouping("count-bolt", new Fields("word")); 69 | 
builder.setBolt("total-ranker", new TotalRankingsBolt(TOP_N)).globalGrouping("intermediate-ranker"); 70 | 71 | // attach the report bolt using global grouping - parallelism of 1 72 | builder.setBolt("report-bolt", new ReportBolt(), 1).globalGrouping("total-ranker"); 73 | 74 | // create the default config object 75 | Config conf = new Config(); 76 | 77 | // set the config in debugging mode 78 | conf.setDebug(true); 79 | 80 | if (args != null && args.length > 0) { 81 | 82 | // run it in a live cluster 83 | 84 | // set the number of workers for running all spout and bolt tasks 85 | conf.setNumWorkers(3); 86 | 87 | // create the topology and submit with config 88 | StormSubmitter.submitTopology(args[0], conf, builder.createTopology()); 89 | 90 | } else { 91 | 92 | // run it in a simulated local cluster 93 | 94 | // set the number of threads to run - similar to setting number of workers in live cluster 95 | conf.setMaxTaskParallelism(3); 96 | 97 | // create the local cluster instance 98 | LocalCluster cluster = new LocalCluster(); 99 | 100 | // submit the topology to the local cluster 101 | cluster.submitTopology("tweet-word-count", conf, builder.createTopology()); 102 | 103 | // let the topology run for 300 seconds. note topologies never terminate! 104 | Utils.sleep(300000); 105 | 106 | // now kill the topology 107 | cluster.killTopology("tweet-word-count"); 108 | 109 | // we are done, so shutdown the local cluster 110 | cluster.shutdown(); 111 | } 112 | } 113 | } 114 | -------------------------------------------------------------------------------- /lesson3/stage5/src/jvm/udacity/storm/TotalRankingsBolt.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. 
The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package udacity.storm; 19 | 20 | import backtype.storm.tuple.Tuple; 21 | import org.apache.log4j.Logger; 22 | //import storm.starter.tools.Rankings; 23 | 24 | import udacity.storm.tools.Rankings; 25 | 26 | /** 27 | * This bolt merges incoming {@link Rankings}. 28 | *

29 | * It can be used to merge intermediate rankings generated by {@link IntermediateRankingsBolt} into a final, 30 | * consolidated ranking. To do so, configure this bolt with a globalGrouping on {@link IntermediateRankingsBolt}. 31 | */ 32 | public final class TotalRankingsBolt extends AbstractRankerBolt { 33 | 34 | private static final long serialVersionUID = -8447525895532302198L; 35 | private static final Logger LOG = Logger.getLogger(TotalRankingsBolt.class); 36 | 37 | public TotalRankingsBolt() { 38 | super(); 39 | } 40 | 41 | public TotalRankingsBolt(int topN) { 42 | super(topN); 43 | } 44 | 45 | public TotalRankingsBolt(int topN, int emitFrequencyInSeconds) { 46 | super(topN, emitFrequencyInSeconds); 47 | } 48 | 49 | @Override 50 | void updateRankingsWithTuple(Tuple tuple) { 51 | Rankings rankingsToBeMerged = (Rankings) tuple.getValue(0); 52 | super.getRankings().updateWith(rankingsToBeMerged); 53 | super.getRankings().pruneZeroCounts(); 54 | } 55 | 56 | @Override 57 | Logger getLogger() { 58 | return LOG; 59 | } 60 | 61 | } 62 | -------------------------------------------------------------------------------- /lesson3/stage5/src/jvm/udacity/storm/spout/RandomSentenceSpout.java: -------------------------------------------------------------------------------- 1 | package udacity.storm.spout; 2 | 3 | import backtype.storm.spout.SpoutOutputCollector; 4 | import backtype.storm.task.TopologyContext; 5 | import backtype.storm.topology.OutputFieldsDeclarer; 6 | import backtype.storm.topology.base.BaseRichSpout; 7 | import backtype.storm.tuple.Fields; 8 | import backtype.storm.tuple.Values; 9 | import backtype.storm.utils.Utils; 10 | 11 | import java.util.Map; 12 | import java.util.Random; 13 | 14 | public class RandomSentenceSpout extends BaseRichSpout { 15 | SpoutOutputCollector _collector; 16 | Random _rand; 17 | 18 | 19 | @Override 20 | public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) { 21 | _collector = collector; 22 | _rand 
= new Random(); 23 | } 24 | 25 | @Override 26 | public void nextTuple() { 27 | Utils.sleep(100); 28 | String[] sentences = new String[]{ 29 | "the cow jumped over the moon", 30 | "an apple a day keeps the doctor away", 31 | "four score and seven years ago", 32 | "snow white and the seven dwarfs", 33 | "i am at two with nature" 34 | }; 35 | String sentence = sentences[_rand.nextInt(sentences.length)]; 36 | _collector.emit(new Values(sentence)); 37 | } 38 | 39 | @Override 40 | public void declareOutputFields(OutputFieldsDeclarer declarer) { 41 | declarer.declare(new Fields("sentence")); 42 | } 43 | 44 | } 45 | -------------------------------------------------------------------------------- /lesson3/stage5/src/jvm/udacity/storm/tools/NthLastModifiedTimeTracker.java: -------------------------------------------------------------------------------- 1 | package udacity.storm.tools; 2 | 3 | import backtype.storm.utils.Time; 4 | import org.apache.commons.collections.buffer.CircularFifoBuffer; 5 | 6 | /** 7 | * This class tracks the time-since-last-modify of a "thing" in a rolling fashion. 8 | *

9 | * For example, create a 5-slot tracker to track the five most recent time-since-last-modify. 10 | *

11 | * You must manually "mark" that the "something" that you want to track -- in terms of modification times -- has just 12 | * been modified. 13 | */ 14 | public class NthLastModifiedTimeTracker { 15 | 16 | private static final int MILLIS_IN_SEC = 1000; 17 | 18 | private final CircularFifoBuffer lastModifiedTimesMillis; 19 | 20 | public NthLastModifiedTimeTracker(int numTimesToTrack) { 21 | if (numTimesToTrack < 1) { 22 | throw new IllegalArgumentException( 23 | "numTimesToTrack must be greater than zero (you requested " + numTimesToTrack + ")"); 24 | } 25 | lastModifiedTimesMillis = new CircularFifoBuffer(numTimesToTrack); 26 | initLastModifiedTimesMillis(); 27 | } 28 | 29 | private void initLastModifiedTimesMillis() { 30 | long nowCached = now(); 31 | for (int i = 0; i < lastModifiedTimesMillis.maxSize(); i++) { 32 | lastModifiedTimesMillis.add(Long.valueOf(nowCached)); 33 | } 34 | } 35 | 36 | private long now() { 37 | return Time.currentTimeMillis(); 38 | } 39 | 40 | public int secondsSinceOldestModification() { 41 | long modifiedTimeMillis = ((Long) lastModifiedTimesMillis.get()).longValue(); 42 | return (int) ((now() - modifiedTimeMillis) / MILLIS_IN_SEC); 43 | } 44 | 45 | public void markAsModified() { 46 | updateLastModifiedTime(); 47 | } 48 | 49 | private void updateLastModifiedTime() { 50 | lastModifiedTimesMillis.add(now()); 51 | } 52 | 53 | } 54 | -------------------------------------------------------------------------------- /lesson3/stage5/src/jvm/udacity/storm/tools/Rankable.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. 
The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package udacity.storm.tools; 19 | 20 | public interface Rankable extends Comparable { 21 | 22 | Object getObject(); 23 | 24 | long getCount(); 25 | 26 | /** 27 | * Note: We do not defensively copy the object wrapped by the Rankable. It is passed as is. 28 | * 29 | * @return a defensive copy 30 | */ 31 | Rankable copy(); 32 | } 33 | -------------------------------------------------------------------------------- /lesson3/stage5/src/jvm/udacity/storm/tools/SlotBasedCounter.java: -------------------------------------------------------------------------------- 1 | //package storm.starter.tools; 2 | package udacity.storm.tools; 3 | 4 | import java.io.Serializable; 5 | import java.util.HashMap; 6 | import java.util.HashSet; 7 | import java.util.Map; 8 | import java.util.Set; 9 | 10 | /** 11 | * This class provides per-slot counts of the occurrences of objects. 12 | *

13 | * It can be used, for instance, as a building block for implementing sliding window counting of objects. 14 | * 15 | * @param The type of those objects we want to count. 16 | */ 17 | public final class SlotBasedCounter implements Serializable { 18 | 19 | private static final long serialVersionUID = 4858185737378394432L; 20 | 21 | private final Map objToCounts = new HashMap(); 22 | private final int numSlots; 23 | 24 | public SlotBasedCounter(int numSlots) { 25 | if (numSlots <= 0) { 26 | throw new IllegalArgumentException("Number of slots must be greater than zero (you requested " + numSlots + ")"); 27 | } 28 | this.numSlots = numSlots; 29 | } 30 | 31 | public void incrementCount(T obj, int slot) { 32 | long[] counts = objToCounts.get(obj); 33 | if (counts == null) { 34 | counts = new long[this.numSlots]; 35 | objToCounts.put(obj, counts); 36 | } 37 | counts[slot]++; 38 | } 39 | 40 | public long getCount(T obj, int slot) { 41 | long[] counts = objToCounts.get(obj); 42 | if (counts == null) { 43 | return 0; 44 | } 45 | else { 46 | return counts[slot]; 47 | } 48 | } 49 | 50 | public Map getCounts() { 51 | Map result = new HashMap(); 52 | for (T obj : objToCounts.keySet()) { 53 | result.put(obj, computeTotalCount(obj)); 54 | } 55 | return result; 56 | } 57 | 58 | private long computeTotalCount(T obj) { 59 | long[] curr = objToCounts.get(obj); 60 | long total = 0; 61 | for (long l : curr) { 62 | total += l; 63 | } 64 | return total; 65 | } 66 | 67 | /** 68 | * Reset the slot count of any tracked objects to zero for the given slot. 
69 | * 70 | * @param slot 71 | */ 72 | public void wipeSlot(int slot) { 73 | for (T obj : objToCounts.keySet()) { 74 | resetSlotCountToZero(obj, slot); 75 | } 76 | } 77 | 78 | private void resetSlotCountToZero(T obj, int slot) { 79 | long[] counts = objToCounts.get(obj); 80 | counts[slot] = 0; 81 | } 82 | 83 | private boolean shouldBeRemovedFromCounter(T obj) { 84 | return computeTotalCount(obj) == 0; 85 | } 86 | 87 | /** 88 | * Remove any object from the counter whose total count is zero (to free up memory). 89 | */ 90 | public void wipeZeros() { 91 | Set objToBeRemoved = new HashSet(); 92 | for (T obj : objToCounts.keySet()) { 93 | if (shouldBeRemovedFromCounter(obj)) { 94 | objToBeRemoved.add(obj); 95 | } 96 | } 97 | for (T obj : objToBeRemoved) { 98 | objToCounts.remove(obj); 99 | } 100 | } 101 | 102 | } 103 | -------------------------------------------------------------------------------- /lesson3/stage5/src/jvm/udacity/storm/tools/TupleHelpers.java: -------------------------------------------------------------------------------- 1 | //package storm.starter.util; 2 | package udacity.storm.tools; 3 | 4 | import backtype.storm.Constants; 5 | import backtype.storm.tuple.Tuple; 6 | 7 | public final class TupleHelpers { 8 | 9 | private TupleHelpers() { 10 | } 11 | 12 | public static boolean isTickTuple(Tuple tuple) { 13 | return tuple.getSourceComponent().equals(Constants.SYSTEM_COMPONENT_ID) && tuple.getSourceStreamId().equals( 14 | Constants.SYSTEM_TICK_STREAM_ID); 15 | } 16 | 17 | } 18 | -------------------------------------------------------------------------------- /lesson3/stage6/src/jvm/udacity/storm/ExclamationTopology.java: -------------------------------------------------------------------------------- 1 | package udacity.storm; 2 | 3 | import backtype.storm.Config; 4 | import backtype.storm.LocalCluster; 5 | import backtype.storm.StormSubmitter; 6 | import backtype.storm.task.OutputCollector; 7 | import backtype.storm.task.TopologyContext; 8 | import 
backtype.storm.testing.TestWordSpout; 9 | import backtype.storm.topology.OutputFieldsDeclarer; 10 | import backtype.storm.topology.TopologyBuilder; 11 | import backtype.storm.topology.base.BaseRichBolt; 12 | import backtype.storm.tuple.Fields; 13 | import backtype.storm.tuple.Tuple; 14 | import backtype.storm.tuple.Values; 15 | import backtype.storm.utils.Utils; 16 | 17 | import java.util.HashMap; 18 | import java.util.Map; 19 | 20 | //******* Import MyLikesSpout and MyNamesSpout 21 | 22 | 23 | 24 | /** 25 | * This is a basic example of a storm topology. 26 | * 27 | * This topology demonstrates how to add three exclamation marks '!!!' 28 | * to each word emitted 29 | * 30 | * This is an example for Udacity Real Time Analytics Course - ud381 31 | * 32 | */ 33 | public class ExclamationTopology { 34 | 35 | /** 36 | * A bolt that adds the exclamation marks '!!!' to word 37 | */ 38 | public static class ExclamationBolt extends BaseRichBolt 39 | { 40 | // To output tuples from this bolt to the next stage bolts, if any 41 | OutputCollector _collector; 42 | 43 | @Override 44 | public void prepare( 45 | Map map, 46 | TopologyContext topologyContext, 47 | OutputCollector collector) 48 | { 49 | // save the output collector for emitting tuples 50 | _collector = collector; 51 | } 52 | 53 | @Override 54 | public void execute(Tuple tuple) 55 | { 56 | //**** ADD COMPONENT ID 57 | 58 | /* 59 | * Use component id to modify behavior 60 | */ 61 | 62 | // get the column word from tuple 63 | String word = tuple.getString(0); 64 | 65 | // build the word with the exclamation marks appended 66 | StringBuilder exclamatedWord = new StringBuilder(); 67 | exclamatedWord.append(word).append("!!!"); 68 | 69 | // emit the word with exclamations 70 | _collector.emit(tuple, new Values(exclamatedWord.toString())); 71 | } 72 | 73 | @Override 74 | public void declareOutputFields(OutputFieldsDeclarer declarer) 75 | { 76 | // tell storm the schema of the output tuple for this spout 77 | 78 | // tuple 
consists of a single column called 'exclamated-word' 79 | declarer.declare(new Fields("exclamated-word")); 80 | } 81 | } 82 | 83 | public static void main(String[] args) throws Exception 84 | { 85 | // create the topology 86 | TopologyBuilder builder = new TopologyBuilder(); 87 | 88 | // attach the word spout to the topology - parallelism of 10 89 | builder.setSpout("word", new TestWordSpout(), 10); 90 | 91 | // attach the exclamation bolt to the topology - parallelism of 3 92 | builder.setBolt("exclaim1", new ExclamationBolt(), 3).shuffleGrouping("word"); 93 | 94 | // attach another exclamation bolt to the topology - parallelism of 2 95 | builder.setBolt("exclaim2", new ExclamationBolt(), 2).shuffleGrouping("exclaim1"); 96 | 97 | // create the default config object 98 | Config conf = new Config(); 99 | 100 | // set the config in debugging mode 101 | conf.setDebug(true); 102 | 103 | if (args != null && args.length > 0) { 104 | 105 | // run it in a live cluster 106 | 107 | // set the number of workers for running all spout and bolt tasks 108 | conf.setNumWorkers(3); 109 | 110 | // create the topology and submit with config 111 | StormSubmitter.submitTopology(args[0], conf, builder.createTopology()); 112 | 113 | } else { 114 | 115 | // run it in a simulated local cluster 116 | 117 | // create the local cluster instance 118 | LocalCluster cluster = new LocalCluster(); 119 | 120 | // submit the topology to the local cluster 121 | cluster.submitTopology("exclamation", conf, builder.createTopology()); 122 | 123 | // let the topology run for 30 seconds. note topologies never terminate! 
124 | Thread.sleep(30000); 125 | 126 | // kill the topology 127 | cluster.killTopology("exclamation"); 128 | 129 | // we are done, so shutdown the local cluster 130 | cluster.shutdown(); 131 | } 132 | } 133 | } 134 | -------------------------------------------------------------------------------- /lesson3/stage6/src/jvm/udacity/storm/ReportBolt.java: -------------------------------------------------------------------------------- 1 | package udacity.storm; 2 | 3 | import backtype.storm.Config; 4 | import backtype.storm.LocalCluster; 5 | import backtype.storm.StormSubmitter; 6 | import backtype.storm.spout.SpoutOutputCollector; 7 | import backtype.storm.task.OutputCollector; 8 | import backtype.storm.task.TopologyContext; 9 | import backtype.storm.testing.TestWordSpout; 10 | import backtype.storm.topology.OutputFieldsDeclarer; 11 | import backtype.storm.topology.TopologyBuilder; 12 | import backtype.storm.topology.base.BaseRichSpout; 13 | import backtype.storm.topology.base.BaseRichBolt; 14 | import backtype.storm.tuple.Fields; 15 | import backtype.storm.tuple.Tuple; 16 | import backtype.storm.tuple.Values; 17 | import backtype.storm.utils.Utils; 18 | 19 | import java.util.Map; 20 | 21 | import com.lambdaworks.redis.RedisClient; 22 | import com.lambdaworks.redis.RedisConnection; 23 | 24 | /** 25 | * A bolt that prints the word and count to redis 26 | */ 27 | public class ReportBolt extends BaseRichBolt 28 | { 29 | // place holder to keep the connection to redis 30 | transient RedisConnection redis; 31 | 32 | @Override 33 | public void prepare( 34 | Map map, 35 | TopologyContext topologyContext, 36 | OutputCollector outputCollector) 37 | { 38 | // instantiate a redis connection 39 | RedisClient client = new RedisClient("localhost",6379); 40 | 41 | // initiate the actual connection 42 | redis = client.connect(); 43 | } 44 | 45 | @Override 46 | public void execute(Tuple tuple) 47 | { 48 | // access the first column 'word' 49 | String word = 
tuple.getStringByField("word"); 50 | 51 | // access the second column 'count' 52 | Integer count = tuple.getIntegerByField("count"); 53 | //Integer count = 30; 54 | 55 | // publish the word count to redis using word as the key 56 | redis.publish("WordCountTopology", word + "|" + Long.toString(count)); 57 | } 58 | 59 | public void declareOutputFields(OutputFieldsDeclarer declarer) 60 | { 61 | // nothing to add - since it is the final bolt 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /lesson3/stage6/src/jvm/udacity/storm/spout/MyLikesSpout.java: -------------------------------------------------------------------------------- 1 | package udacity.storm.spout; 2 | 3 | import backtype.storm.spout.SpoutOutputCollector; 4 | import backtype.storm.task.TopologyContext; 5 | import backtype.storm.topology.OutputFieldsDeclarer; 6 | import backtype.storm.topology.base.BaseRichSpout; 7 | import backtype.storm.tuple.Fields; 8 | import backtype.storm.tuple.Values; 9 | import backtype.storm.utils.Utils; 10 | 11 | import java.util.Map; 12 | import java.util.Random; 13 | 14 | public class MyLikesSpout extends BaseRichSpout { 15 | SpoutOutputCollector _collector; 16 | Random _rand; 17 | 18 | 19 | @Override 20 | public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) { 21 | _collector = collector; 22 | _rand = new Random(); 23 | } 24 | 25 | @Override 26 | public void nextTuple() { 27 | Utils.sleep(100); 28 | String[] pairs = new String[]{ 29 | "Lewis # Udacity", 30 | "Taylor # Cinematography", 31 | "Justine # Dogs", 32 | "Liz # Soccer", 33 | "Kim # Art" 34 | }; 35 | String pair = pairs[_rand.nextInt(pairs.length)]; 36 | String name = pair.split("#")[0].trim(); 37 | String favorite = pair.split("#")[1].trim(); 38 | //** TO DO: update emit and declareOutputFields to 39 | //** emit "name" and "favorite" instead of "pair" 40 | _collector.emit(new Values(pair)); 41 | } 42 | 43 | @Override 44 | public void 
declareOutputFields(OutputFieldsDeclarer declarer) {
    declarer.declare(new Fields("pair"));
  }

}
--------------------------------------------------------------------------------
/lesson3/stage6/src/jvm/udacity/storm/spout/MyNamesSpout.java:
--------------------------------------------------------------------------------
package udacity.storm.spout;

import backtype.storm.spout.SpoutOutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichSpout;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Values;
import backtype.storm.utils.Utils;

import java.util.Map;
import java.util.Random;

/**
 * A spout that repeatedly emits one uniformly random first name per tuple.
 */
public class MyNamesSpout extends BaseRichSpout {

  // Fixed pool of names drawn from at random in nextTuple()
  private static final String[] NAMES = {
      "Taylor",
      "Justine",
      "Liz",
      "Kim",
      "Lewis"
  };

  SpoutOutputCollector _collector;
  Random _rand;

  @Override
  public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
    // keep the collector and seed the random source once per task
    _collector = collector;
    _rand = new Random();
  }

  @Override
  public void nextTuple() {
    // throttle emission, then emit one randomly chosen name
    Utils.sleep(100);
    _collector.emit(new Values(NAMES[_rand.nextInt(NAMES.length)]));
  }

  @Override
  public void declareOutputFields(OutputFieldsDeclarer declarer) {
    // single-column output schema: 'name'
    declarer.declare(new Fields("name"));
  }

}
--------------------------------------------------------------------------------
/lesson3/stage7/src/jvm/udacity/storm/ReportBolt.java:
--------------------------------------------------------------------------------
package udacity.storm;

import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.StormSubmitter;
import backtype.storm.spout.SpoutOutputCollector;
import backtype.storm.task.OutputCollector;
import
package udacity.storm;

import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.StormSubmitter;
import backtype.storm.spout.SpoutOutputCollector;
import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.testing.TestWordSpout;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.TopologyBuilder;
import backtype.storm.topology.base.BaseRichSpout;
import backtype.storm.topology.base.BaseRichBolt;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;
import backtype.storm.utils.Utils;

import java.util.Map;

import com.lambdaworks.redis.RedisClient;
import com.lambdaworks.redis.RedisConnection;

/**
 * A bolt that publishes each (word, count) tuple it receives to a Redis
 * channel as "word|count".
 */
public class ReportBolt extends BaseRichBolt
{
    // connection to redis; transient because the bolt is serialized to workers
    // and the connection must be (re)created in prepare() on each worker
    transient RedisConnection redis;

    @Override
    public void prepare(
        Map map,
        TopologyContext topologyContext,
        OutputCollector outputCollector)
    {
        // instantiate a redis client against the local broker
        RedisClient client = new RedisClient("localhost", 6379);

        // initiate the actual connection
        redis = client.connect();
    }

    @Override
    public void execute(Tuple tuple)
    {
        // read both columns by field name, matching the upstream bolt's
        // declared schema ("word", "count"); the previous revision hard-coded
        // count = 30 and read the word positionally, which published a bogus
        // constant count for every word
        String word = tuple.getStringByField("word");
        Integer count = tuple.getIntegerByField("count");

        // publish the word count to redis using word as the key
        redis.publish("WordCountTopology", word + "|" + Long.toString(count));
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer)
    {
        // nothing to add - since it is the final bolt
    }
}
package udacity.storm.spout;

import backtype.storm.spout.SpoutOutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichSpout;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Values;
import backtype.storm.utils.Utils;

import java.util.Map;
import java.util.Random;

/**
 * A spout that emits a random (name, favorite) pair roughly every 100 ms,
 * parsed out of "name # favorite" source strings.
 */
public class MyLikesSpout extends BaseRichSpout {

    // source data: each entry is "<name> # <favorite>"
    private static final String[] PAIRS = {
        "Lewis # Udacity",
        "Taylor # Cinematography",
        "Justine # Dogs",
        "Liz # Soccer",
        "Kim # Art"
    };

    SpoutOutputCollector _collector;
    Random _rand;

    @Override
    public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
        // keep the collector so nextTuple() can emit through it
        _collector = collector;
        _rand = new Random();
    }

    @Override
    public void nextTuple() {
        // throttle emission so the topology is not flooded
        Utils.sleep(100);
        String pair = PAIRS[_rand.nextInt(PAIRS.length)];

        // split "name # favorite" on the '#' separator and trim padding
        String[] parts = pair.split("#");
        String name = parts[0].trim();
        String favorite = parts[1].trim();

        _collector.emit(new Values(name, favorite));
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        // two-column output schema: "name", "favorite"
        declarer.declare(new Fields("name", "favorite"));
    }
}
package udacity.storm.spout;

import backtype.storm.spout.SpoutOutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichSpout;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Values;
import backtype.storm.utils.Utils;

import java.util.Map;
import java.util.Random;

/**
 * Emits a single randomly selected name (field "name") about 10x per second.
 */
public class MyNamesSpout extends BaseRichSpout {
    SpoutOutputCollector _collector;
    Random _rand;

    @Override
    public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
        // remember the collector; it is the only way this spout can emit
        _collector = collector;
        _rand = new Random();
    }

    @Override
    public void nextTuple() {
        // pace the stream at ~10 tuples/second
        Utils.sleep(100);

        final String[] names = {
            "Taylor",
            "Justine",
            "Liz",
            "Kim",
            "Lewis"
        };
        int idx = _rand.nextInt(names.length);
        _collector.emit(new Values(names[idx]));
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        // tuples carry exactly one field, "name"
        declarer.declare(new Fields("name"));
    }
}
/*
 * The MIT License (MIT) - Copyright (c) 2014 Daniel Glasson
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
 * associated documentation files (the "Software"), to deal in the Software without restriction,
 * including without limitation the rights to use, copy, modify, merge, publish, distribute,
 * sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all copies or
 * substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
 * NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

package geocode;

import geocode.kdtree.KDNodeComparator;
import static java.lang.Math.cos;
import static java.lang.Math.sin;
import static java.lang.Math.toRadians;

import java.util.Comparator;

/**
 * Created by Daniel Glasson on 18/05/2014.
 * One place-name record parsed from a gazetteer file; its (lat, lon) is
 * projected onto the unit sphere so KD-tree distances are well-behaved.
 * Generic type parameters were restored (they were stripped in transcription).
 */
public class GeoName extends KDNodeComparator<GeoName> {
    public String name;
    public String geoid;
    public boolean majorPlace; // Major or minor place
    public double latitude;
    public double longitude;
    public double point[] = new double[3]; // The 3D coordinates of the point
    public String country;

    /**
     * Parse one comma-separated gazetteer row.
     * Columns used (0-based): 1 = id, 3 = name, 8 = latitude, 9 = longitude.
     */
    GeoName(String data) {
        String[] names = data.split(",");
        name = names[3];
        geoid = "c" + names[1];
        // upstream filtered on names[6].equals("P"); this data set keeps every row
        majorPlace = true;
        latitude = Double.parseDouble(names[8]);
        longitude = Double.parseDouble(names[9]);
        setPoint();
        // NOTE(review): names[8] is the latitude column per the lines above, so
        // 'country' receives the latitude string here - this looks like a wrong
        // index; confirm against the CSV schema before changing it.
        country = names[8];
    }

    /** Synthetic record used as the query point of a nearest-place search. */
    GeoName(Double latitude, Double longitude) {
        name = country = "Search";
        this.latitude = latitude;
        this.longitude = longitude;
        setPoint();
    }

    /** Project (latitude, longitude) onto the unit sphere as (x, y, z). */
    private void setPoint() {
        point[0] = cos(toRadians(latitude)) * cos(toRadians(longitude));
        point[1] = cos(toRadians(latitude)) * sin(toRadians(longitude));
        point[2] = sin(toRadians(latitude));
    }

    @Override
    public String toString() {
        return name;
    }

    @Override
    protected Double squaredDistance(GeoName other) {
        double x = this.point[0] - other.point[0];
        double y = this.point[1] - other.point[1];
        double z = this.point[2] - other.point[2];
        return (x * x) + (y * y) + (z * z);
    }

    @Override
    protected Double axisSquaredDistance(GeoName other, Integer axis) {
        double distance = point[axis] - other.point[axis];
        return distance * distance;
    }

    @Override
    protected Comparator<GeoName> getComparator(Integer axis) {
        return GeoNameComparator.values()[axis];
    }

    /** One comparator per axis of the 3D point; ordinal == axis index. */
    protected static enum GeoNameComparator implements Comparator<GeoName> {
        x {
            @Override
            public int compare(GeoName a, GeoName b) {
                return Double.compare(a.point[0], b.point[0]);
            }
        },
        y {
            @Override
            public int compare(GeoName a, GeoName b) {
                return Double.compare(a.point[1], b.point[1]);
            }
        },
        z {
            @Override
            public int compare(GeoName a, GeoName b) {
                return Double.compare(a.point[2], b.point[2]);
            }
        };
    }
}
/*
 * The MIT License (MIT) - Copyright (c) 2014 Daniel Glasson
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
 * associated documentation files (the "Software"), to deal in the Software without restriction,
 * including without limitation the rights to use, copy, modify, merge, publish, distribute,
 * sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all copies or
 * substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
 * NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

package geocode;

import geocode.kdtree.KDTree;
import java.io.*;
import java.util.ArrayList;

/**
 * Created by Daniel Glasson on 18/05/2014.
 * Uses a KD-tree to quickly find the nearest known place to a coordinate.
 *
 * ReverseGeoCode reverseGeoCode = new ReverseGeoCode(new FileInputStream("c:\\AU.txt"), true);
 * System.out.println("Nearest to -23.456, 123.456 is " + geocode.nearestPlace(-23.456, 123.456));
 */
public class ReverseGeoCode {
    KDTree<GeoName> kdTree;

    // Get placenames from http://download.geonames.org/export/dump/
    public ReverseGeoCode(InputStream placenames, Boolean majorOnly) throws IOException {
        ArrayList<GeoName> arPlaceNames = new ArrayList<GeoName>();
        BufferedReader in = new BufferedReader(new InputStreamReader(placenames));
        try {
            // first line is a header row - skip it
            in.readLine();
            String str;
            while ((str = in.readLine()) != null) {
                GeoName newPlace = new GeoName(str);
                if (!majorOnly || newPlace.majorPlace) {
                    // reuse the already-parsed record; the previous revision
                    // constructed (and parsed) a second GeoName from the same line
                    arPlaceNames.add(newPlace);
                }
            }
        } finally {
            // close on both the success and failure paths (was catch-close-rethrow)
            in.close();
        }
        kdTree = new KDTree<GeoName>(arPlaceNames);
    }

    /** Return the nearest loaded place to the given coordinate. */
    public GeoName nearestPlace(double latitude, double longitude) {
        return kdTree.findNearest(new GeoName(latitude, longitude));
    }
}
including without limitation the rights 11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | copies of the Software, and to permit persons to whom the Software is 13 | furnished to do so, subject to the following conditions: 14 | 15 | The above copyright notice and this permission notice shall be included in 16 | all copies or substantial portions of the Software. 17 | 18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 21 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 23 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 24 | THE SOFTWARE. 25 | */ 26 | 27 | package geocode.kdtree; 28 | 29 | /** 30 | * 31 | * @author Daniel Glasson 32 | */ 33 | public class KDNode> { 34 | KDNode left; 35 | KDNode right; 36 | T location; 37 | 38 | public KDNode( KDNode left, KDNode right, T location ) { 39 | this.left = left; 40 | this.right = right; 41 | this.location = location; 42 | } 43 | } -------------------------------------------------------------------------------- /lesson4/TeamAwesome/FinalProject/src/jvm/geocode/kdtree/KDNodeComparator.java: -------------------------------------------------------------------------------- 1 | /* 2 | The MIT License (MIT) 3 | [OSI Approved License] 4 | The MIT License (MIT) 5 | 6 | Copyright (c) 2014 Daniel Glasson 7 | 8 | Permission is hereby granted, free of charge, to any person obtaining a copy 9 | of this software and associated documentation files (the "Software"), to deal 10 | in the Software without restriction, including without limitation the rights 11 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 12 | copies of the Software, and to permit persons to whom 
/*
 * The MIT License (MIT) - Copyright (c) 2014 Daniel Glasson
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
 * associated documentation files (the "Software"), to deal in the Software without restriction,
 * including without limitation the rights to use, copy, modify, merge, publish, distribute,
 * sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all copies or
 * substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
 * NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

package geocode.kdtree;

import java.util.Comparator;

/**
 * @author Daniel Glasson
 * Make the user return a comparator for each axis.
 * Squared distances should be an optimisation.
 * The type parameter was restored (stripped in transcription - the methods
 * reference T but the class declared no type variable).
 */
public abstract class KDNodeComparator<T> {
    // This should return a comparator for whatever axis is passed in
    protected abstract Comparator<T> getComparator(Integer axis);

    // Return squared distance between current and other
    protected abstract Double squaredDistance(T other);

    // Return squared distance along one axis only
    protected abstract Double axisSquaredDistance(T other, Integer axis);
}
/*
 * The MIT License (MIT) - Copyright (c) 2014 Daniel Glasson
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy of this software and
 * associated documentation files (the "Software"), to deal in the Software without restriction,
 * including without limitation the rights to use, copy, modify, merge, publish, distribute,
 * sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all copies or
 * substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT
 * NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

package geocode.kdtree;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;

/**
 * @author Daniel Glasson
 * A KD-Tree implementation to quickly find nearest points.
 * Currently implements createKDTree and findNearest as that's all that's required here.
 * Generic type parameters restored (stripped in transcription).
 */
public class KDTree<T extends KDNodeComparator<T>> {
    private KDNode<T> root;

    public KDTree(List<T> items) {
        root = createKDTree(items, 0);
    }

    public T findNearest(T search) {
        return findNearest(root, search, 0).location;
    }

    // Only ever goes to log2(items.length) depth so lack of tail recursion is a non-issue
    private KDNode<T> createKDTree(List<T> items, int depth) {
        if (items.isEmpty()) {
            return null;
        }
        // sort along this depth's axis, then split at the median element
        Collections.sort(items, items.get(0).getComparator(depth % 3));
        int currentIndex = items.size() / 2;
        return new KDNode<T>(
            createKDTree(items.subList(0, currentIndex), depth + 1),
            createKDTree(items.subList(currentIndex + 1, items.size()), depth + 1),
            items.get(currentIndex));
    }

    private KDNode<T> findNearest(KDNode<T> currentNode, T search, int depth) {
        // descend toward the side of the splitting plane the query falls on
        int direction = search.getComparator(depth % 3).compare(search, currentNode.location);
        KDNode<T> next = (direction < 0) ? currentNode.left : currentNode.right;
        KDNode<T> other = (direction < 0) ? currentNode.right : currentNode.left;
        KDNode<T> best = (next == null) ? currentNode : findNearest(next, search, depth + 1); // Go to a leaf
        if (currentNode.location.squaredDistance(search) < best.location.squaredDistance(search)) {
            best = currentNode; // Set best as required
        }
        // only search the far side if the splitting plane is closer than the current best
        if (other != null) {
            if (currentNode.location.axisSquaredDistance(search, depth % 3) < best.location.squaredDistance(search)) {
                KDNode<T> possibleBest = findNearest(other, search, depth + 1);
                if (possibleBest.location.squaredDistance(search) < best.location.squaredDistance(search)) {
                    best = possibleBest;
                }
            }
        }
        return best; // Work back up
    }
}
package udacity.storm;

import backtype.storm.Config;
import backtype.storm.LocalCluster;
import backtype.storm.StormSubmitter;
import backtype.storm.spout.SpoutOutputCollector;
import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.testing.TestWordSpout;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.TopologyBuilder;
import backtype.storm.topology.base.BaseRichSpout;
import backtype.storm.topology.base.BaseRichBolt;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;
import backtype.storm.utils.Utils;

import java.util.HashMap;
import java.util.Map;

/**
 * A bolt that keeps a running count per word and emits (word, count)
 * after every update.
 */
public class CountBolt extends BaseRichBolt
{
    // To output tuples from this bolt to the next stage bolts, if any
    private OutputCollector collector;

    // running count per word (generic parameters restored; the raw
    // Map/HashMap in the previous revision was a transcription artifact)
    private Map<String, Integer> countMap;

    @Override
    public void prepare(
        Map map,
        TopologyContext topologyContext,
        OutputCollector outputCollector)
    {
        // save the collector for emitting tuples
        collector = outputCollector;

        // create and initialize the map
        countMap = new HashMap<String, Integer>();
    }

    @Override
    public void execute(Tuple tuple)
    {
        // get the word from the 1st column of incoming tuple
        String word = tuple.getString(0);

        // bump the count, starting at 1 for a first sighting
        Integer current = countMap.get(word);
        Integer updated = (current == null) ? 1 : current + 1;
        countMap.put(word, updated);

        // emit the word and its new count
        collector.emit(new Values(word, updated));
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer outputFieldsDeclarer)
    {
        // tuple schema is two columns: 'word' then 'count'
        outputFieldsDeclarer.declare(new Fields("word", "count"));
    }
}
column 'count' 78 | outputFieldsDeclarer.declare(new Fields("word","count")); 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /lesson4/TeamAwesome/FinalProject/src/jvm/udacity/storm/TopNTweetTopology.java: -------------------------------------------------------------------------------- 1 | package udacity.storm; 2 | 3 | import backtype.storm.Config; 4 | import backtype.storm.LocalCluster; 5 | import backtype.storm.StormSubmitter; 6 | import backtype.storm.topology.TopologyBuilder; 7 | import backtype.storm.tuple.Fields; 8 | import backtype.storm.utils.Utils; 9 | 10 | class TopNTweetTopology 11 | { 12 | public static void main(String[] args) throws Exception 13 | { 14 | //Variable TOP_N number of words 15 | int TOP_N = 5; 16 | // create the topology 17 | TopologyBuilder builder = new TopologyBuilder(); 18 | 19 | /* 20 | * In order to create the spout, you need to get twitter credentials 21 | * If you need to use Twitter firehose/Tweet stream for your idea, 22 | * create a set of credentials by following the instructions at 23 | * 24 | * https://dev.twitter.com/discussions/631 25 | * 26 | */ 27 | // now create the tweet spout with the credentials 28 | // credential 29 | TweetSpout tweetSpout = new TweetSpout( 30 | "", 31 | "", 32 | "", 33 | "" 34 | ); 35 | 36 | // attach the tweet spout to the topology - parallelism of 1 37 | builder.setSpout("tweet-spout", tweetSpout, 1); 38 | 39 | // attach the parse tweet bolt using shuffle grouping 40 | builder.setBolt("parse-tweet-bolt", new ParseTweetBolt(), 10).shuffleGrouping("tweet-spout"); 41 | builder.setBolt("infoBolt", new InfoBolt(), 10).fieldsGrouping("parse-tweet-bolt", new Fields("county_id")); 42 | builder.setBolt("top-words", new TopWords(), 10).fieldsGrouping("infoBolt", new Fields("county_id")); 43 | builder.setBolt("report-bolt", new ReportBolt(), 1).globalGrouping("top-words"); 44 | 45 | // attach rolling count bolt using fields grouping - parallelism of 5 46 
| //builder.setBolt("rolling-count-bolt", new RollingCountBolt(1000, 10), 1).fieldsGrouping("parse-tweet-bolt", new Fields("tweet-word")); 47 | 48 | //from incubator-storm/.../storm/starter/RollingTopWords.java 49 | //builder.setBolt("intermediate-ranker", new IntermediateRankingsBolt(TOP_N, 10), 2).fieldsGrouping("rolling-count-bolt", new Fields("obj")); 50 | //builder.setBolt("total-ranker", new TotalRankingsBolt(TOP_N, 2)).globalGrouping("intermediate-ranker"); 51 | 52 | /* 53 | * total-ranker bolt output is broadcast (allGrouping) to all the top-tweets bolt instances so 54 | * that every one of them have access to the top hashtags 55 | * tweet-spout tweet stream will be distributed randomly to the top-tweets bolt instances 56 | */ 57 | //builder.setBolt("top-tweets", new TweetsWithTopHashtagsBolt(), 4) 58 | // .allGrouping("total-ranker") 59 | // .shuffleGrouping("tweet-spout"); 60 | 61 | // attach the report bolt using global grouping - parallelism of 1 62 | //builder.setBolt("report-bolt", new ReportBolt(), 1).globalGrouping("top-tweets"); 63 | 64 | // create the default config object 65 | Config conf = new Config(); 66 | 67 | // set the config in debugging mode 68 | conf.setDebug(true); 69 | 70 | if (args != null && args.length > 0) { 71 | 72 | // run it in a live cluster 73 | 74 | // set the number of workers for running all spout and bolt tasks 75 | conf.setNumWorkers(3); 76 | 77 | // create the topology and submit with config 78 | StormSubmitter.submitTopology(args[0], conf, builder.createTopology()); 79 | 80 | } else { 81 | 82 | // run it in a simulated local cluster 83 | 84 | // set the number of threads to run - similar to setting number of workers in live cluster 85 | conf.setMaxTaskParallelism(4); 86 | 87 | // create the local cluster instance 88 | LocalCluster cluster = new LocalCluster(); 89 | 90 | // submit the topology to the local cluster 91 | cluster.submitTopology("tweet-word-count", conf, builder.createTopology()); 92 | 93 | // let the 
package udacity.storm.spout;

import backtype.storm.spout.SpoutOutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichSpout;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Values;
import backtype.storm.utils.Utils;

import java.util.Map;
import java.util.Random;

/**
 * A spout that emits one randomly chosen sentence (field "sentence")
 * roughly every 100 ms.
 */
public class RandomSentenceSpout extends BaseRichSpout {

    // fixed pool of sample sentences
    private static final String[] SENTENCES = {
        "the cow jumped over the moon",
        "an apple a day keeps the doctor away",
        "four score and seven years ago",
        "snow white and the seven dwarfs",
        "i am at two with nature"
    };

    SpoutOutputCollector _collector;
    Random _rand;

    @Override
    public void open(Map conf, TopologyContext context, SpoutOutputCollector collector) {
        // keep the collector so nextTuple() can emit through it
        _collector = collector;
        _rand = new Random();
    }

    @Override
    public void nextTuple() {
        // pace the stream at ~10 tuples/second
        Utils.sleep(100);
        String chosen = SENTENCES[_rand.nextInt(SENTENCES.length)];
        _collector.emit(new Values(chosen));
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        // single-column output schema: "sentence"
        declarer.declare(new Fields("sentence"));
    }
}
/lesson4/TeamAwesome/FinalProject/src/jvm/udacity/storm/tools/NthLastModifiedTimeTracker.java: -------------------------------------------------------------------------------- 1 | package udacity.storm.tools; 2 | 3 | import backtype.storm.utils.Time; 4 | import org.apache.commons.collections.buffer.CircularFifoBuffer; 5 | 6 | /** 7 | * This class tracks the time-since-last-modify of a "thing" in a rolling fashion. 8 | *

9 | * For example, create a 5-slot tracker to track the five most recent time-since-last-modify. 10 | *

11 | * You must manually "mark" that the "something" that you want to track -- in terms of modification times -- has just 12 | * been modified. 13 | */ 14 | public class NthLastModifiedTimeTracker { 15 | 16 | private static final int MILLIS_IN_SEC = 1000; 17 | 18 | private final CircularFifoBuffer lastModifiedTimesMillis; 19 | 20 | public NthLastModifiedTimeTracker(int numTimesToTrack) { 21 | if (numTimesToTrack < 1) { 22 | throw new IllegalArgumentException( 23 | "numTimesToTrack must be greater than zero (you requested " + numTimesToTrack + ")"); 24 | } 25 | lastModifiedTimesMillis = new CircularFifoBuffer(numTimesToTrack); 26 | initLastModifiedTimesMillis(); 27 | } 28 | 29 | private void initLastModifiedTimesMillis() { 30 | long nowCached = now(); 31 | for (int i = 0; i < lastModifiedTimesMillis.maxSize(); i++) { 32 | lastModifiedTimesMillis.add(Long.valueOf(nowCached)); 33 | } 34 | } 35 | 36 | private long now() { 37 | return Time.currentTimeMillis(); 38 | } 39 | 40 | public int secondsSinceOldestModification() { 41 | long modifiedTimeMillis = ((Long) lastModifiedTimesMillis.get()).longValue(); 42 | return (int) ((now() - modifiedTimeMillis) / MILLIS_IN_SEC); 43 | } 44 | 45 | public void markAsModified() { 46 | updateLastModifiedTime(); 47 | } 48 | 49 | private void updateLastModifiedTime() { 50 | lastModifiedTimesMillis.add(now()); 51 | } 52 | 53 | } 54 | -------------------------------------------------------------------------------- /lesson4/TeamAwesome/FinalProject/src/jvm/udacity/storm/tools/Rankable.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. 
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more contributor license
 * agreements. See the NOTICE file distributed with this work for additional information regarding
 * copyright ownership. The ASF licenses this file to you under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with the License. You may
 * obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the
 * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
 * either express or implied. See the License for the specific language governing permissions and
 * limitations under the License.
 */
package udacity.storm.tools;

/**
 * Something that can be ranked by an associated count.
 * The Comparable type argument was restored (stripped in transcription).
 */
public interface Rankable extends Comparable<Rankable> {

    /** The object being ranked (e.g. a word or hashtag). */
    Object getObject();

    /** The count used to rank the object. */
    long getCount();

    /**
     * Note: the object wrapped by the Rankable is NOT defensively copied; it
     * is shared between the original and the copy.
     *
     * @return a new Rankable with the same object reference and count
     */
    Rankable copy();
}
int mainSentiment = 0; 25 | if (tweet != null && tweet.length() > 0) { 26 | int longest = 0; 27 | Annotation annotation = pipeline.process(tweet); 28 | for (CoreMap sentence : annotation 29 | .get(CoreAnnotations.SentencesAnnotation.class)) { 30 | Tree tree = sentence 31 | .get(SentimentCoreAnnotations.AnnotatedTree.class); 32 | int sentiment = RNNCoreAnnotations.getPredictedClass(tree); 33 | String partText = sentence.toString(); 34 | if (partText.length() > longest) { 35 | mainSentiment = sentiment; 36 | longest = partText.length(); 37 | } 38 | 39 | } 40 | } 41 | return mainSentiment; 42 | } 43 | } -------------------------------------------------------------------------------- /lesson4/TeamAwesome/FinalProject/src/jvm/udacity/storm/tools/SlotBasedCounter.java: -------------------------------------------------------------------------------- 1 | //package storm.starter.tools; 2 | package udacity.storm.tools; 3 | 4 | import java.io.Serializable; 5 | import java.util.HashMap; 6 | import java.util.HashSet; 7 | import java.util.Map; 8 | import java.util.Set; 9 | 10 | /** 11 | * This class provides per-slot counts of the occurrences of objects. 12 | *

13 | * It can be used, for instance, as a building block for implementing sliding window counting of objects. 14 | * 15 | * @param The type of those objects we want to count. 16 | */ 17 | public final class SlotBasedCounter implements Serializable { 18 | 19 | private static final long serialVersionUID = 4858185737378394432L; 20 | 21 | private final Map objToCounts = new HashMap(); 22 | private final int numSlots; 23 | 24 | public SlotBasedCounter(int numSlots) { 25 | if (numSlots <= 0) { 26 | throw new IllegalArgumentException("Number of slots must be greater than zero (you requested " + numSlots + ")"); 27 | } 28 | this.numSlots = numSlots; 29 | } 30 | 31 | public void incrementCount(T obj, int slot) { 32 | long[] counts = objToCounts.get(obj); 33 | if (counts == null) { 34 | counts = new long[this.numSlots]; 35 | objToCounts.put(obj, counts); 36 | } 37 | counts[slot]++; 38 | } 39 | 40 | public long getCount(T obj, int slot) { 41 | long[] counts = objToCounts.get(obj); 42 | if (counts == null) { 43 | return 0; 44 | } 45 | else { 46 | return counts[slot]; 47 | } 48 | } 49 | 50 | public Map getCounts() { 51 | Map result = new HashMap(); 52 | for (T obj : objToCounts.keySet()) { 53 | result.put(obj, computeTotalCount(obj)); 54 | } 55 | return result; 56 | } 57 | 58 | private long computeTotalCount(T obj) { 59 | long[] curr = objToCounts.get(obj); 60 | long total = 0; 61 | for (long l : curr) { 62 | total += l; 63 | } 64 | return total; 65 | } 66 | 67 | /** 68 | * Reset the slot count of any tracked objects to zero for the given slot. 
69 | * 70 | * @param slot 71 | */ 72 | public void wipeSlot(int slot) { 73 | for (T obj : objToCounts.keySet()) { 74 | resetSlotCountToZero(obj, slot); 75 | } 76 | } 77 | 78 | private void resetSlotCountToZero(T obj, int slot) { 79 | long[] counts = objToCounts.get(obj); 80 | counts[slot] = 0; 81 | } 82 | 83 | private boolean shouldBeRemovedFromCounter(T obj) { 84 | return computeTotalCount(obj) == 0; 85 | } 86 | 87 | /** 88 | * Remove any object from the counter whose total count is zero (to free up memory). 89 | */ 90 | public void wipeZeros() { 91 | Set objToBeRemoved = new HashSet(); 92 | for (T obj : objToCounts.keySet()) { 93 | if (shouldBeRemovedFromCounter(obj)) { 94 | objToBeRemoved.add(obj); 95 | } 96 | } 97 | for (T obj : objToBeRemoved) { 98 | objToCounts.remove(obj); 99 | } 100 | } 101 | 102 | } 103 | -------------------------------------------------------------------------------- /lesson4/TeamAwesome/FinalProject/src/jvm/udacity/storm/tools/TupleHelpers.java: -------------------------------------------------------------------------------- 1 | //package storm.starter.util; 2 | package udacity.storm.tools; 3 | 4 | import backtype.storm.Constants; 5 | import backtype.storm.tuple.Tuple; 6 | 7 | public final class TupleHelpers { 8 | 9 | private TupleHelpers() { 10 | } 11 | 12 | public static boolean isTickTuple(Tuple tuple) { 13 | return tuple.getSourceComponent().equals(Constants.SYSTEM_COMPONENT_ID) && tuple.getSourceStreamId().equals( 14 | Constants.SYSTEM_TICK_STREAM_ID); 15 | } 16 | 17 | } 18 | -------------------------------------------------------------------------------- /lesson4/TeamAwesome/FinalProject/src/jvm/udacity/storm/tools/ValueComparator.java: -------------------------------------------------------------------------------- 1 | package udacity.storm.tools; 2 | 3 | import java.util.Comparator; 4 | import java.util.Map; 5 | 6 | public class ValueComparator implements Comparator { 7 | 8 | private static final long serialVersionUID = 
-1549827195410578903L; 9 | Map base; 10 | public ValueComparator(Map base) { 11 | this.base = base; 12 | } 13 | 14 | // Note: this comparator imposes orderings that are inconsistent with equals. 15 | public int compare(String a, String b) { 16 | if (base.get(a) >= base.get(b)) { 17 | return -1; 18 | } else { 19 | return 1; 20 | } // returning 0 would merge keys 21 | } 22 | } -------------------------------------------------------------------------------- /lesson4/TeamAwesome/README.md: -------------------------------------------------------------------------------- 1 | ud381 2 | ===== 3 | 4 | Real-Time Analytics with Storm 5 | -------------------------------------------------------------------------------- /lesson4/TeamAwesome/viz/README.md: -------------------------------------------------------------------------------- 1 | Real-Time-Analytics 2 | =================== 3 | 4 | A demo showing Flask, Redis pubsub, and HTML5 server side events. Forked from Cheng-Han. 5 | -------------------------------------------------------------------------------- /lesson4/TeamAwesome/viz/app.py: -------------------------------------------------------------------------------- 1 | from flask import Flask, render_template, Response 2 | 3 | import redis 4 | 5 | app = Flask(__name__) 6 | r = redis.StrictRedis(host='127.0.0.1', port=6379, db=0) 7 | 8 | 9 | def event_stream(): 10 | pubsub = r.pubsub() 11 | pubsub.subscribe('WordCountTopology') 12 | for message in pubsub.listen(): 13 | print message 14 | yield 'data: %s\n\n' % message['data'] 15 | 16 | 17 | @app.route('/') 18 | def show_homepage(): 19 | #Word Cloud = cloud.html and app-cloud.js 20 | return render_template("cloud.html") 21 | 22 | @app.route('/basic') 23 | def show_basic(): 24 | #Basic d3 view = basic.html and app.js 25 | return render_template("basic.html") 26 | 27 | @app.route('/map') 28 | def show_map(): 29 | #Basic d3 view = basic.html and app.js 30 | return render_template("map.html") 31 | 32 | @app.route('/stream') 33 
| def stream(): 34 | return Response(event_stream(), mimetype="text/event-stream") 35 | 36 | 37 | if __name__ == '__main__': 38 | app.run(threaded=True, 39 | host='0.0.0.0' 40 | ) 41 | -------------------------------------------------------------------------------- /lesson4/TeamAwesome/viz/d3/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2010-2014, Michael Bostock 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | * Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 10 | * Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | * The name Michael Bostock may not be used to endorse or promote products 15 | derived from this software without specific prior written permission. 16 | 17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 18 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 20 | DISCLAIMED. IN NO EVENT SHALL MICHAEL BOSTOCK BE LIABLE FOR ANY DIRECT, 21 | INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 22 | BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 24 | OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 25 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 26 | EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
27 | -------------------------------------------------------------------------------- /lesson4/TeamAwesome/viz/d3/d3.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/udacity/ud381/7a0258191719c79d48e267eefce2e1a7921cd559/lesson4/TeamAwesome/viz/d3/d3.zip -------------------------------------------------------------------------------- /lesson4/TeamAwesome/viz/dump.rdb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/udacity/ud381/7a0258191719c79d48e267eefce2e1a7921cd559/lesson4/TeamAwesome/viz/dump.rdb -------------------------------------------------------------------------------- /lesson4/TeamAwesome/viz/rt-provision-32.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | echo "Real-Time Provisioning...." 4 | 5 | echo "Java JDK..." 6 | sudo apt-get install default-jdk -y 7 | 8 | echo "Storm..." 9 | #sudo wget http://apache.spinellicreations.com/incubator/storm/apache-storm-0.9.1-incubating/apache-storm-0.9.1-incubating.zip 10 | #sudo unzip -o /media/sf_VirtualBoxUbuntuShared/apache-storm-0.9.1-incubating.zip 11 | sudo wget http://www.trieuvan.com/apache/incubator/storm/apache-storm-0.9.2-incubating/apache-storm-0.9.2-incubating.zip 12 | sudo unzip -o $(pwd)/apache-storm-0.9.2-incubating.zip 13 | # use storm.0.9.2 for now...confirming with Twitter 14 | sudo ln -s $(pwd)/apache-storm-0.9.2-incubating/ /usr/share/storm 15 | sudo ln -s /usr/share/storm/bin/storm /usr/bin/storm 16 | 17 | echo "Lein..." 18 | sudo wget https://raw.githubusercontent.com/technomancy/leiningen/stable/bin/lein 19 | sudo mv lein /usr/bin 20 | sudo chmod 755 /usr/bin/lein 21 | lein 22 | 23 | echo "Kafka..." 24 | #sudo wget http://www.motorlogy.com/apache/kafka/0.8.1.1/kafka_2.9.2-0.8.1.1.tgz 25 | #sudo tar -xvzf kafka_2.9.2-0.8.1.1.tgz 26 | 27 | echo "Sublime..." 
28 | #sudo wget http://c758482.r82.cf2.rackcdn.com/sublime_text_3_build_3047_x64.tar.bz2 29 | #sudo tar vxjf sublime_text_3_build_3047_x64.tar.bz2 30 | #sudo mv sublime_text_3 /opt/ 31 | #sudo ln -s /opt/sublime_text_3/sublime_text /usr/bin/sublime 32 | 33 | sudo wget http://c758482.r82.cf2.rackcdn.com/sublime_text_3_build_3047.tar.bz2 34 | sudo tar vxjf Sublime\ Text\ 2.tar.bz2 35 | sudo mv Sublime\ Text\ 2 /opt/ 36 | sudo ln -s /opt/Sublime\ Text\ 2/sublime_text /usr/bin/sublime 37 | 38 | echo "Maven run..." 39 | #cd /vagrant/storm-hack 40 | #mvn -f m4-pom.xml clean 41 | #mvn -f m4-pom.xml compile 42 | #mvn -f m4-pom.xml package 43 | #mvn -f m4-pom.xml clean 44 | #cd /vagrant 45 | 46 | echo "IntelliJ..." 47 | sudo wget http://download-cf.jetbrains.com/idea/ideaIC-13.1.3.tar.gz 48 | sudo tar -xvzf ideaIC-13.1.3.tar.gz 49 | #sudo ln -s idea-IC-135.909/bin/idea.sh /usr/bin/idea 50 | 51 | echo "Git..." 52 | sudo apt-get install git-core -y 53 | 54 | echo "Redis (Python)..." 55 | sudo pip install redis 56 | 57 | echo "MongoDB...removed" 58 | #sudo apt-key adv --keyserver keyserver.ubuntu.com --recv 7F0CEB10 59 | #sudo echo "deb http://downloads-distro.mongodb.org/repo/ubuntu-upstart dist 10gen" | sudo tee -a /etc/apt/sources.list.d/10gen.list 60 | #sudo apt-get -y update 61 | #sudo apt-get -y install mongodb-10gen 62 | 63 | echo "Nodejs...(puppet attempt failed uy_nodejs-32...removed" 64 | #sudo wget http://nodejs.org/dist/v0.10.29/node-v0.10.29-linux-x86.tar.gz 65 | #sudo tar -xvzf node-v0.10.29-linux-x86 66 | #sudo /vagrant/node-v0.10.29-linux-x86 67 | #linking doesn't work.... 68 | #sudo apt-add-repository ppa:chris-lea/node.js -y 69 | #sudo apt-get update -y 70 | #sudo apt-get install nodejs -y 71 | 72 | echo "Adding from VagrantFile...." 
73 | sudo ufw disable 74 | 75 | sudo apt-get update -y 76 | 77 | sudo apt-get install maven -y 78 | 79 | sudo apt-get install vim -y 80 | 81 | sudo apt-get --yes install zookeeper zookeeperd -y 82 | 83 | sudo apt-get install redis-server -y 84 | 85 | sudo apt-get install python-software-properties -y 86 | 87 | sudo apt-get install python-pip -y 88 | 89 | sudo pip install flask 90 | 91 | sudo pip install redis 92 | 93 | -------------------------------------------------------------------------------- /lesson4/TeamAwesome/viz/static/Twitter_logo_white.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/udacity/ud381/7a0258191719c79d48e267eefce2e1a7921cd559/lesson4/TeamAwesome/viz/static/Twitter_logo_white.png -------------------------------------------------------------------------------- /lesson4/TeamAwesome/viz/static/Udacity-logoRobot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/udacity/ud381/7a0258191719c79d48e267eefce2e1a7921cd559/lesson4/TeamAwesome/viz/static/Udacity-logoRobot.png -------------------------------------------------------------------------------- /lesson4/TeamAwesome/viz/static/app-cloud.js: -------------------------------------------------------------------------------- 1 | var source = new EventSource('/stream'); 2 | var hash = {}; 3 | var width = 1200; 4 | var height = 700; 5 | 6 | //update hash (associative array) with incoming word and count 7 | source.onmessage = function (event) { 8 | word = event.data.split(":")[0]; 9 | count = event.data.split(":")[1]; 10 | if(!skip(word)){ 11 | hash[word]=count; 12 | } 13 | }; 14 | 15 | //update function for visualization 16 | var updateViz = function(){ 17 | //print console message 18 | console.log("cloudArray-1" + JSON.stringify(d3.entries(hash))); 19 | 20 | var frequency_list = d3.entries(hash); 21 | 22 | d3.layout.cloud().size([800, 300]) 23 | 
.words(frequency_list) 24 | .rotate(0) 25 | .fontSize(function(d) { return d.value; }) 26 | .on("end", draw) 27 | .start(); 28 | }; 29 | 30 | // run updateViz at #7000 milliseconds, or 7 second 31 | window.setInterval(updateViz, 7000); 32 | 33 | //clean list, can be added to word skipping bolt 34 | var skipList = ["https","follow","1","2","please","following","followers","fucking","RT","the","at","a"]; 35 | 36 | var skip = function(tWord){ 37 | for(var i=0; i 2 | 3 | 4 | 5 | Page Title 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 34 | 35 | 36 |

Udacity and Twitter bring you Real-Time Analytics with Storm

37 |

38 |  Smiley face 39 |  Twitter logo 40 |   Basic d3!!!

41 |
42 | 43 | 49 | 50 | -------------------------------------------------------------------------------- /lesson4/TeamAwesome/viz/templates/map.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 26 | 27 | 28 | 29 | 30 | 31 | 32 |
33 | 129 | 130 | 131 | -------------------------------------------------------------------------------- /provision.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -i 2 | 3 | # The following are documented (and stolen from) here: 4 | # http://redsymbol.net/articles/unofficial-bash-strict-mode/ 5 | # 6 | # In case that link dies, here's the simple version: 7 | # 1) -e means if there's an error, stop execution. 8 | # 2) -u means if we reference an undefined variable, blow up. 9 | # 3) -o pipefail means that if a step in a pipe fails, the whole pipe fails, which in combination with 1) means 10 | # that the script as a whole fails. 11 | 12 | set -euo pipefail 13 | 14 | sudo apt-get update -y 15 | 16 | sudo apt-get -y install default-jdk maven vim zookeeper zookeeperd redis-server \ 17 | python-software-properties python-pip python tree 18 | 19 | sudo pip install flask redis 20 | 21 | echo "Storm..." 22 | # TODO maybe make this use the best mirror always? 23 | sudo mkdir /opt/storm 24 | cd /opt/storm 25 | sudo wget http://mirror.cogentco.com/pub/apache/incubator/storm/apache-storm-0.9.2-incubating/apache-storm-0.9.2-incubating.tar.gz 26 | sudo tar xvzf apache-storm-0.9.2-incubating.tar.gz 27 | sudo rm apache-storm-0.9.2-incubating.tar.gz 28 | sudo chmod +x /opt/storm/apache-storm-0.9.2-incubating/bin/storm 29 | sudo ln -s /opt/storm/apache-storm-0.9.2-incubating/bin/storm /usr/bin/storm 30 | -------------------------------------------------------------------------------- /viz/README.md: -------------------------------------------------------------------------------- 1 | Real-Time-Analytics 2 | =================== 3 | 4 | A demo showing Flask, Redis pubsub, and HTML5 server side events. Forked from Cheng-Han. 
5 | -------------------------------------------------------------------------------- /viz/app.py: -------------------------------------------------------------------------------- 1 | from flask import Flask, render_template, Response 2 | 3 | import redis 4 | 5 | app = Flask(__name__) 6 | r = redis.StrictRedis(host='127.0.0.1', port=6379, db=0) 7 | 8 | 9 | def event_stream(): 10 | pubsub = r.pubsub() 11 | pubsub.subscribe('WordCountTopology') 12 | for message in pubsub.listen(): 13 | print message 14 | yield 'data: %s\n\n' % message['data'] 15 | 16 | 17 | @app.route('/') 18 | def show_homepage(): 19 | #Word Cloud = cloud.html and app-cloud.js 20 | return render_template("cloud.html") 21 | 22 | @app.route('/basic') 23 | def show_basic(): 24 | #Basic d3 view = basic.html and app.js 25 | return render_template("basic.html") 26 | 27 | @app.route('/stream') 28 | def stream(): 29 | return Response(event_stream(), mimetype="text/event-stream") 30 | 31 | 32 | if __name__ == '__main__': 33 | app.run(threaded=True, 34 | host='0.0.0.0' 35 | ) 36 | -------------------------------------------------------------------------------- /viz/d3/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2010-2014, Michael Bostock 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | * Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 10 | * Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | * The name Michael Bostock may not be used to endorse or promote products 15 | derived from this software without specific prior written permission. 
16 | 17 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 18 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 20 | DISCLAIMED. IN NO EVENT SHALL MICHAEL BOSTOCK BE LIABLE FOR ANY DIRECT, 21 | INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 22 | BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 24 | OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 25 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, 26 | EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 | -------------------------------------------------------------------------------- /viz/d3/d3.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/udacity/ud381/7a0258191719c79d48e267eefce2e1a7921cd559/viz/d3/d3.zip -------------------------------------------------------------------------------- /viz/dump.rdb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/udacity/ud381/7a0258191719c79d48e267eefce2e1a7921cd559/viz/dump.rdb -------------------------------------------------------------------------------- /viz/rt-provision-32.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | echo "Real-Time Provisioning...." 4 | 5 | echo "Java JDK..." 6 | sudo apt-get install default-jdk -y 7 | 8 | echo "Storm..." 
9 | #sudo wget http://apache.spinellicreations.com/incubator/storm/apache-storm-0.9.1-incubating/apache-storm-0.9.1-incubating.zip 10 | #sudo unzip -o /media/sf_VirtualBoxUbuntuShared/apache-storm-0.9.1-incubating.zip 11 | sudo wget http://www.trieuvan.com/apache/incubator/storm/apache-storm-0.9.2-incubating/apache-storm-0.9.2-incubating.zip 12 | sudo unzip -o $(pwd)/apache-storm-0.9.2-incubating.zip 13 | # use storm.0.9.2 for now...confirming with Twitter 14 | sudo ln -s $(pwd)/apache-storm-0.9.2-incubating/ /usr/share/storm 15 | sudo ln -s /usr/share/storm/bin/storm /usr/bin/storm 16 | 17 | echo "Lein..." 18 | sudo wget https://raw.githubusercontent.com/technomancy/leiningen/stable/bin/lein 19 | sudo mv lein /usr/bin 20 | sudo chmod 755 /usr/bin/lein 21 | lein 22 | 23 | echo "Kafka..." 24 | #sudo wget http://www.motorlogy.com/apache/kafka/0.8.1.1/kafka_2.9.2-0.8.1.1.tgz 25 | #sudo tar -xvzf kafka_2.9.2-0.8.1.1.tgz 26 | 27 | echo "Sublime..." 28 | #sudo wget http://c758482.r82.cf2.rackcdn.com/sublime_text_3_build_3047_x64.tar.bz2 29 | #sudo tar vxjf sublime_text_3_build_3047_x64.tar.bz2 30 | #sudo mv sublime_text_3 /opt/ 31 | #sudo ln -s /opt/sublime_text_3/sublime_text /usr/bin/sublime 32 | 33 | sudo wget http://c758482.r82.cf2.rackcdn.com/sublime_text_3_build_3047.tar.bz2 34 | sudo tar vxjf Sublime\ Text\ 2.tar.bz2 35 | sudo mv Sublime\ Text\ 2 /opt/ 36 | sudo ln -s /opt/Sublime\ Text\ 2/sublime_text /usr/bin/sublime 37 | 38 | echo "Maven run..." 39 | #cd /vagrant/storm-hack 40 | #mvn -f m4-pom.xml clean 41 | #mvn -f m4-pom.xml compile 42 | #mvn -f m4-pom.xml package 43 | #mvn -f m4-pom.xml clean 44 | #cd /vagrant 45 | 46 | echo "IntelliJ..." 47 | sudo wget http://download-cf.jetbrains.com/idea/ideaIC-13.1.3.tar.gz 48 | sudo tar -xvzf ideaIC-13.1.3.tar.gz 49 | #sudo ln -s idea-IC-135.909/bin/idea.sh /usr/bin/idea 50 | 51 | echo "Git..." 52 | sudo apt-get install git-core -y 53 | 54 | echo "Redis (Python)..." 
55 | sudo pip install redis 56 | 57 | echo "MongoDB...removed" 58 | #sudo apt-key adv --keyserver keyserver.ubuntu.com --recv 7F0CEB10 59 | #sudo echo "deb http://downloads-distro.mongodb.org/repo/ubuntu-upstart dist 10gen" | sudo tee -a /etc/apt/sources.list.d/10gen.list 60 | #sudo apt-get -y update 61 | #sudo apt-get -y install mongodb-10gen 62 | 63 | echo "Nodejs...(puppet attempt failed uy_nodejs-32...removed" 64 | #sudo wget http://nodejs.org/dist/v0.10.29/node-v0.10.29-linux-x86.tar.gz 65 | #sudo tar -xvzf node-v0.10.29-linux-x86 66 | #sudo /vagrant/node-v0.10.29-linux-x86 67 | #linking doesn't work.... 68 | #sudo apt-add-repository ppa:chris-lea/node.js -y 69 | #sudo apt-get update -y 70 | #sudo apt-get install nodejs -y 71 | 72 | echo "Adding from VagrantFile...." 73 | sudo ufw disable 74 | 75 | sudo apt-get update -y 76 | 77 | sudo apt-get install maven -y 78 | 79 | sudo apt-get install vim -y 80 | 81 | sudo apt-get --yes install zookeeper zookeeperd -y 82 | 83 | sudo apt-get install redis-server -y 84 | 85 | sudo apt-get install python-software-properties -y 86 | 87 | sudo apt-get install python-pip -y 88 | 89 | sudo pip install flask 90 | 91 | sudo pip install redis 92 | 93 | -------------------------------------------------------------------------------- /viz/static/Twitter_logo_white.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/udacity/ud381/7a0258191719c79d48e267eefce2e1a7921cd559/viz/static/Twitter_logo_white.png -------------------------------------------------------------------------------- /viz/static/Udacity-logoRobot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/udacity/ud381/7a0258191719c79d48e267eefce2e1a7921cd559/viz/static/Udacity-logoRobot.png -------------------------------------------------------------------------------- /viz/static/app-cloud.js: 
-------------------------------------------------------------------------------- 1 | // D3 Word Cloud Implementation by Eric Coopey: 2 | // http://bl.ocks.org/ericcoopey/6382449 3 | 4 | var source = new EventSource('/stream'); 5 | var hash = {}; 6 | var width = 1200; 7 | var height = 700; 8 | 9 | //update hash (associative array) with incoming word and count 10 | source.onmessage = function (event) { 11 | word = event.data.split("|")[0]; 12 | count = event.data.split("|")[1]; 13 | if(!skip(word)){ 14 | hash[word]=count; 15 | } 16 | }; 17 | 18 | //update function for visualization 19 | var updateViz = function(){ 20 | //print console message 21 | console.log("cloudArray-1" + JSON.stringify(d3.entries(hash))); 22 | 23 | var frequency_list = d3.entries(hash); 24 | 25 | d3.layout.cloud().size([800, 300]) 26 | .words(frequency_list) 27 | .rotate(0) 28 | .fontSize(function(d) { return d.value; }) 29 | .on("end", draw) 30 | .start(); 31 | }; 32 | 33 | // run updateViz at #7000 milliseconds, or 7 second 34 | window.setInterval(updateViz, 7000); 35 | 36 | //clean list, can be added to word skipping bolt 37 | var skipList = ["https","follow","1","2","please","following","followers","fucking","RT","the","at","a"]; 38 | 39 | var skip = function(tWord){ 40 | for(var i=0; i 2 | 3 | 4 | 5 | Page Title 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 34 | 35 | 36 |

Udacity and Twitter bring you Real-Time Analytics with Storm

37 |

38 |  Smiley face 39 |  Twitter logo 40 |   Basic d3!!!

41 |
42 | 43 | 49 | 50 | --------------------------------------------------------------------------------