├── trident ├── lib ├── src │ └── main │ │ └── java │ │ └── com │ │ └── colorcloud │ │ └── trident │ │ ├── LocationStateQuery.java │ │ ├── Skeleton.java │ │ ├── LocationState.java │ │ ├── storage │ │ └── JedisDB.java │ │ ├── BatchAggregate.java │ │ ├── ParallelismExample1.java │ │ ├── Utils.java │ │ ├── FakeTweetsBatchSpout.java │ │ ├── GroupAggregate.java │ │ ├── Demo.java │ │ └── DrpcStream.java └── README.md ├── wordcount ├── .gitignore ├── target │ └── stale │ │ └── extract-native.dependencies ├── test │ └── wordcount │ │ └── test │ │ └── core.clj ├── project.clj ├── README └── src │ └── wordcount │ └── core.clj ├── dbconn ├── target │ ├── native │ │ ├── Windows │ │ │ ├── x86 │ │ │ │ └── sqlitejdbc.dll │ │ │ └── amd64 │ │ │ │ └── sqlitejdbc.dll │ │ ├── Linux │ │ │ ├── amd64 │ │ │ │ └── libsqlitejdbc.so │ │ │ └── i386 │ │ │ │ └── libsqlitejdbc.so │ │ └── Mac │ │ │ ├── i386 │ │ │ └── libsqlitejdbc.jnilib │ │ │ └── x86_64 │ │ │ └── libsqlitejdbc.jnilib │ └── stale │ │ └── extract-native.dependencies ├── test │ └── dbconn │ │ └── test │ │ └── core.clj ├── project.clj ├── src │ └── dbconn │ │ ├── core.clj │ │ ├── redis │ │ ├── redis_persister.clj │ │ └── redis_datamapper.clj │ │ └── mysql │ │ └── mysql_datamapper.clj └── README ├── trident-clj ├── test │ └── dbconn │ │ └── test │ │ └── core.clj ├── target │ ├── dev │ │ └── stale │ │ │ └── extract-native.dependencies │ ├── provided │ │ └── stale │ │ │ └── extract-native.dependencies │ ├── uberjar+provided │ │ └── stale │ │ │ └── extract-native.dependencies │ └── dev+uberjar+provided │ │ └── stale │ │ └── extract-native.dependencies ├── src │ └── trident_clj │ │ ├── prn_filter.clj │ │ ├── persister.clj │ │ ├── loc_aggregator.clj │ │ ├── core.clj │ │ ├── tweet_spout.clj │ │ ├── redis │ │ ├── redis_persister.clj │ │ └── redis_datamapper.clj │ │ └── mysql │ │ └── mysql_datamapper.clj ├── project.clj └── README.md ├── msgqueue ├── test │ └── msgqueue │ │ └── test │ │ └── core.clj ├── target │ └── stale │ │ └── 
extract-native.dependencies ├── src │ └── msgqueue │ │ ├── util.clj │ │ ├── rabbitmq │ │ ├── sender.clj │ │ ├── receiver.clj │ │ ├── worker_usage.clj │ │ ├── rabbitmq.clj │ │ └── worker.clj │ │ └── core.clj ├── project.clj └── README ├── .gitignore ├── deftest.clj ├── README.md ├── simple-http.clj ├── defclass.clj ├── db.clj ├── defspout.clj ├── spellcheck.clj ├── mockstub.clj ├── io.clj ├── task-executor.clj ├── java-array.clj ├── class.clj └── qsort.clj /trident/lib: -------------------------------------------------------------------------------- 1 | ../wordcount/lib -------------------------------------------------------------------------------- /wordcount/.gitignore: -------------------------------------------------------------------------------- 1 | /pom.xml 2 | *jar 3 | /lib 4 | /classes 5 | /native 6 | /.lein-failures 7 | /checkouts 8 | /.lein-deps-sum 9 | -------------------------------------------------------------------------------- /dbconn/target/native/Windows/x86/sqlitejdbc.dll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/life0fun/clojure-idiom/HEAD/dbconn/target/native/Windows/x86/sqlitejdbc.dll -------------------------------------------------------------------------------- /wordcount/target/stale/extract-native.dependencies: -------------------------------------------------------------------------------- 1 | ([:dependencies ([org.clojure/clojure "1.4.0"] [commons-collections/commons-collections "3.2.1"])]) -------------------------------------------------------------------------------- /dbconn/target/native/Linux/amd64/libsqlitejdbc.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/life0fun/clojure-idiom/HEAD/dbconn/target/native/Linux/amd64/libsqlitejdbc.so -------------------------------------------------------------------------------- /dbconn/target/native/Linux/i386/libsqlitejdbc.so: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/life0fun/clojure-idiom/HEAD/dbconn/target/native/Linux/i386/libsqlitejdbc.so -------------------------------------------------------------------------------- /dbconn/target/native/Mac/i386/libsqlitejdbc.jnilib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/life0fun/clojure-idiom/HEAD/dbconn/target/native/Mac/i386/libsqlitejdbc.jnilib -------------------------------------------------------------------------------- /dbconn/target/native/Windows/amd64/sqlitejdbc.dll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/life0fun/clojure-idiom/HEAD/dbconn/target/native/Windows/amd64/sqlitejdbc.dll -------------------------------------------------------------------------------- /dbconn/target/native/Mac/x86_64/libsqlitejdbc.jnilib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/life0fun/clojure-idiom/HEAD/dbconn/target/native/Mac/x86_64/libsqlitejdbc.jnilib -------------------------------------------------------------------------------- /dbconn/test/dbconn/test/core.clj: -------------------------------------------------------------------------------- 1 | (ns dbconn.test.core 2 | (:use [dbconn.core]) 3 | (:use [clojure.test])) 4 | 5 | (deftest replace-me ;; FIXME: write 6 | (is false "No tests have been written.")) 7 | -------------------------------------------------------------------------------- /trident-clj/test/dbconn/test/core.clj: -------------------------------------------------------------------------------- 1 | (ns dbconn.test.core 2 | (:use [dbconn.core]) 3 | (:use [clojure.test])) 4 | 5 | (deftest replace-me ;; FIXME: write 6 | (is false "No tests have been written.")) 7 | -------------------------------------------------------------------------------- 
/msgqueue/test/msgqueue/test/core.clj: -------------------------------------------------------------------------------- 1 | (ns msgqueue.test.core 2 | (:use [msgqueue.core]) 3 | (:use [clojure.test])) 4 | 5 | (deftest replace-me ;; FIXME: write 6 | (is false "No tests have been written.")) 7 | -------------------------------------------------------------------------------- /wordcount/test/wordcount/test/core.clj: -------------------------------------------------------------------------------- 1 | (ns wordcount.test.core 2 | (:use [wordcount.core]) 3 | (:use [clojure.test])) 4 | 5 | (deftest replace-me ;; FIXME: write 6 | (is false "No tests have been written.")) 7 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | clojure-koans 2 | .* 3 | /target 4 | /lib 5 | /classes 6 | /checkouts 7 | pom.xml 8 | pom.xml.asc 9 | *.jar 10 | *.class 11 | .lein-deps-sum 12 | .lein-failures 13 | .lein-plugins 14 | .lein-repl-history 15 | -------------------------------------------------------------------------------- /msgqueue/target/stale/extract-native.dependencies: -------------------------------------------------------------------------------- 1 | ([:dependencies ([org.clojure/clojure "1.3.0"] [org.clojure/clojure-contrib "1.2.0"] [org.clojure/java.jdbc "0.2.3"] [mysql/mysql-connector-java "5.1.6"] [clj-redis/clj-redis "0.0.12"] [com.rabbitmq/amqp-client "2.3.1"] [org.clojure/data.json "0.2.2"])]) -------------------------------------------------------------------------------- /trident-clj/target/dev/stale/extract-native.dependencies: -------------------------------------------------------------------------------- 1 | ([:dependencies ([org.clojure/clojure "1.4.0"] [org.clojure/clojure-contrib "1.2.0"] [org.clojure/tools.logging "0.2.6"] [korma/korma "0.3.0-RC5"] [clj-redis/clj-redis "0.0.12"] [org.clojure/data.json "0.2.2"] [clj-time/clj-time "0.5.1"] 
[storm/storm "0.9.0-wip15"])]) -------------------------------------------------------------------------------- /msgqueue/src/msgqueue/util.clj: -------------------------------------------------------------------------------- 1 | ;; util module. 2 | ;; require can now take a :refer option. :refer takes a list of symbols to refer 3 | ;; from the namespace or :all to bring in all public vars." 4 | ;; 5 | 6 | (ns msgqueue.util) 7 | 8 | (defn debug [] 9 | (prn " ... msg queue util debug print hello world ...")) 10 | -------------------------------------------------------------------------------- /trident-clj/target/provided/stale/extract-native.dependencies: -------------------------------------------------------------------------------- 1 | ([:dependencies ([org.clojure/clojure "1.4.0"] [org.clojure/clojure-contrib "1.2.0"] [org.clojure/tools.logging "0.2.6"] [korma/korma "0.3.0-RC5"] [clj-redis/clj-redis "0.0.12"] [org.clojure/data.json "0.2.2"] [clj-time/clj-time "0.5.1"] [storm/storm "0.9.0-wip15"])]) -------------------------------------------------------------------------------- /trident-clj/target/uberjar+provided/stale/extract-native.dependencies: -------------------------------------------------------------------------------- 1 | ([:dependencies ([org.clojure/clojure "1.4.0"] [org.clojure/clojure-contrib "1.2.0"] [org.clojure/tools.logging "0.2.6"] [korma/korma "0.3.0-RC5"] [clj-redis/clj-redis "0.0.12"] [org.clojure/data.json "0.2.2"] [clj-time/clj-time "0.5.1"] [storm/storm "0.9.0-wip15"])]) -------------------------------------------------------------------------------- /trident-clj/target/dev+uberjar+provided/stale/extract-native.dependencies: -------------------------------------------------------------------------------- 1 | ([:dependencies ([org.clojure/clojure "1.4.0"] [org.clojure/clojure-contrib "1.2.0"] [org.clojure/tools.logging "0.2.6"] [korma/korma "0.3.0-RC5"] [clj-redis/clj-redis "0.0.12"] [org.clojure/data.json "0.2.2"] [clj-time/clj-time 
"0.5.1"] [storm/storm "0.9.0-wip15"])]) -------------------------------------------------------------------------------- /dbconn/target/stale/extract-native.dependencies: -------------------------------------------------------------------------------- 1 | ([:dependencies ([org.clojure/clojure "1.3.0"] [org.clojure/clojure-contrib "1.2.0"] [korma/korma "0.3.0-RC5"] [org.clojure/java.jdbc "0.2.3"] [mysql/mysql-connector-java "5.1.6"] [org.postgresql/postgresql "9.2-1002-jdbc4"] [org.xerial/sqlite-jdbc "3.7.2"] [clj-redis/clj-redis "0.0.12"] [clojure-rabbitmq/clojure-rabbitmq "0.2.1"] [org.clojure/data.json "0.2.2"])]) -------------------------------------------------------------------------------- /wordcount/project.clj: -------------------------------------------------------------------------------- 1 | (defproject wordcount "1.0.0-SNAPSHOT" 2 | :description "first project for word count" 3 | :source-path "src" 4 | :aot :all 5 | :repositories {"local" ~(str (.toURI (java.io.File. "local_jars")))} 6 | :dependencies [ [org.clojure/clojure "1.4.0"] 7 | [commons-collections/commons-collections "3.2.1"] ] 8 | :dev-dependencies [[storm "0.8.2"] 9 | [org.clojure/clojure-contrib "1.2.0"]] 10 | :main wordcount.core) 11 | -------------------------------------------------------------------------------- /wordcount/README: -------------------------------------------------------------------------------- 1 | # lein dependency management 2 | 3 | lein is the npm for clojure. 4 | 5 | lein version 6 | lein new my-app 7 | cd my-app 8 | vi project.clj 9 | 10 | dependency management by :dependencies key in a vector. 11 | 12 | lein deps 13 | 14 | causes dependency packages to be download to global ~/.m2/repository/ 15 | and the copied to your lib folder. 16 | dev dependencies into lib/dev folder. 
17 | 18 | ## build 19 | lein clean 20 | lein deps 21 | lein compile 22 | lein run -m package.name.class 23 | 24 | 25 | -------------------------------------------------------------------------------- /msgqueue/project.clj: -------------------------------------------------------------------------------- 1 | (defproject msgqueue "1.0.0-SNAPSHOT" 2 | :description "A distributed msg queue based on rabbitmq" 3 | :repositories {"local" ~(str (.toURI (java.io.File. "local_jars")))} 4 | :dependencies [ 5 | [org.clojure/clojure "1.3.0"] 6 | [org.clojure/clojure-contrib "1.2.0"] ;; for clojure.contrib.sql 7 | [org.clojure/java.jdbc "0.2.3"] ; jdbc 8 | [mysql/mysql-connector-java "5.1.6"] 9 | [clj-redis "0.0.12"] ; 10 | ;[clojure-rabbitmq "0.2.1"] 11 | ;[rabbitmq-client "1.7.0"] 12 | [com.rabbitmq/amqp-client "2.3.1"] ; use amqp-client ! 13 | [org.clojure/data.json "0.2.2"] ;; json package 14 | ] 15 | :main msgqueue.core) ; set main entry 16 | -------------------------------------------------------------------------------- /dbconn/project.clj: -------------------------------------------------------------------------------- 1 | (defproject dbconn "1.0.0-SNAPSHOT" 2 | :description "storage connection with mysql or redis" 3 | :repositories {"local" ~(str (.toURI (java.io.File. 
"local_jars")))} 4 | :dependencies [ 5 | [org.clojure/clojure "1.3.0"] 6 | [org.clojure/clojure-contrib "1.2.0"] ; for clojure.contrib.sql 7 | [korma "0.3.0-RC5"] ; awesome korma 8 | [org.clojure/java.jdbc "0.2.3"] ; jdbc 9 | [mysql/mysql-connector-java "5.1.6"] ; mysql jdbc driver 10 | [org.postgresql/postgresql "9.2-1002-jdbc4"] ; postgresql 11 | [org.xerial/sqlite-jdbc "3.7.2"] ; sqlite 12 | [clj-redis "0.0.12"] ; 13 | [clojure-rabbitmq "0.2.1"] 14 | [org.clojure/data.json "0.2.2"] ;; json package 15 | ] 16 | :main dbconn.core) ; set main entry 17 | -------------------------------------------------------------------------------- /msgqueue/src/msgqueue/rabbitmq/sender.clj: -------------------------------------------------------------------------------- 1 | ;; 2 | ;; define a set of method to send msg to queue 3 | ;; 4 | (ns msgqueue.rabbitmq.sender 5 | (:use msgqueue.rabbitmq.rabbitmq)) 6 | 7 | (defn send-single-msg [routingkey] 8 | (println "Sending test msg to " routingkey) 9 | (with-rabbit ["localhost" "guest" "guest"] 10 | (send-message routingkey (str "sending test message to " routingkey))) 11 | (println "done!")) 12 | 13 | (defn send-multicast 14 | ([] 15 | (send-multicast "fanex" "logs-all-level")) 16 | ([exchgname routingkey] 17 | (println "Multicasting to " exchgname routingkey) 18 | (with-rabbit ["localhost" "guest" "guest"] 19 | (send-message exchgname FANOUT-EXCHANGE-TYPE routingkey "Broadcast! 
Multicasting all logs")) 20 | (println "done!"))) 21 | 22 | (defn test-send [qname msg] 23 | (println "Test sending msg to " qname msg) 24 | (with-rabbit ["localhost" "guest" "guest"] 25 | (send-message qname msg)) 26 | (println "done!")) 27 | -------------------------------------------------------------------------------- /trident-clj/src/trident_clj/prn_filter.clj: -------------------------------------------------------------------------------- 1 | (ns trident-clj.prn-filter 2 | (:import [java.io FileReader] 3 | [java.util Map Map$Entry List ArrayList Collection Iterator HashMap]) 4 | (:import [storm.trident.operation TridentCollector Function] 5 | [backtype.storm.tuple Values]) 6 | (:require [clojure.string :as str] 7 | [clojure.tools.logging :as log]) 8 | (:gen-class 9 | :name com.colorcloud.trident.PrintFilter ; namespace to 10 | :implements [storm.trident.operation.Filter])) ; implement filter interface 11 | 12 | 13 | (defn -prepare ; gen-class method prefix by - 14 | " called once, better for init global var and db conn " 15 | [this conf ^storm.trident.operation.TridentOperationContext context] 16 | (prn " --- PrintFilter prepare ---")) 17 | 18 | (defn -cleanup 19 | "called once to release resource upon tear down" 20 | [] 21 | (prn " --- PrintFilter cleanup ----")) 22 | 23 | (defn -isKeep ; 24 | "process each tuple, predicate to decide whether to keep" 25 | [this ^storm.trident.tuple.TridentTuple tuple] 26 | (prn " " tuple) 27 | true) 28 | -------------------------------------------------------------------------------- /deftest.clj: -------------------------------------------------------------------------------- 1 | (ns tdd 2 | (:import (java.text SimpleDateFormat) 3 | (java.util Calendar GregorianCalendar)) 4 | (:use [clojure.test])) 5 | 6 | (defn date [date-string] 7 | (let [f (SimpleDateFormat. "yyyy-MM-dd") 8 | d (.parse f date-string)] 9 | (doto (GregorianCalendar.) 
10 | (.setTime d)))) 11 | 12 | (defn day-from [d] 13 | (.get d Calendar/DAY_OF_MONTH)) 14 | 15 | (defn month-from [d] 16 | (inc (.get d Calendar/MONTH))) 17 | 18 | (defn year-from [d] 19 | (.get d Calendar/YEAR)) 20 | 21 | (deftest test-simple-data-parsing 22 | (let [d (date "2009-1-22")] 23 | (is (= (day-from d) 22)))) 24 | 25 | (deftest test-simple-data-parsing 26 | (let [d (date "2009-01-22")] 27 | (is (= (month-from d) 1)) 28 | (is (= (day-from d) 22)) 29 | (is (= (year-from d) 2009)))) 30 | 31 | ; 32 | (defn as-string [date] 33 | (let [y (year-from date) 34 | m (month-from date) 35 | d (day-from date)] 36 | (str-join "-" [y m d]))) 37 | 38 | ; each test fn test one fn 39 | (deftest test-as-string 40 | (let [d (date "2009-01-22")] 41 | (is (= (as-string d) "2009-01-22")))) 42 | 43 | (run-tests tdd) -------------------------------------------------------------------------------- /trident/src/main/java/com/colorcloud/trident/LocationStateQuery.java: -------------------------------------------------------------------------------- 1 | package com.colorcloud.trident; 2 | 3 | import java.util.ArrayList; 4 | import java.util.List; 5 | 6 | import storm.trident.operation.TridentCollector; 7 | import storm.trident.state.BaseQueryFunction; 8 | import storm.trident.tuple.TridentTuple; 9 | import backtype.storm.tuple.Values; 10 | 11 | 12 | public class LocationStateQuery extends BaseQueryFunction{ 13 | 14 | // for each tuple, get the location tag, update global location map, return cnt 15 | @Override 16 | public List batchRetrieve(LocationState state, List args) { 17 | List loccnts = new ArrayList(); 18 | for(TridentTuple tuple : args){ 19 | String loc = (String) tuple.getValue(0); // first field is location 20 | long cnt = state.incrementAndGet(loc); 21 | loccnts.add(cnt); 22 | } 23 | 24 | return loccnts; 25 | } 26 | 27 | @Override 28 | public void execute(TridentTuple tuple, Long locCnt, TridentCollector collector) { 29 | //emit by tweet id 30 | String loc = (String) 
tuple.getValue(0); 31 | collector.emit(new Values(loc, locCnt)); 32 | } 33 | 34 | 35 | } -------------------------------------------------------------------------------- /trident/src/main/java/com/colorcloud/trident/Skeleton.java: -------------------------------------------------------------------------------- 1 | package com.colorcloud.trident; 2 | 3 | import java.io.IOException; 4 | 5 | import storm.trident.TridentTopology; 6 | import backtype.storm.Config; 7 | import backtype.storm.LocalCluster; 8 | import backtype.storm.LocalDRPC; 9 | import backtype.storm.generated.StormTopology; 10 | import backtype.storm.tuple.Fields; 11 | 12 | /** 13 | * This topology use a stream to wrap fake tweets batch spout and dump all tuples. 14 | */ 15 | public class Skeleton { 16 | 17 | public static StormTopology buildTopology(LocalDRPC drpc) throws IOException { 18 | FakeTweetsBatchSpout spout = new FakeTweetsBatchSpout(); 19 | 20 | TridentTopology topology = new TridentTopology(); 21 | 22 | // for each tuple, dump content. 
23 | topology.newStream("spout", spout).each(new Fields("id", "text", "actor", "location", "date"), 24 | new Utils.PrintFilter()); 25 | 26 | return topology.build(); 27 | } 28 | 29 | public static void main(String[] args) throws Exception { 30 | Config conf = new Config(); 31 | 32 | LocalDRPC drpc = new LocalDRPC(); 33 | LocalCluster cluster = new LocalCluster(); 34 | cluster.submitTopology("hackaton", conf, buildTopology(drpc)); 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /msgqueue/src/msgqueue/core.clj: -------------------------------------------------------------------------------- 1 | ;; the main entry for message queue 2 | 3 | (ns msgqueue.core 4 | (:require [clojure.string :as str]) 5 | (:require [clojure.java.jdbc :as sql]) 6 | (:import [java.io FileReader] 7 | [java.util Map Map$Entry List ArrayList Collection Iterator HashMap]) 8 | (:use [msgqueue.util]) ; use util namespace without fully qualified name. 9 | (:use [msgqueue.rabbitmq.rabbitmq] 10 | [msgqueue.rabbitmq.worker] 11 | [msgqueue.rabbitmq.sender] 12 | [msgqueue.rabbitmq.receiver] 13 | [msgqueue.rabbitmq.worker-usage]) 14 | (:gen-class :main true)) ; need gen-class :main in order for lein run 15 | 16 | 17 | (def test-queue-name "test_queue") 18 | 19 | (defn test-rabbit [role] 20 | (condp = (first role) 21 | "receiver" (test-receive test-queue-name) 22 | "sender" (test-send test-queue-name "hello world"))) 23 | 24 | (defn test-worker [role] 25 | (condp = (first role) 26 | "workerhandler" (start-handler-process) 27 | "workertask" (worker-task))) 28 | 29 | ; main entry, refered from prj.clj and lein run will execute. 
30 | (defn -main [& args] 31 | (prn " >>> starting message queue " args " <<< ") 32 | ;(test-rabbit args)) 33 | (test-worker args)) 34 | -------------------------------------------------------------------------------- /msgqueue/src/msgqueue/rabbitmq/receiver.clj: -------------------------------------------------------------------------------- 1 | ;; 2 | ;; define a set of msg queue recevier in this namespace 3 | ;; 4 | (ns msgqueue.rabbitmq.receiver 5 | (:use clojure.contrib.str-utils) 6 | (:use msgqueue.rabbitmq.rabbitmq)) 7 | 8 | 9 | ; get multicast msg from fanout exchange with specified multicast key 10 | (defn receive-multicast [multicast-key] 11 | (println "Waiting for broadcast..." multicast-key) 12 | (with-rabbit ["localhost" "guest" "guest"] 13 | (println (next-message-from "fanex" FANOUT-EXCHANGE-TYPE multicast-key)))) 14 | 15 | ; get multi-part msg 16 | (defn receive-multipart [qname] 17 | (with-rabbit ["localhost" "guest" "guest"] 18 | (println "Waiting for messages...") 19 | ; abstract msg queue into msg seq, and group 2 msg into a pair 20 | (let [message-pairs (partition 2 (message-seq qname))] 21 | (doseq [message-pair message-pairs] 22 | (str-join "::" message-pair))))) 23 | 24 | ; give a queue name and a handler callback, for each msg, invoke callback. 
25 | (defn handle-msg [qname handler] 26 | (with-rabbit ["localhost" "guest" "guest"] 27 | (println "Waiting for messages from " qname) 28 | (doseq [message (message-seq qname)] 29 | (handler message)))) 30 | 31 | (defn test-receive [qname] 32 | (handle-msg qname prn)) 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | -------------------------------------------------------------------------------- /trident/src/main/java/com/colorcloud/trident/LocationState.java: -------------------------------------------------------------------------------- 1 | package com.colorcloud.trident; 2 | 3 | import java.io.Serializable; 4 | import java.util.concurrent.ConcurrentHashMap; 5 | import java.util.concurrent.ConcurrentMap; 6 | import java.util.concurrent.atomic.AtomicLong; 7 | 8 | import storm.trident.state.State; 9 | 10 | public class LocationState implements State, Serializable{ 11 | 12 | ConcurrentMap locationMap; 13 | 14 | public LocationState(int partitionIdx, int numPartitions) { 15 | locationMap = new ConcurrentHashMap(); 16 | System.out.println("LocationState : makeState : " + partitionIdx + " / " + numPartitions); 17 | } 18 | 19 | /** 20 | * You got love clj! (def m (atom {})) (swap! m update-in [:c] (fnil inc 0)) 21 | */ 22 | public long incrementAndGet(String loc){ 23 | long cnt = 1; 24 | AtomicLong firstv = new AtomicLong(1); 25 | // put only when absent, otherwise get. 
26 | AtomicLong cur = locationMap.putIfAbsent(loc, firstv); 27 | if (cur != null){ 28 | cnt = cur.incrementAndGet(); 29 | } 30 | return cnt; 31 | } 32 | 33 | @Override 34 | public void beginCommit(Long txid) { 35 | // TODO Auto-generated method stub 36 | } 37 | 38 | @Override 39 | public void commit(Long txid) { 40 | // TODO Auto-generated method stub 41 | } 42 | 43 | } 44 | -------------------------------------------------------------------------------- /trident/src/main/java/com/colorcloud/trident/storage/JedisDB.java: -------------------------------------------------------------------------------- 1 | package com.colorcloud.trident.storage; 2 | 3 | 4 | import java.util.Map; 5 | 6 | import redis.clients.jedis.Jedis; 7 | 8 | public class JedisDB { 9 | //private static final long serialVersionUID = 7526472295622776147L; 10 | 11 | private String host; 12 | private int port; 13 | private Jedis jedis; 14 | 15 | public JedisDB() { 16 | host = "localhost"; 17 | port = 6379; 18 | jedis = new Jedis(host, port); 19 | System.out.println("JedisDB connecting to server"); 20 | } 21 | 22 | public JedisDB(String h, int p){ 23 | host = h; 24 | port = p; 25 | jedis = new Jedis(host, port); 26 | } 27 | 28 | /** 29 | * append a vector of string values to the redis keyed list. 30 | */ 31 | public long rpush(final String key, final String... strings){ 32 | return jedis.rpush(key, strings); 33 | } 34 | 35 | /** 36 | * set hash field and value of the key. 
37 | */ 38 | public Long hset(final String key, final String field, final String value) { 39 | return jedis.hset(key, field, value); 40 | } 41 | 42 | /** 43 | * store a map into redis 44 | */ 45 | public void storeMap(final String key, final Map map){ 46 | for (Map.Entry entry : map.entrySet()) { 47 | String field = entry.getKey().toString(); 48 | Object value = entry.getValue(); 49 | jedis.hset(key, field, value.toString()); 50 | } 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /trident-clj/project.clj: -------------------------------------------------------------------------------- 1 | (defproject trident-clj "1.0.0-SNAPSHOT" 2 | :description "storm trident with redis storage in clojure" 3 | :repositories {"local" ~(str (.toURI (java.io.File. "local_jars")))} 4 | :source-paths ["src"] ; where the namespace directory starts 5 | :test-paths ["test"] 6 | :dependencies [ ; lein2, all deps is under maven repo, $HOME/.m2/repository 7 | [org.clojure/clojure "1.4.0"] 8 | [org.clojure/clojure-contrib "1.2.0"] 9 | [org.clojure/tools.logging "0.2.6"] ; logging 10 | [korma "0.3.0-RC5"] ; awesome korm 11 | [clj-redis "0.0.12"] ; 12 | [org.clojure/data.json "0.2.2"] ; json package 13 | [clj-time "0.5.1"] 14 | [storm "0.9.0-wip15"] 15 | ] 16 | ; activate profiles to config projects. settings in certain profile is not propagated 17 | ; downstream to projects that deps on your proj. 18 | ; use with-profile to select certain profile to run on. 19 | :profiles {:dev 20 | {:dependencies 21 | [ 22 | ]}} 23 | ; to enure uberjar works by not deleting non project classes 24 | :keep-non-project-classes true 25 | ; to run lein do clean, with-profile dev compile, with-profile dev run. 
26 | :main trident-clj.core 27 | :warn-on-reflection true 28 | ; may just :aot :all 29 | :aot [trident-clj.core 30 | trident-clj.tweet-spout 31 | trident-clj.persister 32 | trident-clj.prn-filter] 33 | :min-lein-version "2.0.0") -------------------------------------------------------------------------------- /trident/README.md: -------------------------------------------------------------------------------- 1 | # Storm Trident example 2 | 3 | This is a simple example of using Storm Trident for data processing. 4 | 5 | ## Setup 6 | 7 | This is no project scaffolding for storm project. We need to create new project manually and run storm cluster manually. 8 | 9 | 1. create an eclipse java project add include storm jar files as dependency. 10 | mkdir -p src/main/java/com/colorcloud/trident 11 | 12 | config java build path add source folder to point to src/main/java. 13 | 14 | 2. cp all dependent jar files in to lib folder, or just link it from jars in some existing projects without copying. 15 | 16 | 3. start redis server, we will store intermediate state into redis server. 17 | 18 | 4. run main class file by pointing classpath to libs and local bin and resource. 19 | java -cp ./bin:lib/*:lib/dev/*:src/main/resources com.colorcloud.trident.GroupAggregate 20 | 21 | 5. To run topology, build in eclipse and execute the class main. 22 | 23 | java -cp ./bin:lib/*:lib/dev/*:src/main/resources com.colorcloud.trident.GroupAggregate 24 | 25 | java -cp ./bin:lib/*:lib/dev/*:src/main/resources com.colorcloud.trident.DrpcStream 26 | 27 | ## Trident Notes. 28 | 29 | 1. Each batch is a reliable transaction. 30 | 2. drpc.execute(TOPNAME, text) is used to inject text stream into top. 31 | 3. GroupBy and aggregation functions are invoked per batch, init and complete upon batch start, and aggregate on each trident tuple in the batch. 32 | 33 | 34 | ## Trident by Clojure. 35 | 36 | Clojure DSL for Trident is still under development. 
37 | For now, we can use gen-class to write trident topology in Clojure. 38 | check whoahbot's github and blog. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # clojure idioms 2 | 3 | This repo contains patterns and examples of idiomatic clojure. 4 | This includes common clojure patterns for map reduce, best practice to connect to distributed components, utilize dependent libraries, DSL for other platforms, core.async examples, and some algorithms and puzzles in clojure. 5 | 6 | ## dbconn 7 | 8 | The project illustrates clojure way to connect to Mysql and Redis and read/write records from/to the underlying data store with ORM data model. 9 | 10 | ## msqqueue 11 | 12 | The project illustrates clojure way of abstracting rabbitmq queue into lazy sequence and use clojure sequence library to process message queues. We use rabbitmq to distribute computations to a swarm of workers and use callback queue to retrieve the result. 13 | 14 | ## wordcount 15 | 16 | A simple demo of data processing using storm. We create a topology to transform stream of logs into in memory state by filtering, grouping, and aggregation. Working in progress to provide state query API in real-time. 17 | 18 | ## 4clojure 19 | 20 | My solutions to 4clojure puzzels. Currently ranked 162 out of 14099 users. 21 | 22 | ## Trident 23 | 24 | This repo contains example of creating storm trident for processing logs with filter and aggregation. We illustrate with two implmenetations, one with java and one with clojure. 25 | 26 | ## Trident-clj 27 | 28 | Re-implement Trident example of log processing with grouping and aggregation using clojure. Trident currently does not have clojure DSL. Implementation is based on clojure gen-class. 
29 | 30 | ## others 31 | 32 | Other small modules contains examples of idiomatic clojure programming, e.g., core.async, graph algorithm, qsort in 5 lines, defmacro, etc. -------------------------------------------------------------------------------- /dbconn/src/dbconn/core.clj: -------------------------------------------------------------------------------- 1 | (ns dbconn.core 2 | (:require [clojure.string :as str]) 3 | (:require [clojure.java.jdbc :as sql]) 4 | (:import [java.io FileReader] 5 | [java.util Map Map$Entry List ArrayList Collection Iterator HashMap]) 6 | (:require [clj-redis.client :as redis]) ; bring in redis namespace 7 | (:use [dbconn.redis.redis-datamapper ]) 8 | (:use [dbconn.redis.redis-persister]) 9 | (:use [dbconn.mysql.mysql-datamapper]) ; use bring-in the entire namespace 10 | (:gen-class :main true)) ; bring in redis namespace 11 | 12 | 13 | ; simple test redis 14 | (defn test-redis [db] 15 | ; set key 16 | (redis/set db "foo" "bar") 17 | (prn (redis/get db "foo")) 18 | (redis/rpush db "cars" "celica") 19 | (redis/rpush db "cars" "accord") 20 | (prn (redis/lrange db "cars" 0 -1)) 21 | (redis/sadd db "lang" "clojure") 22 | (redis/sadd db "lang" "javascript") 23 | (prn (redis/smembers db "lang"))) 24 | 25 | ; use def type macro to create a data type to encap data from/to redis 26 | ; we call this redis-type and we can instantiate redis objects and use builder pattern. 27 | (def-redis-type mobile-user 28 | (string-type :id :name :start-time :parent-id) 29 | (list-type :apps) 30 | (parimary-key :id :parent-id) 31 | (format :json) 32 | (key-separator "##")) 33 | 34 | (defn test-mobile-user [] 35 | (prn (mobile-user :name)) 36 | (mobile-user :format) 37 | (let [m (mobile-user :new)] ; instantiate and using redis object. 38 | (m :set! 
:name "sparkle") 39 | (m :get :name) 40 | (m :save!))) 41 | 42 | ; 43 | ; test connection to mysql 44 | (defn test-sqlkorma [] 45 | (prn "testing sql korma lib") 46 | ;(populate-db) 47 | (get-user "jackson")) 48 | 49 | ; the main 50 | (defn -main [] 51 | (prn " >>>> starting dbconn.core main <<<<< ") 52 | (test-redis redis-db) 53 | (test-mobile-user) 54 | (test-sqlkorma)) 55 | 56 | 57 | -------------------------------------------------------------------------------- /simple-http.clj: -------------------------------------------------------------------------------- 1 | (ns joy.web 2 | (:import (com.sun.net.httpserver HttpHandler HttpExchange HttpServer) 3 | (java.net InetSocketAddress HttpURLConnection) 4 | (java.io IOException FilterOutputStream) 5 | (java.util Arrays))) 6 | 7 | (defn new-server [port path handler] 8 | (doto (HttpServer/create (InetSocketAddress. port) 0) 9 | (.createContext path handler) 10 | (.setExecutor nil) 11 | (.start))) 12 | 13 | ;; default handle is a proxy on top of http request handler. 14 | (defn default-handler [txt] 15 | (proxy [HttpHandler] [] 16 | (handle [exchange] 17 | (.sendResponseHeaders exchange HttpURLConnection/HTTP_OK 0) 18 | (doto (.getResponseBody exchange) 19 | (.write (.getBytes txt)) 20 | (.close))))) ;; Close over txt 21 | 22 | (def server 23 | (new-server 3001 "/joy/hello" (default-handler "Hello Cleveland"))) 24 | 25 | 26 | ;; bind the ret of default-handle to a var, and pass to server 27 | (.stop server 0) 28 | (def p (default-handler 29 | "There's no problem that can't be solved with another level of indirection")) 30 | (def server (new-server 8123 "/joy/hello" p)) 31 | 32 | ;; 33 | ;; now make default handler ret a fn 34 | (defn make-handler-fn [fltr txt] 35 | (fn [this exchange] ;; this captures the caller, which is proxy of http handler. 
36 | (let [b (.getBytes txt)] 37 | (-> exchange 38 | .getResponseHeaders 39 | (.set "Content-Type" "text/html")) 40 | (.sendResponseHeaders exchange 41 | HttpURLConnection/HTTP_OK 42 | 0) 43 | (doto (fltr (.getResponseBody exchange)) 44 | (.write b) 45 | (.close))))) 46 | 47 | ;; bind the ret fn of make-handler-fn 48 | ;; update-proxy the p proxy with the mapping of handle 49 | (defn change-message 50 | "Convenience method to change a proxy's output message" 51 | ([p txt] (change-message p identity txt)) 52 | ([p fltr txt] 53 | (update-proxy p {"handle" (make-handler-fn fltr txt)}))) 54 | 55 | 56 | ;; 57 | ;; now we can use 58 | (change-message p "Hello Dynamic!") 59 | 60 | -------------------------------------------------------------------------------- /dbconn/README: -------------------------------------------------------------------------------- 1 | # dbconn 2 | Testing connectivity to mysql and redis using Redis and mysql-connector 3 | 4 | ## Setup project 5 | 6 | ### project.clj 7 | 8 | Lib format: group-id/artifact-id, version string. 9 | Clojure uses the following paths to resolve dependency library. 10 | central (http://repo1.maven.org/maven2), 11 | local (file:$HOME/macsrc/clj/cljaction/cljaction-test/local_jars/), 12 | clojars (http://clojars.org/repo/) 13 | 14 | add lib coordinates (artifact-id and (if differing) group-id) go into your project.clj’s :dependencies vector. 15 | require the package into ns macro’s :require list. (ns package.namespace) = (src/package/namespace) 16 | 17 | Specify main function for the project. 18 | 19 | :main dbconn.core) 20 | 21 | ### Package is Namespace 22 | 23 | Package is namespece, and named after convention src/package/file.clj maps to 24 | (ns package.file) 25 | 26 | ### Require, use, import and gen-class. 27 | 28 | Import java packages and each classes. [java.util Map List] 29 | Require loads the lib, :as to give a short name to avoid qualified full name. 
30 | Require or use other namespace's code as though its yours. 31 | Require can now take a :refer option. :refer takes a list of symbols to refer 32 | from the namespace or :all to bring in all public vars." 33 | 34 | (ns cljaction-test.core 35 | (:require [clojure.string :as str]) 36 | (:require [clojure.java.jdbc :as sql]) 37 | (:import [java.io FileReader] 38 | [java.util Map Map$Entry List ArrayList Collection Iterator HashMap] 39 | ) 40 | (:use [cljaction-test.util]) ;; use util namespace without fully qualified name. 41 | (:use [cljaction-test.chapter14-worker]) ;; use chapter14_worker 42 | (:use [cljaction-test.chapter14-worker-usage]) 43 | (:gen-class :main true)) 44 | 45 | 46 | gen-class gen a java class that delegate java class .method to clj :prefix func. (.main to -main) 47 | clojure in action java interop has great depth in gen-class options. 48 | 49 | for mutables, only single field available called state. 50 | : with or without :gen-class, clj with gen a set of classes for each clj function. 51 | args: :name, :init, :constructor :state, 52 | http://kotka.de/blog/2010/02/gen-class_how_it_works_and_how_to_use_it.html 53 | 54 | 55 | ## Run 56 | lein compile 57 | lein run 58 | -------------------------------------------------------------------------------- /trident-clj/README.md: -------------------------------------------------------------------------------- 1 | # Storm Trident example written in clojure. 2 | 3 | This is a simple example of using Storm Trident for data processing written in clojure 4 | 5 | ## Setup 6 | 7 | In lein-2, all dependent libs are in maven repos at $HOME/.m2/repository/package 8 | If you have local jar, use mvn deploy rather than mvn install. 9 | 10 | lein-2 new trident-clj 11 | lein-2 deps 12 | lein-2 with-profile dev compile 13 | lein-2 with-profile dev run 14 | 15 | 16 | ## dependency 17 | 1. storm deps on exact clojure 1.4.0 ver. 18 | 19 | 20 | # Notes about Lein uberjar 21 | 1. 
use lein jar to generate jar of only your code (not include dep jars) 22 | 23 | 2. use lein uberjar to gen standalone jar that include all dep jars(including clojure jar). 24 | 25 | 3. lein-2 with-profile dev uberjar generate jar 26 | java -jar target/dev+uberjar/trident-clj-1.0.0-SNAPSHOT-standalone.jar 27 | 3. lein-2 uberjar generate jar 28 | java -jar target/trident-clj-1.0.0-SNAPSHOT-standalone.jar 29 | 30 | 4. Lein will remove non project class during package. This leads to class not found error when running standalone jar. Turn on the flag in project.clj. 31 | ; to enure uberjar works by not deleting non project classes 32 | :keep-non-project-classes true 33 | 34 | 35 | ## gen-class notes 36 | 37 | (:gen-class 38 | :name com.colorcloud.trident.TweetSpout ; convert this ns to class Tweet 39 | :prefix - ; class methods prefix, class method get this as first arg. 40 | :state state ; :state defines a method which will return the object's state. put a atom {} to store your serializables. 41 | :init init ; Must return [ [superclass-constructor-args] state] 42 | :constructors {[] [] ; empty arg constructor 43 | [String int] []} ; a map of constructor sig to superclass construcotr signature 44 | :extends storm.trident.operation.BaseAggregator 45 | :implements [storm.trident.spout.IBatchSpout])) ; this ns impl Function 46 | 47 | ## redis data mapper 48 | 49 | Start redis server, we will store intermediate state into redis server. 50 | 51 | We use redis data mapper to store object model in redis. 52 | 53 | 54 | ## Trident Notes. 55 | 56 | 1. Each batch is a reliable transaction. 57 | 2. GroupBy and aggregation functions are invoked per batch, init and complete upon batch start, and aggregate on each trident tuple in the batch. 
58 | -------------------------------------------------------------------------------- /defclass.clj: -------------------------------------------------------------------------------- 1 | ;; inheritence 2 | (declare this find-method) 3 | 4 | (defn new-object [klass] 5 | (let [state (ref {})] 6 | (fn thiz [command & args] 7 | (condp = command 8 | :class klass 9 | :class-name (klass :name) 10 | :set! (let [[k v] args] 11 | (dosync (alter state assoc k v)) 12 | nil) 13 | :get (let [[key] args] 14 | (key @state)) 15 | (let [method (klass :method command)] 16 | (if-not method 17 | (throw (RuntimeException. (str "Unable to respond to " command)))) 18 | (binding [this thiz] 19 | (apply method args))))))) 20 | 21 | (defn new-class [class-name parent methods] 22 | (fn klass [command & args] 23 | (condp = command 24 | :name (name class-name) 25 | :parent parent 26 | :new (new-object klass) 27 | :methods methods 28 | :method (let [[method-name] args] 29 | (find-method method-name klass))))) 30 | 31 | (def OBJECT (new-class :OBJECT nil {})) 32 | 33 | (defn find-method [method-name klass] 34 | (or ((klass :methods) method-name) 35 | (if-not (= #'OBJECT klass) 36 | (find-method method-name (klass :parent))))) 37 | 38 | (defn method-spec [sexpr] 39 | (let [name (keyword (second sexpr)) 40 | body (next sexpr)] 41 | [name (conj body 'fn)])) 42 | 43 | (defn method-specs [sexprs] 44 | (->> sexprs 45 | (filter #(= 'method (first %))) 46 | (mapcat method-spec) 47 | (apply hash-map))) 48 | 49 | (defn parent-class-spec [sexprs] 50 | (let [extends-spec (filter #(= 'extends (first %)) sexprs) 51 | extends (first extends-spec)] 52 | (if (empty? 
extends) 53 | 'OBJECT 54 | (last extends)))) 55 | 56 | (defmacro defclass [class-name & specs] 57 | (let [parent-class (parent-class-spec specs) 58 | fns (or (method-specs specs) {})] 59 | `(def ~class-name (new-class '~class-name #'~parent-class ~fns)))) 60 | 61 | (defclass Person 62 | (method age [] 63 | (* 2 10)) 64 | (method about [diff] 65 | (str "I was born about " (+ diff (this :age)) " years ago"))) 66 | 67 | (defclass Woman 68 | (extends Person) 69 | (method greet [v] 70 | (str "Hello, " v)) 71 | (method age [] 72 | (* 2 9))) 73 | 74 | (def donna (Woman :new)) 75 | (donna :greet "Shelly") 76 | (donna :age) 77 | (donna :about 3) 78 | 79 | 80 | -------------------------------------------------------------------------------- /db.clj: -------------------------------------------------------------------------------- 1 | ; data store connection 2 | 3 | ; using clj-record as ORM to store models 4 | ; to define a model, you define a namespace. 5 | 6 | ; Requiring clj-record.boot pulls in several different namespaces, 7 | ; including core, associations, and validation. 8 | ; you only need to call the init-model function from the clj-record.core namespace. 9 | (ns com.colorcloud.model.user 10 | (:require clj-record.boot) 11 | (:use [clojure.contrib.sql])) ; (use 'clojure.contrib.sql) 12 | 13 | ; require the clj-record.boot namespace. Internally, the library is made up of several 14 | ; different namespaces that contain code related to things like 15 | ; associations, validations, callbacks, and serialization. 
16 | 17 | (def db ; mysql db configuration using jdbc driver 18 | {:classname "com.mysql.jdbc.Driver" 19 | :subprotocol "mysql" 20 | :user "root" 21 | :password "password" 22 | :subname "//localhost/damages_dev"}) 23 | 24 | ; now call init-model to create a model store with validations, etc 25 | (clj-record.core/init-model 26 | (:associations (has-many charges))) 27 | 28 | ; to use the model, import it and start to create records model/create, etc 29 | (require '(com.colorcloud.model [user :as user])) 30 | (user/create {:login "rob" 31 | :first_name "Robert" 32 | :last_name "Berger" 33 | :password "secret" 34 | :email_address "rob@runa.com"}) 35 | 36 | ; get record by id 37 | (user/get-record 1) 38 | 39 | ; search record by find like mongo json query 40 | (user/find-records {:first_name "robert"}) 41 | 42 | ; update a record 43 | (user/update {:login "stevenson" :id 2}) 44 | 45 | (user/destroy-record {:id 2}) 46 | 47 | ; association : charge model fk point to user model 48 | (ns com.colorcloud.model.charge 49 | (:require clj-record.boot)) 50 | 51 | ; init-model with association and validation, etc 52 | (clj-record.core/init-model 53 | (:associations (belongs-to user)) 54 | (:validation ; validate input data 55 | (:amount_dollars "Must be positive!" #(>= % 0)) 56 | (:amount_cents "Must be positive!" #(>= % 0))) 57 | 58 | (:callbacks ; data change handler, before-save, before-update, and after-load. 
59 | (:before-save (fn [record] 60 | (if-not (:category record) 61 | (assoc record :category "uncategorized") 62 | record))))) 63 | 64 | ; validate input data 65 | (let [errors (charge/validate {:amount_dollars 0 66 | :amount_cents -10 67 | :date "2010-01-10" 68 | :vendor_name "amazon"})] 69 | (println errors)) 70 | 71 | -------------------------------------------------------------------------------- /trident/src/main/java/com/colorcloud/trident/BatchAggregate.java: -------------------------------------------------------------------------------- 1 | package com.colorcloud.trident; 2 | 3 | import java.io.IOException; 4 | import java.util.HashMap; 5 | import java.util.Map; 6 | 7 | import org.apache.commons.collections.MapUtils; 8 | 9 | import storm.trident.TridentTopology; 10 | import storm.trident.operation.BaseAggregator; 11 | import storm.trident.operation.TridentCollector; 12 | import storm.trident.tuple.TridentTuple; 13 | import backtype.storm.Config; 14 | import backtype.storm.LocalCluster; 15 | import backtype.storm.LocalDRPC; 16 | import backtype.storm.generated.StormTopology; 17 | import backtype.storm.tuple.Fields; 18 | import backtype.storm.tuple.Values; 19 | 20 | /** 21 | * This example shows the usage of aggregate() method for aggregating the WHOLE batch of Tuples. 22 | *

23 | * Because we aggregate the whole batch, we produce a hashmap with the counts per each location. 24 | * Note how we only use the collector at the end of the Aggregator so we don't emit new Tuples for each Tuple that we process: 25 | * we only emit one Tuple per batch. 26 | * For updating databases that's the best approach: you don't usually want to overload your DB 27 | * with one update per each Tuple. 28 | */ 29 | public class BatchAggregate { 30 | 31 | @SuppressWarnings({ "serial" }) 32 | public static class LocationAggregator extends BaseAggregator> { 33 | 34 | @Override 35 | public Map init(Object batchId, TridentCollector collector) { 36 | return new HashMap(); 37 | } 38 | 39 | @Override 40 | public void aggregate(Map val, TridentTuple tuple, TridentCollector collector) { 41 | String location = tuple.getString(0); 42 | val.put(location, MapUtils.getInteger(val, location, 0) + 1); 43 | } 44 | 45 | @Override 46 | public void complete(Map val, TridentCollector collector) { 47 | collector.emit(new Values(val)); 48 | } 49 | } 50 | 51 | public static StormTopology buildTopology(LocalDRPC drpc) throws IOException { 52 | FakeTweetsBatchSpout spout = new FakeTweetsBatchSpout(100); 53 | 54 | TridentTopology topology = new TridentTopology(); 55 | topology.newStream("spout", spout) 56 | .aggregate(new Fields("location"), new LocationAggregator(), new Fields("location_counts")) 57 | .each(new Fields("location_counts"), new Utils.PrintFilter()); 58 | 59 | return topology.build(); 60 | } 61 | 62 | public static void main(String[] args) throws Exception { 63 | Config conf = new Config(); 64 | 65 | LocalDRPC drpc = new LocalDRPC(); 66 | LocalCluster cluster = new LocalCluster(); 67 | cluster.submitTopology("location_batchaggregate", conf, buildTopology(drpc)); 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /trident/src/main/java/com/colorcloud/trident/ParallelismExample1.java: 
-------------------------------------------------------------------------------- 1 | package com.colorcloud.trident; 2 | 3 | import java.io.IOException; 4 | import java.util.Map; 5 | 6 | import storm.trident.TridentTopology; 7 | import storm.trident.operation.BaseFilter; 8 | import storm.trident.operation.TridentOperationContext; 9 | import storm.trident.tuple.TridentTuple; 10 | import backtype.storm.Config; 11 | import backtype.storm.LocalCluster; 12 | import backtype.storm.LocalDRPC; 13 | import backtype.storm.generated.StormTopology; 14 | import backtype.storm.tuple.Fields; 15 | 16 | /** 17 | * This example is useful for understanding how parallelism and partitioning works. parallelismHit() is applied down 18 | * until the next partitioning operation. Therefore here we have 5 processes (Bolts) applying a filter and 2 processes 19 | * creating messages (Spouts). 20 | *

21 | * But because we are partitioning by actor and applying a filter that only keeps tweets from one actor, we see in 22 | * stderr that it is always the same partition who is filtering the tweets, which makes sense. 23 | *

24 | * Now comment out the partitionBy() and uncomment the shuffle(), what happens? 25 | * 26 | * @author pere 27 | */ 28 | public class ParallelismExample1 { 29 | 30 | @SuppressWarnings("serial") 31 | public static class PerActorTweetsFilter extends BaseFilter { 32 | 33 | private int partitionIndex; 34 | private String actor; 35 | 36 | public PerActorTweetsFilter(String actor) { 37 | this.actor = actor; 38 | } 39 | 40 | @SuppressWarnings("rawtypes") 41 | @Override 42 | public void prepare(Map conf, TridentOperationContext context) { 43 | this.partitionIndex = context.getPartitionIndex(); 44 | } 45 | 46 | @Override 47 | public boolean isKeep(TridentTuple tuple) { 48 | boolean filter = tuple.getString(0).equals(actor); 49 | if(filter) { 50 | System.err.println("I am partition [" + partitionIndex + "] and I have kept a tweet by: " 51 | + actor); 52 | } 53 | return filter; 54 | } 55 | } 56 | 57 | public static StormTopology buildTopology(LocalDRPC drpc) throws IOException { 58 | FakeTweetsBatchSpout spout = new FakeTweetsBatchSpout(); 59 | 60 | TridentTopology topology = new TridentTopology(); 61 | topology.newStream("spout", spout) 62 | .parallelismHint(2) 63 | .partitionBy(new Fields("actor")) 64 | // .shuffle() 65 | .each(new Fields("actor", "text"), new PerActorTweetsFilter("dave")).parallelismHint(5) 66 | .each(new Fields("actor", "text"), new Utils.PrintFilter()); 67 | 68 | return topology.build(); 69 | } 70 | 71 | public static void main(String[] args) throws Exception { 72 | Config conf = new Config(); 73 | 74 | LocalDRPC drpc = new LocalDRPC(); 75 | LocalCluster cluster = new LocalCluster(); 76 | cluster.submitTopology("hackaton", conf, buildTopology(drpc)); 77 | } 78 | } 79 | -------------------------------------------------------------------------------- /trident-clj/src/trident_clj/persister.clj: -------------------------------------------------------------------------------- 1 | (ns trident-clj.persister 2 | (:import [java.io FileReader] 3 | [java.util 
Map Map$Entry List ArrayList Collection Iterator HashMap]) 4 | (:import [storm.trident.operation TridentCollector Function] 5 | [backtype.storm.tuple Values]) 6 | (:require [clojure.string :as str] 7 | [clojure.tools.logging :as log]) 8 | (:require [clj-redis.client :as redis]) ; bring in redis namespace 9 | (:require [trident-clj.redis.redis-datamapper :refer :all]) 10 | (:require [trident-clj.redis.redis-persister :refer :all]) 11 | (:gen-class 12 | :name com.colorcloud.trident.Persister ; convert this ns to class Tweet 13 | :implements [storm.trident.operation.Function])) ; this ns impl Function 14 | 15 | 16 | ; create redis model tweet-rant to store aggregated for each tweet 17 | (defn create-tweet-model [] 18 | (def-redis-type tweet-rant 19 | (string-type :id :actor :location :text :time) 20 | (list-type :followers) 21 | (primary-key :id :actor) 22 | (format :json) 23 | (key-separator "##"))) 24 | 25 | 26 | ; create a data object to persist data into redis 27 | (defn store-tweet [id actor text location ts cnt] 28 | (let [tweet (tweet-rant :new)] 29 | (tweet :set! :id (str id)) 30 | (tweet :set! :actor actor) 31 | (tweet :set! :location location) 32 | (tweet :set! :text text) 33 | (tweet :set! :time ts) 34 | (tweet :add! :followers (str actor "-follower-" 1)) 35 | (tweet :add! :followers (str actor "follower-" 2)) 36 | (tweet :save!))) 37 | 38 | ; defed redis type to find by primary key. 39 | (defn find-tweet [id actor] 40 | (tweet-rant :find id actor)) 41 | 42 | 43 | (defn verify-tweet [id actor text loc ts] 44 | (let [db-tweet (find-tweet id actor)] 45 | ;(prn text " -- " (db-tweet :get :followers) (db-tweet :get-state)))) 46 | (prn text " -- " (db-tweet :get-state)))) 47 | 48 | 49 | ; prepare called once on start. init global state here. 
50 | (defn -prepare ; gen-class method prefix by - 51 | " perpare : init global var and db conn " 52 | [this conf context] 53 | (prn "Persister prepare once") 54 | ; init redis connection to db within redis data mapper 55 | (init-redis-db) 56 | (create-tweet-model)) 57 | 58 | 59 | ; Function protocol method, invoked per each tuple emitted from upstream 60 | (defn -execute 61 | "process each tuple, aggregate group by location, persist to redis" 62 | [this ^storm.trident.tuple.TridentTuple tuple ^TridentCollector collector] 63 | (let [id (.getString tuple 0) 64 | actor (.getString tuple 1) 65 | text (.getString tuple 2) 66 | loc (.getString tuple 3) 67 | ts (.getString tuple 4)] 68 | ;(prn "TweetAggregator : execute " id actor text loc tm) 69 | (store-tweet id actor text loc ts 1) 70 | ;(verify-tweet id actor text loc ts) 71 | (.emit collector (Values. (to-array [(vector id actor)]))))) ; pk-val within one list 72 | -------------------------------------------------------------------------------- /defspout.clj: -------------------------------------------------------------------------------- 1 | (defn normalize-fns [body] 2 | (for [[name args & impl] body 3 | :let [args (-> "this" 4 | gensym 5 | (cons args) 6 | vec)]] 7 | (concat [name args] impl) 8 | )) 9 | 10 | (defmacro defspout [name output-spec & [opts & impl :as all]] 11 | (println opts " : impl= " impl " all= ", all) 12 | (if-not (map? opts) 13 | `(defspout ~name ~output-spec {} ~@all) 14 | (let [worker-name (symbol (str name "__")) 15 | conf-fn-name (symbol (str name "__conf__")) 16 | params (:params opts) 17 | conf-code (:conf opts) 18 | prepare? (:prepare opts) 19 | prepare? (if (nil? prepare?) true prepare?) 20 | fn-body (if prepare? 
21 | (cons 'fn impl) 22 | (let [[args & impl-body] impl 23 | coll-sym (first args) 24 | prepargs [(gensym "conf") (gensym "context") coll-sym]] 25 | `(fn ~prepargs (spout (~'nextTuple [] ~@impl-body))))) 26 | definer (if params 27 | `(defn ~name [& args#] 28 | (println ~output-spec ~worker-name ~conf-fn-name args#)) 29 | `(def ~name 30 | (println ~output-spec ~worker-name ~conf-fn-name [])) 31 | ) 32 | ] 33 | (println "name=" name) 34 | (println "params=" params) 35 | (println "output-spec=" output-spec) 36 | (println "impl=" impl) 37 | (println "fn-body=" fn-body) 38 | (println "conf-fn-name=" conf-fn-name) 39 | (println "=------" ) 40 | ;(println definer ) 41 | `(do 42 | (defn ~conf-fn-name ~(if params params []) 43 | ~conf-code 44 | ) 45 | (defn ~worker-name ~(if params params []) 46 | ~fn-body 47 | ) 48 | ~definer 49 | )))) 50 | 51 | (defmacro spout [& body] 52 | (let [[spout-fns other-fns] (split-with #(not (symbol? %)) body) 53 | fns (normalize-fns spout-fns)] 54 | `(reify ISpout 55 | ~@fns 56 | ~@other-fns))) 57 | 58 | 59 | 60 | (defspout sentence-spout ["sentence"] 61 | [conf context collector] 62 | (let [sentences ["a little brown dog" 63 | "the man petted the dog" 64 | "four score and seven years ago" 65 | "an apple a day keeps the doctor away"]] 66 | (println sentences) 67 | )) 68 | 69 | (defspout sentence-spout-parameterized ["word"] {:params [sentences] :prepare false} 70 | [collector] 71 | (Thread/sleep 500) 72 | (println collector [(rand-nth sentences)])) 73 | -------------------------------------------------------------------------------- /trident-clj/src/trident_clj/loc_aggregator.clj: -------------------------------------------------------------------------------- 1 | (ns trident-clj.loc-aggregator 2 | (:import [java.io FileReader] 3 | [java.util Map Map$Entry List ArrayList Collection Iterator HashMap]) 4 | (:import [storm.trident.operation TridentCollector Function] 5 | [backtype.storm.tuple Values]) 6 | (:require [clojure.string :as str] 7 | 
[clojure.tools.logging :as log]) 8 | (:require [clj-redis.client :as redis]) ; bring in redis namespace 9 | (:require [trident-clj.redis.redis-datamapper :refer :all]) 10 | (:require [trident-clj.redis.redis-persister :refer :all]) 11 | (:gen-class 12 | :extends storm.trident.operation.BaseAggregator 13 | :name com.colorcloud.trident.LocAggregator ; convert this ns to class Tweet 14 | )) ; this ns impl Function 15 | 16 | 17 | ; create redis model tweet-rant to store aggregated for each tweet 18 | (defn create-tweet-model [] 19 | (def-redis-type tweet-rant 20 | (string-type :id :actor :location :text :time) 21 | (list-type :followers) 22 | (primary-key :id :actor) 23 | (format :json) 24 | (key-separator "##"))) 25 | 26 | ; create a data object to persist data into redis 27 | (defn store-tweet [id actor text location ts cnt] 28 | (let [tweet (tweet-rant :new)] 29 | (tweet :set! :id (str id)) 30 | (tweet :set! :actor actor) 31 | (tweet :set! :location location) 32 | (tweet :set! :text text) 33 | (tweet :set! :time ts) 34 | (tweet :add! :followers (str actor "-follower-" 1)) 35 | (tweet :add! :followers (str actor "follower-" 2)) 36 | (tweet :save!))) 37 | 38 | ; defed redis type to find by primary key. 39 | (defn find-tweet [id actor] 40 | (tweet-rant :find id actor)) 41 | 42 | (defn verify-tweet [id actor text loc ts] 43 | (let [db-tweet (find-tweet id actor)] 44 | ;(prn text " -- " (db-tweet :get :followers) (db-tweet :get-state)))) 45 | (prn text " -- " (db-tweet :get-state)))) 46 | 47 | 48 | ; prepare operation called once on start. init global state here. 
49 | (defn -prepare ; gen-class method prefix by - 50 | " perpare : init global var and db conn " 51 | [this conf context] 52 | (prn "LocAggregator prepare once") 53 | ; init redis connection to db within redis data mapper 54 | (def loc-map (atom {})) 55 | (def batch-cnt (atom 0))) 56 | 57 | (defn -cleanup 58 | [this] 59 | (prn "loc aggregation clean up")) 60 | 61 | (defn -init 62 | [this batch-id collector] 63 | (prn "init aggregator called for batch " batch-id) 64 | ; reset batch counter 65 | (reset! batch-cnt 0) 66 | loc-map) ; ret loc-state 67 | 68 | ; aggregate values in bucket after grouping. 69 | (defn -aggregate 70 | "given global state, aggregate current tuple into global state, ret void" 71 | [this loc-map tuple collector] 72 | (let [id (.getString tuple 0) 73 | actor (.getString tuple 1) 74 | loc (keyword (.getString tuple 3)) ; incoming tuple [] 75 | rediskey (.getValueByField tuple "rediskey") 76 | cnt @batch-cnt 77 | sum (@loc-map loc) ; ret nil when map does not have key 78 | incsum ((fnil inc 0) sum)] ; in case first time, replace nil by 0 79 | ; now update tot 80 | (prn "aggregating for tuple " loc cnt sum incsum tuple) 81 | (swap! batch-cnt inc) ; incr cnt within the batch 82 | (swap! loc-map (fn [m] (merge-with + m {loc 1}))) 83 | (.emit collector (Values. (to-array [id actor incsum rediskey]))))) 84 | 85 | (defn -complete 86 | [this loc-map collector] 87 | (prn "aggregator batch completed " @loc-map)) 88 | ;(.emit collector (Values. 
(to-array @loc-map)))) 89 | -------------------------------------------------------------------------------- /trident-clj/src/trident_clj/core.clj: -------------------------------------------------------------------------------- 1 | (ns trident-clj.core 2 | "trident in clj example" 3 | (:require [clojure.string :as str]) 4 | (:import [java.io FileReader] 5 | [java.util Map Map$Entry List ArrayList Collection Iterator HashMap]) 6 | (:require [clj-redis.client :as redis]) ; bring in redis namespace 7 | (:import [backtype.storm Config StormSubmitter LocalCluster LocalDRPC] 8 | [backtype.storm.spout SchemeAsMultiScheme] 9 | [backtype.storm.tuple Fields Tuple] 10 | [storm.trident TridentTopology] 11 | [storm.trident.tuple TridentTupleView] 12 | [storm.trident.testing MemoryMapState$Factory] 13 | [storm.trident.operation.Filter] 14 | [storm.trident.operation.builtin Count Sum MapGet FilterNull]) 15 | (:require [trident-clj.loc-aggregator] 16 | [trident-clj.tweet-spout] 17 | [trident-clj.prn-filter] 18 | [trident-clj.persister] 19 | [clojure.tools.logging :as log]) 20 | (:use [backtype.storm clojure config]) 21 | (:gen-class)) 22 | 23 | ; instantiate a fake tweet spout class. We gen-class from clj module(ns) 24 | (defn fake-tweet-spout 25 | [batch-size] 26 | (prn " >>> creating fake tweet spout <<<") 27 | ; constructor takes 2 args 28 | ;(com.colorcloud.trident.TweetSpout. "data/500_sentences_en.txt" 100)) 29 | (com.colorcloud.trident.TweetSpout.)) 30 | 31 | ; build a storm top by config the passed in trident top 32 | ; connect spout and each filters and aggregators 33 | (defn bld-tweet-top 34 | [trident-top] 35 | (let [tweet-spout (fake-tweet-spout 100) ; spout 36 | counter (storm.trident.operation.builtin.Count.) 37 | persister (com.colorcloud.trident.Persister.) 38 | locaggregator (com.colorcloud.trident.LocAggregator.) ; loc aggregator 39 | prnfilter (com.colorcloud.trident.PrintFilter.)] 40 | (-> trident-top 41 | (.newStream "spout" tweet-spout) 42 | (.each (Fields. 
["id" "actor" "text" "location" "time"]) persister (Fields. ["rediskey"])) 43 | ;(.each (Fields. ["id" "actor" "text" "location" "time" "rediskey"]) prnfilter) 44 | ; groupBy create virtual streams grouped to next, must followed by aggregator 45 | ; grouped stream, after aggregation, only contains grouping key and other fields emitted from aggregator. 46 | (.groupBy (Fields. ["location"])) 47 | ;(.aggregate (Fields. ["location"]) counter (Fields. ["count"])) ; [grp-key other-key] 48 | (.aggregate (Fields. ["id" "actor" "text" "location" "time" "rediskey"]) locaggregator (Fields. ["id" "actor" "count" "rediskey"])) 49 | (.each (Fields. ["id" "actor" "location" "count" "rediskey"]) prnfilter)) 50 | trident-top)) ; return configurated trident top 51 | 52 | ; give a config, build a top and run it on a cluster 53 | (defn run-local-topology 54 | [config drpc] 55 | (let [cluster (LocalCluster.) ; create a cluster 56 | tweet-top (bld-tweet-top (TridentTopology.))] ; build tweet topology 57 | ;(.setDebug cluster-config true) 58 | (.submitTopology cluster ; submit top to cluster with the following config 59 | "location_groupaggregate" ; top instance name 60 | config 61 | (.build tweet-top)))) 62 | 63 | ; gen-class - main 64 | (defn -main 65 | [& args] 66 | (let [drpc (LocalDRPC.) 67 | config (Config.)] ; create a local drpc 68 | (prn " >>>> starting dbconn.core main <<<<< ") 69 | (run-local-topology config drpc) 70 | (while true 71 | ; drpc execute function-name function-args 72 | (log/info "Word count: %s" (.execute drpc "words" "baby")) 73 | (Thread/sleep 1000)))) -------------------------------------------------------------------------------- /msgqueue/README: -------------------------------------------------------------------------------- 1 | # msgqueue 2 | 3 | Distribute computation across rabbitmq and model rabbitmq message queues as clojure lazy sequences. 4 | 5 | # The rabbitmq client lib. 
6 | 7 | There are many rabbitmq client libs, the most robust one is amqp-client. 8 | We use that lib, rather than rabbitmq-client-1.7.0.jar. Note that clojure-rabbitmq depends on rabbitmq-client-17.0.jar. 9 | 10 | [com.rabbitmq/amqp-client "2.3.1"] 11 | 12 | ;[clojure-rabbitmq "0.2.1"] 13 | ;[rabbitmq-client "1.7.0"] 14 | 15 | ## Dependency lib resolver 16 | 17 | Lib format: group-id/artifact-id, version string. 18 | Clojure uses the following paths to resolve dependency library. 19 | central (http://repo1.maven.org/maven2), 20 | local (file:$HOME/macsrc/clj/cljaction/cljaction-test/local_jars/), 21 | clojars (http://clojars.org/repo/) 22 | 23 | add lib coordinates (artifact-id and (if differing) group-id) go into your project.clj’s :dependencies vector. 24 | 25 | When you specify a lib jar name, [com.rabbitmq/amqp-client "2.3.1"] 26 | lein resolve the jar by looking at the package.2.3.1.pom.xml for the jar from the maven or clojar and copy the downloaded files into lib folder. 27 | 28 | require the package into ns macro’s :require list. 29 | (ns package.namespace) = (src/package/namespace) 30 | 31 | Specify main function for the project. 32 | 33 | :main msgqueue.core) 34 | 35 | ## Package is Namespace 36 | 37 | Package is namespece, and named after convention src/package/file.clj maps to 38 | (ns package.file) 39 | 40 | ## Require, use, import and gen-class. 41 | 42 | Import java packages and each classes. [java.util Map List] 43 | Require loads the lib, :as to give a short name to avoid qualified full name. 44 | Require or use other namespace's code as though its yours. 45 | Require can now take a :refer option. :refer takes a list of symbols to refer, so you not bring in entire symbols. 46 | from the namespace or :all to bring in all public vars." 47 | 48 | 49 | gen-class gen a java class that delegate java class .method to clj :prefix func. (.main to -main) 50 | clojure in action java interop has great depth in gen-class options. 
51 | 52 | for mutables, only single field available called state. 53 | : with or without :gen-class, clj with gen a set of classes for each clj function. 54 | args: :name, :init, :constructor :state, 55 | http://kotka.de/blog/2010/02/gen-class_how_it_works_and_how_to_use_it.html 56 | 57 | 58 | ## AMQP client and Jar dependencies 59 | 60 | There are two libs for rabbitmq client. 61 | [rabbitmq-client "1.7.0"] 62 | [com.rabbitmq/amqp-client "2.3.1"] 63 | 64 | The rabbitmq-client 1.7.0 is deprecated. It contains a reference to contrib-1.0.0-SNAPSHOT in its project.clj. This is the reason for error msg during compilation. 65 | 66 | We are using amqp-client 2.3.1 client. 67 | 68 | 69 | # Args for main 70 | 71 | You can pass in args to main the same as java args for main. 72 | lein compile 73 | lein run [arg1 arg2 ...] 74 | 75 | To run rabbitmq consumer process, 76 | lein run workerhandler 77 | 78 | To run rabbitmq producer 79 | lein run workertask 80 | 81 | ## Namespace closures 82 | 83 | All global vars with root bindings form the closure of the namespace. 84 | For example, workers defined in (ns msgqueue.rabbitmq.worker) is private 85 | closure to all fns in the namespace. 86 | (def workers (ref {})) 87 | 88 | so if you distribute worker thru msg queue, with different threads at different machines, just use defworker to add the computation fn to the workers map. When different threads load the namespace, the computation fn object is accessible through map to the threads. Hence, the sender can just send the fn name string, and the processor can extract the fn object from the map, which is a closure in the namespace. 89 | 90 | After getting fn object from the namespace map, apply the fn with the passed in args. 
91 | (let [value (apply worker-handler worker-args)]) 92 | -------------------------------------------------------------------------------- /dbconn/src/dbconn/redis/redis_persister.clj: -------------------------------------------------------------------------------- 1 | (ns dbconn.redis.redis-persister 2 | (:require [clj-redis.client :as redis]) ; bring in redis namespace 3 | (:require [clojure.data.json :as json])) 4 | 5 | ; clojure does not support cyclic dependencies. break cyclic by having a third ns 6 | ; contains common defs. If you have cyclic deps, you did not get your abstraction right. 7 | ; init redis connection 8 | (def redis-db (redis/init :url "redis://localhost")) 9 | 10 | ;; serialization 11 | (defmulti serialize (fn [format key-type value] 12 | [format key-type])) 13 | 14 | (defmethod serialize [:json :string-type] [format key-type value] 15 | (json/write-str value)) 16 | 17 | (defmethod serialize [:json :list-type] [format key-type value] 18 | (map json/write-str value)) 19 | 20 | (defmethod serialize [:clj-str :string-type] [format key-type value] 21 | (pr-str value)) ; pr-str output double quote as part of str 22 | 23 | (defmethod serialize [:clj-str :list-type] [format key-type value] 24 | (map pr-str value)) 25 | 26 | 27 | ;; deserialization 28 | (defmulti deserialize (fn [format key-type serialized] 29 | [format key-type])) 30 | 31 | (defmethod deserialize [:json :string-type] [format key-type serialized] 32 | (json/read-str serialized)) 33 | 34 | (defmethod deserialize [:json :list-type] [format key-type serialized] 35 | (map json/read-str serialized)) 36 | 37 | (defmethod deserialize [:clj-str :string-type] [format key-type serialized] 38 | (read-string serialized)) 39 | 40 | (defmethod deserialize [:clj-str :list-type] [format key-type serialized] 41 | (map read-string serialized)) 42 | 43 | (def inserters { 44 | :string-type redis/set 45 | :list-type redis/rpush 46 | }) 47 | 48 | (def fetchers { 49 | :string-type (fn [key] 50 | {key 
{:value (redis/get key) 51 | :key-type :string-type}}) 52 | :list-type (fn [key] 53 | {key {:value (redis/lrange key 0 (redis/llen key)) 54 | :key-type :list-type}})}) 55 | 56 | (defn insert-into-redis [persistable] 57 | (let [inserter (fn [[k v]] 58 | (prn "insert-into-redis" k v (v :key-type) "v:value" (v :value)) 59 | (cond 60 | (= (v :key-type) :string-type) ((inserters :string-type) redis-db k (v :value)) 61 | (= (v :key-type) :list-type) (doall (map #((inserters :list-type) redis-db k %) (v :value)))))] 62 | (doall (map inserter persistable)))) 63 | 64 | (defn persistable-for [redis-object] 65 | (let [redis-type (redis-object :type) 66 | separator (redis-type :key-separator) 67 | format (redis-type :format) 68 | pk-value (redis-object :primary-key-value) 69 | kv-persister (fn [[k v]] 70 | (let [key-type (redis-type :key-type k)] 71 | {(str pk-value separator k) 72 | {:value (serialize format key-type v) 73 | :key-type key-type}}))] 74 | (apply merge (map kv-persister (redis-object :get-state))))) 75 | 76 | (defn persist [redis-object] 77 | (insert-into-redis (persistable-for redis-object)) 78 | true) 79 | 80 | (defn deserialize-state [serialized redis-type] 81 | (let [format (redis-type :format) 82 | separator (redis-type :key-separator) 83 | key-from (fn [k] (read-string (last (.split k separator)))) 84 | deserializer (fn [[k {:keys [key-type value]}]] 85 | (if-not value 86 | {} 87 | {(key-from k) (deserialize format key-type value)}))] 88 | (apply merge (map deserializer serialized)))) 89 | 90 | (defn find-by-primary-key [redis-type pk-values] 91 | (let [string-keys (redis-type :string-keys pk-values) 92 | list-keys (redis-type :list-keys pk-values) 93 | string-maps (apply merge (map #((fetchers :string-type) %) string-keys)) 94 | list-maps (apply merge (map #((fetchers :list-type) %) list-keys)) 95 | serialized (merge string-maps list-maps) 96 | deserialized (deserialize-state serialized redis-type)] 97 | (if (empty? 
deserialized) 98 | nil 99 | (redis-type :new-with-state deserialized)))) 100 | -------------------------------------------------------------------------------- /trident/src/main/java/com/colorcloud/trident/Utils.java: -------------------------------------------------------------------------------- 1 | package com.colorcloud.trident; 2 | 3 | import java.util.ArrayList; 4 | import java.util.Collections; 5 | import java.util.Comparator; 6 | import java.util.HashMap; 7 | import java.util.List; 8 | import java.util.Map; 9 | import java.util.Map.Entry; 10 | 11 | import storm.trident.operation.BaseFunction; 12 | import storm.trident.operation.CombinerAggregator; 13 | import storm.trident.operation.Filter; 14 | import storm.trident.operation.TridentCollector; 15 | import storm.trident.operation.TridentOperationContext; 16 | import storm.trident.tuple.TridentTuple; 17 | import backtype.storm.tuple.Values; 18 | 19 | /** 20 | * Misc. util classes that can be used for implementing some stream processing. 21 | */ 22 | public class Utils { 23 | 24 | /** 25 | * A filter that filters nothing but prints the tuples it sees. Useful to test and debug things. 26 | */ 27 | @SuppressWarnings({ "serial", "rawtypes" }) 28 | public static class PrintFilter implements Filter { 29 | @Override 30 | public void prepare(Map conf, TridentOperationContext context) { 31 | } 32 | @Override 33 | public void cleanup() { 34 | } 35 | 36 | @Override 37 | public boolean isKeep(TridentTuple tuple) { 38 | System.out.println("-- tuple : " + tuple); 39 | return true; 40 | } 41 | } 42 | 43 | /** 44 | * Given a hashmap with string keys and integer counts, returns the "top" map of it. 45 | * "n" specifies the size of the top to return. 
46 | */ 47 | public final static Map getTopNOfMap(Map map, int n) { 48 | List> entryList = new ArrayList>(map.size()); 49 | entryList.addAll(map.entrySet()); 50 | Collections.sort(entryList, new Comparator>() { 51 | @Override 52 | public int compare(Entry arg0, Entry arg1) { 53 | return arg1.getValue().compareTo(arg0.getValue()); 54 | } 55 | }); 56 | 57 | Map toReturn = new HashMap(); 58 | for(Map.Entry entry: entryList.subList(0, Math.min(entryList.size(), n))) { 59 | toReturn.put(entry.getKey(), entry.getValue()); 60 | } 61 | return toReturn; 62 | } 63 | 64 | /** 65 | * fn to process each tuple, sum two fields. 66 | */ 67 | public static class AddAndMultiply extends BaseFunction { 68 | public void execute(TridentTuple tuple, TridentCollector collector) { 69 | int i1 = tuple.getInteger(0); 70 | int i2 = tuple.getInteger(1); 71 | collector.emit(new Values(i1 + i2, i1 * i2)); 72 | } 73 | } 74 | 75 | public static class TextProcessor extends BaseFunction{ 76 | @Override 77 | public void prepare(Map conf, TridentOperationContext context) { 78 | } 79 | 80 | @Override 81 | public void execute(TridentTuple tuple, TridentCollector collector) { 82 | String v = (String) tuple.getValue(0); 83 | collector.emit(new Values(v + " : TextProcessor-ed")); 84 | } 85 | } 86 | 87 | /** 88 | * emit a single tuple for each group of tuple, this combiner aggregator always used after groupBy() 89 | * to distinct count of duplicate tuples with same field. 
90 | * .each(new Fields("followers"), new ExpandList(), new Fields("follower")) 91 | * .groupBy(new Fields("follower")) 92 | * .aggregate(new One(), new Fields("one")) 93 | */ 94 | public class One implements CombinerAggregator { 95 | public Integer init(TridentTuple tuple) { 96 | return 1; 97 | } 98 | 99 | public Integer combine(Integer val1, Integer val2) { 100 | return 1; 101 | } 102 | 103 | public Integer zero() { 104 | return 1; 105 | } 106 | } 107 | 108 | public class MovingAvg implements CombinerAggregator { 109 | public Integer init(TridentTuple tuple) { 110 | return 1; 111 | } 112 | 113 | public Integer combine(Integer val1, Integer val2) { 114 | return 1; 115 | } 116 | 117 | public Integer zero() { 118 | return 1; 119 | } 120 | } 121 | } 122 | -------------------------------------------------------------------------------- /spellcheck.clj: -------------------------------------------------------------------------------- 1 | ; http://norvig.com/spell-correct.html 2 | ; 3 | 4 | ; norvig python version of spell check. 5 | ; max score of {lang model, P(c) is english. and Error model, P(w|c) w means c } 6 | ; 7 | ; import re, collections 8 | ; 9 | ; def words(text): return re.findall('[a-z]+', text.lower()) 10 | ; 11 | ; def train(features): 12 | ; model = collections.defaultdict(lambda: 1) # new word have default val 1. 
; for f in features:
;     model[f] += 1
; return model
;
; NWORDS = train(words(file('big.txt').read()))
;
; alphabet = 'abcdefghijklmnopqrstuvwxyz'
;
; gen a set of all words that is 1 distance away [delete, transpose, replace, insert]
; def edits1(word):
;   [('', 'hello'), ('h', 'ello'), ('he', 'llo'), ('hel', 'lo'), ('hell', 'o'), ('hello', '')]
;   splits = [(word[:i], word[i:]) for i in range(len(word) + 1)]
;   ['ello', 'hllo', 'helo', 'helo', 'hell']
;   my version: [word[:i]+word[i+1:] for i in xrange(len(word))]
;   deletes = [a + b[1:] for a, b in splits if b]
;   ['ehllo', 'hlelo', 'hello', 'helol']
;   transposes = [a + b[1] + b[0] + b[2:] for a, b in splits if len(b)>1]
;   replaces = [a + c + b[1:] for a, b in splits for c in alphabet if b]
;   inserts = [a + c + b for a, b in splits for c in alphabet]
;   return set(deletes + transposes + replaces + inserts)
;
; def known_edits2(word):
;   return set(e2 for e1 in edits1(word) for e2 in edits1(e1) if e2 in NWORDS)
;
; def known(words): return set(w for w in words if w in NWORDS)
;
; def correct(word):
;   candidates = known([word]) or known(edits1(word)) or known_edits2(word) or [word]
;   return max(candidates, key=NWORDS.get)


; Tokenize text into a seq of lowercased words (maximal runs of a-z).
; Returns nil when the text contains no letters (re-seq semantics).
(defn words [text] (re-seq #"[a-z]+" (.toLowerCase text)))

; Build the language model: map each word to an occurrence count.
; An unseen word starts from the default count 1, so a word seen once maps
; to 2 -- this mirrors python's defaultdict(lambda: 1) in the reference above.
(defn train [features]
  (reduce (fn [model f] (assoc model f (inc (get model f 1)))) {} features))

; Entire word-count dictionary, trained from the corpus file at load time.
; ^:dynamic: earmuffed names must be declared dynamic in Clojure 1.3+,
; otherwise this def triggers a compiler warning.
; Guard on the corpus file existing so loading this file does not throw
; when big.txt is absent (e.g. in a test environment); *nwords* is then nil
; and must be trained before `correct` can be used.
(def ^:dynamic *nwords*
  (when (.exists (java.io.File. "big.txt"))
    (train (words (slurp "big.txt")))))

; sequence comprehension to produce a set of all combinations of del, perm, repl, insert.
54 | (defn edits1 [word] ; given a word, mutate 55 | (let [alphabet "abcdefghijklmnopqrstuvwxyz" n (count word)] 56 | (distinct (concat 57 | ; seq comprehension, deletes one char a time 58 | (for [i (range n)] (str (subs word 0 i) (subs word (inc i)))) 59 | ; transpose, swap one nb a time ("ehllo" "hlelo" "hello" "helol") 60 | (for [i (range (dec n))] ; seq comprehension of all items 61 | (str (subs word 0 i) (nth word (inc i)) (nth word i) (subs word (+ 2 i)))) 62 | ; seq thru, replace with seq of chars one at a time. 63 | (for [i (range n) c alphabet] (str (subs word 0 i) c (subs word (inc i)))) 64 | ; seq thru, insert from seq of chars one at a time. 65 | (for [i (range (inc n)) c alphabet] (str (subs word 0 i) c (subs word i))))))) 66 | 67 | ; ret a list of good english word from the passed in list of candidate words 68 | (defn known 69 | [words nwords] ; take a word list, seq thru validate each. 70 | (let [result (set (for [w words :when (nwords w)] w))] ; [e for e in [0..8] when e % 2] 71 | (if (empty? result) 72 | nil 73 | result))) 74 | 75 | ; take a word, get dist 1 list, then for each dist 1 word, gen dist 2 word list 76 | (defn known-edits2 77 | [word nwords] 78 | (set (for [e1 (edits1 word) e2 (edits1 e1) :when (nwords e2)] e2))) 79 | 80 | ; when checking a word, first check if the word itself valid, then validate any of 81 | ; its dist-1, dist-2 word. 82 | (defn correct [word nwords] 83 | (let [candidates (or (known [word] nwords) ; is word good english word ? 
84 | (known (edits1 word) nwords) ; if not, dist 1 list of this word 85 | (known-edits2 word nwords) ; then dist 2 list of this word 86 | [word])] ; failed to find any of word, dist1, dist2 good, ret word itself 87 | ; among dist-1 2 mutation list, sel the max which (fn x) is the max 88 | (apply max-key #(get nwords % 1) candidates))) 89 | 90 | ; to use, check passed in against word dist collection 91 | (correct "misstake" *nwords*) 92 | (correct "speling" *nwords*) 93 | -------------------------------------------------------------------------------- /trident-clj/src/trident_clj/tweet_spout.clj: -------------------------------------------------------------------------------- 1 | (ns trident-clj.tweet-spout 2 | (:import [java.io FileReader] 3 | [java.util Random Map Map$Entry List ArrayList Collection Iterator HashMap]) 4 | (:import [backtype.storm Config StormSubmitter LocalCluster LocalDRPC] 5 | [backtype.storm.spout SchemeAsMultiScheme] 6 | [storm.trident.operation TridentCollector Function] 7 | [backtype.storm.tuple Fields Tuple] 8 | [storm.trident.spout.IBatchSpout] 9 | [backtype.storm.tuple Values]) 10 | (:require [clojure.string :as str] 11 | [clojure.tools.logging :as log] 12 | [clojure.contrib.io :refer [pwd]] 13 | [clj-time.core :as clj-time :exclude [extend]] ; clj-time abbrev to core 14 | [clj-time.format]) 15 | (:require [clj-redis.client :as redis]) ; bring in redis namespace 16 | (:gen-class 17 | :name com.colorcloud.trident.TweetSpout ; convert this ns to class Tweet 18 | :state state ; put serialiazable object here. 19 | :init init ; Must return [ [superclass-constructor-args] state] 20 | :constructors {[] [] ; empty arg constructor 21 | [String int] []} ; a map of constructor sig to superclass construcotr signature 22 | :implements [storm.trident.spout.IBatchSpout])) ; this ns impl Function 23 | 24 | ; init always ret a vector with the first be superclass constructor arg, and 25 | ; the second is instance state. 
26 | ; Must return [ [superclass-constructor-args] state] 27 | (defn -init 28 | "init state, no this pointer, ret a global concurrent map stores all states" 29 | ([] ; empty arg constructor, use default file at current pwd is project root. 30 | [[] {:batchsize 10 :srcfile "data/500_sentences_en.txt"}]) 31 | ([srcfile batchsize] 32 | ; cant use atom, as it is not serializable 33 | ;[[] (atom {:batchsize 10 :sentences [] :random-gen (Random.) })]) 34 | [[] {:batchsize batchsize :srcfile srcfile}])) 35 | 36 | ; open fn only called once, so instantiate global state here. 37 | (defn -open ; gen-class method prefix by - 38 | "called once when instantiate spout instance, init state here" 39 | [this conf context] 40 | ; populate sentence vector inside atom map 41 | ; (let [state (.state this) 42 | ; sentence-vec (:sentence @state)] 43 | ; (swap! state assoc :sentence (conj sentence-vec "this is first tweet")) 44 | ; (swap! state assoc :sentence (conj sentence-vec "those are second tweet")))) 45 | (let [state (.state this) 46 | srcfile (:srcfile state)] 47 | ; def namespace global mutable shared state. 48 | (prn "spout open called : " srcfile (pwd)) 49 | (def TWEETCNT (atom 0)) 50 | (def ACTORS ["stefan" "dave" "pere" "nathan" "doug" "ted" "mary" "rose"]) 51 | (def LOCATIONS ["Spain" "USA" "Spain" "USA" "USA" "USA" "UK" "France"]) 52 | (def SUBJECTS ["berlin" "justinbieber" "hadoop" "life" "bigdata"]) 53 | ; connect to rabbitmq or logstash src, abstract queue as a lazy sequence. 54 | (def srcseq (atom (line-seq (clojure.java.io/reader srcfile)))))) 55 | 56 | (defn -ack 57 | [this batch-id] 58 | (prn " ack " batch-id)) 59 | 60 | (defn -close 61 | [this]) 62 | 63 | (defn -getComponentConfiguration 64 | [this] 65 | (Config.)) ; just instantiate a new config object 66 | 67 | ; output stream field spec. 68 | (defn -getOutputFields 69 | [this] 70 | (Fields. 
["id" "actor" "text" "location" "time"])) 71 | 72 | ; feed the top with next tweet 73 | (defn getNextTweet 74 | "get next tweet from preload sentence vector" 75 | [this] 76 | (let [state (.state this) 77 | text (first @srcseq) 78 | idx (rand-int (count ACTORS)) 79 | actor (ACTORS idx) 80 | location (LOCATIONS idx) 81 | ts (clj-time.format/unparse (clj-time.format/formatters :date-time) (clj-time/now))] 82 | ;text (["this is first tweet" "that are second tweet"] (rand-int 2))] 83 | ; for now, just simple fake random between 0 1 84 | (swap! TWEETCNT inc) 85 | (swap! srcseq (fn [s] (next s))) ; srcseq = (next srcseq) 86 | ;(prn "emit " text @TWEETCNT actor location ts) 87 | (Values. (to-array [(str @TWEETCNT) actor text location ts])))) 88 | 89 | (defn -emitBatch 90 | "emit a batch of tuples" 91 | [this batchId ^storm.trident.operation.TridentCollector collector] 92 | (let [state (.state this) 93 | sz (:batchsize state)] 94 | (doseq [i (range sz)] 95 | (.emit collector (getNextTweet this))))) -------------------------------------------------------------------------------- /msgqueue/src/msgqueue/rabbitmq/worker_usage.clj: -------------------------------------------------------------------------------- 1 | ; 2 | ; use worker macro create a set of workers to execute computations 3 | ; in async mode. 4 | ; full fledged version is https://github.com/amitrathore/swarmiji 5 | ; 6 | (ns msgqueue.rabbitmq.worker-usage 7 | (:use msgqueue.rabbitmq.rabbitmq) 8 | (:use msgqueue.rabbitmq.worker)) 9 | 10 | ; def a var with root binding to fn named with args and expr as body, 11 | ; most importantly, add fn object to workers map to make it namespace closure. 12 | ; remote thread running the same code, load this ns and can access the workers map to get fn object from fn name 13 | ; so client threads can just send fn name string thru rabbitmq and remote processor just extract fn object defned here. 14 | ; fn object from map and evaluate fn object expression with the args. 
; defworker (see worker.clj) defs a namespace var bound to a worker-runner:
; an anonymous fn that dispatches the task name plus args over rabbitmq and
; returns a closure for polling the remote computation's status/value.
; It also registers the computation fn in the namespace-global `workers`
; map, so a remote process that loads this namespace can look the fn up by
; its name string and apply it to the deserialized args.
(defworker long-computation-one [x y] ; body runs on the remote worker process
  (Thread/sleep 3000)                 ; simulate an expensive computation
  (* x y))

; second sample worker: sums three numbers after a 2s delay.
(defworker long-computation-two [a b c]
  (Thread/sleep 2000)
  (+ a b c))

; sample audit-style worker: prints its arg then sleeps; no useful return value.
(defworker expensive-audit-log [z]
  (println "expensive audit log:" z)
  (Thread/sleep 4000))

; called from main; dispatches the test workers across rabbitmq.
(defn worker-task []
  (println "Dispatching test worker")
  (with-rabbit ["localhost" "guest" "guest"]
    ; invoking a defworker var sends the task name and args to the remote
    ; side and immediately returns a status-checking closure (non-blocking).
    (let [one (long-computation-one 10 20)
          two (long-computation-two 3 5 7)]
      ;(fire-and-forget expensive-audit-log 100)
      ;(run-worker-everywhere expensive-audit-log 777)

      ; from-swarm is a macro that loops, checking the worker closures
      ; returned by defworker, then runs the body with the results ready.
      (from-swarm [one two]
        (println "one:" (one :value))
        (println "two:" (two :value)))))
  (println "done!"))


; apply (invoke) the fn object extracted from the worker namespace `workers`
; map with the passed-in args; the result is returned in an envelope map.
; Apply (invoke) the worker fn object with the passed-in args and wrap the
; outcome in a result envelope map:
;   {:status :success :value <result>}             on success
;   {:status :error   :error <exception message>}  on failure
; The :error key is additive, so existing consumers that only read
; :status / :value keep working unchanged.
(defn response-for [worker-handler worker-args]
  (try
    (let [value (apply worker-handler worker-args)]
      {:value value :status :success})
    (catch Exception e
      ; keep the failure reason instead of silently dropping it
      {:status :error :error (.getMessage e)})))


; Run the handler asynchronously: a future blocks on invoking the fn with
; the args; when it completes, the result envelope is sent back on return-q
; (only when the requester supplied one).
(defn process-request [worker-handler worker-args return-q]
  (future
    (with-rabbit ["localhost" "guest" "guest"]
      (let [response-envelope (response-for worker-handler worker-args)]
        (when return-q
          (send-message return-q response-envelope))))))


; Remote server entry point for one queue message: parse the request string,
; look the worker fn up by name in the namespace-global `workers` map, and
; process it asynchronously.  Requests naming an unknown worker are ignored.
(defn handle-request-message [req-str]
  (try
    ; Messages arrive off the wire, i.e. untrusted input: bind *read-eval*
    ; to false so read-string cannot execute embedded #=(...) reader forms.
    (let [req (binding [*read-eval* false] (read-string req-str))
          worker-name (req :worker-name)
          worker-args (req :worker-args)
          return-q (req :return-q)
          ; extract fn object from the worker namespace's workers map by task name
          worker-handler (@workers worker-name)]
      (when-not (nil? worker-handler)
        (println "Processing:" worker-name "with args:" worker-args)
        (process-request worker-handler worker-args return-q)))
    (catch Exception e
      ; report malformed or unprocessable messages instead of swallowing them
      (println "Failed to handle request:" (.getMessage e)))))


; Start listening on the workers queue, which is abstracted as message-seq.
; Wrapped in a future because consuming the lazy message seq blocks.
(defn start-handler-process []
  (println "Serving up" (count @workers) "workers.")
  (future ; for blocking calls, use future to create a FutureTask, async/non-blocking
    (with-rabbit ["localhost" "guest" "guest"]
      (doseq [request-message (message-seq WORKER-QUEUE)]
        (handle-request-message request-message)))))


; start listening on the broadcast queue; also a blocking consumer loop,
; hence wrapped in a future.
95 | (defn start-broadcast-listener [] 96 | (future 97 | (with-rabbit ["localhost" "guest" "guest"] 98 | (println "Starting worker handler...") 99 | (doseq [request-message (message-seq BROADCAST-EXCHANGE FANOUT-EXCHANGE-TYPE BROADCAST-QUEUE)] 100 | (handle-request-message request-message))))) 101 | 102 | -------------------------------------------------------------------------------- /msgqueue/src/msgqueue/rabbitmq/rabbitmq.clj: -------------------------------------------------------------------------------- 1 | ;; 2 | ;; add cljaction-test ns path prefix. 3 | ;; (:import (com.rabbitmq.client ConnectionParameters ConnectionFactory QueueingConsumer))) 4 | ;; ;[rabbitmq-client "1.7.0"] 5 | ;; [com.rabbitmq/amqp-client "2.3.1"] 6 | 7 | (ns msgqueue.rabbitmq.rabbitmq 8 | (:import (com.rabbitmq.client ConnectionFactory 9 | ;ConnectionParameters 10 | Connection Channel QueueingConsumer))) 11 | 12 | 13 | ; dynamic bindable variable for connection. each thread can re-bind connection 14 | ; that is private to the thread whichever rebind it. 15 | (def ^:dynamic *rabbit-connection*) 16 | (def DEFAULT-EXCHANGE-NAME "defaultex") 17 | (def DEFAULT-EXCHANGE-TYPE "direct") 18 | (def FANOUT-EXCHANGE-TYPE "fanout") 19 | 20 | 21 | ; get a uniq queue name. routing key is used rather than qname. 22 | (defn random-queue-name [] 23 | (str (java.util.UUID/randomUUID))) 24 | 25 | ; amqp-client 2.3.1 version. 26 | (defn new-connection [host username password] 27 | (prn "new-connection :" host username password) 28 | (.newConnection 29 | (doto (ConnectionFactory.) 30 | (.setVirtualHost "/") 31 | (.setUsername username) 32 | (.setPassword password) 33 | (.setHost host)))) 34 | 35 | ; this is rabbitmq-client 1.7.0 version. 36 | ; (defn new-connection [q-host q-username q-password] 37 | ; (let [params (doto (ConnectionParameters.) 38 | ; (.setVirtualHost "/") 39 | ; (.setUsername q-username) 40 | ; (.setPassword q-password))] 41 | ; (.newConnection (ConnectionFactory. 
params) q-host))) 42 | 43 | ; eval exprs within a new connection. 44 | (defmacro with-rabbit [[mq-host mq-username mq-password] & exprs] 45 | `(with-open [connection# (new-connection ~mq-host ~mq-username ~mq-password)] 46 | (binding [*rabbit-connection* connection#] 47 | (do ~@exprs)))) 48 | 49 | ; send-message, default routing-key is queue-name 50 | (defn send-message 51 | ([routing-key message-object] 52 | (send-message DEFAULT-EXCHANGE-NAME DEFAULT-EXCHANGE-TYPE routing-key message-object)) 53 | ([exchange-name exchange-type routing-key message-object] 54 | (with-open [channel (.createChannel *rabbit-connection*)] 55 | (.exchangeDeclare channel exchange-name exchange-type) 56 | (.queueDeclare channel routing-key false false false nil) 57 | (.basicPublish channel exchange-name routing-key nil (.getBytes (str message-object)))))) 58 | 59 | ; get the next msg from the consumer. The consumer already attached to a queue. 60 | ; this is a blocking call. Better wrap it inside a future task. 61 | (defn delivery-from [channel consumer] 62 | (let [delivery (.nextDelivery consumer)] 63 | (.basicAck channel (.. delivery getEnvelope getDeliveryTag) false) 64 | (String. (.getBody delivery)))) 65 | 66 | ; 67 | (defn consumer-for 68 | ([channel queue-name] 69 | ; default queue name is queue routing key 70 | (consumer-for channel DEFAULT-EXCHANGE-NAME DEFAULT-EXCHANGE-TYPE 71 | queue-name queue-name)) 72 | ([channel exchange-name exchange-type queue-name routing-key] 73 | (let [consumer (QueueingConsumer. channel)] 74 | (.exchangeDeclare channel exchange-name exchange-type) 75 | (.queueDeclare channel queue-name false false false nil) 76 | (.queueBind channel queue-name exchange-name routing-key) ; tell exchg queue is interest in msg with this routing key. 77 | (.basicConsume channel queue-name consumer) 78 | consumer))) 79 | 80 | ; default routing key is queue name. 
81 | (defn next-message-from 82 | ([queue-name] 83 | (next-message-from DEFAULT-EXCHANGE-NAME DEFAULT-EXCHANGE-TYPE queue-name queue-name)) 84 | ([exchange-name exchange-type routing-key] 85 | (next-message-from exchange-name exchange-type (random-queue-name) routing-key)) 86 | ([exchange-name exchange-type queue-name routing-key] 87 | (with-open [channel (.createChannel *rabbit-connection*)] 88 | (let [consumer (consumer-for channel exchange-name exchange-type queue-name routing-key)] 89 | (delivery-from channel consumer))))) 90 | 91 | ; abstract rabbit channel and consumer as a lazy-seq by cons next msg to the rest of msgs from queue. 92 | (defn- lazy-message-seq [channel consumer] 93 | (lazy-seq 94 | (let [message (delivery-from channel consumer)] 95 | (cons message (lazy-message-seq channel consumer))))) 96 | 97 | ; abstract a rabbitmq queue as a lazy-seq 98 | (defn message-seq 99 | ([queue-name] 100 | (message-seq DEFAULT-EXCHANGE-NAME DEFAULT-EXCHANGE-TYPE queue-name queue-name)) 101 | ([exchange-name exchange-type routing-key] 102 | (message-seq exchange-name exchange-type (random-queue-name) routing-key)) 103 | ([exchange-name exchange-type queue-name routing-key] 104 | (let [channel (.createChannel *rabbit-connection*) 105 | consumer (consumer-for channel exchange-name exchange-type queue-name routing-key)] 106 | (lazy-message-seq channel consumer)))) 107 | -------------------------------------------------------------------------------- /mockstub.clj: -------------------------------------------------------------------------------- 1 | (ns mockstub.core 2 | (:use clojure.test)) 3 | 4 | 5 | ; to upper fn 6 | (defn to-upper [s] 7 | (.toUpperCase (str s))) 8 | 9 | ; test to upper fn with a set of is assertion 10 | (deftest test-to-upcase 11 | (is (= "RATHORE" (to-upper "rathore"))) 12 | (is (= "1" (to-upper 1))) 13 | (is (= "AMIT" (to-upper "AMIT")))) 14 | 15 | ; or use are assertion 16 | (deftest test-to-upcase 17 | (are [l u] (= u (to-upper l)) 18 | 
"RATHORE" "RATHORE" 19 | "1" "1" 20 | "amit" "AMIT")) 21 | 22 | ; a stub ret a canned val predefined. 23 | ; a mock records the fact that it was called with a specific set of args so we can verify api is called properly later from mock log. 24 | 25 | ; dynamic binding for test driven 26 | ; fns are dynamic vars, you can bind it with stub or mock values, elegant. 27 | (defn cal [x y] (prn "real cal " x y) [x y]) 28 | (cal x) 29 | (binding [cal (constantly ["mock-val1" "mock-val2"])] (cal "berkeley" "ucla")) 30 | 31 | ; stub take a list of pairs of [fn-name mock-form] and dynamic binding mock to fn-name. 32 | (defmacro stubbing [stub-forms & body] 33 | (let [stub-pairs (partition 2 stub-forms) 34 | returns (map last stub-pairs) 35 | stub-fns (map #(list 'constantly %) returns) 36 | real-fns (map first stub-pairs)] 37 | `(binding [~@(interleave real-fns stub-fns)] 38 | ~@body))) 39 | 40 | (defn calc-x [x1 x2] 41 | (* x1 x2)) 42 | (defn calc-y [y1 y2] 43 | (/ y2 y1)) 44 | (defn some-client [] 45 | (println (calc-x 2 3) (calc-y 3 4))) 46 | 47 | (stubbing [calc-x 1 calc-y 2] 48 | (some-client)) 49 | 50 | ; common stub-fn ret passed in val no matter what args 51 | (defn stub-fn [return-value] 52 | (fn [& args] 53 | return-value)) 54 | 55 | ; stub out the fn with fixed rets, i.e. 200 OK. 56 | (defmacro stubbing [stub-forms & body] 57 | (let [stub-pairs (partition 2 stub-forms) 58 | returns (map last stub-pairs) 59 | stub-fns (map #(list 'stub-fn %) returns) ; use stub fn 60 | real-fns (map first stub-pairs)] 61 | `(binding [~@(interleave real-fns stub-fns)] 62 | ~@body))) 63 | 64 | ; a mock records when and how API called(time, count, args) so we can verify calls later. 65 | (def mock-calls (atom {})) 66 | 67 | (defn stub-fn [the-function return-value] 68 | (swap! mock-calls assoc the-function []) 69 | (fn [& args] 70 | (swap! 
mock-calls update-in [the-function] conj args) 71 | return-value)) 72 | 73 | ; mock-fn does not provide a ret val, while stub ret a canned ret val 74 | (defn mock-fn [the-function] 75 | (stub-fn the-function nil)) ; ret nil when mocked out, or ret 200 OK. 76 | 77 | ; mock out a list of fn-names with mock-fn 78 | (defmacro mocking [fn-names & body] 79 | (let [mocks (map #(list 'mock-fn (keyword %)) fn-names)] 80 | `(binding [~@(interleave fn-names mocks)] 81 | ~@body))) 82 | 83 | (defmacro stubbing [stub-forms & body] 84 | (let [stub-pairs (partition 2 stub-forms) 85 | real-fns (map first stub-pairs) 86 | returns (map last stub-pairs) 87 | stub-fns (map #(list 'stub-fn (keyword %1) %2) real-fns returns)] 88 | `(binding [~@(interleave real-fns stub-fns)] 89 | ~@body))) 90 | 91 | (defmacro verify-call-times-for [fn-name number] 92 | `(is (= ~number (count (@mock-calls ~(keyword fn-name)))))) 93 | 94 | ;(defmacro verify-first-call-args-for [fn-name & args] 95 | ; `(is (= '~args (first (@mock-calls ~(keyword fn-name)))))); 96 | 97 | (defmacro verify-first-call-args-for [fn-name & args] 98 | `(verify-nth-call-args-for 1 ~fn-name ~@args)) 99 | 100 | (defmacro verify-nth-call-args-for [n fn-name & args] 101 | `(is (= '~args (nth (@mock-calls ~(keyword fn-name)) (dec ~n))))) 102 | 103 | (defn clear-calls [] 104 | (reset! mock-calls {})) 105 | 106 | 107 | ; mock test : give test name and test body 108 | (defmacro defmocktest [test-name & body] 109 | `(deftest ~test-name 110 | (binding [mock-calls (atom {})] 111 | (do ~@body)))) 112 | 113 | ; test body first stubbing out dependent fns, and focus on just fn of this module. 114 | (defmocktest test-fetch-expenses-greater-than 115 | (stubbing [fetch-all-expenses all-expenses] 116 | (let [filtered (fetch-expenses-greater-than "" "" "" 15.0)] 117 | (is (= (count filtered) 2)) 118 | (is (= (:amount (first filtered)) 20.0)) 119 | (is (= (:amount (last filtered)) 30.0))))) 120 | 121 | ; mock dependent API out and verify call logs. 
122 | ; use the testing macro to group these according to those goals: 123 | (defmocktest test-filter-greater-than 124 | (mocking [log-call] 125 | (let [filtered (expenses-greater-than all-expenses 15.0)] 126 | (testing "the filtering itself works as expected" ; use testing macro to group tests 127 | (is (= (count filtered) 2)) 128 | (is (= (:amount (first filtered)) 20.0)) 129 | (is (= (:amount (last filtered)) 30.0)))) 130 | (testing "Auditing via log-call works correctly" ; use testing macro to group tests 131 | (verify-call-times-for log-call 2) 132 | (verify-first-call-args-for log-call "expenses-greater-than" 15.0)))) 133 | 134 | 135 | ; For testing private functions, you need to use the following macro (courtesy of chouser), 136 | (defmacro with-private-fns [[ns fns] & tests] 137 | "Refers private fns from ns and runs tests in context." 138 | `(let ~(reduce #(conj %1 %2 `(ns-resolve '~ns '~%2)) [] fns) 139 | ~@tests)) 140 | 141 | 142 | ; use fixture to set-up test 143 | (defn my-fixture [f] 144 | ;;Perform setup, establish bindings, whatever. 145 | (f) ;;Then call the function we were passed. 146 | ;;Tear-down / clean-up code here. 147 | ) 148 | 149 | -------------------------------------------------------------------------------- /dbconn/src/dbconn/redis/redis_datamapper.clj: -------------------------------------------------------------------------------- 1 | (ns dbconn.redis.redis-datamapper 2 | (:require [clj-redis.client :as redis]) ; bring in redis namespace 3 | (:use clojure.contrib.str-utils) 4 | (:require [clojure.data.json :as json]) 5 | (:use [dbconn.redis.redis-persister])) 6 | 7 | (defn primary-key-value [redis-obj] 8 | (let [pk-keys ((redis-obj :type) :primary-key) 9 | separator ((redis-obj :type) :key-separator) 10 | values (map #(redis-obj :get %) pk-keys)] 11 | (str-join separator values))) 12 | 13 | ;; passed in redis-type closure, to get the type and :valid-key? 
;; A redis "object" is a closure over a ref'd map that holds the row state.
;; The passed-in redis-type closure supplies the type metadata and :valid-key?.
(defn new-redis-object
  "Create an instance of the given redis-type. Returns a closure that
  dispatches on an accessor keyword (:set!, :get, :save!, ...), OO-style."
  [redis-type]
  (let [state (ref {})]
    (fn thiz [accessor & args]
      (condp = accessor
        :type redis-type
        ;; validate the key against the type spec, then store k -> v.
        :set! (let [[k v] args]
                (redis-type :valid-key? k)
                (dosync
                  (alter state assoc k v))
                v)
        :set-all! (let [[kv-map] args]
                    (doseq [[k v] kv-map]
                      (thiz :set! k v)))
        :copy-from-redis-object (let [[from & attribs] args]
                                  (doseq [attrib attribs]
                                    (thiz :set! attrib (from :get attrib))))
        ;; conj v onto the collection stored under k (for list-type attributes).
        :add! (let [[k v] args
                    add-to-inner-list (fn [current-state ke valu]
                                        (update-in current-state [ke] conj valu))]
                (dosync
                  (alter state add-to-inner-list k v))
                v)
        :get (let [[k] args]
               (redis-type :valid-key? k)
               (state k))
        :primary-key-value (primary-key-value thiz)
        :save! (persist thiz)
        :get-state @state
        :replace-state (let [[new-state] args]
                         (dosync
                           (ref-set state new-state)))))))

(defn key-type-for
  "Classify key-name as :string-type or :list-type; nil when it is neither."
  [key-name string-types list-types]
  (cond
    (some #{key-name} string-types) :string-type
    (some #{key-name} list-types) :list-type))

(defn keys-for
  "Build the redis key names for one row: pk-value + separator + attrib-key."
  [keys separator values]
  (let [pk-value (str-join separator values)]
    (map #(str pk-value separator %) keys)))

(defn check-key-validity
  "Throw unless key is declared as a string or list attribute of redis-type;
  returns true otherwise."
  [key redis-type string-attribs list-attribs]
  (when-not (or (some #{key} string-attribs)
                (some #{key} list-attribs))
    (throw (RuntimeException. (str "Attempt to use unknown key " key " in redis-object of type " (redis-type :name)))))
  true)

(defn new-redis-type
  "Build the type-descriptor closure for a redis-mapped model: name, key
  separator, serialization format, primary-key columns and the string/list
  attribute lists. The returned fn dispatches on an accessor keyword."
  [name separator format primary-keys string-attribs list-attribs]
  (fn redis-type [accessor & args] ;; named anonymous fn, passed to redis objects.
    (condp = accessor ; switch dispatcher
      :name name ; closure binds to name.
      :format format
      :key-separator separator
      :primary-key primary-keys
      :key-type (let [[k] args]
                  (key-type-for k string-attribs list-attribs))
      :valid-key? (let [[key] args]
                    (check-key-validity key redis-type string-attribs list-attribs))
      ;; pk values arrive as one collection wrapped in the args list; unwrap once.
      :string-keys (let [[values] args]
                     (keys-for string-attribs separator values))
      :list-keys (let [[values] args]
                   (keys-for list-attribs separator values))
      :new (new-redis-object redis-type)
      :new-with-state (let [[new-state] args
                            nh (new-redis-object redis-type)]
                        (nh :replace-state new-state)
                        nh)
      :find (find-by-primary-key redis-type args)
      ;; NOTE(review): redis commands elsewhere in this repo take the db handle
      ;; as the first argument; these two calls pass only the key -- confirm
      ;; against the redis client library actually in use.
      :exists? (let [key-value (str-join separator args)
                     key-value (str key-value separator (first primary-keys))]
                 (redis/exists key-value))
      :attrib-exists? (let [attrib-key (first args)
                            pk-value (str-join separator (rest args))]
                        (redis/exists (str pk-value separator attrib-key))))))

(defn specs-for
  "Pull the column list out of specs for one spec symbol, e.g.
  (specs-for 'string-type '((string-type :id :name))) -> (:id :name)."
  [redis-datatype specs]
  (let [type-spec? #(= redis-datatype (first %))
        extractor (comp next first)]
    (extractor (filter type-spec? specs))))

(defmacro def-redis-type
  "Define a model schema: extract the column names from specs and def `name`
  as a new redis type. Defaults: format :clj-str, key separator \"___\"."
  [name & specs]
  (let [string-types (specs-for 'string-type specs)
        list-types (specs-for 'list-type specs)
        pk-keys (specs-for 'primary-key specs)
        format (or (first (specs-for 'format specs)) :clj-str)
        separator (or (first (specs-for 'key-separator specs)) "___")]
    `(def ~name
       (new-redis-type '~name ~separator ~format '~pk-keys '~string-types '~list-types))))

;;
;; a redis type is an object type that maps to a storage model with a list of attributes.
;; in FP, a type is a fn closure with meta, data, etc.
;; Fn returns a function that accepts commands with arguments makes things look like OO object.
110 | ;;(def-redis-type consumer 111 | ;; (string-type :id :merchant-id :start-time :timezone) 112 | ;; (list-type :cart-items) 113 | ;; (primary-key :id :merchant-id)) 114 | ;; (format :json) 115 | ;; (key-separator "##")) 116 | ;; 117 | ;; 118 | ;;(consumer :name) 119 | ;;(consumer :format) 120 | ;; 121 | ;; to instantiating and using the object, call the fn new closure. 122 | ;;(def c (consumer :new)) 123 | ;;(c :set! :merchant-id "14") 124 | ;;(c :add! :cart-items {:sku "XYZ" :cost 10.95}) 125 | ;; 126 | ;; persistent 127 | ;;(redis/with-server {:host "127.0.0.1" :port 6379 :db 0} 128 | ;; (c :save!)) 129 | ;; 130 | ;; retrieve 131 | ;;(redis/with-server {:host "127.0.0.1" :port 6379 :db 0} 132 | ;; (def d (consumer :find "adi" "14"))) 133 | -------------------------------------------------------------------------------- /io.clj: -------------------------------------------------------------------------------- 1 | ; clojure io 2 | ; (load-file "io.clj") 3 | 4 | ; when defining ns, include only the references that are used. 5 | ;:exclude, :only, :as, :refer-clojure, :import, :use, :load, and :require. 6 | ; ;use naked could corrupt the namespace. (:use :only) 7 | ; :import working with java deftype defrecord 8 | ; :refer 9 | ; (ns my-ns 10 | ; (:refer-clojure :exclude [defstruct]) 11 | ; (:use (clojure set xml)) ;; use other namespace without namespace qualification. 12 | ; (:use [clojure.java.io]) 13 | ; (:use [clojure.contrib.io]) ; (use 'clojure.contrib.io) 14 | ; (:use [clojure.test :only (are is)]) 15 | ; (:require [clojure [zip :as z]]) 16 | ; (:import [java.util.Collection])) 17 | 18 | 19 | ; get the current directory 20 | (System/getProperty "user.dir") 21 | 22 | (def filename "/Users/e51141/macsrc/clj/io.clj") 23 | 24 | ; open a file, spit content to it, with option, using clojure.java.io/write, then close it. 25 | (spit "/tmp/x" "1. log-1\n" :append true) 26 | (spit "/tmp/x" "2. 
log-2\n" :append true)

; slurp a file into mem
(slurp "/Users/e51141/macsrc/clj/io.clj")

; transform file as line seq using clojure.java.io/reader
(with-open [rdr (clojure.java.io/reader "/Users/e51141/tmp/x")]
  (printf "%s\n" (clojure.string/join "\n" (line-seq rdr))))

; was (require '[clojure.java.io :only [reader] :refer [reader]]') --
; the trailing quote was a syntax error, and :only is a use-filter, not a
; require option; :refer is the correct way to pull in a single name.
(require '[clojure.java.io :refer [reader]])
(with-open [rdr (reader "/Users/e51141/tmp/x")]
  (doseq [l (line-seq rdr)]
    (prn l)))

; output stream: convert a file to a byte output stream.
; was (:use [clojure.java.io :only [output-stream]]) -- a bare keyword form
; that just evaluates to nil; :use only works inside an ns form, so call use.
(use '[clojure.java.io :only [output-stream]])
(defn use-output-stream []
  (with-open [o (output-stream "test.txt")]
    (.write o 65))) ; writes 'A'

(defn parse-line [line]
  (let [tokens (.split (.toLowerCase line) " ")]
    (map #(vector % 1) tokens)))

(defn sum [[k v]]
  {k (apply + v)})

(defn reduce-parsed-lines [collected-values]
  (apply merge (map sum collected-values)))

(defn combine [mapped]
  (->> (apply concat mapped)
       (group-by first)
       (map (fn [[k v]]
              {k (map second v)}))
       (apply merge-with conj)))

; NOTE(review): read-lines comes from clojure.contrib.io, which is not
; required in this file -- confirm before running.
(defn word-frequency [filename]
  (->> (read-lines filename)
       (map parse-line)
       (combine)
       (reduce-parsed-lines)))


;; read a file line by line
(use '[clojure.java.io :only (reader)])
(with-open [rdr (reader filename)]
  (doseq [line (line-seq rdr)]
    (println line)))

; NOTE(review): writer is clojure.java.io/writer; it is not referred above -- confirm.
(with-open [wrtr (writer "/tmp/test.txt")]
  (.write wrtr "Line to be written"))

(with-open [wrtr (writer "/tmp/test.txt" :append true)]
  (.write wrtr "Line to be appended"))

(defn fetch-url [url]
  (with-open [stream (.openStream (java.net.URL. url))]
    (let [buf (java.io.BufferedReader.
                (java.io.InputStreamReader. stream))]
      (apply str (line-seq buf)))))

(fetch-url "http://google.com")


;;
;; fetch binary data
(defn fetch-data [url]
  (let [con (-> url java.net.URL. .openConnection)
        fields (reduce (fn [h v] (assoc h (.getKey v) (into [] (.getValue v)))) {} (.getHeaderFields con))
        size (first (fields "Content-Length"))
        in (java.io.BufferedInputStream. (.getInputStream con))
        out (java.io.BufferedOutputStream. (java.io.FileOutputStream. "out.file"))
        buffer (make-array Byte/TYPE 1024)]
    (loop [g (.read in buffer) r 0]
      (if-not (= g -1)
        (do
          (println r "/" size)
          (.write out buffer 0 g)
          (recur (.read in buffer) (+ r g)))))
    (.close in)
    (.close out)
    (.disconnect con)))

(fetch-data "http://google.com")


;; deal with a socket directly.
;; was: (java.io.java. (BufferedReader.io.InputStreamReader. ...)) -- garbled
;; interop names; fixed to a BufferedReader over an InputStreamReader.
(defn socket [host port]
  (let [socket (java.net.Socket. host port)
        in (java.io.BufferedReader. (java.io.InputStreamReader. (.getInputStream socket)))
        out (java.io.PrintWriter. (.getOutputStream socket))]
    {:in in :out out}))

(def conn (socket "irc.freenode.net" 6667))
(println (.readLine (:in conn)))


; parsing logs
(defn request-seq [filename]
  (->> (read-lines filename)
       (drop 2) ; drop head 2 lines
       (lazy-request-seq)))

; (a stray top-level `cons` symbol sat here -- a comment fragment; removed.)
; cons the result of head (next-log-record hd) onto the result seq of the rest.
(defn lazy-request-seq [log-lines]
  (lazy-seq
    (let [record (next-log-record log-lines)]
      (if (empty? record)
        nil
        (cons (remove empty? record)
              (lazy-request-seq (drop (count record) log-lines)))))))

(defn next-log-record [log-lines]
  (let [head (first log-lines)
        body (take-while (complement record-start?) (rest log-lines))]
    (remove nil?
(conj body head)))) 143 | 144 | 145 | 146 | ; named arguments by destructure the rest argument with a map 147 | (defn foo [& {:keys [k1 k2 k3]}] 148 | (prn "calling foo : " (str k1 k2 k3))) 149 | (foo :a :k1 "k" :k2 "b2" :k3 "c3") 150 | 151 | (defn blah [& {:keys [key1 key2 key3] :or {key3 10}}] 152 | (str key1 key2 key3)) 153 | 154 | ; unamed argument with underscore _ 155 | ; the underscore is used idiomatically indicates that the argument is not used. 156 | 157 | 158 | ; execute shell command 159 | (use '[clojure.java.shell :only [sh]]) 160 | (sh "ls" "-la") 161 | (ns-unmap 'user 'sh) 162 | 163 | (use '[clojure.contrib.shell-out]) 164 | (sh "ls" "-la") 165 | 166 | ; execute shell with java runtime 167 | (import 'java.lang.Runtime) 168 | (let [p (.exec (Runtime/getRuntime) "ls -la") 169 | br (java.io.BufferedReader. (java.io.InputStreamReader. (.getInputStream p)))] 170 | ;(map prn (line-seq br))) 171 | (for [l (line-seq br)] 172 | (prn l))) 173 | 174 | 175 | -------------------------------------------------------------------------------- /trident/src/main/java/com/colorcloud/trident/FakeTweetsBatchSpout.java: -------------------------------------------------------------------------------- 1 | package com.colorcloud.trident; 2 | 3 | import java.io.IOException; 4 | import java.text.ParseException; 5 | import java.text.SimpleDateFormat; 6 | import java.util.Map; 7 | import java.util.Random; 8 | 9 | import org.apache.commons.io.IOUtils; 10 | 11 | import storm.trident.operation.TridentCollector; 12 | import storm.trident.spout.IBatchSpout; 13 | import backtype.storm.Config; 14 | import backtype.storm.task.TopologyContext; 15 | import backtype.storm.tuple.Fields; 16 | import backtype.storm.tuple.Values; 17 | 18 | /** 19 | * A Spout that emits fake tweets. It calculates a random probability distribution for hashtags and actor activity. 20 | * It uses a dataset of 500 english sentences. It has a fixed set of actors and subjects for each tweet. 
21 | * Tweet text is one of the random 500 sentences followed by a hashtag of one subject. 22 | */ 23 | @SuppressWarnings({ "serial", "rawtypes" }) 24 | public class FakeTweetsBatchSpout implements IBatchSpout { 25 | 26 | private int batchSize; 27 | 28 | public final static String[] ACTORS = { "stefan", "dave", "pere", "nathan", "doug", "ted", "mary", "rose" }; 29 | public final static String[] LOCATIONS = { "Spain", "USA", "Spain", "USA", "USA", "USA", "UK", "France" }; 30 | public final static String[] SUBJECTS = { "berlin", "justinbieber", "hadoop", "life", "bigdata" }; 31 | 32 | private double[] activityDistribution; 33 | private double[][] subjectInterestDistribution; 34 | private Random randomGenerator; 35 | private String[] sentences; 36 | 37 | private long tweetId = 0; 38 | 39 | public FakeTweetsBatchSpout() throws IOException { 40 | this(5); 41 | } 42 | 43 | public FakeTweetsBatchSpout(int batchSize) throws IOException { 44 | this.batchSize = batchSize; 45 | } 46 | 47 | @SuppressWarnings("unchecked") 48 | @Override 49 | public void open(Map conf, TopologyContext context) { 50 | System.err.println("Open Spout instance"); 51 | this.randomGenerator = new Random(); 52 | // read a resource with 500 sample english sentences 53 | try { 54 | sentences = (String[]) IOUtils.readLines( 55 | ClassLoader.getSystemClassLoader().getResourceAsStream("500_sentences_en.txt")).toArray(new String[0]); 56 | } catch(IOException e) { 57 | throw new RuntimeException(e); 58 | } 59 | // will define which actors are more proactive than the others 60 | this.activityDistribution = getProbabilityDistribution(ACTORS.length, randomGenerator); 61 | // will define what subjects each of the actors are most interested in 62 | this.subjectInterestDistribution = new double[ACTORS.length][]; 63 | for(int i = 0; i < ACTORS.length; i++) { 64 | this.subjectInterestDistribution[i] = getProbabilityDistribution(SUBJECTS.length, randomGenerator); 65 | } 66 | } 67 | 68 | @Override 69 | public void 
emitBatch(long batchId, TridentCollector collector) { 70 | // batch spout emit batchSize fake tweets for each invocation. 71 | System.out.println("Spout emits a batch of " + batchSize ); 72 | for(int i = 0; i < batchSize; i++) { 73 | collector.emit(getNextTweet()); 74 | } 75 | } 76 | 77 | @Override 78 | public void ack(long batchId) { 79 | // nothing to do here 80 | } 81 | 82 | @Override 83 | public void close() { 84 | // nothing to do here 85 | } 86 | 87 | @Override 88 | public Map getComponentConfiguration() { 89 | // no particular configuration here 90 | return new Config(); 91 | } 92 | 93 | /** 94 | * override this to define spout output columns. 95 | */ 96 | @Override 97 | public Fields getOutputFields() { 98 | return new Fields("id", "text", "actor", "location", "date"); 99 | } 100 | 101 | // --- Helper methods --- // 102 | // SimpleDateFormat is not thread safe! 103 | private SimpleDateFormat DATE_FORMAT = new SimpleDateFormat("yyyy-MM-dd hh:mm:ss aa"); 104 | 105 | public Values getNextTweet() { 106 | int actorIndex = randomIndex(activityDistribution, randomGenerator); 107 | String author = ACTORS[actorIndex]; 108 | String text = sentences[randomGenerator.nextInt(sentences.length)].trim() + " #" 109 | + SUBJECTS[randomIndex(subjectInterestDistribution[actorIndex], randomGenerator)]; 110 | return new Values(++tweetId + "", text, author, LOCATIONS[actorIndex], DATE_FORMAT.format(System 111 | .currentTimeMillis())); 112 | } 113 | 114 | public String getNextTweetString() { 115 | int actorIndex = randomIndex(activityDistribution, randomGenerator); 116 | String author = ACTORS[actorIndex]; 117 | String text = sentences[randomGenerator.nextInt(sentences.length)].trim() + " #" 118 | + SUBJECTS[randomIndex(subjectInterestDistribution[actorIndex], randomGenerator)]; 119 | 120 | return ++tweetId + " : " + text + " : " + author + " : " + LOCATIONS[actorIndex] + " : " + DATE_FORMAT.format(System.currentTimeMillis()); 121 | } 122 | 123 | /** 124 | * Code snippet: 
http://stackoverflow.com/questions/2171074/generating-a-probability-distribution Returns an array of 125 | * size "n" with probabilities between 0 and 1 such that sum(array) = 1. 126 | */ 127 | private static double[] getProbabilityDistribution(int n, Random randomGenerator) { 128 | double a[] = new double[n]; 129 | double s = 0.0d; 130 | for(int i = 0; i < n; i++) { 131 | a[i] = 1.0d - randomGenerator.nextDouble(); 132 | a[i] = -1 * Math.log(a[i]); 133 | s += a[i]; 134 | } 135 | for(int i = 0; i < n; i++) { 136 | a[i] /= s; 137 | } 138 | return a; 139 | } 140 | 141 | private static int randomIndex(double[] distribution, Random randomGenerator) { 142 | double rnd = randomGenerator.nextDouble(); 143 | double accum = 0; 144 | int index = 0; 145 | for(; index < distribution.length && accum < rnd; index++, accum += distribution[index - 1]) 146 | ; 147 | return index - 1; 148 | } 149 | 150 | public static void main(String[] args) throws IOException, ParseException { 151 | FakeTweetsBatchSpout spout = new FakeTweetsBatchSpout(); 152 | spout.open(null, null); 153 | for(int i = 0; i < 30; i++) 154 | System.out.println(spout.getNextTweet()); 155 | } 156 | } 157 | -------------------------------------------------------------------------------- /trident-clj/src/trident_clj/redis/redis_persister.clj: -------------------------------------------------------------------------------- 1 | (ns trident-clj.redis.redis-persister 2 | (:require [clj-redis.client :as redis]) ; bring in redis namespace 3 | (:require [clojure.data.json :as json])) 4 | 5 | ; clojure does not support cyclic dependencies. break cyclic by having a third ns 6 | ; contains common defs. If you have cyclic deps, you did not get your abstraction right. 

; init redis connection
;(def redis-db (redis/init :url "redis://localhost"))
(defn init-redis-db []
  ; def inside defn: the var is interned at load; calling this binds it.
  (def redis-db (redis/init :url "redis://localhost")))

; serialization: two wire formats (:json, :clj-str) crossed with two key
; types (:string-type, :list-type); dispatch on the [format key-type] pair.
(defmulti serialize (fn [fmt key-type _] [fmt key-type]))

(defmethod serialize [:json :string-type] [_ _ value]
  (json/write-str value))

(defmethod serialize [:json :list-type] [_ _ value]
  (map json/write-str value))

(defmethod serialize [:clj-str :string-type] [_ _ value]
  (pr-str value)) ; pr-str emits the double quotes as part of the str

(defmethod serialize [:clj-str :list-type] [_ _ value]
  (map pr-str value))


; deserialization: exact mirror of serialize.
(defmulti deserialize (fn [fmt key-type _] [fmt key-type]))

(defmethod deserialize [:json :string-type] [_ _ serialized]
  (json/read-str serialized))

(defmethod deserialize [:json :list-type] [_ _ serialized]
  (map json/read-str serialized))

(defmethod deserialize [:clj-str :string-type] [_ _ serialized]
  (read-string serialized))

(defmethod deserialize [:clj-str :list-type] [_ _ serialized]
  (map read-string serialized))

; a model is a map of plain string values and list values; map-type values are
; flattened directly onto the top level. Writers per key type: SET for
; strings, RPUSH for list entries.
(def inserters {
  :string-type redis/set
  :list-type redis/rpush
})

; fetchers for different types. get for key type, lrange for list type.
; key = "1##doug##:id"
; readers per key type: GET for a string key, LRANGE over the whole list for a
; list key; each returns {key {:value ... :key-type ...}}.
(def fetchers {
  :string-type (fn [key]
                 {key {:value (redis/get redis-db key)
                       :key-type :string-type}})
  :list-type (fn [key]
               {key {:value (redis/lrange redis-db key 0 (redis/llen redis-db key))
                     :key-type :list-type}})})

; write a persistable map (redis-key -> {:value :key-type}) into redis:
; string values go through SET, each list element through RPUSH.
(defn insert-into-redis [persistable]
  (let [write-entry (fn [[k v]]
                      ; e.g. "3##rose##:id" {:value "\"3\"", :key-type :string-type}
                      (cond
                        (= (v :key-type) :string-type) ((inserters :string-type) redis-db k (v :value))
                        (= (v :key-type) :list-type) (doall (map #((inserters :list-type) redis-db k %) (v :value)))))]
    (doall (map write-entry persistable))))


; turn one object's state into a persistable map. Each attribute becomes one
; redis key, pk-value + separator + attrib-key; the value is a map of
; :key-type (string or list) and the serialized :value. merge folds the
; per-attribute singleton maps into one.
(defn persistable-for [redis-object]
  (let [redis-type (redis-object :type)
        separator (redis-type :key-separator)
        format (redis-type :format)
        pk-value (redis-object :primary-key-value)
        entry-for (fn [[k v]] ; transform k v to {pk+k {:value ... :key-type ...}}
                    (let [key-type (redis-type :key-type k)]
                      {(str pk-value separator k)
                       {:value (serialize format key-type v)
                        :key-type key-type}}))]
    (apply merge (map entry-for (redis-object :get-state)))))

(defn persist [redis-object]
  (insert-into-redis (persistable-for redis-object))
  true)

; invert persistable-for: recover the attribute keyword from the redis key
; (last separator-delimited segment) and deserialize each value; entries with
; no value collapse into {}.
(defn deserialize-state [serialized redis-type]
  (let [format (redis-type :format)
        separator (redis-type :key-separator)
        key-from (fn [k]
                   (read-string (last (.split k separator))))
        deserializer (fn [[k {:keys [key-type value]}]]
                       (if-not value
                         {}
                         {(key-from k) (deserialize format key-type value)}))]
    (apply merge (map deserializer serialized))))

; retrieve an object by its primary-key values, e.g. (consumer :find "adi" "14").
; each object (row) with n attributes maps to n redis keys
; (pkv1+pkv2+k1, pkv1+pkv2+k2, ...); fetch the string and list keys, merge,
; deserialize, and rebuild the object from the recovered state.
(defn find-by-primary-key [redis-type pk-values]
  (let [string-keys (redis-type :string-keys pk-values)
        list-keys (redis-type :list-keys pk-values)
        ; fetch from redis every string and list key belonging to this row
        string-maps (apply merge (map #((fetchers :string-type) %) string-keys))
        list-maps (apply merge (map #((fetchers :list-type) %) list-keys))
        serialized (merge string-maps list-maps)
        deserialized (deserialize-state serialized redis-type)]
    (if (empty?
deserialized) 127 | nil 128 | (redis-type :new-with-state deserialized)))) 129 | -------------------------------------------------------------------------------- /trident-clj/src/trident_clj/redis/redis_datamapper.clj: -------------------------------------------------------------------------------- 1 | (ns trident-clj.redis.redis-datamapper 2 | (:require [clj-redis.client :as redis]) ; bring in redis namespace 3 | (:use clojure.contrib.str-utils) 4 | (:require [clojure.data.json :as json]) 5 | (:use [trident-clj.redis.redis-persister])) 6 | 7 | ; form pk-val for a row(object) by 8 | (defn primary-key-value [redis-obj] 9 | (let [pk-keys ((redis-obj :type) :primary-key) 10 | separator ((redis-obj :type) :key-separator) 11 | values (map #(redis-obj :get %) pk-keys)] 12 | (str-join separator values))) 13 | 14 | ; redis obj is a closure over a ref map where state(k,v) pairs are stored. 15 | ; the map is serialized into redis when save called to persist. 16 | (defn new-redis-object [redis-type] 17 | (let [state (ref {})] 18 | (fn thiz [accessor & args] 19 | (condp = accessor 20 | :type redis-type 21 | :set! (let [[k v] args] 22 | (redis-type :valid-key? k) 23 | (dosync 24 | (alter state assoc k v)) 25 | v) 26 | :set-all! (let [[kv-map] args] 27 | (doseq [kv kv-map] 28 | (let [[k v] kv] 29 | (thiz :set! k v)))) 30 | :copy-from-redis-object (let [from (first args) 31 | attribs (rest args)] 32 | (doseq [attrib attribs] 33 | (thiz :set! attrib (from :get attrib)))) 34 | :add! (let [[k v] args 35 | add-to-inner-list (fn [current-state ke valu] 36 | (update-in current-state [ke] conj valu))] 37 | (dosync 38 | (alter state add-to-inner-list k v)) 39 | v) 40 | :get (let [[k] args] 41 | (redis-type :valid-key? k) 42 | (state k)) 43 | :primary-key-value (primary-key-value thiz) 44 | :save! 
(persist thiz) 45 | :get-state @state 46 | :replace-state (let [[new-state] args] 47 | (dosync 48 | (ref-set state new-state))))))) 49 | 50 | ; 51 | (defn key-type-for [key-name string-types list-types] 52 | (if (some #(= % key-name) string-types) 53 | :string-type 54 | (if (some #(= % key-name) list-types) 55 | :list-type))) 56 | 57 | 58 | ; from pk-value, get all keys belong to this row. 59 | ; b/c each row has n cols(keys), so we have n keys in redis, named (pkv1+pkv2+k1)... 60 | (defn keys-for [keys separator values] 61 | (let [pk-value (str-join separator values)] 62 | (map #(str pk-value separator %) keys))) 63 | 64 | 65 | (defn check-key-validity [key redis-type string-attribs list-attribs] 66 | (if-not (some #(= % key) string-attribs) 67 | (if-not (some #(= % key) list-attribs) 68 | (throw (RuntimeException. (str "Attempt to use unknown key " key " in redis-object of type " (redis-type :name)))))) 69 | true) 70 | 71 | 72 | ; model type metadata. closure with condp = accessor 73 | (defn new-redis-type [name separator format primary-keys string-attribs list-attribs] 74 | (fn redis-type [accessor & args] ; named anonym fn, passed to redis object. 75 | (condp = accessor ; switch dispatcher 76 | :name name ; closure bind to name. 77 | :format format 78 | :key-separator separator 79 | :primary-key primary-keys ; a seq of keys 80 | :key-type (let [[k] args] 81 | (key-type-for k string-attribs list-attribs)) 82 | :valid-key? (let [[key] args] 83 | (check-key-validity key redis-type string-attribs list-attribs)) 84 | 85 | ; usage : (redis-type :string-keys pk-values) 86 | ; pk-value already in a list, and being wraped into args list again when passed in here, de-list. 87 | :string-keys (let [[values] args] 88 | (keys-for string-attribs separator values)) 89 | :list-keys (let [[values] args] 90 | (keys-for list-attribs separator values)) 91 | ; new an object of this type. 
92 | :new (new-redis-object redis-type) 93 | :new-with-state (let [[new-state] args 94 | nh (new-redis-object redis-type)] 95 | (nh :replace-state new-state) 96 | nh) 97 | ; retrieve row by pkvals 98 | ; (def d (consumer :find "adi" "14"))) 99 | :find (find-by-primary-key redis-type args) 100 | :exists? (let [key-value (str-join separator args) 101 | key-value (str key-value separator (first primary-keys))] 102 | (redis/exists key-value)) 103 | :attrib-exists? (let [attrib-key (first args) 104 | pk-value (str-join separator (rest args))] 105 | (redis/exists (str pk-value separator attrib-key)))))) 106 | 107 | ; ret a vec of [colume names] from (string-type :id :start-time :timezone) 108 | (defn specs-for [redis-datatype specs] 109 | (let [type-spec? #(= redis-datatype (first %)) 110 | extractor (comp next first)] 111 | (extractor (filter type-spec? specs)))) 112 | 113 | ; model scheme. extract a list of col names from specs. 114 | (defmacro def-redis-type [name & specs] 115 | (let [string-types (specs-for 'string-type specs) 116 | list-types (specs-for 'list-type specs) 117 | pk-keys (specs-for 'primary-key specs) 118 | format (or (first (specs-for 'format specs)) :clj-str) 119 | separator (or (first (specs-for 'key-separator specs)) "___")] 120 | `(def ~name 121 | (new-redis-type '~name ~separator ~format '~pk-keys '~string-types '~list-types)))) 122 | 123 | ;; 124 | ;; a redis type is a object type that maps to a storage model with a list of attributes. 125 | ;; in FP, a type is a fn closure with meta, data, etc. 126 | ;; Fn returns a function that accepts commands with arguments makes things look like OO object. 
127 | ;;(def-redis-type consumer 128 | ;; (string-type :id :merchant-id :start-time :timezone) 129 | ;; (list-type :cart-items) 130 | ;; (primary-key :id :merchant-id)) 131 | ;; (format :json) 132 | ;; (key-separator "##")) 133 | ;; 134 | ;; 135 | ;;(consumer :name) 136 | ;;(consumer :format) 137 | ;; 138 | ;; to instantiating and using the object, call the fn new closure. 139 | ;;(def c (consumer :new)) 140 | ;;(c :set! :merchant-id "14") 141 | ;;(c :add! :cart-items {:sku "XYZ" :cost 10.95}) 142 | ;; 143 | ;; persistent 144 | ;;(redis/with-server {:host "127.0.0.1" :port 6379 :db 0} 145 | ;; (c :save!)) 146 | ;; 147 | ;; retrieve 148 | ;;(redis/with-server {:host "127.0.0.1" :port 6379 :db 0} 149 | ;; (def d (consumer :find "adi" "14"))) 150 | -------------------------------------------------------------------------------- /task-executor.clj: -------------------------------------------------------------------------------- 1 | ;; a framework for batch processing 2 | ;; 3 | (ns batching 4 | (:import [java.io FileReader] 5 | [java.util Map Map$Entry List ArrayList Collection Iterator HashMap]) 6 | (:import [java.text SimpleDateFormat] 7 | [java.util Calendar TimeZone]) 8 | (:import [java.util.concurrent Executors]) 9 | (:use clojure.contrib.io 10 | clojure.contrib.seq-utils)) 11 | 12 | 13 | ;; example of using executor service. 14 | ;; clojure fn is java.util.concurrent.Callable ! 15 | (defn test-stm [nitems nthreads niters] 16 | (let [refs (map ref (repeat nitems 0)) ; a list of global state refed. 17 | pool (Executors/newFixedThreadPool nthreads) 18 | ; loop create n fn closures, upon invoke, do n times of batch alter refs. 19 | tasks (map (fn [t] 20 | (fn [] 21 | (dotimes [n niters] 22 | (dosync 23 | (doseq [r refs] 24 | (alter r + 1 t)))))) 25 | (range nthreads))] 26 | ; executor invoke all clojure fns, which is java Callables. 27 | ; ret result is wrapped inside blocking future. get it one by one ! 
28 | (doseq [future (.invokeAll pool tasks)] ; Collection task 29 | (.get future)) 30 | (.shutdown pool) 31 | (map deref refs))) 32 | 33 | 34 | ;; 35 | ;; the job meta map 36 | (defn new-job [job-id worker batch-size batch-wait-time id-generator] 37 | {:tasks-atom (atom {}) ;; store task id and body as task is running. 38 | :job-id job-id 39 | :worker worker 40 | :batch-size batch-size 41 | :batch-wait-time batch-wait-time 42 | :id-gen id-generator}) ;; snowflake id gen 43 | 44 | ;; composite key to store in redis 45 | (def KEY-SEPARATOR "___") 46 | (defn managed-key [job-id task-id] 47 | (str job-id KEY-SEPARATOR task-id)) 48 | 49 | ;; update job status in redis 50 | (def STATUS-FIELD "status") 51 | (defn update-status-as [job-id task-id status] 52 | (redis/hset (managed-key job-id task-id) STATUS-FIELD status)) 53 | 54 | (def DISPATCHED "dispatched") 55 | (defn mark-dispatched [job-id task-id] 56 | (update-status-as job-id task-id DISPATCHED)) 57 | 58 | (def INITIAL "initial") 59 | (def COMPLETE "complete") 60 | (def ERROR "error") 61 | (def RECOVERY "recovery") 62 | (def SECONDARY "secondary") 63 | (def UNKNOWN "unknown") 64 | 65 | (defn mark-error [job-id task-id] 66 | (update-status-as job-id task-id ERROR)) 67 | 68 | (defn mark-recovery [job-id task-id] 69 | (update-status-as job-id task-id RECOVERY)) 70 | 71 | (def next-status { 72 | DISPATCHED INITIAL 73 | INITIAL COMPLETE 74 | RECOVERY SECONDARY 75 | SECONDARY COMPLETE 76 | "" UNKNOWN 77 | nil UNKNOWN 78 | UNKNOWN UNKNOWN 79 | }) 80 | 81 | (defn status-of [job-id task-id] 82 | (redis/hget (managed-key job-id task-id) STATUS-FIELD)) 83 | 84 | (defn increment-status [job-id task-id] 85 | (->> (status-of job-id task-id) 86 | (next-status) 87 | (update-status-as job-id task-id))) 88 | 89 | ;; dispatch a job 90 | (defn start-job [{:keys [batch-size] :as job} args-seq] 91 | (let [args-batches (partition-all batch-size args-seq)] 92 | (doseq [args-batch args-batches] 93 | (run-batch job args-batch)))) 94 | 95 | 
(defn run-batch
  "Dispatch every args-list in args-batch as a task, then block until the
  whole batch completes or batch-wait-time ms elapse.
  (wait-until-completion lives with the worker code, in another ns.)"
  [{:keys [id-gen tasks-atom batch-wait-time] :as job} args-batch]
  (doseq [args args-batch]
    (run-task job (apply id-gen args) args mark-dispatched))
  (wait-until-completion (map :proxy (vals @tasks-atom)) batch-wait-time))

(defn run-task
  "Mark the task's status, start the worker proxy with [job-id task-id args],
  and record {:args :proxy} under task-id in tasks-atom."
  [{:keys [job-id worker tasks-atom]}
   task-id args mark-status]
  (mark-status job-id task-id)
  (let [task-info {:args args
                   :proxy (apply worker [job-id task-id args])}]
    (swap! tasks-atom assoc task-id task-info)))


;;
;; slave wrapper wraps the computation and returns a fn closure that tracks
;; task status in redis around the real work.
;;
(defn redis-config []
  {:host "localhost"})

(defn slave-wrapper [worker-function]
  (fn [job-id task-id worker-args]
    (redis/with-server (redis-config)
      (increment-status job-id task-id)
      (try ;; apply fn with the passed args.
        (let [return (apply worker-function worker-args)]
          (increment-status job-id task-id)
          return)
        (catch Exception e
          ;; deliberately swallow: the job layer reads status, not exceptions.
          (mark-error job-id task-id))))))

;; defines a remote worker (via defworker, declared elsewhere) whose body is
;; wrapped with status tracking. worker-args' (with the trailing quote) is a
;; deliberate -- and valid -- symbol name.
(defmacro slave-worker [name args & body]
  `(let [simple-function# (fn ~args (do ~@body))
         slave-function# (slave-wrapper simple-function#)]
     (defworker ~name [~'job-id ~'task-id ~'worker-args']
       (slave-function# ~'job-id ~'task-id ~'worker-args'))))

;; the real computation, simulate factorial.
;; uses *' so large n auto-promotes to BigInt instead of throwing
;; ArithmeticException on long overflow (semantics of * since Clojure 1.3).
(defn fact [n acc]
  (if (= n 0)
    acc
    (recur (dec n) (*' n acc))))

(slave-worker factorial [n]
  (let [f (fact n 1)]
    (println "Calculated factorial of" n "value:" f)
    f))


; sleeping barber: a queue connects producer and consumer.
; A queue to tie up producer and consumer; use a ref to sequence queue access.
; Alternatives: deque/ring buffer -- producers lock the write idx, consumers the read idx.
; 1. 
use wait-notify 150 | ; 2. LinkedBlockingQueue, wont blocking if queue is empty. LinkedBlockingQueue will block. 151 | ; 3. clojure.lang.PersistentQueue/EMPTY for FIFO Q, ref fence it. 152 | ; Pattern: wait, wake, lock, check, mute, notify 153 | 154 | (def queue (ref (with-meta 155 | clojure.lang.PersistentQueue/EMPTY 156 | {:tally 0}))) 157 | (def seats 3) 158 | 159 | (defn debug [_ msg n] ; first unamed args 160 | (println msg (apply str (repeat (- 35 (count msg)) \space)) n) 161 | (flush)) 162 | 163 | 164 | (defn the-shop [a] 165 | ; the-shop is the Q connect producer and consumer 166 | (debug "(c) entering shop" a) 167 | (dosync 168 | (if (< (count @queue) seats) 169 | (alter queue conj a) ; mutate ref with update fn 170 | (debug "(s) turning away customer" a)))) 171 | 172 | 173 | ; Pattern: sync, peek, pop, set 174 | ; consumer, wait wake lock update notify 175 | (defn the-barber [st q] 176 | (Thread/sleep (+ 100 (rand-int 600))) 177 | (dosync 178 | (when (peek @q) 179 | (debug "(b) cutting hair of customer" (peek @q)) 180 | (ref-set queue (with-meta (pop @q) 181 | {:tally (inc (:tally (meta @q)))}))))) 182 | 183 | 184 | ; observe mutable state changed, invoke my callback. 185 | ; (add-watch refvar :key (fn [k refvar os ns] (print k refvar os ns))) 186 | (add-watcher queue :send (agent 'barber) the-barber) 187 | (add-watch queue :customer-ready the-barber) 188 | (add-watcher queue :send (agent 'barber) the-barber) 189 | (add-watch queue :send (fn [k queue os ns] (prn k queue os ns))) 190 | 191 | (doseq [customer (range 1 20)] 192 | ; (Thread/sleep (+ 100 (rand-int 200))) 193 | (send-off (agent customer) the-shop)) 194 | 195 | (Thread/sleep 2000) 196 | (println "(!) 
" (:tally (meta @queue)) "customers got haircuts today") 197 | {:tally (inc (:tally (meta @q)))}))))) 198 | 199 | 200 | -------------------------------------------------------------------------------- /msgqueue/src/msgqueue/rabbitmq/worker.clj: -------------------------------------------------------------------------------- 1 | ;; 2 | ; workers on the server side, get msg from queue, and apply fn to the 3 | ; data(args) extracted from the queue. Send result back thru callback queue. 4 | ; full fledged version is https://github.com/amitrathore/swarmiji 5 | ;; 6 | (ns msgqueue.rabbitmq.worker 7 | (:use msgqueue.rabbitmq.rabbitmq) 8 | (:import (java.util UUID))) 9 | 10 | ; NameSpace global mapping, the name of computation, and the args for it. 11 | ; whenever a thread executes any fns inside this namespace, this global vars 12 | ; are closures shared among all threads. It's like class static variable. 13 | (def workers (ref {})) 14 | (def worker-init-value :__worker_init__) ; worker init status 15 | (def WORKER-QUEUE "workers_queue") 16 | (def BROADCAST-QUEUE "workers_bcast_queue") 17 | (def BROADCAST-EXCHANGE "workers_fanex") 18 | 19 | ; result msg come back to ret-q, changed ret status from init. 20 | (defn all-complete? [swarm-requests] 21 | (every? #(% :complete?) swarm-requests)) 22 | 23 | (defn disconnect-worker [[channel q-name]] 24 | (.queueDelete channel q-name)) 25 | 26 | (defn disconnect-all [swarm-requests] 27 | (doseq [req swarm-requests] 28 | (req :disconnect))) 29 | 30 | 31 | ; spin until all requests completed. 32 | (defn wait-until-completion [swarm-requests allowed-time] 33 | (loop [all-complete (all-complete? swarm-requests) 34 | elapsed-time 0] 35 | (if (> elapsed-time allowed-time) 36 | (do 37 | (disconnect-all swarm-requests) 38 | (throw (RuntimeException. (str "Remote worker timeout exceeded " allowed-time " milliseconds!")))) 39 | (if (not all-complete) 40 | (do 41 | (Thread/sleep 100) 42 | (recur (all-complete? 
swarm-requests) (+ elapsed-time 100)))))

;; wait for every request to complete, then run the post-completion exprs
(defmacro from-swarm [swarm-requests & expr]
  `(do
     (wait-until-completion ~swarm-requests 5000)
     ~@expr))

;; Block (inside a future) on the return queue; when the response message
;; arrives, ref-set worker-ref with the parsed payload and tear the channel
;; and queue down. Returns [channel return-q-name] as the transport pair.
(defn update-on-response [worker-ref return-q-name]
  (let [channel (.createChannel *rabbit-connection*)
        consumer (consumer-for channel return-q-name)
        on-response (fn [response-message]
                      (dosync
                        (ref-set worker-ref (read-string response-message))
                        (.queueDelete channel return-q-name)
                        (.close channel)))]
    ;; like a java FutureTask: the blocking delivery runs on another thread
    ;; and the closure above caches the result when it returns
    (future (on-response (delivery-from channel consumer)))
    [channel return-q-name]))

;; an envelope is a map naming the worker fn and its args, sent over the queue
(defn request-envelope
  ([worker-name args]
   {:worker-name worker-name :worker-args args})
  ([worker-name args return-q-name]
   (assoc (request-envelope worker-name args) :return-q return-q-name)))

;; dispatch a computation: name the worker fn, supply args, and install the
;; future that blocks on the reply queue to update the result ref
(defn dispatch-work [worker-name args worker-ref]
  (let [return-q-name (str (UUID/randomUUID))   ; random reply-queue name
        request-object (request-envelope worker-name args return-q-name)
        worker-transport (update-on-response worker-ref return-q-name)]
    (send-message WORKER-QUEUE request-object)
    worker-transport))

;; pull an attribute out of the response payload, validating completion/status
(defn attribute-from-response [worker-internal-data attrib-name]
  (if (= worker-init-value worker-internal-data)
    (throw (RuntimeException. "Worker not complete!")))
  (if (not (= :success (keyword (worker-internal-data :status))))
    (throw (RuntimeException. "Worker has errors!")))
  (worker-internal-data attrib-name))

;; dispatch to the swarm: create a ref for the async result, send the request,
;; and return an accessor closure over the eventual response
(defn on-swarm [worker-name args]
  (let [worker-data (ref worker-init-value)    ; holds the eventual response
        worker-transport (dispatch-work worker-name args worker-data)]
    (fn [accessor]
      (condp = accessor
        :complete? (not (= worker-init-value @worker-data))
        :value (attribute-from-response @worker-data :value)
        :status (@worker-data :status)
        :disconnect (disconnect-worker worker-transport)))))

;; build the client-side fn that ships the request over rabbitmq
(defmacro worker-runner [worker-name should-return worker-args]
  `(fn ~worker-args
     (if ~should-return
       (on-swarm ~worker-name ~worker-args))))

;; def a var bound to a fn named service-name; crucially, also register the fn
;; object in the global `workers` map so any process loading this namespace can
;; look the fn up by the name string sent across the wire and invoke it with
;; the passed-in args.
(defmacro defworker [service-name args & exprs]
  `(let [worker-name# (keyword '~service-name)]   ; symbol -> keyword, :name
     (dosync
       (alter workers assoc worker-name# (fn ~args (do ~@exprs))))
     (def ~service-name (worker-runner worker-name# true ~args))))

;; run worker-name remotely without asking for a return value
(defn run-worker-without-return [worker-name-keyword args]
  (let [request-object (request-envelope worker-name-keyword args)]
    (send-message WORKER-QUEUE request-object)))

(defmacro fire-and-forget [worker-symbol args]
  `(run-worker-without-return (keyword '~worker-symbol) ~args))

; bcast requests to all queues asking for executing worker fn.
(defn run-worker-on-all-servers
  "Broadcast a request to every server's queue asking it to run the worker fn."
  [worker-name-keyword args]
  (let [request-object (request-envelope worker-name-keyword args)]
    (send-message BROADCAST-EXCHANGE FANOUT-EXCHANGE-TYPE BROADCAST-QUEUE request-object)))

;; run everywhere by broadcasting the request to all queues.
;; NOTE(review): '~args passes the arg forms unevaluated; callers must pass literals.
(defmacro run-worker-everywhere [worker-symbol & args]
  `(run-worker-on-all-servers (keyword '~worker-symbol) '~args))

;; -------- file: java-array.clj --------
;; java collections / JVM arrays from Clojure
;; (load-file "java-array.clj")

;; first, import the java packages.
;; (:use ...) mixes in all fns, symbols, and mappings from another namespace.
;; (:import [pkg1] [pkg2 fn1 ...]) imports Java packages.
;; (:require [namespace_1 :ref local_namespace_])
;; when defining an ns, include only the references that are used:
;; :exclude, :only, :as, :refer-clojure, :import, :use, :load, :require.
(ns java-array
  (:import [java.text SimpleDateFormat]
           [java.util Calendar TimeZone])
  (:use clojure.contrib.io
        clojure.contrib.seq-utils))
;; FIX(review): the ns form above was missing its closing paren in the original.

;; list logic operation, (and list1 list2)
(def data [[:x :e :e] [:o :x :e] [:o :e :x]])
(for [x [0 1 2]] (nth (nth data x) x))         ;; get diagonal
(for [x [0 1 2]] (nth (nth data x) (- 3 x)))   ;; reverse diagonal

;; use a java array as a mutable container for intermediate results;
;; here we are talking about JVM arrays: a mutable container.
;; FIX(review): the line above lacked its comment prefix in the original.
;; (alength tokens) (aget tokens 2) (aset tokens 2 "actionable")

;; split a string into a java array
(def tokens (.split "clojure.in.action" "\\."))
;; use amap to transform and prn the array
(def z (amap tokens idx ret (aset ret idx (.concat "xx" (aget ret idx)))))
(amap z idx ret (prn (aget ret idx)))

(defn asum [#^floats xs]
  (areduce xs i ret (float 0) (+ ret (aget xs i))))

(asum (float-array [1 2 3]))

;; types of arrays: primitive and reference.
;; primitive arrays: boolean-array byte-array char-array double-array
;;   float-array int-array long-array object-array short-array
;; use make-array / into-array to create primitive arrays:
(doto (StringBuilder.
"abc") 37 | (.append (char-array [\x \y \z]))) 38 | 39 | (let [ary (make-array Integer/TYPE 3 3)] ;; obtain Class objects for the primitive types 40 | (dotimes [i 3] 41 | (dotimes [j 3] 42 | (aset ary i j (+ i j)))) 43 | (map seq ary)) 44 | 45 | (into-array Integer/TYPE [1 2 3]) 46 | 47 | ;; reference array : Character [], array with boxed primitive. 48 | ;; into-array ret reference array of boxed primitive 49 | (into-array ["a" "b" "c"]) 50 | 51 | (doto (StringBuilder. "abc") 52 | (.append (into-array [\x \y \z]))) 53 | ;=> #] 54 | 55 | (into-array [(java.util.Date.) (java.sql.Time. 0)]) 56 | 57 | ;; To create a heterogeneous array of java.lang.Object, use the to-array or to-array-2d 58 | (to-array-2d [[1 2 3] [4 5 6]]) ;;=> # 59 | (to-array ["a" 1M #(%) (proxy [Object] [])]) ;;=> # 60 | (to-array [1 (int 2)]) ;;=> #]]]] 61 | 62 | ;; variadic method/constructor calls 63 | (String/format "An int %d and a String %s" (to-array [99, "luftballons"])) 64 | 65 | ;; clojure collections are map, set, sequential(list, vector) 66 | ;; clojure types are atom, array 67 | ;; Clojure object are boxed types. 68 | 69 | 70 | ;; JVM array are mutale. use aset to change ary content. 71 | (def ary (into-array [1 2 3])) 72 | (def sary (seq ary)) 73 | (aset ary 0 42) 74 | 75 | ;; Be cautious when sharing arrays from one function to the next, and especially across threads. 76 | ;; amap maps an expr across the ary. 
77 | (defn asum-sq [xs] 78 | (let [dbl (amap xs i ret 79 | (* (aget xs i) 80 | (aget xs i)))] 81 | (areduce dbl i ret 0 82 | (+ ret (aget dbl i))))) 83 | 84 | (asum-sq (float-array [1 2 3 4 5])) 85 | 86 | 87 | ;; sort a collection and convert back to vector for random access 88 | ;; 89 | (defn sorted-vec 90 | [coll] 91 | (let [arr (into-array coll)] 92 | (java.util.Arrays/sort arr) 93 | (vec arr))) 94 | 95 | ;; 96 | ;; using forName to determine 97 | (defmulti what-is class) 98 | (defmethod what-is (Class/forName "[Ljava.lang.String;""]") [a] "1d String") 99 | (defmethod what-is (Class/forName "[[Ljava.lang.Object;""]]") [a] "2d Object") 100 | (defmethod what-is (Class/forName "[[[[I""]]]]") [a] "Primitive 4d int") 101 | (defmethod what-is (Class/forName "[[D""]]") [a] "Primitive 2d double") 102 | (defmethod what-is (Class/forName "[Lclojure.lang.PersistentVector;""]") [a]) 103 | 104 | ;; use java util collections static methods, i.e., comparators, sort, etc. 105 | (import '[java.util Comparator Collections ArrayList]') 106 | (defn gimme [] (ArrayList. [1 3 4 8 2])) 107 | (doto (gimme) 108 | (Collections/sort (Collections/reverseOrder))) 109 | 110 | (doto (gimme) (Collections/sort #(compare %2 %1))) 111 | (doto (gimme) (Collections/sort >)) 112 | (doto (gimme) (Collections/sort <)) 113 | (doto (gimme) (Collections/sort (complement <))) 114 | 115 | ;; create a thread execute a runnable 116 | (doto (Thread. 117 | #(do (Thread/sleep 5000) 118 | (println "haikeeba!"))) 119 | .start) 120 | 121 | ;; callable with future 122 | ;; FutureTask ft = new FutureTask(new Callable(){public V call()}); 123 | ;; how do we pass args ? 124 | (import '[java.util.concurrent FutureTask]') 125 | (let [f (FutureTask. #(do (Thread/sleep 5000) 42))] 126 | (.start (Thread. #(.run f))) 127 | (.get f)) ;; will block until done. 128 | 129 | ;; java.util.list conformance for sequence and seq. 130 | ;; no generic consideration due to erasure. 
131 | ;; Clojure doesn't handle generics anyway type info does not exist at runtime. 132 | (.get '[a b c] 1) 133 | (.containsAll '[a b c] '[b c]) 134 | (.add '[a b c] 'd) ;; sequence not mutable 135 | 136 | ;; java.lang.comparable and comparator 137 | ;; vector is the only collection that impls Comparable IF. 138 | ;; 139 | (.compareTo [:a] [:a]) 140 | (.compareTo [:a :b] [:a]) 141 | (.compareTo [:a :b] [:a :b :c]) 142 | (sort [[:a :b :c] [:a] [:a :b]]) 143 | 144 | ;; java.util.Collection 145 | ;; idiom : use a Clojure sequence as a model to build a mutable sequence to use Java Collections API 146 | 147 | (defn shuffle [coll] 148 | (seq (doto (java.util.ArrayList. coll) 149 | java.util.Collections/shuffle))) 150 | 151 | (shuffle (range 10)) 152 | 153 | ;; java.util.map, you can operate on JVM objects directly. 154 | (doto (java.util.HashMap.) (.put :a "xxx")) 155 | 156 | (java.util.Collections/unmodifiableMap 157 | (doto (java.util.HashMap.) (.put :a 1))) 158 | 159 | (into {} (doto (java.util.HashMap.) (.put :a 1))) 160 | 161 | ;; 162 | ;; When writing Clojure code, use errors to mean can’t continue and 163 | ;; exceptions to mean can or might continue. 164 | ;; Clojure's take on checked exception. 165 | ;; By default, declare that all functions throw the root Exception or RuntimeException. 166 | 167 | 168 | 169 | 170 | ;; 171 | ;; use clojure for data process 172 | ;; 173 | ;; parse a string with java.lang.String 174 | (defn parse-line [line] 175 | (let [tokens (.split (.toLowerCase line) " ")] 176 | (map #(vector % 1) tokens))) 177 | 178 | (parse-line "Twas brillig and the slithy toves") 179 | 180 | ;; combine a seq of key value pairs, group by reduce to a map. 
;; group the mapped [word 1] pairs by key and collect the values per key
(defn combine [mapped]
  (->> (apply concat mapped)
       (group-by first)
       (map (fn [[k v]]
              {k (map second v)}))
       (apply merge-with conj)))

;; FIX(review): the use form carried a stray trailing quote; removed.
(use 'clojure.contrib.io)
(combine (map parse-line (read-lines "/Users/e51141/tmp/x")))

;; sum the tally counts for one key's vector of values
(defn sum [[k v]]
  {k (apply + v)})

;; sum the value vector for each key, then merge the keys
(defn reduce-parsed-lines [collected-values]
  (apply merge (map sum collected-values)))

;; integrated word-frequency solution
(defn word-frequency [filename]
  (->> (read-lines filename)
       (map parse-line)
       (combine)
       (reduce-parsed-lines)))

;; -------- file: dbconn/src/dbconn/mysql/mysql_datamapper.clj --------
(ns dbconn.mysql.mysql-datamapper
  (:use [clojure.contrib.str-utils])
  (:require [clojure.data.json :as json])
  (:require [clojure.java.jdbc :as jdbc])
  (:use [korma.core])
  (:use [korma.db])
  (:require [clojure.string :as str]))

;; the author of clj-record does not recommend using it:
;; http://elhumidor.blogspot.com/2012/11/why-not-to-use-my-library-clj-record.html
;; use http://sqlkorma.com/ instead, it is saner.
;; All the parts of a query in Korma can be composed at will.

;; use the mysql helper with a connection-spec map to create a connection pool.
(def mys (mysql {:classname "com.mysql.jdbc.Driver"
                 :subprotocol "mysql"
                 :subname "//localhost:3306/test"   ; dbname is test
                 :user "root"}))

(def pg (postgres {:db "korma"
                   :user "korma"
                   :password "kormapass"
                   ;; optional keys
                   :host "myhost"
                   :port "4567"
                   :delimiters ""}))

; create db connection based on mys configuration.
(defdb sparkle-db mys)

;; entity models
(declare users email address state account posts)

;; Tables must be created manually; an entity only represents the table. DDL:
; create table account (
;   id INT NOT NULL AUTO_INCREMENT PRIMARY KEY,
;   users_id INT NOT NULL,
;   balance Decimal(10,4)
; )ENGINE=InnoDB DEFAULT CHARSET=utf8;

; create table users (
;   id INT NOT NULL AUTO_INCREMENT PRIMARY KEY,
;   first VARCHAR(100),
;   last VARCHAR(100),
;   account_id INT,
;   active BOOL
; )ENGINE=InnoDB DEFAULT CHARSET=utf8;
; alter table users add foreign key(account_id) references account(id) on delete cascade;

; create table email (
;   id INT NOT NULL AUTO_INCREMENT PRIMARY KEY,
;   users_id INT NOT NULL,
;   email VARCHAR(100),
;   FOREIGN KEY (users_id) REFERENCES users(id) ON DELETE CASCADE
; )ENGINE=InnoDB DEFAULT CHARSET=utf8;

; create table state_st (
;   id INT NOT NULL AUTO_INCREMENT PRIMARY KEY,
;   name VARCHAR(100)
; )ENGINE=InnoDB DEFAULT CHARSET=utf8;

; create table address (
;   id INT NOT NULL AUTO_INCREMENT PRIMARY KEY,
;   users_id INT NOT NULL,
;   id_state INT NOT NULL,
;   FOREIGN KEY (users_id) REFERENCES users(id) ON DELETE CASCADE,
;   FOREIGN KEY (id_state) REFERENCES state_st(id) ON DELETE CASCADE
; )ENGINE=InnoDB DEFAULT CHARSET=utf8;

; create table posts (
;   id INT NOT NULL AUTO_INCREMENT PRIMARY KEY,
;   content VARCHAR(100)
; )ENGINE=InnoDB DEFAULT CHARSET=utf8;

; create table users_posts (
;   posts_id INT NOT NULL,
;   users_id INT NOT NULL,
;   FOREIGN KEY (posts_id) REFERENCES posts(id) ON DELETE CASCADE,
;   FOREIGN KEY (users_id) REFERENCES users(id) ON DELETE CASCADE
; )ENGINE=InnoDB DEFAULT CHARSET=utf8;

(defentity users
  ;; Basic configuration
  (pk :id)               ;; default is "id"; used for relationship joins (line unnecessary)
  (table :users)         ;; default is the symbol name (line unnecessary)
  (database sparkle-db)  ;; default is the last defdb (line unnecessary)
  (entity-fields :first :last)   ;; default fields for selects

  ;; Mutations
  ;; applied to records/values going INTO the db
  (prepare (fn [{last :last :as v}]
             (if last   ; if the record has a last col, upper-case it
               (assoc v :last (str/upper-case last))
               v)))
  ;; applied to all query results coming FROM the db
  (transform (fn [{first :first :as v}]
               (if first   ; note how the `first` binding shadows clojure.core/first
                 (assoc v :first (str/capitalize first))
                 v)))

  ;; Relationships
  ;; fk on address table in table_id format: address.users_id = users.id
  (has-one address)
  ;; entity to sub-entity; fk on sub-entity in table_id format:
  ;; email.users_id = users.id
  (has-many email)
  ;; sub-entity to entity; this entity holds the fk in tablename_id format:
  ;; users.account_id = account.id (belongs-to and has-one come in pairs)
  (belongs-to account)
  ;; join table users_posts with columns users_id and posts_id;
  ;; like has-many, fetches the results in a second query per element
  (many-to-many posts :users_posts))

;; an entity defined as the result of a subselect, with a table alias
(defentity subselect-example
  (table (subselect users (where {:active true}))
         :activeUsers))

;; email entity: users.id = email.users_id, one user to many emails
(defentity email
  (belongs-to users))

;; address entity: fks back to users and state
(defentity address
  (pk :my_pk)   ; sets the primary key to "my_pk"
  (belongs-to users)
  (belongs-to state {:fk :id_state}))   ; fk: state.id = address.id_state

(defentity state
  (table :state_st)   ; sets the table to "state_st"
  (has-many address))

(defentity account
  (has-one users))

(defentity posts
  (many-to-many users :users_posts))

;; smoke test with raw jdbc
(defn select-test-tbl []
  (jdbc/with-connection {:classname "com.mysql.jdbc.Driver"
                         :subprotocol "mysql"
                         :subname "//localhost:3306/test"   ; dbname is test
                         :user "root"}
    (jdbc/with-query-results rs ["select * from test"]
      (prn rs))))

;; populate all tables
(defn populate-accounts []
  (-> (insert* "account")
      (values [{:users_id 1 :balance 0}
               {:users_id 1 :balance 1}
               {:users_id 2 :balance 0}
               {:users_id 3 :balance 0}])
      (insert)))

;; populate users
(defn populate-users []
  (-> (insert* "users")
      (values [{:first "mike" :last "jackson" :account_id 1}   ; auto ids start at 1
               {:first "bon" :last "jovi" :account_id 2}
               {:first "kenny" :last "G" :account_id 3}])
      (insert)))

(defn populate-email []
  (-> (insert* "email")
      (values [{:users_id 7 :email "user_0@test.com"}
               {:users_id 7 :email "user_0_1@test.com"}
               {:users_id 8 :email "user_1@test.com"}
               {:users_id 9 :email "user_2@test.com"}])
      (insert)))

(defn populate-db []
  (populate-accounts)
  (populate-users)
  (populate-email))

;; select and subselect
(defn get-user [name]
  (select users
          (fields :first :last :email.email)   ; :table.field
          (where {:last [like name]})
          (join email (= :email.users_id :id))
          (where {:email.id [in (subselect email
                                           (fields :id)
                                           (where {:email [like "%@test%"]}))]})))
;; `with` is only applicable for one-one mappings, otherwise duplicate-key error:
;; (with email
;;   (fields :email)
;;   (where {:email [like "%@test%"]}))

;; -------- file: trident-clj/src/trident_clj/mysql/mysql_datamapper.clj --------
;; FIX(review): this file declared ns dbconn.mysql.mysql-datamapper, which both
;; collides with dbconn/src/dbconn/mysql/mysql_datamapper.clj and mismatches the
;; file path trident_clj/mysql/; corrected to trident-clj.mysql.mysql-datamapper.
(ns trident-clj.mysql.mysql-datamapper
  (:use [clojure.contrib.str-utils])
  (:require [clojure.data.json :as json])
  (:require [clojure.java.jdbc :as jdbc])
  (:use [korma.core])
  (:use [korma.db])
  (:require [clojure.string :as str]))

;; clj-record's author recommends against using it; use http://sqlkorma.com/
;; instead, it is saner. All parts of a Korma query can be composed at will.

; use mysql help with connection spec map to create connection pool.
;; NOTE(review): this file is a byte-for-byte duplicate of
;; dbconn/src/dbconn/mysql/mysql_datamapper.clj; consider consolidating.
(def mys (mysql {:classname "com.mysql.jdbc.Driver"
                 :subprotocol "mysql"
                 :subname "//localhost:3306/test"   ; dbname is test
                 :user "root"}))

(def pg (postgres {:db "korma"
                   :user "korma"
                   :password "kormapass"
                   ;; optional keys
                   :host "myhost"
                   :port "4567"
                   :delimiters ""}))

;; create the db connection from the mys configuration
(defdb sparkle-db mys)

;; entity models
(declare users email address state account posts)

;; Tables must be created manually; an entity only represents the table. DDL:
; create table account (
;   id INT NOT NULL AUTO_INCREMENT PRIMARY KEY,
;   users_id INT NOT NULL,
;   balance Decimal(10,4)
; )ENGINE=InnoDB DEFAULT CHARSET=utf8;

; create table users (
;   id INT NOT NULL AUTO_INCREMENT PRIMARY KEY,
;   first VARCHAR(100),
;   last VARCHAR(100),
;   account_id INT,
;   active BOOL
; )ENGINE=InnoDB DEFAULT CHARSET=utf8;
; alter table users add foreign key(account_id) references account(id) on delete cascade;

; create table email (
;   id INT NOT NULL AUTO_INCREMENT PRIMARY KEY,
;   users_id INT NOT NULL,
;   email VARCHAR(100),
;   FOREIGN KEY (users_id) REFERENCES users(id) ON DELETE CASCADE
; )ENGINE=InnoDB DEFAULT CHARSET=utf8;

; create table state_st (
;   id INT NOT NULL AUTO_INCREMENT PRIMARY KEY,
;   name VARCHAR(100)
; )ENGINE=InnoDB DEFAULT CHARSET=utf8;

; create table address (
;   id INT NOT NULL AUTO_INCREMENT PRIMARY KEY,
;   users_id INT NOT NULL,
;   id_state INT NOT NULL,
;   FOREIGN KEY (users_id) REFERENCES users(id) ON DELETE CASCADE,
;   FOREIGN KEY (id_state) REFERENCES state_st(id) ON DELETE CASCADE
; )ENGINE=InnoDB DEFAULT CHARSET=utf8;

; create table posts (
;   id INT NOT NULL AUTO_INCREMENT PRIMARY KEY,
;   content VARCHAR(100)
; )ENGINE=InnoDB DEFAULT CHARSET=utf8;

; create table users_posts (
;   posts_id INT NOT NULL,
;   users_id INT NOT NULL,
;   FOREIGN KEY (posts_id) REFERENCES posts(id) ON DELETE CASCADE,
;   FOREIGN KEY (users_id) REFERENCES users(id) ON DELETE CASCADE
; )ENGINE=InnoDB DEFAULT CHARSET=utf8;

(defentity users
  ;; Basic configuration (pk/table/database lines restate the defaults)
  (pk :id)
  (table :users)
  (database sparkle-db)
  (entity-fields :first :last)   ;; default fields for selects

  ;; Mutations: prepare runs on the way into the db, transform on the way out
  (prepare (fn [{last :last :as v}]
             (if last
               (assoc v :last (str/upper-case last))
               v)))
  (transform (fn [{first :first :as v}]
               (if first   ; `first` here shadows clojure.core/first
                 (assoc v :first (str/capitalize first))
                 v)))

  ;; Relationships
  (has-one address)       ;; address.users_id = users.id
  (has-many email)        ;; email.users_id = users.id
  (belongs-to account)    ;; users.account_id = account.id
  (many-to-many posts :users_posts))   ;; via join table users_posts

;; an entity defined as the result of a subselect, with a table alias
(defentity subselect-example
  (table (subselect users (where {:active true}))
         :activeUsers))

;; one user to many emails: users.id = email.users_id
(defentity email
  (belongs-to users))

;; address entity: fks back to users and state
(defentity address
  (pk :my_pk)
  (belongs-to users)
  (belongs-to state {:fk :id_state}))   ; state.id = address.id_state

(defentity state
  (table :state_st)
  (has-many address))

(defentity account
  (has-one users))

(defentity posts
  (many-to-many users :users_posts))

;; smoke test with raw jdbc
(defn select-test-tbl []
  (jdbc/with-connection {:classname "com.mysql.jdbc.Driver"
                         :subprotocol "mysql"
                         :subname "//localhost:3306/test"   ; dbname is test
                         :user "root"}
    (jdbc/with-query-results rs ["select * from test"]
      (prn rs))))

;; populate all tables
(defn populate-accounts []
  (-> (insert* "account")
      (values [{:users_id 1 :balance 0}
               {:users_id 1 :balance 1}
               {:users_id 2 :balance 0}
               {:users_id 3 :balance 0}])
      (insert)))

(defn populate-users []
  (-> (insert* "users")
      (values [{:first "mike" :last "jackson" :account_id 1}   ; auto ids start at 1
               {:first "bon" :last "jovi" :account_id 2}
               {:first "kenny" :last "G" :account_id 3}])
      (insert)))

(defn populate-email []
  (-> (insert* "email")
      (values [{:users_id 7 :email "user_0@test.com"}
               {:users_id 7 :email "user_0_1@test.com"}
               {:users_id 8 :email "user_1@test.com"}
               {:users_id 9 :email "user_2@test.com"}])
      (insert)))

(defn populate-db []
  (populate-accounts)
  (populate-users)
  (populate-email))

;; select and subselect
(defn get-user [name]
  (select users
          (fields :first :last :email.email)   ; :table.field
          (where {:last [like name]})
          (join email (= :email.users_id :id))
          (where {:email.id [in (subselect email
                                           (fields :id)
                                           (where {:email [like "%@test%"]}))]})))
;; `with` is only applicable for one-one mappings, otherwise duplicate-key error:
;; (with email
;;   (fields :email)
;;   (where {:email [like "%@test%"]}))

;; -------- file: class.clj --------
;; A class object system for clojure
;; (load-file "class.clj")

;; (:use mixes in all fns, symbols, and mappings from another _NameSpace_.)
;; (:import [pkg1] [pkg2 fn1 ...]) import Java packages
;; (:require [namespace_1 :ref local_namespace_])
;; when defining ns, include only the references that are used:
;; :exclude, :only, :as, :refer-clojure, :import, :use, :load, and :require.
;; ;use naked could corrupt the namespace.
;; (:use :only)
;; :import working with java deftype defrecord

(ns my-class
  (:refer-clojure :exclude [defstruct])
  (:use [clojure.test :only (are is)])
  (:require (clojure [zip :as z]))
  (:import (java.util Collection)))
;; FIX(review): the import clause was written (java.util.Collection), which
;; names a package with no classes; corrected to (java.util Collection).

;; method spec: destructure a (method name args body...) form into [kw fn-form]
(defn method-spec [sexpr]
  (let [name (keyword (second sexpr))
        body (next sexpr)]
    [name (conj body 'fn)]))

(method-spec '(method age [] (* 2 10)))

;; collect all method specs into a map of name-keyword -> fn form
(defn method-specs [sexprs]
  (->> sexprs
       (filter #(= 'method (first %)))
       (mapcat method-spec)
       (apply hash-map)))

;; specs quoted as unevaluated forms
(method-specs '((method age []
                  (* 2 10))
                (method greet [visitor]
                  (str "Hello there, " visitor))))

;; a new object is just a closure over instance state.
;; FIX(review): `this` must be declared ^:dynamic for `binding` to work on
;; Clojure 1.3+; a bare (declare this) makes (binding [this ...]) throw.
(def ^:dynamic this nil)

(defn new-object [klass]
  (let [state (ref {})]
    ;; name the closure (thiz) so it can be bound to `this` for method bodies
    (fn thiz [command & args]
      (condp = command
        :class klass
        :class-name (klass :name)
        :set! (let [[k v] args]
                (dosync (alter state assoc k v))
                nil)
        :get (let [[key] args]
               (key @state))
        (let [method (klass :method command)]
          (if-not method
            (throw (RuntimeException.
                     (str "Unable to respond to " command))))
          (binding [this thiz]
            (apply method args)))))))

;; a new class is a closure over its methods
(defn find-method [method-name instance-methods]
  (instance-methods method-name))

(defn new-class [class-name methods]
  (fn klass [command & args]
    (condp = command
      :name (name class-name)
      :new (new-object klass)
      :method (let [[method-name] args]
                (find-method method-name methods)))))

;; use def inside the macro to force evaluation of the fn map,
;; e.g. {:fname (fn [] (* 2 4))}
(defmacro defclass [class-name & specs]
  (let [fns (or (method-specs specs) {})]
    `(def ~class-name (new-class '~class-name ~fns))))

(defclass Person
  (method age []
    (* 2 10))
  (method greet [visitor]
    (str "Hello there, " visitor))
  (method about [diff]   ; invokes :age in the same closure via the `this` binding
    (str "I was born about " (+ diff (this :age)) " years ago")))

;; after parsing method specs we get fns:
;; {:age (fn age [] (* 2 10)), :greet (fn greet [visitor] (str "Hello there, " visitor))}
;; to force evaluation, put it through def: (eval `(def efns ~fns))
;; (apply (efns :age) [])
(Person :method :age)

(def shelly (Person :new))
(shelly :age)
(shelly :greet "Nancy")

;;;;;;;;;;;;;;;;;;;
;; A macro asks the compiler to generate code; s-exprs passed to defmacro are
;; NOT evaluated.
;; ' yields the unevaluated form. ` (syntax-quote) resolves symbols to fully
;; qualified names and builds a template of the data structure. ~ (unquote)
;; inside ` resolves the form to a data structure and evaluates it — unquote
;; must live inside a syntax-quote, otherwise "unquote is unbound".
;; Syntax-quote/unquote only resolve symbols; they have no effect on keywords,
;; numbers, or string literals. ~@ (unquote-splicing) splices a sequence,
;; removing its surrounding ().
(defmacro dbg [fn-name args & body]
  `(defn ~fn-name ~args
     (println "dbg ...")
     ~@body))

;; when passing '(* 2 3) to a macro, the macro won't evaluate the passed form
(defn gen-map [nm spec] {(keyword nm) spec})
(defmacro fnmacro [name] (let [m (gen-map name '(fn [n] (* 2 n)))] `(prn ~m) `~m))
(fnmacro age)

;; args are a task name and spec (not wrapped in a list); returns a task map
;; of fn name -> fn closure
(defmacro fnmacro [name spec] (let [m (gen-map name spec)] `(prn ~m) `~m))
(apply ((fnmacro foo (fn [n] (* 2 n))) :foo) [4])

;; if fn body is in quoted list data structure, directly eval the data structure.
146 | (def spec '(age [n] (* 2 n))) 147 | (defn fnw [sexpr] 148 | (eval (conj spec 'fn))) 149 | (apply (fnw spec) [4]) 150 | 151 | ; if fn body is code, not list data structure, pass to defmacro to wrap it. 152 | (defmacro fn-wrapper [fname args & body] 153 | `(defn ~fname ~args 154 | (prn "calling " '~fname '~args '~@body) ; when prn, use substituded val, quote to non-evaluated form. 155 | ~@body)) 156 | (fn-wrapper foo [n] (* 2 n)) 157 | (foo 4) 158 | 159 | ; dbg macro take a form, and prn it and its evaluated result. 160 | ; because (eval "x") eval to itself, we can always add eval without side effect, so 161 | ; the macro can take both (quote (* 2 3)) as well as (* 2 3) 162 | (defmacro dbg [sexpr] 163 | (prn sexpr) 164 | `~sexpr) ; sexpr when passed into defmacro, is un-evalued. quote unquote restore and eval the form. 165 | 166 | (dbg (* 2 4)) 167 | (dbg '(* 2 4)) 168 | 169 | (defmacro dbg-ev [sexpr] 170 | (prn sexpr) 171 | `(eval ~sexpr)) ; eval a data structur from unquoting the formm 172 | 173 | (dbg-ev (* 2 4)) 174 | (dbg '(* 2 4)) 175 | (let [f (dbg-ev '(fn [n] (* 2 n)))] (f 3)) 176 | 177 | ; fn composition with defmacro defn 178 | (defmacro fn-wrapper [name arg alg data-list] 179 | (let [bd (conj data-list alg)] 180 | `(defn ~name ~arg ~bd))) 181 | 182 | ; create a fn that map algebra to a list of data 183 | (fn-wrapper double [n] * (2 n)) 184 | 185 | 186 | ; macro examples 187 | (defmacro declare [ & names] 188 | `(do 189 | ~@(map #(list 'def %) names))) 190 | (macroexpand-1 '(declare add multiply subtract divide)) 191 | 192 | ; and is just another macro 193 | (defmacro my-and 194 | ([] true) 195 | ([x] x) 196 | ([x & next] 197 | `(if ~x 198 | (my-and ~@next) 199 | ~x))) 200 | 201 | (defmacro and 202 | ([] true) 203 | ([x] x) 204 | ([x & next] 205 | `(let [and# ~x] 206 | (if and# 207 | (and ~@next) 208 | and#)))) 209 | 210 | ; time (* 1234 12345) 211 | (defmacro time [expr] 212 | `(let [start# (System/nanotime) 213 | ret# ~expr] ; unquote expr, trigger 
evaluation of expr. 214 | (prn 215 | (str "Elapsed time :" 216 | (/ (double (- (System/nanotime) start#)) 1000000.0) 217 | " msecs")) 218 | ret#)) 219 | 220 | 221 | 222 | 223 | -------------------------------------------------------------------------------- /trident/src/main/java/com/colorcloud/trident/GroupAggregate.java: -------------------------------------------------------------------------------- 1 | package com.colorcloud.trident; 2 | 3 | import java.io.IOException; 4 | import java.util.HashMap; 5 | import java.util.List; 6 | import java.util.Map; 7 | 8 | import storm.trident.TridentTopology; 9 | import storm.trident.operation.Aggregator; 10 | import storm.trident.operation.BaseAggregator; 11 | import storm.trident.operation.Filter; 12 | import storm.trident.operation.Function; 13 | import storm.trident.operation.TridentCollector; 14 | import storm.trident.operation.TridentOperationContext; 15 | import storm.trident.tuple.TridentTuple; 16 | import backtype.storm.Config; 17 | import backtype.storm.LocalCluster; 18 | import backtype.storm.LocalDRPC; 19 | import backtype.storm.generated.StormTopology; 20 | import backtype.storm.tuple.Fields; 21 | import backtype.storm.tuple.Values; 22 | 23 | import com.colorcloud.trident.storage.JedisDB; 24 | 25 | /** 26 | * This example illustrates the usage of groupBy. GroupBy creates a "grouped stream" which means that subsequent aggregators 27 | * will only affect Tuples within a group. GroupBy must always be followed by an aggregator. 28 | * Because we are aggregating groups, we don't need to produce a hashmap for the per-location counts (as opposed to {@link BatchAggregate} 29 | * and we can use the simple Count() aggregator. 30 | */ 31 | public class GroupAggregate { 32 | public static final String TOPNAME = "location_groupaggregate"; 33 | 34 | /** 35 | * batch processing treats every batch process as a transaction. 
36 | */ 37 | public static class GroupTotal extends BaseAggregator> { 38 | private static final long serialVersionUID = 5747067380651287870L; 39 | private static final String TAG = "GroupTotal :"; 40 | Map result = new HashMap(); 41 | protected JedisDB jedis; 42 | int batchcnt = 0; 43 | 44 | /** 45 | * prepare only called once when creating object. Put any initiation code here. 46 | */ 47 | @Override 48 | public void prepare(Map conf, TridentOperationContext context) { 49 | System.out.println(TAG + " prepare :"); 50 | jedis = new JedisDB(); 51 | } 52 | 53 | /** 54 | * init is called upon object instantiation, ret the state to store the aggregation. 55 | */ 56 | @Override 57 | public Map init(Object batchId, TridentCollector collector) { 58 | System.out.println(TAG + "init : batchId : " + batchId); 59 | batchcnt = 0; 60 | return result; 61 | } 62 | 63 | /** 64 | * aggregate called upon every tuple inside the batch. 65 | * @param Map aggregate fn was given the global state so it can update global for each tuple. 66 | * @param tuple the current tuple to be processed 67 | * @param collector the collector to emit the processed tuple 68 | */ 69 | @Override 70 | public void aggregate(Map val, TridentTuple tuple, TridentCollector collector) { 71 | System.out.println(TAG + "aggregate :" + tuple); 72 | String loc = tuple.getString(3); 73 | //long cnt = tuple.getLong(1); 74 | batchcnt += 1; 75 | long cnt = batchcnt; 76 | long totcnt = 0; 77 | if( val.get(loc) != null ){ 78 | totcnt = val.get(loc); 79 | } 80 | val.put(loc, cnt+totcnt); 81 | List v = tuple.getValues(); 82 | //v.add(totcnt); 83 | collector.emit(new Values(cnt, totcnt)); 84 | } 85 | 86 | /** 87 | * complete called after done with every batch. store summary map to redis. 
88 | */ 89 | @Override 90 | public void complete(Map val, TridentCollector collector) { 91 | System.out.println(TAG + "complete :" + val); 92 | jedis.storeMap("loc-cnt", val); 93 | //collector.emit(new Values(val)); 94 | } 95 | } 96 | 97 | /** 98 | * this function will be serialized and distributed to all nodes to run. 99 | * every member inside must be serializable in order to be distributed. 100 | */ 101 | static class DBWriteBolt implements Filter, Function { 102 | private static final String TAG = "DBWriteBolt :"; 103 | protected JedisDB jedis; 104 | 105 | public DBWriteBolt() { 106 | System.out.println(TAG + " constructor: "); 107 | } 108 | 109 | /** 110 | * prepare only called once for filter function at the time of init. 111 | */ 112 | @Override 113 | public void prepare(Map conf, TridentOperationContext context) { 114 | System.out.println(TAG + " prepare called, init db connection : "); 115 | jedis = new JedisDB(); // use default configuration. 116 | } 117 | 118 | @Override 119 | public boolean isKeep(TridentTuple tuple) { 120 | System.out.println(TAG + " iskeep : "); 121 | return true; 122 | } 123 | 124 | @Override 125 | public void cleanup() { 126 | // disconnect db connection 127 | } 128 | 129 | @Override 130 | public void execute(TridentTuple tuple, TridentCollector collector) { 131 | System.out.println(TAG + " execute : " + tuple.get(0)); // selected location as first ele in tuple 132 | Values v = new Values(tuple.get(0)); 133 | jedis.rpush("tweetloc", (String)tuple.get(0)); // save location to redis 134 | collector.emit(v); 135 | } 136 | } 137 | 138 | public static StormTopology buildTopology(LocalDRPC drpc) throws IOException { 139 | FakeTweetsBatchSpout spout = new FakeTweetsBatchSpout(100); // create spout as the source for the topology 140 | Function stateStore = new DBWriteBolt(); 141 | Aggregator> grpTotal = new GroupTotal(); 142 | storm.trident.operation.builtin.Count counter = new storm.trident.operation.builtin.Count(); 143 | 144 | // check 
field and tuple section in trident tutorial data model. 145 | // grouped stream, after aggregation, only contains grouping key and other fields emitted from aggregator. 146 | TridentTopology topology = new TridentTopology(); 147 | // if using top.newDRPCStream(topName, drpc), we can get result of processing tuple result = drpc.execute(topName, tuple); 148 | topology.newDRPCStream(TOPNAME, drpc) 149 | //topology.newStream("spout", spout) // topology src stream point to tweet spout 150 | .groupBy(new Fields("location")) // for each location fields, a virtual stream is created 151 | //.aggregate(new Fields("location"), counter, new Fields("count")) // aggregation on each location stream 152 | //.aggregate(new Fields("location", "count"), grpTotal, new Fields("location", "batch_count", "sum")) 153 | .aggregate(spout.getOutputFields(), grpTotal, new Fields("count", "sum")) 154 | .each(new Fields("location", "count", "sum"), new Utils.PrintFilter()); // after aggregation, emits aggregation result. 155 | 156 | // you can add more source spout stream as you like. 157 | // FakeTweetsBatchSpout spout2 = new FakeTweetsBatchSpout(10); // create spout as the source for the topology 158 | // Stream stream = topology.newStream("spout2", spout2); // topology src stream point to tweet spout 159 | // stream.each(new Fields("location"), stateStore, new Fields("duploc")) 160 | // .each(new Fields("id", "text", "actor", "duploc"), new Utils.PrintFilter()); 161 | 162 | return topology.build(); 163 | } 164 | 165 | public static StormTopology buildDRPCTopology(LocalDRPC drpc) throws IOException { 166 | FakeTweetsBatchSpout spout = new FakeTweetsBatchSpout(100); // create spout as the source for the topology 167 | Function stateStore = new DBWriteBolt(); 168 | Aggregator> grpTotal = new GroupTotal(); 169 | storm.trident.operation.builtin.Count counter = new storm.trident.operation.builtin.Count(); 170 | 171 | // check field and tuple section in trident tutorial data model. 
172 | // grouped stream, after aggregation, only contains grouping key and other fields emitted from aggregator. 173 | TridentTopology topology = new TridentTopology(); 174 | // if using top.newDRPCStream(topName, drpc), we can get result of processing tuple result = drpc.execute(topName, tuple); 175 | topology.newDRPCStream(TOPNAME, drpc) 176 | .each(new Fields("args"), new Utils.TextProcessor(), new Fields("textprocessed")) 177 | .project(new Fields("args", "textprocessed")); 178 | 179 | System.out.println(" >>>>>>>>>> build drpc topology"); 180 | return topology.build(); 181 | } 182 | 183 | 184 | public static void main(String[] args) throws Exception { 185 | Config conf = new Config(); 186 | LocalDRPC drpc = new LocalDRPC(); 187 | LocalCluster cluster = new LocalCluster(); 188 | 189 | 190 | //cluster.submitTopology(TOPNAME, conf, buildTopology(drpc)); 191 | 192 | cluster.submitTopology(TOPNAME, conf, buildDRPCTopology(drpc)); 193 | System.out.println(" >>>>>>>>>> done cluster submit topology"); 194 | 195 | Thread.sleep(5000); // give it some time to setup 196 | 197 | String result = drpc.execute(TOPNAME, "hello world"); 198 | System.out.println(">>>>>>>>>> drpc result :" + result); 199 | result = drpc.execute(TOPNAME, "hello world from world"); 200 | System.out.println(">>>>>>>>>> drpc result :" + result); 201 | } 202 | } 203 | -------------------------------------------------------------------------------- /trident/src/main/java/com/colorcloud/trident/Demo.java: -------------------------------------------------------------------------------- 1 | package com.colorcloud.trident; 2 | 3 | import java.io.IOException; 4 | import java.util.HashMap; 5 | import java.util.Map; 6 | 7 | import org.apache.commons.collections.MapUtils; 8 | 9 | import storm.trident.TridentTopology; 10 | import storm.trident.operation.Aggregator; 11 | import storm.trident.operation.BaseFunction; 12 | import storm.trident.operation.Filter; 13 | import storm.trident.operation.TridentCollector; 
14 | import storm.trident.operation.TridentOperationContext; 15 | import storm.trident.operation.builtin.Count; 16 | import storm.trident.tuple.TridentTuple; 17 | import backtype.storm.LocalDRPC; 18 | import backtype.storm.generated.StormTopology; 19 | import backtype.storm.tuple.Fields; 20 | import backtype.storm.tuple.Values; 21 | 22 | /** 23 | * This class is not mean to be run, instead it is mean to be read. This is the guideline I followed for giving a 24 | * hackaton at Berlin for the #4 Big Data Beers: http://www.meetup.com/Big-Data-Beers/events/112226662/ 25 | *

26 | * If you read through the code and the comments you will see how I explained different Trident concepts. 27 | *

28 | * If you want to run some stream you'll need to comment out everything else. Otherwise the topology will run all the 29 | * streams at the same time, which can be a bit of a chaos. 30 | * 31 | * @author pere 32 | */ 33 | public class Demo { 34 | 35 | /** 36 | * Dummy filter that just keeps tweets by "Pere" 37 | */ 38 | @SuppressWarnings({ "serial", "rawtypes" }) 39 | public static class PereTweetsFilter implements Filter { 40 | 41 | int partitionIndex; 42 | 43 | @Override 44 | public void prepare(Map conf, TridentOperationContext context) { 45 | this.partitionIndex = context.getPartitionIndex(); 46 | } 47 | 48 | @Override 49 | public void cleanup() { 50 | } 51 | 52 | @Override 53 | public boolean isKeep(TridentTuple tuple) { 54 | boolean filter = tuple.getString(1).equals("pere"); 55 | if(filter) { 56 | System.err.println("I am partition [" + partitionIndex + "] and I have filtered pere."); 57 | } 58 | return filter; 59 | } 60 | } 61 | 62 | /** 63 | * Dummy function that just emits the uppercased tweet text. 64 | */ 65 | @SuppressWarnings("serial") 66 | public static class UppercaseFunction extends BaseFunction { 67 | 68 | @Override 69 | public void execute(TridentTuple tuple, TridentCollector collector) { 70 | collector.emit(new Values(tuple.getString(0).toUpperCase())); 71 | } 72 | } 73 | 74 | /** 75 | * A simple Aggregator that produces a hashmap of key, counts. 
76 | */ 77 | @SuppressWarnings("serial") 78 | public static class LocationAggregator implements Aggregator> { 79 | 80 | int partitionId; 81 | 82 | @SuppressWarnings("rawtypes") 83 | @Override 84 | public void prepare(Map conf, TridentOperationContext context) { 85 | this.partitionId = context.getPartitionIndex(); 86 | } 87 | 88 | @Override 89 | public void cleanup() { 90 | } 91 | 92 | @Override 93 | public Map init(Object batchId, TridentCollector collector) { 94 | return new HashMap(); 95 | } 96 | 97 | @Override 98 | public void aggregate(Map val, TridentTuple tuple, TridentCollector collector) { 99 | String loc = tuple.getString(0); 100 | val.put(loc, MapUtils.getInteger(val, loc, 0) + 1); 101 | } 102 | 103 | @Override 104 | public void complete(Map val, TridentCollector collector) { 105 | System.err.println("I am partition [" + partitionId + "] and have aggregated: [" + val + "]"); 106 | collector.emit(new Values(val)); 107 | } 108 | } 109 | 110 | public static StormTopology buildTopology(LocalDRPC drpc) throws IOException { 111 | FakeTweetsBatchSpout spout = new FakeTweetsBatchSpout(); 112 | 113 | // A topology is a set of streams. 114 | // A stream is a DAG of Spouts and Bolts. 115 | // (In Storm there are Spouts (data producers) and Bolts (data processors). 116 | // Spouts create Tuples and Bolts manipulate then and possibly emit new ones.) 117 | 118 | // But in Trident we operate at a higher level. 119 | // Bolts are created and connected automatically out of higher-level constructs. 120 | // Also, Spouts are "batched". 121 | TridentTopology topology = new TridentTopology(); 122 | 123 | // Each primitive allows us to apply either filters or functions to the stream 124 | // We always have to select the input fields. 
125 | topology.newStream("filter", spout).each(new Fields("text", "actor"), new PereTweetsFilter()) 126 | .each(new Fields("text", "actor"), new Utils.PrintFilter()); 127 | 128 | // Functions describe their output fields, which are always appended to the input fields. 129 | topology.newStream("function", spout) 130 | .each(new Fields("text", "actor"), new UppercaseFunction(), new Fields("uppercased_text")) 131 | .each(new Fields("text", "uppercased_text"), new Utils.PrintFilter()); 132 | 133 | // As you see, Each operations can be chained. 134 | 135 | // Stream can be parallelized with "parallelismHint" 136 | // Parallelism hint is applied downwards until a partitioning operation (we will see this later). 137 | // This topology creates 5 spouts and 5 bolts: 138 | // Let's debug that with TridentOperationContext . partitionIndex ! 139 | topology.newStream("parallel", spout).each(new Fields("text", "actor"), new PereTweetsFilter()) 140 | .parallelismHint(5).each(new Fields("text", "actor"), new Utils.PrintFilter()); 141 | 142 | // A stream can be partitioned in various ways. 143 | // Let's partition it by "actor". What happens with previous example? 144 | topology.newStream("parallel_and_partitioned", spout).partitionBy(new Fields("actor")) 145 | .each(new Fields("text", "actor"), new PereTweetsFilter()).parallelismHint(5) 146 | .each(new Fields("text", "actor"), new Utils.PrintFilter()); 147 | 148 | // Only one partition is filtering, which makes sense for the case. 149 | // If we remove the partitionBy we get the previous behavior. 150 | 151 | // Before we have parallelism = 5 everywhere. What if we want only one spout? 152 | // We need to specify a partitioning policy for that to happen. 153 | // (We said that parallelism hint is applied downwards until a partitioning operation is found). 154 | 155 | // But if we don't want to partition by any field, we can just use shuffle() 156 | // We could also choose global() - with care! 
157 | topology.newStream("parallel_and_partitioned", spout).parallelismHint(1).shuffle() 158 | .each(new Fields("text", "actor"), new PereTweetsFilter()).parallelismHint(5) 159 | .each(new Fields("text", "actor"), new Utils.PrintFilter()); 160 | 161 | // Because data is batched, we can aggregate batches for efficiency. 162 | // The aggregate primitive aggregates one full batch. Useful if we want to persist the result of each batch only 163 | // once. 164 | // The aggregation for each batch is executed in a random partition as can be seen: 165 | topology.newStream("aggregation", spout).parallelismHint(1) 166 | .aggregate(new Fields("location"), new LocationAggregator(), new Fields("aggregated_result")) 167 | .parallelismHint(5).each(new Fields("aggregated_result"), new Utils.PrintFilter()); 168 | 169 | // The partitionAggregate on the other hand only executes the aggregator within one partition's part of the batch. 170 | // Let's debug that with TridentOperationContext . partitionIndex ! 171 | topology 172 | .newStream("partial_aggregation", spout) 173 | .parallelismHint(1) 174 | .shuffle() 175 | .partitionAggregate(new Fields("location"), new LocationAggregator(), 176 | new Fields("aggregated_result")).parallelismHint(6) 177 | .each(new Fields("aggregated_result"), new Utils.PrintFilter()); 178 | 179 | // (See what happens when we change the Spout batch size / parallelism) 180 | 181 | // A useful primitive is groupBy. 182 | // It splits the stream into groups so that aggregations only ocurr within a group. 183 | // Because now we are grouping, the aggregation function can be much simpler (Count()) 184 | // We don't need to use HashMaps anymore. 
185 | topology.newStream("aggregation", spout).parallelismHint(1).groupBy(new Fields("location")) 186 | .aggregate(new Fields("location"), new Count(), new Fields("count")).parallelismHint(5) 187 | .each(new Fields("location", "count"), new Utils.PrintFilter()); 188 | 189 | // EXERCISE: Use Functions and Aggregators to parallelize per-hashtag counts. 190 | // Step by step: 1) Obtain and select hashtags, 2) Write the Aggregator. 191 | 192 | // Bonus 1: State API. 193 | // Bonus 2: "Trending" hashtags. 194 | return topology.build(); 195 | } 196 | 197 | // public static void main(String[] args) throws Exception { 198 | // Config conf = new Config(); 199 | // 200 | // LocalDRPC drpc = new LocalDRPC(); 201 | // LocalCluster cluster = new LocalCluster(); 202 | // cluster.submitTopology("hackaton", conf, buildTopology(drpc)); 203 | // } 204 | } 205 | -------------------------------------------------------------------------------- /trident/src/main/java/com/colorcloud/trident/DrpcStream.java: -------------------------------------------------------------------------------- 1 | package com.colorcloud.trident; 2 | 3 | import java.io.IOException; 4 | import java.util.Arrays; 5 | import java.util.HashMap; 6 | import java.util.List; 7 | import java.util.Map; 8 | import java.util.regex.Matcher; 9 | import java.util.regex.Pattern; 10 | 11 | import org.apache.logging.log4j.LogManager; 12 | import org.apache.logging.log4j.Logger; 13 | 14 | import storm.trident.TridentState; 15 | import storm.trident.TridentTopology; 16 | import storm.trident.operation.Aggregator; 17 | import storm.trident.operation.BaseAggregator; 18 | import storm.trident.operation.Filter; 19 | import storm.trident.operation.Function; 20 | import storm.trident.operation.TridentCollector; 21 | import storm.trident.operation.TridentOperationContext; 22 | import storm.trident.state.State; 23 | import storm.trident.state.StateFactory; 24 | import storm.trident.tuple.TridentTuple; 25 | import backtype.storm.Config; 26 | 
import backtype.storm.LocalCluster; 27 | import backtype.storm.LocalDRPC; 28 | import backtype.storm.generated.StormTopology; 29 | import backtype.storm.task.IMetricsContext; 30 | import backtype.storm.tuple.Fields; 31 | import backtype.storm.tuple.Values; 32 | 33 | import com.colorcloud.trident.storage.JedisDB; 34 | 35 | /** 36 | * Build a topology with drpc spout, and streaming lines of text(log) into drpc spout, and get back 37 | * the processing result in an added field in tuple. 38 | * groupBy. GroupBy creates a "grouped stream" which means that subsequent aggregators can 39 | * only affect Tuples within the group. GroupBy must always be followed by an aggregator it knows how to aggregate group. 40 | * Because we are aggregating groups, we don't need to produce a hashmap for the per-location counts (as opposed to {@link BatchAggregate} 41 | * and we can use the simple Count() aggregator. 42 | */ 43 | public class DrpcStream { 44 | public static final String TOPNAME = "DRPC_STREAM"; 45 | public static final Logger logger = LogManager.getLogger(DrpcStream.class.getName()); 46 | public static final String TAG = "DRPC"; 47 | 48 | public static void log(String...strings){ 49 | System.out.println(Arrays.toString(strings)); 50 | } 51 | 52 | /** 53 | * first, the text tokenize and clean fn 54 | */ 55 | //public static class TextProcessor extends BaseFunction{ 56 | public static class TextProcessor implements Function { 57 | private static final long serialVersionUID = 1L; 58 | public static final String TAG = "TextProc"; 59 | public static final Logger logger = LogManager.getLogger(TextProcessor.class.getName()); 60 | 61 | @Override 62 | public void prepare(Map conf, TridentOperationContext context) { 63 | } 64 | 65 | @Override 66 | public void execute(TridentTuple tuple, TridentCollector collector) { 67 | String v = (String) tuple.getValue(0); 68 | logger.info(TAG, v); 69 | String loc = extractLocation(v); 70 | System.out.println("location : " + loc); 71 | 
collector.emit(new Values(loc, v)); // emits location field first, then cleartext 72 | } 73 | 74 | @Override 75 | public void cleanup() { 76 | // TODO Auto-generated method stub 77 | } 78 | 79 | private String extractLocation(String text) { 80 | String loc = " XXX "; 81 | Pattern locpattern = Pattern.compile("(.*) (: \\w+ :)( \\d{4}-\\d{2}-\\d{2}) (.*)", Pattern.CASE_INSENSITIVE); 82 | try{ 83 | Matcher m = locpattern.matcher(text); 84 | if(m.matches()){ 85 | loc = m.group(2); // group starts from 1 86 | } 87 | }catch(Exception e){ 88 | } 89 | 90 | loc = loc.substring(1,loc.length()-2).trim(); // exclusive the end index 91 | return loc; 92 | } 93 | } 94 | 95 | /** 96 | * state stores intermediate processing result. 97 | */ 98 | public static class LocationStateFactory implements StateFactory { 99 | public LocationStateFactory() { 100 | } 101 | 102 | @Override 103 | public State makeState(Map conf, IMetricsContext metrics, 104 | int partitionIndex, int numPartitions) { 105 | return new LocationState(partitionIndex, numPartitions); 106 | } 107 | } 108 | 109 | 110 | /** 111 | * batch processing treats every batch process as a transaction. 112 | */ 113 | public static class GroupTotal extends BaseAggregator> { 114 | public static final Logger logger = LogManager.getLogger(GroupTotal.class.getName()); 115 | private static final long serialVersionUID = 5747067380651287870L; 116 | private static final String TAG = "GroupTotal :"; 117 | Map result = new HashMap(); 118 | protected JedisDB jedis; 119 | int batchcnt = 0; 120 | 121 | /** 122 | * prepare only called once when creating object. Put any initiation code here. 123 | */ 124 | @Override 125 | public void prepare(Map conf, TridentOperationContext context) { 126 | logger.info(TAG, " prepare called, create jedis "); 127 | jedis = new JedisDB(); 128 | } 129 | 130 | /** 131 | * init is called upon object instantiation, ret the state to store the aggregation. 
132 | */ 133 | @Override 134 | public Map init(Object batchId, TridentCollector collector) { 135 | logger.info(TAG, "init : batchId : " + batchId); 136 | batchcnt = 0; 137 | return result; 138 | } 139 | 140 | /** 141 | * aggregate called upon every tuple inside the batch. 142 | * @param Map aggregate fn was given the global state so it can update global for each tuple. 143 | * @param tuple the current tuple to be processed 144 | * @param collector the collector to emit the processed tuple 145 | */ 146 | @Override 147 | public void aggregate(Map val, TridentTuple tuple, TridentCollector collector) { 148 | logger.info(TAG, "aggregate :" + tuple); 149 | 150 | String loc = tuple.getString(3); 151 | //long cnt = tuple.getLong(1); 152 | batchcnt += 1; 153 | long cnt = batchcnt; 154 | long totcnt = 0; 155 | if( val.get(loc) != null ){ 156 | totcnt = val.get(loc); 157 | } 158 | val.put(loc, cnt+totcnt); 159 | List v = tuple.getValues(); 160 | //v.add(totcnt); 161 | collector.emit(new Values(cnt, totcnt)); 162 | } 163 | 164 | /** 165 | * complete called after done with every batch. store summary map to redis. 166 | */ 167 | @Override 168 | public void complete(Map val, TridentCollector collector) { 169 | System.out.println(TAG + "complete :" + val); 170 | jedis.storeMap("loc-cnt", val); 171 | //collector.emit(new Values(val)); 172 | } 173 | } 174 | 175 | /** 176 | * this function will be serialized and distributed to all nodes to run. 177 | * every member inside must be serializable in order to be distributed. 178 | */ 179 | static class DBWriteBolt implements Filter, Function { 180 | private static final long serialVersionUID = 1L; 181 | private static final String TAG = "DBWriteBolt :"; 182 | protected JedisDB jedis; 183 | 184 | public DBWriteBolt() { 185 | System.out.println(TAG + " constructor: "); 186 | } 187 | 188 | /** 189 | * prepare only called once for filter function at the time of init. 
190 | */ 191 | @Override 192 | public void prepare(Map conf, TridentOperationContext context) { 193 | System.out.println(TAG + " prepare called, init db connection : "); 194 | jedis = new JedisDB(); // use default configuration. 195 | } 196 | 197 | @Override 198 | public boolean isKeep(TridentTuple tuple) { 199 | System.out.println(TAG + " iskeep : "); 200 | return true; 201 | } 202 | 203 | @Override 204 | public void cleanup() { 205 | // disconnect db connection 206 | } 207 | 208 | @Override 209 | public void execute(TridentTuple tuple, TridentCollector collector) { 210 | System.out.println(TAG + " execute : " + tuple.get(0)); // selected location as first ele in tuple 211 | Values v = new Values(tuple.get(0)); 212 | jedis.rpush("tweetloc", (String)tuple.get(0)); // save location to redis 213 | collector.emit(v); 214 | } 215 | } 216 | 217 | 218 | public static StormTopology buildDRPCTopology(LocalDRPC drpc) throws IOException { 219 | FakeTweetsBatchSpout spout = new FakeTweetsBatchSpout(100); // create spout as the source for the topology 220 | Function stateStore = new DBWriteBolt(); 221 | Aggregator> grpTotal = new GroupTotal(); 222 | storm.trident.operation.builtin.Count counter = new storm.trident.operation.builtin.Count(); // default counter aggregator 223 | TridentTopology topology = new TridentTopology(); 224 | 225 | // define text processing function 226 | Function textFn = new TextProcessor(); 227 | TridentState locState = topology.newStaticState(new LocationStateFactory()); 228 | LocationStateQuery stateQuery = new LocationStateQuery(); 229 | 230 | 231 | // grouped stream, after aggregation, only contains grouping key and other fields emitted from aggregator. 232 | 233 | topology.newDRPCStream(TOPNAME, drpc) // spout src from drpc execute, data wrapped into args field. 
234 | .each(new Fields("args"), textFn, new Fields("location", "cleartext")) // got clear text out of text processing 235 | .broadcast() 236 | .stateQuery(locState, new Fields("location", "cleartext"), stateQuery, new Fields("locationx", "locationTotal")) 237 | .project(new Fields("locationx", "locationTotal")); // ret two fields 238 | 239 | return topology.build(); 240 | } 241 | 242 | public static void streamText(LocalDRPC drpc, FakeTweetsBatchSpout spout) { 243 | String tweet, result; 244 | while((tweet = spout.getNextTweetString()) != null){ 245 | log("streamText src:" + tweet); 246 | // multiple fields packed into one ret string value. 247 | result = drpc.execute(TOPNAME, tweet); 248 | log("streamText reslt:" + result); 249 | } 250 | } 251 | 252 | public static void main(String[] args) throws Exception { 253 | Config conf = new Config(); 254 | LocalDRPC drpc = new LocalDRPC(); 255 | LocalCluster cluster = new LocalCluster(); 256 | FakeTweetsBatchSpout spout = new FakeTweetsBatchSpout(100); // create spout as the source for the topology 257 | spout.open(null, null); 258 | 259 | //cluster.submitTopology(TOPNAME, conf, buildTopology(drpc)); 260 | cluster.submitTopology(TOPNAME, conf, buildDRPCTopology(drpc)); 261 | 262 | logger.info(" >>>>>>>>>> done cluster submit topology"); 263 | Thread.sleep(5000); // give it some time for top to setup. 
264 | 265 | streamText(drpc, spout); 266 | // String result = drpc.execute(TOPNAME, "hello world"); 267 | // System.out.println(">>>>>>>>>> drpc result :" + result); 268 | // result = drpc.execute(TOPNAME, "hello world from world"); 269 | // System.out.println(">>>>>>>>>> drpc result :" + result); 270 | } 271 | } 272 | -------------------------------------------------------------------------------- /qsort.clj: -------------------------------------------------------------------------------- 1 | ;; lazy qsort 2 | ;; (load-file "qsort.clj") 3 | (ns qsort 4 | (:import [java.util.collections]) 5 | (:require clojure.set :refer :all)) 6 | 7 | 8 | ; for seq comprehension Vs while loop to iterate over a seq with great control. 9 | ; 10 | ; public static int indexOfAny(String str, char[] searchChars) { 11 | ; if("".equals(str) || searchChars.length == 0){ return -1; } 12 | ; int i, j, strsz = str.length(), searchsz = searchChars.length; 13 | ; while(i < strsz){ // use while loop to move index at your own control. 14 | ; for(j=0; j beg and l[j] > pivot: 74 | ;; j -= 1 75 | ;; 76 | ;; if(i < j): 77 | ;; swap(l, i, j) 78 | ;; i += 1 79 | ;; j -= 1 80 | ;; else: 81 | ;; break ; when break out, i >= j 82 | ;; 83 | ;; # when break out, i >= j, j is first ele <= pivot. so swap pivot to j. 84 | ;; swap(l, beg, j) 85 | ;; qsort(l, beg, i-1) ; [beg, i-1] are all <= pivot 86 | ;; qsort(l, j+1, end) ; [j+1, end] j+1 is starting point of all 87 | ;; 88 | ;; in fn lang, focus on result by transform list from [head rest] into [smaller pivot higher]. 89 | ;; then what left is just recursion. 90 | ;; 91 | ;; 92 | 93 | (defn myqsort [l] 94 | (loop [[low & high] l] ;; first destructuring top list into low and high partition 95 | (prn "looping low " low " high " high) 96 | (if-let [[pivot & xs] (seq low)] ;; low half is as left child, go all the way down. Note the nil pun with seq 97 | (let [ smaller? 
#(< % pivot) ] 98 | (do 99 | (prn "recur pivot " pivot " low " low " low rest " xs " divided smaller " (filter smaller? xs)) 100 | (prn "recur binding l=" (list* (filter smaller? xs) pivot (remove smaller? xs))) 101 | 102 | (recur (list* ;; recur by transform list into [ [smaller-smaller] [pivot [smaller-higher] higher]] 103 | (filter smaller? xs) ;; recur divide smaller partition into smaller and higher partition around pivot 104 | pivot ;; constr 105 | (remove smaller? xs) ;; larger partition of the low partition 106 | high)))) ;; original large partition of high partition 107 | (when-let [[p & xs] high] ;; if-let false, smaller partition done, recursion on larger partition 108 | (prn "low nil, cons " p " into result of high " xs) 109 | (cons p (myqsort xs)))))) 110 | 111 | (defn qsort [xs] 112 | (myqsort (list xs))) 113 | 114 | ; 115 | ; test 116 | (qsort [2 1 4 3]) 117 | 118 | ; lazy-cat is used to merge intermediate result during recursion to ret single list to caller. 119 | ; with lazy-cat to merge intermediate result during recursion, we can use map to divide and distribute works. 120 | ; first, destruct the passed-in list as (p & body), then partition body into 121 | ; low hi bodies, full sol = cons solution for low, pivot, solution for hi. 122 | (defn lazyqsort [l] 123 | (if-not (seq l) ; idiomatic. 
(seq l) ret a seq view of the collection, or nil 124 | [] ; ret empty seq so high level can lazy-cat vectors 125 | (let [p (first l) body (rest l) 126 | lol (filter #(<= % p) body) hil (remove #(<= % p) body) ] 127 | (lazy-cat (lazyqsort lol) [p] (lazyqsort hil))))) 128 | 129 | (lazyqsort [1]) 130 | (lazyqsort [1 1 1]) 131 | (lazyqsort [1 2 3]) 132 | (lazyqsort [9 8 7 6 1 2 3 4]) 133 | 134 | ; lazy-cat merge-sort 135 | (defn mergesort [xs] 136 | (letfn [(merge [p q] 137 | (cond 138 | (not (seq p)) q ; p is done, take entire q 139 | (not (seq q)) p ; q is done, take entire p 140 | :else 141 | (let [ph (first p) qh (first q)] 142 | (if (< ph qh) 143 | (lazy-seq (cons ph (merge (rest p) q))) 144 | (lazy-seq (cons qh (merge p (rest q))))))))] 145 | (if (<= (count xs) 1) 146 | xs ; base, only one ele left, ret 147 | (let [[l q] (split-at (quot (count xs) 2) xs)] ; split-at half 148 | (merge (mergesort l) (mergesort q)))))) 149 | 150 | (mergesort [1]) 151 | (mergesort [1 1 1]) 152 | (mergesort [1 2 3]) 153 | (mergesort [9 8 7 6 1 2 3 4]) 154 | 155 | 156 | ;; 157 | ;; bisect, if not found, insert to the end. 158 | ;; To make this really fast you will want to use int throughout, though, 159 | ;; and unchecked arithmetic: 160 | ;; 161 | (defn bisect [l v] 162 | "binary search for value in l, if not found, insert v at the end of l" 163 | (loop [i (int 0) j (int (dec (count l)))] ;; count is inlined, thus ret a primitive. 
164 | (if (> i j) ; continue when i == j 165 | false 166 | (let [ mid (unchecked-divide (unchecked-add i j) 2) midv (l mid) ] 167 | (cond 168 | (= midv v) 169 | mid 170 | (> midv v) 171 | (recur i (unchecked-dec mid)) 172 | :else 173 | (recur (unchecked-inc mid) j)))))) 174 | 175 | (bisect [1 3 5 6 8 9] 3) 176 | 177 | ;; use java collections binarySearch directly 178 | (defn java-binsearch [xs x] 179 | (java.util.Collections/binarySearch xs x compare)) 180 | 181 | (java-binsearch [1 3 5 6 8 9] 3) 182 | 183 | ; recursive version of bi-sect, insert just before entry that is bigger than val 184 | ; arg is map-indexed vector l, [idx val] 185 | (defn recur-bisect [l v] 186 | (if (empty? l) 187 | 0 ; ret immediately upon empty list, we ensured never recur with empty list. 188 | (let [len (count l) mid (quot len 2) midv (second (nth l mid)) 189 | lo (subvec (vec l) 0 mid) hi (subvec (vec l) (inc mid))] 190 | (if (>= v midv) ; recur until the first one bigger than val 191 | (if (empty? hi) ; hi subvec explored, insert after mid 192 | (inc (first (nth l mid))) 193 | (recur hi v)) ; never recur with empty list 194 | (if (empty? lo) ; lo subvec explored, insert before mid 195 | (first (nth l mid)) 196 | (recur lo v)))))) ; never recur with empty list 197 | 198 | (recur-bisect (map-indexed vector []) 3) 199 | (recur-bisect (map-indexed vector [5]) 3) 200 | (recur-bisect (map-indexed vector [1 2 3 4 5]) 3) 201 | (recur-bisect (map-indexed vector [1 2 3 3]) 3) 202 | (recur-bisect (map-indexed vector [1 2 3 3 5]) 3) 203 | (recur-bisect (map-indexed vector [1 2 3 3 5]) 8) 204 | 205 | 206 | ; for list, use header iteration when need to apply fn to each element in list. 207 | ; for tree, can use branch DP to explore 208 | (defn permutation [text] 209 | (letfn [(inject-each-pos [hd subw] ; ret a list of strings 210 | (if (empty? 
subw) 211 | hd ; bottom, ret hd string 212 | (let [sz (inc (count subw)) 213 | splits (map #(split-at % subw) (range sz)) 214 | injected-splits (map #(concat (first %) (vec hd) (second %)) splits)] 215 | (map #(apply str %) injected-splits))))] 216 | 217 | (if (empty? text) 218 | [] 219 | (let [ hd (subs text 0 1) 220 | subp (permutation (subs text 1))] 221 | (if (empty? subp) 222 | [hd] 223 | (mapcat #(inject-each-pos hd %) subp)))))) 224 | 225 | ;; cons each head to each tail, which is recur result of list without header 226 | (defn all-permutations [things] 227 | (if (= 1 (count things)) 228 | (list things) 229 | (for [head things 230 | tail (all-permutations (disj (set things) head))] 231 | (do 232 | (cons head tail))))) 233 | (all-permutations '(a b c)) 234 | 235 | ; 236 | ; mutual recursion is idea for state machine transition 237 | 238 | ; trampoline(fn & args) change recur fn to recur #(fn) to achieve TCO 239 | ; you give trampoline a fn, trampoline will recur the fn without stack overflow. 240 | 241 | (defn my-even? [n] 242 | (letfn [(e? [n] 243 | (if (zero? n) 244 | true 245 | #(o? (dec (Math/abs n))))) 246 | (o? [n] 247 | (if (zero? n) 248 | false 249 | #(e? (dec (Math/abs n)))))] 250 | (trampoline e? n))) 251 | 252 | (defn my-odd? [n] 253 | (not (my-even? n))) 254 | 255 | 256 | ; recursive build a list, recursive destructure a list 257 | (defn- coll-or-scalar [x & _] (if (coll? x) :collection :scalar)) ; dispatch 258 | (defmulti replace-symbol coll-or-scalar) 259 | (defmethod replace-symbol :collection [coll oldsym newsym] 260 | (lazy-seq ; invoke the body only when needed, ret empty seq at bottom 261 | (when (seq coll) 262 | ; apply the same repalce-symbo to the first ele, be it scalar or a seq, 263 | ; and to rest list. replace-symbo polymorphy by dispatch on the ele type. 
264 | (cons (replace-symbol (first coll) oldsym newsym) 265 | (replace-symbol (rest coll) oldsym newsym))))) 266 | ; after dispatching, the first arg is exact 267 | (defmethod replace-symbol :scalar [obj odlsym newsym] 268 | (if (= obj oldsym) 269 | newsym 270 | oldsym)) 271 | 272 | ; given a list, continuously delete every other ele until one left 273 | ; just transform the list recursive 274 | (defn filterlist [l] 275 | (if (= (count l) 1) ; base, only one, ret ele in the list 276 | (first l) 277 | (let [sz (count l) ; or if next point to itself, only one left. 278 | keep-even-l (keep-indexed #(if (even? %1) %2) l)] ; keep ele with even idx, drop odd index ele in the list 279 | (if (even? sz) ; if len is even, the last ele is dropped, no need to adjust head for next recursion 280 | (recur keep-even-l) 281 | (recur (next keep-even-l)))))) ; odd is size, next recursion need to adjust head 282 | 283 | -------------------------------------------------------------------------------- /wordcount/src/wordcount/core.clj: -------------------------------------------------------------------------------- 1 | (ns wordcount.core 2 | (:import [backtype.storm StormSubmitter LocalCluster]) 3 | (:use [backtype.storm clojure config]) 4 | (:use [clojure.string]) 5 | (:use [clojure.pprint]) 6 | (:gen-class)) 7 | 8 | ;; 9 | ;; (defspout name output-declare option-map % impl) 10 | ;; (defbolt name output-declare option-map % impl) 11 | ;; spout-spec take spout-imp. bolt-spec specify the input to the bolt. 12 | ;; spout spec impl body takes topl conf, topl context, and spoutoutputcollector. 13 | ;; bolt spec impl execute method [tuple collector] 14 | ;; the input map is for each bolt is defined in topology. 15 | ;; 16 | ;; topology map {"comp_id" (spout-spec (spec-impl)) } 17 | ;; spout-spec = spec impl and parallelism. 
;; output stream map defined inside spout-spec
;; bolt-spec = input-map [spec-impl arglists] spec impl body
;;
;; output map stream-id and stream spec { "1" ["f1" "f2"] "2" (direct-stream ["f1" "f2"])}
;; input-map = {[comp-id stream-id] :stream-grp [comp-id stream-id] ["field1" "field2"]}
;;

;;
;; (defspout name output-declare option-map % impl)
;; output map reduced to a vector of fields iff default stream.
;; unprepared spout (:prepare false) only defines the nextTuple body; the impl
;; arglist is just [collector]. :params values are supplied when the spout is
;; instantiated inside the topology map.
;;
(defspout sentence-spout-parameterized ["word"]
  {:params [sentences] :prepare false}
  [collector]
  (Thread/sleep 500)                                ; throttle emission to ~2 tuples/sec
  (emit-spout! collector [(rand-nth sentences)]))   ; emit one random sentence per call

;;
;; defspout takes 2 method (nextTuple, ack) impls and ret an ISpout object.
;; emitted tuple is a list of key value pairs, e.g.
;; {"logevent" "Apr 21 01:00:08 haijin-mac kernel[0]: image 2166576128"}
;;
(defspout sentence-spout ["logevent"]  ; output stream has tuples with a single "logevent" field
  [conf context collector]
  ;; canned OS X kernel log lines used as the fake event source
  (let [logs [
          "Apr 21 01:00:08 haijin-mac kernel[0]: image 2166576128, uncompressed 5045297152 (183186), compressed 2155141936 (42%), sum1 6c67ac3, sum2 72119025"
          "Apr 21 01:00:08 haijin-mac kernel[0]: wired_pages_encrypted 77034, wired_pages_clear 106654, dirty_pages_encrypted 1048074"
          "Apr 21 01:00:08 haijin-mac kernel[0]: hibernate_write_image done(0)"
          "Apr 21 01:00:08 haijin-mac kernel[0]: sleep"
          "Apr 21 01:00:08 haijin-mac kernel[0]: SMC::smcHandleInterruptEvent WARNING status=0x0 (0x40 not set) notif=0x0"
          "Apr 21 08:44:12 haijin-mac kernel[0]: Wake reason: EC LID0"
          "Apr 21 08:44:12 haijin-mac kernel[0]: HID tickle 135 ms"
          "Apr 21 08:44:12 haijin-mac kernel[0]: Previous Sleep Cause: 5"
          "Apr 21 08:44:12 haijin-mac kernel[0]: wlEvent: en1 en1 Link DOWN virtIf = 0"
          "Apr 21 08:44:12 haijin-mac kernel[0]: AirPort: Link Down on en1. Reason 8 (Disassociated because station leaving)."
          "Apr 21 08:44:12 haijin-mac kernel[0]: en1: 802.11d country code set to 'X0'."
          "Apr 21 08:44:12 haijin-mac kernel[0]: en1: Supported channels 1 2 3 4 5 6 7 8 9 10 11 36 40 44 48 52 56 60 64 100 104 108 112 116 120 124 128 132 136 140 149 153 157 161 165"
          "Apr 21 08:44:15 haijin-mac kernel[0]: 00000000 00000020 NVEthernet::setLinkStatus - not Active"
          "Apr 21 08:44:18 haijin-mac kernel[0]: en1: 802.11d country code set to 'US'."
          "Apr 21 08:44:18 haijin-mac kernel[0]: en1: Supported channels 1 2 3 4 5 6 7 8 9 10 11 36 40 44 48 52 56 60 64 100 104 108 112 116 120 124 128 132 136 140 149 153 157 161 165"
          "Apr 21 08:44:35 haijin-mac kernel[0]: MacAuthEvent en1 Auth result for: 5c:d9:98:65:83:d4 MAC AUTH succeeded"
          "Apr 21 08:44:35 haijin-mac kernel[0]: wlEvent: en1 en1 Link UP virtIf = 0"
          "Apr 21 08:44:35 haijin-mac kernel[0]: AirPort: Link Up on en1"
          "Apr 21 08:44:35 haijin-mac kernel[0]: en1: BSSID changed to 5c:d9:98:65:83:d4"
          "Apr 21 08:44:35 haijin-mac kernel[0]: AirPort: RSN handshake complete on en1"
          "Apr 21 09:04:51 haijin-mac kernel[0]: CODE SIGNING: cs_invalid_page(0x1000): p=99354[GoogleSoftwareUp] clearing CS_VALID"
          "Apr 21 10:03:34 haijin-mac kernel[0]: CODE SIGNING: cs_invalid_page(0x1000): p=367[GoogleSoftwareUp] clearing CS_VALID"
          "Apr 21 11:02:18 haijin-mac kernel[0]: CODE SIGNING: cs_invalid_page(0x1000): p=1436[GoogleSoftwareUp] clearing CS_VALID"
          "Apr 21 11:57:07 haijin-mac kernel[0]: MacAuthEvent en1 Auth result for: 5c:d9:98:65:83:d4 MAC AUTH succeeded"
          "Apr 21 11:57:07 haijin-mac kernel[0]: wlEvent: en1 en1 Link UP virtIf = 0"
          "Apr 21 11:57:07 haijin-mac kernel[0]: AirPort: RSN handshake complete on en1"
          "Apr 21 11:57:07 haijin-mac kernel[0]: wl0: Roamed or switched channel, reason #4, bssid 5c:d9:98:65:83:d4"
          "Apr 21 11:57:07 haijin-mac kernel[0]: en1: BSSID changed to 5c:d9:98:65:83:d4"
          ]]
    (spout
      ;; spout macro takes 2 method (nextTuple, ack) impls and ret an ISpout object
      (nextTuple []
        (Thread/sleep 100)
        (emit-spout! collector [(rand-nth logs)]))  ;; emit one random log line to the collector
      ;; You only need to define this method for reliable spouts
      ;; (such as one that reads off of a queue like Kestrel)
      ;; This is an unreliable spout, so it does nothing here
      (ack [id]))))

;;
;; bolt filters sentences based on a vec of keywords.
;; in-stream tuple: {"logevent" "...log line..."}
;; output declared as a vector of fields = the default output stream.
;;
(defbolt filter-sentence ["sentence"]  ;; output map = a vector of fields(keyword)
  ;; :params are bound when the bolt is instantiated in the topology map
  {:params [keywords] :prepare false}

  ;; non-prepared bolt impl is the body of (execute [tuple collector])
  [tuple collector]
  (let [sentence (.getStringByField tuple "logevent")  ;; whole log line
        words (.split sentence " ")
        wordset (into #{} words)]
    ;(pprint tuple)     ;; what's inside a tuple.
    ;(pprint sentence)
    ;; pass through only sentences containing at least one keyword
    (if (some (set keywords) wordset)  ;; ~ (clojure.set/intersection wordset (set keywords))
      (emit-bolt! collector [sentence] :anchor tuple)  ;; anchoring enables replay if lost downstream
      (ack! collector tuple))))  ;; ack non-matching tuples so they are not replayed

;;
;; (defbolt name output-declare option-map % impl)
;; impl is [arglists] and the body of the execute Iface.
;; parameterize the bolt like a parameterized collection with the {:params []} option.
;; output map: stream-id -> stream spec { "1" ["f1" "f2"] "2" (direct-stream ["f1" "f2"])}
;; a bolt has many tasks; input stream grouping partitions streams across tasks
;; (shuffle, fields, global, direct).
;;
(defbolt split-sentence
  {"1" ["word"]           ; out stream 1: tuples with a single "word" field (odd lines)
   "2" ["word" "index"]}  ; out stream 2: tuples with "word" and "index" (even lines)
  {:prepare true}         ; prepared bolt: topology calls prepare(stormConf, ctx)
  [conf context collector] ; prepared bolt impl takes conf, context, collector
  (let [index (atom 0)]   ; per-task counter of sentences processed
    (bolt  ;; impl execute fn for bolt IF.
      (prepare [conf context]
        (prn "creating table inside prepare"))

      (execute [tuple]
        (let [nprocessed (swap! index inc)
              words (.split (.getStringByField tuple "sentence") " ")]  ;; split the sentence field
          ;(pprint words)
          (if (odd? nprocessed)
            (doseq [w words]  ; odd lines -> stream 1, word only
              (emit-bolt! collector [w] :anchor tuple :stream "1"))
            (doseq [w words]  ; even lines -> stream 2, word + line number
              (emit-bolt! collector [w nprocessed] :anchor tuple :stream "2")))
          (ack! collector tuple))))))

;;
; prepared bolt stores state locally for join and stream aggregation.
; state lives in the closure, in a mutable reference.
;
; using atom to store mutable state inside clojure:
; atom holds intermediate state, @atom de-refs to read it,
; swap! updates it.
;; per-word running count; counts map lives in the bolt's closure.
(defbolt word-count ["word" "count"]
  {:prepare true}
  [conf context collector]  ; prepared bolt impl takes conf, context, collector
  (let [counts (atom {})]   ; map of word -> running count for this task
    (bolt
      (execute [tuple]  ; input tuple has field "word"
        (let [word (.getStringByField tuple "word")]
          (swap! counts (partial merge-with +) {word 1})  ; merge-with + increments the word's count
          (emit-bolt! collector [word (@counts word)] :anchor tuple)
          (ack! collector tuple))))))

;
; combiner gets input from stream 2 of split-sentence; those tuples carry
; two fields, "word" and "index" (the line number), not word/count.
; NOTE(review): declares output ["word" "count"] but never emits or acks --
; looks like demo/debug-only code.
;
(defbolt combiner ["word" "count"]
  {:prepare true}
  [conf context collector]
  (let [counts (atom {})]   ; unused here; kept from the original
    (bolt
      (execute [tuple]  ; tuple in the stream has two fields: word, index
        (let [word (.getStringByField tuple "word")
              line (.getLongByField tuple "index")]
          (prn "combiner word " word)
          (prn "combiner count " line))))))

;;
;; topology is a map of component id [1 2 3 4] and its spec
;; spout-spec = spec impl and parallelism. output stream map defined inside spout-spec
;; bolt-spec = input-map and spec impl.
;; bolt-spec input-map = { [comp-id stream-id] :stream-grp [comp-id stream-id] ["field1" "field2"]
;; components exchange data in *tuple*. Tuple has fields.
;;
(defn mk-topology []
  ; topology is a map of component id -> component spec (spout-spec, bolt-spec)
  (topology
    ; spout-spec : spout impl and parallel tasks.
    {"1" (spout-spec (sentence-spout-parameterized ["the cat jumped over the door" "greetings from the faraway land"])
                     :p 2)

     "2" (spout-spec sentence-spout)}  ; done with spout spec.

    ;; bolt-spec: input declaration, bolt implementation, parallel tasks.
    ;; input declaration: a map of stream id and stream groupings.
    ;; stream id = [component-id stream-id]
    ;; stream grp ["id" "name"]: subscribes with a fields grouping on the specified fields
    {"3" (bolt-spec {"2" :shuffle}  ; input component 2, shuffle grouping
                    (filter-sentence ["Link" "channel"])  ; keyword list for the filter bolt
                    :p 2)

     "4" (bolt-spec {"3" :shuffle}  ;; input from component 3, shuffle grouping
                    split-sentence
                    :p 2)

     "5" (bolt-spec {["4" "1" ] ["word"]}  ;; comp 4, stream 1, fields grouping on "word"
                    word-count
                    :p 2)

     "6" (bolt-spec {["4" "2"] ["word" "index"]}  ; comp 4, stream 2, fields grouping on word+index
                    combiner
                    :p 2)}))

;; run the topology in an in-process LocalCluster for ~10 seconds, then shut down.
(defn run-local! []
  (let [cluster (LocalCluster.)]
    (.submitTopology cluster "word-count" {TOPOLOGY-DEBUG false} (mk-topology))
    (Thread/sleep 10000)  ;; run 10 seconds
    (.shutdown cluster)
    ))

;; submit the topology to a real cluster under the given name, using 3 workers.
(defn submit-topology! [name]
  (StormSubmitter/submitTopology
    name
    {TOPOLOGY-DEBUG false
     TOPOLOGY-WORKERS 3}
    (mk-topology)))

;; no args: run locally; one arg: submit to a cluster under that name.
(defn -main
  ([]
   (run-local!))
  ([name]
   (submit-topology! name)))