├── .gitignore ├── chapter4 ├── generator │ ├── requests.txt │ ├── referers.txt │ ├── codes.txt │ ├── ips.txt │ ├── resources.txt │ ├── user_agents.txt │ └── generator.py ├── logstash │ ├── indexer │ │ └── kafka_to_elasticsearch.conf │ ├── processor │ │ └── forwarder_to_kafka.conf │ └── forwarder │ │ └── forwarder.json └── kafka │ ├── server-1.properties │ └── server-2.properties ├── chapter7 └── docker │ ├── generator │ ├── requests.txt │ ├── referers.txt │ ├── codes.txt │ ├── ips.txt │ ├── resources.txt │ ├── user_agents.txt │ └── generator.py │ ├── docker-machine.sh │ ├── logstash │ ├── indexer │ │ └── kafka_to_elasticsearch.conf │ ├── processor │ │ └── forwarder_to_kafka.conf │ ├── forwarder │ │ └── forwarder.json │ └── .logstash-forwarder │ ├── templates │ └── template.json │ ├── kafka │ ├── server1 │ │ ├── zookeeper.properties │ │ └── server.properties │ └── server2 │ │ ├── zookeeper.properties │ │ └── server.properties │ ├── security │ ├── lumberjack.crt │ └── lumberjack.key │ └── docker-compose.yml ├── chapter6 └── spark-enrich-and-ml │ ├── build.sh │ ├── src │ └── main │ │ ├── main.iml │ │ └── scala │ │ └── org │ │ └── apache │ │ └── spark │ │ └── examples │ │ ├── StreamingExamples.scala │ │ └── SparkEnricher.scala │ └── pom.xml └── chapter5 └── spark-scala-streamer ├── build.sh ├── src └── main │ ├── main.iml │ └── scala │ ├── org │ └── apress │ │ └── examples │ │ └── chapter4 │ │ ├── StreamingExamples.scala │ │ └── KafkaStreamer.scala │ └── clickstream │ ├── PageViewGenerator.scala │ └── PageViewStream.scala ├── pom.xml └── spark-scala-streamer.iml /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | -------------------------------------------------------------------------------- /chapter4/generator/requests.txt: -------------------------------------------------------------------------------- 1 | GET 2 | GET 3 | GET 4 | GET 5 | GET 6 | GET 7 | GET 8 | GET 9 | GET 10 | GET 11 | POST 12 | POST -------------------------------------------------------------------------------- /chapter7/docker/generator/requests.txt: -------------------------------------------------------------------------------- 1 | GET 2 | GET 3 | GET 4 | GET 5 | GET 6 | GET 7 | GET 8 | GET 9 | GET 10 | GET 11 | POST 12 | POST -------------------------------------------------------------------------------- /chapter4/generator/referers.txt: -------------------------------------------------------------------------------- 1 | - 2 | http://www.google.com 3 | http://www.yahoo.com 4 | http://www.referrer.com 5 | http://www.bing.com 6 | http://www.amazon.com -------------------------------------------------------------------------------- /chapter7/docker/generator/referers.txt: -------------------------------------------------------------------------------- 1 | - 2 | http://www.google.com 3 | http://www.yahoo.com 4 | http://www.referrer.com 5 | http://www.bing.com 6 | http://www.amazon.com -------------------------------------------------------------------------------- /chapter4/generator/codes.txt: -------------------------------------------------------------------------------- 1 | 200 2 | 200 3 | 200 4 | 200 5 | 200 6 | 200 7 | 200 8 | 200 9 | 200 10 | 200 11 | 404 12 | 404 13 | 404 14 | 401 15 | 401 16 | 403 17 | 500 -------------------------------------------------------------------------------- /chapter7/docker/generator/codes.txt: -------------------------------------------------------------------------------- 1 | 200 2 | 200 3 | 200 4 | 200 5 | 200 6 | 200 7 
| 200 8 | 200 9 | 200 10 | 200 11 | 404 12 | 404 13 | 404 14 | 401 15 | 401 16 | 403 17 | 500 -------------------------------------------------------------------------------- /chapter4/generator/ips.txt: -------------------------------------------------------------------------------- 1 | 10.10.10.10 2 | 10.10.10.11 3 | 10.10.10.12 4 | 10.10.10.13 5 | 10.10.10.14 6 | 10.10.10.15 7 | 10.10.10.16 8 | 10.10.10.17 9 | 10.10.10.18 10 | 10.10.10.19 -------------------------------------------------------------------------------- /chapter7/docker/generator/ips.txt: -------------------------------------------------------------------------------- 1 | 10.10.10.10 2 | 10.10.10.11 3 | 10.10.10.12 4 | 10.10.10.13 5 | 10.10.10.14 6 | 10.10.10.15 7 | 10.10.10.16 8 | 10.10.10.17 9 | 10.10.10.18 10 | 10.10.10.19 -------------------------------------------------------------------------------- /chapter7/docker/docker-machine.sh: -------------------------------------------------------------------------------- 1 | docker-machine -D create \ 2 | --driver amazonec2 \ 3 | --amazonec2-access-key $AWS_ACCESS_KEY \ 4 | --amazonec2-secret-key $AWS_SECRET_KEY \ 5 | --amazonec2-vpc-id $AWS_VPC_ID \ 6 | --amazonec2-zone b \ 7 | baha-lambda-architecture -------------------------------------------------------------------------------- /chapter4/generator/resources.txt: -------------------------------------------------------------------------------- 1 | /products/product1 2 | /products/product2 3 | /products/product3 4 | /products/product4 5 | /products/product5 6 | /products/product6 7 | /store/cart 8 | /store/cart/checkout 9 | /account 10 | /page1 11 | /page2 12 | /page3 13 | /page4 14 | /page5 15 | /page6 16 | /page6 -------------------------------------------------------------------------------- /chapter7/docker/generator/resources.txt: -------------------------------------------------------------------------------- 1 | /products/product1 2 | /products/product2 3 | /products/product3 4 | /products/product4 5 | /products/product5 6 | /products/product6 7 | /store/cart 8 | /store/cart/checkout 9 | /account 10 | /page1 11 | /page2 12 | /page3 13 | /page4 14 | /page5 15 | /page6 16 | /page6 -------------------------------------------------------------------------------- /chapter4/logstash/indexer/kafka_to_elasticsearch.conf: -------------------------------------------------------------------------------- 1 | input { 2 | kafka { 3 | topic_id => "clickstream" 4 | } 5 | } 6 | filter { 7 | } 8 | output { 9 | stdout { codec => rubydebug } 10 | elasticsearch { 11 | index => "clickstream-%{+YYYY.MM.dd}" 12 | manage_template => false 13 | host => localhost 14 | protocol => http 15 | } 16 | } -------------------------------------------------------------------------------- /chapter7/docker/logstash/indexer/kafka_to_elasticsearch.conf: -------------------------------------------------------------------------------- 1 | input { 2 | kafka { 3 | zk_connect => "brokers:2181" 4 | topic_id => "clickstream" 5 | } 6 | } 7 | filter { 8 | mutate {convert => ["bytes", "integer"]} 9 | } 10 | output { 11 | stdout { codec => rubydebug } 12 | elasticsearch { 13 | index => "clickstream-%{+YYYY.MM.dd}" 14 | manage_template => false 15 | host => brokers 16 | protocol => http 17 | } 18 | } -------------------------------------------------------------------------------- /chapter6/spark-enrich-and-ml/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | echo 'Setting JAVA_HOME to Java7' 4 | export 
JAVA_HOME=/Library/Java/JavaVirtualMachines/jdk1.7.0_21.jdk/Contents/Home/ 5 | 6 | echo 'Package sources' 7 | mvn clean scala:compile package 8 | 9 | echo 'Running Spark' 10 | spark-submit --class org.apache.spark.examples.SparkEnricher \ 11 | --master local[2] \ 12 | target/spark-enrich-and-ml-1.0.0-jar-with-dependencies.jar \ 13 | 192.168.59.103:9092,192.168.59.103:9093 clickstream 14 | -------------------------------------------------------------------------------- /chapter5/spark-scala-streamer/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | echo 'Setting JAVA_HOME to Java7' 4 | export JAVA_HOME=/Library/Java/JavaVirtualMachines/jdk1.7.0_21.jdk/Contents/Home/ 5 | 6 | echo 'Package sources' 7 | mvn clean scala:compile package 8 | 9 | echo 'Running Spark' 10 | spark-submit --class org.apress.examples.chapter4.KafkaStreamer \ 11 | --master local[2] \ 12 | target/spark-scala-streamer-1.0.0-jar-with-dependencies.jar \ 13 | 192.168.59.103:9092,192.168.59.103:9093 clickstream 14 | -------------------------------------------------------------------------------- /chapter7/docker/logstash/processor/forwarder_to_kafka.conf: -------------------------------------------------------------------------------- 1 | input { 2 | lumberjack { 3 | port => "5043" 4 | ssl_certificate => "/etc/logstash/security/lumberjack.crt" 5 | ssl_key => "/etc/logstash/security/lumberjack.key" 6 | } 7 | } 8 | filter { 9 | grok { 10 | match => { 11 | "message" => "%{COMBINEDAPACHELOG}" 12 | } 13 | } 14 | } 15 | output { 16 | stdout { codec => rubydebug } 17 | kafka { 18 | broker_list => "192.168.59.103:9092" 19 | topic_id => "clickstream" 20 | } 21 | } -------------------------------------------------------------------------------- /chapter4/logstash/processor/forwarder_to_kafka.conf: -------------------------------------------------------------------------------- 1 | input { 2 | lumberjack { 3 | port => "5043" 4 | ssl_certificate => "/Users/bahaaldine/Dropbox/apress/demo/chapter4/security/lumberjack.crt" 5 | ssl_key => "/Users/bahaaldine/Dropbox/apress/demo/chapter4/security/lumberjack.key" 6 | } 7 | } 8 | filter { 9 | grok { 10 | match => { 11 | "message" => "%{COMBINEDAPACHELOG}" 12 | } 13 | } 14 | } 15 | output { 16 | stdout { codec => rubydebug } 17 | kafka { 18 | topic_id => "clickstream" 19 | } 20 | } -------------------------------------------------------------------------------- /chapter4/logstash/forwarder/forwarder.json: -------------------------------------------------------------------------------- 1 | { 2 | "network": { 3 | "servers": [ "localhost:5043" ], 4 | "ssl certificate": "../security/lumberjack.crt", 5 | "ssl key": "../security/lumberjack.key", 6 | "ssl ca": "../security/lumberjack.crt", 7 | "timeout": 15 8 | }, 9 | 10 | "files": [ 11 | { 12 | "paths": [ 13 | "/Users/bahaaldine/Dropbox/apress/demo/chapter4/source/access_log_*" 14 | ], 15 | "fields": { "type": "access_log" } 16 | }, { 17 | "paths": [ "-" ], 18 | "fields": { "type": "stdin" } 19 | } 20 | ] 21 | } -------------------------------------------------------------------------------- /chapter5/spark-scala-streamer/src/main/main.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | -------------------------------------------------------------------------------- /chapter6/spark-enrich-and-ml/src/main/main.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 
4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | -------------------------------------------------------------------------------- /chapter7/docker/logstash/forwarder/forwarder.json: -------------------------------------------------------------------------------- 1 | { 2 | "network": { 3 | "servers": [ "processors:5043" ], 4 | "ssl certificate": "/etc/logstash-forwarder/security/lumberjack.crt", 5 | "ssl key": "/etc/logstash-forwarder/security/lumberjack.key", 6 | "ssl ca": "/etc/logstash-forwarder/security/lumberjack.crt", 7 | "timeout": 15 8 | }, 9 | 10 | "files": [ 11 | { 12 | "paths": [ 13 | "/tmp/source/access_log_*" 14 | ], 15 | "fields": { "type": "access_log" } 16 | }, { 17 | "paths": [ "-" ], 18 | "fields": { "type": "stdin" } 19 | } 20 | ] 21 | } -------------------------------------------------------------------------------- /chapter4/generator/user_agents.txt: -------------------------------------------------------------------------------- 1 | Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0) 2 | Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1944.0 Safari/537.36 3 | Mozilla/5.0 (Linux; U; Android 2.3.5; en-us; HTC Vision Build/GRI40) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1 4 | Mozilla/5.0 (iPad; CPU OS 6_0 like Mac OS X) AppleWebKit/536.26 (KHTML, like Gecko) Version/6.0 Mobile/10A5355d Safari/8536.25 5 | Mozilla/5.0 (Windows; U; Windows NT 6.1; rv:2.2) Gecko/20110201 6 | Mozilla/5.0 (Windows NT 5.1; rv:31.0) Gecko/20100101 Firefox/31.0 7 | Mozilla/5.0 (Windows; U; MSIE 9.0; WIndows NT 9.0; en-US)) -------------------------------------------------------------------------------- /chapter7/docker/generator/user_agents.txt: -------------------------------------------------------------------------------- 1 | Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.0) 2 | Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/36.0.1944.0 Safari/537.36 3 | Mozilla/5.0 (Linux; U; Android 2.3.5; en-us; HTC Vision Build/GRI40) AppleWebKit/533.1 (KHTML, like Gecko) Version/4.0 Mobile Safari/533.1 4 | Mozilla/5.0 (iPad; CPU OS 6_0 like Mac OS X) AppleWebKit/536.26 (KHTML, like Gecko) Version/6.0 Mobile/10A5355d Safari/8536.25 5 | Mozilla/5.0 (Windows; U; Windows NT 6.1; rv:2.2) Gecko/20110201 6 | Mozilla/5.0 (Windows NT 5.1; rv:31.0) Gecko/20100101 Firefox/31.0 7 | Mozilla/5.0 (Windows; U; MSIE 9.0; WIndows NT 9.0; en-US)) -------------------------------------------------------------------------------- /chapter7/docker/templates/template.json: -------------------------------------------------------------------------------- 1 | { 2 | "template_1" : { 3 | "template" : "*", 4 | "settings" : { 5 | "index.number_of_shards" : 1, 6 | "index.number_of_replicas" : 0 7 | }, 8 | "mappings" : { 9 | "_default_": { 10 | "dynamic_templates": [ 11 | { 12 | "string_fields": { 13 | "mapping": { 14 | "index": "not_analyzed", 15 | "omit_norms": true, 16 | "type": "string" 17 | }, 18 | "match_mapping_type": "string", 19 | "match": "*" 20 | } 21 | } 22 | ], 23 | "_all": { 24 | "enabled": true 25 | }, 26 | "properties": { 27 | "bytes": { "type": "integer" }, 28 | "response": { "type": "integer" } 29 | } 30 | } 31 | } 32 | } 33 | } -------------------------------------------------------------------------------- /chapter7/docker/kafka/server1/zookeeper.properties: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or 
more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # the directory where the snapshot is stored. 16 | dataDir=/tmp/zookeeper 17 | # the port at which the clients will connect 18 | clientPort=2181 19 | # disable the per-ip limit on the number of connections since this is a non-production config 20 | maxClientCnxns=0 21 | -------------------------------------------------------------------------------- /chapter7/docker/kafka/server2/zookeeper.properties: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # the directory where the snapshot is stored. 
16 | dataDir=/tmp/zookeeper 17 | # the port at which the clients will connect 18 | clientPort=2182 19 | # disable the per-ip limit on the number of connections since this is a non-production config 20 | maxClientCnxns=0 21 | -------------------------------------------------------------------------------- /chapter7/docker/security/lumberjack.crt: -------------------------------------------------------------------------------- 1 | -----BEGIN CERTIFICATE----- 2 | MIIDIzCCAgugAwIBAgIJAPlrM1BCQmOVMA0GCSqGSIb3DQEBBQUAMBUxEzARBgNV 3 | BAMTCnByb2Nlc3NvcnMwHhcNMTUwNzE2MDk1NjU1WhcNMTUwODE1MDk1NjU1WjAV 4 | MRMwEQYDVQQDEwpwcm9jZXNzb3JzMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIB 5 | CgKCAQEAnIDqokdntcXHt1ez4bsg2gpqYENF9cpd7Y/cJa393l++/nul24HQQrrr 6 | Q3+gYSw1u+dAb9nh8ThDeUABBItxSAHpPB2ozxkHa3SmVf02XWSl5vGWVxsnYliB 7 | qCEqxtLXUzB8Az1FIZy0HLlCTSDCb5U2JHInIkqqxWTnHRQ4s9vXuai0ysxT3BPQ 8 | h4d9ocpgdn80eQO+XCrlyxDaUsSRwY+MdO2G0vEqgiAgru6bndzlCsyPa19yMrru 9 | qwkErv16Yiws6Qmc4jxYDspR7xOfcrQl2N6SgqHtPL0Qv2rAmZhqcyl5V2lbEd7T 10 | WGqbLntYjtht43EyLB7TVoSi2SVFIwIDAQABo3YwdDAdBgNVHQ4EFgQUYAbYar6K 11 | K2/WhrZiWhDZcWU73Y0wRQYDVR0jBD4wPIAUYAbYar6KK2/WhrZiWhDZcWU73Y2h 12 | GaQXMBUxEzARBgNVBAMTCnByb2Nlc3NvcnOCCQD5azNQQkJjlTAMBgNVHRMEBTAD 13 | AQH/MA0GCSqGSIb3DQEBBQUAA4IBAQBJJdcAnFCxnRz8x1/Nta9J9ZGCVb/HM2LD 14 | AEh2sdbWoOZ4tvYtIxfvIsOQW/UfSUCfZnTXQE4qaCqT+r85QxmPE0UuFcTfbmU2 15 | 0u96m5J+tRGoFHI6/FegALtLpt2xQ5yEE2QWBXfKTjL+9TfKxcllyyGHJwKE1oOL 16 | BBAptNxfWRIZWFL73pdy4xmqAzhAE8zhFLmyy7VJY8fK5eewvGaACwKCMRBdBeAN 17 | IzPZwYG3VycJwRAoNBs3HhuIdgbKI95WpszQbxmo8MRBrwp3dxkwel6AHh4jyJiF 18 | cDYUoKp6V0NmtJ40jKPc1riN4BER5pO8aztXcyfp7GDb12/eY0Ur 19 | -----END CERTIFICATE----- 20 | -------------------------------------------------------------------------------- /chapter4/generator/generator.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import sys 3 | import time 4 | import datetime 5 | import random 6 | from pytz import timezone 7 | 8 | line_count = int(sys.argv[1]) 9 | timestr = time.strftime("%Y%m%d-%H%M%S") 10 | 11 | f = open('../source/access_log_'+timestr+'.log','w') 12 | 13 | # ips 14 | with open('ips.txt') as ips_file: 15 | ips = ips_file.read().splitlines() 16 | 17 | # referers 18 | with open('referers.txt') as referers_file: 19 | referers = referers_file.read().splitlines() 20 | 21 | # resources 22 | with open('resources.txt') as resources_file: 23 | resources = resources_file.read().splitlines() 24 | 25 | # user agents 26 | with open('user_agents.txt') as user_agents_file: 27 | useragents = user_agents_file.read().splitlines() 28 | 29 | # codes 30 | with open('codes.txt') as codes_file: 31 | codes = codes_file.read().splitlines() 32 | 33 | # requests 34 | with open('requests.txt') as requests_file: 35 | requests = requests_file.read().splitlines() 36 | 37 | event_time = datetime.datetime(2013,10,10).replace(tzinfo=timezone('UTC')) 38 | 39 | for i in xrange(0,line_count): 40 | increment = datetime.timedelta(seconds=random.randint(30,300)) 41 | event_time += increment 42 | uri = random.choice(resources) 43 | if uri.find("Store")>0: 44 | uri += `random.randint(1000,1500)` 45 | ip = random.choice(ips) 46 | useragent = random.choice(useragents) 47 | referer = random.choice(referers) 48 | code = random.choice(codes) 49 | request= random.choice(requests) 50 | f.write('%s - - [%s] "%s %s HTTP/1.0" %s %s "%s" "%s" \n' % (random.choice(ips),event_time.strftime('%d/%b/%Y:%H:%M:%S %z'),request,uri,code,random.randint(2000,5000),referer,useragent)) 
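The generator script above takes a single argument (the number of log lines to produce) and writes an Apache combined-log file to ../source/access_log_<timestamp>.log, matching the access_log_* pattern that forwarder.json tails. A minimal way to run it — a sketch, not part of the repository — assuming Python 2 (the script relies on xrange and backtick repr) with pytz installed:

# hypothetical invocation; paths follow the repository layout shown in the tree above
cd chapter4/generator
pip install pytz                  # only dependency outside the standard library
mkdir -p ../source                # generator.py opens ../source/access_log_<timestamp>.log for writing
python generator.py 1000          # argv[1] = number of access-log lines to generate
ls ../source/access_log_*.log     # these files are what the logstash-forwarder picks up
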
-------------------------------------------------------------------------------- /chapter7/docker/generator/generator.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | import sys 3 | import time 4 | import datetime 5 | import random 6 | from pytz import timezone 7 | 8 | line_count = int(sys.argv[1]) 9 | timestr = time.strftime("%Y%m%d-%H%M%S") 10 | 11 | f = open('../source/access_log_'+timestr+'.log','w') 12 | 13 | # ips 14 | with open('ips.txt') as ips_file: 15 | ips = ips_file.read().splitlines() 16 | 17 | # referers 18 | with open('referers.txt') as referers_file: 19 | referers = referers_file.read().splitlines() 20 | 21 | # resources 22 | with open('resources.txt') as resources_file: 23 | resources = resources_file.read().splitlines() 24 | 25 | # user agents 26 | with open('user_agents.txt') as user_agents_file: 27 | useragents = user_agents_file.read().splitlines() 28 | 29 | # codes 30 | with open('codes.txt') as codes_file: 31 | codes = codes_file.read().splitlines() 32 | 33 | # requests 34 | with open('requests.txt') as requests_file: 35 | requests = requests_file.read().splitlines() 36 | 37 | event_time = datetime.datetime(2013,10,10).replace(tzinfo=timezone('UTC')) 38 | 39 | for i in xrange(0,line_count): 40 | increment = datetime.timedelta(seconds=random.randint(30,300)) 41 | event_time += increment 42 | uri = random.choice(resources) 43 | if uri.find("Store")>0: 44 | uri += `random.randint(1000,1500)` 45 | ip = random.choice(ips) 46 | useragent = random.choice(useragents) 47 | referer = random.choice(referers) 48 | code = random.choice(codes) 49 | request= random.choice(requests) 50 | f.write('%s - - [%s] "%s %s HTTP/1.0" %s %s "%s" "%s" \n' % (random.choice(ips),event_time.strftime('%d/%b/%Y:%H:%M:%S %z'),request,uri,code,random.randint(2000,5000),referer,useragent)) -------------------------------------------------------------------------------- /chapter6/spark-enrich-and-ml/src/main/scala/org/apache/spark/examples/StreamingExamples.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.apache.spark.examples 19 | 20 | import org.apache.spark.Logging 21 | import org.apache.log4j.{Level, Logger} 22 | 23 | /** Utility functions for Spark Streaming examples. */ 24 | object StreamingExamples extends Logging { 25 | 26 | /** Set reasonable logging levels for streaming if the user has not configured log4j. 
*/ 27 | def setStreamingLogLevels() { 28 | val log4jInitialized = Logger.getRootLogger.getAllAppenders.hasMoreElements 29 | if (!log4jInitialized) { 30 | // We first log something to initialize Spark's default logging, then we override the 31 | // logging level. 32 | logInfo("Setting log level to [WARN] for streaming example." + 33 | " To override add a custom log4j.properties to the classpath.") 34 | Logger.getRootLogger.setLevel(Level.WARN) 35 | } 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /chapter5/spark-scala-streamer/src/main/scala/org/apress/examples/chapter4/StreamingExamples.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.apress.examples.chapter4 19 | 20 | import org.apache.spark.Logging 21 | import org.apache.log4j.{Level, Logger} 22 | 23 | /** Utility functions for Spark Streaming examples. */ 24 | object StreamingExamples extends Logging { 25 | 26 | /** Set reasonable logging levels for streaming if the user has not configured log4j. */ 27 | def setStreamingLogLevels() { 28 | val log4jInitialized = Logger.getRootLogger.getAllAppenders.hasMoreElements 29 | if (!log4jInitialized) { 30 | // We first log something to initialize Spark's default logging, then we override the 31 | // logging level. 32 | logInfo("Setting log level to [WARN] for streaming example." 
+ 33 | " To override add a custom log4j.properties to the classpath.") 34 | Logger.getRootLogger.setLevel(Level.WARN) 35 | } 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /chapter7/docker/security/lumberjack.key: -------------------------------------------------------------------------------- 1 | -----BEGIN RSA PRIVATE KEY----- 2 | MIIEogIBAAKCAQEAnIDqokdntcXHt1ez4bsg2gpqYENF9cpd7Y/cJa393l++/nul 3 | 24HQQrrrQ3+gYSw1u+dAb9nh8ThDeUABBItxSAHpPB2ozxkHa3SmVf02XWSl5vGW 4 | VxsnYliBqCEqxtLXUzB8Az1FIZy0HLlCTSDCb5U2JHInIkqqxWTnHRQ4s9vXuai0 5 | ysxT3BPQh4d9ocpgdn80eQO+XCrlyxDaUsSRwY+MdO2G0vEqgiAgru6bndzlCsyP 6 | a19yMrruqwkErv16Yiws6Qmc4jxYDspR7xOfcrQl2N6SgqHtPL0Qv2rAmZhqcyl5 7 | V2lbEd7TWGqbLntYjtht43EyLB7TVoSi2SVFIwIDAQABAoIBAEgL6PgP2ySJCinT 8 | DBJMrRDzXok2Lwy5vofQhE4sTQSEG07X6UCmABPYLNoD9PcG/S2lhbp4GVWMyzMU 9 | TeOjteCqUI7GMKI2KX5EUxae3NodrWolqUosS/MwP3AA5Mpm4Y2AcnajTCHvG9Nl 10 | Tapn/FqV3Djm/DEDIp6AlwyB9KCHSdlietrdqncvIFY7kBv9E+O2wvD+8L5iw/mr 11 | 1IiirsJyb5Q6YeJ2E/x3a1eAL5RN3kLzUQdxbuVqJW1ebQIRVuZV0Y7+va8qrDsm 12 | ZcNtuEXjj25h2c3RHB9pbS9EYum6QrKJBgQNaf3Ol3/HnfB4DHgYH6stgS8L7P6i 13 | ae/Dg8ECgYEAz+jISccOKi7FnYemWyQSIlsuRH1Ew8dFEY3vtLSEqGtmGbW2b6M5 14 | YL+LcoiTaRTfGG7gvyPYVfu1CxIUEw8ZjzWUUL0rD6Rj5HYqPPBQB5M24E68Bm6y 15 | qE/GF+FAyNk4goRfy0Bt8bugXe35YuE3CwUTFvsZ7cbbf84DcKuS8TECgYEAwLQt 16 | Pc+N2nxDoXLwtfOai5ZYxbHoirNqNn6JEpLEbBoGeMhlhVoK7D1RBUBo84lV42kU 17 | w3op8HUxrsoWcL8UQmfDY+lXqhebomb/jOBSxqFLOz0uCN1iOlk9h8mZbbIKpICR 18 | GICt33VLiCXn6ftzvpPUTznSwijFTZJi8yGPppMCgYB4B+B6F52p5M7aH5J/U9fU 19 | VefkSp4EmWras6MMJJvhz/9mIG5vDUD3Bh6vxQG1zQLvzDOcpkkjeuoOtiU4y6Lp 20 | 9vjllBDkOvgg0ceY3vSVKvyni00qOYaTfVrUNot8aUwireHQKiZPRQ9UqysrVvK8 21 | PfXDcryJdiELLBj4V1XCQQKBgDOCde1lw3c5bCKFxM3+FbtmLsh5a71Xg/aZEC7E 22 | yHhU5JH4jxp8HRtUsImE1Aj1Ft44wnIV/4vk56jfH+x3RwURE2trfiFbOiDJA1o2 23 | xCmQB7oH+nwoIQ+TtxzKyJZH1wxtyVsRWmi2w7/a0gj7S88PWNrsi+eWWlcH4Cc5 24 | O1q9AoGANw3EWZ/z+48w5ZqEFcd0ThkY7uujknufRGM4v41tx5c2EWNBzHjXKtX5 25 | yXu11/uXmNCTQtzQHHUbJjYmhGOMrv8ohOeNOyzSpEoVtHeSllzNldLS7uVb3Fni 26 | Ae/BDC0ARoFjHcxAbusEF5vqhSSro22nMQIoJ8h7TILT9Cj0j4g= 27 | -----END RSA PRIVATE KEY----- 28 | -------------------------------------------------------------------------------- /chapter7/docker/docker-compose.yml: -------------------------------------------------------------------------------- 1 | zookeeper1: 2 | image: bahaaldine/docker-zookeeper 3 | volumes: 4 | - "kafka/server1:/etc/kafka" 5 | - "logs/logstash-zookeeper1:/opt/kafka/logs/" 6 | ports: 7 | - "2181:2181" 8 | extra_hosts: 9 | - "brokers:192.168.59.103" 10 | 11 | kafka1: 12 | image: bahaaldine/docker-kafka 13 | volumes: 14 | - "kafka/server1:/etc/kafka" 15 | - "logs/logstash-kafka1:/opt/kafka/logs/" 16 | extra_hosts: 17 | - "brokers:192.168.59.103" 18 | ports: 19 | - "9092:9092" 20 | 21 | kafka2: 22 | image: bahaaldine/docker-kafka 23 | volumes: 24 | - "kafka/server2:/etc/kafka" 25 | - "logs/logstash-kafka2:/opt/kafka/logs/" 26 | extra_hosts: 27 | - "brokers:192.168.59.103" 28 | links: 29 | - "kafka1" 30 | ports: 31 | - "9093:9093" 32 | 33 | logstashProcessor1: 34 | image: bahaaldine/docker-logstash-agent 35 | volumes: 36 | - "logstash/processor:/etc/logstash" 37 | - "security:/etc/logstash/security" 38 | - "logs/logstash-processor1:/var/log/logstash" 39 | links: 40 | - kafka1 41 | ports: 42 | - "5043:5043" 43 | 44 | elasticsearch1: 45 | image: bahaaldine/docker-elasticsearch 46 | ports: 47 | - "9200:9200" 48 | volumes: 49 | - "logs/elasticsearch1:/var/log/elasticsearch" 50 | - 
"templates:/etc/elasticsearch/templates" 51 | extra_hosts: 52 | - "elasticsearch:192.168.59.103" 53 | 54 | logstashIndexer1: 55 | image: bahaaldine/docker-logstash-agent 56 | volumes: 57 | - "logstash/indexer:/etc/logstash" 58 | - "logs/logstash-indexer1:/var/log/logstash" 59 | links: 60 | - elasticsearch1 61 | extra_hosts: 62 | - "brokers:192.168.59.103" 63 | 64 | logstashForwarder: 65 | image: bahaaldine/docker-logstash-forwarder 66 | volumes: 67 | - "logstash/forwarder:/etc/logstash-forwarder" 68 | - "security:/etc/logstash-forwarder/security" 69 | - "logs/logstash-forwarder1:/tmp/logs/" 70 | - "source:/tmp/source" 71 | extra_hosts: 72 | - "processors:192.168.59.103" 73 | kibana1: 74 | image: bahaaldine/docker-kibana 75 | ports: 76 | - "5601:5601" 77 | volumes: 78 | - "logs/kibana:/var/log/kibana" 79 | extra_hosts: 80 | - "elasticsearch:192.168.59.103" -------------------------------------------------------------------------------- /chapter5/spark-scala-streamer/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | 4.0.0 6 | org.apress 7 | spark-scala-streamer 8 | jar 9 | Apress Streamer 10 | http://spark.apache.org/ 11 | 1.0.0 12 | 13 | 14 | 1.4.0 15 | 2.10 16 | 2.10.0 17 | 1.7.7 18 | 19 | compile 20 | 21 | 22 | 23 | 24 | org.apache.spark 25 | spark-core_${scala.binary.version} 26 | ${spark.version} 27 | provided 28 | 29 | 30 | org.apache.spark 31 | spark-streaming_${scala.binary.version} 32 | ${spark.version} 33 | provided 34 | 35 | 36 | org.apache.spark 37 | spark-mllib_${scala.binary.version} 38 | ${spark.version} 39 | provided 40 | 41 | 42 | org.apache.spark 43 | spark-streaming-kafka_${scala.binary.version} 44 | ${spark.version} 45 | 46 | 47 | com.google.guava 48 | guava 49 | 14.0.1 50 | 51 | 52 | org.scala-lang 53 | scala-library 54 | ${scala.version} 55 | 56 | 57 | org.elasticsearch 58 | elasticsearch-hadoop 59 | 2.1.0 60 | 61 | 62 | org.apache.avro 63 | avro 64 | ${avro.version} 65 | ${hadoop.deps.scope} 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | net.alchim31.maven 75 | scala-maven-plugin 76 | 3.1.5 77 | 78 | 79 | org.apache.maven.plugins 80 | maven-compiler-plugin 81 | 2.0.2 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | net.alchim31.maven 90 | scala-maven-plugin 91 | 92 | 93 | 94 | 95 | maven-assembly-plugin 96 | 2.4 97 | 98 | 99 | jar-with-dependencies 100 | 101 | 102 | 103 | 104 | make-assembly 105 | package 106 | 107 | single 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | -------------------------------------------------------------------------------- /chapter6/spark-enrich-and-ml/pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | 4.0.0 6 | org.apache.spark.examples 7 | spark-enrich-and-ml 8 | jar 9 | Spark enrichment and machine learning 10 | http://spark.apache.org/ 11 | 1.0.0 12 | 13 | 14 | 1.4.0 15 | 2.10 16 | 2.10.0 17 | 1.7.7 18 | 19 | compile 20 | 21 | 22 | 23 | 24 | org.apache.spark 25 | spark-core_${scala.binary.version} 26 | ${spark.version} 27 | provided 28 | 29 | 30 | org.apache.spark 31 | spark-streaming_${scala.binary.version} 32 | ${spark.version} 33 | provided 34 | 35 | 36 | org.apache.spark 37 | spark-mllib_${scala.binary.version} 38 | ${spark.version} 39 | provided 40 | 41 | 42 | org.apache.spark 43 | spark-streaming-kafka_${scala.binary.version} 44 | ${spark.version} 45 | 46 | 47 | com.google.guava 48 | guava 49 | 14.0.1 50 | 51 | 52 | org.scala-lang 53 | scala-library 54 | ${scala.version} 55 | 56 | 57 | org.elasticsearch 58 | 
elasticsearch-hadoop 59 | 2.1.0 60 | 61 | 62 | org.apache.avro 63 | avro 64 | ${avro.version} 65 | ${hadoop.deps.scope} 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | net.alchim31.maven 75 | scala-maven-plugin 76 | 3.1.5 77 | 78 | 79 | org.apache.maven.plugins 80 | maven-compiler-plugin 81 | 2.0.2 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | net.alchim31.maven 90 | scala-maven-plugin 91 | 92 | 93 | 94 | 95 | maven-assembly-plugin 96 | 2.4 97 | 98 | 99 | jar-with-dependencies 100 | 101 | 102 | 103 | 104 | make-assembly 105 | package 106 | 107 | single 108 | 109 | 110 | 111 | 112 | 113 | 114 | 115 | 116 | -------------------------------------------------------------------------------- /chapter5/spark-scala-streamer/src/main/scala/org/apress/examples/chapter4/KafkaStreamer.scala: -------------------------------------------------------------------------------- 1 | package org.apress.examples.chapter4 2 | 3 | import kafka.serializer.StringDecoder 4 | import org.apache.spark.streaming._ 5 | import org.apache.spark.streaming.kafka._ 6 | import org.apache.spark.SparkConf 7 | import org.apache.spark.streaming.dstream.DStream.toPairDStreamFunctions 8 | import org.apache.spark.SparkContext 9 | import org.apache.spark.sql.SQLContext 10 | import scala.util.parsing.json.JSON 11 | import org.elasticsearch.spark._ 12 | import org.elasticsearch.spark.rdd.EsSpark 13 | import scala.collection.mutable.HashMap 14 | import breeze.linalg.Axis._0 15 | import org.apache.spark.rdd.RDD 16 | import scala.collection.mutable.MutableList 17 | 18 | case class PageStatistic ( 19 | verbs:List[Map[String, Integer]] 20 | ) 21 | 22 | case class Clickstream ( 23 | message:String, 24 | version:String, 25 | file:String, 26 | host:String, 27 | offset:String, 28 | eventType:String, 29 | clientip:String, 30 | ident:String, 31 | auth:String, 32 | timestamp:String, 33 | verb:String, 34 | request:String, 35 | httpVersion:String, 36 | response:String, 37 | bytes:Integer, 38 | referrer:String, 39 | agent:String 40 | ) 41 | 42 | object KafkaStreamer { 43 | def main(args: Array[String]) { 44 | if (args.length < 2) { 45 | System.err.println(s""" 46 | |Usage: DirectKafkaWordCount 47 | | is a list of one or more Kafka brokers 48 | | is a list of one or more kafka topics to consume from 49 | | 50 | """.stripMargin) 51 | System.exit(1) 52 | } 53 | 54 | StreamingExamples.setStreamingLogLevels() 55 | 56 | // Create context with 2 second batch interval 57 | val sparkConf = new SparkConf().setAppName("KafkaStreamerToElasticsearch") 58 | sparkConf.set("es.index.auto.create", "true") 59 | sparkConf.set("es.nodes", "localhost:9200") 60 | //sparkConf.set("es.net.http.auth.user", "bahaaldine") 61 | //sparkConf.set("es.net.http.auth.pass", "bazarmi") 62 | val ssc = new StreamingContext(sparkConf, Seconds(2)) 63 | 64 | // Create direct kafka stream with brokers and topics 65 | val Array(brokers, topics) = args 66 | val topicsSet = topics.split(",").toSet 67 | val kafkaParams = Map[String, String]("metadata.broker.list" -> brokers) 68 | val messages = KafkaUtils.createDirectStream[String, String, StringDecoder, StringDecoder]( 69 | ssc, kafkaParams, topicsSet) 70 | // Get the lines, split them into words, count the words and print 71 | val lines = messages.map(_._2) 72 | val parsedEvents = lines.map(JSON.parseFull(_)).map(_.get.asInstanceOf[scala.collection.immutable.Map[String,Any]]) 73 | val events = parsedEvents.map(data=>Clickstream( 74 | data("message").toString 75 | ,data("@version").toString 76 | ,data("file").toString 77 | ,data("host").toString 78 | 
,data("offset").toString 79 | ,data("type").toString 80 | ,data("clientip").toString 81 | ,data("ident").toString 82 | ,data("auth").toString 83 | ,data("timestamp").toString 84 | ,data("verb").toString 85 | ,data("request").toString 86 | ,data("httpversion").toString 87 | ,data("response").toString 88 | ,Integer.parseInt(data("bytes").toString) 89 | ,data("referrer").toString 90 | ,data("agent").toString 91 | )) 92 | 93 | val counts = events.map(event => event.verb).countByValue() 94 | counts.print() 95 | 96 | counts.foreachRDD{ rdd => 97 | if (rdd.toLocalIterator.nonEmpty) { 98 | var array:Array[(String, Long)] = rdd.collect() 99 | EsSpark.saveToEs(rdd, "spark/clickstream") 100 | //EsSpark.saveToEs(ssc.sparkContext.makeRDD(Seq(Map("id" -> 123, array(0)._1 -> array(0)._2))), "spark/clickstream", Map("es.mapping.id" -> "id")) 101 | } 102 | } 103 | 104 | 105 | // Start the computation 106 | ssc.start() 107 | ssc.awaitTermination() 108 | } 109 | } 110 | -------------------------------------------------------------------------------- /chapter5/spark-scala-streamer/src/main/scala/clickstream/PageViewGenerator.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.apache.spark.examples.streaming.clickstream 19 | 20 | import java.net.ServerSocket 21 | import java.io.PrintWriter 22 | import util.Random 23 | 24 | /** Represents a page view on a website with associated dimension data. */ 25 | class PageView(val url : String, val status : Int, val zipCode : Int, val userID : Int) 26 | extends Serializable { 27 | override def toString() : String = { 28 | "%s\t%s\t%s\t%s\n".format(url, status, zipCode, userID) 29 | } 30 | } 31 | 32 | object PageView extends Serializable { 33 | def fromString(in : String) : PageView = { 34 | val parts = in.split("\t") 35 | new PageView(parts(0), parts(1).toInt, parts(2).toInt, parts(3).toInt) 36 | } 37 | } 38 | 39 | // scalastyle:off 40 | /** Generates streaming events to simulate page views on a website. 41 | * 42 | * This should be used in tandem with PageViewStream.scala. 
Example: 43 | * 44 | * To run the generator 45 | * `$ bin/run-example org.apache.spark.examples.streaming.clickstream.PageViewGenerator 44444 10` 46 | * To process the generated stream 47 | * `$ bin/run-example \ 48 | * org.apache.spark.examples.streaming.clickstream.PageViewStream errorRatePerZipCode localhost 44444` 49 | * 50 | */ 51 | // scalastyle:on 52 | object PageViewGenerator { 53 | val pages = Map("http://foo.com/" -> .7, 54 | "http://foo.com/news" -> 0.2, 55 | "http://foo.com/contact" -> .1) 56 | val httpStatus = Map(200 -> .95, 57 | 404 -> .05) 58 | val userZipCode = Map(94709 -> .5, 59 | 94117 -> .5) 60 | val userID = Map((1 to 100).map(_ -> .01) : _*) 61 | 62 | def pickFromDistribution[T](inputMap : Map[T, Double]) : T = { 63 | val rand = new Random().nextDouble() 64 | var total = 0.0 65 | for ((item, prob) <- inputMap) { 66 | total = total + prob 67 | if (total > rand) { 68 | return item 69 | } 70 | } 71 | inputMap.take(1).head._1 // Shouldn't get here if probabilities add up to 1.0 72 | } 73 | 74 | def getNextClickEvent() : String = { 75 | val id = pickFromDistribution(userID) 76 | val page = pickFromDistribution(pages) 77 | val status = pickFromDistribution(httpStatus) 78 | val zipCode = pickFromDistribution(userZipCode) 79 | new PageView(page, status, zipCode, id).toString() 80 | } 81 | 82 | def main(args : Array[String]) { 83 | if (args.length != 2) { 84 | System.err.println("Usage: PageViewGenerator ") 85 | System.exit(1) 86 | } 87 | val port = args(0).toInt 88 | val viewsPerSecond = args(1).toFloat 89 | val sleepDelayMs = (1000.0 / viewsPerSecond).toInt 90 | val listener = new ServerSocket(port) 91 | println("Listening on port: " + port) 92 | 93 | while (true) { 94 | val socket = listener.accept() 95 | new Thread() { 96 | override def run(): Unit = { 97 | println("Got client connected from: " + socket.getInetAddress) 98 | val out = new PrintWriter(socket.getOutputStream(), true) 99 | 100 | while (true) { 101 | Thread.sleep(sleepDelayMs) 102 | out.write(getNextClickEvent()) 103 | out.flush() 104 | } 105 | socket.close() 106 | } 107 | }.start() 108 | } 109 | } 110 | } 111 | -------------------------------------------------------------------------------- /chapter5/spark-scala-streamer/src/main/scala/clickstream/PageViewStream.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | 18 | package org.apache.spark.examples.streaming.clickstream 19 | 20 | import org.apache.spark.SparkContext._ 21 | import org.apache.spark.streaming.{Seconds, StreamingContext} 22 | import org.apress.examples.chapter4.StreamingExamples 23 | // scalastyle:off 24 | /** Analyses a streaming dataset of web page views. 
This class demonstrates several types of 25 | * operators available in Spark streaming. 26 | * 27 | * This should be used in tandem with PageViewStream.scala. Example: 28 | * To run the generator 29 | * `$ bin/run-example org.apache.spark.examples.streaming.clickstream.PageViewGenerator 44444 10` 30 | * To process the generated stream 31 | * `$ bin/run-example \ 32 | * org.apache.spark.examples.streaming.clickstream.PageViewStream errorRatePerZipCode localhost 44444` 33 | */ 34 | // scalastyle:on 35 | object PageViewStream { 36 | def main(args: Array[String]) { 37 | if (args.length != 3) { 38 | System.err.println("Usage: PageViewStream ") 39 | System.err.println(" must be one of pageCounts, slidingPageCounts," + 40 | " errorRatePerZipCode, activeUserCount, popularUsersSeen") 41 | System.exit(1) 42 | } 43 | StreamingExamples.setStreamingLogLevels() 44 | val metric = args(0) 45 | val host = args(1) 46 | val port = args(2).toInt 47 | 48 | // Create the context 49 | val ssc = new StreamingContext("local[2]", "PageViewStream", Seconds(1), 50 | System.getenv("SPARK_HOME"), StreamingContext.jarOfClass(this.getClass).toSeq) 51 | 52 | // Create a ReceiverInputDStream on target host:port and convert each line to a PageView 53 | val pageViews = ssc.socketTextStream(host, port) 54 | .flatMap(_.split("\n")) 55 | .map(PageView.fromString(_)) 56 | 57 | // Return a count of views per URL seen in each batch 58 | val pageCounts = pageViews.map(view => view.url).countByValue() 59 | 60 | // Return a sliding window of page views per URL in the last ten seconds 61 | val slidingPageCounts = pageViews.map(view => view.url) 62 | .countByValueAndWindow(Seconds(10), Seconds(2)) 63 | 64 | 65 | // Return the rate of error pages (a non 200 status) in each zip code over the last 30 seconds 66 | val statusesPerZipCode = pageViews.window(Seconds(30), Seconds(2)) 67 | .map(view => ((view.zipCode, view.status))) 68 | .groupByKey() 69 | val errorRatePerZipCode = statusesPerZipCode.map{ 70 | case(zip, statuses) => 71 | val normalCount = statuses.filter(_ == 200).size 72 | val errorCount = statuses.size - normalCount 73 | val errorRatio = errorCount.toFloat / statuses.size 74 | if (errorRatio > 0.05) { 75 | "%s: **%s**".format(zip, errorRatio) 76 | } else { 77 | "%s: %s".format(zip, errorRatio) 78 | } 79 | } 80 | 81 | // Return the number unique users in last 15 seconds 82 | val activeUserCount = pageViews.window(Seconds(15), Seconds(2)) 83 | .map(view => (view.userID, 1)) 84 | .groupByKey() 85 | .count() 86 | .map("Unique active users: " + _) 87 | 88 | // An external dataset we want to join to this stream 89 | val userList = ssc.sparkContext.parallelize( 90 | Map(1 -> "Patrick Wendell", 2->"Reynold Xin", 3->"Matei Zaharia").toSeq) 91 | 92 | metric match { 93 | case "pageCounts" => pageCounts.print() 94 | case "slidingPageCounts" => slidingPageCounts.print() 95 | case "errorRatePerZipCode" => errorRatePerZipCode.print() 96 | case "activeUserCount" => activeUserCount.print() 97 | case "popularUsersSeen" => 98 | // Look for users in our existing dataset and print it out if we have a match 99 | pageViews.map(view => (view.userID, 1)) 100 | .foreachRDD((rdd, time) => rdd.join(userList) 101 | .map(_._2._2) 102 | .take(10) 103 | .foreach(u => println("Saw user %s at time %s".format(u, time)))) 104 | case _ => println("Invalid metric entered: " + metric) 105 | } 106 | 107 | ssc.start() 108 | } 109 | } 110 | -------------------------------------------------------------------------------- 
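PageViewGenerator and PageViewStream above are the stock Spark Streaming clickstream examples and are meant to be run in tandem. The commands below simply restate the invocations given in their scaladoc comments; they assume a Spark distribution where bin/run-example is available:

# terminal 1: generate a stream of page views on port 44444 at 10 views per second
bin/run-example org.apache.spark.examples.streaming.clickstream.PageViewGenerator 44444 10

# terminal 2: consume the stream and print one metric
# (pageCounts, slidingPageCounts, errorRatePerZipCode, activeUserCount, popularUsersSeen)
bin/run-example org.apache.spark.examples.streaming.clickstream.PageViewStream errorRatePerZipCode localhost 44444
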
/chapter7/docker/kafka/server1/server.properties: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # see kafka.server.KafkaConfig for additional details and defaults 16 | 17 | ############################# Server Basics ############################# 18 | 19 | # The id of the broker. This must be set to a unique integer for each broker. 20 | broker.id=1 21 | 22 | ############################# Socket Server Settings ############################# 23 | 24 | # The port the socket server listens on 25 | port=9092 26 | 27 | # Hostname the broker will bind to. If not set, the server will bind to all interfaces 28 | #host.name=brokers 29 | 30 | # Hostname the broker will advertise to producers and consumers. If not set, it uses the 31 | # value for "host.name" if configured. Otherwise, it will use the value returned from 32 | # java.net.InetAddress.getCanonicalHostName(). 33 | advertised.host.name=192.168.59.103 34 | 35 | # The port to publish to ZooKeeper for clients to use. If this is not set, 36 | # it will publish the same port that the broker binds to. 37 | #advertised.port= 38 | 39 | # The number of threads handling network requests 40 | num.network.threads=3 41 | 42 | # The number of threads doing disk I/O 43 | num.io.threads=8 44 | 45 | # The send buffer (SO_SNDBUF) used by the socket server 46 | socket.send.buffer.bytes=102400 47 | 48 | # The receive buffer (SO_RCVBUF) used by the socket server 49 | socket.receive.buffer.bytes=102400 50 | 51 | # The maximum size of a request that the socket server will accept (protection against OOM) 52 | socket.request.max.bytes=104857600 53 | 54 | 55 | ############################# Log Basics ############################# 56 | 57 | # A comma seperated list of directories under which to store log files 58 | log.dirs=/tmp/kafka-logs-1 59 | 60 | # The default number of log partitions per topic. More partitions allow greater 61 | # parallelism for consumption, but this will also result in more files across 62 | # the brokers. 63 | num.partitions=1 64 | 65 | # The number of threads per data directory to be used for log recovery at startup and flushing at shutdown. 66 | # This value is recommended to be increased for installations with data dirs located in RAID array. 67 | num.recovery.threads.per.data.dir=1 68 | 69 | ############################# Log Flush Policy ############################# 70 | 71 | # Messages are immediately written to the filesystem but by default we only fsync() to sync 72 | # the OS cache lazily. The following configurations control the flush of data to disk. 73 | # There are a few important trade-offs here: 74 | # 1. 
Durability: Unflushed data may be lost if you are not using replication. 75 | # 2. Latency: Very large flush intervals may lead to latency spikes when the flush does occur as there will be a lot of data to flush. 76 | # 3. Throughput: The flush is generally the most expensive operation, and a small flush interval may lead to exceessive seeks. 77 | # The settings below allow one to configure the flush policy to flush data after a period of time or 78 | # every N messages (or both). This can be done globally and overridden on a per-topic basis. 79 | 80 | # The number of messages to accept before forcing a flush of data to disk 81 | #log.flush.interval.messages=10000 82 | 83 | # The maximum amount of time a message can sit in a log before we force a flush 84 | #log.flush.interval.ms=1000 85 | 86 | ############################# Log Retention Policy ############################# 87 | 88 | # The following configurations control the disposal of log segments. The policy can 89 | # be set to delete segments after a period of time, or after a given size has accumulated. 90 | # A segment will be deleted whenever *either* of these criteria are met. Deletion always happens 91 | # from the end of the log. 92 | 93 | # The minimum age of a log file to be eligible for deletion 94 | log.retention.hours=168 95 | 96 | # A size-based retention policy for logs. Segments are pruned from the log as long as the remaining 97 | # segments don't drop below log.retention.bytes. 98 | #log.retention.bytes=1073741824 99 | 100 | # The maximum size of a log segment file. When this size is reached a new log segment will be created. 101 | log.segment.bytes=1073741824 102 | 103 | # The interval at which log segments are checked to see if they can be deleted according 104 | # to the retention policies 105 | log.retention.check.interval.ms=300000 106 | 107 | # By default the log cleaner is disabled and the log retention policy will default to just delete segments after their retention expires. 108 | # If log.cleaner.enable=true is set the cleaner will be enabled and individual logs can then be marked for log compaction. 109 | log.cleaner.enable=false 110 | 111 | ############################# Zookeeper ############################# 112 | 113 | # Zookeeper connection string (see zookeeper docs for details). 114 | # This is a comma separated host:port pairs, each corresponding to a zk 115 | # server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002". 116 | # You can also append an optional chroot string to the urls to specify the 117 | # root directory for all kafka znodes. 118 | zookeeper.connect=brokers:2181 119 | 120 | # Timeout in ms for connecting to zookeeper 121 | zookeeper.connection.timeout.ms=6000 122 | -------------------------------------------------------------------------------- /chapter7/docker/kafka/server2/server.properties: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. 
You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # see kafka.server.KafkaConfig for additional details and defaults 16 | 17 | ############################# Server Basics ############################# 18 | 19 | # The id of the broker. This must be set to a unique integer for each broker. 20 | broker.id=2 21 | 22 | ############################# Socket Server Settings ############################# 23 | 24 | # The port the socket server listens on 25 | port=9093 26 | 27 | # Hostname the broker will bind to. If not set, the server will bind to all interfaces 28 | #host.name=brokers 29 | 30 | # Hostname the broker will advertise to producers and consumers. If not set, it uses the 31 | # value for "host.name" if configured. Otherwise, it will use the value returned from 32 | # java.net.InetAddress.getCanonicalHostName(). 33 | advertised.host.name=192.168.59.103 34 | 35 | # The port to publish to ZooKeeper for clients to use. If this is not set, 36 | # it will publish the same port that the broker binds to. 37 | #advertised.port= 38 | 39 | # The number of threads handling network requests 40 | num.network.threads=3 41 | 42 | # The number of threads doing disk I/O 43 | num.io.threads=8 44 | 45 | # The send buffer (SO_SNDBUF) used by the socket server 46 | socket.send.buffer.bytes=102400 47 | 48 | # The receive buffer (SO_RCVBUF) used by the socket server 49 | socket.receive.buffer.bytes=102400 50 | 51 | # The maximum size of a request that the socket server will accept (protection against OOM) 52 | socket.request.max.bytes=104857600 53 | 54 | 55 | ############################# Log Basics ############################# 56 | 57 | # A comma seperated list of directories under which to store log files 58 | log.dirs=/tmp/kafka-logs-2 59 | 60 | # The default number of log partitions per topic. More partitions allow greater 61 | # parallelism for consumption, but this will also result in more files across 62 | # the brokers. 63 | num.partitions=1 64 | 65 | # The number of threads per data directory to be used for log recovery at startup and flushing at shutdown. 66 | # This value is recommended to be increased for installations with data dirs located in RAID array. 67 | num.recovery.threads.per.data.dir=1 68 | 69 | ############################# Log Flush Policy ############################# 70 | 71 | # Messages are immediately written to the filesystem but by default we only fsync() to sync 72 | # the OS cache lazily. The following configurations control the flush of data to disk. 73 | # There are a few important trade-offs here: 74 | # 1. Durability: Unflushed data may be lost if you are not using replication. 75 | # 2. Latency: Very large flush intervals may lead to latency spikes when the flush does occur as there will be a lot of data to flush. 76 | # 3. Throughput: The flush is generally the most expensive operation, and a small flush interval may lead to exceessive seeks. 77 | # The settings below allow one to configure the flush policy to flush data after a period of time or 78 | # every N messages (or both). This can be done globally and overridden on a per-topic basis. 
79 | 80 | # The number of messages to accept before forcing a flush of data to disk 81 | #log.flush.interval.messages=10000 82 | 83 | # The maximum amount of time a message can sit in a log before we force a flush 84 | #log.flush.interval.ms=1000 85 | 86 | ############################# Log Retention Policy ############################# 87 | 88 | # The following configurations control the disposal of log segments. The policy can 89 | # be set to delete segments after a period of time, or after a given size has accumulated. 90 | # A segment will be deleted whenever *either* of these criteria are met. Deletion always happens 91 | # from the end of the log. 92 | 93 | # The minimum age of a log file to be eligible for deletion 94 | log.retention.hours=168 95 | 96 | # A size-based retention policy for logs. Segments are pruned from the log as long as the remaining 97 | # segments don't drop below log.retention.bytes. 98 | #log.retention.bytes=1073741824 99 | 100 | # The maximum size of a log segment file. When this size is reached a new log segment will be created. 101 | log.segment.bytes=1073741824 102 | 103 | # The interval at which log segments are checked to see if they can be deleted according 104 | # to the retention policies 105 | log.retention.check.interval.ms=300000 106 | 107 | # By default the log cleaner is disabled and the log retention policy will default to just delete segments after their retention expires. 108 | # If log.cleaner.enable=true is set the cleaner will be enabled and individual logs can then be marked for log compaction. 109 | log.cleaner.enable=false 110 | 111 | ############################# Zookeeper ############################# 112 | 113 | # Zookeeper connection string (see zookeeper docs for details). 114 | # This is a comma separated host:port pairs, each corresponding to a zk 115 | # server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002". 116 | # You can also append an optional chroot string to the urls to specify the 117 | # root directory for all kafka znodes. 118 | zookeeper.connect=brokers:2181 119 | 120 | # Timeout in ms for connecting to zookeeper 121 | zookeeper.connection.timeout.ms=6000 122 | -------------------------------------------------------------------------------- /chapter4/kafka/server-1.properties: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # see kafka.server.KafkaConfig for additional details and defaults 16 | 17 | ############################# Server Basics ############################# 18 | 19 | # The id of the broker. This must be set to a unique integer for each broker. 
20 | broker.id=1 21 | 22 | ############################# Socket Server Settings ############################# 23 | 24 | # The port the socket server listens on 25 | port=9092 26 | 27 | # Hostname the broker will bind to. If not set, the server will bind to all interfaces 28 | #host.name=localhost 29 | 30 | # Hostname the broker will advertise to producers and consumers. If not set, it uses the 31 | # value for "host.name" if configured. Otherwise, it will use the value returned from 32 | # java.net.InetAddress.getCanonicalHostName(). 33 | #advertised.host.name= 34 | 35 | # The port to publish to ZooKeeper for clients to use. If this is not set, 36 | # it will publish the same port that the broker binds to. 37 | #advertised.port= 38 | 39 | # The number of threads handling network requests 40 | num.network.threads=3 41 | 42 | # The number of threads doing disk I/O 43 | num.io.threads=8 44 | 45 | # The send buffer (SO_SNDBUF) used by the socket server 46 | socket.send.buffer.bytes=102400 47 | 48 | # The receive buffer (SO_RCVBUF) used by the socket server 49 | socket.receive.buffer.bytes=102400 50 | 51 | # The maximum size of a request that the socket server will accept (protection against OOM) 52 | socket.request.max.bytes=104857600 53 | 54 | 55 | ############################# Log Basics ############################# 56 | 57 | # A comma seperated list of directories under which to store log files 58 | log.dirs=/tmp/kafka-logs-1 59 | 60 | # The default number of log partitions per topic. More partitions allow greater 61 | # parallelism for consumption, but this will also result in more files across 62 | # the brokers. 63 | num.partitions=1 64 | 65 | # The number of threads per data directory to be used for log recovery at startup and flushing at shutdown. 66 | # This value is recommended to be increased for installations with data dirs located in RAID array. 67 | num.recovery.threads.per.data.dir=1 68 | 69 | ############################# Log Flush Policy ############################# 70 | 71 | # Messages are immediately written to the filesystem but by default we only fsync() to sync 72 | # the OS cache lazily. The following configurations control the flush of data to disk. 73 | # There are a few important trade-offs here: 74 | # 1. Durability: Unflushed data may be lost if you are not using replication. 75 | # 2. Latency: Very large flush intervals may lead to latency spikes when the flush does occur as there will be a lot of data to flush. 76 | # 3. Throughput: The flush is generally the most expensive operation, and a small flush interval may lead to exceessive seeks. 77 | # The settings below allow one to configure the flush policy to flush data after a period of time or 78 | # every N messages (or both). This can be done globally and overridden on a per-topic basis. 79 | 80 | # The number of messages to accept before forcing a flush of data to disk 81 | #log.flush.interval.messages=10000 82 | 83 | # The maximum amount of time a message can sit in a log before we force a flush 84 | #log.flush.interval.ms=1000 85 | 86 | ############################# Log Retention Policy ############################# 87 | 88 | # The following configurations control the disposal of log segments. The policy can 89 | # be set to delete segments after a period of time, or after a given size has accumulated. 90 | # A segment will be deleted whenever *either* of these criteria are met. Deletion always happens 91 | # from the end of the log. 
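# Worked example with the values below: segments become eligible for deletion after
# log.retention.hours=168 (7 days), each segment is rolled once it reaches
# log.segment.bytes=1073741824 (1 GiB), and because log.retention.bytes is left
# commented out, only the time-based rule applies in this configuration.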
92 | 93 | # The minimum age of a log file to be eligible for deletion 94 | log.retention.hours=168 95 | 96 | # A size-based retention policy for logs. Segments are pruned from the log as long as the remaining 97 | # segments don't drop below log.retention.bytes. 98 | #log.retention.bytes=1073741824 99 | 100 | # The maximum size of a log segment file. When this size is reached a new log segment will be created. 101 | log.segment.bytes=1073741824 102 | 103 | # The interval at which log segments are checked to see if they can be deleted according 104 | # to the retention policies 105 | log.retention.check.interval.ms=300000 106 | 107 | # By default the log cleaner is disabled and the log retention policy will default to just delete segments after their retention expires. 108 | # If log.cleaner.enable=true is set the cleaner will be enabled and individual logs can then be marked for log compaction. 109 | log.cleaner.enable=false 110 | 111 | ############################# Zookeeper ############################# 112 | 113 | # Zookeeper connection string (see zookeeper docs for details). 114 | # This is a comma separated host:port pairs, each corresponding to a zk 115 | # server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002". 116 | # You can also append an optional chroot string to the urls to specify the 117 | # root directory for all kafka znodes. 118 | zookeeper.connect=localhost:2181 119 | 120 | # Timeout in ms for connecting to zookeeper 121 | zookeeper.connection.timeout.ms=6000 122 | -------------------------------------------------------------------------------- /chapter4/kafka/server-2.properties: -------------------------------------------------------------------------------- 1 | # Licensed to the Apache Software Foundation (ASF) under one or more 2 | # contributor license agreements. See the NOTICE file distributed with 3 | # this work for additional information regarding copyright ownership. 4 | # The ASF licenses this file to You under the Apache License, Version 2.0 5 | # (the "License"); you may not use this file except in compliance with 6 | # the License. You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # see kafka.server.KafkaConfig for additional details and defaults 16 | 17 | ############################# Server Basics ############################# 18 | 19 | # The id of the broker. This must be set to a unique integer for each broker. 20 | broker.id=2 21 | 22 | ############################# Socket Server Settings ############################# 23 | 24 | # The port the socket server listens on 25 | port=9093 26 | 27 | # Hostname the broker will bind to. If not set, the server will bind to all interfaces 28 | #host.name=localhost 29 | 30 | # Hostname the broker will advertise to producers and consumers. If not set, it uses the 31 | # value for "host.name" if configured. Otherwise, it will use the value returned from 32 | # java.net.InetAddress.getCanonicalHostName(). 33 | #advertised.host.name= 34 | 35 | # The port to publish to ZooKeeper for clients to use. If this is not set, 36 | # it will publish the same port that the broker binds to. 
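# Hypothetical example (not used in this configuration): if clients had to reach the
# broker through a different port than the one it binds to (for instance behind NAT
# or a container port mapping), the published port would be overridden here, e.g.
# advertised.port=19093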
37 | #advertised.port= 38 | 39 | # The number of threads handling network requests 40 | num.network.threads=3 41 | 42 | # The number of threads doing disk I/O 43 | num.io.threads=8 44 | 45 | # The send buffer (SO_SNDBUF) used by the socket server 46 | socket.send.buffer.bytes=102400 47 | 48 | # The receive buffer (SO_RCVBUF) used by the socket server 49 | socket.receive.buffer.bytes=102400 50 | 51 | # The maximum size of a request that the socket server will accept (protection against OOM) 52 | socket.request.max.bytes=104857600 53 | 54 | 55 | ############################# Log Basics ############################# 56 | 57 | # A comma seperated list of directories under which to store log files 58 | log.dirs=/tmp/kafka-logs-2 59 | 60 | # The default number of log partitions per topic. More partitions allow greater 61 | # parallelism for consumption, but this will also result in more files across 62 | # the brokers. 63 | num.partitions=1 64 | 65 | # The number of threads per data directory to be used for log recovery at startup and flushing at shutdown. 66 | # This value is recommended to be increased for installations with data dirs located in RAID array. 67 | num.recovery.threads.per.data.dir=1 68 | 69 | ############################# Log Flush Policy ############################# 70 | 71 | # Messages are immediately written to the filesystem but by default we only fsync() to sync 72 | # the OS cache lazily. The following configurations control the flush of data to disk. 73 | # There are a few important trade-offs here: 74 | # 1. Durability: Unflushed data may be lost if you are not using replication. 75 | # 2. Latency: Very large flush intervals may lead to latency spikes when the flush does occur as there will be a lot of data to flush. 76 | # 3. Throughput: The flush is generally the most expensive operation, and a small flush interval may lead to exceessive seeks. 77 | # The settings below allow one to configure the flush policy to flush data after a period of time or 78 | # every N messages (or both). This can be done globally and overridden on a per-topic basis. 79 | 80 | # The number of messages to accept before forcing a flush of data to disk 81 | #log.flush.interval.messages=10000 82 | 83 | # The maximum amount of time a message can sit in a log before we force a flush 84 | #log.flush.interval.ms=1000 85 | 86 | ############################# Log Retention Policy ############################# 87 | 88 | # The following configurations control the disposal of log segments. The policy can 89 | # be set to delete segments after a period of time, or after a given size has accumulated. 90 | # A segment will be deleted whenever *either* of these criteria are met. Deletion always happens 91 | # from the end of the log. 92 | 93 | # The minimum age of a log file to be eligible for deletion 94 | log.retention.hours=168 95 | 96 | # A size-based retention policy for logs. Segments are pruned from the log as long as the remaining 97 | # segments don't drop below log.retention.bytes. 98 | #log.retention.bytes=1073741824 99 | 100 | # The maximum size of a log segment file. When this size is reached a new log segment will be created. 101 | log.segment.bytes=1073741824 102 | 103 | # The interval at which log segments are checked to see if they can be deleted according 104 | # to the retention policies 105 | log.retention.check.interval.ms=300000 106 | 107 | # By default the log cleaner is disabled and the log retention policy will default to just delete segments after their retention expires. 
108 | # If log.cleaner.enable=true is set the cleaner will be enabled and individual logs can then be marked for log compaction. 109 | log.cleaner.enable=false 110 | 111 | ############################# Zookeeper ############################# 112 | 113 | # Zookeeper connection string (see zookeeper docs for details). 114 | # This is a comma separated host:port pairs, each corresponding to a zk 115 | # server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002". 116 | # You can also append an optional chroot string to the urls to specify the 117 | # root directory for all kafka znodes. 118 | zookeeper.connect=localhost:2181 119 | 120 | # Timeout in ms for connecting to zookeeper 121 | zookeeper.connection.timeout.ms=6000 122 | -------------------------------------------------------------------------------- /chapter6/spark-enrich-and-ml/src/main/scala/org/apache/spark/examples/SparkEnricher.scala: -------------------------------------------------------------------------------- 1 | package org.apache.spark.examples 2 | 3 | import kafka.serializer.StringDecoder 4 | import org.apache.spark.streaming._ 5 | import org.apache.spark.streaming.kafka._ 6 | import org.apache.spark.SparkConf 7 | import org.apache.spark.streaming.dstream.DStream.toPairDStreamFunctions 8 | import org.apache.spark.SparkContext 9 | import org.apache.spark.sql.SQLContext 10 | import scala.util.parsing.json.JSON 11 | import org.elasticsearch.spark._ 12 | import org.elasticsearch.spark.rdd.EsSpark 13 | import scala.collection.mutable.HashMap 14 | import breeze.linalg.Axis._0 15 | import org.apache.spark.rdd.RDD 16 | import scala.collection.mutable.MutableList 17 | import org.apache.spark.mllib.linalg.Vectors 18 | import org.apache.spark.mllib.clustering.KMeans 19 | import org.apache.spark.mllib.clustering.StreamingKMeans 20 | import org.apache.spark.mllib.regression.LabeledPoint 21 | 22 | case class Clickstream ( 23 | message:String, 24 | version:String, 25 | file:String, 26 | host:String, 27 | offset:String, 28 | eventType:String, 29 | clientip:String, 30 | ident:String, 31 | auth:String, 32 | timestamp:String, 33 | verb:String, 34 | request:String, 35 | httpVersion:String, 36 | response:String, 37 | bytes:Integer, 38 | referrer:String, 39 | agent:String 40 | ) 41 | 42 | case class Customer ( 43 | session:String, 44 | request:String, 45 | category:String 46 | ) 47 | 48 | 49 | object SparkEnricher { 50 | def main(args: Array[String]) { 51 | if (args.length < 2) { 52 | System.err.println(s""" 53 | |Usage: SparkEnricher <brokers> <topics> 54 | | <brokers> is a list of one or more Kafka brokers 55 | | <topics> is a list of one or more Kafka topics to consume from 56 | | 57 | """.stripMargin) 58 | System.exit(1) 59 | } 60 | 61 | StreamingExamples.setStreamingLogLevels() 62 | 63 | // Create context with 2 second batch interval 64 | val sparkConf = new SparkConf().setAppName("KafkaStreamerToElasticsearch") 65 | sparkConf.set("es.index.auto.create", "true") 66 | sparkConf.set("es.nodes", "localhost:9200") 67 | //sparkConf.set("es.net.http.auth.user", "bahaaldine") 68 | //sparkConf.set("es.net.http.auth.pass", "bazarmi") 69 | val ssc = new StreamingContext(sparkConf, Seconds(2)) 70 | 71 | val productCategoryMappingFile = ssc.sparkContext.textFile("/Users/bahaaldine/Google Drive/demo/v2/clickstream/generator/mappings.csv") 72 | val productCategoryMapping = productCategoryMappingFile.map(line => line.split(",")).map(x => (x(0),x(1))).collectAsMap() 73 | val categoryLabelMapping:scala.collection.Map[String,Double] = productCategoryMappingFile.map(line =>
line.split(",")).map(x => (x(1),x(2).toDouble)).collectAsMap() 74 | val brodcastProductCategoryMapping = ssc.sparkContext.broadcast(productCategoryMapping) 75 | val brodcastCategoryLabelMapping = ssc.sparkContext.broadcast(categoryLabelMapping) 76 | 77 | val customerMappingFile = ssc.sparkContext.textFile("/Users/bahaaldine/Google Drive/demo/v2/clickstream/generator/ip_mappings.csv") 78 | val ipLabelMapping:scala.collection.Map[String,Double] = customerMappingFile.map(line => line.split(",")).map(x => (x(0),x(1).toDouble)).collectAsMap() 79 | val brodcastIpLabelMapping = ssc.sparkContext.broadcast(ipLabelMapping) 80 | 81 | // Create direct kafka stream with brokers and topics 82 | val Array(brokers, topics) = args 83 | val topicsSet = topics.split(",").toSet 84 | val kafkaParams = Map[String, String]("metadata.broker.list" -> brokers) 85 | val messages = KafkaUtils.createDirectStream[String, String, StringDecoder, StringDecoder]( 86 | ssc, kafkaParams, topicsSet) 87 | // Extract the JSON payload of each Kafka (key, message) pair and parse it into a Clickstream event 88 | val lines = messages.map(_._2) 89 | val parsedEvents = lines.map(JSON.parseFull(_)).map(_.get.asInstanceOf[scala.collection.immutable.Map[String,Any]]) 90 | val events = parsedEvents.map(data=>Clickstream( 91 | data("message").toString 92 | ,data("@version").toString 93 | ,data("file").toString 94 | ,data("host").toString 95 | ,data("offset").toString 96 | ,data("type").toString 97 | ,data("clientip").toString 98 | ,data("ident").toString 99 | ,data("auth").toString 100 | ,data("timestamp").toString 101 | ,data("verb").toString 102 | ,data("request").toString 103 | ,data("httpversion").toString 104 | ,data("response").toString 105 | ,Integer.parseInt(data("bytes").toString) 106 | ,data("referrer").toString 107 | ,data("agent").toString 108 | )) 109 | 110 | // Creating and enriching the customer object 111 | val customers = events.map { clickstream => 112 | val lookupMap = brodcastProductCategoryMapping.value 113 | Customer(clickstream.clientip, clickstream.request, lookupMap.getOrElse(clickstream.request, "category not found")) 114 | } 115 | 116 | customers.foreachRDD{ rdd => 117 | if (rdd.toLocalIterator.nonEmpty) { 118 | EsSpark.saveToEs(rdd, "spark/customer") 119 | } 120 | } 121 | 122 | val trainingData = customers.map { customer => 123 | val categoryLookupMap = brodcastCategoryLabelMapping.value 124 | val customerLookupMap = brodcastIpLabelMapping.value 125 | 126 | val categoryLabel = categoryLookupMap.getOrElse(customer.category, 1.0) 127 | val customerLabel = customerLookupMap.getOrElse(customer.session, 1.0) 128 | 129 | Vectors.dense(Array(categoryLabel, customerLabel)) 130 | } 131 | 132 | val testData = customers.map { customer => 133 | val categoryLookupMap = brodcastCategoryLabelMapping.value 134 | val customerLookupMap = brodcastIpLabelMapping.value 135 | 136 | val categoryLabel = categoryLookupMap.getOrElse(customer.category, 1.0) 137 | val customerLabel = customerLookupMap.getOrElse(customer.session, 1.0) 138 | 139 | LabeledPoint(categoryLabel, Vectors.dense(Array(categoryLabel, customerLabel))) 140 | } 141 | 142 | val model = new StreamingKMeans() 143 | .setK(3) 144 | .setDecayFactor(1.0) 145 | .setRandomCenters(2, 0.0) 146 | 147 | model.trainOn(trainingData) 148 | model.predictOnValues(testData.map(lp => (lp.label, lp.features))).foreachRDD{ rdd => 149 | if (rdd.toLocalIterator.nonEmpty) { 150 | EsSpark.saveToEs(rdd, "spark/prediction") 151 | } 152 | } 153 | 154 |
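    // Recap of the pipeline built above: each clickstream event is enriched into a
    // Customer(session, request, category) and indexed into Elasticsearch under
    // "spark/customer"; customers are then mapped to dense (categoryLabel, customerLabel)
    // vectors, a StreamingKMeans model with k=3 clusters is updated on that stream,
    // and the resulting (label, predicted cluster) pairs are written to "spark/prediction"
    // on every micro-batch.
    //
    // Illustrative only (not in the original source): after a few batches the current
    // cluster centers could be printed for debugging with something like
    //   model.latestModel().clusterCenters.foreach(println)
    // assuming the version of Spark MLlib used here exposes StreamingKMeans.latestModel().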
155 | // Start the computation 156 | ssc.start() 157 | ssc.awaitTermination() 158 | } 159 | } 160 | -------------------------------------------------------------------------------- /chapter7/docker/logstash/.logstash-forwarder: -------------------------------------------------------------------------------- 1 | {"/Users/bahaaldine/Dropbox/apress/demo/chapter4/source/access_log_20150711-220259.log":{"source":"/Users/bahaaldine/Dropbox/apress/demo/chapter4/source/access_log_20150711-220259.log","offset":96859,"inode":7479218,"device":16777217},"/Users/bahaaldine/Dropbox/apress/demo/chapter4/source/access_log_20150712-000447.log":{"source":"/Users/bahaaldine/Dropbox/apress/demo/chapter4/source/access_log_20150712-000447.log","offset":972325,"inode":7504456,"device":16777217},"/Users/bahaaldine/Dropbox/apress/demo/chapter4/source/access_log_20150712-000741.log":{"source":"/Users/bahaaldine/Dropbox/apress/demo/chapter4/source/access_log_20150712-000741.log","offset":975794,"inode":7505868,"device":16777217},"/Users/bahaaldine/Dropbox/apress/demo/chapter4/source/access_log_20150712-001824.log":{"source":"/Users/bahaaldine/Dropbox/apress/demo/chapter4/source/access_log_20150712-001824.log","offset":647,"inode":7508694,"device":16777217},"/Users/bahaaldine/Dropbox/apress/demo/chapter4/source/access_log_20150712-001915.log":{"source":"/Users/bahaaldine/Dropbox/apress/demo/chapter4/source/access_log_20150712-001915.log","offset":174,"inode":7508875,"device":16777217},"/Users/bahaaldine/Dropbox/apress/demo/chapter4/source/access_log_20150712-093649.log":{"source":"/Users/bahaaldine/Dropbox/apress/demo/chapter4/source/access_log_20150712-093649.log","offset":222,"inode":7535349,"device":16777217},"/Users/bahaaldine/Dropbox/apress/demo/chapter4/source/access_log_20150712-140436.log":{"source":"/Users/bahaaldine/Dropbox/apress/demo/chapter4/source/access_log_20150712-140436.log","offset":166,"inode":7572409,"device":16777217},"/Users/bahaaldine/Dropbox/apress/demo/chapter4/source/access_log_20150712-142450.log":{"source":"/Users/bahaaldine/Dropbox/apress/demo/chapter4/source/access_log_20150712-142450.log","offset":971867,"inode":7578311,"device":16777217},"/Users/bahaaldine/Dropbox/apress/demo/chapter4/source/access_log_20150712-225306.log":{"source":"/Users/bahaaldine/Dropbox/apress/demo/chapter4/source/access_log_20150712-225306.log","offset":971300,"inode":7728720,"device":16777217},"/Users/bahaaldine/Dropbox/apress/demo/chapter4/source/access_log_20150713-134126.log":{"source":"/Users/bahaaldine/Dropbox/apress/demo/chapter4/source/access_log_20150713-134126.log","offset":242,"inode":7918600,"device":16777217},"/Users/bahaaldine/Dropbox/apress/demo/chapter4/source/access_log_20150713-134150.log":{"source":"/Users/bahaaldine/Dropbox/apress/demo/chapter4/source/access_log_20150713-134150.log","offset":241,"inode":7918743,"device":16777217},"/Users/bahaaldine/Dropbox/apress/demo/chapter4/source/access_log_20150713-134238.log":{"source":"/Users/bahaaldine/Dropbox/apress/demo/chapter4/source/access_log_20150713-134238.log","offset":974,"inode":7918888,"device":16777217},"/Users/bahaaldine/Dropbox/apress/demo/chapter4/source/access_log_20150713-134434.log":{"source":"/Users/bahaaldine/Dropbox/apress/demo/chapter4/source/access_log_20150713-134434.log","offset":179,"inode":7919307,"device":16777217},"/Users/bahaaldine/Dropbox/apress/demo/chapter4/source/access_log_20150713-135702.log":{"source":"/Users/bahaaldine/Dropbox/apress/demo/chapter4/source/access_log_20150713-135702.log","offset":234,"inode":792213
0,"device":16777217},"/Users/bahaaldine/Dropbox/apress/demo/chapter4/source/access_log_20150713-135855.log":{"source":"/Users/bahaaldine/Dropbox/apress/demo/chapter4/source/access_log_20150713-135855.log","offset":169,"inode":7922725,"device":16777217},"/Users/bahaaldine/Dropbox/apress/demo/chapter4/source/access_log_20150713-140105.log":{"source":"/Users/bahaaldine/Dropbox/apress/demo/chapter4/source/access_log_20150713-140105.log","offset":183,"inode":7923254,"device":16777217},"/Users/bahaaldine/Dropbox/apress/demo/chapter4/source/access_log_20150713-140440.log":{"source":"/Users/bahaaldine/Dropbox/apress/demo/chapter4/source/access_log_20150713-140440.log","offset":243,"inode":7925752,"device":16777217},"/Users/bahaaldine/Dropbox/apress/demo/chapter4/source/access_log_20150713-163017.log":{"source":"/Users/bahaaldine/Dropbox/apress/demo/chapter4/source/access_log_20150713-163017.log","offset":146,"inode":7965083,"device":16777217},"/Users/bahaaldine/Dropbox/apress/demo/chapter4/source/access_log_20150713-170127.log":{"source":"/Users/bahaaldine/Dropbox/apress/demo/chapter4/source/access_log_20150713-170127.log","offset":164,"inode":7975371,"device":16777217},"/Users/bahaaldine/Dropbox/apress/demo/chapter4/source/access_log_20150713-170639.log":{"source":"/Users/bahaaldine/Dropbox/apress/demo/chapter4/source/access_log_20150713-170639.log","offset":169,"inode":7977865,"device":16777217},"/Users/bahaaldine/Dropbox/apress/demo/chapter4/source/access_log_20150713-170655.log":{"source":"/Users/bahaaldine/Dropbox/apress/demo/chapter4/source/access_log_20150713-170655.log","offset":1848,"inode":7977973,"device":16777217},"/Users/bahaaldine/Dropbox/apress/demo/chapter4/source/access_log_20150713-173241.log":{"source":"/Users/bahaaldine/Dropbox/apress/demo/chapter4/source/access_log_20150713-173241.log","offset":236,"inode":7988866,"device":16777217},"/Users/bahaaldine/Dropbox/apress/demo/chapter4/source/access_log_20150713-173404.log":{"source":"/Users/bahaaldine/Dropbox/apress/demo/chapter4/source/access_log_20150713-173404.log","offset":252,"inode":7989373,"device":16777217},"/Users/bahaaldine/Dropbox/apress/demo/chapter4/source/access_log_20150713-173422.log":{"source":"/Users/bahaaldine/Dropbox/apress/demo/chapter4/source/access_log_20150713-173422.log","offset":1720,"inode":7989470,"device":16777217},"/Users/bahaaldine/Dropbox/apress/demo/chapter4/source/access_log_20150713-173621.log":{"source":"/Users/bahaaldine/Dropbox/apress/demo/chapter4/source/access_log_20150713-173621.log","offset":1925,"inode":7990107,"device":16777217},"/Users/bahaaldine/Dropbox/apress/demo/chapter4/source/access_log_20150713-175004.log":{"source":"/Users/bahaaldine/Dropbox/apress/demo/chapter4/source/access_log_20150713-175004.log","offset":1740,"inode":7993748,"device":16777217},"/Users/bahaaldine/Dropbox/apress/demo/chapter4/source/access_log_20150713-181903.log":{"source":"/Users/bahaaldine/Dropbox/apress/demo/chapter4/source/access_log_20150713-181903.log","offset":2029,"inode":8005282,"device":16777217},"/Users/bahaaldine/Dropbox/apress/demo/chapter4/source/access_log_20150713-182035.log":{"source":"/Users/bahaaldine/Dropbox/apress/demo/chapter4/source/access_log_20150713-182035.log","offset":2090,"inode":8005809,"device":16777217},"/Users/bahaaldine/Dropbox/apress/demo/chapter4/source/access_log_20150713-182711.log":{"source":"/Users/bahaaldine/Dropbox/apress/demo/chapter4/source/access_log_20150713-182711.log","offset":1976,"inode":8009448,"device":16777217},"/Users/bahaaldine/Dropbox/apress/demo/chapter4
/source/access_log_20150714-000234.log":{"source":"/Users/bahaaldine/Dropbox/apress/demo/chapter4/source/access_log_20150714-000234.log","offset":2032,"inode":8048571,"device":16777217},"/Users/bahaaldine/Dropbox/apress/demo/chapter4/source/access_log_20150714-000309.log":{"source":"/Users/bahaaldine/Dropbox/apress/demo/chapter4/source/access_log_20150714-000309.log","offset":2059,"inode":8048811,"device":16777217},"/Users/bahaaldine/Dropbox/apress/demo/chapter4/source/access_log_20150715-133903.log":{"source":"/Users/bahaaldine/Dropbox/apress/demo/chapter4/source/access_log_20150715-133903.log","offset":1903,"inode":8288492,"device":16777217},"/Users/bahaaldine/Dropbox/apress/demo/chapter4/source/access_log_20150715-134905.log":{"source":"/Users/bahaaldine/Dropbox/apress/demo/chapter4/source/access_log_20150715-134905.log","offset":2042,"inode":8292378,"device":16777217},"/Users/bahaaldine/Dropbox/apress/demo/chapter4/source/access_log_20150715-143201.log":{"source":"/Users/bahaaldine/Dropbox/apress/demo/chapter4/source/access_log_20150715-143201.log","offset":2038,"inode":8343433,"device":16777217},"/Users/bahaaldine/Dropbox/apress/demo/chapter4/source/access_log_20150715-145645.log":{"source":"/Users/bahaaldine/Dropbox/apress/demo/chapter4/source/access_log_20150715-145645.log","offset":1781,"inode":8420823,"device":16777217},"/Users/bahaaldine/Dropbox/apress/demo/chapter4/source/access_log_20150715-153450.log":{"source":"/Users/bahaaldine/Dropbox/apress/demo/chapter4/source/access_log_20150715-153450.log","offset":2171,"inode":8459287,"device":16777217},"/Users/bahaaldine/Dropbox/apress/demo/chapter4/source/access_log_20150715-154641.log":{"source":"/Users/bahaaldine/Dropbox/apress/demo/chapter4/source/access_log_20150715-154641.log","offset":1960,"inode":8462798,"device":16777217},"/Users/bahaaldine/Dropbox/apress/demo/chapter4/source/access_log_20150715-154730.log":{"source":"/Users/bahaaldine/Dropbox/apress/demo/chapter4/source/access_log_20150715-154730.log","offset":2044,"inode":8463027,"device":16777217},"/Users/bahaaldine/Dropbox/apress/demo/chapter4/source/access_log_20150715-163018.log":{"source":"/Users/bahaaldine/Dropbox/apress/demo/chapter4/source/access_log_20150715-163018.log","offset":1889,"inode":8491010,"device":16777217},"/Users/bahaaldine/Dropbox/apress/demo/chapter4/source/access_log_20150715-163455.log":{"source":"/Users/bahaaldine/Dropbox/apress/demo/chapter4/source/access_log_20150715-163455.log","offset":1749,"inode":8492463,"device":16777217},"/Users/bahaaldine/Dropbox/apress/demo/chapter4/source/access_log_20150715-164003.log":{"source":"/Users/bahaaldine/Dropbox/apress/demo/chapter4/source/access_log_20150715-164003.log","offset":1912,"inode":8495558,"device":16777217},"/Users/bahaaldine/Dropbox/apress/demo/chapter4/source/access_log_20150715-164023.log":{"source":"/Users/bahaaldine/Dropbox/apress/demo/chapter4/source/access_log_20150715-164023.log","offset":193955,"inode":8495798,"device":16777217}} 2 | -------------------------------------------------------------------------------- /chapter5/spark-scala-streamer/spark-scala-streamer.iml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 
--------------------------------------------------------------------------------