├── .gitattributes ├── .gitignore ├── README.txt ├── data ├── sensordata.csv ├── sensormaint.csv └── sensorvendor.csv ├── dependency-reduced-pom.xml ├── pom.xml ├── scripts ├── create_ext_table.hql ├── create_join_view.hql ├── create_maint_table.hql └── create_pump_table.hql └── src └── main └── scala └── examples ├── HBaseReadRowWriteStats.scala ├── HBaseReadWrite.scala └── HBaseSensorStream.scala /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | 4 | # Custom for Visual Studio 5 | *.cs diff=csharp 6 | 7 | # Standard to msysgit 8 | *.doc diff=astextplain 9 | *.DOC diff=astextplain 10 | *.docx diff=astextplain 11 | *.DOCX diff=astextplain 12 | *.dot diff=astextplain 13 | *.DOT diff=astextplain 14 | *.pdf diff=astextplain 15 | *.PDF diff=astextplain 16 | *.rtf diff=astextplain 17 | *.RTF diff=astextplain 18 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Windows image file caches 2 | Thumbs.db 3 | ehthumbs.db 4 | 5 | # Folder config file 6 | Desktop.ini 7 | 8 | # Recycle Bin used on file shares 9 | $RECYCLE.BIN/ 10 | 11 | # Windows Installer files 12 | *.cab 13 | *.msi 14 | *.msm 15 | *.msp 16 | 17 | # Windows shortcuts 18 | *.lnk 19 | 20 | # ========================= 21 | # Operating System Files 22 | # ========================= 23 | 24 | # OSX 25 | # ========================= 26 | 27 | .DS_Store 28 | .AppleDouble 29 | .LSOverride 30 | 31 | # Thumbnails 32 | ._* 33 | 34 | # Files that might appear on external disk 35 | .Spotlight-V100 36 | .Trashes 37 | 38 | # Directories potentially created on remote AFP share 39 | .AppleDB 40 | .AppleDesktop 41 | Network Trash Folder 42 | Temporary Items 43 | .apdisk 44 | /target/ -------------------------------------------------------------------------------- /README.txt: -------------------------------------------------------------------------------- 1 | 2 | Create an HBase table to write to: 3 | launch the HBase shell 4 | $hbase shell 5 | 6 | create '/user/user01/sensor', {NAME=>'data'}, {NAME=>'alert'}, {NAME=>'stats'} 7 | 8 | Commands to run the labs: 9 | 10 | Step 1: Compile the project: select the project -> Run As -> Maven Install 11 | 12 | Step 2: Use scp to copy sparkstreamhbaseapp-1.0.jar to the MapR sandbox or cluster 13 | 14 | To run the streaming application: 15 | 16 | Step 3: Start the streaming app 17 | 18 | /opt/mapr/spark/spark-1.5.2/bin/spark-submit --driver-class-path `hbase classpath` --class examples.HBaseSensorStream sparkstreamhbaseapp-1.0.jar 19 | 20 | Step 4: Copy the streaming data file to the stream directory 21 | cp sensordata.csv /user/user01/stream/. 
22 | 23 | Step 5: you can scan the data written to the table, however the values in binary double are not readable from the shell 24 | launch the hbase shell, scan the data column family and the alert column family 25 | $hbase shell 26 | scan '/user/user01/sensor', {COLUMNS=>['data'], LIMIT => 10} 27 | scan '/user/user01/sensor', {COLUMNS=>['alert'], LIMIT => 10 } 28 | 29 | Step 6: launch one of the programs below to read data and calculate daily statistics 30 | calculate stats for one column 31 | /opt/mapr/spark/spark-1.5.2/bin/spark-submit --driver-class-path `hbase classpath` --class examples.HBaseReadWrite sparkstreamhbaseapp-1.0.jar 32 | calculate stats for whole row 33 | /opt/mapr/spark/spark-1.5.2/bin/spark-submit --driver-class-path `hbase classpath` --class examples.HBaseReadRowWriteStats sparkstreamhbaseapp-1.0.jar 34 | 35 | launch the shell and scan for statistics 36 | scan '/user/user01/sensor', {COLUMNS=>['stats']} 37 | 38 | 39 | -------------------------------------------------------------------------------- /data/sensormaint.csv: -------------------------------------------------------------------------------- 1 | COHUTTA,3/15/11,J.Thomas,Install 2 | COHUTTA,2/20/12,J.Thomas,Inspection 3 | COHUTTA,1/13/13,J.Thomas,Inspection 4 | COHUTTA,6/15/13,J.Thomas,Tighten Mounts 5 | COHUTTA,2/27/14,J.Thomas,Inspection 6 | COHUTTA,3/6/14,E. Simmons,Adjust bearing alignment 7 | NANTAHALLA,3/15/11,J.Thomas,Install 8 | NANTAHALLA,2/19/12,J.Thomas,Inspection 9 | NANTAHALLA,1/12/13,J.Thomas,Inspection 10 | NANTAHALLA,6/14/13,J.Thomas,Tighten Mounts 11 | NANTAHALLA,2/26/14,J.Thomas,Inspection 12 | NANTAHALLA,3/3/14,E. Simmons,Adjust bearing alignment 13 | NANTAHALLA,3/13/14,E. Simmons,Shutdown Failure 14 | THERMALITO,9/26/09,W.Stevens,Install 15 | THERMALITO,11/22/09,W.Stevens,Tighten Mounts 16 | THERMALITO,6/10/10,T. LaBou,Inspection 17 | THERMALITO,1/7/11,T. LaBou,Inspection 18 | THERMALITO,9/26/11,W.Stevens,Inspection 19 | THERMALITO,10/2/11,T. LaBou,Bearing Seal 20 | THERMALITO,11/5/11,D.Pitre,Inspect 21 | THERMALITO,5/22/12,D.Pitre,Inspect 22 | THERMALITO,12/15/12,D.Pitre,Inspect 23 | THERMALITO,6/16/13,T. LaBou,Vane clearance adjust 24 | THERMALITO,7/11/13,W.Stevens,Inspect 25 | THERMALITO,2/5/14,D.Pitre,Inspect 26 | BUTTE,10/2/09,W.Stevens,Install 27 | BUTTE,10/5/09,W.Stevens,Inspect 28 | BUTTE,11/22/09,W.Stevens,Tighten Mounts 29 | BUTTE,6/10/10,T. LaBou,Inspect 30 | BUTTE,1/7/11,T. LaBou,Inspect 31 | BUTTE,9/26/11,W.Stevens,Inspect 32 | BUTTE,10/2/11,T. LaBou,Bearing Seal 33 | BUTTE,11/5/11,D.Pitre,Inspect 34 | BUTTE,5/22/12,D.Pitre,Inspect 35 | BUTTE,12/15/12,D.Pitre,Inspect 36 | BUTTE,6/16/13,T. LaBou,Vane clearance adjust 37 | BUTTE,7/11/13,W.Stevens,Inspect 38 | BUTTE,2/5/14,D.Pitre,Inspect 39 | CARGO,10/2/09,T. LaBou,Install 40 | CARGO,10/5/09,W.Stevens,Inspect 41 | CARGO,11/22/09,T. LaBou,Tighten Mounts 42 | CARGO,6/10/10,T. LaBou,Inspect 43 | CARGO,1/7/11,T. LaBou,Inspect 44 | CARGO,9/26/11,W.Stevens,Inspect 45 | CARGO,10/2/11,T. LaBou,Bearing Seal 46 | CARGO,11/5/11,D.Pitre,Inspect 47 | CARGO,5/22/12,D.Pitre,Inspect 48 | CARGO,12/15/12,D.Pitre,Inspect 49 | CARGO,6/18/13,T. LaBou,Vane clearance adjust 50 | CARGO,7/11/13,W.Stevens,Inspect 51 | CARGO,2/5/14,D.Pitre,Inspect 52 | CARGO,3/13/14,D.Pitre,Tighten Mounts 53 | LAGNAPPE,10/2/09,T. LaBou,Install 54 | LAGNAPPE,10/5/09,W.Stevens,Inspect 55 | LAGNAPPE,11/24/09,W.Stevens,Tighten Mounts 56 | LAGNAPPE,6/10/10,T. LaBou,Inspect 57 | LAGNAPPE,1/7/11,T. LaBou,Inspect 58 | LAGNAPPE,9/30/11,W.Stevens,Inspect 59 | LAGNAPPE,10/3/11,T. 
LaBou,Bearing Seal 60 | LAGNAPPE,11/5/11,D.Pitre,Inspect 61 | LAGNAPPE,5/22/12,D.Pitre,Inspect 62 | LAGNAPPE,12/15/12,W.Stevens,Inspect 63 | LAGNAPPE,6/18/13,T. LaBou,Vane clearance adjust 64 | LAGNAPPE,7/11/13,W.Stevens,Inspect 65 | LAGNAPPE,2/5/14,D.Pitre,Inspect 66 | LAGNAPPE,3/14/14,D.Pitre,Shutdown Main Feed Line Failure 67 | CHER,11/5/09,D.Pitre,Install 68 | CHER,11/23/09,W.Stevens,Inspect 69 | CHER,11/24/09,W.Stevens,Tighten Mounts 70 | CHER,6/10/10,T. LaBou,Inspect 71 | CHER,1/7/11,T. LaBou,Inspect 72 | CHER,9/30/11,W.Stevens,Inspect 73 | CHER,10/3/11,W.Stevens,Bearing Seal 74 | CHER,11/5/11,D.Pitre,Inspect 75 | CHER,5/22/12,W.Stevens,Inspect 76 | CHER,12/15/12,W.Stevens,Inspect 77 | CHER,6/22/13,T. LaBou,Vane clearance adjust 78 | CHER,7/11/13,W.Stevens,Inspect 79 | CHER,2/5/14,D.Pitre,Inspect 80 | CHER,3/15/14,T. LaBou,Tighten Mounts 81 | ANDOUILLE,11/25/09,W.Stevens,Install 82 | ANDOUILLE,11/27/09,W.Stevens,Inspect 83 | ANDOUILLE,12/2/09,W.Stevens,Tighten Mounts 84 | ANDOUILLE,6/10/10,T. LaBou,Inspect 85 | ANDOUILLE,1/7/11,T. LaBou,Inspect 86 | ANDOUILLE,9/30/11,W.Stevens,Inspect 87 | ANDOUILLE,10/7/11,D.Pitre,Bearing Seal 88 | ANDOUILLE,11/5/11,D.Pitre,Inspect 89 | ANDOUILLE,5/22/12,W.Stevens,Inspect 90 | ANDOUILLE,12/15/12,W.Stevens,Inspect 91 | ANDOUILLE,6/25/13,T. LaBou,Vane clearance adjust 92 | ANDOUILLE,7/11/13,W.Stevens,Inspect 93 | ANDOUILLE,2/5/14,D.Pitre,Inspect 94 | ANDOUILLE,3/12/14,T. LaBou,Tighten Mounts 95 | MOJO,6/20/11,N.Boudreau,Install 96 | MOJO,7/11/11,N.Boudreau,Tighten Mounts 97 | MOJO,1/22/13,N.Boudreau,Inspect 98 | MOJO,7/15/13,N.Boudreau,Inspect 99 | MOJO,12/15/13,N.Boudreau,Inspect 100 | MOJO,6/7/13,M.Dugas,Inspect 101 | MOJO,1/11/14,N.Boudreau,Inspect 102 | MOJO,3/14/14,M.Dugas,Inspect 103 | BBKING,7/15/11,M.Dugas,Install 104 | BBKING,7/11/11,N.Boudreau,Tighten Mounts 105 | BBKING,1/24/13,N.Boudreau,Inspect 106 | BBKING,7/22/13,N.Boudreau,Inspect 107 | BBKING,12/16/13,N.Boudreau,Inspect 108 | BBKING,12/18/13,M.Dugas,Shutdown Failure 109 | BBKING,12/19/13,M.Dugas,Replace Front Motor Bearing 110 | BBKING,12/20/13,N.Boudreau,Inspect 111 | BBKING,6/12/13,M.Dugas,Inspect 112 | BBKING,1/14/14,N.Boudreau,Inspect 113 | BBKING,3/14/14,M.Dugas,Inspect 114 | -------------------------------------------------------------------------------- /data/sensorvendor.csv: -------------------------------------------------------------------------------- 1 | COHUTTA,HYDROPUMP,11/27/10,3/15/11,HYDROCAM,29.687276,-91.162492 2 | NANTAHALLA,HYDROPUMP,11/27/10,3/15/11,HYDROCAM,29.687128,-91.162499 3 | THERMALITO,HYDROPUMP,5/25/08,9/26/09,GENPUMP,29.687276,-91.162492 4 | BUTTE,HYDROPUMP,5/25/08,9/26/09,GENPUMP,29.686929,-91.1625 5 | CARGO,HYDROPUMP,5/25/08,9/26/09,GENPUMP,29.683147,-91.145448 6 | LAGNAPPE,HYDROPUMP,5/25/08,9/26/09,GENPUMP,29.683124,-91.145471 7 | CHER,HYDROPUMP,5/25/08,9/26/09,GENPUMP,29.683167,-91.145427 8 | ANDOUILLE,HYDROPUMP,5/25/08,9/26/09,GENPUMP,29.683187,-91.145408 9 | MOJO,HYDROPUMP,4/25/11,6/20/11,XYLO,29.66975,-91.13223 10 | BBKING,HYDROPUMP,4/25/11,6/20/11,XYLO,29.669723,-91.132278 11 | -------------------------------------------------------------------------------- /dependency-reduced-pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4.0.0 4 | mapr 5 | sparkdataframesapp 6 | sparkdataframesapp 7 | 1.0 8 | 2015 9 | 10 | src/main/scala 11 | src/test/scala 12 | 13 | 14 | net.alchim31.maven 15 | scala-maven-plugin 16 | 3.2.0 17 | 18 | 19 | 20 | compile 21 | testCompile 22 | 23 | 24 | 25 | -dependencyfile 
26 | ${project.build.directory}/.scala_dependencies 27 | 28 | 29 | 30 | 31 | 32 | 33 | maven-surefire-plugin 34 | 2.18.1 35 | 36 | false 37 | true 38 | 39 | **/*Test.* 40 | **/*Suite.* 41 | 42 | 43 | 44 | 45 | maven-shade-plugin 46 | 2.3 47 | 48 | 49 | package 50 | 51 | shade 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | scala-tools.org 61 | Scala-tools Maven2 Repository 62 | http://scala-tools.org/repo-releases 63 | 64 | 65 | 66 | 67 | false 68 | 69 | mapr-releases 70 | http://repository.mapr.com/maven/ 71 | 72 | 73 | 74 | 75 | org.apache.spark 76 | spark-core_2.10 77 | 1.3.1 78 | provided 79 | 80 | 81 | chill_2.10 82 | com.twitter 83 | 84 | 85 | chill-java 86 | com.twitter 87 | 88 | 89 | hadoop-client 90 | org.apache.hadoop 91 | 92 | 93 | spark-network-common_2.10 94 | org.apache.spark 95 | 96 | 97 | spark-network-shuffle_2.10 98 | org.apache.spark 99 | 100 | 101 | jets3t 102 | net.java.dev.jets3t 103 | 104 | 105 | curator-recipes 106 | org.apache.curator 107 | 108 | 109 | javax.servlet 110 | org.eclipse.jetty.orbit 111 | 112 | 113 | commons-lang3 114 | org.apache.commons 115 | 116 | 117 | commons-math3 118 | org.apache.commons 119 | 120 | 121 | jsr305 122 | com.google.code.findbugs 123 | 124 | 125 | slf4j-api 126 | org.slf4j 127 | 128 | 129 | jul-to-slf4j 130 | org.slf4j 131 | 132 | 133 | jcl-over-slf4j 134 | org.slf4j 135 | 136 | 137 | log4j 138 | log4j 139 | 140 | 141 | slf4j-log4j12 142 | org.slf4j 143 | 144 | 145 | compress-lzf 146 | com.ning 147 | 148 | 149 | snappy-java 150 | org.xerial.snappy 151 | 152 | 153 | lz4 154 | net.jpountz.lz4 155 | 156 | 157 | RoaringBitmap 158 | org.roaringbitmap 159 | 160 | 161 | commons-net 162 | commons-net 163 | 164 | 165 | akka-remote_2.10 166 | org.spark-project.akka 167 | 168 | 169 | akka-slf4j_2.10 170 | org.spark-project.akka 171 | 172 | 173 | json4s-jackson_2.10 174 | org.json4s 175 | 176 | 177 | mesos 178 | org.apache.mesos 179 | 180 | 181 | netty-all 182 | io.netty 183 | 184 | 185 | stream 186 | com.clearspring.analytics 187 | 188 | 189 | metrics-core 190 | io.dropwizard.metrics 191 | 192 | 193 | metrics-jvm 194 | io.dropwizard.metrics 195 | 196 | 197 | metrics-json 198 | io.dropwizard.metrics 199 | 200 | 201 | metrics-graphite 202 | io.dropwizard.metrics 203 | 204 | 205 | jackson-databind 206 | com.fasterxml.jackson.core 207 | 208 | 209 | jackson-module-scala_2.10 210 | com.fasterxml.jackson.module 211 | 212 | 213 | ivy 214 | org.apache.ivy 215 | 216 | 217 | oro 218 | oro 219 | 220 | 221 | tachyon-client 222 | org.tachyonproject 223 | 224 | 225 | pyrolite 226 | org.spark-project 227 | 228 | 229 | py4j 230 | net.sf.py4j 231 | 232 | 233 | unused 234 | org.spark-project.spark 235 | 236 | 237 | 238 | 239 | org.apache.spark 240 | spark-sql_2.10 241 | 1.3.1 242 | provided 243 | 244 | 245 | spark-catalyst_2.10 246 | org.apache.spark 247 | 248 | 249 | parquet-column 250 | com.twitter 251 | 252 | 253 | parquet-hadoop 254 | com.twitter 255 | 256 | 257 | jodd-core 258 | org.jodd 259 | 260 | 261 | jackson-databind 262 | com.fasterxml.jackson.core 263 | 264 | 265 | unused 266 | org.spark-project.spark 267 | 268 | 269 | 270 | 271 | junit 272 | junit 273 | 4.11 274 | test 275 | 276 | 277 | hamcrest-core 278 | org.hamcrest 279 | 280 | 281 | 282 | 283 | org.specs2 284 | specs2_2.10 285 | 1.13 286 | test 287 | 288 | 289 | scalaz-core_2.10 290 | org.specs2 291 | 292 | 293 | scalaz-concurrent_2.10 294 | org.specs2 295 | 296 | 297 | 298 | 299 | org.scalatest 300 | scalatest_2.10 301 | 2.0.M6-SNAP8 302 | test 303 | 304 | 305 | 306 | 2.10.4 307 | UTF-8 308 | 1.7 
309 | 1.3.1 310 | 2.10 311 | 1.7 312 | 313 | 314 | 315 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | mapr 5 | sparkstreamhbaseapp 6 | 1.0 7 | sparkstreamhbaseapp 8 | 9 | 2015 10 | 11 | 12 | 1.7 13 | 1.7 14 | UTF-8 15 | 2.10 16 | 2.10.4 17 | 1.5.2 18 | 1.1.1-mapr-1602-m7-5.1.0 19 | 20 | 21 | 22 | 23 | scala-tools.org 24 | Scala-tools Maven2 Repository 25 | http://scala-tools.org/repo-releases 26 | 27 | 28 | 29 | mapr-releases 30 | http://repository.mapr.com/maven/ 31 | 32 | false 33 | 34 | 35 | true 36 | 37 | 38 | 39 | maven2-repository.dev.java.net 40 | Java.net Repository for Maven 41 | http://download.java.net/maven/2/ 42 | default 43 | 44 | 45 | 46 | 47 | org.scala-lang 48 | scala-library 49 | ${scala.version} 50 | 51 | 52 | org.apache.spark 53 | spark-core_${scala.tools.version} 54 | ${spark.version} 55 | provided 56 | 57 | 58 | org.apache.spark 59 | spark-sql_${scala.tools.version} 60 | ${spark.version} 61 | 62 | 63 | org.apache.spark 64 | spark-streaming_${scala.tools.version} 65 | ${spark.version} 66 | 67 | 68 | org.apache.hbase 69 | hbase-server 70 | ${mapr.version} 71 | 72 | 73 | 74 | 75 | src/main/scala 76 | src/test/scala 77 | 78 | 79 | org.scala-tools 80 | maven-scala-plugin 81 | 2.15.2 82 | 83 | 84 | 85 | compile 86 | 87 | 88 | 89 | -dependencyfile 90 | ${project.build.directory}/.scala_dependencies 91 | 92 | 93 | 94 | 95 | 96 | 97 | org.apache.maven.plugins 98 | maven-compiler-plugin 99 | 2.3.1 100 | 101 | 1.7 102 | 1.7 103 | true 104 | true 105 | 106 | 107 | 108 | org.apache.maven.plugins 109 | maven-eclipse-plugin 110 | 2.8 111 | 112 | 113 | 114 | 115 | -------------------------------------------------------------------------------- /scripts/create_ext_table.hql: -------------------------------------------------------------------------------- 1 | CREATE EXTERNAL TABLE sensor 2 | (key STRING, resID STRING, date STRING, 3 | hz FLOAT, 4 | disp FLOAT, 5 | flo INT, 6 | sedPPM FLOAT, 7 | psi INT, 8 | chlPPM FLOAT) 9 | STORED BY 'org.apache.hadoop.hive.hbase.HBaseStorageHandler' 10 | WITH SERDEPROPERTIES ( 11 | "hbase.columns.mapping" = 12 | ":key,cf1:resID,cf1:date,cf1:hz,cf1:disp, 13 | cf1:flo,cf1:sedPPM,cf1:psi,cf1:chlPPM" 14 | ) 15 | 16 | TBLPROPERTIES("hbase.table.name" = "/user/user01/sensor"); 17 | -------------------------------------------------------------------------------- /scripts/create_join_view.hql: -------------------------------------------------------------------------------- 1 | create view pumpview as 2 | select s.date, s.hz, s.disp, s.flo, s.sedPPM, s.psi, s.chlPPM, 3 | p.resourceid, p.type, p.purchasedate, p.dateinservice, p.vendor, p.longitude, p.latitude 4 | from sensor s 5 | join pump_info p 6 | on (s.resid = p.resourceid); 7 | -------------------------------------------------------------------------------- /scripts/create_maint_table.hql: -------------------------------------------------------------------------------- 1 | CREATE EXTERNAL TABLE maint_table 2 | 3 | (resourceid STRING, eventDate STRING, 4 | technician STRING, description STRING) 5 | 6 | ROW FORMAT DELIMITED FIELDS TERMINATED BY "," 7 | 8 | 9 | STORED AS TEXTFILE LOCATION "/user/user01/sensormaint.csv"; 10 | -------------------------------------------------------------------------------- /scripts/create_pump_table.hql: -------------------------------------------------------------------------------- 1 | CREATE EXTERNAL TABLE pump_info 2 | 3 | 
(resourceid STRING, type STRING, purchasedate STRING, 4 | dateinservice STRING, vendor STRING, longitude FLOAT, latitude FLOAT) 5 | 6 | ROW FORMAT DELIMITED FIELDS TERMINATED BY "," 7 | 8 | 9 | STORED AS TEXTFILE LOCATION "/user/user01/sensorvendor.csv"; 10 | -------------------------------------------------------------------------------- /src/main/scala/examples/HBaseReadRowWriteStats.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * This example reads rows of time series sensor data, 3 | * calculates daily statistics for each column, 4 | * and then writes these statistics to the stats column family 5 | * 6 | * You can specify which columns to return; more info: 7 | * http://hbase.apache.org/apidocs/org/apache/hadoop/hbase/mapreduce/TableInputFormat.html 8 | */ 9 | 10 | package examples 11 | 12 | import scala.reflect.runtime.universe 13 | 14 | import org.apache.hadoop.hbase.HBaseConfiguration 15 | import org.apache.hadoop.hbase.client.Put 16 | import org.apache.hadoop.hbase.client.Result 17 | import org.apache.hadoop.hbase.io.ImmutableBytesWritable 18 | import org.apache.hadoop.hbase.mapred.TableOutputFormat 19 | import org.apache.hadoop.hbase.mapreduce.TableInputFormat 20 | import org.apache.hadoop.hbase.util.Bytes 21 | import org.apache.hadoop.mapred.JobConf 22 | import org.apache.spark.SparkConf 23 | import org.apache.spark.SparkContext 24 | import org.apache.spark.rdd.PairRDDFunctions 25 | import org.apache.spark.sql.Row 26 | import org.apache.spark.sql.functions.avg 27 | import org.apache.hadoop.mapreduce.Job 28 | import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat 29 | import org.apache.hadoop.fs.Path 30 | 31 | object HBaseReadRowWriteStats { 32 | 33 | case class SensorRow(rowkey: String, hz: Double, disp: Double, flo: Double, sedPPM: Double, psi: Double, chlPPM: Double) 34 | 35 | object SensorRow extends Serializable{ 36 | def parseSensorRow(result: Result): SensorRow = { 37 | val rowkey = Bytes.toString(result.getRow()) 38 | // remove time from rowKey, stats row key is for day 39 | val p0 = rowkey.split(" ")(0) 40 | val p1 = Bytes.toDouble(result.getValue(cfDataBytes, Bytes.toBytes("hz"))) 41 | val p2 = Bytes.toDouble(result.getValue(cfDataBytes, Bytes.toBytes("disp"))) 42 | val p3 = Bytes.toDouble(result.getValue(cfDataBytes, Bytes.toBytes("flo"))) 43 | val p4 = Bytes.toDouble(result.getValue(cfDataBytes, Bytes.toBytes("sedPPM"))) 44 | val p5 = Bytes.toDouble(result.getValue(cfDataBytes, Bytes.toBytes("psi"))) 45 | val p6 = Bytes.toDouble(result.getValue(cfDataBytes, Bytes.toBytes("chlPPM"))) 46 | SensorRow(p0, p1, p2, p3, p4, p5, p6) 47 | } 48 | } 49 | 50 | case class SensorStatsRow(rowkey: String, 51 | maxhz: Double, minhz: Double, avghz: Double, 52 | maxdisp: Double, mindisp: Double, avgdisp: Double, 53 | maxflo: Double, minflo: Double, avgflo: Double, 54 | maxsedPPM: Double, minsedPPM: Double, avgsedPPM: Double, 55 | maxpsi: Double, minpsi: Double, avgpsi: Double, 56 | maxchlPPM: Double, minchlPPM: Double, avgchlPPM: Double) 57 | 58 | object SensorStatsRow { 59 | def convertToPutStats(row: SensorStatsRow): (ImmutableBytesWritable, Put) = { 60 | val p = new Put(Bytes.toBytes(row.rowkey)) 61 | // add columns with data values to put 62 | p.add(cfStatsBytes, Bytes.toBytes("hzmax"), Bytes.toBytes(row.maxhz)) 63 | p.add(cfStatsBytes, Bytes.toBytes("hzmin"), Bytes.toBytes(row.minhz)) 64 | p.add(cfStatsBytes, Bytes.toBytes("hzavg"), Bytes.toBytes(row.avghz)) 65 | p.add(cfStatsBytes, Bytes.toBytes("dispmax"), 
Bytes.toBytes(row.maxdisp)) 66 | p.add(cfStatsBytes, Bytes.toBytes("dispmin"), Bytes.toBytes(row.mindisp)) 67 | p.add(cfStatsBytes, Bytes.toBytes("dispavg"), Bytes.toBytes(row.avgdisp)) 68 | p.add(cfStatsBytes, Bytes.toBytes("flomax"), Bytes.toBytes(row.maxflo)) 69 | p.add(cfStatsBytes, Bytes.toBytes("flomin"), Bytes.toBytes(row.minflo)) 70 | p.add(cfStatsBytes, Bytes.toBytes("floavg"), Bytes.toBytes(row.avgflo)) 71 | p.add(cfStatsBytes, Bytes.toBytes("sedPPMmax"), Bytes.toBytes(row.maxsedPPM)) 72 | p.add(cfStatsBytes, Bytes.toBytes("sedPPMmin"), Bytes.toBytes(row.minsedPPM)) 73 | p.add(cfStatsBytes, Bytes.toBytes("sedPPMavg"), Bytes.toBytes(row.avgsedPPM)) 74 | p.add(cfStatsBytes, Bytes.toBytes("psimax"), Bytes.toBytes(row.maxpsi)) 75 | p.add(cfStatsBytes, Bytes.toBytes("psimin"), Bytes.toBytes(row.minpsi)) 76 | p.add(cfStatsBytes, Bytes.toBytes("psiavg"), Bytes.toBytes(row.avgpsi)) 77 | p.add(cfStatsBytes, Bytes.toBytes("chlPPMmax"), Bytes.toBytes(row.maxchlPPM)) 78 | p.add(cfStatsBytes, Bytes.toBytes("chlPPMmin"), Bytes.toBytes(row.minchlPPM)) 79 | p.add(cfStatsBytes, Bytes.toBytes("chlPPMavg"), Bytes.toBytes(row.avgchlPPM)) 80 | (new ImmutableBytesWritable, p) 81 | } 82 | } 83 | 84 | final val tableName = "/user/user01/sensor" 85 | final val cfData = "data" 86 | final val cfDataBytes = Bytes.toBytes(cfData) 87 | final val cfStats = "stats" 88 | final val cfStatsBytes = Bytes.toBytes(cfStats) 89 | 90 | def main(args: Array[String]) { 91 | val sparkConf = new SparkConf().setAppName("HBaseTest") 92 | val sc = new SparkContext(sparkConf) 93 | val sqlContext = new org.apache.spark.sql.SQLContext(sc) 94 | import sqlContext.implicits._ 95 | 96 | val conf = HBaseConfiguration.create() 97 | 98 | conf.set(TableInputFormat.INPUT_TABLE, tableName) 99 | // scan data column family 100 | conf.set(TableInputFormat.SCAN_COLUMNS, "data") 101 | 102 | // Load an RDD of rowkey, result(ImmutableBytesWritable, Result) tuples from the table 103 | val hBaseRDD = sc.newAPIHadoopRDD(conf, classOf[TableInputFormat], 104 | classOf[org.apache.hadoop.hbase.io.ImmutableBytesWritable], 105 | classOf[org.apache.hadoop.hbase.client.Result]) 106 | 107 | hBaseRDD.count() 108 | 109 | // transform (ImmutableBytesWritable, Result) tuples into an RDD of Results 110 | val resultRDD = hBaseRDD.map(tuple => tuple._2) 111 | resultRDD.count() 112 | // transform RDD of Results into an RDD of SensorRow objects 113 | val sensorRDD = resultRDD.map(SensorRow.parseSensorRow) 114 | // change RDD of SensorRow objects to a DataFrame 115 | val sensorDF = sensorRDD.toDF() 116 | // Return the schema of this DataFrame 117 | sensorDF.printSchema() 118 | // Display the top 20 rows of DataFrame 119 | sensorDF.show() 120 | // group by the rowkey (sensorid_date) get average psi 121 | sensorDF.groupBy("rowkey").agg(avg(sensorDF("psi"))).take(5).foreach(println) 122 | // register the DataFrame as a temp table 123 | sensorDF.registerTempTable("SensorRow") 124 | 125 | // group by the rowkey (sensorid_date) get average, max , min for all columns 126 | val sensorStatDF = sqlContext.sql("SELECT rowkey,MAX(hz) as maxhz, min(hz) as minhz, avg(hz) as avghz, MAX(disp) as maxdisp, min(disp) as mindisp, avg(disp) as avgdisp, MAX(flo) as maxflo, min(flo) as minflo, avg(flo) as avgflo,MAX(sedPPM) as maxsedPPM, min(sedPPM) as minsedPPM, avg(sedPPM) as avgsedPPM, MAX(psi) as maxpsi, min(psi) as minpsi, avg(psi) as avgpsi,MAX(chlPPM) as maxchlPPM, min(chlPPM) as minchlPPM, avg(chlPPM) as avgchlPPM FROM SensorRow GROUP BY rowkey") 127 | sensorStatDF.printSchema() 128 
| sensorStatDF.take(5).foreach(println) 129 | 130 | // map the query result row to the SensorStatsRow object 131 | val sensorStatsRowRDD = sensorStatDF.map { 132 | case Row(rowkey: String, 133 | maxhz: Double, minhz: Double, avghz: Double, maxdisp: Double, mindisp: Double, avgdisp: Double, 134 | maxflo: Double, minflo: Double, avgflo: Double, maxsedPPM: Double, minsedPPM: Double, avgsedPPM: Double, 135 | maxpsi: Double, minpsi: Double, avgpsi: Double, maxchlPPM: Double, minchlPPM: Double, avgchlPPM: Double) => 136 | SensorStatsRow(rowkey: String, 137 | maxhz: Double, minhz: Double, avghz: Double, maxdisp: Double, mindisp: Double, avgdisp: Double, 138 | maxflo: Double, minflo: Double, avgflo: Double, maxsedPPM: Double, minsedPPM: Double, avgsedPPM: Double, 139 | maxpsi: Double, minpsi: Double, avgpsi: Double, maxchlPPM: Double, minchlPPM: Double, avgchlPPM: Double) 140 | } 141 | 142 | sensorStatsRowRDD.take(5).foreach(println) 143 | 144 | // set JobConfiguration variables for writing to HBase 145 | val jobConfig: JobConf = new JobConf(conf, this.getClass) 146 | jobConfig.set("mapreduce.output.fileoutputformat.outputdir", "/user/user01/out") 147 | // set the HBase output table 148 | jobConfig.setOutputFormat(classOf[TableOutputFormat]) 149 | jobConfig.set(TableOutputFormat.OUTPUT_TABLE, tableName) 150 | // convert the SensorStatsRow objects into HBase put objects and write to HBase 151 | sensorStatsRowRDD.map { 152 | case sensorStatsRow => SensorStatsRow.convertToPutStats(sensorStatsRow) 153 | }.saveAsHadoopDataset(jobConfig) 154 | } 155 | 156 | } 157 | -------------------------------------------------------------------------------- /src/main/scala/examples/HBaseReadWrite.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * This example reads a row range of time series sensor data for one sensor and day, 3 | * calculates statistics for the psi column, 4 | * and then writes these statistics to the stats column family 5 | * 6 | */ 7 | 8 | package examples 9 | 10 | import org.apache.hadoop.hbase.HBaseConfiguration 11 | import org.apache.hadoop.hbase.client.Put 12 | import org.apache.hadoop.hbase.client.Result 13 | import org.apache.hadoop.hbase.io.ImmutableBytesWritable 14 | import org.apache.hadoop.hbase.mapred.TableOutputFormat 15 | import org.apache.hadoop.hbase.mapreduce.TableInputFormat 16 | import org.apache.hadoop.hbase.util.Bytes 17 | import org.apache.hadoop.mapred.JobConf 18 | import org.apache.spark.SparkConf 19 | import org.apache.spark.SparkContext 20 | import org.apache.spark.rdd.PairRDDFunctions 21 | import org.apache.spark.rdd.RDD.rddToPairRDDFunctions 22 | import org.apache.spark.util.StatCounter 23 | 24 | object HBaseReadWrite extends Serializable{ 25 | 26 | final val tableName = "/user/user01/sensor" 27 | final val cfDataBytes = Bytes.toBytes("data") 28 | final val cfStatsBytes = Bytes.toBytes("stats") 29 | 30 | def main(args: Array[String]) { 31 | val sparkConf = new SparkConf().setAppName("HBaseTest") 32 | val sc = new SparkContext(sparkConf) 33 | 34 | val conf = HBaseConfiguration.create() 35 | conf.set(TableInputFormat.INPUT_TABLE, tableName) 36 | conf.set(TableInputFormat.SCAN_ROW_START, "COHUTTA_3/10/14") 37 | conf.set(TableInputFormat.SCAN_ROW_STOP, "COHUTTA_3/11/14") 38 | // specify a specific column to return 39 | conf.set(TableInputFormat.SCAN_COLUMNS, "data:psi") 40 | 41 | // Load an RDD of (ImmutableBytesWritable, Result) tuples from the table 42 | val hBaseRDD = sc.newAPIHadoopRDD(conf, classOf[TableInputFormat], 43 | 
classOf[org.apache.hadoop.hbase.io.ImmutableBytesWritable], 44 | classOf[org.apache.hadoop.hbase.client.Result]) 45 | 46 | hBaseRDD.count() 47 | 48 | // transform (ImmutableBytesWritable, Result) tuples into an RDD of Result’s 49 | val resultRDD = hBaseRDD.map(tuple => tuple._2) 50 | resultRDD.count() 51 | // transform into an RDD of (RowKey, ColumnValue)s the RowKey has the time removed 52 | val keyValueRDD = resultRDD.map(result => (Bytes.toString(result.getRow()).split(" ")(0), Bytes.toDouble(result.value))) 53 | keyValueRDD.take(3).foreach(kv => println(kv)) 54 | 55 | // group by rowkey , get statistics for column value 56 | val keyStatsRDD = keyValueRDD.groupByKey().mapValues(list => StatCounter(list)) 57 | keyStatsRDD.take(5).foreach(println) 58 | 59 | // set JobConfiguration variables for writing to HBase 60 | val jobConfig: JobConf = new JobConf(conf, this.getClass) 61 | jobConfig.set("mapreduce.output.fileoutputformat.outputdir", "/user/user01/out") 62 | jobConfig.setOutputFormat(classOf[TableOutputFormat]) 63 | jobConfig.set(TableOutputFormat.OUTPUT_TABLE, tableName) 64 | // convert rowkey, psi stats to put and write to hbase table stats column family 65 | keyStatsRDD.map { case (k, v) => convertToPut(k, v) }.saveAsHadoopDataset(jobConfig) 66 | 67 | } 68 | // convert rowkey, stats to put 69 | def convertToPut(key: String, stats: StatCounter): (ImmutableBytesWritable, Put) = { 70 | val p = new Put(Bytes.toBytes(key)) 71 | // add columns with data values to put 72 | p.add(cfStatsBytes, Bytes.toBytes("psimax"), Bytes.toBytes(stats.max)) 73 | p.add(cfStatsBytes, Bytes.toBytes("psimin"), Bytes.toBytes(stats.min)) 74 | p.add(cfStatsBytes, Bytes.toBytes("psimean"), Bytes.toBytes(stats.mean)) 75 | (new ImmutableBytesWritable, p) 76 | } 77 | 78 | } 79 | -------------------------------------------------------------------------------- /src/main/scala/examples/HBaseSensorStream.scala: -------------------------------------------------------------------------------- 1 | /* 2 | * 3 | * 4 | */ 5 | 6 | package examples 7 | 8 | import org.apache.hadoop.hbase.HBaseConfiguration 9 | import org.apache.hadoop.hbase.client.Put 10 | import org.apache.hadoop.hbase.io.ImmutableBytesWritable 11 | import org.apache.hadoop.hbase.mapred.TableOutputFormat 12 | import org.apache.hadoop.hbase.util.Bytes 13 | import org.apache.hadoop.mapred.JobConf 14 | import org.apache.spark.SparkConf 15 | 16 | import org.apache.spark.streaming.Seconds 17 | import org.apache.spark.streaming.StreamingContext 18 | 19 | object HBaseSensorStream extends Serializable { 20 | final val tableName = "/user/user01/sensor" 21 | final val cfDataBytes = Bytes.toBytes("data") 22 | final val cfAlertBytes = Bytes.toBytes("alert") 23 | final val colHzBytes = Bytes.toBytes("hz") 24 | final val colDispBytes = Bytes.toBytes("disp") 25 | final val colFloBytes = Bytes.toBytes("flo") 26 | final val colSedBytes = Bytes.toBytes("sedPPM") 27 | final val colPsiBytes = Bytes.toBytes("psi") 28 | final val colChlBytes = Bytes.toBytes("chlPPM") 29 | 30 | // schema for sensor data 31 | case class Sensor(resid: String, date: String, time: String, hz: Double, disp: Double, flo: Double, sedPPM: Double, psi: Double, chlPPM: Double) 32 | 33 | object Sensor extends Serializable{ 34 | // function to parse line of sensor data into Sensor class 35 | def parseSensor(str: String): Sensor = { 36 | val p = str.split(",") 37 | Sensor(p(0), p(1), p(2), p(3).toDouble, p(4).toDouble, p(5).toDouble, p(6).toDouble, p(7).toDouble, p(8).toDouble) 38 | } 39 | // Convert a row of 
sensor object data to an HBase put object 40 | def convertToPut(sensor: Sensor): (ImmutableBytesWritable, Put) = { 41 | val dateTime = sensor.date + " " + sensor.time 42 | // create a composite row key: sensorid_date time 43 | val rowkey = sensor.resid + "_" + dateTime 44 | val put = new Put(Bytes.toBytes(rowkey)) 45 | // add to column family data, column data values to put object 46 | put.add(cfDataBytes, colHzBytes, Bytes.toBytes(sensor.hz)) 47 | put.add(cfDataBytes, colDispBytes, Bytes.toBytes(sensor.disp)) 48 | put.add(cfDataBytes, colFloBytes, Bytes.toBytes(sensor.flo)) 49 | put.add(cfDataBytes, colSedBytes, Bytes.toBytes(sensor.sedPPM)) 50 | put.add(cfDataBytes, colPsiBytes, Bytes.toBytes(sensor.psi)) 51 | put.add(cfDataBytes, colChlBytes, Bytes.toBytes(sensor.chlPPM)) 52 | return (new ImmutableBytesWritable(Bytes.toBytes(rowkey)), put) 53 | } 54 | // convert psi alert to an HBase put object 55 | def convertToPutAlert(sensor: Sensor): (ImmutableBytesWritable, Put) = { 56 | val dateTime = sensor.date + " " + sensor.time 57 | // create a composite row key: sensorid_date time 58 | val key = sensor.resid + "_" + dateTime 59 | val p = new Put(Bytes.toBytes(key)) 60 | // add to column family alert, column psi data value to put object 61 | p.add(cfAlertBytes, colPsiBytes, Bytes.toBytes(sensor.psi)) 62 | return (new ImmutableBytesWritable(Bytes.toBytes(key)), p) 63 | } 64 | } 65 | 66 | def main(args: Array[String]): Unit = { 67 | // set up HBase Table configuration 68 | val conf = HBaseConfiguration.create() 69 | conf.set(TableOutputFormat.OUTPUT_TABLE, tableName) 70 | val jobConfig: JobConf = new JobConf(conf, this.getClass) 71 | jobConfig.set("mapreduce.output.fileoutputformat.outputdir", "/user/user01/out") 72 | jobConfig.setOutputFormat(classOf[TableOutputFormat]) 73 | jobConfig.set(TableOutputFormat.OUTPUT_TABLE, tableName) 74 | 75 | val sparkConf = new SparkConf().setAppName("HBaseStream") 76 | // create a StreamingContext, the main entry point for all streaming functionality 77 | val ssc = new StreamingContext(sparkConf, Seconds(2)) 78 | 79 | // parse the lines of data into sensor objects 80 | val sensorDStream = ssc.textFileStream("/user/user01/stream").map(Sensor.parseSensor) 81 | sensorDStream.print() 82 | 83 | sensorDStream.foreachRDD { rdd => 84 | // filter sensor data for low psi 85 | val alertRDD = rdd.filter(sensor => sensor.psi < 5.0) 86 | alertRDD.take(1).foreach(println) 87 | // convert sensor data to put object and write to HBase table column family data 88 | rdd.map(Sensor.convertToPut). 89 | saveAsHadoopDataset(jobConfig) 90 | // convert alert data to put object and write to HBase table column family alert 91 | alertRDD.map(Sensor.convertToPutAlert). 92 | saveAsHadoopDataset(jobConfig) 93 | } 94 | // Start the computation 95 | ssc.start() 96 | // Wait for the computation to terminate 97 | ssc.awaitTermination() 98 | 99 | } 100 | 101 | } --------------------------------------------------------------------------------
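
Note: the sensor readings and statistics above are stored as binary doubles (Bytes.toBytes on a Double), which is why the README points out that scanned values are not human-readable from the HBase shell. Below is a minimal sketch, not a file in this repository, of how those cells could be read back and decoded with the same HBase client API used in the project; the object name ReadPsiValues and the COHUTTA row-key range are illustrative assumptions.

package examples

import org.apache.hadoop.hbase.HBaseConfiguration
import org.apache.hadoop.hbase.client.{HTable, Scan}
import org.apache.hadoop.hbase.util.Bytes
import scala.collection.JavaConverters._

// Illustrative only: decode the binary-double psi values written by HBaseSensorStream
object ReadPsiValues {
  def main(args: Array[String]): Unit = {
    val conf = HBaseConfiguration.create()
    val table = new HTable(conf, "/user/user01/sensor")
    // row keys have the form <resid>_<date> <time>, so a date-prefixed range selects one sensor-day
    val scan = new Scan(Bytes.toBytes("COHUTTA_3/10/14"), Bytes.toBytes("COHUTTA_3/11/14"))
    scan.addColumn(Bytes.toBytes("data"), Bytes.toBytes("psi"))
    val scanner = table.getScanner(scan)
    try {
      for (result <- scanner.asScala) {
        val psiBytes = result.getValue(Bytes.toBytes("data"), Bytes.toBytes("psi"))
        // Bytes.toDouble reverses the Bytes.toBytes(Double) encoding used on write
        if (psiBytes != null)
          println(Bytes.toString(result.getRow) + " psi=" + Bytes.toDouble(psiBytes))
      }
    } finally {
      scanner.close()
      table.close()
    }
  }
}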