├── hive-udf
    ├── src
    │   └── main
    │   │   └── java
    │   │       └── com
    │   │           └── cloudera
    │   │               └── fce
    │   │                   └── curtis
    │   │                       └── sparkudfexamples
    │   │                           └── hiveudf
    │   │                               └── CTOF.java
    ├── hive-udf-example.py
    └── pom.xml
├── data
    ├── inventory.json
    └── temperatures.json
├── python-udf
    └── python-udf-example.py
├── scala-udaf-from-python
    ├── scala-udaf-from-python.py
    ├── src
    │   └── main
    │   │   └── scala
    │   │       └── com
    │   │           └── cloudera
    │   │               └── fce
    │   │                   └── curtis
    │   │                       └── sparkudfexamples
    │   │                           └── scalaudaffrompython
    │   │                               └── ScalaUDAFFromPythonExample.scala
    └── pom.xml
├── scala-udf
    ├── src
    │   └── main
    │   │   └── scala
    │   │       └── com
    │   │           └── cloudera
    │   │               └── fce
    │   │                   └── curtis
    │   │                       └── sparkudfexamples
    │   │                           └── scalaudf
    │   │                               └── ScalaUDFExample.scala
    └── pom.xml
├── java-udf
    ├── src
    │   └── main
    │   │   └── java
    │   │       └── com
    │   │           └── cloudera
    │   │               └── fce
    │   │                   └── curtis
    │   │                       └── sparkudfexamples
    │   │                           └── javaudf
    │   │                               └── JavaUDFExample.java
    └── pom.xml
├── README.md
└── scala-udaf
    ├── src
        └── main
        │   └── scala
        │       └── com
        │           └── cloudera
        │               └── fce
        │                   └── curtis
        │                       └── sparkudfexamples
        │                           └── scalaudaf
        │                               └── ScalaUDAFExample.scala
    └── pom.xml


/hive-udf/src/main/java/com/cloudera/fce/curtis/sparkudfexamples/hiveudf/CTOF.java:
--------------------------------------------------------------------------------
 1 | package com.cloudera.fce.curtis.sparkudfexamples.hiveudf;
 2 | 
 3 | import org.apache.hadoop.hive.ql.exec.UDF;
 4 | 
 5 | /**
 6 |  * Hive UDF that converts a temperature from degrees Celsius to degrees Fahrenheit.
 7 |  */
 8 | public class CTOF extends UDF {
 9 |   /**
10 |    * @param degreesCelsius temperature in degrees Celsius; may be null (SQL NULL)
11 |    * @return the temperature in degrees Fahrenheit, or null when the input is null
12 |    */
13 |   public Double evaluate(Double degreesCelsius) {
14 |     // Auto-unboxing a null Double would throw a NullPointerException, so map NULL to NULL.
15 |     if (degreesCelsius == null) {
16 |       return null;
17 |     }
18 |     return ((degreesCelsius * 9.0 / 5.0) + 32.0);
19 |   }
20 | }
21 | 


--------------------------------------------------------------------------------
/data/inventory.json:
--------------------------------------------------------------------------------
1 | {"Make":"Honda","Model":"Pilot","RetailValue":32145.0,"Stock":4}
2 | {"Make":"Honda","Model":"Civic","RetailValue":19575.0,"Stock":11}
3 | {"Make":"Honda","Model":"Ridgeline","RetailValue":42870.0,"Stock":2}
4 | {"Make":"Jeep","Model":"Cherokee","RetailValue":23595.0,"Stock":13}
5 | {"Make":"Jeep","Model":"Wrangler","RetailValue":27895.0,"Stock":4}
6 | {"Make":"Volkswagen","Model":"Passat","RetailValue":22440.0,"Stock":2}
7 | 


--------------------------------------------------------------------------------
/python-udf/python-udf-example.py:
--------------------------------------------------------------------------------
 1 | from pyspark.sql import SparkSession
 2 | 
 3 | spark = SparkSession.builder.appName("Python UDF example").getOrCreate() 
 4 | 
 5 | df = spark.read.json("temperatures.json")
 6 | df.createOrReplaceTempView("citytemps")
 7 | 
 8 | # Register the UDF with our SparkSession 
 9 | spark.udf.register("CTOF", lambda degreesCelsius: ((degreesCelsius * 9.0 / 5.0) + 32.0))
10 | 
11 | spark.sql("SELECT city, CTOF(avgLow) AS avgLowF, CTOF(avgHigh) AS avgHighF FROM citytemps").show()
12 | 


--------------------------------------------------------------------------------
/scala-udaf-from-python/scala-udaf-from-python.py:
--------------------------------------------------------------------------------
 1 | from pyspark.sql import SparkSession
 2 | 
 3 | spark = SparkSession.builder.appName("Scala UDAF from Python example").getOrCreate()
 4 | 
 5 | df = spark.read.json("inventory.json")
 6 | df.createOrReplaceTempView("inventory")
 7 | 
 8 | spark.sparkContext._jvm.com.cloudera.fce.curtis.sparkudfexamples.scalaudaffrompython.ScalaUDAFFromPythonExample.registerUdf()
 9 | 
10 | spark.sql("SELECT Make, SUMPRODUCT(RetailValue,Stock) as InventoryValuePerMake FROM inventory GROUP BY Make").show()
11 | 


--------------------------------------------------------------------------------
/hive-udf/hive-udf-example.py:
--------------------------------------------------------------------------------
 1 | from pyspark     import SparkConf, SparkContext
 2 | from pyspark.sql import HiveContext
 3 | 
 4 | conf       = SparkConf().setAppName("Hive UDF example")
 5 | sc         = SparkContext(conf=conf)
 6 | sqlContext = HiveContext(sc)
 7 | 
 8 | df         = sqlContext.read.json("temperatures.json")
 9 | df.registerTempTable("citytemps")
10 | 
11 | # Register our Hive UDF
12 | sqlContext.sql("CREATE TEMPORARY FUNCTION CTOF AS 'com.cloudera.fce.curtis.sparkudfexamples.hiveudf.CTOF'")
13 | 
14 | sqlContext.sql("SELECT city, CTOF(avgLow) AS avgLowF, CTOF(avgHigh) AS avgHighF FROM citytemps").show()
15 | 


--------------------------------------------------------------------------------
/data/temperatures.json:
--------------------------------------------------------------------------------
 1 | {"city":"St. John's","avgHigh":8.7,"avgLow":0.6}
 2 | {"city":"Charlottetown","avgHigh":9.7,"avgLow":0.9}
 3 | {"city":"Halifax","avgHigh":11.0,"avgLow":1.6}
 4 | {"city":"Fredericton","avgHigh":11.2,"avgLow":-0.5}
 5 | {"city":"Quebec","avgHigh":9.0,"avgLow":-1.0}
 6 | {"city":"Montreal","avgHigh":11.1,"avgLow":1.4}
 7 | {"city":"Ottawa","avgHigh":10.9,"avgLow":1.1}
 8 | {"city":"Toronto","avgHigh":12.5,"avgLow":2.5}
 9 | {"city":"Winnipeg","avgHigh":8.3,"avgLow":-3.1}
10 | {"city":"Regina","avgHigh":9.1,"avgLow":-3.4}
11 | {"city":"Edmonton","avgHigh":8.5,"avgLow":-3.8}
12 | {"city":"Calgary","avgHigh":10.5,"avgLow":-2.4}
13 | {"city":"Vancouver","avgHigh":13.7,"avgLow":6.5}
14 | {"city":"Victoria","avgHigh":14.1,"avgLow":5.3}
15 | {"city":"Whitehorse","avgHigh":4.5,"avgLow":-5.9}
16 | {"city":"Yellowknife","avgHigh":-0.2,"avgLow":-9.0}
17 | 


--------------------------------------------------------------------------------
/scala-udf/src/main/scala/com/cloudera/fce/curtis/sparkudfexamples/scalaudf/ScalaUDFExample.scala:
--------------------------------------------------------------------------------
 1 | package com.cloudera.fce.curtis.sparkudfexamples.scalaudf
 2 | 
 3 | import org.apache.spark.sql.SparkSession
 4 | import org.apache.spark.SparkConf
 5 | 
 6 | /** Spark SQL example: registers a Celsius-to-Fahrenheit Scala UDF and queries with it. */
 7 | object ScalaUDFExample {
 8 |   def main(args: Array[String]): Unit = {
 9 |     val sparkConf = new SparkConf().setAppName("Scala UDF Example")
10 |     val session   = SparkSession.builder().enableHiveSupport().config(sparkConf).getOrCreate()
11 | 
12 |     // Expose the sample temperatures to Spark SQL as the "citytemps" view
13 |     val cityTemps = session.read.json("temperatures.json")
14 |     cityTemps.createOrReplaceTempView("citytemps")
15 | 
16 |     // The conversion itself, kept as a named function value and registered as CTOF
17 |     val celsiusToFahrenheit: Double => Double = celsius => (celsius * 9.0 / 5.0) + 32.0
18 |     session.udf.register("CTOF", celsiusToFahrenheit)
19 | 
20 |     val query = "SELECT city, CTOF(avgLow) AS avgLowF, CTOF(avgHigh) AS avgHighF FROM citytemps"
21 |     session.sql(query).show()
22 |   }
23 | }
24 | 


--------------------------------------------------------------------------------
/java-udf/src/main/java/com/cloudera/fce/curtis/sparkudfexamples/javaudf/JavaUDFExample.java:
--------------------------------------------------------------------------------
 1 | package com.cloudera.fce.curtis.sparkudfexamples.javaudf;
 2 | 
 3 | import org.apache.spark.api.java.*;
 4 | import org.apache.spark.SparkConf;
 5 | import org.apache.spark.sql.*;
 6 | import org.apache.spark.sql.api.java.UDF1;
 7 | import org.apache.spark.sql.types.DataTypes;
 8 | 
 9 | /**
10 |  * Spark SQL example that registers a Java UDF converting degrees Celsius to degrees
11 |  * Fahrenheit and uses it in a query over the "citytemps" view.
12 |  */
13 | public class JavaUDFExample {
14 |   public static void main(String[] args) {
15 |     SparkConf conf = new SparkConf().setAppName("Java UDF Example");
16 |     SparkSession spark = SparkSession.builder().enableHiveSupport().config(conf).getOrCreate();
17 | 
18 |     // Expose the sample temperatures to Spark SQL as the "citytemps" view
19 |     Dataset<Row> ds = spark.read().json("temperatures.json");
20 |     ds.createOrReplaceTempView("citytemps");
21 | 
22 |     // Register the UDF with our SparkSession
23 |     spark.udf().register("CTOF", new UDF1<Double, Double>() {
24 |       @Override
25 |       public Double call(Double degreesCelsius) {
26 |         // The argument is a boxed Double: unboxing a null (SQL NULL) would throw a
27 |         // NullPointerException, so pass NULL through unchanged.
28 |         if (degreesCelsius == null) {
29 |           return null;
30 |         }
31 |         return ((degreesCelsius * 9.0 / 5.0) + 32.0);
32 |       }
33 |     }, DataTypes.DoubleType);
34 | 
35 |     spark.sql("SELECT city, CTOF(avgLow) AS avgLowF, CTOF(avgHigh) AS avgHighF FROM citytemps").show();
36 |   }
37 | }
38 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Spark UDF Examples 
 2 | Simple examples of Spark SQL user-defined functions. Tested with CDH 5.13.1 and Spark 2.1.0 (see the *spark1.6* branch for Spark 1.x examples).
 3 | 
 4 | ### Load the sample data
 5 | ```
 6 | hdfs dfs -put data/temperatures.json temperatures.json
 7 | hdfs dfs -put data/inventory.json    inventory.json
 8 | ```
 9 | 
10 | ### Build the Java and Scala examples
11 | Under each example root (java-udf/, scala-udf/, ...):
12 | <br/>
13 | ```
14 | mvn package
15 | ```
16 | 
17 | ### Run them (from each example's root directory)
18 | Python UDF:
19 | <br/>
20 | ```
21 | spark2-submit --master local python-udf-example.py
22 | ```
23 | 
24 | Scala UDF:
25 | <br/>
26 | ```
27 | spark2-submit --class com.cloudera.fce.curtis.sparkudfexamples.scalaudf.ScalaUDFExample --master local target/scalaudf-0.0.1-jar-with-dependencies.jar
28 | ```
29 | 
30 | Java UDF:
31 | <br/>
32 | ```
33 | spark2-submit --class com.cloudera.fce.curtis.sparkudfexamples.javaudf.JavaUDFExample  --master local target/javaudf-0.0.1-jar-with-dependencies.jar
34 | ```
35 | 
36 | Scala UDAF:
37 | <br/>
38 | ```
39 | spark2-submit --class com.cloudera.fce.curtis.sparkudfexamples.scalaudaf.ScalaUDAFExample --master local target/scalaudaf-0.0.1-jar-with-dependencies.jar
40 | ```
41 | 
42 | Hive UDF:
43 | <br/>
44 | ```
45 | spark2-submit --jars target/hiveudf-0.0.1-jar-with-dependencies.jar  hive-udf-example.py
46 | ```
47 | 
48 | Scala UDAF From PySpark:
49 | <br/>
50 | ```
51 | spark2-submit --jars target/scalaudaffrompython-0.0.1.jar --driver-class-path target/scalaudaffrompython-0.0.1.jar scala-udaf-from-python.py
52 | ```
53 | 


--------------------------------------------------------------------------------
/scala-udaf-from-python/src/main/scala/com/cloudera/fce/curtis/sparkudfexamples/scalaudaffrompython/ScalaUDAFFromPythonExample.scala:
--------------------------------------------------------------------------------
 1 | package com.cloudera.fce.curtis.sparkudfexamples.scalaudaffrompython
 2 | 
 3 | import org.apache.spark.SparkConf
 4 | import org.apache.spark.sql._
 5 | import org.apache.spark.sql.expressions.{MutableAggregationBuffer, UserDefinedAggregateFunction}
 6 | import org.apache.spark.sql.types._
 7 | import org.apache.spark.sql.SparkSession
 8 | 
 9 | /**
10 |  * Defines the SUMPRODUCT aggregate, sum(price * quantity), and a registerUdf() hook that
11 |  * PySpark calls to register it with the active SparkSession.
12 |  */
13 | object ScalaUDAFFromPythonExample {
14 | 
15 |   // Spark SQL UDAF logic: accumulates price * quantity over the rows of each group
16 |   private class SumProductAggregateFunction extends UserDefinedAggregateFunction {
17 |     // Input = (Double price, Long quantity)
18 |     def inputSchema: StructType =
19 |       new StructType().add("price", DoubleType).add("quantity", LongType)
20 |     // Aggregation buffer = (Double total)
21 |     def bufferSchema: StructType =
22 |       new StructType().add("total", DoubleType)
23 |     // Type of the final result
24 |     def dataType: DataType = DoubleType
25 |     // true: the same input always produces the same output
26 |     def deterministic: Boolean = true
27 | 
28 |     // Start each group with a running total of 0.0
29 |     def initialize(buffer: MutableAggregationBuffer): Unit = {
30 |       buffer.update(0, 0.0)
31 |     }
32 | 
33 |     // Fold one input row into the running total
34 |     def update(buffer: MutableAggregationBuffer, input: Row): Unit = {
35 |       // Row.getDouble/getLong throw on a null value, so rows with a NULL price or
36 |       // quantity are skipped, the way built-in SQL aggregates ignore NULL inputs.
37 |       if (!input.isNullAt(0) && !input.isNullAt(1)) {
38 |         val price = input.getDouble(0)
39 |         val qty   = input.getLong(1)
40 |         buffer.update(0, buffer.getDouble(0) + (price * qty))
41 |       }
42 |     }
43 | 
44 |     // Merge two partial totals by adding them
45 |     def merge(buffer1: MutableAggregationBuffer, buffer2: Row): Unit = {
46 |       buffer1.update(0, buffer1.getDouble(0) + buffer2.getDouble(0))
47 |     }
48 | 
49 |     // The final result is the accumulated total held in 'buffer'
50 |     def evaluate(buffer: Row): Any = {
51 |       buffer.getDouble(0)
52 |     }
53 |   }
54 | 
55 |   // This function is called from PySpark to register our UDAF
56 |   def registerUdf(): Unit = {
57 |     // getOrCreate() returns the existing session when one is already running
58 |     val spark = SparkSession.builder().getOrCreate()
59 |     spark.udf.register("SUMPRODUCT", new SumProductAggregateFunction)
60 |   }
61 | }
62 | 


--------------------------------------------------------------------------------
/scala-udaf/src/main/scala/com/cloudera/fce/curtis/sparkudfexamples/scalaudaf/ScalaUDAFExample.scala:
--------------------------------------------------------------------------------
 1 | package com.cloudera.fce.curtis.sparkudfexamples.scalaudaf
 2 | 
 3 | import org.apache.spark.SparkConf
 4 | import org.apache.spark.sql._
 5 | import org.apache.spark.sql.expressions.{MutableAggregationBuffer, UserDefinedAggregateFunction}
 6 | import org.apache.spark.sql.types._
 7 | import org.apache.spark.sql.SparkSession
 8 | 
 9 | /**
10 |  * Spark SQL example that defines the SUMPRODUCT aggregate, sum(price * quantity),
11 |  * registers it and uses it to total the inventory value per make.
12 |  */
13 | object ScalaUDAFExample {
14 | 
15 |   // Define the Spark SQL UDAF logic
16 |   private class SumProductAggregateFunction extends UserDefinedAggregateFunction {
17 |     // Define the UDAF input, buffer and result schemas
18 |     def inputSchema: StructType =     // Input  = (Double price, Long quantity)
19 |       new StructType().add("price", DoubleType).add("quantity", LongType)
20 |     def bufferSchema: StructType =    // Buffer = (Double total)
21 |       new StructType().add("total", DoubleType)
22 |     def dataType: DataType = DoubleType // Type of the final result
23 |     def deterministic: Boolean = true // true: our UDAF's output given an input is deterministic
24 | 
25 |     def initialize(buffer: MutableAggregationBuffer): Unit = {
26 |       buffer.update(0, 0.0)           // Initialize the running total to 0.0
27 |     }
28 | 
29 |     // Fold one input row into the running total
30 |     def update(buffer: MutableAggregationBuffer, input: Row): Unit = {
31 |       // Row.getDouble/getLong throw on a null value, so rows with a NULL price or
32 |       // quantity are skipped, the way built-in SQL aggregates ignore NULL inputs.
33 |       if (!input.isNullAt(0) && !input.isNullAt(1)) {
34 |         val sum   = buffer.getDouble(0) // Intermediate result to be updated
35 |         val price = input.getDouble(0)  // First input parameter
36 |         val qty   = input.getLong(1)    // Second input parameter
37 |         buffer.update(0, sum + (price * qty))   // Update the intermediate result
38 |       }
39 |     }
40 | 
41 |     // Merge intermediate result sums by adding them
42 |     def merge(buffer1: MutableAggregationBuffer, buffer2: Row): Unit = {
43 |       buffer1.update(0, buffer1.getDouble(0) + buffer2.getDouble(0))
44 |     }
45 | 
46 |     // The final result is the accumulated total held in 'buffer'
47 |     def evaluate(buffer: Row): Any = {
48 |       buffer.getDouble(0)
49 |     }
50 |   }
51 | 
52 |   // Loads the inventory data, registers SUMPRODUCT and prints the result per make
53 |   def main(args: Array[String]): Unit = {
54 |     val conf  = new SparkConf().setAppName("Scala UDAF Example")
55 |     val spark = SparkSession.builder().enableHiveSupport().config(conf).getOrCreate()
56 | 
57 |     val testDF = spark.read.json("inventory.json")
58 |     testDF.createOrReplaceTempView("inventory")
59 |     // Register the UDAF with our SparkSession
60 |     spark.udf.register("SUMPRODUCT", new SumProductAggregateFunction)
61 | 
62 |     spark.sql("SELECT Make, SUMPRODUCT(RetailValue,Stock) as InventoryValuePerMake FROM inventory GROUP BY Make").show()
63 |   }
64 | }
65 | 


--------------------------------------------------------------------------------
/hive-udf/pom.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <!--
 3 |   Copyright (c) 2014, Cloudera, Inc. All Rights Reserved.
 4 | 
 5 |   Cloudera, Inc. licenses this file to you under the Apache License,
 6 |   Version 2.0 (the "License"). You may not use this file except in
 7 |   compliance with the License. You may obtain a copy of the License at
 8 | 
 9 |       http://www.apache.org/licenses/LICENSE-2.0
10 | 
11 |   This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12 |   CONDITIONS OF ANY KIND, either express or implied. See the License for
13 |   the specific language governing permissions and limitations under the
14 |   License.
15 |   -->
16 | <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
17 |   <modelVersion>4.0.0</modelVersion>
18 |   <groupId>com.cloudera.fce.curtis.sparkudfexamples.hiveudf</groupId>
19 |   <artifactId>hiveudf</artifactId>
20 |   <version>0.0.1</version>
21 |   <packaging>jar</packaging>
22 |   <name>"Hive UDF Example"</name>
23 |   
24 |   <repositories>
25 |     <repository>
26 |       <id>cloudera-repos</id>
27 |       <name>Cloudera Repos</name>
28 |       <url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
29 |     </repository>
30 |   </repositories>
31 | 
32 |   <build>
33 |     <plugins>
34 |       <plugin>
35 |         <groupId>org.apache.maven.plugins</groupId>
36 |         <artifactId>maven-compiler-plugin</artifactId>
37 |         <version>2.3.1</version>
38 |         <configuration>
39 |           <source>1.7</source>
40 |           <target>1.7</target>
41 |         </configuration>
42 |       </plugin>
43 |       <plugin>
44 |         <artifactId>maven-assembly-plugin</artifactId>
45 |         <configuration>
46 |           <descriptorRefs>
47 |           <descriptorRef>jar-with-dependencies</descriptorRef>
48 |           </descriptorRefs>
49 |         </configuration>
50 |         <executions>
51 |           <execution>
52 |             <id>make-assembly</id>
53 |             <phase>package</phase>
54 |             <goals>
55 |               <goal>single</goal>
56 |             </goals>
57 |           </execution>
58 |         </executions>
59 |       </plugin>
60 |     </plugins>  
61 |   </build>
62 | 
63 |   <dependencies> <!-- Libraries declared for building the Hive UDF -->
64 |     <dependency>
65 |       <groupId>org.apache.hive</groupId>
66 |       <artifactId>hive-exec</artifactId> <!-- presumably supplies org.apache.hadoop.hive.ql.exec.UDF, the base class CTOF extends - verify -->
67 |       <version>1.2.1</version> <!-- NOTE(review): default compile scope, so the jar-with-dependencies assembly presumably bundles this; consider scope 'provided' if the cluster supplies Hive at runtime - confirm -->
68 |     </dependency>
69 |      <dependency>
70 |       <groupId>org.apache.hadoop</groupId>
71 |       <artifactId>hadoop-core</artifactId>
72 |       <version>1.2.1</version> <!-- NOTE(review): same scope question as hive-exec above - confirm -->
73 |     </dependency>
74 |   </dependencies>
75 | </project>
76 | 


--------------------------------------------------------------------------------
/java-udf/pom.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <!--
 3 |   Copyright (c) 2014, Cloudera, Inc. All Rights Reserved.
 4 | 
 5 |   Cloudera, Inc. licenses this file to you under the Apache License,
 6 |   Version 2.0 (the "License"). You may not use this file except in
 7 |   compliance with the License. You may obtain a copy of the License at
 8 | 
 9 |       http://www.apache.org/licenses/LICENSE-2.0
10 | 
11 |   This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12 |   CONDITIONS OF ANY KIND, either express or implied. See the License for
13 |   the specific language governing permissions and limitations under the
14 |   License.
15 |   -->
16 | <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
17 |   <modelVersion>4.0.0</modelVersion>
18 |   <groupId>com.cloudera.fce.curtis.sparkudfexamples.javaudf</groupId>
19 |   <artifactId>javaudf</artifactId>
20 |   <version>0.0.1</version>
21 |   <packaging>jar</packaging>
22 |   <name>"Java UDF Example"</name>
23 |   
24 |   <repositories>
25 |     <repository>
26 |       <id>cloudera-repos</id>
27 |       <name>Cloudera Repos</name>
28 |       <url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
29 |     </repository>
30 |   </repositories>
31 | 
32 |   <build>
33 |     <plugins>
34 |       <plugin>
35 |         <groupId>org.apache.maven.plugins</groupId>
36 |         <artifactId>maven-compiler-plugin</artifactId>
37 |         <version>2.3.1</version>
38 |         <configuration>
39 |           <source>1.7</source>
40 |           <target>1.7</target>
41 |         </configuration>
42 |       </plugin>
43 |       <plugin>
44 |         <artifactId>maven-assembly-plugin</artifactId>
45 |         <configuration>
46 |           <descriptorRefs>
47 |           <descriptorRef>jar-with-dependencies</descriptorRef>
48 |           </descriptorRefs>
49 |         </configuration>
50 |         <executions>
51 |           <execution>
52 |             <id>make-assembly</id>
53 |             <phase>package</phase>
54 |             <goals>
55 |               <goal>single</goal>
56 |             </goals>
57 |           </execution>
58 |         </executions>
59 |       </plugin>
60 |     </plugins>  
61 |   </build>
62 | 
63 |   <dependencies> <!-- Spark libraries declared for building the Java UDF example -->
64 |     <dependency>
65 |       <groupId>org.apache.spark</groupId>
66 |       <artifactId>spark-core_2.11</artifactId>
67 |       <version>2.1.0.cloudera1</version> <!-- NOTE(review): default compile scope, so the jar-with-dependencies assembly presumably bundles Spark; consider scope 'provided' since the README runs the jar through spark2-submit - confirm -->
68 |     </dependency>
69 |     <dependency>
70 |       <groupId>org.apache.spark</groupId>
71 |       <artifactId>spark-sql_2.11</artifactId>
72 |       <version>2.1.0.cloudera1</version> <!-- NOTE(review): same scope question as spark-core above - confirm -->
73 |     </dependency>
74 |   </dependencies>
75 | </project>
76 | 


--------------------------------------------------------------------------------
/scala-udaf-from-python/pom.xml:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <!--
 3 |   Copyright (c) 2014, Cloudera, Inc. All Rights Reserved.
 4 | 
 5 |   Cloudera, Inc. licenses this file to you under the Apache License,
 6 |   Version 2.0 (the "License"). You may not use this file except in
 7 |   compliance with the License. You may obtain a copy of the License at
 8 | 
 9 |       http://www.apache.org/licenses/LICENSE-2.0
10 | 
11 |   This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
12 |   CONDITIONS OF ANY KIND, either express or implied. See the License for
13 |   the specific language governing permissions and limitations under the
14 |   License.
15 |   -->
16 | <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
17 |   <modelVersion>4.0.0</modelVersion>
18 |   <groupId>com.cloudera.fce.curtis.sparkudfexamples.scalaudaffrompython</groupId>
19 |   <artifactId>scalaudaffrompython</artifactId>
20 |   <version>0.0.1</version>
21 |   <packaging>jar</packaging>
22 |   <name>"Scala UDAF from Python Example"</name>
23 |   
24 |   <repositories>
25 |     <repository>
26 |       <id>scala-tools.org</id>
27 |       <name>Scala-tools Maven2 Repository</name>
28 |       <url>http://scala-tools.org/repo-releases</url>
29 |     </repository>
30 |     <repository>
31 |       <id>maven-hadoop</id>
32 |       <name>Hadoop Releases</name>
33 |       <url>https://repository.cloudera.com/content/repositories/releases/</url>
34 |     </repository>
35 |     <repository>
36 |       <id>cloudera-repos</id>
37 |       <name>Cloudera Repos</name>
38 |       <url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
39 |     </repository>
40 |   </repositories>
41 | 
42 |   <pluginRepositories> <!-- Extra repository consulted when resolving build plugins -->
43 |     <pluginRepository>
44 |       <id>scala-tools.org</id>
45 |       <name>Scala-tools Maven2 Repository</name>
46 |       <url>http://scala-tools.org/repo-releases</url> <!-- NOTE(review): plain-http URL (also listed under repositories); Maven 3.8.1+ blocks external http:// repositories by default, and this host may no longer be served - confirm the entry is still needed -->
47 |     </pluginRepository>
48 |   </pluginRepositories>
49 | 
50 |   <properties>
51 |     <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
52 |     <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
53 |   </properties>
54 | 
55 |   <build>
56 |     <plugins>
57 |       <plugin>
58 |         <groupId>org.scala-tools</groupId>
59 |         <artifactId>maven-scala-plugin</artifactId>
60 |         <version>2.15.2</version>
61 |         <executions>
62 |           <execution>
63 |             <goals>
64 |               <goal>compile</goal>
65 |             </goals>
66 |           </execution>
67 |         </executions>
68 |       </plugin>
69 |       <plugin>
70 |         <artifactId>maven-compiler-plugin</artifactId>
71 |         <version>3.1</version>
72 |         <configuration>
73 |           <source>1.6</source>
74 |           <target>1.6</target>
75 |         </configuration>
76 |       </plugin>
77 |     </plugins>  
78 |   </build>
79 | 
80 |   <dependencies>
81 |     <dependency>
82 |       <groupId>org.scala-lang</groupId>
83 |       <artifactId>scala-library</artifactId>
84 |       <version>2.11.12</version>
85 |     </dependency>
86 |     <dependency>
87 |       <groupId>org.apache.spark</groupId>
88 |       <artifactId>spark-core_2.11</artifactId>
89 |       <version>2.1.0.cloudera1</version>
90 |     </dependency>
91 |     <dependency>
92 |       <groupId>org.apache.spark</groupId>
93 |       <artifactId>spark-sql_2.11</artifactId>
94 |       <version>2.1.0.cloudera1</version>
95 |     </dependency>
96 | 
97 | </dependencies>
98 | </project>
99 | 


--------------------------------------------------------------------------------
/scala-udf/pom.xml:
--------------------------------------------------------------------------------
  1 | <?xml version="1.0" encoding="UTF-8"?>
  2 | <!--
  3 |   Copyright (c) 2014, Cloudera, Inc. All Rights Reserved.
  4 | 
  5 |   Cloudera, Inc. licenses this file to you under the Apache License,
  6 |   Version 2.0 (the "License"). You may not use this file except in
  7 |   compliance with the License. You may obtain a copy of the License at
  8 | 
  9 |       http://www.apache.org/licenses/LICENSE-2.0
 10 | 
 11 |   This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
 12 |   CONDITIONS OF ANY KIND, either express or implied. See the License for
 13 |   the specific language governing permissions and limitations under the
 14 |   License.
 15 |   -->
 16 | <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
 17 |   <modelVersion>4.0.0</modelVersion>
 18 |   <groupId>com.cloudera.fce.curtis.sparkudfexamples.scalaudf</groupId>
 19 |   <artifactId>scalaudf</artifactId>
 20 |   <version>0.0.1</version>
 21 |   <packaging>jar</packaging>
 22 |   <name>"Scala UDF Example"</name>
 23 |   
 24 |   <repositories>
 25 |     <repository>
 26 |       <id>scala-tools.org</id>
 27 |       <name>Scala-tools Maven2 Repository</name>
 28 |       <url>http://scala-tools.org/repo-releases</url>
 29 |     </repository>
 30 |     <repository>
 31 |       <id>maven-hadoop</id>
 32 |       <name>Hadoop Releases</name>
 33 |       <url>https://repository.cloudera.com/content/repositories/releases/</url>
 34 |     </repository>
 35 |     <repository>
 36 |       <id>cloudera-repos</id>
 37 |       <name>Cloudera Repos</name>
 38 |       <url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
 39 |     </repository>
 40 |   </repositories>
 41 | 
 42 |   <pluginRepositories> <!-- Extra repository consulted when resolving build plugins -->
 43 |     <pluginRepository>
 44 |       <id>scala-tools.org</id>
 45 |       <name>Scala-tools Maven2 Repository</name>
 46 |       <url>http://scala-tools.org/repo-releases</url> <!-- NOTE(review): plain-http URL (also listed under repositories); Maven 3.8.1+ blocks external http:// repositories by default, and this host may no longer be served - confirm the entry is still needed -->
 47 |     </pluginRepository>
 48 |   </pluginRepositories>
 49 | 
 50 |   <build>
 51 |     <plugins>
 52 |       <plugin>
 53 |         <groupId>org.scala-tools</groupId>
 54 |         <artifactId>maven-scala-plugin</artifactId>
 55 |         <version>2.15.2</version>
 56 |         <executions>
 57 |           <execution>
 58 |             <goals>
 59 |               <goal>compile</goal>
 60 |             </goals>
 61 |           </execution>
 62 |         </executions>
 63 |       </plugin>
 64 |       <plugin>
 65 |         <artifactId>maven-compiler-plugin</artifactId>
 66 |         <version>2.3.1</version>
 67 |         <configuration>
 68 |           <source>1.7</source>
 69 |           <target>1.7</target>
 70 |         </configuration>
 71 |       </plugin>
 72 |       <plugin>
 73 |         <artifactId>maven-assembly-plugin</artifactId>
 74 |         <configuration>
 75 |           <descriptorRefs>
 76 |           <descriptorRef>jar-with-dependencies</descriptorRef>
 77 |           </descriptorRefs>
 78 |         </configuration>
 79 |         <executions>
 80 |           <execution>
 81 |             <id>make-assembly</id>
 82 |             <phase>package</phase>
 83 |             <goals>
 84 |               <goal>single</goal>
 85 |             </goals>
 86 |           </execution>
 87 |         </executions>
 88 |       </plugin>
 89 |     </plugins>  
 90 |   </build>
 91 | 
 92 |   <dependencies>
 93 |     <dependency>
 94 |       <groupId>org.scala-lang</groupId>
 95 |       <artifactId>scala-library</artifactId>
 96 |       <version>2.11.12</version>
 97 |     </dependency>
 98 |     <dependency>
 99 |       <groupId>org.apache.spark</groupId>
100 |       <artifactId>spark-core_2.11</artifactId>
101 |       <version>2.1.0.cloudera1</version>
102 |     </dependency>
103 |     <dependency>
104 |       <groupId>org.apache.spark</groupId>
105 |       <artifactId>spark-sql_2.11</artifactId>
106 |       <version>2.1.0.cloudera1</version>
107 |     </dependency>
108 | 
109 | </dependencies>
110 | </project>
111 | 


--------------------------------------------------------------------------------
/scala-udaf/pom.xml:
--------------------------------------------------------------------------------
  1 | <?xml version="1.0" encoding="UTF-8"?>
  2 | <!--
  3 |   Copyright (c) 2014, Cloudera, Inc. All Rights Reserved.
  4 | 
  5 |   Cloudera, Inc. licenses this file to you under the Apache License,
  6 |   Version 2.0 (the "License"). You may not use this file except in
  7 |   compliance with the License. You may obtain a copy of the License at
  8 | 
  9 |       http://www.apache.org/licenses/LICENSE-2.0
 10 | 
 11 |   This software is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
 12 |   CONDITIONS OF ANY KIND, either express or implied. See the License for
 13 |   the specific language governing permissions and limitations under the
 14 |   License.
 15 |   -->
 16 | <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
 17 |   <modelVersion>4.0.0</modelVersion>
 18 |   <groupId>com.cloudera.fce.curtis.sparkudfexamples.scalaudaf</groupId>
 19 |   <artifactId>scalaudaf</artifactId>
 20 |   <version>0.0.1</version>
 21 |   <packaging>jar</packaging>
 22 |   <name>"Scala UDAF Example"</name>
 23 |   
 24 |   <repositories>
 25 |     <repository>
 26 |       <id>scala-tools.org</id>
 27 |       <name>Scala-tools Maven2 Repository</name>
 28 |       <url>http://scala-tools.org/repo-releases</url>
 29 |     </repository>
 30 |     <repository>
 31 |       <id>maven-hadoop</id>
 32 |       <name>Hadoop Releases</name>
 33 |       <url>https://repository.cloudera.com/content/repositories/releases/</url>
 34 |     </repository>
 35 |     <repository>
 36 |       <id>cloudera-repos</id>
 37 |       <name>Cloudera Repos</name>
 38 |       <url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
 39 |     </repository>
 40 |   </repositories>
 41 | 
 42 |   <pluginRepositories> <!-- Extra repository consulted when resolving build plugins -->
 43 |     <pluginRepository>
 44 |       <id>scala-tools.org</id>
 45 |       <name>Scala-tools Maven2 Repository</name>
 46 |       <url>http://scala-tools.org/repo-releases</url> <!-- NOTE(review): plain-http URL (also listed under repositories); Maven 3.8.1+ blocks external http:// repositories by default, and this host may no longer be served - confirm the entry is still needed -->
 47 |     </pluginRepository>
 48 |   </pluginRepositories>
 49 | 
 50 |   <properties>
 51 |     <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
 52 |     <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
 53 |   </properties>
 54 | 
 55 |   <build>
 56 |     <plugins>
 57 |       <plugin>
 58 |         <groupId>org.scala-tools</groupId>
 59 |         <artifactId>maven-scala-plugin</artifactId>
 60 |         <version>2.15.2</version>
 61 |         <executions>
 62 |           <execution>
 63 |             <goals>
 64 |               <goal>compile</goal>
 65 |             </goals>
 66 |           </execution>
 67 |         </executions>
 68 |       </plugin>
 69 |       <plugin>
 70 |         <artifactId>maven-compiler-plugin</artifactId>
 71 |         <version>2.3.1</version>
 72 |         <configuration>
 73 |           <source>1.7</source>
 74 |           <target>1.7</target>
 75 |         </configuration>
 76 |       </plugin>
 77 |       <plugin>
 78 |         <artifactId>maven-assembly-plugin</artifactId>
 79 |         <configuration>
 80 |           <descriptorRefs>
 81 |           <descriptorRef>jar-with-dependencies</descriptorRef>
 82 |           </descriptorRefs>
 83 |         </configuration>
 84 |         <executions>
 85 |           <execution>
 86 |             <id>make-assembly</id>
 87 |             <phase>package</phase>
 88 |             <goals>
 89 |               <goal>single</goal>
 90 |             </goals>
 91 |           </execution>
 92 |         </executions>
 93 |       </plugin>
 94 |     </plugins>  
 95 |   </build>
 96 | 
 97 |   <dependencies>
 98 |     <dependency>
 99 |       <groupId>org.scala-lang</groupId>
100 |       <artifactId>scala-library</artifactId>
101 |       <version>2.11.12</version>
102 |     </dependency>
103 |     <dependency>
104 |       <groupId>org.apache.spark</groupId>
105 |       <artifactId>spark-core_2.11</artifactId>
106 |       <version>2.1.0.cloudera1</version>
107 |     </dependency>
108 |     <dependency>
109 |       <groupId>org.apache.spark</groupId>
110 |       <artifactId>spark-sql_2.11</artifactId>
111 |       <version>2.1.0.cloudera1</version>
112 |     </dependency>
113 | 
114 | </dependencies>
115 | </project>
116 | 


--------------------------------------------------------------------------------