├── .gitignore
├── README.md
├── people.txt
├── pom.xml
└── src
    ├── main
    │   └── java
    │       └── com
    │           └── matthewrathbone
    │               └── example
    │                   ├── ComplexUDFExample.java
    │                   ├── NameParserGenericUDTF.java
    │                   ├── SimpleUDFExample.java
    │                   └── TotalNumOfLettersGenericUDAF.java
    └── test
        └── java
            └── com
                └── matthewrathbone
                    └── example
                        ├── ComplexUDFExampleTest.java
                        ├── NameParserGenericUDTFTest.java
                        └── SimpleUDFExampleTest.java
/.gitignore:
--------------------------------------------------------------------------------
1 | target/*
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Hive UDF Examples
2 |
3 | This code accompanies [this article which walks through creating UDFs in Apache Hive][blog-post].
4 |
5 | ## Compile
6 |
7 | ```
8 | mvn compile
9 | ```
10 |
11 | ## Test
12 |
13 | ```
14 | mvn test
15 | ```
16 |
17 | ## Build
18 | ```
19 | mvn assembly:single
20 | ```
21 |
22 | ## Run
23 |
24 | ```
25 | %> hive
26 | hive> ADD JAR /path/to/assembled.jar;
27 | hive> create temporary function hello as 'com.matthewrathbone.example.SimpleUDFExample';
28 | hive> select hello(firstname) from people limit 10;
29 |
30 | ```
31 |
32 | [blog-post]:http://blog.matthewrathbone.com/2013/08/10/guide-to-writing-hive-udfs.html
--------------------------------------------------------------------------------
/people.txt:
--------------------------------------------------------------------------------
1 | John Smith
2 | John and Ann White
3 | Ted Green
4 | Dorothy
5 |
--------------------------------------------------------------------------------
/pom.xml:
--------------------------------------------------------------------------------
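1 | <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
2 |   xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
3 |
4 |   <build>
5 |     <plugins>
6 |
7 |       <plugin>
8 |         <groupId>org.apache.maven.plugins</groupId>
9 |         <artifactId>maven-surefire-plugin</artifactId>
10 |         <version>2.8</version>
11 |       </plugin>
12 |       <plugin>
13 |         <artifactId>maven-assembly-plugin</artifactId>
14 |         <configuration>
15 |           <archive>
16 |             <manifest>
17 |               <mainClass>com.matthewrathbone.example.RawMapreduce</mainClass>
18 |             </manifest>
19 |           </archive>
20 |           <descriptorRefs>
21 |             <descriptorRef>jar-with-dependencies</descriptorRef>
22 |           </descriptorRefs>
23 |         </configuration>
24 |       </plugin>
25 |     </plugins>
26 |   </build>
27 |
28 |
29 |   <modelVersion>4.0.0</modelVersion>
30 |   <groupId>com.matthewrathbone.example</groupId>
31 |   <artifactId>hive-extensions</artifactId>
32 |   <packaging>jar</packaging>
33 |   <version>1.0-SNAPSHOT</version>
34 |   <name>hive-extensions</name>
35 |   <url>http://maven.apache.org</url>
36 |   <dependencies>
37 |     <dependency>
38 |       <groupId>org.apache.hadoop</groupId>
39 |       <artifactId>hadoop-client</artifactId>
40 |       <version>2.0.0-mr1-cdh4.3.1</version>
41 |       <scope>provided</scope>
42 |     </dependency>
43 |     <dependency>
44 |       <groupId>org.apache.hive</groupId>
45 |       <artifactId>hive-exec</artifactId>
46 |       <version>0.10.0-cdh4.3.1</version>
47 |       <scope>provided</scope>
48 |     </dependency>
49 |
50 |     <dependency>
51 |       <groupId>org.apache.commons</groupId>
52 |       <artifactId>commons-io</artifactId>
53 |       <version>1.3.2</version>
54 |       <scope>test</scope>
55 |     </dependency>
56 |     <dependency>
57 |       <groupId>commons-httpclient</groupId>
58 |       <artifactId>commons-httpclient</artifactId>
59 |       <version>3.1</version>
60 |       <scope>test</scope>
61 |     </dependency>
62 |     <dependency>
63 |       <groupId>org.apache.hadoop</groupId>
64 |       <artifactId>hadoop-test</artifactId>
65 |       <version>2.0.0-mr1-cdh4.1.2</version>
66 |       <scope>test</scope>
67 |     </dependency>
68 |     <dependency>
69 |       <groupId>junit</groupId>
70 |       <artifactId>junit</artifactId>
71 |       <version>4.8.2</version>
72 |       <scope>test</scope>
73 |     </dependency>
74 |   </dependencies>
75 |   <repositories>
76 |     <repository>
77 |       <id>cloudera</id>
78 |       <url>https://repository.cloudera.com/artifactory/cloudera-repos/</url>
79 |     </repository>
80 |   </repositories>
81 | </project>
82 |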
--------------------------------------------------------------------------------
/src/main/java/com/matthewrathbone/example/ComplexUDFExample.java:
--------------------------------------------------------------------------------
1 | package com.matthewrathbone.example;
2 |
3 | import java.util.List;
4 |
5 | import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
6 | import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
7 | import org.apache.hadoop.hive.ql.metadata.HiveException;
8 | import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
9 | import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
10 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
11 | import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
12 | import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector;
13 |
14 | class ComplexUDFExample extends GenericUDF {
15 |
16 | ListObjectInspector listOI;
17 | StringObjectInspector elementOI;
18 |
19 | @Override
20 | public String getDisplayString(String[] arg0) {
21 | return "arrayContainsExample()"; // this should probably be better
22 | }
23 |
24 | @Override
25 | public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
26 | if (arguments.length != 2) {
27 | throw new UDFArgumentLengthException("arrayContainsExample only takes 2 arguments: List, T");
28 | }
29 | // 1. Check we received the right object types.
30 | ObjectInspector a = arguments[0];
31 | ObjectInspector b = arguments[1];
32 | if (!(a instanceof ListObjectInspector) || !(b instanceof StringObjectInspector)) {
33 | throw new UDFArgumentException("first argument must be a list / array, second argument must be a string");
34 | }
35 | this.listOI = (ListObjectInspector) a;
36 | this.elementOI = (StringObjectInspector) b;
37 |
38 | // 2. Check that the list contains strings
39 | if(!(listOI.getListElementObjectInspector() instanceof StringObjectInspector)) {
40 | throw new UDFArgumentException("first argument must be a list of strings");
41 | }
42 |
43 | // the return type of our function is a boolean, so we provide the correct object inspector
44 | return PrimitiveObjectInspectorFactory.javaBooleanObjectInspector;
45 | }
46 |
47 | @Override
48 | public Object evaluate(DeferredObject[] arguments) throws HiveException {
49 |
50 | // get the list and string from the deferred objects using the object inspectors
51 | List list = (List) this.listOI.getList(arguments[0].get());
52 | String arg = elementOI.getPrimitiveJavaObject(arguments[1].get());
53 |
54 | // check for nulls
55 | if (list == null || arg == null) {
56 | return null;
57 | }
58 |
59 | // see if our list contains the value we need
60 | for(String s: list) {
61 | if (arg.equals(s)) return new Boolean(true);
62 | }
63 | return new Boolean(false);
64 | }
65 |
66 | }
--------------------------------------------------------------------------------
/src/main/java/com/matthewrathbone/example/NameParserGenericUDTF.java:
--------------------------------------------------------------------------------
1 | package com.matthewrathbone.example;
2 |
3 | import java.util.ArrayList;
4 | import java.util.Iterator;
5 | import java.util.List;
6 |
7 | import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
8 | import org.apache.hadoop.hive.ql.metadata.HiveException;
9 | import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
10 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
11 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
12 | import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
13 | import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
14 | import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
15 |
16 | public class NameParserGenericUDTF extends GenericUDTF {
17 | private PrimitiveObjectInspector stringOI = null;
18 |
19 | @Override
20 | public StructObjectInspector initialize(ObjectInspector[] args) throws UDFArgumentException {
21 | if (args.length != 1) {
22 | throw new UDFArgumentException("NameParserGenericUDTF() takes exactly one argument");
23 | }
24 |
25 | if (args[0].getCategory() != ObjectInspector.Category.PRIMITIVE
26 | && ((PrimitiveObjectInspector) args[0]).getPrimitiveCategory() != PrimitiveObjectInspector.PrimitiveCategory.STRING) {
27 | throw new UDFArgumentException("NameParserGenericUDTF() takes a string as a parameter");
28 | }
29 |
30 | // input
31 | stringOI = (PrimitiveObjectInspector) args[0];
32 |
33 | // output
34 | List fieldNames = new ArrayList(2);
35 | List fieldOIs = new ArrayList(2);
36 | fieldNames.add("name");
37 | fieldNames.add("surname");
38 | fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
39 | fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
40 | return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldOIs);
41 | }
42 |
43 | public ArrayList