├── README.md └── MyExplode └── src └── com └── example └── hive └── udf └── MyExplode.java /README.md: -------------------------------------------------------------------------------- 1 | HDInsight 2 | ========= 3 | 4 | HDInsight : Hadoop - Hive - Microsoft Business Intelligence 5 | 6 | ------------------------------------------------------------------------------------------------------------- 7 | 8 | The UDF Explode() takes an array and explodes it : SELECT EXPLODE(COL) AS MyCOL FROM MyTable; 9 | 10 | Col MyCOL 11 | | [1,2] | -> | 1 | 12 | | [3,4] | | 2 | 13 | | 3 | 14 | | 4 | 15 | 16 | With LATERAL VIEW : SELECT ID, MyCol FROM MyTable LATERAL VIEW EXPLODE(Col) MyVirtualTable AS MyCol; 17 | 18 | ID Col ID MyCol 19 | | 1 | [1,2] | -> | 1 | 1 | 20 | | 2 | [3,4] | | 1 | 2 | 21 | | 2 | 3 | 22 | | 2 | 4 | 23 | 24 | With Multiple LATERAL VIEW : 25 | SELECT MyCol1, MyCol2 FROM MyTable 26 | LATERAL VIEW explode(Col1) MyVirtualTable1 AS MyCol1 27 | LATERAL VIEW explode(Col2) MyVirtualTable2 AS MyCol2; 28 | 29 | Col1 Col2 MyCol1 MyCol2 30 | | [1,2] | [5,6] | -> | 1 | 5 | 31 | | [3,4] | [7,8] | | 1 | 6 | 32 | | 2 | 5 | 33 | | 2 | 6 | 34 | | 3 | 7 | 35 | | 3 | 8 | 36 | | 4 | 7 | 37 | | 4 | 8 | 38 | 39 | !!! 
With MyExplode() UDF : 40 | SELECT MyCol1, MyCol2 FROM MyTable 41 | LATERAL VIEW MyExplode(Array(1,2,3), Array(4,5,6)) MyVirtualTable As MyCol1, MyCol2; 42 | 43 | Col1 Col2 MyCol1 MyCol2 44 | | [1,2,3] | [4,5,6] | -> | 1 | 4 | 45 | | 2 | 5 | 46 | | 3 | 6 | 47 | 48 | ------------------------------------------------------------------------------------------------------------- 49 | Installation : 50 | 51 | hive> ADD JAR /#PATH#/MyExplode.jar; 52 | hive> CREATE TEMPORARY FUNCTION MyExplode AS 'com.example.hive.udf.MyExplode'; 53 | 54 | -------------------------------------------------------------------------------- /MyExplode/src/com/example/hive/udf/MyExplode.java: -------------------------------------------------------------------------------- 1 | package com.example.hive.udf; 2 | 3 | import java.util.ArrayList; 4 | import java.util.List; 5 | 6 | import org.apache.hadoop.hive.ql.exec.Description; 7 | import org.apache.hadoop.hive.ql.exec.UDFArgumentException; 8 | import org.apache.hadoop.hive.ql.metadata.HiveException; 9 | import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF; 10 | import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; 11 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; 12 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; 13 | import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; 14 | import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; 15 | import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; 16 | 17 | @Description(name = "MyExplode", 18 | value = "_FUNC_(a, b) - separates the elements of arrays a and b into multiple rows") 19 | 20 | public class MyExplode extends GenericUDTF { 21 | 22 | private ListObjectInspector listOI0 = null; 23 | private ListObjectInspector listOI1 = null; 24 | 25 | @Override 26 | public void close() throws HiveException { 27 | } 28 | 29 | 
@Override 30 | public StructObjectInspector initialize(ObjectInspector[] args) throws UDFArgumentException { 31 | 32 | if (args[0].getCategory() != ObjectInspector.Category.LIST) { 33 | throw new UDFArgumentException("explode() takes an array as a parameter"); 34 | } 35 | listOI0 = (ListObjectInspector) args[0]; 36 | listOI1 = (ListObjectInspector) args[1]; 37 | 38 | ArrayList fieldNames = new ArrayList(); 39 | ArrayList fieldOIs = new ArrayList(); 40 | fieldNames.add("col0"); 41 | fieldNames.add("col1"); 42 | fieldOIs.add(PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(PrimitiveCategory.STRING)); 43 | fieldOIs.add(PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(PrimitiveCategory.STRING)); 44 | return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldOIs); 45 | } 46 | 47 | private final Object[] forwardObj = new Object[2]; 48 | 49 | @Override 50 | public void process(Object[] o) throws HiveException { 51 | List list0 = listOI0.getList(o[0]); 52 | List list1 = listOI1.getList(o[1]); 53 | if(list0 == null) { 54 | return; 55 | } 56 | for(int i=0; i< list0.size(); i++) 57 | { 58 | Object r0 = list0.get(i); 59 | Object r1 = list1.get(i); 60 | forwardObj[0] = r0.toString(); 61 | forwardObj[1] = r1.toString(); 62 | forward(forwardObj); 63 | } 64 | } 65 | 66 | @Override 67 | public String toString() { 68 | return "MyExplode"; 69 | } 70 | } --------------------------------------------------------------------------------