├── .gitignore ├── LICENSE ├── README.md ├── build.xml ├── pom.xml ├── src └── main │ └── java │ └── com │ └── nexr │ └── platform │ └── hive │ └── udf │ ├── GenericUDAFDedup.java │ ├── GenericUDFCount.java │ ├── GenericUDFDecode.java │ ├── GenericUDFDenseRank.java │ ├── GenericUDFGreatest.java │ ├── GenericUDFInstr.java │ ├── GenericUDFLag.java │ ├── GenericUDFLnnvl.java │ ├── GenericUDFMax.java │ ├── GenericUDFMin.java │ ├── GenericUDFNVL.java │ ├── GenericUDFNVL2.java │ ├── GenericUDFRank.java │ ├── GenericUDFRowNumber.java │ ├── GenericUDFSum.java │ ├── GenericUDFToNumber.java │ ├── GenericUDFTrunc.java │ ├── GenericUDTFPopulate.java │ ├── UDFChr.java │ ├── UDFDateFormat.java │ ├── UDFLastDay.java │ ├── UDFStrToDate.java │ ├── UDFSubstrForOracle.java │ ├── UDFSysDate.java │ └── UDFToChar.java └── test ├── cleanup.sql ├── datatypes.txt ├── dual.txt ├── emp.txt ├── lineitem.sql ├── lineitem.txt ├── lineitem_cleanup.q ├── onerow.txt └── setup.sql /.gitignore: -------------------------------------------------------------------------------- 1 | .classpath 2 | .project 3 | .launches 4 | .settings 5 | target 6 | logs 7 | javadocs 8 | build 9 | dist 10 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. 
For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 
48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. 
Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. 
We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # NexR Hive UDFs 2 | 3 | ## About 4 | NexR Hive UDFs is a collection of user defined functions for Hive. 
5 | 6 | ## License 7 | [Apache License Version 2.0](http://www.apache.org/licenses/LICENSE-2.0) 8 | 9 | ## Quickstart 10 | $ git clone https://github.com/nexr/hive-udf.git 11 | $ cd hive-udf 12 | $ mvn clean package 13 | or 14 | $ ant -Dhive.install.dir=../hive/build/dist clean package 15 | 16 | ## Run the tests using Hive PDK 17 | $ ant -Dhive.install.dir=../hive/build/dist test 18 | 19 | ## Install and Configurations 20 | ### hive-site.xml 21 | 22 | hive.aux.jars.path 23 | file:///path/to/nexr-hive-udf-[VERSION].jar 24 | 25 | 26 | ### Registering the UDFs 27 | hive> add jar /path/to/nexr-hive-udf-[VERSION].jar; 28 | hive> CREATE TEMPORARY FUNCTION nvl AS 'com.nexr.platform.hive.udf.GenericUDFNVL'; 29 | hive> CREATE TEMPORARY FUNCTION decode AS 'com.nexr.platform.hive.udf.GenericUDFDecode'; 30 | hive> CREATE TEMPORARY FUNCTION nvl2 AS 'com.nexr.platform.hive.udf.GenericUDFNVL2'; 31 | hive> CREATE TEMPORARY FUNCTION str_to_date AS 'com.nexr.platform.hive.udf.UDFStrToDate'; 32 | hive> CREATE TEMPORARY FUNCTION date_format AS 'com.nexr.platform.hive.udf.UDFDateFormat'; 33 | hive> CREATE TEMPORARY FUNCTION to_char AS 'com.nexr.platform.hive.udf.UDFToChar'; 34 | hive> CREATE TEMPORARY FUNCTION instr4 AS 'com.nexr.platform.hive.udf.GenericUDFInstr'; 35 | hive> CREATE TEMPORARY FUNCTION chr AS 'com.nexr.platform.hive.udf.UDFChr'; 36 | hive> CREATE TEMPORARY FUNCTION last_day AS 'com.nexr.platform.hive.udf.UDFLastDay'; 37 | hive> CREATE TEMPORARY FUNCTION greatest AS 'com.nexr.platform.hive.udf.GenericUDFGreatest'; 38 | hive> CREATE TEMPORARY FUNCTION to_number AS 'com.nexr.platform.hive.udf.GenericUDFToNumber'; 39 | hive> CREATE TEMPORARY FUNCTION trunc AS 'com.nexr.platform.hive.udf.GenericUDFTrunc'; 40 | hive> CREATE TEMPORARY FUNCTION rank AS 'com.nexr.platform.hive.udf.GenericUDFRank'; 41 | hive> CREATE TEMPORARY FUNCTION row_number AS 'com.nexr.platform.hive.udf.GenericUDFRowNumber'; 42 | hive> CREATE TEMPORARY FUNCTION sysdate AS 
'com.nexr.platform.hive.udf.UDFSysDate'; 43 | hive> CREATE TEMPORARY FUNCTION populate AS 'com.nexr.platform.hive.udf.GenericUDTFPopulate'; 44 | hive> CREATE TEMPORARY FUNCTION dedup AS 'com.nexr.platform.hive.udf.GenericUDAFDedup'; 45 | hive> CREATE TEMPORARY FUNCTION lnnvl AS 'com.nexr.platform.hive.udf.GenericUDFLnnvl'; 46 | hive> CREATE TEMPORARY FUNCTION substr AS 'com.nexr.platform.hive.udf.UDFSubstrForOracle'; 47 | 48 | ## Usage of Hive UDFs 49 | See the details at [Project Wiki](https://github.com/nexr/hive-udf/wiki). 50 | 51 | ## References 52 | * [Apache Hive](http://hive.apache.org/) 53 | * [Oracle 11g SQL Functions](http://docs.oracle.com/cd/B28359_01/server.111/b28286/functions001.htm#i88893) 54 | -------------------------------------------------------------------------------- /build.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | com.nexr 5 | nexr-hive-udf 6 | 0.2-SNAPSHOT 7 | nexr-hive-udf 8 | nexr-hive-udf 9 | 10 | 11 | UTF-8 12 | 0.20.2 13 | 0.9.0 14 | 15 | 16 | 17 | 18 | 19 | org.apache.hadoop 20 | hadoop-core 21 | ${hadoop.version} 22 | 23 | 24 | 25 | org.apache.hive 26 | hive-exec 27 | ${hive.version} 28 | 29 | 30 | org.apache.hive 31 | hive-metastore 32 | ${hive.version} 33 | 34 | 35 | org.apache.hive 36 | hive-pdk 37 | ${hive.version} 38 | 39 | 40 | javax.jdo 41 | jdo2-api 42 | 2.3-eb 43 | 44 | 45 | commons-logging 46 | commons-logging 47 | 1.1.1 48 | 49 | 50 | junit 51 | junit 52 | 4.7 53 | test 54 | 55 | 56 | 57 | 58 | -------------------------------------------------------------------------------- /src/main/java/com/nexr/platform/hive/udf/GenericUDAFDedup.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache 
Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package com.nexr.platform.hive.udf; 20 | 21 | import org.apache.hadoop.hive.ql.exec.Description; 22 | import org.apache.hadoop.hive.ql.metadata.HiveException; 23 | import org.apache.hadoop.hive.ql.parse.SemanticException; 24 | import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator; 25 | import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFParameterInfo; 26 | import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFResolver2; 27 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; 28 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; 29 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; 30 | import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; 31 | import org.apache.hive.pdk.HivePdkUnitTest; 32 | import org.apache.hive.pdk.HivePdkUnitTests; 33 | 34 | import java.util.ArrayList; 35 | import java.util.List; 36 | 37 | @Description(name = "dedup", value = "_FUNC_(x,y,z) - Removes duplicated row and return it as a struct.") 38 | public class GenericUDAFDedup implements GenericUDAFResolver2 { 39 | 40 | public GenericUDAFEvaluator getEvaluator(GenericUDAFParameterInfo 
info) throws SemanticException { 41 | return new GenericUDAFDedupEval(); 42 | } 43 | 44 | public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws SemanticException { 45 | return new GenericUDAFDedupEval(); 46 | } 47 | 48 | public static class GenericUDAFDedupEval extends GenericUDAFEvaluator { 49 | 50 | ObjectInspector[] input; 51 | ObjectInspector output; 52 | 53 | public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException { 54 | super.init(m, parameters); 55 | this.input = parameters; 56 | switch (m) { 57 | case PARTIAL1: 58 | case COMPLETE: 59 | List names = new ArrayList(parameters.length); 60 | List inspectors = new ArrayList(parameters.length); 61 | for (int i = 0; i < parameters.length; i++) { 62 | names.add("_col" + i); 63 | inspectors.add(ObjectInspectorUtils.getStandardObjectInspector(parameters[i])); 64 | } 65 | return output = ObjectInspectorFactory.getStandardStructObjectInspector(names, inspectors); 66 | case PARTIAL2: 67 | return output = parameters[0]; 68 | case FINAL: 69 | return output = ObjectInspectorUtils.getStandardObjectInspector(parameters[0]); 70 | default: 71 | throw new IllegalArgumentException("never"); 72 | } 73 | } 74 | 75 | @Override 76 | public AggregationBuffer getNewAggregationBuffer() throws HiveException { 77 | return new DedupRow(); 78 | } 79 | 80 | @Override 81 | public void reset(AggregationBuffer agg) throws HiveException { 82 | ((DedupRow) agg).row = null; 83 | } 84 | 85 | @Override 86 | public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException { 87 | if (((DedupRow) agg).row == null) { 88 | Object[] array = new Object[parameters.length]; 89 | for (int i = 0; i < array.length; i++) { 90 | array[i] = ObjectInspectorUtils.copyToStandardObject(parameters[i], input[i]); 91 | } 92 | ((DedupRow) agg).row = array; 93 | } 94 | } 95 | 96 | @Override 97 | public Object terminatePartial(AggregationBuffer agg) throws HiveException { 98 | return ((DedupRow) agg).row; 
99 | } 100 | 101 | @Override 102 | public void merge(AggregationBuffer agg, Object partial) throws HiveException { 103 | if (((DedupRow) agg).row == null) { 104 | ((DedupRow) agg).row = ObjectInspectorUtils.copyToStandardObject(partial, input[0]); 105 | } 106 | } 107 | 108 | @Override 109 | public Object terminate(AggregationBuffer agg) throws HiveException { 110 | return ((DedupRow) agg).row; 111 | } 112 | 113 | static class DedupRow implements AggregationBuffer { 114 | Object row; 115 | } 116 | } 117 | } 118 | -------------------------------------------------------------------------------- /src/main/java/com/nexr/platform/hive/udf/GenericUDFCount.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | 19 | package com.nexr.platform.hive.udf; 20 | 21 | import org.apache.commons.lang.StringUtils; 22 | import org.apache.hadoop.hive.ql.exec.Description; 23 | import org.apache.hadoop.hive.ql.exec.UDFArgumentException; 24 | import org.apache.hadoop.hive.ql.metadata.HiveException; 25 | import org.apache.hadoop.hive.ql.udf.UDFType; 26 | import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; 27 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; 28 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; 29 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption; 30 | import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; 31 | import org.apache.hadoop.io.LongWritable; 32 | import org.apache.hive.pdk.HivePdkUnitTest; 33 | import org.apache.hive.pdk.HivePdkUnitTests; 34 | 35 | 36 | @Description(name = "count", 37 | value = "_FUNC_(hash_key, order_by_col) " + 38 | "- Returns the count value of group", 39 | extended = "Example:\n" 40 | + " > SELECT _FUNC_(HASH(p1, p2), order_by_col) FROM (\n" 41 | + " > SELECT ~ FROM table DISTRIBUTE BY HASH(p1,p2) SORT BY p1,p2,order_by_col DESC, ... 
\n" 42 | + " > );") 43 | 44 | @HivePdkUnitTests( 45 | setup = "", cleanup = "", 46 | cases = { 47 | @HivePdkUnitTest( 48 | query = "SELECT t.deptno, nexr_count(hash(t.deptno),t.empno) as emp_count" 49 | +" FROM (" 50 | +" select a.empno, a.deptno from emp a" 51 | +" distribute by hash(a.deptno)" 52 | +" sort BY a.deptno, a.empno" 53 | +" ) t;", 54 | result ="10\t1\n" 55 | +"10\t2\n" 56 | +"10\t3\n" 57 | +"20\t1\n" 58 | +"20\t2\n" 59 | +"20\t3\n" 60 | +"20\t4\n" 61 | +"20\t5\n" 62 | +"30\t1\n" 63 | +"30\t2\n" 64 | +"30\t3\n" 65 | +"30\t4\n" 66 | +"30\t5\n" 67 | +"30\t6") 68 | } 69 | ) 70 | 71 | @UDFType(deterministic = false, stateful = true) 72 | public class GenericUDFCount extends GenericUDF { 73 | private final LongWritable longResult = new LongWritable(); 74 | private ObjectInspector hashOI, prevHashStandardOI; 75 | private Object prevHash; 76 | @Override 77 | public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { 78 | if (arguments.length != 2) { 79 | throw new UDFArgumentException("Exactly two argument is expected."); 80 | } 81 | 82 | hashOI = arguments[0]; 83 | prevHashStandardOI=ObjectInspectorUtils.getStandardObjectInspector(hashOI,ObjectInspectorCopyOption.JAVA); 84 | 85 | longResult.set(0); 86 | 87 | return PrimitiveObjectInspectorFactory.writableLongObjectInspector; 88 | } 89 | 90 | @Override 91 | public Object evaluate(DeferredObject[] arguments) throws HiveException { 92 | Object hash = arguments[0].get(); 93 | 94 | if (prevHash==null||ObjectInspectorUtils.compare(prevHash,prevHashStandardOI,hash,hashOI)!=0) { 95 | longResult.set(0); 96 | 97 | } 98 | 99 | prevHash=ObjectInspectorUtils.copyToStandardObject(hash,hashOI, ObjectInspectorCopyOption.JAVA); 100 | longResult.set(longResult.get()+1); 101 | return longResult; 102 | } 103 | 104 | @Override 105 | public String getDisplayString(String[] children) { 106 | return "count(" + StringUtils.join(children, ',') + ")"; 107 | } 108 | } 109 | 110 | 
--------------------------------------------------------------------------------
/src/main/java/com/nexr/platform/hive/udf/GenericUDFDecode.java:
--------------------------------------------------------------------------------
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.nexr.platform.hive.udf;

import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDFUtils;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hive.pdk.HivePdkUnitTest;
import org.apache.hive.pdk.HivePdkUnitTests;

/**
 * GenericUDF Class for SQL construct "decode(value1, value2, value3, .... defaultValue)".
 * Oracle's DECODE compares the first argument to each search value one by one.
 * <p>
 * There is a CASE...WHEN... clause in Hive, but this function makes it convenient
 * to convert from SQL to HQL without query changes.
 * </p>
 */
@Description(name = "decode", value = "_FUNC_(value1, value2, value3, .... defaultValue) " +
    "- Returns value3 if value1=value2 otherwise defaultValue", extended = "Example:\n"
    + " > SELECT _FUNC_(dep, 0, \"ACCOUNT\", 1, \"HR\", \"NO-DEP\") FROM src LIMIT 1;\n 'ACCOUNT' if dep=0")
@HivePdkUnitTests(setup = "create table dual_data (i int); "
    + "insert overwrite table dual_data select 1 from dual limit 1;",
    cleanup = "drop table if exists dual_data;",
    cases = {
        @HivePdkUnitTest(
            query = "SELECT nexr_decode(i, 0, 'ACCOUNT', 1, 'HR', 'NO-DEP') FROM dual_data;",
            result = "HR"),
        @HivePdkUnitTest(
            query = "SELECT nexr_decode(dep, 0, 'ACCOUNT', 1, 'HR', 'NO-DEP') " +
                "FROM dual WHERE id = 0;", result = "ACCOUNT"),
        @HivePdkUnitTest(
            query = "SELECT nexr_decode(dep, 0, 'ACCOUNT', 1, 'HR', 'NO-DEP') " +
                "FROM dual WHERE id = 2;", result = "NO-DEP") })
public class GenericUDFDecode extends GenericUDF {

    // Inspectors for all arguments, indexed the same way as the DeferredObjects.
    private ObjectInspector[] argumentOIs;
    // Resolves the common type of all result values (even positions + default).
    private GenericUDFUtils.ReturnObjectInspectorResolver returnOIResolver;
    // Resolves the common type of the tested value and all search values.
    private GenericUDFUtils.ReturnObjectInspectorResolver caseOIResolver;

    /**
     * Validates the argument list: at least three arguments, all search values
     * type-compatible with the tested value, and all result values (plus the
     * optional trailing default) type-compatible with each other.
     *
     * @return the resolved common object inspector of the result values
     * @throws UDFArgumentException on arity or type mismatch
     */
    @Override
    public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
        if (arguments.length < 3) {
            throw new UDFArgumentLengthException(
                "The function decode(value1,value2,value3...default) needs "
                    + "at least three arguments.");
        }

        argumentOIs = arguments;
        caseOIResolver = new GenericUDFUtils.ReturnObjectInspectorResolver(true);
        returnOIResolver = new GenericUDFUtils.ReturnObjectInspectorResolver(true);
        caseOIResolver.update(arguments[0]);
        for (int i = 1; i + 1 < arguments.length; i += 2) {
            // Odd positions (1, 3, 5, ...) are search values compared to arguments[0].
            if (!caseOIResolver.update(arguments[i])) {
                throw new UDFArgumentTypeException(i, "The value of 'value'" + (i + 1)
                    + " should have the same type: \""
                    + caseOIResolver.get().getTypeName()
                    + "\" is expected but \"" + arguments[i].getTypeName()
                    + "\" is found");
            }
            // Even positions (2, 4, 6, ...) are result values.
            if (!returnOIResolver.update(arguments[i + 1])) {
                throw new UDFArgumentTypeException(i + 1, "The value of return(value "
                    + (i + 1) + " should have the same type: \""
                    + returnOIResolver.get().getTypeName()
                    + "\" is expected but \"" + arguments[i + 1].getTypeName()
                    + "\" is found");
            }
        }
        if (!returnOIResolver.update(arguments[arguments.length - 1])) {
            throw new UDFArgumentTypeException(arguments.length - 1,
                "The value of return should have the same type: \"" +
                    returnOIResolver.get().getTypeName() + "\" is expected but \"" +
                    arguments[arguments.length - 1].getTypeName() + "\" is found");
        }

        return returnOIResolver.get();
    }

    /**
     * Scans (search, result) pairs in order and returns the first result whose
     * search value equals the tested value, otherwise the trailing default (or
     * NULL when no default was given).
     *
     * Fixes over the previous revision:
     * - A NULL search value used to abort the whole scan ("break"), so later
     *   non-null pairs were never compared; it is now skipped.
     * - Oracle DECODE considers NULL equal to NULL; that case now matches.
     * - A matched pair whose result is NULL used to fall through to the default;
     *   a match now returns immediately.
     */
    @Override
    public Object evaluate(DeferredObject[] records) throws HiveException {
        Object fieldValue = records[0].get();
        // decode(value, s1,r1, ..., default) has an even arg count iff a default exists.
        boolean hasDefault = records.length % 2 == 0;

        for (int i = 1; i + 1 < records.length; i += 2) {
            Object caseValue = records[i].get();

            // Oracle semantics: NULL matches NULL.
            if (fieldValue == null && caseValue == null) {
                return returnOIResolver.convertIfNecessary(records[i + 1].get(), argumentOIs[i + 1]);
            }
            // One side NULL: cannot match this pair, but keep scanning the rest.
            if (fieldValue == null || caseValue == null) {
                continue;
            }

            Object caseObj = ((PrimitiveObjectInspector) argumentOIs[i]).getPrimitiveJavaObject(caseValue);
            Object fieldObj = ((PrimitiveObjectInspector) argumentOIs[0]).getPrimitiveJavaObject(fieldValue);

            // Comparison is done on string renderings, matching the original behavior.
            if (caseObj.toString().equals(fieldObj.toString())) {
                return returnOIResolver.convertIfNecessary(records[i + 1].get(), argumentOIs[i + 1]);
            }
        }

        if (hasDefault) {
            return returnOIResolver.convertIfNecessary(
                records[records.length - 1].get(), argumentOIs[records.length - 1]);
        }
        return null;
    }

    @Override
    public String getDisplayString(String[] children) {
        StringBuilder sb = new StringBuilder();
        sb.append("decode (");
        for (int i = 0; i < children.length - 1; i++) {
            sb.append(children[i]).append(", ");
        }
        sb.append(children[children.length - 1]).append(")");
        return sb.toString();
    }

}
17 | */ 18 | 19 | package com.nexr.platform.hive.udf; 20 | 21 | import org.apache.commons.lang.StringUtils; 22 | import org.apache.hadoop.hive.ql.exec.Description; 23 | import org.apache.hadoop.hive.ql.exec.UDFArgumentException; 24 | import org.apache.hadoop.hive.ql.metadata.HiveException; 25 | import org.apache.hadoop.hive.ql.udf.UDFType; 26 | import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; 27 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; 28 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; 29 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption; 30 | import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; 31 | import org.apache.hadoop.io.LongWritable; 32 | import org.apache.hive.pdk.HivePdkUnitTest; 33 | import org.apache.hive.pdk.HivePdkUnitTests; 34 | 35 | 36 | @Description(name = "dense_rank", 37 | value = "_FUNC_(hash_key, order_by_col1, order_by_col2 ...) 
" + 38 | "- Returns the rank of a row in an ordered group of rows", 39 | extended = "Example:\n" 40 | + " > SELECT _FUNC_(HASH(p1, p2), order_by_col1) FROM (\n" 41 | + " > SELECT order_by_col1 FROM table \n" 42 | + " > DISTRIBUTE BY HASH(p1,p2)\n" 43 | + " > SORT BY p1, p2, order_by_col1 \n" 44 | + " > );\n\n" 45 | + "ORACLE:\n" 46 | + " > SELECT DENSE_RANK() OVER(PARTITION BY p1,p2 ORDER BY order_by_col1) FROM table;\n" 47 | ) 48 | 49 | @HivePdkUnitTests( 50 | setup = "", cleanup = "", 51 | cases = { 52 | @HivePdkUnitTest( 53 | query = "SELECT t.empno, t.deptno, t.sal, nexr_dense_rank(t.deptno, t.sal) as rank" 54 | + " FROM (" 55 | + " select a.empno, a.deptno, a.sal from " 56 | + " emp a" 57 | + " distribute by a.deptno" 58 | + " sort BY a.deptno, a.sal, a.empno" 59 | + " ) t;", 60 | result = "7934\t10\t1300\t1\n" 61 | + "7782\t10\t2450\t2\n" 62 | + "7839\t10\t5000\t3\n" 63 | + "7369\t20\t800\t1\n" 64 | + "7876\t20\t1100\t2\n" 65 | + "7566\t20\t2975\t3\n" 66 | + "7788\t20\t3000\t4\n" 67 | + "7902\t20\t3000\t4\n" 68 | + "7900\t30\t950\t1\n" 69 | + "7521\t30\t1250\t2\n" 70 | + "7654\t30\t1250\t2\n" 71 | + "7844\t30\t1500\t3\n" 72 | + "7499\t30\t1600\t4\n" 73 | + "7698\t30\t2850\t5") 74 | } 75 | ) 76 | 77 | @UDFType(deterministic = false, stateful = true) 78 | public class GenericUDFDenseRank extends GenericUDFRank { 79 | 80 | @Override 81 | protected void different(boolean newKey) { 82 | if (newKey) { 83 | counter = 1; 84 | } else { 85 | counter++; 86 | } 87 | result.set(counter); 88 | } 89 | 90 | @Override 91 | protected void next() { 92 | } 93 | 94 | @Override 95 | public String getDisplayString(String[] children) { 96 | return "dense_rank(" + StringUtils.join(children, ',') + ")"; 97 | } 98 | } 99 | -------------------------------------------------------------------------------- /src/main/java/com/nexr/platform/hive/udf/GenericUDFGreatest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache 
Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package com.nexr.platform.hive.udf; 19 | 20 | import org.apache.hadoop.hive.ql.exec.Description; 21 | import org.apache.hadoop.hive.ql.exec.UDFArgumentException; 22 | import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; 23 | import org.apache.hadoop.hive.ql.metadata.HiveException; 24 | import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; 25 | import org.apache.hadoop.hive.ql.udf.generic.GenericUDFUtils; 26 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; 27 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; 28 | import org.apache.hive.pdk.HivePdkUnitTest; 29 | import org.apache.hive.pdk.HivePdkUnitTests; 30 | 31 | /** 32 | * GenericUDF Class for SQL construct "greatest(value1, value2, value3, ....)". 33 | * Oracle's CREATEST returns the greatest of the list of one or more expressions. 34 | * 35 | */ 36 | @Description(name = "greatest", value = "_FUNC_(value1, value2, value3, ....) 
" + 37 | "- Returns the greatest value in the list.", 38 | extended = "Example:\n" + " > SELECT _FUNC_(2, 5, 12, 3) FROM src;\n 12") 39 | @HivePdkUnitTests(setup = "create table dual_data (i int); " 40 | + "insert overwrite table dual_data select 1 from dual limit 1;", 41 | cleanup = "drop table if exists dual_data;", 42 | cases = { 43 | @HivePdkUnitTest(query = "SELECT nexr_greatest(2, 5, 12, 3) " + 44 | "FROM dual_data;", result = "12"), 45 | @HivePdkUnitTest(query = "SELECT nexr_greatest('2', '5', '12', '3') " + 46 | "FROM dual_data;", result = "5"), 47 | @HivePdkUnitTest(query = "SELECT nexr_greatest('apples', 'oranges', 'bananas') " + 48 | "FROM dual_data;", result = "oranges") }) 49 | public class GenericUDFGreatest extends GenericUDF { 50 | 51 | private ObjectInspector[] argumentOIs; 52 | private GenericUDFUtils.ReturnObjectInspectorResolver returnOIResolver; 53 | 54 | @Override 55 | public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { 56 | 57 | for (int i = 0; i < arguments.length; i++) { 58 | if (arguments[i].getCategory() != ObjectInspector.Category.PRIMITIVE) { 59 | throw new UDFArgumentTypeException(i, "Only primitive type arguments are accepted but " 60 | + arguments[i].getTypeName() + " is passed."); 61 | } 62 | } 63 | 64 | argumentOIs = arguments; 65 | returnOIResolver = new GenericUDFUtils.ReturnObjectInspectorResolver(true); 66 | for (int i = 0; i < arguments.length; i++) { 67 | if (!returnOIResolver.update(arguments[i])) { 68 | throw new UDFArgumentTypeException(i, "The value of return should have the same type: \"" 69 | + returnOIResolver.get().getTypeName() + 70 | "\" is expected but \"" + arguments[i].getTypeName() 71 | + "\" is found"); 72 | } 73 | } 74 | 75 | return returnOIResolver.get(); 76 | } 77 | 78 | @Override 79 | public Object evaluate(DeferredObject[] fields) throws HiveException { 80 | Object greatestObject = null; 81 | ObjectInspector greatestOI = null; 82 | 83 | for (int i = 0; i < 
fields.length; i++) { 84 | Object fieldObject = fields[i].get(); 85 | if (greatestObject == null) { 86 | greatestObject = fieldObject; 87 | greatestOI = argumentOIs[i]; 88 | continue; 89 | } 90 | 91 | if (ObjectInspectorUtils.compare(greatestObject, greatestOI, fieldObject, argumentOIs[i]) <= 0) { 92 | greatestObject = fieldObject; 93 | greatestOI = argumentOIs[i]; 94 | } 95 | } 96 | 97 | return returnOIResolver.convertIfNecessary(greatestObject, greatestOI); 98 | } 99 | 100 | @Override 101 | public String getDisplayString(String[] children) { 102 | StringBuilder sb = new StringBuilder(); 103 | sb.append("greatest ("); 104 | for (int i = 0; i < children.length - 1; i++) { 105 | sb.append(children[i]).append(", "); 106 | } 107 | sb.append(children[children.length - 1]).append(")"); 108 | return sb.toString(); 109 | } 110 | 111 | } 112 | -------------------------------------------------------------------------------- /src/main/java/com/nexr/platform/hive/udf/GenericUDFInstr.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | 19 | package com.nexr.platform.hive.udf; 20 | 21 | import org.apache.hive.pdk.HivePdkUnitTest; 22 | import org.apache.hive.pdk.HivePdkUnitTests; 23 | 24 | import org.apache.hadoop.hive.ql.exec.Description; 25 | import org.apache.hadoop.hive.ql.exec.UDFArgumentException; 26 | import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException; 27 | import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; 28 | import org.apache.hadoop.hive.ql.metadata.HiveException; 29 | import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; 30 | import org.apache.hadoop.hive.ql.udf.generic.GenericUDFUtils; 31 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; 32 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters; 33 | import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; 34 | import org.apache.hadoop.io.IntWritable; 35 | import org.apache.hadoop.io.Text; 36 | 37 | /** 38 | * GenericUDFInstr 39 | * 40 | * This integrates the function from Oracle and Mysql 41 | * http://www.techonthenet.com/oracle/functions/instr.php 42 | * http://dev.mysql.com/doc/refman/5.1/en/string-functions.html#function_instr 43 | * 44 | * INSTR(string, substring, [start_position, [nth_appearance]]). 45 | * string is the string to search. 46 | * substring is the substring to search for in string. 47 | * start_position is the position in string where the search will start. 48 | * This argument is optional. If omitted, it defaults to 1. 49 | * The first position in the string is 1. 50 | * If the start_position is negative, the function counts back start_position number of characters 51 | * from the end of string and then searches towards the beginning of string. 52 | * nth_appearance is the nth appearance of substring. 53 | * This is optional. If omitted, it defaults to 1. 
54 | * 55 | */ 56 | @Description(name = "instr", 57 | value = "_FUNC_(string, substring, [start_position, [nth_appearance]]) " + 58 | "- Returns the index of the first occurance of substr in str", 59 | extended = "Example:\n" 60 | + " > SELECT _FUNC_('Facebook', 'boo') FROM src LIMIT 1;\n" + " 5\n") 61 | @HivePdkUnitTests( 62 | setup = "", cleanup = "", 63 | cases = { 64 | @HivePdkUnitTest( 65 | query = "SELECT nexr_instr('Tech on the net', 'e') FROM onerow;", 66 | result = "2"), 67 | @HivePdkUnitTest( 68 | query = "SELECT nexr_instr('Tech on the net', 'e', 1, 1) FROM onerow;", 69 | result = "2"), 70 | @HivePdkUnitTest( 71 | query = "SELECT nexr_instr('Tech on the net', 'e', 1, 2) FROM onerow;", 72 | result = "11"), 73 | @HivePdkUnitTest( 74 | query = "SELECT nexr_instr('Tech on the net', 'e', 1, 3) FROM onerow;", 75 | result = "14"), 76 | @HivePdkUnitTest( 77 | query = "SELECT nexr_instr('Tech on the net', 'e', -5, 1) FROM onerow;", 78 | result = "11") 79 | } 80 | ) 81 | 82 | public class GenericUDFInstr extends GenericUDF { 83 | 84 | private ObjectInspectorConverters.Converter[] converters; 85 | 86 | static final int DEFAULT_START_INDEX = 1; 87 | static final int DEFAULT_NTH = 1; 88 | 89 | @Override 90 | public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { 91 | if (arguments.length < 2 || arguments.length > 4) { 92 | throw new UDFArgumentLengthException("The function INSTR must have 2 or more and less than 4 arguments."); 93 | } 94 | 95 | for (int i = 0; i < arguments.length; i++) { 96 | if (i < 2) {// 0,1 : String 97 | if (!arguments[i].getTypeName().toLowerCase().equals("string")) { 98 | throw new UDFArgumentTypeException(i, "The " + GenericUDFUtils.getOrdinal(i + 1) 99 | + " argument of function INSTR is expected to a string type, but " + 100 | arguments[i].getTypeName().toLowerCase() + " is found"); 101 | } 102 | } else {// 2,3 : Int 103 | if (!arguments[i].getTypeName().equals("int")) { 104 | throw new 
UDFArgumentTypeException(i, "The " + GenericUDFUtils.getOrdinal(i + 1) 105 | + " argument of function INSTR is expected to a int type, but " + 106 | arguments[i].getTypeName().toLowerCase() + " is found"); 107 | } 108 | } 109 | } 110 | 111 | converters = new ObjectInspectorConverters.Converter[arguments.length]; 112 | for (int i = 0; i < arguments.length; i++) { 113 | if (i < 2) {// 0,1 : String 114 | converters[i] = ObjectInspectorConverters.getConverter( 115 | arguments[i], PrimitiveObjectInspectorFactory.writableStringObjectInspector); 116 | } else {// 2,3 : Int 117 | converters[i] = ObjectInspectorConverters.getConverter( 118 | arguments[i], PrimitiveObjectInspectorFactory.writableIntObjectInspector); 119 | } 120 | } 121 | 122 | return PrimitiveObjectInspectorFactory.writableIntObjectInspector; 123 | } 124 | 125 | private IntWritable intWritable = new IntWritable(0); 126 | 127 | @Override 128 | public Object evaluate(DeferredObject[] arguments) throws HiveException { 129 | if (arguments[0].get() == null || arguments[1].get() == null) { 130 | return null; 131 | } 132 | 133 | Text text = (Text) converters[0].convert(arguments[0].get()); 134 | Text subtext = (Text) converters[1].convert(arguments[1].get()); 135 | int startIndex = (arguments.length >= 3) ? 136 | ((IntWritable) converters[2].convert(arguments[2].get())).get() : DEFAULT_START_INDEX; 137 | int nth = (arguments.length == 4) ? 138 | ((IntWritable) converters[3].convert(arguments[3].get())).get() : DEFAULT_NTH; 139 | 140 | // argument checking 141 | if (startIndex < 0) { 142 | // if startIndex is negative, 143 | // the function counts back startIndex number of characters from the end of text and then searches 144 | // towards the beginning of text. 
145 | startIndex = text.getLength() + startIndex; 146 | } 147 | if (startIndex <= 0 || startIndex > text.getLength()) { 148 | intWritable.set(0); 149 | return intWritable; 150 | } 151 | 152 | int index = 0; 153 | int currentIndex = startIndex; 154 | for (int i = 0; i < nth; i++) { 155 | index = GenericUDFUtils.findText(text, subtext, currentIndex - 1) + 1; 156 | if (index == 0) {// not found 157 | intWritable.set(0); 158 | return intWritable; 159 | } 160 | currentIndex = index + 1; 161 | } 162 | intWritable.set(index); 163 | return intWritable; 164 | } 165 | 166 | @Override 167 | public String getDisplayString(String[] children) { 168 | assert (children.length >= 2 && children.length <= 4); 169 | StringBuffer displayString = new StringBuffer(); 170 | 171 | displayString.append("instr()"); 172 | for (int i = 0; i < children.length; i++) { 173 | displayString.append(children[i]); 174 | } 175 | displayString.append(")"); 176 | return displayString.toString(); 177 | } 178 | } 179 | 180 | -------------------------------------------------------------------------------- /src/main/java/com/nexr/platform/hive/udf/GenericUDFLag.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package com.nexr.platform.hive.udf; 20 | 21 | import java.util.ArrayList; 22 | 23 | import org.apache.commons.lang.StringUtils; 24 | import org.apache.hadoop.hive.ql.exec.Description; 25 | import org.apache.hadoop.hive.ql.exec.UDFArgumentException; 26 | import org.apache.hadoop.hive.ql.metadata.HiveException; 27 | import org.apache.hadoop.hive.ql.udf.UDFType; 28 | import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; 29 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; 30 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters; 31 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter; 32 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; 33 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption; 34 | import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; 35 | import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; 36 | import org.apache.hive.pdk.HivePdkUnitTest; 37 | import org.apache.hive.pdk.HivePdkUnitTests; 38 | 39 | 40 | @Description(name = "lag", 41 | value = "_FUNC_(hash_key,column[,offset[,default]]) " + 42 | "- Returns values from a previous row in the table.", 43 | extended = "Example:\n" 44 | + " > SELECT _FUNC_(HASH(p1,p2), column [,offset[,default]]) FROM (\n" 45 | + " > SELECT ~ FROM table DISTRIBUTE BY HASH(p1,p2) SORT BY p1,p2\n" 46 | + " > );") 47 | 48 | @HivePdkUnitTests( 49 | setup = "", cleanup = "", 50 | cases = { 51 | @HivePdkUnitTest( 52 | query = "SELECT t.empno, t.deptno, t.sal, nexr_lag(hash(t.deptno),t.sal) as col" 53 | +" FROM (" 54 | +" select a.empno, a.deptno, a.sal from emp a" 55 | +" distribute by hash(a.deptno)" 56 | +" sort BY a.deptno, a.empno" 57 | +" ) t;", 58 | result = 59 | 
"7782\t10\t2450\tNULL\n" 60 | +"7839\t10\t5000\t2450\n" 61 | +"7934\t10\t1300\t5000\n" 62 | +"7369\t20\t800\tNULL\n" 63 | +"7566\t20\t2975\t800\n" 64 | +"7788\t20\t3000\t2975\n" 65 | +"7876\t20\t1100\t3000\n" 66 | +"7902\t20\t3000\t1100\n" 67 | +"7499\t30\t1600\tNULL\n" 68 | +"7521\t30\t1250\t1600\n" 69 | +"7654\t30\t1250\t1250\n" 70 | +"7698\t30\t2850\t1250\n" 71 | +"7844\t30\t1500\t2850\n" 72 | +"7900\t30\t950\t1500"), 73 | @HivePdkUnitTest( 74 | query = "SELECT t.empno, t.deptno, t.sal, nexr_lag(hash(t.deptno),t.sal,2,0) as col" 75 | +" FROM (" 76 | +" select a.empno, a.deptno, a.sal from emp a " 77 | +" distribute by hash(a.deptno)" 78 | +" sort BY a.deptno, a.empno" 79 | +" ) t;", 80 | result = 81 | "7782\t10\t2450\t0\n" 82 | +"7839\t10\t5000\t0\n" 83 | +"7934\t10\t1300\t2450\n" 84 | +"7369\t20\t800\t0\n" 85 | +"7566\t20\t2975\t0\n" 86 | +"7788\t20\t3000\t800\n" 87 | +"7876\t20\t1100\t2975\n" 88 | +"7902\t20\t3000\t3000\n" 89 | +"7499\t30\t1600\t0\n" 90 | +"7521\t30\t1250\t0\n" 91 | +"7654\t30\t1250\t1600\n" 92 | +"7698\t30\t2850\t1250\n" 93 | +"7844\t30\t1500\t1250\n" 94 | +"7900\t30\t950\t2850") 95 | } 96 | ) 97 | 98 | 99 | @UDFType(deterministic = false, stateful = true) 100 | public class GenericUDFLag extends GenericUDF { 101 | private ObjectInspector[] argumentOIs; 102 | private ObjectInspector resultOI, prevHashStandardOI, valueStandardOI; 103 | private Object prevHash; 104 | private ArrayList queue = new ArrayList(); 105 | @Override 106 | public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { 107 | if (arguments.length<2||arguments.length>4) { 108 | throw new UDFArgumentException("The function accepts more than two arguments."); 109 | } 110 | 111 | argumentOIs = arguments; 112 | prevHashStandardOI = ObjectInspectorUtils.getStandardObjectInspector(arguments[0],ObjectInspectorCopyOption.JAVA); 113 | valueStandardOI = ObjectInspectorUtils.getStandardObjectInspector(arguments[1],ObjectInspectorCopyOption.JAVA); 114 | 
resultOI=arguments[1]; 115 | return resultOI; 116 | } 117 | 118 | @Override 119 | public Object evaluate(DeferredObject[] arguments) throws HiveException { 120 | Object hash = arguments[0].get(); 121 | Object value = arguments[1].get(); 122 | int offset=1; 123 | Object defaultValue=null; 124 | 125 | if (arguments.length>=3) 126 | offset = PrimitiveObjectInspectorUtils.getInt(arguments[2].get(),(PrimitiveObjectInspector)argumentOIs[2]); 127 | if (arguments.length==4) 128 | defaultValue=arguments[3].get(); 129 | 130 | if (prevHash==null||ObjectInspectorUtils.compare(prevHash,prevHashStandardOI,hash,argumentOIs[0])!=0) { 131 | queue.clear(); 132 | } 133 | 134 | queue.add(ObjectInspectorUtils.copyToStandardObject(value,argumentOIs[1],ObjectInspectorCopyOption.JAVA)); 135 | prevHash=ObjectInspectorUtils.copyToStandardObject(hash, argumentOIs[0],ObjectInspectorCopyOption.JAVA); 136 | if (queue.size()==offset+1) { 137 | Converter converter = ObjectInspectorConverters.getConverter(valueStandardOI, resultOI); 138 | return converter.convert(queue.remove(0)); 139 | } 140 | return defaultValue; 141 | } 142 | 143 | @Override 144 | public String getDisplayString(String[] children) { 145 | return "lag(" + StringUtils.join(children, ',') + ")"; 146 | } 147 | } 148 | -------------------------------------------------------------------------------- /src/main/java/com/nexr/platform/hive/udf/GenericUDFLnnvl.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. 
You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package com.nexr.platform.hive.udf; 20 | 21 | import org.apache.hadoop.hive.ql.exec.Description; 22 | import org.apache.hadoop.hive.ql.exec.UDFArgumentException; 23 | import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException; 24 | import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; 25 | import org.apache.hadoop.hive.ql.metadata.HiveException; 26 | import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; 27 | import org.apache.hadoop.hive.serde.Constants; 28 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; 29 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters; 30 | import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; 31 | import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; 32 | import org.apache.hadoop.io.BooleanWritable; 33 | import org.apache.hive.pdk.HivePdkUnitTest; 34 | import org.apache.hive.pdk.HivePdkUnitTests; 35 | 36 | /** 37 | * GenericUDF Class for SQL construct "lnnvl(condition)". see LNNVL. 39 | *

 40 |  * example:
 41 |  *
 42 |  * <pre>
 43 |  * SELECT COUNT(*) FROM employees WHERE LNNVL(commission_pct >= .2);
 44 |  * returns employees who receive a commission of less than 20% and no commission.
 45 |  * </pre>
46 | * 47 | */ 48 | @Description(name = "lnnvl", value = "_FUNC_(condition) " + 49 | "- Evalutates a condition when one of the operands may contains a NULL value.", 50 | extended = "Example:\n " 51 | + " > SELECT _FUNC_(condition) FROM src;\n FALSE if condition is true " + " return false") 52 | @HivePdkUnitTests(setup = "create table dual_data (i int); " 53 | + "insert overwrite table dual_data select 1 from dual limit 1;", 54 | cleanup = "drop table if exists dual_data;", 55 | cases = { 56 | @HivePdkUnitTest(query = "SELECT nexr_lnnvl(true) FROM dual_data;", result = "false"), 57 | @HivePdkUnitTest(query = "SELECT name, height FROM dual WHERE nexr_lnnvl(height > 175 );", result = "Adam\t174.3\n" 58 | + "Bravo\tNULL") }) 59 | public class GenericUDFLnnvl extends GenericUDF { 60 | 61 | private ObjectInspector[] argumentIOs; 62 | private ObjectInspector returnInspector; 63 | 64 | private ObjectInspectorConverters.Converter[] converters; 65 | 66 | @Override 67 | public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { 68 | 69 | if (arguments.length < 1) { 70 | throw new UDFArgumentLengthException("The function lnnvl(condition ) needs at least one arguments"); 71 | } 72 | 73 | if (arguments[0].getTypeName() != Constants.BOOLEAN_TYPE_NAME 74 | && arguments[0].getTypeName() != Constants.VOID_TYPE_NAME) { 75 | throw new UDFArgumentTypeException(0, "Argument (0) of function lnnvl must be " 76 | + Constants.BOOLEAN_TYPE_NAME + " but " + arguments[0].getTypeName() + " was found."); 77 | } 78 | 79 | this.argumentIOs = arguments; 80 | converters = new ObjectInspectorConverters.Converter[arguments.length]; 81 | for (int i = 0; i < arguments.length; i++) { 82 | converters[i] = ObjectInspectorConverters.getConverter(arguments[i], 83 | PrimitiveObjectInspectorFactory.writableBooleanObjectInspector); 84 | } 85 | 86 | returnInspector = PrimitiveObjectInspectorFactory.writableBooleanObjectInspector; 87 | return returnInspector; 88 | } 89 | 90 | 
private BooleanWritable booleanWritable = new BooleanWritable(); 91 | 92 | @Override 93 | public Object evaluate(DeferredObject[] arguments) throws HiveException { 94 | 95 | if (arguments[0].get() == null) { 96 | booleanWritable.set(true); 97 | return booleanWritable; 98 | } 99 | 100 | BooleanWritable value = (BooleanWritable) ((PrimitiveObjectInspector) argumentIOs[0]) 101 | .getPrimitiveWritableObject(arguments[0].get()); 102 | 103 | booleanWritable.set(!value.get()); 104 | return booleanWritable; 105 | } 106 | 107 | @Override 108 | public String getDisplayString(String[] children) { 109 | StringBuilder sb = new StringBuilder(); 110 | sb.append("lnnvl ("); 111 | for (int i = 0; i < children.length - 1; i++) { 112 | sb.append(children[i]).append(", "); 113 | } 114 | sb.append(children[children.length - 1]).append(")"); 115 | return sb.toString(); 116 | } 117 | 118 | } 119 | -------------------------------------------------------------------------------- /src/main/java/com/nexr/platform/hive/udf/GenericUDFMax.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | 19 | package com.nexr.platform.hive.udf; 20 | 21 | import org.apache.commons.lang.StringUtils; 22 | import org.apache.hadoop.hive.ql.exec.Description; 23 | import org.apache.hadoop.hive.ql.exec.UDFArgumentException; 24 | import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; 25 | import org.apache.hadoop.hive.ql.metadata.HiveException; 26 | import org.apache.hadoop.hive.ql.udf.UDFType; 27 | import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; 28 | import org.apache.hadoop.hive.serde.Constants; 29 | import org.apache.hadoop.hive.serde2.io.DoubleWritable; 30 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; 31 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters; 32 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter; 33 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; 34 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption; 35 | import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; 36 | import org.apache.hadoop.io.LongWritable; 37 | import org.apache.hive.pdk.HivePdkUnitTest; 38 | import org.apache.hive.pdk.HivePdkUnitTests; 39 | 40 | @Description(name = "max", 41 | value = "_FUNC_(hash_key, order_by_col1, order_by_col2 ...) " + 42 | "- Returns the max value in a group of values", 43 | extended = "Example:\n" 44 | + " > SELECT _FUNC_(HASH(p1, p2), order_by_col1, order_by_col2, ... ) FROM (\n" 45 | + " > SELECT ~ FROM table DISTRIBUTE BY HASH(p1,p2) " + 46 | "SORT BY p1,p2,order_by_col1, order_by_col2 DESC, ... 
\n" 47 | + " > );") 48 | 49 | @HivePdkUnitTests( 50 | setup = "", cleanup = "", 51 | cases = { 52 | @HivePdkUnitTest( 53 | query = "SELECT t.empno, t.deptno, t.sal, nexr_max(hash(t.deptno),t.sal) as sal_max" 54 | +" FROM (" 55 | +" select a.empno, a.deptno, a.sal from emp a" 56 | +" distribute by hash(a.deptno)" 57 | +" sort BY a.deptno, a.empno desc" 58 | +" ) t;", 59 | result ="7934\t10\t1300\t1300\n" 60 | +"7839\t10\t5000\t5000\n" 61 | +"7782\t10\t2450\t5000\n" 62 | +"7902\t20\t3000\t3000\n" 63 | +"7876\t20\t1100\t3000\n" 64 | +"7788\t20\t3000\t3000\n" 65 | +"7566\t20\t2975\t3000\n" 66 | +"7369\t20\t800\t3000\n" 67 | +"7900\t30\t950\t950\n" 68 | +"7844\t30\t1500\t1500\n" 69 | +"7698\t30\t2850\t2850\n" 70 | +"7654\t30\t1250\t2850\n" 71 | +"7521\t30\t1250\t2850\n" 72 | +"7499\t30\t1600\t2850") 73 | } 74 | ) 75 | 76 | @UDFType(deterministic = false, stateful = true) 77 | public class GenericUDFMax extends GenericUDF { 78 | private final LongWritable longResult = new LongWritable(); 79 | private final DoubleWritable doubleResult = new DoubleWritable(); 80 | private ObjectInspector hashOI,valueOI,prevHashStandardOI,resultOI,maxStandardOI; 81 | private Object prevHash,max; 82 | @Override 83 | public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { 84 | if (arguments.length != 2) { 85 | throw new UDFArgumentException("Exactly two argument is expected."); 86 | } 87 | 88 | for(int i=0;i SELECT _FUNC_(HASH(p1, p2), order_by_col1, order_by_col2, ... ) FROM (\n" 46 | + " > SELECT ~ FROM table DISTRIBUTE BY HASH(p1,p2) " 47 | + "SORT BY order_by_col1, order_by_col2 DESC, ... 
\n" 48 | + " > );") 49 | 50 | @HivePdkUnitTests( 51 | setup = "", cleanup = "", 52 | cases = { 53 | @HivePdkUnitTest( 54 | query = "SELECT t.empno, t.deptno, t.sal, nexr_min(hash(t.deptno),t.sal) as sal_min" 55 | +" FROM (" 56 | +" select a.empno, a.deptno, a.sal from emp a" 57 | +" distribute by hash(a.deptno)" 58 | +" sort BY a.deptno, a.empno" 59 | +" ) t;" , 60 | result ="7782\t10\t2450\t2450\n" 61 | +"7839\t10\t5000\t2450\n" 62 | +"7934\t10\t1300\t1300\n" 63 | +"7369\t20\t800\t800\n" 64 | +"7566\t20\t2975\t800\n" 65 | +"7788\t20\t3000\t800\n" 66 | +"7876\t20\t1100\t800\n" 67 | +"7902\t20\t3000\t800\n" 68 | +"7499\t30\t1600\t1600\n" 69 | +"7521\t30\t1250\t1250\n" 70 | +"7654\t30\t1250\t1250\n" 71 | +"7698\t30\t2850\t1250\n" 72 | +"7844\t30\t1500\t1250\n" 73 | +"7900\t30\t950\t950") 74 | } 75 | ) 76 | 77 | @UDFType(deterministic = false, stateful = true) 78 | public class GenericUDFMin extends GenericUDF { 79 | private final LongWritable longResult = new LongWritable(); 80 | private final DoubleWritable doubleResult = new DoubleWritable(); 81 | private ObjectInspector hashOI,valueOI,prevHashStandardOI,resultOI,minStandardOI; 82 | private Object prevHash,min; 83 | @Override 84 | public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { 85 | if (arguments.length != 2) { 86 | throw new UDFArgumentException("Exactly two argument is expected."); 87 | } 88 | 89 | for(int i=0;i0){ 129 | min = ObjectInspectorUtils.copyToStandardObject(value,valueOI,ObjectInspectorCopyOption.JAVA); 130 | } 131 | } 132 | 133 | prevHash=ObjectInspectorUtils.copyToStandardObject(hash,hashOI, ObjectInspectorCopyOption.JAVA); 134 | 135 | Converter converter = ObjectInspectorConverters.getConverter(minStandardOI, resultOI); 136 | if(resultOI.getTypeName()==Constants.DOUBLE_TYPE_NAME){ 137 | DoubleWritable minW = (DoubleWritable)converter.convert(min); 138 | doubleResult.set(minW.get()); 139 | return doubleResult; 140 | } 141 | LongWritable minW = 
(LongWritable)converter.convert(min); 142 | longResult.set(minW.get()); 143 | return longResult; 144 | } 145 | 146 | @Override 147 | public String getDisplayString(String[] children) { 148 | return "min(" + StringUtils.join(children, ',') + ")"; 149 | } 150 | } 151 | 152 | -------------------------------------------------------------------------------- /src/main/java/com/nexr/platform/hive/udf/GenericUDFNVL.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | 19 | package com.nexr.platform.hive.udf; 20 | 21 | import org.apache.hadoop.hive.ql.exec.Description; 22 | import org.apache.hadoop.hive.ql.exec.UDFArgumentException; 23 | import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException; 24 | import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; 25 | import org.apache.hadoop.hive.ql.metadata.HiveException; 26 | import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; 27 | import org.apache.hadoop.hive.ql.udf.generic.GenericUDFUtils; 28 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; 29 | import org.apache.hive.pdk.HivePdkUnitTest; 30 | import org.apache.hive.pdk.HivePdkUnitTests; 31 | 32 | /** 33 | * GenericUDF Class for SQL construct "nvl(expr1, expr2)". see NVL. 35 | *

36 | * There is function COALESCE in Hive, 37 | * but it is convenient to convert from SQL to HQL without query 38 | * changes. 39 | *

40 | * 41 | */ 42 | @Description(name = "nvl", value = "_FUNC_(expr1, expr2) - Returns expr2 if expr1 is null", 43 | extended = "Example:\n" 44 | + " > SELECT _FUNC_(dep, 'Not Applicable') FROM src;\n 'Not Applicable' if dep is null") 45 | @HivePdkUnitTests(setup = "create table dual_data (i int); " 46 | + "insert overwrite table dual_data select null from dual limit 1;", 47 | cleanup = "drop table if exists dual_data;", 48 | cases = { @HivePdkUnitTest(query = "SELECT nexr_nvl(i, 'Not Applicable') " + 49 | "FROM dual_data;", result = "Not Applicable") }) 50 | public class GenericUDFNVL extends GenericUDF { 51 | 52 | private ObjectInspector[] argumentOIs; 53 | private GenericUDFUtils.ReturnObjectInspectorResolver returnOIResolver; 54 | 55 | @Override 56 | public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { 57 | 58 | if (arguments.length < 2) { 59 | throw new UDFArgumentLengthException("The function nvl(expr1, expr2) needs at least two arguments."); 60 | } 61 | 62 | for (int i = 0; i < arguments.length; i++) { 63 | if (arguments[i].getCategory() != ObjectInspector.Category.PRIMITIVE) { 64 | throw new UDFArgumentTypeException(i, "Only primitive type arguments are accepted but " 65 | + arguments[i].getTypeName() + " is passed."); 66 | } 67 | } 68 | 69 | argumentOIs = arguments; 70 | returnOIResolver = new GenericUDFUtils.ReturnObjectInspectorResolver(true); 71 | 72 | if (!(returnOIResolver.update(arguments[0]) && returnOIResolver.update(arguments[1]))) { 73 | throw new UDFArgumentTypeException(1, 74 | "The first and the second arguments of function NVL should have the same type, " 75 | + "but they are different: \"" + arguments[0].getTypeName() + "\" and \"" 76 | + arguments[1].getTypeName() + "\""); 77 | } 78 | 79 | return returnOIResolver.get(); 80 | } 81 | 82 | @Override 83 | public Object evaluate(DeferredObject[] arguments) throws HiveException { 84 | 85 | Object returnValue = null; 86 | if (arguments[0].get() == null) { 87 
| // fieldValue is null, return defaultValue 88 | returnValue = returnOIResolver.convertIfNecessary(arguments[1].get(), argumentOIs[1]); 89 | } else { 90 | returnValue = returnOIResolver.convertIfNecessary(arguments[0].get(), argumentOIs[0]); 91 | } 92 | 93 | return returnValue; 94 | } 95 | 96 | @Override 97 | public String getDisplayString(String[] children) { 98 | StringBuilder sb = new StringBuilder(); 99 | sb.append("nvl ("); 100 | for (int i = 0; i < children.length - 1; i++) { 101 | sb.append(children[i]).append(", "); 102 | } 103 | sb.append(children[children.length - 1]).append(")"); 104 | return sb.toString(); 105 | } 106 | 107 | } 108 | -------------------------------------------------------------------------------- /src/main/java/com/nexr/platform/hive/udf/GenericUDFNVL2.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | 19 | 20 | package com.nexr.platform.hive.udf; 21 | 22 | import org.apache.hadoop.hive.ql.exec.Description; 23 | import org.apache.hadoop.hive.ql.exec.UDFArgumentException; 24 | import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException; 25 | import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; 26 | import org.apache.hadoop.hive.ql.metadata.HiveException; 27 | import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; 28 | import org.apache.hadoop.hive.ql.udf.generic.GenericUDFUtils; 29 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; 30 | import org.apache.hive.pdk.HivePdkUnitTest; 31 | import org.apache.hive.pdk.HivePdkUnitTests; 32 | 33 | /** 34 | * GenericUDF Class for SQL construct "nvl2(string1, value_if_not_null, value_if_null)". 35 | * see NVL2. 37 | *

38 | * There is function COALESCE in Hive, 39 | * but it is convenient to convert from SQL to HQL without query 40 | * changes. 41 | *

42 | * example: 43 | * 44 | *

 45 |  * select nvl2(supplier_city, 'Completed', 'n/a');
 46 |  * 
 47 |  * It returns 'n/a' if supplier_city is null otherwise return 'Completed'.
 48 |  * 
49 | * 50 | * 51 | */ 52 | @Description(name = "nvl2", value = "_FUNC_(string1, value_if_not_null, value_if_null) " + 53 | "- Returns value_if_not_null if string1 is not null, otherwise value_if_null", 54 | extended = "Example:\n" 55 | + " > SELECT _FUNC_(supplier_city, 'Completed', 'n/a') " + 56 | "FROM src;\n 'n/a' if supplier_city is null") 57 | @HivePdkUnitTests(setup = "create table dual_data (i int); " 58 | + "insert overwrite table dual_data select null from dual limit 1;", 59 | cleanup = "drop table if exists dual_data;", 60 | cases = { 61 | @HivePdkUnitTest(query = "SELECT nexr_nvl2(null, 'Completed', 'n/a') " + 62 | "FROM dual_data;", result = "n/a") }) 63 | public class GenericUDFNVL2 extends GenericUDF { 64 | 65 | private ObjectInspector[] argumentOIs; 66 | private GenericUDFUtils.ReturnObjectInspectorResolver returnOIResolver; 67 | 68 | @Override 69 | public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { 70 | 71 | if (arguments.length < 3) { 72 | throw new UDFArgumentLengthException( 73 | "The function nvl2(string1, value_if_not_null, value_if_null) needs " 74 | + "at least three arguments."); 75 | } 76 | 77 | for (int i = 0; i < arguments.length; i++) { 78 | if (arguments[i].getCategory() != ObjectInspector.Category.PRIMITIVE) { 79 | throw new UDFArgumentTypeException(i, "Only primitive type arguments are accepted but " 80 | + arguments[i].getTypeName() + " is passed."); 81 | } 82 | } 83 | 84 | argumentOIs = arguments; 85 | returnOIResolver = new GenericUDFUtils.ReturnObjectInspectorResolver(true); 86 | for (int i = 0; i < arguments.length; i++) { 87 | if (!returnOIResolver.update(arguments[i])) { 88 | throw new UDFArgumentTypeException(i, "The value of return should have the same type: \"" 89 | + returnOIResolver.get().getTypeName() + "\" is expected but \"" 90 | + arguments[i].getTypeName() 91 | + "\" is found"); 92 | } 93 | } 94 | 95 | return returnOIResolver.get(); 96 | } 97 | 98 | @Override 99 | public 
Object evaluate(DeferredObject[] arguments) throws HiveException { 100 | 101 | Object returnValue = null; 102 | if (arguments[0].get() == null) { 103 | // return value_if_null 104 | returnValue = returnOIResolver.convertIfNecessary(arguments[2].get(), argumentOIs[2]); 105 | } else { 106 | returnValue = returnOIResolver.convertIfNecessary(arguments[1].get(), argumentOIs[1]); 107 | } 108 | 109 | return returnValue; 110 | } 111 | 112 | @Override 113 | public String getDisplayString(String[] children) { 114 | StringBuilder sb = new StringBuilder(); 115 | sb.append("nvl2 ("); 116 | for (int i = 0; i < children.length - 1; i++) { 117 | sb.append(children[i]).append(", "); 118 | } 119 | sb.append(children[children.length - 1]).append(")"); 120 | return sb.toString(); 121 | } 122 | 123 | } 124 | -------------------------------------------------------------------------------- /src/main/java/com/nexr/platform/hive/udf/GenericUDFRank.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | 19 | package com.nexr.platform.hive.udf; 20 | 21 | import org.apache.commons.lang.StringUtils; 22 | import org.apache.hadoop.hive.ql.exec.Description; 23 | import org.apache.hadoop.hive.ql.exec.UDFArgumentException; 24 | import org.apache.hadoop.hive.ql.metadata.HiveException; 25 | import org.apache.hadoop.hive.ql.udf.UDFType; 26 | import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; 27 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; 28 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; 29 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption; 30 | import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; 31 | import org.apache.hadoop.io.LongWritable; 32 | import org.apache.hive.pdk.HivePdkUnitTest; 33 | import org.apache.hive.pdk.HivePdkUnitTests; 34 | 35 | 36 | /** 37 | * GenericUDFRank 38 | */ 39 | @Description(name = "rank", 40 | value = "_FUNC_(hash_key, order_by_col1, order_by_col2 ...) 
" + 41 | "- Returns the rank of a value in a group of values", 42 | extended = "Example:\n" 43 | + " > SELECT _FUNC_(HASH(p1, p2), order_by_col1) FROM (\n" 44 | + " > SELECT order_by_col1 FROM table \n" 45 | + " > DISTRIBUTE BY HASH(p1,p2)\n" 46 | + " > SORT BY p1, p2, order_by_col1 \n" 47 | + " > );\n\n" 48 | + " ORACLE \n" 49 | + " >SELECT RANK() OVER(PARTITION BY p1,p2 ORDER BY order_by_col1) FROM table;\n") 50 | 51 | 52 | @HivePdkUnitTests( 53 | setup = "", cleanup = "", 54 | cases = { 55 | @HivePdkUnitTest( 56 | query = "SELECT t.empno, t.deptno, t.sal, nexr_rank(t.deptno, t.sal) as rank" 57 | + " FROM (" 58 | + " select a.empno, a.deptno, a.sal from " 59 | + " emp a" 60 | + " distribute by a.deptno" 61 | + " sort BY a.deptno, a.sal, a.empno" 62 | + " ) t;", 63 | result = "7934\t10\t1300\t1\n" 64 | + "7782\t10\t2450\t2\n" 65 | + "7839\t10\t5000\t3\n" 66 | + "7369\t20\t800\t1\n" 67 | + "7876\t20\t1100\t2\n" 68 | + "7566\t20\t2975\t3\n" 69 | + "7788\t20\t3000\t4\n" 70 | + "7902\t20\t3000\t4\n" 71 | + "7900\t30\t950\t1\n" 72 | + "7521\t30\t1250\t2\n" 73 | + "7654\t30\t1250\t2\n" 74 | + "7844\t30\t1500\t4\n" 75 | + "7499\t30\t1600\t5\n" 76 | + "7698\t30\t2850\t6") 77 | } 78 | ) 79 | 80 | @UDFType(deterministic = false, stateful = true) 81 | public class GenericUDFRank extends GenericUDF { 82 | 83 | private ObjectInspector[] argumentIOs; 84 | private Object[] prevArguments; 85 | private ObjectInspector prevHashKeyIO; 86 | 87 | protected long counter; 88 | protected final LongWritable result = new LongWritable(1); 89 | 90 | @Override 91 | public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { 92 | if (arguments.length < 2) { 93 | throw new UDFArgumentException("The function RANK accepts at least more than 2 arguments."); 94 | } 95 | result.set(1); 96 | argumentIOs = arguments; 97 | prevArguments = new Object[arguments.length]; 98 | prevHashKeyIO = ObjectInspectorUtils.getStandardObjectInspector(arguments[0], 
ObjectInspectorCopyOption.JAVA); 99 | return PrimitiveObjectInspectorFactory.writableLongObjectInspector; 100 | } 101 | 102 | @Override 103 | public Object evaluate(DeferredObject[] arguments) throws HiveException { 104 | Object hashKey = arguments[0].get(); 105 | Object prevHashKey = prevArguments[0]; 106 | 107 | if (prevHashKey == null || ObjectInspectorUtils.compare(prevHashKey, prevHashKeyIO, hashKey, argumentIOs[0]) != 0) { 108 | different(true); 109 | } else { 110 | for (int i = 1; i < arguments.length; i++) { 111 | if (ObjectInspectorUtils.compare(prevArguments[i], ObjectInspectorUtils.getStandardObjectInspector(argumentIOs[i], 112 | ObjectInspectorCopyOption.JAVA), arguments[i].get(), argumentIOs[i]) != 0) { 113 | different(false); 114 | break; 115 | } 116 | } 117 | } 118 | next(); 119 | 120 | for (int i = 0; i < arguments.length; i++) { 121 | prevArguments[i] = ObjectInspectorUtils.copyToStandardObject(arguments[i].get(), 122 | argumentIOs[i], ObjectInspectorCopyOption.JAVA); 123 | } 124 | 125 | return result; 126 | } 127 | 128 | @Override 129 | public String getDisplayString(String[] children) { 130 | return "rank(" + StringUtils.join(children, ',') + ")"; 131 | } 132 | 133 | protected void different(boolean newKey) { 134 | if (newKey) { 135 | counter = 1; 136 | } 137 | result.set(counter); 138 | } 139 | 140 | protected void next() { 141 | counter++; 142 | } 143 | } 144 | -------------------------------------------------------------------------------- /src/main/java/com/nexr/platform/hive/udf/GenericUDFRowNumber.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. 
The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package com.nexr.platform.hive.udf; 20 | 21 | import org.apache.commons.lang.StringUtils; 22 | import org.apache.hadoop.hive.ql.exec.Description; 23 | import org.apache.hadoop.hive.ql.exec.UDFArgumentException; 24 | import org.apache.hadoop.hive.ql.metadata.HiveException; 25 | import org.apache.hadoop.hive.ql.udf.UDFType; 26 | import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; 27 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; 28 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; 29 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption; 30 | import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; 31 | import org.apache.hadoop.io.LongWritable; 32 | import org.apache.hive.pdk.HivePdkUnitTest; 33 | import org.apache.hive.pdk.HivePdkUnitTests; 34 | 35 | /** 36 | * GenericUDFRowNumber 37 | * 38 | */ 39 | @Description(name = "row_number", 40 | value = "_FUNC_(hash_key) - Returns the row number of a value in a group of values", 41 | extended = "Example:\n" 42 | + " > SELECT _FUNC_(HASH(p1, p2)) FROM (\n" 43 | + " > SELECT order_by_col1 FROM table \n" 44 | + " > DISTRIBUTE BY HASH(p1,p2)\n" 45 | + " > SORT BY p1,p2, order_by_col1 \n" 46 | + " > );\n\n" 47 | + "ORACLE :\n" 48 | + " > SELECT ROW_NUMBER() 
OVER(PARTITION BY p1,p2 ORDER BY order_by_col1) FROM table;\n" 49 | ) 50 | 51 | @HivePdkUnitTests( 52 | setup = "", cleanup = "", 53 | cases = { 54 | @HivePdkUnitTest( 55 | query = "SELECT t.empno, t.deptno, t.sal, nexr_row_number(hash(t.deptno)) as rank" 56 | +" FROM (" 57 | +" select a.empno, a.deptno, a.sal from " 58 | +" emp a" 59 | +" distribute by hash(a.deptno)" 60 | +" sort BY a.deptno, a.sal, a.empno" 61 | +" ) t;", 62 | 63 | result ="7934\t10\t1300\t1\n" 64 | +"7782\t10\t2450\t2\n" 65 | +"7839\t10\t5000\t3\n" 66 | +"7369\t20\t800\t1\n" 67 | +"7876\t20\t1100\t2\n" 68 | +"7566\t20\t2975\t3\n" 69 | +"7788\t20\t3000\t4\n" 70 | +"7902\t20\t3000\t5\n" 71 | +"7900\t30\t950\t1\n" 72 | +"7521\t30\t1250\t2\n" 73 | +"7654\t30\t1250\t3\n" 74 | +"7844\t30\t1500\t4\n" 75 | +"7499\t30\t1600\t5\n" 76 | +"7698\t30\t2850\t6" ) 77 | } 78 | ) 79 | 80 | @UDFType(deterministic = false, stateful = true) 81 | public class GenericUDFRowNumber extends GenericUDF { 82 | private final LongWritable result = new LongWritable(1); 83 | private ObjectInspector argumentIOs, prevHashKeyIO; 84 | private Object prevHashKey; 85 | @Override 86 | public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { 87 | if (arguments.length!=1) { 88 | throw new UDFArgumentException("The function ROW_NUMBER accepts 1 argument."); 89 | } 90 | result.set(1); 91 | argumentIOs = arguments[0]; 92 | prevHashKeyIO = ObjectInspectorUtils.getStandardObjectInspector(arguments[0],ObjectInspectorCopyOption.JAVA); 93 | return PrimitiveObjectInspectorFactory.writableLongObjectInspector; 94 | } 95 | 96 | @Override 97 | public Object evaluate(DeferredObject[] arguments) throws HiveException { 98 | Object hashKey = arguments[0].get(); 99 | if (prevHashKey==null||ObjectInspectorUtils.compare(prevHashKey,prevHashKeyIO,hashKey,argumentIOs)!=0) { 100 | result.set(0); 101 | } 102 | result.set(result.get()+1); 103 | prevHashKey=ObjectInspectorUtils.copyToStandardObject(arguments[0].get(), 
argumentIOs, ObjectInspectorCopyOption.JAVA); 104 | return result; 105 | } 106 | 107 | @Override 108 | public String getDisplayString(String[] children) { 109 | return "row_number(" + StringUtils.join(children, ',') + ")"; 110 | } 111 | } 112 | -------------------------------------------------------------------------------- /src/main/java/com/nexr/platform/hive/udf/GenericUDFSum.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | 19 | package com.nexr.platform.hive.udf; 20 | 21 | import org.apache.commons.lang.StringUtils; 22 | import org.apache.hadoop.hive.ql.exec.Description; 23 | import org.apache.hadoop.hive.ql.exec.UDFArgumentException; 24 | import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; 25 | import org.apache.hadoop.hive.ql.metadata.HiveException; 26 | import org.apache.hadoop.hive.ql.udf.UDFType; 27 | import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; 28 | import org.apache.hadoop.hive.serde.Constants; 29 | import org.apache.hadoop.hive.serde2.io.DoubleWritable; 30 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; 31 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters; 32 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter; 33 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; 34 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption; 35 | import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; 36 | import org.apache.hadoop.io.LongWritable; 37 | import org.apache.hive.pdk.HivePdkUnitTest; 38 | import org.apache.hive.pdk.HivePdkUnitTests; 39 | 40 | 41 | @Description(name = "sum", 42 | value = "_FUNC_(hash_key, order_by_col1, order_by_col2 ...) " + 43 | "- Returns the summed value of group", 44 | extended = "Example:\n" 45 | + " > SELECT _FUNC_(HASH(p1, p2), order_by_col1, order_by_col2, ... ) FROM (\n" 46 | + " > SELECT ~ FROM table DISTRIBUTE BY HASH(p1,p2) SORT BY p1,p2,order_by_col1, order_by_col2 DESC, ... 
\n" 47 | + " > );") 48 | 49 | @HivePdkUnitTests( 50 | setup = "", cleanup = "", 51 | cases = { 52 | @HivePdkUnitTest( 53 | query = "SELECT t.empno, t.deptno, t.sal, nexr_sum(hash(t.deptno),t.sal) as sal_sum" 54 | +" FROM (" 55 | +" select a.empno, a.deptno, a.sal from emp a" 56 | +" distribute by hash(a.deptno)" 57 | +" sort BY a.deptno, a.empno" 58 | +" ) t;", 59 | result ="7782\t10\t2450\t2450\n" 60 | +"7839\t10\t5000\t7450\n" 61 | +"7934\t10\t1300\t8750\n" 62 | +"7369\t20\t800\t800\n" 63 | +"7566\t20\t2975\t3775\n" 64 | +"7788\t20\t3000\t6775\n" 65 | +"7876\t20\t1100\t7875\n" 66 | +"7902\t20\t3000\t10875\n" 67 | +"7499\t30\t1600\t1600\n" 68 | +"7521\t30\t1250\t2850\n" 69 | +"7654\t30\t1250\t4100\n" 70 | +"7698\t30\t2850\t6950\n" 71 | +"7844\t30\t1500\t8450\n" 72 | +"7900\t30\t950\t9400") 73 | } 74 | ) 75 | 76 | @UDFType(deterministic = false, stateful = true) 77 | public class GenericUDFSum extends GenericUDF { 78 | private final LongWritable longResult = new LongWritable(); 79 | private final DoubleWritable doubleResult = new DoubleWritable(); 80 | private ObjectInspector hashOI, valueOI, prevHashStandardOI, resultOI; 81 | private Object prevHash; 82 | @Override 83 | public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { 84 | if (arguments.length != 2) { 85 | throw new UDFArgumentException("Exactly two argument is expected."); 86 | } 87 | 88 | for(int i=0;i 44 | * It has constraint to use, for example, 45 | * It can convert double value with format-mask. Without format-mask, it returns 46 | * long value. There is cast for type conversion in HIVE. 47 | * It can be used for the most frequently requested 48 | * examples such as below. 49 | *

50 | * 51 | * 52 | */ 53 | @Description(name = "to_number", value = "_FUNC_(value, format_mask) " + 54 | "- Returns the number converted from string.", extended = "Example:\n" 55 | + " > SELECT _FUNC_('1210') FROM src;\n 1210") 56 | @HivePdkUnitTests(setup = "create table dual_data (i int); " 57 | + "insert overwrite table dual_data select 0 from dual limit 1;", 58 | cleanup = "drop table if exists dual_data;", cases = { 59 | @HivePdkUnitTest(query = "SELECT nexr_to_number('1234') " + 60 | "FROM dual_data;", result = "1234"), 61 | @HivePdkUnitTest(query = "SELECT nexr_to_number('1234.56', 9999.99) " + 62 | "FROM dual_data;", result = "1234.56"), 63 | @HivePdkUnitTest(query = "SELECT nexr_to_number('1234.56') " + 64 | "FROM dual_data;", result = "1234"), 65 | @HivePdkUnitTest(query = "SELECT nexr_to_number('1234', 9999.99) " + 66 | "FROM dual_data;", result = "1234.0"), 67 | @HivePdkUnitTest(query = "SELECT nexr_to_number('$1234.56', '$9999.99') " + 68 | "FROM dual_data;", result = "1234.56"), 69 | @HivePdkUnitTest(query = "SELECT nexr_to_number('HIGH', '$9999.99') " + 70 | "FROM dual_data;", result = "NULL") }) 71 | public class GenericUDFToNumber extends GenericUDF { 72 | 73 | private ObjectInspector returnInspector; 74 | 75 | private ObjectInspectorConverters.Converter[] converters; 76 | 77 | @Override 78 | public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { 79 | 80 | if (arguments.length < 1) { 81 | throw new UDFArgumentLengthException( 82 | "The function to_number(value, format_mask, nls_language) needs at least one argument."); 83 | } 84 | 85 | if (arguments[0].getTypeName() != Constants.STRING_TYPE_NAME) { 86 | throw new UDFArgumentTypeException(0, "Argument " + (1) + " of function to_number must be \"" 87 | + Constants.STRING_TYPE_NAME + "\", but \"" 88 | + arguments[0].getTypeName() + "\" was found."); 89 | } 90 | 91 | if (arguments.length > 1 92 | && !(arguments[1].getTypeName() == Constants.DOUBLE_TYPE_NAME || 93 | 
arguments[1].getTypeName() == Constants.STRING_TYPE_NAME)) { 94 | throw new UDFArgumentTypeException(1, "Argument " + (2) 95 | + " of function to_number must be \"" 96 | + Constants.DOUBLE_TYPE_NAME + "\", but \"" 97 | + arguments[1].getTypeName() + "\" was found."); 98 | } 99 | 100 | converters = new ObjectInspectorConverters.Converter[arguments.length]; 101 | for (int i = 0; i < arguments.length; i++) { 102 | converters[i] = ObjectInspectorConverters.getConverter(arguments[i], 103 | PrimitiveObjectInspectorFactory.writableStringObjectInspector); 104 | } 105 | 106 | if (arguments.length == 1) { 107 | returnInspector = PrimitiveObjectInspectorFactory.writableLongObjectInspector; 108 | } else { 109 | returnInspector = PrimitiveObjectInspectorFactory.writableDoubleObjectInspector; 110 | } 111 | 112 | return returnInspector; 113 | } 114 | 115 | private LongWritable longResult = new LongWritable(); 116 | private DoubleWritable doubleResult = new DoubleWritable(); 117 | 118 | @Override 119 | public Object evaluate(DeferredObject[] arguments) throws HiveException { 120 | 121 | if (arguments[0].get() == null) { 122 | return null; 123 | } 124 | 125 | try { 126 | Text value = (Text) converters[0].convert(arguments[0].get()); 127 | 128 | Locale locale = Locale.getDefault(); 129 | 130 | // @todo convert nls_params to locale if needed. 
131 | 132 | NumberFormat formatter = NumberFormat.getInstance(locale); 133 | if (formatter instanceof DecimalFormat) { 134 | DecimalFormat df = (DecimalFormat) formatter; 135 | 136 | if (returnInspector.getTypeName() == Constants.BIGINT_TYPE_NAME) { 137 | longResult.set(df.parse(value.toString()).longValue()); 138 | return longResult; 139 | } 140 | 141 | // Double 142 | String pattern = ((Text) converters[1].convert(arguments[1].get())).toString(); 143 | pattern = pattern.replace("9", "0"); 144 | df.applyPattern(pattern); 145 | doubleResult.set(df.parse(value.toString()).doubleValue()); 146 | } 147 | 148 | return doubleResult; 149 | 150 | } catch (Exception e) { 151 | e.printStackTrace(); 152 | return null; 153 | } 154 | 155 | } 156 | 157 | @Override 158 | public String getDisplayString(String[] children) { 159 | StringBuilder sb = new StringBuilder(); 160 | sb.append("to_number("); 161 | for (int i = 0; i < children.length - 1; i++) { 162 | sb.append(children[i]).append(", "); 163 | } 164 | sb.append(children[children.length - 1]).append(")"); 165 | return sb.toString(); 166 | } 167 | 168 | } 169 | -------------------------------------------------------------------------------- /src/main/java/com/nexr/platform/hive/udf/GenericUDFTrunc.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. 
You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | package com.nexr.platform.hive.udf; 19 | 20 | import java.text.ParseException; 21 | import java.text.SimpleDateFormat; 22 | import java.util.Calendar; 23 | import java.util.Date; 24 | 25 | import org.apache.hadoop.hive.ql.exec.Description; 26 | import org.apache.hadoop.hive.ql.exec.UDFArgumentException; 27 | import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException; 28 | import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; 29 | import org.apache.hadoop.hive.ql.metadata.HiveException; 30 | import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; 31 | import org.apache.hadoop.hive.serde.Constants; 32 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; 33 | import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; 34 | import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; 35 | import org.apache.hadoop.io.Text; 36 | import org.apache.hive.pdk.HivePdkUnitTest; 37 | import org.apache.hive.pdk.HivePdkUnitTests; 38 | 39 | /** 40 | * GenericUDFTrunc Class for SQL construct "trunc(date, [format])". This mimics the function form TRUNC(date). The date should 42 | * be valid pattern of 'yyyy-MM-dd HH:mm:ss'. 43 | *

44 | * Below are the valid format parameters: 45 | *

    46 | *
  • Year : YYYY 47 | *
  • Month : MM 48 | *
  • Day : DD 49 | *
50 | * 51 | * 52 | */ 53 | @Description(name = "trunc", value = "_FUNC_(date, [format_mask]) " + 54 | "- Returns a date in string truncated to a specific unit of measure.", extended = "Example:\n" 55 | + " > SELECT _FUNC_('2011-08-02 01:01:01') FROM src ;\n returns '2011-08-02 00:00:00' ") 56 | @HivePdkUnitTests(setup = "create table dual_data (i int); " 57 | + "insert overwrite table dual_data select 1 from dual limit 1;", 58 | cleanup = "drop table if exists dual_data;", cases = { 59 | @HivePdkUnitTest(query = "SELECT nexr_trunc('2011-08-02 01:01:01') " + 60 | "FROM dual_data;", result = "2011-08-02 00:00:00"), 61 | @HivePdkUnitTest(query = "SELECT nexr_trunc('2011-08-02 01:01:01','YYYY') " + 62 | "FROM dual_data;", result = "2011-01-01 00:00:00"), 63 | @HivePdkUnitTest(query = "SELECT nexr_trunc('2011-08-02 01:01:01','MM') " + 64 | "FROM dual_data;", result = "2011-08-01 00:00:00"), 65 | @HivePdkUnitTest(query = "SELECT nexr_trunc('2011-08-02 01:01:01','DD') " + 66 | "FROM dual_data;", result = "2011-08-02 00:00:00") }) 67 | public class GenericUDFTrunc extends GenericUDF { 68 | 69 | private final String YYYY = "YYYY"; 70 | private final String MM = "MM"; 71 | private final String DD = "DD"; 72 | 73 | private final SimpleDateFormat HIVE_DATE_FORMAT = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); 74 | 75 | private ObjectInspector[] argumentIOs; 76 | private ObjectInspector returnInspector; 77 | 78 | @Override 79 | public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { 80 | 81 | if (arguments.length < 1) { 82 | throw new UDFArgumentLengthException("The function trunc(date, format) needs at least one argument."); 83 | } 84 | 85 | for (int i = 0; i < arguments.length; i++) { 86 | if (arguments[i].getTypeName() != Constants.STRING_TYPE_NAME) { 87 | throw new UDFArgumentTypeException(i, "Only String type arguments are accepted but " 88 | + arguments[i].getTypeName() + " is passed."); 89 | } 90 | } 91 | 92 | argumentIOs = arguments; 
93 | returnInspector = PrimitiveObjectInspectorFactory.writableStringObjectInspector; 94 | return returnInspector; 95 | } 96 | 97 | private Text returnText = new Text(); 98 | 99 | @Override 100 | public Object evaluate(DeferredObject[] records) throws HiveException { 101 | String date = ((PrimitiveObjectInspector) argumentIOs[0]).getPrimitiveJavaObject(records[0].get()).toString(); 102 | Date parsedDate = null; 103 | try { 104 | parsedDate = HIVE_DATE_FORMAT.parse(date); 105 | } catch (ParseException e) { 106 | e.printStackTrace(); 107 | return null; 108 | } 109 | char TRUNC_MODE = 'D'; 110 | if (records.length > 1) { 111 | String mode = ((PrimitiveObjectInspector) argumentIOs[1]).getPrimitiveJavaObject(records[1].get()) 112 | .toString(); 113 | if (mode.equals(YYYY)) { 114 | TRUNC_MODE = 'Y'; 115 | } else if (mode.equals(MM)) { 116 | TRUNC_MODE = 'M'; 117 | } else if (mode.equals(DD)) { 118 | TRUNC_MODE = 'D'; 119 | } 120 | } 121 | 122 | Calendar calendar = Calendar.getInstance(); 123 | calendar.setTime(parsedDate); 124 | int year = calendar.get(Calendar.YEAR); 125 | int month = calendar.get(Calendar.MONTH); 126 | int day = calendar.get(Calendar.DAY_OF_MONTH); 127 | switch (TRUNC_MODE) { 128 | case 'Y': 129 | calendar.clear(); 130 | calendar.set(Calendar.YEAR, year); 131 | break; 132 | case 'M': 133 | calendar.clear(); 134 | calendar.set(Calendar.YEAR, year); 135 | calendar.set(Calendar.MONTH, month); 136 | break; 137 | case 'D': 138 | default: 139 | calendar.clear(); 140 | calendar.set(Calendar.YEAR, year); 141 | calendar.set(Calendar.MONTH, month); 142 | calendar.set(Calendar.DAY_OF_MONTH, day); 143 | break; 144 | } 145 | 146 | String truncated = HIVE_DATE_FORMAT.format(calendar.getTime()); 147 | 148 | returnText.set(truncated); 149 | return returnText; 150 | } 151 | 152 | @Override 153 | public String getDisplayString(String[] children) { 154 | StringBuilder sb = new StringBuilder(); 155 | sb.append("trunc ("); 156 | for (int i = 0; i < children.length - 1; 
i++) { 157 | sb.append(children[i]).append(", "); 158 | } 159 | sb.append(children[children.length - 1]).append(")"); 160 | return sb.toString(); 161 | } 162 | 163 | } 164 | -------------------------------------------------------------------------------- /src/main/java/com/nexr/platform/hive/udf/GenericUDTFPopulate.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | 19 | package com.nexr.platform.hive.udf; 20 | 21 | import org.apache.hadoop.hive.ql.exec.Description; 22 | import org.apache.hadoop.hive.ql.exec.UDFArgumentException; 23 | import org.apache.hadoop.hive.ql.metadata.HiveException; 24 | import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF; 25 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; 26 | import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; 27 | 28 | @Description(name = "populate", 29 | value = "_FUNC_(a) - populate struct input as single row") 30 | public class GenericUDTFPopulate extends GenericUDTF { 31 | 32 | @Override 33 | public void close() throws HiveException { 34 | } 35 | 36 | @Override 37 | public StructObjectInspector initialize(ObjectInspector[] args) throws UDFArgumentException { 38 | if (args.length != 1 && args[0].getCategory() != ObjectInspector.Category.STRUCT) { 39 | throw new UDFArgumentException("populate() takes only one struct type argument"); 40 | } 41 | return (StructObjectInspector) args[0]; 42 | } 43 | 44 | @Override 45 | public void process(Object[] o) throws HiveException { 46 | forward(o[0]); 47 | } 48 | 49 | @Override 50 | public String toString() { 51 | return "explode"; 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /src/main/java/com/nexr/platform/hive/udf/UDFChr.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. 
You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package com.nexr.platform.hive.udf; 20 | 21 | import org.apache.hive.pdk.HivePdkUnitTest; 22 | import org.apache.hive.pdk.HivePdkUnitTests; 23 | 24 | import org.apache.hadoop.hive.ql.exec.Description; 25 | import org.apache.hadoop.hive.ql.exec.UDF; 26 | import org.apache.hadoop.io.IntWritable; 27 | import org.apache.hadoop.io.Text; 28 | 29 | /** 30 | * UDFChr 31 | * 32 | * This integrates the function from Oracle 33 | * http://www.techonthenet.com/oracle/functions/chr.php 34 | * 35 | * chr(number_code). 36 | * number_code is the NUMBER code used to retrieve the character. 
37 | * 38 | */ 39 | 40 | @Description(name = "chr", 41 | value = "_FUNC_(number_code) - Returns returns the character based on the NUMBER code", 42 | extended = "Example:\n" 43 | + " > SELECT _FUNC_(116) FROM src LIMIT 1;\n" + " t\n" 44 | + " > SELECT _FUNC_(84) FROM src LIMIT 1;\n" + " T\n") 45 | @HivePdkUnitTests( 46 | setup = "", cleanup = "", 47 | cases = { 48 | @HivePdkUnitTest( 49 | query = "SELECT nexr_chr(116) FROM onerow;", 50 | result = "t"), 51 | @HivePdkUnitTest( 52 | query = "SELECT nexr_chr(84) FROM onerow;", 53 | result = "T") 54 | } 55 | ) 56 | 57 | 58 | public class UDFChr extends UDF { 59 | private Text result = new Text(); 60 | 61 | public Text evaluate(IntWritable ascii_number) { 62 | if (ascii_number == null) { 63 | return null; 64 | } 65 | 66 | result.set(Character.toString((char) ascii_number.get())); 67 | return result; 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /src/main/java/com/nexr/platform/hive/udf/UDFDateFormat.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | 19 | package com.nexr.platform.hive.udf; 20 | 21 | import java.text.ParseException; 22 | import java.text.SimpleDateFormat; 23 | import java.util.Date; 24 | 25 | import org.apache.hadoop.hive.ql.exec.Description; 26 | import org.apache.hadoop.hive.ql.exec.UDF; 27 | import org.apache.hadoop.hive.ql.udf.UDFType; 28 | import org.apache.hadoop.io.Text; 29 | import org.apache.hive.pdk.HivePdkUnitTest; 30 | import org.apache.hive.pdk.HivePdkUnitTests; 31 | 32 | /** 33 | * UDFDateFormat 34 | */ 35 | 36 | @Description(name = "date_format", 37 | value = "_FUNC_(dateText,pattern) - Return time string with given pattern. ", 38 | extended = "Convert time string with 'yyyy-MM-dd HH:mm:ss' pattern to time string with given pattern.\n" 39 | +" (see [http://java.sun.com/j2se/1.4.2/docs/api/java/text/SimpleDateFormat.html])\n\n" 40 | +"Example:\n" 41 | +" > SELECT _FUNC_ ('2011-05-11 12:05:11','yyyyMMdd') FRom src LIMIT 1;\n" 42 | +"20110511" 43 | ) 44 | @HivePdkUnitTests( 45 | setup = "",cleanup = "", 46 | cases = { 47 | @HivePdkUnitTest( 48 | query = "SELECT nexr_date_format('2011-05-11 12:05:11', 'yyyyMMdd') FROM onerow;", 49 | result = "20110511" 50 | ), 51 | @HivePdkUnitTest( 52 | query = "SELECT nexr_date_format('2011-07-21 09:21:00', 'yyyy-MM-dd') FROM onerow;", 53 | result = "2011-07-21" 54 | ) 55 | } 56 | ) 57 | 58 | @UDFType(deterministic = false) 59 | public class UDFDateFormat extends UDF { 60 | private final SimpleDateFormat standardFormatter = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); 61 | private final SimpleDateFormat formatter = new SimpleDateFormat(); 62 | 63 | public UDFDateFormat() { 64 | standardFormatter.setLenient(false); 65 | formatter.setLenient(false); 66 | } 67 | 68 | Text result = new Text(); 69 | Text lastPatternText = new Text(); 70 | 71 | public Text evaluate(Text dateText, Text patternText) { 72 | if (dateText == null || patternText == null) { 73 | return null; 74 | } 75 | 76 | try { 77 | if (!patternText.equals(lastPatternText)) { 78 
| formatter.applyPattern(patternText.toString()); 79 | lastPatternText.set(patternText); 80 | } 81 | } catch (Exception e) { 82 | return null; 83 | } 84 | 85 | Date date; 86 | try { 87 | date = standardFormatter.parse(dateText.toString()); 88 | result.set(formatter.format(date)); 89 | return result; 90 | } catch (ParseException e) { 91 | return null; 92 | } 93 | } 94 | } 95 | -------------------------------------------------------------------------------- /src/main/java/com/nexr/platform/hive/udf/UDFLastDay.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | 19 | package com.nexr.platform.hive.udf; 20 | 21 | import java.text.ParseException; 22 | import java.text.SimpleDateFormat; 23 | import java.util.Calendar; 24 | import java.util.Date; 25 | 26 | import org.apache.hadoop.hive.ql.exec.Description; 27 | import org.apache.hadoop.hive.ql.exec.UDF; 28 | import org.apache.hadoop.hive.ql.udf.UDFType; 29 | import org.apache.hadoop.io.Text; 30 | import org.apache.hive.pdk.HivePdkUnitTest; 31 | import org.apache.hive.pdk.HivePdkUnitTests; 32 | 33 | /** 34 | * UDFLastDay 35 | */ 36 | 37 | @UDFType(deterministic = false) 38 | @Description(name = "last_day", 39 | value = "_FUNC_(dateString) - returns the last day of the month based " + 40 | "on a date string with yyyy-MM-dd HH:mm:ss pattern.", 41 | extended = "Example:\n" 42 | +"> SELECT last_day('2003-03-15 01:22:33') FROM src LIMIT 1;" 43 | +"2003-03-31 00:00:00\n" 44 | ) 45 | @HivePdkUnitTests( 46 | setup = "", cleanup = "", 47 | cases = { 48 | @HivePdkUnitTest( 49 | query = "SELECT nexr_last_day('2003-03-15 01:22:33') FROM onerow;", 50 | result = "2003-03-31 00:00:00" 51 | ), 52 | @HivePdkUnitTest( 53 | query = "SELECT nexr_last_day('2011-07-21 09:21:00') FROM onerow;", 54 | result = "2011-07-31 00:00:00" 55 | ) 56 | } 57 | ) 58 | public class UDFLastDay extends UDF { 59 | private final SimpleDateFormat standardFormatter = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); 60 | private final Calendar calendar = Calendar.getInstance(); 61 | 62 | public UDFLastDay() { 63 | standardFormatter.setLenient(false); 64 | } 65 | 66 | Text result = new Text(); 67 | 68 | public Text evaluate(Text dateText){ 69 | if (dateText == null) { 70 | return null; 71 | } 72 | 73 | try { 74 | calendar.setTime(standardFormatter.parse(dateText.toString())); 75 | int lastDate = calendar.getActualMaximum(Calendar.DATE); 76 | calendar.set(Calendar.DATE, lastDate); 77 | calendar.set(Calendar.HOUR_OF_DAY, 0); 78 | calendar.set(Calendar.MINUTE, 0); 79 | calendar.set(Calendar.SECOND, 0); 80 | 
calendar.set(Calendar.MILLISECOND, 0); 81 | Date newDate = calendar.getTime(); 82 | result.set(standardFormatter.format(newDate)); 83 | return result; 84 | } catch (ParseException e) { 85 | return null; 86 | } 87 | } 88 | } 89 | 90 | -------------------------------------------------------------------------------- /src/main/java/com/nexr/platform/hive/udf/UDFStrToDate.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache Software Foundation (ASF) under one or more contributor license 3 | * agreements. See the NOTICE file distributed with this work for additional information regarding 4 | * copyright ownership. The ASF licenses this file to you under the Apache License, Version 2.0 (the 5 | * "License"); you may not use this file except in compliance with the License. You may obtain a 6 | * copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software distributed under the License 11 | * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 12 | * or implied. See the License for the specific language governing permissions and limitations under 13 | * the License. 
14 | */ 15 | 16 | package com.nexr.platform.hive.udf; 17 | 18 | import java.text.ParseException; 19 | import java.text.SimpleDateFormat; 20 | import java.util.Calendar; 21 | import java.util.Date; 22 | 23 | import org.apache.hadoop.hive.ql.exec.Description; 24 | import org.apache.hadoop.hive.ql.exec.UDF; 25 | import org.apache.hadoop.hive.ql.udf.UDFType; 26 | import org.apache.hadoop.io.IntWritable; 27 | import org.apache.hadoop.io.Text; 28 | import org.apache.hive.pdk.HivePdkUnitTest; 29 | import org.apache.hive.pdk.HivePdkUnitTests; 30 | 31 | /** 32 | * UDFDateFormat 33 | * 34 | */ 35 | 36 | @Description(name = "str_to_date", value = "_FUNC_(dateText, pattern [, days]) - Convert time string with given pattern " 37 | + "to time string with 'yyyy-MM-dd HH:mm:ss' pattern\n", extended = "Example:\n" 38 | + "> SELECT _FUNC_('2011/05/01','yyyy/MM/dd') FROM src LIMIT 1;\n" 39 | + "2011-05-01 00:00:00\n" 40 | + "> SELECT _FUNC_('2011/07/21 12:55:11'.'yyyy/MM/dd HH:mm:ss') FROM src LIMIT 1;\n" 41 | + "2011-07-21 12:55:11\n") 42 | @HivePdkUnitTests(setup = "", cleanup = "", cases = { 43 | @HivePdkUnitTest(query = "SELECT nexr_str_to_date('2011/05/01','yyyy/MM/dd') FROM onerow;", result = "2011-05-01 00:00:00"), 44 | @HivePdkUnitTest(query = "SELECT nexr_str_to_date('2011/07/21 12:55:11','yyyy/MM/dd HH:mm:ss') " 45 | + "FROM onerow;", result = "2011-07-21 12:55:11"), 46 | @HivePdkUnitTest(query = "SELECT nexr_str_to_date('2011/05/01','yyyy/MM/dd', 1) FROM onerow;", result = "2011-05-02 00:00:00")}) 47 | @UDFType(deterministic = false) 48 | public class UDFStrToDate extends UDF { 49 | private final SimpleDateFormat standardFormatter = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); 50 | private final SimpleDateFormat formatter = new SimpleDateFormat(); 51 | private final Calendar calendar = Calendar.getInstance(); 52 | 53 | public UDFStrToDate() { 54 | standardFormatter.setLenient(false); 55 | formatter.setLenient(false); 56 | } 57 | 58 | Text result = new Text(); 59 | Text 
lastPatternText = new Text(); 60 | 61 | public Text evaluate(Text dateText, Text patternText) { 62 | if (dateText == null || patternText == null) { 63 | return null; 64 | } 65 | try { 66 | if (!patternText.equals(lastPatternText)) { 67 | formatter.applyPattern(patternText.toString()); 68 | lastPatternText.set(patternText); 69 | } 70 | } catch (Exception e) { 71 | return null; 72 | } 73 | 74 | Date date; 75 | try { 76 | date = formatter.parse(dateText.toString()); 77 | result.set(standardFormatter.format(date)); 78 | return result; 79 | } catch (ParseException e) { 80 | return null; 81 | } 82 | } 83 | 84 | Text t = new Text(); 85 | 86 | public Text evaluate(Text dateText, Text patternText, IntWritable days) { 87 | if (dateText == null || patternText == null || days == null) { 88 | return null; 89 | } 90 | 91 | t = evaluate(dateText, patternText); 92 | try { 93 | calendar.setTime(standardFormatter.parse(t.toString())); 94 | calendar.add(Calendar.DAY_OF_MONTH, days.get()); 95 | Date newDate = calendar.getTime(); 96 | result.set(standardFormatter.format(newDate)); 97 | return result; 98 | } catch (ParseException e) { 99 | e.printStackTrace(); 100 | return null; 101 | } 102 | } 103 | } 104 | -------------------------------------------------------------------------------- /src/main/java/com/nexr/platform/hive/udf/UDFSubstrForOracle.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. 
You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package com.nexr.platform.hive.udf; 20 | 21 | import org.apache.hadoop.hive.ql.exec.Description; 22 | import org.apache.hadoop.hive.ql.exec.UDF; 23 | import org.apache.hadoop.io.IntWritable; 24 | import org.apache.hadoop.io.Text; 25 | import org.apache.hive.pdk.HivePdkUnitTest; 26 | import org.apache.hive.pdk.HivePdkUnitTests; 27 | 28 | /** 29 | * UDFSubstrForOracle. 30 | * 31 | * In some case, Hive built-in substr UDF return different value from Oracle substr funtion's. 32 | * This substr function exactly behave like oracle. 33 | * the substr function allows you to extract a substring from a string 34 | * The syntax for the substr function is: 35 | * substr( string, start_position, [ length ] ) 36 | * string is the source string. 37 | * start_position is the position for extraction. The first position in the string is always 1. 38 | * length is optional. It is the number of characters to extract. 39 | * If this parameter is omitted, substr will return the entire string. 40 | * 41 | * 42 | */ 43 | @Description(name = "substr", 44 | value = "_FUNC_(str, start_pos[, length ]) " + 45 | "- returns the substring of str that starts at pos_start and is of length", 46 | extended = "start_pos is a 1-based index. 
If start_pos<0 the starting position is" 47 | + " determined by counting backwards from the end of str.\n" 48 | + "Example:\n " 49 | + " > SELECT _FUNC_('abcdefg', 5) FROM src LIMIT 1;\n" 50 | + " efg\n" 51 | + " > SELECT _FUNC_('abcdefg',-5,4) FROM src LIMIT 1;\n" 52 | + " cdef" 53 | + " > SELECT _FUNC_('abcdefg',3,4) FROM src LIMIT 1;\n" 54 | + " cdef") 55 | 56 | 57 | @HivePdkUnitTests( 58 | setup ="", cleanup ="", 59 | cases = { 60 | @HivePdkUnitTest( 61 | query = "SELECT nexr_substr('abcdefg', 5) FROM onerow;", 62 | result = "efg" 63 | ), 64 | @HivePdkUnitTest( 65 | query = "SELECT nexr_substr('abcdefg',3,4) FROM onerow;", 66 | result = "cdef" 67 | ), 68 | @HivePdkUnitTest( 69 | query = "SELECT nexr_substr('abcdefg',-5,4) FROM onerow;", 70 | result = "cdef" 71 | ) 72 | } 73 | ) 74 | public class UDFSubstrForOracle extends UDF { 75 | private Text r; 76 | 77 | public UDFSubstrForOracle() { 78 | r = new Text(); 79 | } 80 | 81 | public Text evaluate(Text t, IntWritable pos, IntWritable len) { 82 | 83 | if ((t == null) || (pos == null) || (len == null)) { 84 | return null; 85 | } 86 | 87 | r.clear(); 88 | if ((len.get() <= 0)) { 89 | //return r; 90 | return null; 91 | } 92 | 93 | String s = t.toString(); 94 | if ((Math.abs(pos.get()) > s.length())) { 95 | //return r; 96 | return null; 97 | } 98 | 99 | int start, end; 100 | 101 | if (pos.get() > 0) { 102 | start = pos.get() - 1; 103 | } else if (pos.get() < 0) { 104 | start = s.length() + pos.get(); 105 | } else { 106 | start = 0; 107 | } 108 | 109 | if ((s.length() - start) < len.get()) { 110 | end = s.length(); 111 | } else { 112 | end = start + len.get(); 113 | } 114 | 115 | r.set(s.substring(start, end)); 116 | return r; 117 | } 118 | 119 | private IntWritable maxValue = new IntWritable(Integer.MAX_VALUE); 120 | 121 | public Text evaluate(Text s, IntWritable pos) { 122 | return evaluate(s, pos, maxValue); 123 | } 124 | 125 | } 126 | -------------------------------------------------------------------------------- 
/src/main/java/com/nexr/platform/hive/udf/UDFSysDate.java:
--------------------------------------------------------------------------------
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.nexr.platform.hive.udf;

import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;

import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDF;
import org.apache.hadoop.hive.ql.udf.UDFType;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;

/**
 * Generic UDF for string function CURDATE(), SYSDATE(). This mimcs the function from MySQL
 * http://dev.mysql.com/doc/refman/5.1/en/date-and-time-functions.html#function_sysdate
 *
 * <pre>
 * usage:
 * SYSDATE()
 * </pre>
 */
@Description(name = "sysdate",
    value = "_FUNC_() - Returns the current date and time as a value in 'yyyy-MM-dd HH:mm:ss' format"
    +"_FUNC_(dateFormat) - Returns the current date and time as a value in given format"
    +"_FUNC_(dateFormat, num_days) - Returns the date that is num_days after current date in given date format",
    extended = "Example:\n"
    + " > SELECT _FUNC_() FROM src LIMIT 1;\n" + "2011-06-13 13:47:36"
    + " > SELECT _FUNC_('yyyyMMdd') FROM src LIMIT 1;\n" + "20110613"
    + " > SELECT _FUNC_('yyyyMMdd',1) FROM src LIMIT 1;\n" + "20110614")
// CONSISTENCY FIX: mark nondeterministic (like UDFDateFormat/UDFLastDay in this package)
// so the current-time result is never constant-folded by the optimizer.
@UDFType(deterministic = false)
public class UDFSysDate extends UDF {

    // Pattern used when no explicit format argument is given.
    private static final String DEFAULT_PATTERN = "yyyy-MM-dd HH:mm:ss";

    private final SimpleDateFormat stdFormatter = new SimpleDateFormat(DEFAULT_PATTERN);
    private final SimpleDateFormat formatter = new SimpleDateFormat();
    private final Calendar calendar = Calendar.getInstance();

    public UDFSysDate() {
        stdFormatter.setLenient(false);
        formatter.setLenient(false);
    }

    Text result = new Text();

    /** Returns the current date and time in 'yyyy-MM-dd HH:mm:ss' format. */
    public Text evaluate() {
        Date date = new Date();
        result.set(stdFormatter.format(date));
        return result;
    }

    /**
     * Returns the current date and time in the given format.
     *
     * @param format SimpleDateFormat pattern; a null falls back to 'yyyy-MM-dd HH:mm:ss'
     */
    public Text evaluate(Text format) {
        // BUG FIX: the original called format.set(...) when format was null, which always
        // threw a NullPointerException; use the default pattern instead.
        String pattern = (format == null) ? DEFAULT_PATTERN : format.toString();
        formatter.applyPattern(pattern);
        result.set(formatter.format(new Date()));
        return result;
    }

    /**
     * Returns the date num_days after the current date, in the given format.
     *
     * @param format SimpleDateFormat pattern; a null falls back to 'yyyy-MM-dd HH:mm:ss'
     * @param days   day offset (may be negative); null yields null
     */
    public Text evaluate(Text format, IntWritable days) {
        // BUG FIX: null arguments previously caused NullPointerExceptions
        // (format.set on null; days.get() on null).
        if (days == null) {
            return null;
        }
        String pattern = (format == null) ? DEFAULT_PATTERN : format.toString();
        formatter.applyPattern(pattern);
        calendar.setTime(new Date());
        calendar.add(Calendar.DAY_OF_MONTH, days.get());
        result.set(formatter.format(calendar.getTime()));
        return result;
    }

}
--------------------------------------------------------------------------------
/src/main/java/com/nexr/platform/hive/udf/UDFToChar.java:
-------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package com.nexr.platform.hive.udf; 20 | 21 | import java.text.DecimalFormat; 22 | import java.text.ParseException; 23 | import java.text.SimpleDateFormat; 24 | import java.util.Date; 25 | 26 | import org.apache.hadoop.hive.ql.exec.Description; 27 | import org.apache.hadoop.hive.ql.exec.UDF; 28 | import org.apache.hadoop.hive.ql.udf.UDFType; 29 | import org.apache.hadoop.hive.serde2.ByteStream; 30 | import org.apache.hadoop.hive.serde2.io.ByteWritable; 31 | import org.apache.hadoop.hive.serde2.io.DoubleWritable; 32 | import org.apache.hadoop.hive.serde2.io.ShortWritable; 33 | import org.apache.hadoop.hive.serde2.lazy.LazyInteger; 34 | import org.apache.hadoop.hive.serde2.lazy.LazyLong; 35 | import org.apache.hadoop.io.FloatWritable; 36 | import org.apache.hadoop.io.IntWritable; 37 | import org.apache.hadoop.io.LongWritable; 38 | import org.apache.hadoop.io.NullWritable; 39 | import org.apache.hadoop.io.Text; 40 | import org.apache.hive.pdk.HivePdkUnitTest; 41 | import org.apache.hive.pdk.HivePdkUnitTests; 42 | 43 | /** 44 | * 
UDFToChar 45 | * 46 | * This function is an alternative to Oracle to_char function. 47 | */ 48 | 49 | @UDFType(deterministic = false) 50 | @Description(name = "to_char", 51 | value = "_FUNC_(date, pattern) converts a string with yyyy-MM-dd HH:mm:ss pattern " + 52 | "to a string with given pattern.\n" 53 | +"_FUNC_(datetime, pattern) converts a string with yyyy-MM-dd pattern " + 54 | "to a string with given pattern.\n" 55 | +"_FUNC_(number [,format]) converts a number to a string\n", 56 | extended = "Example:\n" 57 | +" > SELECT to_char('2011-05-11 10:00:12'.'yyyyMMdd') FROM src LIMIT 1;\n" 58 | +"20110511\n" 59 | ) 60 | 61 | @HivePdkUnitTests( 62 | setup = "", cleanup = "", 63 | cases = { 64 | @HivePdkUnitTest(query = "SELECT nexr_to_char('2011-05-01 10:00:12', 'yyyyMMdd') FROM onerow;",result = "20110501"), 65 | @HivePdkUnitTest(query = "SELECT nexr_to_char('2011-07-01 00:00:00', 'yyyy-MM-dd') FROM onerow;",result = "2011-07-01"), 66 | @HivePdkUnitTest(query = "SELECT nexr_to_char('2011-07-01', 'yyyy/MM/dd') FROM onerow;",result = "2011/07/01"), 67 | @HivePdkUnitTest(query = "SELECT nexr_to_char(234) FROM onerow;",result = "234"), 68 | @HivePdkUnitTest(query = "SELECT nexr_to_char(col_tinyint) FROM datatypes;",result = "1"), 69 | @HivePdkUnitTest(query = "SELECT nexr_to_char(col_smallint) FROM datatypes;",result = "12"), 70 | @HivePdkUnitTest(query = "SELECT nexr_to_char(col_int) FROM datatypes;",result = "123"), 71 | @HivePdkUnitTest(query = "SELECT nexr_to_char(col_bigint) FROM datatypes;",result = "1234"), 72 | @HivePdkUnitTest(query = "SELECT nexr_to_char(col_float) FROM datatypes;",result = "12.34"), 73 | @HivePdkUnitTest(query = "SELECT nexr_to_char(col_double) FROM datatypes;",result = "1234.1234"), 74 | @HivePdkUnitTest(query = "SELECT nexr_to_char(1210.73,'9999.9') FROM datatypes;",result = "1210.7"), 75 | @HivePdkUnitTest(query = "SELECT nexr_to_char(1210.73,'9,999.99') FROM datatypes;",result = "1,210.73"), 76 | @HivePdkUnitTest(query = "SELECT 
nexr_to_char(1210,'$9999') FROM datatypes;",result = "$1210"), 77 | @HivePdkUnitTest(query = "SELECT nexr_to_char('test') FROM onerow;",result = "test") 78 | } 79 | ) 80 | 81 | 82 | public class UDFToChar extends UDF { 83 | private final SimpleDateFormat standardFormatter = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss"); 84 | private final SimpleDateFormat formatter = new SimpleDateFormat(); 85 | private final DecimalFormat decimalFormat = new DecimalFormat(); 86 | private ByteStream.Output out = new ByteStream.Output(); 87 | 88 | private Text result = new Text(); 89 | private Text lastPatternText = new Text(); 90 | 91 | public UDFToChar() { 92 | standardFormatter.setLenient(false); 93 | formatter.setLenient(false); 94 | } 95 | 96 | public Text evaluate(NullWritable i) { 97 | return null; 98 | } 99 | 100 | public Text evaluate(ByteWritable i) { 101 | if (i == null) { 102 | return null; 103 | } else { 104 | out.reset(); 105 | LazyInteger.writeUTF8NoException(out, i.get()); 106 | result.set(out.getData(), 0, out.getCount()); 107 | return result; 108 | } 109 | } 110 | public Text evaluate(ByteWritable i, Text format) { 111 | if (i == null|| format==null) { 112 | return null; 113 | } else { 114 | String pattern = format.toString().replace("9", "#"); 115 | decimalFormat.applyPattern(pattern); 116 | result.set(decimalFormat.format(i.get())); 117 | return result; 118 | } 119 | } 120 | 121 | public Text evaluate(ShortWritable i) { 122 | if (i == null) { 123 | return null; 124 | } else { 125 | out.reset(); 126 | LazyInteger.writeUTF8NoException(out, i.get()); 127 | result.set(out.getData(), 0, out.getCount()); 128 | return result; 129 | } 130 | } 131 | 132 | public Text evaluate(ShortWritable i, Text format) { 133 | if (i == null|| format==null) { 134 | return null; 135 | } else { 136 | String pattern = format.toString().replace("9", "#"); 137 | decimalFormat.applyPattern(pattern); 138 | result.set(decimalFormat.format(i.get())); 139 | return result; 140 | } 141 | } 142 | 143 
| public Text evaluate(IntWritable i) { 144 | if (i == null) { 145 | return null; 146 | } else { 147 | out.reset(); 148 | LazyInteger.writeUTF8NoException(out, i.get()); 149 | result.set(out.getData(), 0, out.getCount()); 150 | return result; 151 | } 152 | } 153 | 154 | public Text evaluate(IntWritable i, Text format) { 155 | if (i == null|| format==null) { 156 | return null; 157 | } else { 158 | String pattern = format.toString().replace("9", "#"); 159 | decimalFormat.applyPattern(pattern); 160 | result.set(decimalFormat.format(i.get())); 161 | return result; 162 | } 163 | } 164 | 165 | public Text evaluate(LongWritable i) { 166 | if (i == null) { 167 | return null; 168 | } else { 169 | out.reset(); 170 | LazyLong.writeUTF8NoException(out, i.get()); 171 | result.set(out.getData(), 0, out.getCount()); 172 | return result; 173 | } 174 | } 175 | 176 | public Text evaluate(LongWritable i, Text format) { 177 | if (i == null|| format==null) { 178 | return null; 179 | } else { 180 | String pattern = format.toString().replace("9", "#"); 181 | decimalFormat.applyPattern(pattern); 182 | result.set(decimalFormat.format(i.get())); 183 | return result; 184 | } 185 | } 186 | 187 | public Text evaluate(FloatWritable i) { 188 | if (i == null) { 189 | return null; 190 | } else { 191 | result.set(i.toString()); 192 | return result; 193 | } 194 | } 195 | 196 | 197 | public Text evaluate(FloatWritable i, Text format) { 198 | if (i == null|| format==null) { 199 | return null; 200 | } else { 201 | String pattern = format.toString().replace("9", "#"); 202 | decimalFormat.applyPattern(pattern); 203 | result.set(decimalFormat.format(i.get())); 204 | return result; 205 | } 206 | } 207 | 208 | public Text evaluate(DoubleWritable i) { 209 | if (i == null) { 210 | return null; 211 | } else { 212 | result.set(i.toString()); 213 | return result; 214 | } 215 | } 216 | 217 | public Text evaluate(DoubleWritable i, Text format) { 218 | if (i == null|| format==null) { 219 | return null; 220 | } else 
{ 221 | String pattern = format.toString().replace("9", "#"); 222 | decimalFormat.applyPattern(pattern); 223 | result.set(decimalFormat.format(i.get())); 224 | return result; 225 | } 226 | } 227 | 228 | public Text evaluate(Text dateText, Text patternText) { 229 | if (dateText == null || patternText == null) { 230 | return null; 231 | } 232 | if (dateText.toString().trim().length()==10){ 233 | standardFormatter.applyPattern("yyyy-MM-dd"); 234 | } 235 | 236 | try { 237 | if (!patternText.equals(lastPatternText)) { 238 | formatter.applyPattern(patternText.toString()); 239 | lastPatternText.set(patternText); 240 | } 241 | } catch (Exception e) { 242 | return null; 243 | } 244 | 245 | Date date; 246 | try { 247 | date = standardFormatter.parse(dateText.toString()); 248 | result.set(formatter.format(date)); 249 | return result; 250 | } catch (ParseException e) { 251 | return null; 252 | } 253 | } 254 | 255 | public Text evaluate(Text text){ 256 | return text; 257 | } 258 | 259 | } -------------------------------------------------------------------------------- /test/cleanup.sql: -------------------------------------------------------------------------------- 1 | drop table if exists onerow; 2 | drop table if exists dual; 3 | drop table if exists emp; 4 | drop table if exists datatypes; -------------------------------------------------------------------------------- /test/datatypes.txt: -------------------------------------------------------------------------------- 1 | 1 12 123 1234 true 12.34 1234.1234 hello world 10,20,30,40 a,b,c 10:val1,20:val2 a1,name,1234 -------------------------------------------------------------------------------- /test/dual.txt: -------------------------------------------------------------------------------- 1 | 0 Adam 30 174.3 0 2 | 1 Bravo 32 1 3 | 2 Charles 34 178.5 2 -------------------------------------------------------------------------------- /test/emp.txt: 
-------------------------------------------------------------------------------- 1 | 7369 SMITH CLERK 7902 1980-12-17 00:00:00.0 800 \N 20 2 | 7499 ALLEN SALESMAN 7698 1981-02-20 00:00:00.0 1600 300 30 3 | 7521 WARD SALESMAN 7698 1981-02-22 00:00:00.0 1250 500 30 4 | 7566 JONES MANAGER 7839 1981-04-02 00:00:00.0 2975 \N 20 5 | 7654 MARTIN SALESMAN 7698 1981-09-28 00:00:00.0 1250 1400 30 6 | 7698 BLAKE MANAGER 7839 1981-05-01 00:00:00.0 2850 \N 30 7 | 7782 CLARK MANAGER 7839 1981-06-09 00:00:00.0 2450 \N 10 8 | 7788 SCOTT ANALYST 7566 1987-04-19 00:00:00.0 3000 \N 20 9 | 7839 KING PRESIDENT \N 1981-11-17 00:00:00.0 5000 \N 10 10 | 7844 TURNER SALESMAN 7698 1981-09-08 00:00:00.0 1500 0 30 11 | 7876 ADAMS CLERK 7788 1987-05-23 00:00:00.0 1100 \N 20 12 | 7900 JAMES CLERK 7698 1981-12-03 00:00:00.0 950 \N 30 13 | 7902 FORD ANALYST 7566 1981-12-03 00:00:00.0 3000 \N 20 14 | 7934 MILLER CLERK 7782 1982-01-23 00:00:00.0 1300 \N 10 15 | -------------------------------------------------------------------------------- /test/lineitem.sql: -------------------------------------------------------------------------------- 1 | DROP TABLE lineitem; 2 | CREATE TABLE lineitem (L_ORDERKEY INT, 3 | L_PARTKEY INT, 4 | L_SUPPKEY INT, 5 | L_LINENUMBER INT, 6 | L_QUANTITY DOUBLE, 7 | L_EXTENDEDPRICE DOUBLE, 8 | L_DISCOUNT DOUBLE, 9 | L_TAX DOUBLE, 10 | L_RETURNFLAG STRING, 11 | L_LINESTATUS STRING, 12 | l_shipdate STRING, 13 | L_COMMITDATE STRING, 14 | L_RECEIPTDATE STRING, 15 | L_SHIPINSTRUCT STRING, 16 | L_SHIPMODE STRING, 17 | L_COMMENT STRING) 18 | ROW FORMAT DELIMITED 19 | FIELDS TERMINATED BY '|'; 20 | 21 | LOAD DATA LOCAL INPATH '${env:HIVE_PLUGIN_ROOT_DIR}/test/lineitem.txt' OVERWRITE INTO TABLE lineitem; -------------------------------------------------------------------------------- /test/lineitem.txt: -------------------------------------------------------------------------------- 1 | 1|155190|7706|1|17|21168.23|0.04|0.02|N|O|1996-03-13|1996-02-12|1996-03-22|DELIVER IN 
PERSON|TRUCK|egular courts above the| 2 | 1|67310|7311|2|36|45983.16|0.09|0.06|N|O|1996-04-12|1996-02-28|1996-04-20|TAKE BACK RETURN|MAIL|ly final dependencies: slyly bold | 3 | 1|63700|3701|3|8|13309.60|0.10|0.02|N|O|1996-01-29|1996-03-05|1996-01-31|TAKE BACK RETURN|REG AIR|riously. regular, express dep| 4 | 1|2132|4633|4|28|28955.64|0.09|0.06|N|O|1996-04-21|1996-03-30|1996-05-16|NONE|AIR|lites. fluffily even de| 5 | 1|24027|1534|5|24|22824.48|0.10|0.04|N|O|1996-03-30|1996-03-14|1996-04-01|NONE|FOB| pending foxes. slyly re| 6 | 1|15635|638|6|32|49620.16|0.07|0.02|N|O|1996-01-30|1996-02-07|1996-02-03|DELIVER IN PERSON|MAIL|arefully slyly ex| 7 | 2|106170|1191|1|38|44694.46|0.00|0.05|N|O|1997-01-28|1997-01-14|1997-02-02|TAKE BACK RETURN|RAIL|ven requests. deposits breach a| 8 | 3|4297|1798|1|45|54058.05|0.06|0.00|R|F|1994-02-02|1994-01-04|1994-02-23|NONE|AIR|ongside of the furiously brave acco| 9 | 3|19036|6540|2|49|46796.47|0.10|0.00|R|F|1993-11-09|1993-12-20|1993-11-24|TAKE BACK RETURN|RAIL| unusual accounts. eve| 10 | 3|128449|3474|3|27|39890.88|0.06|0.07|A|F|1994-01-16|1993-11-22|1994-01-23|DELIVER IN PERSON|SHIP|nal foxes wake. | 11 | 3|29380|1883|4|2|2618.76|0.01|0.06|A|F|1993-12-04|1994-01-07|1994-01-01|NONE|TRUCK|y. fluffily pending d| 12 | 3|183095|650|5|28|32986.52|0.04|0.00|R|F|1993-12-14|1994-01-10|1994-01-01|TAKE BACK RETURN|FOB|ages nag slyly pending| 13 | 3|62143|9662|6|26|28733.64|0.10|0.02|A|F|1993-10-29|1993-12-18|1993-11-04|TAKE BACK RETURN|RAIL|ges sleep after the caref| 14 | 4|88035|5560|1|30|30690.90|0.03|0.08|N|O|1996-01-10|1995-12-14|1996-01-18|DELIVER IN PERSON|REG AIR|- quickly regular packages sleep. 
idly| 15 | 5|108570|8571|1|15|23678.55|0.02|0.04|R|F|1994-10-31|1994-08-31|1994-11-20|NONE|AIR|ts wake furiously | 16 | 5|123927|3928|2|26|50723.92|0.07|0.08|R|F|1994-10-16|1994-09-25|1994-10-19|NONE|FOB|sts use slyly quickly special instruc| 17 | 5|37531|35|3|50|73426.50|0.08|0.03|A|F|1994-08-08|1994-10-13|1994-08-26|DELIVER IN PERSON|AIR|eodolites. fluffily unusual| 18 | 6|139636|2150|1|37|61998.31|0.08|0.03|A|F|1992-04-27|1992-05-15|1992-05-02|TAKE BACK RETURN|TRUCK|p furiously special foxes| 19 | 7|182052|9607|1|12|13608.60|0.07|0.03|N|O|1996-05-07|1996-03-13|1996-06-03|TAKE BACK RETURN|FOB|ss pinto beans wake against th| 20 | 7|145243|7758|2|9|11594.16|0.08|0.08|N|O|1996-02-01|1996-03-02|1996-02-19|TAKE BACK RETURN|SHIP|es. instructions| 21 | 7|94780|9799|3|46|81639.88|0.10|0.07|N|O|1996-01-15|1996-03-27|1996-02-03|COLLECT COD|MAIL| unusual reques| 22 | 7|163073|3074|4|28|31809.96|0.03|0.04|N|O|1996-03-21|1996-04-08|1996-04-20|NONE|FOB|. slyly special requests haggl| 23 | 7|151894|9440|5|38|73943.82|0.08|0.01|N|O|1996-02-11|1996-02-24|1996-02-18|DELIVER IN PERSON|TRUCK|ns haggle carefully ironic deposits. bl| 24 | 7|79251|1759|6|35|43058.75|0.06|0.03|N|O|1996-01-16|1996-02-23|1996-01-22|TAKE BACK RETURN|FOB|jole. excuses wake carefully alongside of | 25 | 7|157238|2269|7|5|6476.15|0.04|0.02|N|O|1996-02-10|1996-03-26|1996-02-13|NONE|FOB|ithely regula| 26 | 32|82704|7721|1|28|47227.60|0.05|0.08|N|O|1995-10-23|1995-08-27|1995-10-26|TAKE BACK RETURN|TRUCK|sleep quickly. req| 27 | 32|197921|441|2|32|64605.44|0.02|0.00|N|O|1995-08-14|1995-10-07|1995-08-27|COLLECT COD|AIR|lithely regular deposits. 
fluffily | 28 | 32|44161|6666|3|2|2210.32|0.09|0.02|N|O|1995-08-07|1995-10-07|1995-08-23|DELIVER IN PERSON|AIR| express accounts wake according to the| 29 | 32|2743|7744|4|4|6582.96|0.09|0.03|N|O|1995-08-04|1995-10-01|1995-09-03|NONE|REG AIR|e slyly final pac| 30 | 32|85811|8320|5|44|79059.64|0.05|0.06|N|O|1995-08-28|1995-08-20|1995-09-14|DELIVER IN PERSON|AIR|symptotes nag according to the ironic depo| 31 | 32|11615|4117|6|6|9159.66|0.04|0.03|N|O|1995-07-21|1995-09-23|1995-07-25|COLLECT COD|RAIL| gifts cajole carefully.| 32 | 33|61336|8855|1|31|40217.23|0.09|0.04|A|F|1993-10-29|1993-12-19|1993-11-08|COLLECT COD|TRUCK|ng to the furiously ironic package| 33 | 33|60519|5532|2|32|47344.32|0.02|0.05|A|F|1993-12-09|1994-01-04|1993-12-28|COLLECT COD|MAIL|gular theodolites| 34 | 33|137469|9983|3|5|7532.30|0.05|0.03|A|F|1993-12-09|1993-12-25|1993-12-23|TAKE BACK RETURN|AIR|. stealthily bold exc| 35 | 33|33918|3919|4|41|75928.31|0.09|0.00|R|F|1993-11-09|1994-01-24|1993-11-11|TAKE BACK RETURN|MAIL|unusual packages doubt caref| 36 | 34|88362|871|1|13|17554.68|0.00|0.07|N|O|1998-10-23|1998-09-14|1998-11-06|NONE|REG AIR|nic accounts. deposits are alon| 37 | 34|89414|1923|2|22|30875.02|0.08|0.06|N|O|1998-10-09|1998-10-16|1998-10-12|NONE|FOB|thely slyly p| 38 | 34|169544|4577|3|6|9681.24|0.02|0.06|N|O|1998-10-30|1998-09-20|1998-11-05|NONE|FOB|ar foxes sleep | 39 | 35|450|2951|1|24|32410.80|0.02|0.00|N|O|1996-02-21|1996-01-03|1996-03-18|TAKE BACK RETURN|FOB|, regular tithe| 40 | 35|161940|4457|2|34|68065.96|0.06|0.08|N|O|1996-01-22|1996-01-06|1996-01-27|DELIVER IN PERSON|RAIL|s are carefully against the f| 41 | 35|120896|8433|3|7|13418.23|0.06|0.04|N|O|1996-01-19|1995-12-22|1996-01-29|NONE|MAIL| the carefully regular | 42 | 35|85175|7684|4|25|29004.25|0.06|0.05|N|O|1995-11-26|1995-12-25|1995-12-21|DELIVER IN PERSON|SHIP| quickly unti| 43 | 35|119917|4940|5|34|65854.94|0.08|0.06|N|O|1995-11-08|1996-01-15|1995-11-26|COLLECT COD|MAIL|. 
silent, unusual deposits boost| 44 | 35|30762|3266|6|28|47397.28|0.03|0.02|N|O|1996-02-01|1995-12-24|1996-02-28|COLLECT COD|RAIL|ly alongside of | 45 | 36|119767|9768|1|42|75043.92|0.09|0.00|N|O|1996-02-03|1996-01-21|1996-02-23|COLLECT COD|SHIP| careful courts. special | 46 | 37|22630|5133|1|40|62105.20|0.09|0.03|A|F|1992-07-21|1992-08-01|1992-08-15|NONE|REG AIR|luffily regular requests. slyly final acco| 47 | 37|126782|1807|2|39|70542.42|0.05|0.02|A|F|1992-07-02|1992-08-18|1992-07-28|TAKE BACK RETURN|RAIL|the final requests. ca| 48 | 37|12903|5405|3|43|78083.70|0.05|0.08|A|F|1992-07-10|1992-07-06|1992-08-02|DELIVER IN PERSON|TRUCK|iously ste| 49 | 38|175839|874|1|44|84252.52|0.04|0.02|N|O|1996-09-29|1996-11-17|1996-09-30|COLLECT COD|MAIL|s. blithely unusual theodolites am| 50 | 39|2320|9821|1|44|53782.08|0.09|0.06|N|O|1996-11-14|1996-12-15|1996-12-12|COLLECT COD|RAIL|eodolites. careful| 51 | 39|186582|4137|2|26|43383.08|0.08|0.04|N|O|1996-11-04|1996-10-20|1996-11-20|NONE|FOB|ckages across the slyly silent| 52 | 39|67831|5350|3|46|82746.18|0.06|0.08|N|O|1996-09-26|1996-12-19|1996-10-26|DELIVER IN PERSON|AIR|he carefully e| 53 | 39|20590|3093|4|32|48338.88|0.07|0.05|N|O|1996-10-02|1996-12-19|1996-10-14|COLLECT COD|MAIL|heodolites sleep silently pending foxes. ac| 54 | 39|54519|9530|5|43|63360.93|0.01|0.01|N|O|1996-10-17|1996-11-14|1996-10-26|COLLECT COD|MAIL|yly regular i| 55 | 39|94368|6878|6|40|54494.40|0.06|0.05|N|O|1996-12-08|1996-10-22|1997-01-01|COLLECT COD|AIR|quickly ironic fox| 56 | 64|85951|5952|1|21|40675.95|0.05|0.02|R|F|1994-09-30|1994-09-18|1994-10-26|DELIVER IN PERSON|REG AIR|ch slyly final, thin platelets.| 57 | 65|59694|4705|1|26|42995.94|0.03|0.03|A|F|1995-04-20|1995-04-25|1995-05-13|NONE|TRUCK|pending deposits nag even packages. ca| 58 | 65|73815|8830|2|22|39353.82|0.00|0.05|N|O|1995-07-17|1995-06-04|1995-07-19|COLLECT COD|FOB| ideas. 
special, r| 59 | 65|1388|3889|3|21|27076.98|0.09|0.07|N|O|1995-07-06|1995-05-14|1995-07-31|DELIVER IN PERSON|RAIL|bove the even packages. accounts nag carefu| 60 | 66|115118|7630|1|31|35126.41|0.00|0.08|R|F|1994-02-19|1994-03-11|1994-02-20|TAKE BACK RETURN|RAIL|ut the unusual accounts sleep at the bo| 61 | 66|173489|3490|2|41|64061.68|0.04|0.07|A|F|1994-02-21|1994-03-01|1994-03-18|COLLECT COD|AIR| regular de| 62 | 67|21636|9143|1|4|6230.52|0.09|0.04|N|O|1997-04-17|1997-01-31|1997-04-20|NONE|SHIP| cajole thinly expres| 63 | 67|20193|5198|2|12|13358.28|0.09|0.05|N|O|1997-01-27|1997-02-21|1997-02-22|NONE|REG AIR| even packages cajole| 64 | 67|173600|6118|3|5|8368.00|0.03|0.07|N|O|1997-02-20|1997-02-12|1997-02-21|DELIVER IN PERSON|TRUCK|y unusual packages thrash pinto | 65 | 67|87514|7515|4|44|66066.44|0.08|0.06|N|O|1997-03-18|1997-01-29|1997-04-13|DELIVER IN PERSON|RAIL|se quickly above the even, express reques| 66 | 67|40613|8126|5|23|35733.03|0.05|0.07|N|O|1997-04-19|1997-02-14|1997-05-06|DELIVER IN PERSON|REG AIR|ly regular deposit| 67 | 67|178306|824|6|29|40144.70|0.02|0.05|N|O|1997-01-25|1997-01-27|1997-01-27|DELIVER IN PERSON|FOB|ultipliers | 68 | 68|7068|9569|1|3|2925.18|0.05|0.02|N|O|1998-07-04|1998-06-05|1998-07-21|NONE|RAIL|fully special instructions cajole. furious| 69 | 68|175180|2732|2|46|57738.28|0.02|0.05|N|O|1998-06-26|1998-06-07|1998-07-05|NONE|MAIL| requests are unusual, regular pinto | 70 | 68|34980|7484|3|46|88089.08|0.04|0.05|N|O|1998-08-13|1998-07-08|1998-08-29|NONE|RAIL|egular dependencies affix ironically along | 71 | 68|94728|2256|4|20|34454.40|0.07|0.01|N|O|1998-06-27|1998-05-23|1998-07-02|NONE|REG AIR| excuses integrate fluffily | 72 | 68|82758|5267|5|27|47000.25|0.03|0.06|N|O|1998-06-19|1998-06-25|1998-06-29|DELIVER IN PERSON|SHIP|ccounts. deposits use. 
furiously| 73 | 68|102561|5072|6|30|46906.80|0.05|0.06|N|O|1998-08-11|1998-07-11|1998-08-14|NONE|RAIL|oxes are slyly blithely fin| 74 | 68|139247|1761|7|41|52735.84|0.09|0.08|N|O|1998-06-24|1998-06-27|1998-07-06|NONE|SHIP|eposits nag special ideas. furiousl| 75 | 69|115209|7721|1|48|58761.60|0.01|0.07|A|F|1994-08-17|1994-08-11|1994-09-08|NONE|TRUCK|regular epitaphs. carefully even ideas hag| 76 | 69|104180|9201|2|32|37893.76|0.08|0.06|A|F|1994-08-24|1994-08-17|1994-08-31|NONE|REG AIR|s sleep carefully bold, | 77 | 69|137267|4807|3|17|22172.42|0.09|0.00|A|F|1994-07-02|1994-07-07|1994-07-03|TAKE BACK RETURN|AIR|final, pending instr| 78 | 69|37502|2509|4|3|4318.50|0.09|0.04|R|F|1994-06-06|1994-07-27|1994-06-15|NONE|MAIL| blithely final d| 79 | 69|92070|7089|5|42|44606.94|0.07|0.04|R|F|1994-07-31|1994-07-26|1994-08-28|DELIVER IN PERSON|REG AIR|tect regular, speci| 80 | 69|18504|1006|6|23|32717.50|0.05|0.00|A|F|1994-10-03|1994-08-06|1994-10-24|NONE|SHIP|nding accounts ca| 81 | 70|64128|9141|1|8|8736.96|0.03|0.08|R|F|1994-01-12|1994-02-27|1994-01-14|TAKE BACK RETURN|FOB|ggle. carefully pending dependenc| 82 | 70|196156|1195|2|13|16277.95|0.06|0.06|A|F|1994-03-03|1994-02-13|1994-03-26|COLLECT COD|AIR|lyly special packag| 83 | 70|179809|7361|3|1|1888.80|0.03|0.05|R|F|1994-01-26|1994-03-05|1994-01-28|TAKE BACK RETURN|RAIL|quickly. fluffily unusual theodolites c| 84 | 70|45734|743|4|11|18477.03|0.01|0.05|A|F|1994-03-17|1994-03-17|1994-03-27|NONE|MAIL|alongside of the deposits. fur| 85 | 70|37131|2138|5|37|39520.81|0.09|0.04|R|F|1994-02-13|1994-03-16|1994-02-21|COLLECT COD|MAIL|n accounts are. q| 86 | 70|55655|3171|6|19|30602.35|0.06|0.03|A|F|1994-01-26|1994-02-17|1994-02-06|TAKE BACK RETURN|SHIP| packages wake pending accounts.| 87 | 71|61931|1932|1|25|47323.25|0.09|0.07|N|O|1998-04-10|1998-04-22|1998-04-11|COLLECT COD|FOB|ckly. slyly| 88 | 71|65916|3435|2|3|5645.73|0.09|0.07|N|O|1998-05-23|1998-04-03|1998-06-02|COLLECT COD|SHIP|y. 
pinto beans haggle after the| 89 | 71|34432|1942|3|45|61489.35|0.00|0.07|N|O|1998-02-23|1998-03-20|1998-03-24|DELIVER IN PERSON|SHIP| ironic packages believe blithely a| 90 | 71|96645|9155|4|33|54174.12|0.00|0.01|N|O|1998-04-12|1998-03-20|1998-04-15|NONE|FOB| serve quickly fluffily bold deposi| 91 | 71|103255|5766|5|39|49071.75|0.08|0.06|N|O|1998-01-29|1998-04-07|1998-02-18|DELIVER IN PERSON|RAIL|l accounts sleep across the pack| 92 | 71|195635|674|6|34|58841.42|0.04|0.01|N|O|1998-03-05|1998-04-22|1998-03-30|DELIVER IN PERSON|TRUCK|s cajole. | 93 | 96|123076|613|1|23|25278.61|0.10|0.06|A|F|1994-07-19|1994-06-29|1994-07-25|DELIVER IN PERSON|TRUCK|ep-- carefully reg| 94 | 96|135390|5391|2|30|42761.70|0.01|0.06|R|F|1994-06-03|1994-05-29|1994-06-22|DELIVER IN PERSON|TRUCK|e quickly even ideas. furiou| 95 | 97|119477|1989|1|13|19454.11|0.00|0.02|R|F|1993-04-01|1993-04-04|1993-04-08|NONE|TRUCK|ayers cajole against the furiously| 96 | 97|49568|2073|2|37|56149.72|0.02|0.06|A|F|1993-04-13|1993-03-30|1993-04-14|DELIVER IN PERSON|SHIP|ic requests boost carefully quic| 97 | 97|77699|5221|3|19|31857.11|0.06|0.08|R|F|1993-05-14|1993-03-05|1993-05-25|TAKE BACK RETURN|RAIL|gifts. furiously ironic packages cajole. | 98 | 98|40216|217|1|28|32373.88|0.06|0.07|A|F|1994-12-24|1994-10-25|1995-01-16|COLLECT COD|REG AIR| pending, regular accounts s| 99 | 98|109743|7274|2|1|1752.74|0.00|0.00|A|F|1994-12-01|1994-12-12|1994-12-15|DELIVER IN PERSON|TRUCK|. unusual instructions against| 100 | 98|44706|4707|3|14|23109.80|0.05|0.02|A|F|1994-12-30|1994-11-22|1995-01-27|COLLECT COD|AIR| cajole furiously. 
blithely ironic ideas | 101 |
-------------------------------------------------------------------------------- /test/lineitem_cleanup.q:
--------------------------------------------------------------------------------
1 | DROP TABLE lineitem;
-------------------------------------------------------------------------------- /test/onerow.txt:
--------------------------------------------------------------------------------
1 | plugh
-------------------------------------------------------------------------------- /test/setup.sql:
--------------------------------------------------------------------------------
1 | create table onerow(s string);
2 | load data local inpath '${env:HIVE_PLUGIN_ROOT_DIR}/test/onerow.txt' overwrite into table onerow;
3 |
4 | create table dual(id int, name string, age int, height double, dep int)
5 | row format delimited fields terminated by '\t';
6 | load data local inpath '${env:HIVE_PLUGIN_ROOT_DIR}/test/dual.txt' overwrite into table dual;
7 |
8 | create table emp (empno int,ename string,job string,mgr int,hiredate string,sal int,comm int,deptno int)
9 | ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' STORED AS TEXTFILE;
10 | load data local inpath '${env:HIVE_PLUGIN_ROOT_DIR}/test/emp.txt' into table emp;
11 |
12 | create table datatypes (
13 | col_tinyint tinyint,
14 | col_smallint smallint,
15 | col_int int,
16 | col_bigint bigint,
17 | col_boolean boolean,
18 | col_float float,
19 | col_double double,
20 | col_string string,
21 | col_int_array array<int>,
22 | col_string_array array<string>,
23 | col_map map<int,string>,
24 | col_struct struct< id:string, name:string, val:int>
25 | ) ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t' COLLECTION ITEMS TERMINATED BY ',' MAP KEYS TERMINATED BY ':' STORED AS TEXTFILE;
26 |
27 | load data local inpath '${env:HIVE_PLUGIN_ROOT_DIR}/test/datatypes.txt' into table datatypes;
--------------------------------------------------------------------------------