├── .gitignore
├── LICENSE
├── README.md
├── build.xml
├── pom.xml
├── src
└── main
│ └── java
│ └── com
│ └── nexr
│ └── platform
│ └── hive
│ └── udf
│ ├── GenericUDAFDedup.java
│ ├── GenericUDFCount.java
│ ├── GenericUDFDecode.java
│ ├── GenericUDFDenseRank.java
│ ├── GenericUDFGreatest.java
│ ├── GenericUDFInstr.java
│ ├── GenericUDFLag.java
│ ├── GenericUDFLnnvl.java
│ ├── GenericUDFMax.java
│ ├── GenericUDFMin.java
│ ├── GenericUDFNVL.java
│ ├── GenericUDFNVL2.java
│ ├── GenericUDFRank.java
│ ├── GenericUDFRowNumber.java
│ ├── GenericUDFSum.java
│ ├── GenericUDFToNumber.java
│ ├── GenericUDFTrunc.java
│ ├── GenericUDTFPopulate.java
│ ├── UDFChr.java
│ ├── UDFDateFormat.java
│ ├── UDFLastDay.java
│ ├── UDFStrToDate.java
│ ├── UDFSubstrForOracle.java
│ ├── UDFSysDate.java
│ └── UDFToChar.java
└── test
├── cleanup.sql
├── datatypes.txt
├── dual.txt
├── emp.txt
├── lineitem.sql
├── lineitem.txt
├── lineitem_cleanup.q
├── onerow.txt
└── setup.sql
/.gitignore:
--------------------------------------------------------------------------------
1 | .classpath
2 | .project
3 | .launches
4 | .settings
5 | target
6 | logs
7 | javadocs
8 | build
9 | dist
10 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 |
2 | Apache License
3 | Version 2.0, January 2004
4 | http://www.apache.org/licenses/
5 |
6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
7 |
8 | 1. Definitions.
9 |
10 | "License" shall mean the terms and conditions for use, reproduction,
11 | and distribution as defined by Sections 1 through 9 of this document.
12 |
13 | "Licensor" shall mean the copyright owner or entity authorized by
14 | the copyright owner that is granting the License.
15 |
16 | "Legal Entity" shall mean the union of the acting entity and all
17 | other entities that control, are controlled by, or are under common
18 | control with that entity. For the purposes of this definition,
19 | "control" means (i) the power, direct or indirect, to cause the
20 | direction or management of such entity, whether by contract or
21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
22 | outstanding shares, or (iii) beneficial ownership of such entity.
23 |
24 | "You" (or "Your") shall mean an individual or Legal Entity
25 | exercising permissions granted by this License.
26 |
27 | "Source" form shall mean the preferred form for making modifications,
28 | including but not limited to software source code, documentation
29 | source, and configuration files.
30 |
31 | "Object" form shall mean any form resulting from mechanical
32 | transformation or translation of a Source form, including but
33 | not limited to compiled object code, generated documentation,
34 | and conversions to other media types.
35 |
36 | "Work" shall mean the work of authorship, whether in Source or
37 | Object form, made available under the License, as indicated by a
38 | copyright notice that is included in or attached to the work
39 | (an example is provided in the Appendix below).
40 |
41 | "Derivative Works" shall mean any work, whether in Source or Object
42 | form, that is based on (or derived from) the Work and for which the
43 | editorial revisions, annotations, elaborations, or other modifications
44 | represent, as a whole, an original work of authorship. For the purposes
45 | of this License, Derivative Works shall not include works that remain
46 | separable from, or merely link (or bind by name) to the interfaces of,
47 | the Work and Derivative Works thereof.
48 |
49 | "Contribution" shall mean any work of authorship, including
50 | the original version of the Work and any modifications or additions
51 | to that Work or Derivative Works thereof, that is intentionally
52 | submitted to Licensor for inclusion in the Work by the copyright owner
53 | or by an individual or Legal Entity authorized to submit on behalf of
54 | the copyright owner. For the purposes of this definition, "submitted"
55 | means any form of electronic, verbal, or written communication sent
56 | to the Licensor or its representatives, including but not limited to
57 | communication on electronic mailing lists, source code control systems,
58 | and issue tracking systems that are managed by, or on behalf of, the
59 | Licensor for the purpose of discussing and improving the Work, but
60 | excluding communication that is conspicuously marked or otherwise
61 | designated in writing by the copyright owner as "Not a Contribution."
62 |
63 | "Contributor" shall mean Licensor and any individual or Legal Entity
64 | on behalf of whom a Contribution has been received by Licensor and
65 | subsequently incorporated within the Work.
66 |
67 | 2. Grant of Copyright License. Subject to the terms and conditions of
68 | this License, each Contributor hereby grants to You a perpetual,
69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
70 | copyright license to reproduce, prepare Derivative Works of,
71 | publicly display, publicly perform, sublicense, and distribute the
72 | Work and such Derivative Works in Source or Object form.
73 |
74 | 3. Grant of Patent License. Subject to the terms and conditions of
75 | this License, each Contributor hereby grants to You a perpetual,
76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
77 | (except as stated in this section) patent license to make, have made,
78 | use, offer to sell, sell, import, and otherwise transfer the Work,
79 | where such license applies only to those patent claims licensable
80 | by such Contributor that are necessarily infringed by their
81 | Contribution(s) alone or by combination of their Contribution(s)
82 | with the Work to which such Contribution(s) was submitted. If You
83 | institute patent litigation against any entity (including a
84 | cross-claim or counterclaim in a lawsuit) alleging that the Work
85 | or a Contribution incorporated within the Work constitutes direct
86 | or contributory patent infringement, then any patent licenses
87 | granted to You under this License for that Work shall terminate
88 | as of the date such litigation is filed.
89 |
90 | 4. Redistribution. You may reproduce and distribute copies of the
91 | Work or Derivative Works thereof in any medium, with or without
92 | modifications, and in Source or Object form, provided that You
93 | meet the following conditions:
94 |
95 | (a) You must give any other recipients of the Work or
96 | Derivative Works a copy of this License; and
97 |
98 | (b) You must cause any modified files to carry prominent notices
99 | stating that You changed the files; and
100 |
101 | (c) You must retain, in the Source form of any Derivative Works
102 | that You distribute, all copyright, patent, trademark, and
103 | attribution notices from the Source form of the Work,
104 | excluding those notices that do not pertain to any part of
105 | the Derivative Works; and
106 |
107 | (d) If the Work includes a "NOTICE" text file as part of its
108 | distribution, then any Derivative Works that You distribute must
109 | include a readable copy of the attribution notices contained
110 | within such NOTICE file, excluding those notices that do not
111 | pertain to any part of the Derivative Works, in at least one
112 | of the following places: within a NOTICE text file distributed
113 | as part of the Derivative Works; within the Source form or
114 | documentation, if provided along with the Derivative Works; or,
115 | within a display generated by the Derivative Works, if and
116 | wherever such third-party notices normally appear. The contents
117 | of the NOTICE file are for informational purposes only and
118 | do not modify the License. You may add Your own attribution
119 | notices within Derivative Works that You distribute, alongside
120 | or as an addendum to the NOTICE text from the Work, provided
121 | that such additional attribution notices cannot be construed
122 | as modifying the License.
123 |
124 | You may add Your own copyright statement to Your modifications and
125 | may provide additional or different license terms and conditions
126 | for use, reproduction, or distribution of Your modifications, or
127 | for any such Derivative Works as a whole, provided Your use,
128 | reproduction, and distribution of the Work otherwise complies with
129 | the conditions stated in this License.
130 |
131 | 5. Submission of Contributions. Unless You explicitly state otherwise,
132 | any Contribution intentionally submitted for inclusion in the Work
133 | by You to the Licensor shall be under the terms and conditions of
134 | this License, without any additional terms or conditions.
135 | Notwithstanding the above, nothing herein shall supersede or modify
136 | the terms of any separate license agreement you may have executed
137 | with Licensor regarding such Contributions.
138 |
139 | 6. Trademarks. This License does not grant permission to use the trade
140 | names, trademarks, service marks, or product names of the Licensor,
141 | except as required for reasonable and customary use in describing the
142 | origin of the Work and reproducing the content of the NOTICE file.
143 |
144 | 7. Disclaimer of Warranty. Unless required by applicable law or
145 | agreed to in writing, Licensor provides the Work (and each
146 | Contributor provides its Contributions) on an "AS IS" BASIS,
147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
148 | implied, including, without limitation, any warranties or conditions
149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
150 | PARTICULAR PURPOSE. You are solely responsible for determining the
151 | appropriateness of using or redistributing the Work and assume any
152 | risks associated with Your exercise of permissions under this License.
153 |
154 | 8. Limitation of Liability. In no event and under no legal theory,
155 | whether in tort (including negligence), contract, or otherwise,
156 | unless required by applicable law (such as deliberate and grossly
157 | negligent acts) or agreed to in writing, shall any Contributor be
158 | liable to You for damages, including any direct, indirect, special,
159 | incidental, or consequential damages of any character arising as a
160 | result of this License or out of the use or inability to use the
161 | Work (including but not limited to damages for loss of goodwill,
162 | work stoppage, computer failure or malfunction, or any and all
163 | other commercial damages or losses), even if such Contributor
164 | has been advised of the possibility of such damages.
165 |
166 | 9. Accepting Warranty or Additional Liability. While redistributing
167 | the Work or Derivative Works thereof, You may choose to offer,
168 | and charge a fee for, acceptance of support, warranty, indemnity,
169 | or other liability obligations and/or rights consistent with this
170 | License. However, in accepting such obligations, You may act only
171 | on Your own behalf and on Your sole responsibility, not on behalf
172 | of any other Contributor, and only if You agree to indemnify,
173 | defend, and hold each Contributor harmless for any liability
174 | incurred by, or claims asserted against, such Contributor by reason
175 | of your accepting any such warranty or additional liability.
176 |
177 | END OF TERMS AND CONDITIONS
178 |
179 | APPENDIX: How to apply the Apache License to your work.
180 |
181 | To apply the Apache License to your work, attach the following
182 | boilerplate notice, with the fields enclosed by brackets "[]"
183 | replaced with your own identifying information. (Don't include
184 | the brackets!) The text should be enclosed in the appropriate
185 | comment syntax for the file format. We also recommend that a
186 | file or class name and description of purpose be included on the
187 | same "printed page" as the copyright notice for easier
188 | identification within third-party archives.
189 |
190 | Copyright [yyyy] [name of copyright owner]
191 |
192 | Licensed under the Apache License, Version 2.0 (the "License");
193 | you may not use this file except in compliance with the License.
194 | You may obtain a copy of the License at
195 |
196 | http://www.apache.org/licenses/LICENSE-2.0
197 |
198 | Unless required by applicable law or agreed to in writing, software
199 | distributed under the License is distributed on an "AS IS" BASIS,
200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
201 | See the License for the specific language governing permissions and
202 | limitations under the License.
203 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # NexR Hive UDFs
2 |
3 | ## About
4 | NexR Hive UDFs is a collection of user defined functions for Hive.
5 |
6 | ## License
7 | [Apache License Version 2.0](http://www.apache.org/licenses/LICENSE-2.0)
8 |
9 | ## Quickstart
10 | $ git clone https://github.com/nexr/hive-udf.git
11 | $ cd hive-udf
12 | $ mvn clean package
13 | or
14 | $ ant -Dhive.install.dir=../hive/build/dist clean package
15 |
16 | ## Run the tests using Hive PDK
17 | $ ant -Dhive.install.dir=../hive/build/dist test
18 |
19 | ## Install and Configurations
20 | ### hive-site.xml
21 |     <property>
22 |       <name>hive.aux.jars.path</name>
23 |       <value>file:///path/to/nexr-hive-udf-[VERSION].jar</value>
24 |     </property>
25 |
26 | ### Registering the UDFs
27 | hive> add jar /path/to/nexr-hive-udf-[VERSION].jar;
28 | hive> CREATE TEMPORARY FUNCTION nvl AS 'com.nexr.platform.hive.udf.GenericUDFNVL';
29 | hive> CREATE TEMPORARY FUNCTION decode AS 'com.nexr.platform.hive.udf.GenericUDFDecode';
30 | hive> CREATE TEMPORARY FUNCTION nvl2 AS 'com.nexr.platform.hive.udf.GenericUDFNVL2';
31 | hive> CREATE TEMPORARY FUNCTION str_to_date AS 'com.nexr.platform.hive.udf.UDFStrToDate';
32 | hive> CREATE TEMPORARY FUNCTION date_format AS 'com.nexr.platform.hive.udf.UDFDateFormat';
33 | hive> CREATE TEMPORARY FUNCTION to_char AS 'com.nexr.platform.hive.udf.UDFToChar';
34 | hive> CREATE TEMPORARY FUNCTION instr4 AS 'com.nexr.platform.hive.udf.GenericUDFInstr';
35 | hive> CREATE TEMPORARY FUNCTION chr AS 'com.nexr.platform.hive.udf.UDFChr';
36 | hive> CREATE TEMPORARY FUNCTION last_day AS 'com.nexr.platform.hive.udf.UDFLastDay';
37 | hive> CREATE TEMPORARY FUNCTION greatest AS 'com.nexr.platform.hive.udf.GenericUDFGreatest';
38 | hive> CREATE TEMPORARY FUNCTION to_number AS 'com.nexr.platform.hive.udf.GenericUDFToNumber';
39 | hive> CREATE TEMPORARY FUNCTION trunc AS 'com.nexr.platform.hive.udf.GenericUDFTrunc';
40 | hive> CREATE TEMPORARY FUNCTION rank AS 'com.nexr.platform.hive.udf.GenericUDFRank';
41 | hive> CREATE TEMPORARY FUNCTION row_number AS 'com.nexr.platform.hive.udf.GenericUDFRowNumber';
42 | hive> CREATE TEMPORARY FUNCTION sysdate AS 'com.nexr.platform.hive.udf.UDFSysDate';
43 | hive> CREATE TEMPORARY FUNCTION populate AS 'com.nexr.platform.hive.udf.GenericUDTFPopulate';
44 | hive> CREATE TEMPORARY FUNCTION dedup AS 'com.nexr.platform.hive.udf.GenericUDAFDedup';
45 | hive> CREATE TEMPORARY FUNCTION lnnvl AS 'com.nexr.platform.hive.udf.GenericUDFLnnvl';
46 | hive> CREATE TEMPORARY FUNCTION substr AS 'com.nexr.platform.hive.udf.UDFSubstrForOracle';
47 |
48 | ## Usage of Hive UDFs
49 | See the details at [Project Wiki](https://github.com/nexr/hive-udf/wiki).
50 |
51 | ## References
52 | * [Apache Hive](http://hive.apache.org/)
53 | * [Oracle 11g SQL Functions](http://docs.oracle.com/cd/B28359_01/server.111/b28286/functions001.htm#i88893)
54 |
--------------------------------------------------------------------------------
/build.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
--------------------------------------------------------------------------------
/pom.xml:
--------------------------------------------------------------------------------
1 |
3 | 4.0.0
4 | com.nexr
5 | nexr-hive-udf
6 | 0.2-SNAPSHOT
7 | nexr-hive-udf
8 | nexr-hive-udf
9 |
10 |
11 | UTF-8
12 | 0.20.2
13 | 0.9.0
14 |
15 |
16 |
17 |
18 |
19 | org.apache.hadoop
20 | hadoop-core
21 | ${hadoop.version}
22 |
23 |
24 |
25 | org.apache.hive
26 | hive-exec
27 | ${hive.version}
28 |
29 |
30 | org.apache.hive
31 | hive-metastore
32 | ${hive.version}
33 |
34 |
35 | org.apache.hive
36 | hive-pdk
37 | ${hive.version}
38 |
39 |
40 | javax.jdo
41 | jdo2-api
42 | 2.3-eb
43 |
44 |
45 | commons-logging
46 | commons-logging
47 | 1.1.1
48 |
49 |
50 | junit
51 | junit
52 | 4.7
53 | test
54 |
55 |
56 |
57 |
58 |
--------------------------------------------------------------------------------
/src/main/java/com/nexr/platform/hive/udf/GenericUDAFDedup.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing, software
13 | * distributed under the License is distributed on an "AS IS" BASIS,
14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | * See the License for the specific language governing permissions and
16 | * limitations under the License.
17 | */
18 |
19 | package com.nexr.platform.hive.udf;
20 |
21 | import org.apache.hadoop.hive.ql.exec.Description;
22 | import org.apache.hadoop.hive.ql.metadata.HiveException;
23 | import org.apache.hadoop.hive.ql.parse.SemanticException;
24 | import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
25 | import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFParameterInfo;
26 | import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFResolver2;
27 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
28 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
29 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
30 | import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
31 | import org.apache.hive.pdk.HivePdkUnitTest;
32 | import org.apache.hive.pdk.HivePdkUnitTests;
33 |
34 | import java.util.ArrayList;
35 | import java.util.List;
36 |
37 | @Description(name = "dedup", value = "_FUNC_(x,y,z) - Removes duplicated row and return it as a struct.")
38 | public class GenericUDAFDedup implements GenericUDAFResolver2 {
39 |
40 |   /**
41 |    * Creates the dedup evaluator. The parameter info is not inspected, so no
42 |    * argument count or type check is performed here.
43 |    */
44 |   public GenericUDAFEvaluator getEvaluator(GenericUDAFParameterInfo info) throws SemanticException {
45 |     return new GenericUDAFDedupEval();
46 |   }
47 |
48 |   /**
49 |    * Creates the dedup evaluator. The parameter types are not inspected.
50 |    */
51 |   public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws SemanticException {
52 |     return new GenericUDAFDedupEval();
53 |   }
54 |
55 |   /**
56 |    * Evaluator that remembers the first row it is given and ignores every
57 |    * later one, so each aggregation buffer ends up holding a single row.
58 |    */
59 |   public static class GenericUDAFDedupEval extends GenericUDAFEvaluator {
60 |
61 |     /** Inspectors of the arguments passed to the last init call. */
62 |     ObjectInspector[] input;
63 |     /** Inspector of the value this evaluator returns in the current mode. */
64 |     ObjectInspector output;
65 |
66 |     /**
67 |      * Records the argument inspectors and picks the output inspector.
68 |      *
69 |      * In PARTIAL1 and COMPLETE the result is a standard struct with one field
70 |      * per argument, named _col0, _col1, ... In PARTIAL2 and FINAL only
71 |      * parameters[0] is used (presumably the struct emitted by the partial
72 |      * stage - confirm against the Hive UDAF mode contract).
73 |      *
74 |      * @param m aggregation mode
75 |      * @param parameters inspectors of the incoming arguments
76 |      * @return inspector describing the value produced in this mode
77 |      */
78 |     @Override
79 |     public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException {
80 |       super.init(m, parameters);
81 |       this.input = parameters;
82 |       switch (m) {
83 |       case PARTIAL1:
84 |       case COMPLETE:
85 |         // Typed lists instead of raw ones: the factory expects List<String>
86 |         // and List<ObjectInspector>.
87 |         List<String> names = new ArrayList<String>(parameters.length);
88 |         List<ObjectInspector> inspectors = new ArrayList<ObjectInspector>(parameters.length);
89 |         for (int i = 0; i < parameters.length; i++) {
90 |           names.add("_col" + i);
91 |           inspectors.add(ObjectInspectorUtils.getStandardObjectInspector(parameters[i]));
92 |         }
93 |         return output = ObjectInspectorFactory.getStandardStructObjectInspector(names, inspectors);
94 |       case PARTIAL2:
95 |         return output = parameters[0];
96 |       case FINAL:
97 |         return output = ObjectInspectorUtils.getStandardObjectInspector(parameters[0]);
98 |       default:
99 |         throw new IllegalArgumentException("never");
100 |       }
101 |     }
102 |
103 |     /** Returns an empty buffer that holds no row yet. */
104 |     @Override
105 |     public AggregationBuffer getNewAggregationBuffer() throws HiveException {
106 |       return new DedupRow();
107 |     }
108 |
109 |     /** Forgets the row held by the buffer so it can be reused. */
110 |     @Override
111 |     public void reset(AggregationBuffer agg) throws HiveException {
112 |       ((DedupRow) agg).row = null;
113 |     }
114 |
115 |     /**
116 |      * Stores a standard-object copy of the arguments if the buffer is still
117 |      * empty; rows arriving after the first one are dropped.
118 |      */
119 |     @Override
120 |     public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException {
121 |       DedupRow buffer = (DedupRow) agg;
122 |       if (buffer.row != null) {
123 |         return;
124 |       }
125 |       Object[] copy = new Object[parameters.length];
126 |       for (int i = 0; i < copy.length; i++) {
127 |         copy[i] = ObjectInspectorUtils.copyToStandardObject(parameters[i], input[i]);
128 |       }
129 |       buffer.row = copy;
130 |     }
131 |
132 |     /** Returns the buffered row, or null when no row has been stored. */
133 |     @Override
134 |     public Object terminatePartial(AggregationBuffer agg) throws HiveException {
135 |       return ((DedupRow) agg).row;
136 |     }
137 |
138 |     /**
139 |      * Adopts a copy of the partial result if the buffer is still empty;
140 |      * otherwise the partial is ignored.
141 |      */
142 |     @Override
143 |     public void merge(AggregationBuffer agg, Object partial) throws HiveException {
144 |       DedupRow buffer = (DedupRow) agg;
145 |       if (buffer.row == null) {
146 |         buffer.row = ObjectInspectorUtils.copyToStandardObject(partial, input[0]);
147 |       }
148 |     }
149 |
150 |     /** Returns the buffered row, or null when no row has been stored. */
151 |     @Override
152 |     public Object terminate(AggregationBuffer agg) throws HiveException {
153 |       return ((DedupRow) agg).row;
154 |     }
155 |
156 |     /** Aggregation buffer holding the single retained row (null until set). */
157 |     static class DedupRow implements AggregationBuffer {
158 |       Object row;
159 |     }
160 |   }
161 | }
162 |
--------------------------------------------------------------------------------
/src/main/java/com/nexr/platform/hive/udf/GenericUDFCount.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing, software
13 | * distributed under the License is distributed on an "AS IS" BASIS,
14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | * See the License for the specific language governing permissions and
16 | * limitations under the License.
17 | */
18 |
19 | package com.nexr.platform.hive.udf;
20 |
21 | import org.apache.commons.lang.StringUtils;
22 | import org.apache.hadoop.hive.ql.exec.Description;
23 | import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
24 | import org.apache.hadoop.hive.ql.metadata.HiveException;
25 | import org.apache.hadoop.hive.ql.udf.UDFType;
26 | import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
27 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
28 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
29 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption;
30 | import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
31 | import org.apache.hadoop.io.LongWritable;
32 | import org.apache.hive.pdk.HivePdkUnitTest;
33 | import org.apache.hive.pdk.HivePdkUnitTests;
34 |
35 |
36 | @Description(name = "count",
37 | value = "_FUNC_(hash_key, order_by_col) " +
38 | "- Returns the count value of group",
39 | extended = "Example:\n"
40 | + " > SELECT _FUNC_(HASH(p1, p2), order_by_col) FROM (\n"
41 | + " > SELECT ~ FROM table DISTRIBUTE BY HASH(p1,p2) SORT BY p1,p2,order_by_col DESC, ... \n"
42 | + " > );")
43 |
44 | @HivePdkUnitTests(
45 | setup = "", cleanup = "",
46 | cases = {
47 | @HivePdkUnitTest(
48 | query = "SELECT t.deptno, nexr_count(hash(t.deptno),t.empno) as emp_count"
49 | +" FROM ("
50 | +" select a.empno, a.deptno from emp a"
51 | +" distribute by hash(a.deptno)"
52 | +" sort BY a.deptno, a.empno"
53 | +" ) t;",
54 | result ="10\t1\n"
55 | +"10\t2\n"
56 | +"10\t3\n"
57 | +"20\t1\n"
58 | +"20\t2\n"
59 | +"20\t3\n"
60 | +"20\t4\n"
61 | +"20\t5\n"
62 | +"30\t1\n"
63 | +"30\t2\n"
64 | +"30\t3\n"
65 | +"30\t4\n"
66 | +"30\t5\n"
67 | +"30\t6")
68 | }
69 | )
70 |
71 | @UDFType(deterministic = false, stateful = true)
72 | public class GenericUDFCount extends GenericUDF {
73 | private final LongWritable longResult = new LongWritable();
74 | private ObjectInspector hashOI, prevHashStandardOI;
75 | private Object prevHash;
76 | @Override
77 | public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
78 | if (arguments.length != 2) {
79 | throw new UDFArgumentException("Exactly two argument is expected.");
80 | }
81 |
82 | hashOI = arguments[0];
83 | prevHashStandardOI=ObjectInspectorUtils.getStandardObjectInspector(hashOI,ObjectInspectorCopyOption.JAVA);
84 |
85 | longResult.set(0);
86 |
87 | return PrimitiveObjectInspectorFactory.writableLongObjectInspector;
88 | }
89 |
90 | @Override
91 | public Object evaluate(DeferredObject[] arguments) throws HiveException {
92 | Object hash = arguments[0].get();
93 |
94 | if (prevHash==null||ObjectInspectorUtils.compare(prevHash,prevHashStandardOI,hash,hashOI)!=0) {
95 | longResult.set(0);
96 |
97 | }
98 |
99 | prevHash=ObjectInspectorUtils.copyToStandardObject(hash,hashOI, ObjectInspectorCopyOption.JAVA);
100 | longResult.set(longResult.get()+1);
101 | return longResult;
102 | }
103 |
104 | @Override
105 | public String getDisplayString(String[] children) {
106 | return "count(" + StringUtils.join(children, ',') + ")";
107 | }
108 | }
109 |
110 |
--------------------------------------------------------------------------------
/src/main/java/com/nexr/platform/hive/udf/GenericUDFDecode.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing, software
13 | * distributed under the License is distributed on an "AS IS" BASIS,
14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | * See the License for the specific language governing permissions and
16 | * limitations under the License.
17 | */
18 |
19 | package com.nexr.platform.hive.udf;
20 |
21 | import org.apache.hadoop.hive.ql.exec.Description;
22 | import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
23 | import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
24 | import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
25 | import org.apache.hadoop.hive.ql.metadata.HiveException;
26 | import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
27 | import org.apache.hadoop.hive.ql.udf.generic.GenericUDFUtils;
28 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
29 | import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
30 | import org.apache.hive.pdk.HivePdkUnitTest;
31 | import org.apache.hive.pdk.HivePdkUnitTests;
32 |
33 | /**
34 | * GenericUDF class for the SQL construct "decode(value1, value2, value3, ..., defaultValue)".
35 | * Like Oracle's DECODE, it compares the first argument to each of the other values in turn.
36 | *
37 | * Hive already provides a CASE ... WHEN clause,
38 | * but offering DECODE makes it possible to port SQL to HQL without changing the query.
39 | *
40 | *
41 | */
42 | @Description(name = "decode", value = "_FUNC_(value1, value2, value3, .... defaultValue) " +
43 | "- Returns value3 if value1=value2 otherwise defaultValue", extended = "Example:\n"
44 | + " > SELECT _FUNC_(dep, 0, \"ACCOUNT\", 1, \"HR\", \"NO-DEP\") FROM src LIMIT 1;\n 'ACCOUNT' if dep=0")
45 | @HivePdkUnitTests(setup = "create table dual_data (i int); "
46 | + "insert overwrite table dual_data select 1 from dual limit 1;",
47 | cleanup = "drop table if exists dual_data;",
48 | cases = {
49 | @HivePdkUnitTest(
50 | query = "SELECT nexr_decode(i, 0, 'ACCOUNT', 1, 'HR', 'NO-DEP') FROM dual_data;",
51 | result = "HR"),
52 | @HivePdkUnitTest(
53 | query = "SELECT nexr_decode(dep, 0, 'ACCOUNT', 1, 'HR', 'NO-DEP') " +
54 | "FROM dual WHERE id = 0;", result = "ACCOUNT"),
55 | @HivePdkUnitTest(
56 | query = "SELECT nexr_decode(dep, 0, 'ACCOUNT', 1, 'HR', 'NO-DEP') " +
57 | "FROM dual WHERE id = 2;", result = "NO-DEP") })
58 | public class GenericUDFDecode extends GenericUDF {
59 |
60 |   /** Inspectors of all call arguments, in call order. */
61 |   private ObjectInspector[] argumentOIs;
62 |   /** Resolves the common type of the result arguments and the default. */
63 |   private GenericUDFUtils.ReturnObjectInspectorResolver returnOIResolver;
64 |   /** Resolves the common type of the expression and the search arguments. */
65 |   private GenericUDFUtils.ReturnObjectInspectorResolver caseOIResolver;
66 |
67 |   /**
68 |    * Checks the argument count and unifies the argument types.
69 |    *
70 |    * Argument 0 is the expression; arguments 1, 3, 5, ... are search values,
71 |    * each followed by its result at 2, 4, 6, ...; a trailing unpaired argument
72 |    * (even argument count) is the default.
73 |    *
74 |    * @param arguments inspectors of the call arguments
75 |    * @return inspector of the common result type
76 |    * @throws UDFArgumentLengthException if fewer than three arguments are given
77 |    * @throws UDFArgumentTypeException if the search or result types cannot be unified
78 |    */
79 |   @Override
80 |   public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
81 |     if (arguments.length < 3) {
82 |       throw new UDFArgumentLengthException(
83 |           "The function decode(value1,value2,value3...default) needs "
84 |           + "at least three arguments.");
85 |     }
86 |
87 |     argumentOIs = arguments;
88 |     caseOIResolver = new GenericUDFUtils.ReturnObjectInspectorResolver(true);
89 |     returnOIResolver = new GenericUDFUtils.ReturnObjectInspectorResolver(true);
90 |     caseOIResolver.update(arguments[0]);
91 |     for (int i = 1; i + 1 < arguments.length; i += 2) {
92 |       // search value: arguments 1, 3, 5, 7, ...
93 |       if (!caseOIResolver.update(arguments[i])) {
94 |         throw new UDFArgumentTypeException(i, "The value of 'value'" + (i + 1)
95 |             + " should have the same type: \""
96 |             + caseOIResolver.get().getTypeName()
97 |             + "\" is expected but \"" + arguments[i].getTypeName()
98 |             + "\" is found");
99 |       }
100 |       // result: arguments 2, 4, 6, ...
101 |       if (!returnOIResolver.update(arguments[i + 1])) {
102 |         // Argument i + 1 (0-based) is reported 1-based, like the search message above.
103 |         throw new UDFArgumentTypeException(i + 1, "The value of return(value "
104 |             + (i + 2) + ") should have the same type: \""
105 |             + returnOIResolver.get().getTypeName()
106 |             + "\" is expected but \"" + arguments[i + 1].getTypeName()
107 |             + "\" is found");
108 |       }
109 |     }
110 |     // The last argument is either the default or the final result; both share the return type.
111 |     if (!returnOIResolver.update(arguments[arguments.length - 1])) {
112 |       throw new UDFArgumentTypeException(arguments.length - 1,
113 |           "The value of return should have the same type: \"" +
114 |           returnOIResolver.get().getTypeName() + "\" is expected but \"" +
115 |           arguments[arguments.length - 1].getTypeName() + "\" is found");
116 |     }
117 |
118 |     return returnOIResolver.get();
119 |   }
120 |
121 |   /**
122 |    * Returns the result paired with the first search value equal to the
123 |    * expression, or the default when nothing matches.
124 |    *
125 |    * Equality is decided by comparing the toString() forms of the primitive
126 |    * Java objects. A NULL expression never matches and yields the default; a
127 |    * NULL search value is skipped so later pairs are still compared. A match
128 |    * is final even when its result is NULL. Without a default argument (odd
129 |    * argument count) the resolver is given NULL to convert.
130 |    *
131 |    * NOTE(review): Oracle's DECODE treats two NULLs as equal; NULLs are
132 |    * still not matched here to keep existing results - confirm intent.
133 |    *
134 |    * @param records deferred call arguments, laid out as in initialize
135 |    * @return the selected value converted through the return-type resolver
136 |    * @throws HiveException if an argument cannot be evaluated
137 |    */
138 |   @Override
139 |   public Object evaluate(DeferredObject[] records) throws HiveException {
140 |     Object fieldValue = records[0].get();
141 |
142 |     if (fieldValue != null) {
143 |       // The expression is loop-invariant, so convert it once.
144 |       Object fieldObj = ((PrimitiveObjectInspector) argumentOIs[0]).getPrimitiveJavaObject(fieldValue);
145 |       for (int i = 1; i + 1 < records.length; i += 2) {
146 |         Object caseValue = records[i].get();
147 |         if (caseValue == null) {
148 |           // Skip only this pair; stopping here would hide later matches.
149 |           continue;
150 |         }
151 |
152 |         Object caseObj = ((PrimitiveObjectInspector) argumentOIs[i]).getPrimitiveJavaObject(caseValue);
153 |         if (caseObj.toString().equals(fieldObj.toString())) {
154 |           // Return straight away so a NULL result is not mistaken for "no match".
155 |           return returnOIResolver.convertIfNecessary(records[i + 1].get(), argumentOIs[i + 1]);
156 |         }
157 |       }
158 |     }
159 |
160 |     // No pair matched: use the trailing default if there is one (even argument count).
161 |     Object defaultValue = null;
162 |     if (records.length % 2 == 0) {
163 |       defaultValue = records[records.length - 1].get();
164 |     }
165 |     return returnOIResolver.convertIfNecessary(defaultValue, argumentOIs[records.length - 1]);
166 |   }
167 |
168 |   /** Builds the display string "decode (child1, child2, ...)". */
169 |   @Override
170 |   public String getDisplayString(String[] children) {
171 |     StringBuilder sb = new StringBuilder();
172 |     sb.append("decode (");
173 |     for (int i = 0; i < children.length - 1; i++) {
174 |       sb.append(children[i]).append(", ");
175 |     }
176 |     sb.append(children[children.length - 1]).append(")");
177 |     return sb.toString();
178 |   }
179 |
180 | }
181 |
--------------------------------------------------------------------------------
/src/main/java/com/nexr/platform/hive/udf/GenericUDFDenseRank.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing, software
13 | * distributed under the License is distributed on an "AS IS" BASIS,
14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | * See the License for the specific language governing permissions and
16 | * limitations under the License.
17 | */
18 |
19 | package com.nexr.platform.hive.udf;
20 |
21 | import org.apache.commons.lang.StringUtils;
22 | import org.apache.hadoop.hive.ql.exec.Description;
23 | import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
24 | import org.apache.hadoop.hive.ql.metadata.HiveException;
25 | import org.apache.hadoop.hive.ql.udf.UDFType;
26 | import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
27 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
28 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
29 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption;
30 | import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
31 | import org.apache.hadoop.io.LongWritable;
32 | import org.apache.hive.pdk.HivePdkUnitTest;
33 | import org.apache.hive.pdk.HivePdkUnitTests;
34 |
35 |
36 | @Description(name = "dense_rank",
37 | value = "_FUNC_(hash_key, order_by_col1, order_by_col2 ...) " +
38 | "- Returns the rank of a row in an ordered group of rows",
39 | extended = "Example:\n"
40 | + " > SELECT _FUNC_(HASH(p1, p2), order_by_col1) FROM (\n"
41 | + " > SELECT order_by_col1 FROM table \n"
42 | + " > DISTRIBUTE BY HASH(p1,p2)\n"
43 | + " > SORT BY p1, p2, order_by_col1 \n"
44 | + " > );\n\n"
45 | + "ORACLE:\n"
46 | + " > SELECT DENSE_RANK() OVER(PARTITION BY p1,p2 ORDER BY order_by_col1) FROM table;\n"
47 | )
48 |
49 | @HivePdkUnitTests(
50 | setup = "", cleanup = "",
51 | cases = {
52 | @HivePdkUnitTest(
53 | query = "SELECT t.empno, t.deptno, t.sal, nexr_dense_rank(t.deptno, t.sal) as rank"
54 | + " FROM ("
55 | + " select a.empno, a.deptno, a.sal from "
56 | + " emp a"
57 | + " distribute by a.deptno"
58 | + " sort BY a.deptno, a.sal, a.empno"
59 | + " ) t;",
60 | result = "7934\t10\t1300\t1\n"
61 | + "7782\t10\t2450\t2\n"
62 | + "7839\t10\t5000\t3\n"
63 | + "7369\t20\t800\t1\n"
64 | + "7876\t20\t1100\t2\n"
65 | + "7566\t20\t2975\t3\n"
66 | + "7788\t20\t3000\t4\n"
67 | + "7902\t20\t3000\t4\n"
68 | + "7900\t30\t950\t1\n"
69 | + "7521\t30\t1250\t2\n"
70 | + "7654\t30\t1250\t2\n"
71 | + "7844\t30\t1500\t3\n"
72 | + "7499\t30\t1600\t4\n"
73 | + "7698\t30\t2850\t5")
74 | }
75 | )
76 |
77 | @UDFType(deterministic = false, stateful = true)
78 | public class GenericUDFDenseRank extends GenericUDFRank {
79 |
80 | @Override
81 | protected void different(boolean newKey) {
82 | if (newKey) {
83 | counter = 1;
84 | } else {
85 | counter++;
86 | }
87 | result.set(counter);
88 | }
89 |
90 | @Override
91 | protected void next() {
92 | }
93 |
94 | @Override
95 | public String getDisplayString(String[] children) {
96 | return "dense_rank(" + StringUtils.join(children, ',') + ")";
97 | }
98 | }
99 |
--------------------------------------------------------------------------------
/src/main/java/com/nexr/platform/hive/udf/GenericUDFGreatest.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing, software
13 | * distributed under the License is distributed on an "AS IS" BASIS,
14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | * See the License for the specific language governing permissions and
16 | * limitations under the License.
17 | */
18 | package com.nexr.platform.hive.udf;
19 |
20 | import org.apache.hadoop.hive.ql.exec.Description;
21 | import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
22 | import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
23 | import org.apache.hadoop.hive.ql.metadata.HiveException;
24 | import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
25 | import org.apache.hadoop.hive.ql.udf.generic.GenericUDFUtils;
26 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
27 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
28 | import org.apache.hive.pdk.HivePdkUnitTest;
29 | import org.apache.hive.pdk.HivePdkUnitTests;
30 |
31 | /**
32 | * GenericUDF Class for SQL construct "greatest(value1, value2, value3, ....)".
33 | * Oracle's CREATEST returns the greatest of the list of one or more expressions.
34 | *
35 | */
36 | @Description(name = "greatest", value = "_FUNC_(value1, value2, value3, ....) " +
37 | "- Returns the greatest value in the list.",
38 | extended = "Example:\n" + " > SELECT _FUNC_(2, 5, 12, 3) FROM src;\n 12")
39 | @HivePdkUnitTests(setup = "create table dual_data (i int); "
40 | + "insert overwrite table dual_data select 1 from dual limit 1;",
41 | cleanup = "drop table if exists dual_data;",
42 | cases = {
43 | @HivePdkUnitTest(query = "SELECT nexr_greatest(2, 5, 12, 3) " +
44 | "FROM dual_data;", result = "12"),
45 | @HivePdkUnitTest(query = "SELECT nexr_greatest('2', '5', '12', '3') " +
46 | "FROM dual_data;", result = "5"),
47 | @HivePdkUnitTest(query = "SELECT nexr_greatest('apples', 'oranges', 'bananas') " +
48 | "FROM dual_data;", result = "oranges") })
49 | public class GenericUDFGreatest extends GenericUDF {
50 |
51 | private ObjectInspector[] argumentOIs;
52 | private GenericUDFUtils.ReturnObjectInspectorResolver returnOIResolver;
53 |
54 | @Override
55 | public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
56 |
57 | for (int i = 0; i < arguments.length; i++) {
58 | if (arguments[i].getCategory() != ObjectInspector.Category.PRIMITIVE) {
59 | throw new UDFArgumentTypeException(i, "Only primitive type arguments are accepted but "
60 | + arguments[i].getTypeName() + " is passed.");
61 | }
62 | }
63 |
64 | argumentOIs = arguments;
65 | returnOIResolver = new GenericUDFUtils.ReturnObjectInspectorResolver(true);
66 | for (int i = 0; i < arguments.length; i++) {
67 | if (!returnOIResolver.update(arguments[i])) {
68 | throw new UDFArgumentTypeException(i, "The value of return should have the same type: \""
69 | + returnOIResolver.get().getTypeName() +
70 | "\" is expected but \"" + arguments[i].getTypeName()
71 | + "\" is found");
72 | }
73 | }
74 |
75 | return returnOIResolver.get();
76 | }
77 |
78 | @Override
79 | public Object evaluate(DeferredObject[] fields) throws HiveException {
80 | Object greatestObject = null;
81 | ObjectInspector greatestOI = null;
82 |
83 | for (int i = 0; i < fields.length; i++) {
84 | Object fieldObject = fields[i].get();
85 | if (greatestObject == null) {
86 | greatestObject = fieldObject;
87 | greatestOI = argumentOIs[i];
88 | continue;
89 | }
90 |
91 | if (ObjectInspectorUtils.compare(greatestObject, greatestOI, fieldObject, argumentOIs[i]) <= 0) {
92 | greatestObject = fieldObject;
93 | greatestOI = argumentOIs[i];
94 | }
95 | }
96 |
97 | return returnOIResolver.convertIfNecessary(greatestObject, greatestOI);
98 | }
99 |
100 | @Override
101 | public String getDisplayString(String[] children) {
102 | StringBuilder sb = new StringBuilder();
103 | sb.append("greatest (");
104 | for (int i = 0; i < children.length - 1; i++) {
105 | sb.append(children[i]).append(", ");
106 | }
107 | sb.append(children[children.length - 1]).append(")");
108 | return sb.toString();
109 | }
110 |
111 | }
112 |
--------------------------------------------------------------------------------
/src/main/java/com/nexr/platform/hive/udf/GenericUDFInstr.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing, software
13 | * distributed under the License is distributed on an "AS IS" BASIS,
14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | * See the License for the specific language governing permissions and
16 | * limitations under the License.
17 | */
18 |
19 | package com.nexr.platform.hive.udf;
20 |
21 | import org.apache.hive.pdk.HivePdkUnitTest;
22 | import org.apache.hive.pdk.HivePdkUnitTests;
23 |
24 | import org.apache.hadoop.hive.ql.exec.Description;
25 | import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
26 | import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
27 | import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
28 | import org.apache.hadoop.hive.ql.metadata.HiveException;
29 | import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
30 | import org.apache.hadoop.hive.ql.udf.generic.GenericUDFUtils;
31 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
32 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
33 | import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
34 | import org.apache.hadoop.io.IntWritable;
35 | import org.apache.hadoop.io.Text;
36 |
37 | /**
38 | * GenericUDFInstr
39 | *
40 | * This integrates the function from Oracle and Mysql
41 | * http://www.techonthenet.com/oracle/functions/instr.php
42 | * http://dev.mysql.com/doc/refman/5.1/en/string-functions.html#function_instr
43 | *
44 | * INSTR(string, substring, [start_position, [nth_appearance]]).
45 | * string is the string to search.
46 | * substring is the substring to search for in string.
47 | * start_position is the position in string where the search will start.
48 | * This argument is optional. If omitted, it defaults to 1.
49 | * The first position in the string is 1.
50 | * If the start_position is negative, the function counts back start_position number of characters
51 | * from the end of string and then searches towards the beginning of string.
52 | * nth_appearance is the nth appearance of substring.
53 | * This is optional. If omitted, it defaults to 1.
54 | *
55 | */
56 | @Description(name = "instr",
57 | value = "_FUNC_(string, substring, [start_position, [nth_appearance]]) " +
58 | "- Returns the index of the first occurance of substr in str",
59 | extended = "Example:\n"
60 | + " > SELECT _FUNC_('Facebook', 'boo') FROM src LIMIT 1;\n" + " 5\n")
61 | @HivePdkUnitTests(
62 | setup = "", cleanup = "",
63 | cases = {
64 | @HivePdkUnitTest(
65 | query = "SELECT nexr_instr('Tech on the net', 'e') FROM onerow;",
66 | result = "2"),
67 | @HivePdkUnitTest(
68 | query = "SELECT nexr_instr('Tech on the net', 'e', 1, 1) FROM onerow;",
69 | result = "2"),
70 | @HivePdkUnitTest(
71 | query = "SELECT nexr_instr('Tech on the net', 'e', 1, 2) FROM onerow;",
72 | result = "11"),
73 | @HivePdkUnitTest(
74 | query = "SELECT nexr_instr('Tech on the net', 'e', 1, 3) FROM onerow;",
75 | result = "14"),
76 | @HivePdkUnitTest(
77 | query = "SELECT nexr_instr('Tech on the net', 'e', -5, 1) FROM onerow;",
78 | result = "11")
79 | }
80 | )
81 |
82 | public class GenericUDFInstr extends GenericUDF {
83 |
84 | private ObjectInspectorConverters.Converter[] converters;
85 |
86 | static final int DEFAULT_START_INDEX = 1;
87 | static final int DEFAULT_NTH = 1;
88 |
89 | @Override
90 | public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
91 | if (arguments.length < 2 || arguments.length > 4) {
92 | throw new UDFArgumentLengthException("The function INSTR must have 2 or more and less than 4 arguments.");
93 | }
94 |
95 | for (int i = 0; i < arguments.length; i++) {
96 | if (i < 2) {// 0,1 : String
97 | if (!arguments[i].getTypeName().toLowerCase().equals("string")) {
98 | throw new UDFArgumentTypeException(i, "The " + GenericUDFUtils.getOrdinal(i + 1)
99 | + " argument of function INSTR is expected to a string type, but " +
100 | arguments[i].getTypeName().toLowerCase() + " is found");
101 | }
102 | } else {// 2,3 : Int
103 | if (!arguments[i].getTypeName().equals("int")) {
104 | throw new UDFArgumentTypeException(i, "The " + GenericUDFUtils.getOrdinal(i + 1)
105 | + " argument of function INSTR is expected to a int type, but " +
106 | arguments[i].getTypeName().toLowerCase() + " is found");
107 | }
108 | }
109 | }
110 |
111 | converters = new ObjectInspectorConverters.Converter[arguments.length];
112 | for (int i = 0; i < arguments.length; i++) {
113 | if (i < 2) {// 0,1 : String
114 | converters[i] = ObjectInspectorConverters.getConverter(
115 | arguments[i], PrimitiveObjectInspectorFactory.writableStringObjectInspector);
116 | } else {// 2,3 : Int
117 | converters[i] = ObjectInspectorConverters.getConverter(
118 | arguments[i], PrimitiveObjectInspectorFactory.writableIntObjectInspector);
119 | }
120 | }
121 |
122 | return PrimitiveObjectInspectorFactory.writableIntObjectInspector;
123 | }
124 |
125 | private IntWritable intWritable = new IntWritable(0);
126 |
127 | @Override
128 | public Object evaluate(DeferredObject[] arguments) throws HiveException {
129 | if (arguments[0].get() == null || arguments[1].get() == null) {
130 | return null;
131 | }
132 |
133 | Text text = (Text) converters[0].convert(arguments[0].get());
134 | Text subtext = (Text) converters[1].convert(arguments[1].get());
135 | int startIndex = (arguments.length >= 3) ?
136 | ((IntWritable) converters[2].convert(arguments[2].get())).get() : DEFAULT_START_INDEX;
137 | int nth = (arguments.length == 4) ?
138 | ((IntWritable) converters[3].convert(arguments[3].get())).get() : DEFAULT_NTH;
139 |
140 | // argument checking
141 | if (startIndex < 0) {
142 | // if startIndex is negative,
143 | // the function counts back startIndex number of characters from the end of text and then searches
144 | // towards the beginning of text.
145 | startIndex = text.getLength() + startIndex;
146 | }
147 | if (startIndex <= 0 || startIndex > text.getLength()) {
148 | intWritable.set(0);
149 | return intWritable;
150 | }
151 |
152 | int index = 0;
153 | int currentIndex = startIndex;
154 | for (int i = 0; i < nth; i++) {
155 | index = GenericUDFUtils.findText(text, subtext, currentIndex - 1) + 1;
156 | if (index == 0) {// not found
157 | intWritable.set(0);
158 | return intWritable;
159 | }
160 | currentIndex = index + 1;
161 | }
162 | intWritable.set(index);
163 | return intWritable;
164 | }
165 |
166 | @Override
167 | public String getDisplayString(String[] children) {
168 | assert (children.length >= 2 && children.length <= 4);
169 | StringBuffer displayString = new StringBuffer();
170 |
171 | displayString.append("instr()");
172 | for (int i = 0; i < children.length; i++) {
173 | displayString.append(children[i]);
174 | }
175 | displayString.append(")");
176 | return displayString.toString();
177 | }
178 | }
179 |
180 |
--------------------------------------------------------------------------------
/src/main/java/com/nexr/platform/hive/udf/GenericUDFLag.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing, software
13 | * distributed under the License is distributed on an "AS IS" BASIS,
14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | * See the License for the specific language governing permissions and
16 | * limitations under the License.
17 | */
18 |
19 | package com.nexr.platform.hive.udf;
20 |
21 | import java.util.ArrayList;
22 |
23 | import org.apache.commons.lang.StringUtils;
24 | import org.apache.hadoop.hive.ql.exec.Description;
25 | import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
26 | import org.apache.hadoop.hive.ql.metadata.HiveException;
27 | import org.apache.hadoop.hive.ql.udf.UDFType;
28 | import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
29 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
30 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
31 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter;
32 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
33 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption;
34 | import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
35 | import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
36 | import org.apache.hive.pdk.HivePdkUnitTest;
37 | import org.apache.hive.pdk.HivePdkUnitTests;
38 |
39 |
40 | @Description(name = "lag",
41 | value = "_FUNC_(hash_key,column[,offset[,default]]) " +
42 | "- Returns values from a previous row in the table.",
43 | extended = "Example:\n"
44 | + " > SELECT _FUNC_(HASH(p1,p2), column [,offset[,default]]) FROM (\n"
45 | + " > SELECT ~ FROM table DISTRIBUTE BY HASH(p1,p2) SORT BY p1,p2\n"
46 | + " > );")
47 |
48 | @HivePdkUnitTests(
49 | setup = "", cleanup = "",
50 | cases = {
51 | @HivePdkUnitTest(
52 | query = "SELECT t.empno, t.deptno, t.sal, nexr_lag(hash(t.deptno),t.sal) as col"
53 | +" FROM ("
54 | +" select a.empno, a.deptno, a.sal from emp a"
55 | +" distribute by hash(a.deptno)"
56 | +" sort BY a.deptno, a.empno"
57 | +" ) t;",
58 | result =
59 | "7782\t10\t2450\tNULL\n"
60 | +"7839\t10\t5000\t2450\n"
61 | +"7934\t10\t1300\t5000\n"
62 | +"7369\t20\t800\tNULL\n"
63 | +"7566\t20\t2975\t800\n"
64 | +"7788\t20\t3000\t2975\n"
65 | +"7876\t20\t1100\t3000\n"
66 | +"7902\t20\t3000\t1100\n"
67 | +"7499\t30\t1600\tNULL\n"
68 | +"7521\t30\t1250\t1600\n"
69 | +"7654\t30\t1250\t1250\n"
70 | +"7698\t30\t2850\t1250\n"
71 | +"7844\t30\t1500\t2850\n"
72 | +"7900\t30\t950\t1500"),
73 | @HivePdkUnitTest(
74 | query = "SELECT t.empno, t.deptno, t.sal, nexr_lag(hash(t.deptno),t.sal,2,0) as col"
75 | +" FROM ("
76 | +" select a.empno, a.deptno, a.sal from emp a "
77 | +" distribute by hash(a.deptno)"
78 | +" sort BY a.deptno, a.empno"
79 | +" ) t;",
80 | result =
81 | "7782\t10\t2450\t0\n"
82 | +"7839\t10\t5000\t0\n"
83 | +"7934\t10\t1300\t2450\n"
84 | +"7369\t20\t800\t0\n"
85 | +"7566\t20\t2975\t0\n"
86 | +"7788\t20\t3000\t800\n"
87 | +"7876\t20\t1100\t2975\n"
88 | +"7902\t20\t3000\t3000\n"
89 | +"7499\t30\t1600\t0\n"
90 | +"7521\t30\t1250\t0\n"
91 | +"7654\t30\t1250\t1600\n"
92 | +"7698\t30\t2850\t1250\n"
93 | +"7844\t30\t1500\t1250\n"
94 | +"7900\t30\t950\t2850")
95 | }
96 | )
97 |
98 |
99 | @UDFType(deterministic = false, stateful = true)
100 | public class GenericUDFLag extends GenericUDF {
101 | private ObjectInspector[] argumentOIs;
102 | private ObjectInspector resultOI, prevHashStandardOI, valueStandardOI;
103 | private Object prevHash;
104 | private ArrayList