├── .gitignore ├── LICENSE.txt ├── README.md ├── pom.xml ├── sample └── items.csv └── src └── main └── java └── com └── cloudera └── hive └── udf ├── examples ├── Sum.java └── Upper.java └── functions ├── DenseRank.java ├── FirstValue.java ├── ParseKeyValueTuple.java ├── Rank.java └── RowNumber.java /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled source # 2 | ################### 3 | *.com 4 | *.class 5 | *.dll 6 | *.exe 7 | *.o 8 | *.so 9 | 10 | # Packages # 11 | ############ 12 | # it's better to unpack these files and commit the raw source 13 | # git has its own built in compression methods 14 | *.7z 15 | *.dmg 16 | *.gz 17 | *.iso 18 | *.jar 19 | *.war 20 | *.ear 21 | *.rar 22 | *.tar 23 | *.zip 24 | 25 | # Logs and databases # 26 | ###################### 27 | *.log 28 | 29 | # OS generated files # 30 | ###################### 31 | .DS_Store 32 | .DS_Store? 33 | ._* 34 | .Spotlight-V100 35 | .Trashes 36 | Icon? 37 | ehthumbs.db 38 | Thumbs.db 39 | 40 | # IDE generated files # 41 | ############################ 42 | *.iml 43 | *.ipr 44 | *.iws 45 | .idea/ 46 | *.project 47 | *.classpath 48 | .settings/ 49 | 50 | # Java files # 51 | ############## 52 | *.class 53 | 54 | # Build Specific # 55 | ################ 56 | dist/* 57 | build/ 58 | target/ 59 | out/ 60 | lib_managed/ 61 | src_managed/ 62 | project/boot/ 63 | project/plugins/project/ 64 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 
12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 
48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. 
Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. 
We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. 203 | 204 | -------------------------------------------- 205 | OOZIE SUBCOMPONENTS: 206 | 207 | The Apache Oozie includes a number of subcomponents with 208 | separate copyright notices and license terms. Your use of the source 209 | code for these subcomponents is subject to the terms and 210 | conditions of the following licenses. 211 | 212 | -------------------------------------------- 213 | For the HSQLDB component: 214 | 215 | COPYRIGHTS AND LICENSES (based on BSD License) 216 | 217 | For work developed by the HSQL Development Group: 218 | 219 | Copyright (c) 2001-2010, The HSQL Development Group 220 | All rights reserved. 221 | 222 | Redistribution and use in source and binary forms, with or without 223 | modification, are permitted provided that the following conditions are met: 224 | 225 | Redistributions of source code must retain the above copyright notice, this 226 | list of conditions and the following disclaimer. 
227 | 228 | Redistributions in binary form must reproduce the above copyright notice, 229 | this list of conditions and the following disclaimer in the documentation 230 | and/or other materials provided with the distribution. 231 | 232 | Neither the name of the HSQL Development Group nor the names of its 233 | contributors may be used to endorse or promote products derived from this 234 | software without specific prior written permission. 235 | 236 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 237 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 238 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 239 | ARE DISCLAIMED. IN NO EVENT SHALL HSQL DEVELOPMENT GROUP, HSQLDB.ORG, 240 | OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 241 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 242 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 243 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 244 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 245 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 246 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 247 | 248 | 249 | For work originally developed by the Hypersonic SQL Group: 250 | 251 | Copyright (c) 1995-2000 by the Hypersonic SQL Group. 252 | All rights reserved. 253 | Redistribution and use in source and binary forms, with or without 254 | modification, are permitted provided that the following conditions are met: 255 | 256 | Redistributions of source code must retain the above copyright notice, this 257 | list of conditions and the following disclaimer. 258 | 259 | Redistributions in binary form must reproduce the above copyright notice, 260 | this list of conditions and the following disclaimer in the documentation 261 | and/or other materials provided with the distribution. 
262 | 263 | Neither the name of the Hypersonic SQL Group nor the names of its 264 | contributors may be used to endorse or promote products derived from this 265 | software without specific prior written permission. 266 | 267 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 268 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 269 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 270 | ARE DISCLAIMED. IN NO EVENT SHALL THE HYPERSONIC SQL GROUP, 271 | OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 272 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 273 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 274 | LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 275 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 276 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 277 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 278 | 279 | This software consists of voluntary contributions made by many individuals on behalf of the 280 | Hypersonic SQL Group. 281 | 282 | -------------------------------------------- 283 | For JDOM component : 284 | 285 | /*-- 286 | 287 | Copyright (C) 2000-2011 Jason Hunter & Brett McLaughlin. 288 | All rights reserved. 289 | 290 | Redistribution and use in source and binary forms, with or without 291 | modification, are permitted provided that the following conditions 292 | are met: 293 | 294 | 1. Redistributions of source code must retain the above copyright 295 | notice, this list of conditions, and the following disclaimer. 296 | 297 | 2. Redistributions in binary form must reproduce the above copyright 298 | notice, this list of conditions, and the disclaimer that follows 299 | these conditions in the documentation and/or other materials 300 | provided with the distribution. 301 | 302 | 3. 
The name "JDOM" must not be used to endorse or promote products 303 | derived from this software without prior written permission. For 304 | written permission, please contact <request_AT_jdom_DOT_org>. 305 | 306 | 4. Products derived from this software may not be called "JDOM", nor 307 | may "JDOM" appear in their name, without prior written permission 308 | from the JDOM Project Management <request_AT_jdom_DOT_org>. 309 | 310 | In addition, we request (but do not require) that you include in the 311 | end-user documentation provided with the redistribution and/or in the 312 | software itself an acknowledgement equivalent to the following: 313 | "This product includes software developed by the 314 | JDOM Project (http://www.jdom.org/)." 315 | Alternatively, the acknowledgment may be graphical using the logos 316 | available at http://www.jdom.org/images/logos. 317 | 318 | THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED 319 | WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 320 | OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 321 | DISCLAIMED. IN NO EVENT SHALL THE JDOM AUTHORS OR THE PROJECT 322 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 323 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 324 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF 325 | USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 326 | ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 327 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 328 | OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 329 | SUCH DAMAGE. 330 | 331 | This software consists of voluntary contributions made by many 332 | individuals on behalf of the JDOM Project and was originally 333 | created by Jason Hunter <jhunter_AT_jdom_DOT_org> and 334 | Brett McLaughlin <brett_AT_jdom_DOT_org>. For more information 335 | on the JDOM Project, please see <http://www.jdom.org/>. 
336 | 337 | */ 338 | 339 | -------------------------------------------- 340 | For PostgreSQL JDBC Driver component : 341 | 342 | BSD License 343 | 344 | The PostgreSQL JDBC driver is distributed under the BSD license, same as the server. The simplest explanation of the licensing terms is that you can do whatever you want with the product and source code as long as you don't claim you wrote it or sue us. You should give it a read though, it's only half a page. 345 | 346 | Copyright (c) 1997-2010, PostgreSQL Global Development Group 347 | All rights reserved. 348 | 349 | Redistribution and use in source and binary forms, with or without 350 | modification, are permitted provided that the following conditions are met: 351 | 352 | 1. Redistributions of source code must retain the above copyright notice, 353 | this list of conditions and the following disclaimer. 354 | 2. Redistributions in binary form must reproduce the above copyright notice, 355 | this list of conditions and the following disclaimer in the documentation 356 | and/or other materials provided with the distribution. 357 | 3. Neither the name of the PostgreSQL Global Development Group nor the names 358 | of its contributors may be used to endorse or promote products derived 359 | from this software without specific prior written permission. 360 | 361 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 362 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 363 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 364 | ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 365 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 366 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 367 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 368 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 369 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 370 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 371 | POSSIBILITY OF SUCH DAMAGE. 372 | 373 | 374 | -------------------------------------------- 375 | For Enterprise JavaBeans (EJB) 3.0, JavaBeans Activation Framework (JAF), JavaMail API component : 376 | 377 | 378 | COMMON DEVELOPMENT AND DISTRIBUTION LICENSE (CDDL) Version 1.0 379 | 380 | COMMON DEVELOPMENT AND DISTRIBUTION LICENSE (CDDL) Version 1.0 381 | 382 | 1. Definitions. 383 | 384 | 1.1. "Contributor" means each individual or entity that 385 | creates or contributes to the creation of Modifications. 386 | 387 | 1.2. "Contributor Version" means the combination of the 388 | Original Software, prior Modifications used by a 389 | Contributor (if any), and the Modifications made by that 390 | particular Contributor. 391 | 392 | 1.3. "Covered Software" means (a) the Original Software, or 393 | (b) Modifications, or (c) the combination of files 394 | containing Original Software with files containing 395 | Modifications, in each case including portions thereof. 396 | 397 | 1.4. "Executable" means the Covered Software in any form 398 | other than Source Code. 399 | 400 | 1.5. "Initial Developer" means the individual or entity 401 | that first makes Original Software available under this 402 | License. 403 | 404 | 1.6. "Larger Work" means a work which combines Covered 405 | Software or portions thereof with code not governed by the 406 | terms of this License. 407 | 408 | 1.7. "License" means this document. 409 | 410 | 1.8. 
"Licensable" means having the right to grant, to the 411 | maximum extent possible, whether at the time of the initial 412 | grant or subsequently acquired, any and all of the rights 413 | conveyed herein. 414 | 415 | 1.9. "Modifications" means the Source Code and Executable 416 | form of any of the following: 417 | 418 | A. Any file that results from an addition to, 419 | deletion from or modification of the contents of a 420 | file containing Original Software or previous 421 | Modifications; 422 | 423 | B. Any new file that contains any part of the 424 | Original Software or previous Modification; or 425 | 426 | C. Any new file that is contributed or otherwise made 427 | available under the terms of this License. 428 | 429 | 1.10. "Original Software" means the Source Code and 430 | Executable form of computer software code that is 431 | originally released under this License. 432 | 433 | 1.11. "Patent Claims" means any patent claim(s), now owned 434 | or hereafter acquired, including without limitation, 435 | method, process, and apparatus claims, in any patent 436 | Licensable by grantor. 437 | 438 | 1.12. "Source Code" means (a) the common form of computer 439 | software code in which modifications are made and (b) 440 | associated documentation included in or with such code. 441 | 442 | 1.13. "You" (or "Your") means an individual or a legal 443 | entity exercising rights under, and complying with all of 444 | the terms of, this License. For legal entities, "You" 445 | includes any entity which controls, is controlled by, or is 446 | under common control with You. For purposes of this 447 | definition, "control" means (a) the power, direct or 448 | indirect, to cause the direction or management of such 449 | entity, whether by contract or otherwise, or (b) ownership 450 | of more than fifty percent (50%) of the outstanding shares 451 | or beneficial ownership of such entity. 452 | 453 | 2. License Grants. 454 | 455 | 2.1. The Initial Developer Grant. 
456 | 457 | Conditioned upon Your compliance with Section 3.1 below and 458 | subject to third party intellectual property claims, the 459 | Initial Developer hereby grants You a world-wide, 460 | royalty-free, non-exclusive license: 461 | 462 | (a) under intellectual property rights (other than 463 | patent or trademark) Licensable by Initial Developer, 464 | to use, reproduce, modify, display, perform, 465 | sublicense and distribute the Original Software (or 466 | portions thereof), with or without Modifications, 467 | and/or as part of a Larger Work; and 468 | 469 | (b) under Patent Claims infringed by the making, 470 | using or selling of Original Software, to make, have 471 | made, use, practice, sell, and offer for sale, and/or 472 | otherwise dispose of the Original Software (or 473 | portions thereof). 474 | 475 | (c) The licenses granted in Sections 2.1(a) and (b) 476 | are effective on the date Initial Developer first 477 | distributes or otherwise makes the Original Software 478 | available to a third party under the terms of this 479 | License. 480 | 481 | (d) Notwithstanding Section 2.1(b) above, no patent 482 | license is granted: (1) for code that You delete from 483 | the Original Software, or (2) for infringements 484 | caused by: (i) the modification of the Original 485 | Software, or (ii) the combination of the Original 486 | Software with other software or devices. 487 | 488 | 2.2. Contributor Grant. 
489 | 490 | Conditioned upon Your compliance with Section 3.1 below and 491 | subject to third party intellectual property claims, each 492 | Contributor hereby grants You a world-wide, royalty-free, 493 | non-exclusive license: 494 | 495 | (a) under intellectual property rights (other than 496 | patent or trademark) Licensable by Contributor to 497 | use, reproduce, modify, display, perform, sublicense 498 | and distribute the Modifications created by such 499 | Contributor (or portions thereof), either on an 500 | unmodified basis, with other Modifications, as 501 | Covered Software and/or as part of a Larger Work; and 502 | 503 | (b) under Patent Claims infringed by the making, 504 | using, or selling of Modifications made by that 505 | Contributor either alone and/or in combination with 506 | its Contributor Version (or portions of such 507 | combination), to make, use, sell, offer for sale, 508 | have made, and/or otherwise dispose of: (1) 509 | Modifications made by that Contributor (or portions 510 | thereof); and (2) the combination of Modifications 511 | made by that Contributor with its Contributor Version 512 | (or portions of such combination). 513 | 514 | (c) The licenses granted in Sections 2.2(a) and 515 | 2.2(b) are effective on the date Contributor first 516 | distributes or otherwise makes the Modifications 517 | available to a third party. 518 | 519 | (d) Notwithstanding Section 2.2(b) above, no patent 520 | license is granted: (1) for any code that Contributor 521 | has deleted from the Contributor Version; (2) for 522 | infringements caused by: (i) third party 523 | modifications of Contributor Version, or (ii) the 524 | combination of Modifications made by that Contributor 525 | with other software (except as part of the 526 | Contributor Version) or other devices; or (3) under 527 | Patent Claims infringed by Covered Software in the 528 | absence of Modifications made by that Contributor. 529 | 530 | 3. Distribution Obligations. 
531 | 532 | 3.1. Availability of Source Code. 533 | 534 | Any Covered Software that You distribute or otherwise make 535 | available in Executable form must also be made available in 536 | Source Code form and that Source Code form must be 537 | distributed only under the terms of this License. You must 538 | include a copy of this License with every copy of the 539 | Source Code form of the Covered Software You distribute or 540 | otherwise make available. You must inform recipients of any 541 | such Covered Software in Executable form as to how they can 542 | obtain such Covered Software in Source Code form in a 543 | reasonable manner on or through a medium customarily used 544 | for software exchange. 545 | 546 | 3.2. Modifications. 547 | 548 | The Modifications that You create or to which You 549 | contribute are governed by the terms of this License. You 550 | represent that You believe Your Modifications are Your 551 | original creation(s) and/or You have sufficient rights to 552 | grant the rights conveyed by this License. 553 | 554 | 3.3. Required Notices. 555 | 556 | You must include a notice in each of Your Modifications 557 | that identifies You as the Contributor of the Modification. 558 | You may not remove or alter any copyright, patent or 559 | trademark notices contained within the Covered Software, or 560 | any notices of licensing or any descriptive text giving 561 | attribution to any Contributor or the Initial Developer. 562 | 563 | 3.4. Application of Additional Terms. 564 | 565 | You may not offer or impose any terms on any Covered 566 | Software in Source Code form that alters or restricts the 567 | applicable version of this License or the recipients' 568 | rights hereunder. You may choose to offer, and to charge a 569 | fee for, warranty, support, indemnity or liability 570 | obligations to one or more recipients of Covered Software. 
571 | However, you may do so only on Your own behalf, and not on 572 | behalf of the Initial Developer or any Contributor. You 573 | must make it absolutely clear that any such warranty, 574 | support, indemnity or liability obligation is offered by 575 | You alone, and You hereby agree to indemnify the Initial 576 | Developer and every Contributor for any liability incurred 577 | by the Initial Developer or such Contributor as a result of 578 | warranty, support, indemnity or liability terms You offer. 579 | 580 | 3.5. Distribution of Executable Versions. 581 | 582 | You may distribute the Executable form of the Covered 583 | Software under the terms of this License or under the terms 584 | of a license of Your choice, which may contain terms 585 | different from this License, provided that You are in 586 | compliance with the terms of this License and that the 587 | license for the Executable form does not attempt to limit 588 | or alter the recipient's rights in the Source Code form 589 | from the rights set forth in this License. If You 590 | distribute the Covered Software in Executable form under a 591 | different license, You must make it absolutely clear that 592 | any terms which differ from this License are offered by You 593 | alone, not by the Initial Developer or Contributor. You 594 | hereby agree to indemnify the Initial Developer and every 595 | Contributor for any liability incurred by the Initial 596 | Developer or such Contributor as a result of any such terms 597 | You offer. 598 | 599 | 3.6. Larger Works. 600 | 601 | You may create a Larger Work by combining Covered Software 602 | with other code not governed by the terms of this License 603 | and distribute the Larger Work as a single product. In such 604 | a case, You must make sure the requirements of this License 605 | are fulfilled for the Covered Software. 606 | 607 | 4. Versions of the License. 608 | 609 | 4.1. New Versions. 610 | 611 | Sun Microsystems, Inc. 
is the initial license steward and 612 | may publish revised and/or new versions of this License 613 | from time to time. Each version will be given a 614 | distinguishing version number. Except as provided in 615 | Section 4.3, no one other than the license steward has the 616 | right to modify this License. 617 | 618 | 4.2. Effect of New Versions. 619 | 620 | You may always continue to use, distribute or otherwise 621 | make the Covered Software available under the terms of the 622 | version of the License under which You originally received 623 | the Covered Software. If the Initial Developer includes a 624 | notice in the Original Software prohibiting it from being 625 | distributed or otherwise made available under any 626 | subsequent version of the License, You must distribute and 627 | make the Covered Software available under the terms of the 628 | version of the License under which You originally received 629 | the Covered Software. Otherwise, You may also choose to 630 | use, distribute or otherwise make the Covered Software 631 | available under the terms of any subsequent version of the 632 | License published by the license steward. 633 | 634 | 4.3. Modified Versions. 635 | 636 | When You are an Initial Developer and You want to create a 637 | new license for Your Original Software, You may create and 638 | use a modified version of this License if You: (a) rename 639 | the license and remove any references to the name of the 640 | license steward (except to note that the license differs 641 | from this License); and (b) otherwise make it clear that 642 | the license contains terms which differ from this License. 643 | 644 | 5. DISCLAIMER OF WARRANTY. 
645 | 646 | COVERED SOFTWARE IS PROVIDED UNDER THIS LICENSE ON AN "AS IS" 647 | BASIS, WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, 648 | INCLUDING, WITHOUT LIMITATION, WARRANTIES THAT THE COVERED 649 | SOFTWARE IS FREE OF DEFECTS, MERCHANTABLE, FIT FOR A PARTICULAR 650 | PURPOSE OR NON-INFRINGING. THE ENTIRE RISK AS TO THE QUALITY AND 651 | PERFORMANCE OF THE COVERED SOFTWARE IS WITH YOU. SHOULD ANY 652 | COVERED SOFTWARE PROVE DEFECTIVE IN ANY RESPECT, YOU (NOT THE 653 | INITIAL DEVELOPER OR ANY OTHER CONTRIBUTOR) ASSUME THE COST OF 654 | ANY NECESSARY SERVICING, REPAIR OR CORRECTION. THIS DISCLAIMER OF 655 | WARRANTY CONSTITUTES AN ESSENTIAL PART OF THIS LICENSE. NO USE OF 656 | ANY COVERED SOFTWARE IS AUTHORIZED HEREUNDER EXCEPT UNDER THIS 657 | DISCLAIMER. 658 | 659 | 6. TERMINATION. 660 | 661 | 6.1. This License and the rights granted hereunder will 662 | terminate automatically if You fail to comply with terms 663 | herein and fail to cure such breach within 30 days of 664 | becoming aware of the breach. Provisions which, by their 665 | nature, must remain in effect beyond the termination of 666 | this License shall survive. 667 | 668 | 6.2. 
If You assert a patent infringement claim (excluding 669 | declaratory judgment actions) against Initial Developer or 670 | a Contributor (the Initial Developer or Contributor against 671 | whom You assert such claim is referred to as "Participant") 672 | alleging that the Participant Software (meaning the 673 | Contributor Version where the Participant is a Contributor 674 | or the Original Software where the Participant is the 675 | Initial Developer) directly or indirectly infringes any 676 | patent, then any and all rights granted directly or 677 | indirectly to You by such Participant, the Initial 678 | Developer (if the Initial Developer is not the Participant) 679 | and all Contributors under Sections 2.1 and/or 2.2 of this 680 | License shall, upon 60 days notice from Participant 681 | terminate prospectively and automatically at the expiration 682 | of such 60 day notice period, unless if within such 60 day 683 | period You withdraw Your claim with respect to the 684 | Participant Software against such Participant either 685 | unilaterally or pursuant to a written agreement with 686 | Participant. 687 | 688 | 6.3. In the event of termination under Sections 6.1 or 6.2 689 | above, all end user licenses that have been validly granted 690 | by You or any distributor hereunder prior to termination 691 | (excluding licenses granted to You by any distributor) 692 | shall survive termination. 693 | 694 | 7. LIMITATION OF LIABILITY. 
695 | 696 | UNDER NO CIRCUMSTANCES AND UNDER NO LEGAL THEORY, WHETHER TORT 697 | (INCLUDING NEGLIGENCE), CONTRACT, OR OTHERWISE, SHALL YOU, THE 698 | INITIAL DEVELOPER, ANY OTHER CONTRIBUTOR, OR ANY DISTRIBUTOR OF 699 | COVERED SOFTWARE, OR ANY SUPPLIER OF ANY OF SUCH PARTIES, BE 700 | LIABLE TO ANY PERSON FOR ANY INDIRECT, SPECIAL, INCIDENTAL, OR 701 | CONSEQUENTIAL DAMAGES OF ANY CHARACTER INCLUDING, WITHOUT 702 | LIMITATION, DAMAGES FOR LOST PROFITS, LOSS OF GOODWILL, WORK 703 | STOPPAGE, COMPUTER FAILURE OR MALFUNCTION, OR ANY AND ALL OTHER 704 | COMMERCIAL DAMAGES OR LOSSES, EVEN IF SUCH PARTY SHALL HAVE BEEN 705 | INFORMED OF THE POSSIBILITY OF SUCH DAMAGES. THIS LIMITATION OF 706 | LIABILITY SHALL NOT APPLY TO LIABILITY FOR DEATH OR PERSONAL 707 | INJURY RESULTING FROM SUCH PARTY'S NEGLIGENCE TO THE EXTENT 708 | APPLICABLE LAW PROHIBITS SUCH LIMITATION. SOME JURISDICTIONS DO 709 | NOT ALLOW THE EXCLUSION OR LIMITATION OF INCIDENTAL OR 710 | CONSEQUENTIAL DAMAGES, SO THIS EXCLUSION AND LIMITATION MAY NOT 711 | APPLY TO YOU. 712 | 713 | 8. U.S. GOVERNMENT END USERS. 714 | 715 | The Covered Software is a "commercial item," as that term is 716 | defined in 48 C.F.R. 2.101 (Oct. 1995), consisting of "commercial 717 | computer software" (as that term is defined at 48 C.F.R. ¤ 718 | 252.227-7014(a)(1)) and "commercial computer software 719 | documentation" as such terms are used in 48 C.F.R. 12.212 (Sept. 720 | 1995). Consistent with 48 C.F.R. 12.212 and 48 C.F.R. 227.7202-1 721 | through 227.7202-4 (June 1995), all U.S. Government End Users 722 | acquire Covered Software with only those rights set forth herein. 723 | This U.S. Government Rights clause is in lieu of, and supersedes, 724 | any other FAR, DFAR, or other clause or provision that addresses 725 | Government rights in computer software under this License. 726 | 727 | 9. MISCELLANEOUS. 728 | 729 | This License represents the complete agreement concerning subject 730 | matter hereof. 
If any provision of this License is held to be 731 | unenforceable, such provision shall be reformed only to the 732 | extent necessary to make it enforceable. This License shall be 733 | governed by the law of the jurisdiction specified in a notice 734 | contained within the Original Software (except to the extent 735 | applicable law, if any, provides otherwise), excluding such 736 | jurisdiction's conflict-of-law provisions. Any litigation 737 | relating to this License shall be subject to the jurisdiction of 738 | the courts located in the jurisdiction and venue specified in a 739 | notice contained within the Original Software, with the losing 740 | party responsible for costs, including, without limitation, court 741 | costs and reasonable attorneys' fees and expenses. The 742 | application of the United Nations Convention on Contracts for the 743 | International Sale of Goods is expressly excluded. Any law or 744 | regulation which provides that the language of a contract shall 745 | be construed against the drafter shall not apply to this License. 746 | You agree that You alone are responsible for compliance with the 747 | United States export administration regulations (and the export 748 | control laws and regulation of any other countries) when You use, 749 | distribute or otherwise make available any Covered Software. 750 | 751 | 10. RESPONSIBILITY FOR CLAIMS. 752 | 753 | As between Initial Developer and the Contributors, each party is 754 | responsible for claims and damages arising, directly or 755 | indirectly, out of its utilization of rights under this License 756 | and You agree to work with Initial Developer and Contributors to 757 | distribute such responsibility on an equitable basis. Nothing 758 | herein is intended or shall be deemed to constitute any admission 759 | of liability. 760 | 761 | 762 | -------------------------------------------- 763 | For serp component : 764 | 765 | Serp BSD License 766 | Copyright (c) 2002-2006, A. 
Abram White 767 | All rights reserved. 768 | Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 769 | Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 770 | Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 771 | Neither the name of 'serp' nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 772 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 'AS IS' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 773 | 774 | 775 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Hive UDFs 2 | ========= 3 | 4 | Taking inspiration from Edward Capriolo, this project shows how to perform the rank() and dense_rank() functions 5 | within Hive, taking into account tied ranks. Also available is an implementation of first_value() and a UDTF of parse_key_val_tuple(). 
6 | 7 | 8 | Building 9 | -------- 10 | 11 | This project uses Maven. To build the software, simply use "mvn package". 12 | 13 | 14 | Deploying within Hive 15 | --------------------- 16 | 17 | To make the jar available on a temporary basis: 18 | 19 | hive> add jar /home/paul/hive-udf-0.1-SNAPSHOT.jar; 20 | 21 | To make the function available on a temporary basis: 22 | 23 | hive> CREATE TEMPORARY FUNCTION rank AS 'com.cloudera.hive.udf.functions.Rank'; 24 | 25 | 26 | Usage: rank(), dense_rank() and first_value() 27 | --------------------------------------------- 28 | 29 | The rank(), dense_rank() and first_value() functions take a minimum of one parameter - the column that is to be ranked / retrieved. Since the 30 | functions have no ability to sort data for themselves, we use a sub-query to appropriately distribute and sort data before passing to rank(). 31 | 32 | For example, imagine we have a table such as the following: 33 | 34 | select * from items; 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 
| item | category | price |
|------|----------|-------|
| Orange | Fruit | 0.30 |
| Apple | Fruit | 0.25 |
| Banana | Fruit | 0.75 |
| Carrot | Veg | 0.20 |
| Sprout | Veg | 1.75 |
| Kiwi | Fruit | 0.30 |
45 | 46 | To use the rank function, prepare the data with an inner query: 47 | 48 | select item, category, price from items distribute by category sort by category, price; 49 | 50 | Then wrap this in another query that applies the rank function: 51 | 52 | select item, category, price, rank(price, category) from ( 53 | select item, category, price from items distribute by category sort by category, price) inner; 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 |
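| item | category | price | rank |
|------|----------|-------|------|
| Apple | Fruit | 0.25 | 1 |
| Orange | Fruit | 0.30 | 2 |
| Kiwi | Fruit | 0.30 | 2 |
| Banana | Fruit | 0.75 | 4 |
| Carrot | Veg | 0.20 | 1 |
| Sprout | Veg | 1.75 | 2 |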
63 | 64 | 65 | Notice that rank() takes the price column as the first parameter; the rest of the parameters are used to determine the row groupings. 66 | 67 | 68 | Usage: parse_key_val_tuple() 69 | ---------------------------- 70 | 71 | The parse_key_val_tuple() function is a UDTF that takes a minimum of 4 parameters: the input string to be parsed, the delimiter between all the fields, 72 | the separator between key and value pairs, and one or more keys that you would like to extract. Note that the parameters are case sensitive. 73 | 74 | See the following links for Hive UDTF usage instructions: 75 | 76 | - [Hive Table-GeneratingFunctions(UDTF)](https://cwiki.apache.org/confluence/display/Hive/LanguageManual+UDF#LanguageManualUDF-Built-inTable-GeneratingFunctions\(UDTF\)) 77 | - [Hive Lateral Views](https://cwiki.apache.org/confluence/display/Hive/LanguageManual+LateralView) 78 | 79 | Below are two sample queries that show the function's basic capabilities and perhaps illustrate the outcome of some edge cases: 80 | 81 | *Demo Setup* 82 | 83 | A one row table called 'dual' is used for selecting demo values statically for demo purposes only. 
84 | 85 | echo "X" > dummy.txt 86 | hive 87 | CREATE TABLE dual(dummy STRING); 88 | LOAD DATA LOCAL INPATH 'dummy.txt' OVERWRITE INTO TABLE dual; 89 | 90 | ADD JAR /full/path/to/local/jar/hive-udf-0.1-SNAPSHOT.jar; 91 | CREATE TEMPORARY FUNCTION parse_key_val_tuple AS 'com.cloudera.hive.udf.functions.ParseKeyValueTuple'; 92 | 93 | *Static Parameter Sample:* 94 | 95 | SELECT data.label, t.* 96 | -- Static data from 'dummy' table for demo 97 | FROM( 98 | -- String to show many edge cases 99 | SELECT 'edge' AS label, 'foo=bar&extra=extra=separator&&empty=&bad&unused=string' AS text FROM dual LIMIT 1 100 | UNION ALL 101 | -- Good string 102 | SELECT 'good' AS label, 'foo=bar&extra=not-extra&&empty=not-empty&bad=not-bad' AS text FROM dual LIMIT 1 103 | UNION ALL 104 | -- Empty string 105 | SELECT 'empty' AS label, '' AS text FROM dual LIMIT 1 106 | UNION ALL 107 | -- NULL string 108 | SELECT 'null' AS label, NULL AS text FROM dual LIMIT 1 109 | ) data 110 | LATERAL VIEW parse_key_val_tuple(text, '&', '=', 'foo', 'extra', 'empty', 'bad') t AS foo, extra, empty, bad; 111 | 112 | 113 | *Dynamic Parameter Sample:* 114 | 115 | SELECT t.* 116 | -- Static data from 'dummy' table for demo 117 | FROM( 118 | -- type = person 119 | SELECT 'greeting=Hello\;person=Mr. 
Smith' AS text, 'person' AS type FROM dual LIMIT 1 120 | UNION ALL 121 | -- type = thing 122 | SELECT 'greeting=Hi\;thing=World' AS text, 'thing' AS type FROM dual LIMIT 1 123 | ) data 124 | -- lookup name by type 125 | LATERAL VIEW parse_key_val_tuple(text, '\;', '=', 'greeting', data.type) t AS greeting, name; 126 | 127 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4 | 4.0.0 5 | com.cloudera 6 | hive-udf 7 | 0.1-SNAPSHOT 8 | jar 9 | 10 | 11 | 12 | central 13 | Maven Central 14 | http://repo1.maven.org/maven2/ 15 | 16 | 17 | cloudera-repo 18 | Cloudera CDH 19 | https://repository.cloudera.com/artifactory/cloudera-repos/ 20 | 21 | 22 | 23 | hive-udf 24 | https://github.com/paulmw/hive-udf 25 | 26 | 27 | UTF-8 28 | 2.0.0-cdh4.1.0 29 | 0.9.0-cdh4.1.0 30 | 31 | 32 | 33 | 34 | 35 | 36 | org.apache.maven.plugins 37 | maven-compiler-plugin 38 | 2.3.2 39 | 40 | 1.6 41 | 1.6 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | junit 51 | junit 52 | 4.10 53 | test 54 | 55 | 56 | org.apache.hadoop 57 | hadoop-common 58 | ${hadoop.version} 59 | provided 60 | 61 | 62 | 63 | org.apache.hive 64 | hive-anttasks 65 | ${hive.version} 66 | 67 | 68 | org.apache.hive 69 | hive-cli 70 | ${hive.version} 71 | 72 | 73 | org.apache.hive 74 | hive-common 75 | ${hive.version} 76 | 77 | 78 | org.apache.hive 79 | hive-contrib 80 | ${hive.version} 81 | 82 | 83 | org.apache.hive 84 | hive-exec 85 | ${hive.version} 86 | 87 | 88 | org.apache.hive 89 | hive-metastore 90 | ${hive.version} 91 | 92 | 93 | org.apache.hive 94 | hive-serde 95 | ${hive.version} 96 | 97 | 98 | org.apache.hive 99 | hive-service 100 | ${hive.version} 101 | 102 | 103 | 104 | org.apache.hive 105 | hive-shims 106 | ${hive.version} 107 | 108 | 109 | 110 | org.apache.hive 111 | hive-builtins 112 | ${hive.version} 113 | 114 | 115 | 116 | 117 | 118 | 
-------------------------------------------------------------------------------- /sample/items.csv: -------------------------------------------------------------------------------- 1 | Apple,Fruit,0.25 2 | Orange,Fruit,0.30 3 | Kiwi,Fruit,0.30 4 | Banana,Fruit,0.75 5 | Carrot,Veg,0.20 6 | Sprout,Veg,1.75 -------------------------------------------------------------------------------- /src/main/java/com/cloudera/hive/udf/examples/Sum.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | 19 | package com.cloudera.hive.udf.examples; 20 | 21 | import org.apache.hadoop.hive.ql.exec.UDAF; 22 | import org.apache.hadoop.hive.ql.exec.UDAFEvaluator; 23 | import org.apache.hadoop.hive.ql.udf.UDFType; 24 | 25 | @UDFType() 26 | public class Sum extends UDAF { 27 | 28 | public static class SumIntUDAFEvaluator implements UDAFEvaluator { 29 | 30 | private int result; 31 | 32 | public void init() { 33 | result = 0; 34 | } 35 | 36 | public boolean iterate(int value) { 37 | result += value; 38 | return true; 39 | } 40 | 41 | public boolean merge(int other) { 42 | return iterate(other); 43 | } 44 | 45 | public int terminatePartial() { 46 | return result; 47 | } 48 | 49 | public int terminate() { 50 | return result; 51 | } 52 | 53 | } 54 | 55 | } -------------------------------------------------------------------------------- /src/main/java/com/cloudera/hive/udf/examples/Upper.java: -------------------------------------------------------------------------------- 1 | /** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.cloudera.hive.udf.examples; import org.apache.hadoop.hive.ql.exec.UDF; /** * This is an example of a Hive UDF that uses the reflective API. 
*/ public class Upper extends UDF { /** * This function doesn't override a predefined * @param value - the string to be converted to upper case. * @return */ public String evaluate(String value) { return value.toUpperCase(); } } -------------------------------------------------------------------------------- /src/main/java/com/cloudera/hive/udf/functions/DenseRank.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | 19 | package com.cloudera.hive.udf.functions; 20 | 21 | import org.apache.hadoop.hive.ql.exec.Description; 22 | import org.apache.hadoop.hive.ql.exec.UDFArgumentException; 23 | import org.apache.hadoop.hive.ql.metadata.HiveException; 24 | import org.apache.hadoop.hive.ql.udf.UDFType; 25 | import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; 26 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; 27 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; 28 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.ObjectInspectorOptions; 29 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; 30 | import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; 31 | 32 | /** 33 | * This UDF provides a dense_rank() function. 34 | */ 35 | @Description(name = "rank", value = "_FUNC_(value, partition columns ...) - Returns the dense_rank of a value within a partitioned, sorted window.") 36 | @UDFType(deterministic = false, stateful = true) 37 | public class DenseRank extends GenericUDF { 38 | 39 | private long counter; 40 | private Object[] previousKey; 41 | private ObjectInspector[] ois; 42 | 43 | @Override 44 | public Object evaluate(DeferredObject[] currentKey) throws HiveException { 45 | if (!sameGroup(currentKey)) { 46 | this.counter = 0; 47 | copyToPreviousKey(currentKey); 48 | return new Long(++this.counter); 49 | } else { 50 | // Same group. Same value as well? 
51 | if (!sameValue(currentKey)) { 52 | copyToPreviousKey(currentKey); 53 | return new Long(++this.counter); 54 | } else { 55 | copyToPreviousKey(currentKey); 56 | return new Long(this.counter); 57 | } 58 | } 59 | } 60 | 61 | @Override 62 | public String getDisplayString(String[] currentKey) { 63 | return "DenseRank"; 64 | } 65 | 66 | @Override 67 | public ObjectInspector initialize(ObjectInspector[] arg0) throws UDFArgumentException { 68 | ois=arg0; 69 | return PrimitiveObjectInspectorFactory.javaLongObjectInspector; 70 | } 71 | 72 | /** 73 | * This will help us copy objects from currrentKey to previousKeyHolder. 74 | * 75 | * @param currentKey 76 | * @throws HiveException 77 | */ 78 | private void copyToPreviousKey(DeferredObject[] currentKey) throws HiveException { 79 | if (currentKey != null) { 80 | previousKey = new Object[currentKey.length]; 81 | for (int index = 0; index < currentKey.length; index++) { 82 | previousKey[index]= ObjectInspectorUtils 83 | .copyToStandardObject(currentKey[index].get(),this.ois[index]); 84 | 85 | } 86 | } 87 | } 88 | 89 | /** 90 | * This will help us compare the currentKey and previousKey objects. 
91 | * 92 | * @param currentKey 93 | * @return - true if both are same else false 94 | * @throws HiveException 95 | */ 96 | private boolean sameGroup(DeferredObject[] currentKey) throws HiveException { 97 | boolean status = false; 98 | 99 | //if both are null then we can classify as same 100 | if (currentKey == null && previousKey == null) { 101 | status = true; 102 | } 103 | 104 | //if both are not null and there legnth as well as 105 | //individual elements are same then we can classify as same 106 | if (currentKey != null && previousKey != null && currentKey.length == previousKey.length) { 107 | for (int index = 1; index < currentKey.length; index++) { 108 | 109 | if (ObjectInspectorUtils.compare(currentKey[index].get(), this.ois[index], 110 | previousKey[index], 111 | ObjectInspectorFactory.getReflectionObjectInspector(previousKey[index].getClass(), ObjectInspectorOptions.JAVA)) != 0) { 112 | 113 | return false; 114 | } 115 | 116 | } 117 | status = true; 118 | } 119 | return status; 120 | } 121 | 122 | /** 123 | * This will help us compare the currentKey and previousKey objects. 
124 | * 125 | * @param currentKey 126 | * @return - true if both are same else false 127 | * @throws HiveException 128 | */ 129 | private boolean sameValue(DeferredObject[] currentKey) throws HiveException { 130 | boolean status = false; 131 | 132 | //if both are null then we can classify as same 133 | if (currentKey == null && previousKey == null) { 134 | status = true; 135 | } 136 | 137 | //if both are not null and there legnth as well as 138 | //individual elements are same then we can classify as same 139 | if (currentKey != null && previousKey != null && currentKey.length == previousKey.length) { 140 | // for (int index = 1; index < currentKey.length; index++) { 141 | 142 | if (ObjectInspectorUtils.compare(currentKey[0].get(), this.ois[0], 143 | previousKey[0], 144 | ObjectInspectorFactory.getReflectionObjectInspector(previousKey[0].getClass(), ObjectInspectorOptions.JAVA)) != 0) { 145 | 146 | return false; 147 | } 148 | 149 | // } 150 | status = true; 151 | } 152 | return status; 153 | } 154 | 155 | } 156 | -------------------------------------------------------------------------------- /src/main/java/com/cloudera/hive/udf/functions/FirstValue.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package com.cloudera.hive.udf.functions; 20 | 21 | import org.apache.hadoop.hive.ql.exec.Description; 22 | import org.apache.hadoop.hive.ql.exec.UDFArgumentException; 23 | import org.apache.hadoop.hive.ql.metadata.HiveException; 24 | import org.apache.hadoop.hive.ql.udf.UDFType; 25 | import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; 26 | import org.apache.hadoop.hive.ql.udf.generic.GenericUDFUtils; 27 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; 28 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; 29 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.ObjectInspectorOptions; 30 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; 31 | 32 | /** 33 | * This UDF provides a first_value() function. 34 | */ 35 | @Description(name = "first_value", value = "_FUNC_(value, optional partition columns ...) - Returns the first_value of a column within a partitioned, sorted window.") 36 | @UDFType(deterministic = false, stateful = true) 37 | public class FirstValue extends GenericUDF { 38 | 39 | private boolean firstRow; 40 | private Object value; 41 | private Object[] previous; 42 | private ObjectInspector[] ois; 43 | 44 | @Override 45 | public ObjectInspector initialize(ObjectInspector[] ois) throws UDFArgumentException { 46 | this.firstRow = true; 47 | this.ois = ois; 48 | GenericUDFUtils.ReturnObjectInspectorResolver roir = new GenericUDFUtils.ReturnObjectInspectorResolver(true); 49 | roir.update(ois[0]); 50 | return roir.get(); 51 | } 52 | 53 | /** 54 | * This expects multiple parameters: the first should be the value cache, the rest should be the PARTITION BY columns. 
55 | */ 56 | @Override 57 | public Object evaluate(DeferredObject[] current) throws HiveException { 58 | if(firstRow) { 59 | this.value = ObjectInspectorUtils.copyToStandardObject(current[0].get(), this.ois[0]); 60 | firstRow = false; 61 | copyToPreviousKey(current); 62 | } else { 63 | if (!groupIsUnchanged(current)) { 64 | this.value = ObjectInspectorUtils.copyToStandardObject(current[0].get(), this.ois[0]); 65 | } 66 | copyToPreviousKey(current); 67 | } 68 | return value; 69 | } 70 | 71 | @Override 72 | public String getDisplayString(String[] currentKey) { 73 | return "FV"; 74 | } 75 | 76 | 77 | 78 | /** 79 | * This will help us copy objects from currrentKey to previousKeyHolder. 80 | * 81 | * @param currentKey 82 | * @throws HiveException 83 | */ 84 | private void copyToPreviousKey(DeferredObject[] currentKey) throws HiveException { 85 | if (currentKey != null) { 86 | previous = new Object[currentKey.length]; 87 | for (int index = 0; index < currentKey.length; index++) { 88 | previous[index] = ObjectInspectorUtils.copyToStandardObject(currentKey[index].get(), this.ois[index]); 89 | } 90 | } 91 | } 92 | 93 | /** 94 | * This will help us compare the currentKey and previousKey objects. 95 | * 96 | * @param currentKey 97 | * @return - true if both are same else false 98 | * @throws HiveException 99 | */ 100 | private boolean groupIsUnchanged(DeferredObject[] currentKey) throws HiveException { 101 | boolean status = false; 102 | 103 | //if both are null then we can classify as same 104 | if (currentKey == null && previous == null) { 105 | status = true; 106 | } 107 | 108 | //if both are not null and their lengths as well as 109 | //individual elements are same then we can classify as same 110 | if (currentKey != null && previous != null && currentKey.length == previous.length) { 111 | for (int index = 1; index < currentKey.length; index++) { // Note the 1 here! 
INDEX 0 is the value, INDEX 1+ are the partition columns 112 | 113 | if (ObjectInspectorUtils.compare(currentKey[index].get(), this.ois[index], 114 | previous[index], 115 | ObjectInspectorFactory.getReflectionObjectInspector(previous[index].getClass(), ObjectInspectorOptions.JAVA)) != 0) { 116 | 117 | return false; 118 | } 119 | 120 | } 121 | status = true; 122 | } 123 | return status; 124 | } 125 | } 126 | -------------------------------------------------------------------------------- /src/main/java/com/cloudera/hive/udf/functions/ParseKeyValueTuple.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 
17 | */ 18 | 19 | package com.cloudera.hive.udf.functions; 20 | 21 | import com.google.common.base.Splitter; 22 | import com.google.common.collect.ImmutableList; 23 | import org.apache.commons.lang.StringUtils; 24 | import org.apache.commons.logging.Log; 25 | import org.apache.commons.logging.LogFactory; 26 | import org.apache.hadoop.hive.ql.exec.Description; 27 | import org.apache.hadoop.hive.ql.exec.UDFArgumentException; 28 | import org.apache.hadoop.hive.ql.metadata.HiveException; 29 | import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF; 30 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; 31 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; 32 | import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; 33 | import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; 34 | import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector; 35 | import org.apache.hadoop.io.Text; 36 | 37 | import java.util.*; 38 |
/**
 * This UDTF provides a function to parse a string of delimited key value pairs.
 * An example of this would be a URL Query string or a string representation of a cookie.
 * <p>
 * Note: Both static and dynamic input parameters are supported.
 * <p>
 * Example Strings:<br>
 * URL Query: {@code "KEY1=val1&KEY2=val2&KEY3=val3"}<br>
 * Cookie String: {@code "KEY1=val1;KEY2=val2;KEY3=val3"}
 * <p>
 * Example Queries:<br>
 * URL Query: {@code "SELECT b.* FROM src LATERAL VIEW _FUNC_(inputString, '&', '=', 'KEY1', 'KEY2', 'KEY3') b as key1, key2, key3 LIMIT 1;"}<br>
 * Cookie String: {@code "SELECT b.* FROM src LATERAL VIEW _FUNC_(inputString, '\;', '=', 'KEY1', 'KEY2', 'KEY3') b as key1, key2, key3 LIMIT 1;"}
 *
 * @see org.apache.hadoop.hive.ql.udf.generic.GenericUDTF
 */
@Description(name = "parse_key_val_tuple",
        value = "_FUNC_(inputString, fieldDelimiter, keyValSeparator, keyName, keyName2, ..., keyNameN) - extracts N (N>=1) parts from a delimited key value String.\n"
                + "It takes an inputString, fieldDelimiter, keyValSeparator, and one or multiple keyNames, and returns a tuple. "
                + "All the input parameters and output column types are string.",
        extended = "Note: All parameters are case-sensitive, and should not contain unnecessary white spaces.\n"
                + "Note: Delimiter and separator characters such as ';' may need to be escaped\n"
                + "Example:\n"
                + " > SELECT b.* FROM src LATERAL VIEW _FUNC_(inputString, '\\;', '=', 'KEY1', 'KEY2', 'KEY3') b as key1, key2, key3 LIMIT 1;")
public class ParseKeyValueTuple extends GenericUDTF {
    private static final Log LOG = LogFactory.getLog(ParseKeyValueTuple.class.getName());
    private static final String FUNCTION_NAME = "parse_key_val_tuple";
    private static final int STATIC_ARG_COUNT = 3; // inputString, fieldDelimiter, keyValSeparator
    private static final String REQUIRED_TYPE = "string";

    private transient ObjectInspector[] inputOIs; // Input ObjectInspectors
    private int numCols; // Number of output columns
    private Text[] cols; // Object pool of non-null Text, avoid creating objects all the time
    private transient Object[] nullCols; // Array of null column values (returned during errors)
    private boolean nullWarned = false; // true once the "null row" warning has been logged
    private boolean mapWarned = false; // true once the "duplicate key" warning has been logged

    /**
     * Nothing to release; this UDTF holds no external resources.
     */
    @Override
    public void close() {
    }

    /**
     * Initializes the UDTF fields and builds the StructObjectInspector for the output columns.
     *
     * @param args the UDTF args
     * @return output column StructObjectInspector
     * @throws UDFArgumentException when the arguments are invalid
     */
    @Override
    public StructObjectInspector initialize(final ObjectInspector[] args) throws UDFArgumentException {
        validateArgs(args);
        // Initialize fields
        inputOIs = args;
        numCols = args.length - STATIC_ARG_COUNT;
        cols = new Text[numCols];
        nullCols = new Object[numCols]; // every element stays null: this is the "all columns null" row
        nullWarned = false;
        mapWarned = false;
        // Fill the Text pool, one reusable instance per output column
        for (int i = 0; i < numCols; ++i) {
            cols[i] = new Text();
        }
        return createOutputObjectInspector();
    }

    /**
     * Validates the arity and type of the input arguments.
     *
     * @param args the arguments to validate
     * @throws UDFArgumentException when fewer than {@code STATIC_ARG_COUNT + 1} arguments are given
     *                              or when any argument is not of primitive string type
     */
    private void validateArgs(final ObjectInspector[] args) throws UDFArgumentException {
        // Validate argument arity
        if (args.length < STATIC_ARG_COUNT + 1) {
            // Parenthesised so the sum is computed numerically instead of being concatenated as "31"
            throw new UDFArgumentException(FUNCTION_NAME + " takes at least " + (STATIC_ARG_COUNT + 1)
                    + " arguments: the string, fieldDelimiter, keyValSeparator, and a key name");
        }
        // Validate all arguments are string type
        for (final ObjectInspector arg : args) {
            if (arg.getCategory() != ObjectInspector.Category.PRIMITIVE || !REQUIRED_TYPE.equals(arg.getTypeName())) {
                throw new UDFArgumentException(FUNCTION_NAME + "'s arguments have to be " + REQUIRED_TYPE + " type");
            }
        }
    }

    /**
     * Creates an output StructObjectInspector based on the number of columns needed.
     * Assumes all output types will be Text.
     *
     * @return an output StructObjectInspector
     */
    private StructObjectInspector createOutputObjectInspector() {
        final List<String> fieldNames = new ArrayList<String>(numCols);
        final List<ObjectInspector> fieldOIs = new ArrayList<ObjectInspector>(numCols);
        for (int i = 0; i < numCols; ++i) {
            fieldNames.add("c" + i); // column name can be anything since it will be named by the UDTF "as" clause
            fieldOIs.add(PrimitiveObjectInspectorFactory.writableStringObjectInspector); // all returned type will be Text
        }
        return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldOIs);
    }

    /**
     * Process the UDTF input values and forward the resulting rows.
     * Exactly one row is forwarded per call: a row of nulls when the input string is null
     * or any argument is empty, otherwise one column per requested key name.
     *
     * @param o UDTF input values
     * @throws HiveException propagated from {@code forward}
     */
    @Override
    public void process(final Object[] o) throws HiveException {
        if (o[0] == null) {
            forward(nullCols);
            return;
        }

        // Get UDTF input values
        final String inputStr = getStringFromInputObjects(o, 0);
        final String fieldDelimiter = getStringFromInputObjects(o, 1);
        final String keyValSeparator = getStringFromInputObjects(o, 2);
        final List<String> keyNames = getKeyNamesFromInputObjects(o);

        if (inputValueIsEmpty(inputStr, fieldDelimiter, keyValSeparator, keyNames)) {
            if (!nullWarned) {
                LOG.warn("At least 1 Null row returned. An input argument was empty. Additional warnings for a null row will be suppressed.");
                nullWarned = true;
            }
            forward(nullCols);
            return;
        }

        final Map<String, String> keyValMap = getKeyValMap(inputStr, fieldDelimiter, keyValSeparator, keyNames);
        final Text[] returnColumns = getReturnColumnValues(keyNames, keyValMap);
        forward(returnColumns);
    }

    /**
     * Gets a list of the keyNames from the passed object array based on UDTF arguments.
     * A null key name is normalised to the empty string: ImmutableList rejects null elements,
     * and an empty name is then reported by {@code inputValueIsEmpty} like any other empty argument.
     *
     * @param o input objects
     * @return an immutable list of keyNames, one per output column, never containing null
     */
    private List<String> getKeyNamesFromInputObjects(final Object[] o) {
        final ImmutableList.Builder<String> builder = new ImmutableList.Builder<String>();
        for (int i = 0; i < numCols; i++) {
            final String keyName = getStringFromInputObjects(o, i + STATIC_ARG_COUNT);
            builder.add(keyName == null ? "" : keyName);
        }
        return builder.build();
    }

    /**
     * Gets a string value from the passed object array based on UDTF arguments and an index.
     *
     * @param o input objects
     * @param i index to retrieve
     * @return the string value
     */
    private String getStringFromInputObjects(final Object[] o, final int i) {
        return ((StringObjectInspector) inputOIs[i]).getPrimitiveJavaObject(o[i]);
    }

    /**
     * Returns true if any of the input strings are empty.
     *
     * @param inputStr        the input string
     * @param fieldDelimiter  the field delimiter
     * @param keyValSeparator the key value separator
     * @param keyNames        the key names
     * @return true if any of the strings are null or empty
     */
    private boolean inputValueIsEmpty(final String inputStr, final String fieldDelimiter, final String keyValSeparator, final List<String> keyNames) {
        return StringUtils.isEmpty(inputStr) ||
                StringUtils.isEmpty(fieldDelimiter) ||
                StringUtils.isEmpty(keyValSeparator) ||
                keyNames.contains("");
    }

    /**
     * Processes the input string into a KeyValue Map utilizing the fieldDelimiter and keyValSeparator.
     * Only considers valid pairs (has keyValSeparator) with non-empty/null keys that are in keyNames.
     * <p>
     * Note: The key is the string before the first occurrence of keyValSeparator and the value is everything after.<br>
     * Note: Parsing stops as soon as every distinct key name has been seen once. If a key repeats
     * before that point, the later value replaces the earlier one and a warning is logged once.
     *
     * @param inputString     the string to be processed
     * @param fieldDelimiter  separator between KeyValue pairs
     * @param keyValSeparator separator between key and value
     * @param keyNames        used to filter values inserted
     * @return the key value map for keyNames
     */
    private Map<String, String> getKeyValMap(final String inputString, final String fieldDelimiter, final String keyValSeparator, final List<String> keyNames) {
        final Set<String> uniqueKeyNames = new HashSet<String>(keyNames); // Optimize in the case of duplicate key names
        final Map<String, String> keyValMap = new HashMap<String, String>(uniqueKeyNames.size()); // Initialized with the expected size
        final Iterable<String> splitIterable = Splitter.on(fieldDelimiter).omitEmptyStrings().split(inputString); // Lazy split to prevent excessive allocation
        int count = 0; // Counter to break out when we have seen all of the uniqueKeyNames
        for (final String keyValPair : splitIterable) {
            // Skip fragments without a separator before doing any substring work
            if (!StringUtils.contains(keyValPair, keyValSeparator)) {
                continue;
            }
            final String key = StringUtils.substringBefore(keyValPair, keyValSeparator);
            // Only consider non-empty keys that were actually requested
            if (StringUtils.isEmpty(key) || !uniqueKeyNames.contains(key)) {
                continue;
            }
            final String value = StringUtils.substringAfter(keyValPair, keyValSeparator);
            final String prev = keyValMap.put(key, value);
            if (prev == null) {
                count++;
            } else if (!mapWarned) { // Otherwise a key was replaced
                LOG.warn("At least 1 inputString had a duplicate key for a keyName. The second value will be represented. Additional warnings for a duplicate key will be suppressed.");
                mapWarned = true;
            }
            if (count >= uniqueKeyNames.size()) {
                break; // We have seen all of the keyNames needed
            }
        }
        return keyValMap;
    }

    /**
     * Retrieves all of the column values to be returned.
     * Columns whose key is absent from the map stay null; the others reuse the pooled
     * Text instance for that column instead of allocating a new object.
     *
     * @param keyNames  keyNames of the return columns
     * @param keyValMap KeyValMap containing the values
     * @return Text array of return column values
     */
    private Text[] getReturnColumnValues(final List<String> keyNames, final Map<String, String> keyValMap) {
        final Text[] returnColumns = new Text[numCols];
        for (int i = 0; i < numCols; ++i) {
            final String ret = keyValMap.get(keyNames.get(i));
            if (ret != null) {
                cols[i].set(ret);
                returnColumns[i] = cols[i];
            }
        }
        return returnColumns;
    }

    /**
     * Returns the name of the UDTF function.
     *
     * @return name of the UDTF function
     */
    @Override
    public String toString() {
        return FUNCTION_NAME;
    }
}
-------------------------------------------------------------------------------- /src/main/java/com/cloudera/hive/udf/functions/Rank.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 | * See the License for the specific language governing permissions and 16 | * limitations under the License.
17 | */ 18 | 19 | package com.cloudera.hive.udf.functions; 20 | 21 | import org.apache.hadoop.hive.ql.exec.Description; 22 | import org.apache.hadoop.hive.ql.exec.UDFArgumentException; 23 | import org.apache.hadoop.hive.ql.metadata.HiveException; 24 | import org.apache.hadoop.hive.ql.udf.UDFType; 25 | import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; 26 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; 27 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; 28 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.ObjectInspectorOptions; 29 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; 30 | import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; 31 |
/**
 * This UDF provides a rank() function.
 * <p>
 * Argument 0 is the value being ranked; arguments 1..N are the partition columns.
 * Consecutive rows with the same partition columns form a group. Within a group, rows with
 * the same value as the previous row share its rank, and a row with a new value gets
 * (number of rows already seen in the group + 1), so ranks have gaps after ties.
 * The class keeps state between calls and does no sorting itself; per the function
 * description it expects rows to arrive partitioned and sorted.
 */
@Description(name = "rank", value = "_FUNC_(value, partition columns ...) - Returns the rank of a value within a partitioned, sorted window.")
@UDFType(deterministic = false, stateful = true)
public class Rank extends GenericUDF {

    private long counter;          // rank returned for the most recent row
    private long nextCounter;      // number of rows seen so far in the current group
    private Object[] previousKey;  // standard-object copy of the previous row's arguments
    private ObjectInspector[] ois; // inspectors for the arguments, as passed to initialize

    /**
     * Returns the rank of the current row and remembers the row for the next call.
     *
     * @param currentKey the value (index 0) followed by the partition columns
     * @return the rank as a {@code Long}
     * @throws HiveException if an argument cannot be read or compared
     */
    @Override
    public Object evaluate(DeferredObject[] currentKey) throws HiveException {
        if (!sameGroup(currentKey)) {
            // New partition: restart the row count; the first row always ranks 1
            this.nextCounter = 0;
            this.counter = 1;
        } else if (!sameValue(currentKey)) {
            // Same partition, new value: rank jumps past every row already seen
            this.counter = this.nextCounter + 1;
        }
        // Same partition and same value: counter is left untouched (tie)
        copyToPreviousKey(currentKey);
        ++this.nextCounter;
        return Long.valueOf(this.counter);
    }

    @Override
    public String getDisplayString(String[] currentKey) {
        return "Rank";
    }

    /**
     * Stores the argument inspectors and declares a Java {@code Long} result.
     *
     * @param ois inspectors for the value and the partition columns
     * @return the Java long object inspector
     * @throws UDFArgumentException when no argument is supplied (index 0, the value, is required)
     */
    @Override
    public ObjectInspector initialize(ObjectInspector[] ois) throws UDFArgumentException {
        if (ois == null || ois.length < 1) {
            // Without this check the missing value only surfaces later as an out-of-bounds access
            throw new UDFArgumentException("rank requires at least one argument: the value, optionally followed by partition columns");
        }
        this.ois = ois;
        return PrimitiveObjectInspectorFactory.javaLongObjectInspector;
    }

    /**
     * Copies the objects from currentKey into previousKey as standard objects.
     * Does nothing when currentKey is null.
     * NOTE(review): the copy is presumably needed because Hive may reuse row objects between calls - confirm.
     *
     * @param currentKey the current row's arguments
     * @throws HiveException if an argument cannot be read
     */
    private void copyToPreviousKey(DeferredObject[] currentKey) throws HiveException {
        if (currentKey != null) {
            previousKey = new Object[currentKey.length];
            for (int index = 0; index < currentKey.length; index++) {
                previousKey[index] = ObjectInspectorUtils
                        .copyToStandardObject(currentKey[index].get(), this.ois[index]);
            }
        }
    }

    /**
     * Compares the partition columns (index 1 and up) of currentKey with previousKey.
     *
     * @param currentKey the current row's arguments
     * @return true if both keys are null, or both are non-null, of equal length and equal
     *         in every partition column; false otherwise
     * @throws HiveException if an argument cannot be read
     */
    private boolean sameGroup(DeferredObject[] currentKey) throws HiveException {
        if (currentKey == null || previousKey == null) {
            // Two missing keys count as the same group; one missing key never matches
            return currentKey == null && previousKey == null;
        }
        if (currentKey.length != previousKey.length) {
            return false;
        }
        for (int index = 1; index < currentKey.length; index++) {
            if (!sameAsPrevious(currentKey, index)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Compares the ranked value (index 0) of currentKey with previousKey.
     *
     * @param currentKey the current row's arguments
     * @return true if both keys are null, or both are non-null, of equal length and equal
     *         at index 0; false otherwise
     * @throws HiveException if the argument cannot be read
     */
    private boolean sameValue(DeferredObject[] currentKey) throws HiveException {
        if (currentKey == null || previousKey == null) {
            return currentKey == null && previousKey == null;
        }
        if (currentKey.length != previousKey.length) {
            return false;
        }
        return sameAsPrevious(currentKey, 0);
    }

    /**
     * Compares one element of currentKey with the remembered element at the same index.
     *
     * @param currentKey the current row's arguments
     * @param index      position to compare
     * @return true if the two elements compare equal
     * @throws HiveException if the argument cannot be read
     */
    private boolean sameAsPrevious(DeferredObject[] currentKey, int index) throws HiveException {
        final Object current = currentKey[index].get();
        final Object previous = previousKey[index];
        if (previous == null) {
            // A null has no class to build a reflection inspector from; it only matches another null
            return current == null;
        }
        return ObjectInspectorUtils.compare(current, this.ois[index],
                previous,
                ObjectInspectorFactory.getReflectionObjectInspector(previous.getClass(), ObjectInspectorOptions.JAVA)) == 0;
    }

}
-------------------------------------------------------------------------------- /src/main/java/com/cloudera/hive/udf/functions/RowNumber.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Licensed to the Apache Software Foundation (ASF) under one 3 | * or more contributor license agreements. See the NOTICE file 4 | * distributed with this work for additional information 5 | * regarding copyright ownership. The ASF licenses this file 6 | * to you under the Apache License, Version 2.0 (the 7 | * "License"); you may not use this file except in compliance 8 | * with the License. You may obtain a copy of the License at 9 | * 10 | * http://www.apache.org/licenses/LICENSE-2.0 11 | * 12 | * Unless required by applicable law or agreed to in writing, software 13 | * distributed under the License is distributed on an "AS IS" BASIS, 14 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 | * See the License for the specific language governing permissions and 16 | * limitations under the License. 17 | */ 18 | 19 | package com.cloudera.hive.udf.functions; 20 | 21 | import org.apache.hadoop.hive.ql.exec.Description; 22 | import org.apache.hadoop.hive.ql.exec.UDFArgumentException; 23 | import org.apache.hadoop.hive.ql.metadata.HiveException; 24 | import org.apache.hadoop.hive.ql.udf.UDFType; 25 | import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; 26 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; 27 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; 28 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory.ObjectInspectorOptions; 29 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; 30 | import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; 31 |
/**
 * This UDF provides a row_number() function.
 * <p>
 * Every argument is treated as part of the group key: the counter restarts at 1 whenever any
 * argument differs from the previous row, and increases by one for each consecutive row with
 * an identical key. The class keeps state between calls and does no sorting itself.
 */
@Description(name = "row_number", value = "_FUNC_(value, partition columns ...) - Returns the row_number of a row within a partitioned, sorted window.")
@UDFType(deterministic = false, stateful = true)
public class RowNumber extends GenericUDF {

    private long counter;          // row number returned for the most recent row
    private Object[] previousKey;  // standard-object copy of the arguments that opened the current group
    private ObjectInspector[] ois; // inspectors for the arguments, as passed to initialize

    /**
     * Returns the row number of the current row within its group.
     *
     * @param currentKey the arguments identifying the group
     * @return the 1-based row number as a {@code Long}
     * @throws HiveException if an argument cannot be read or compared
     */
    @Override
    public Object evaluate(DeferredObject[] currentKey) throws HiveException {
        if (!sameGroupAsPrevious(currentKey)) {
            // New group: restart numbering and remember the key that identifies it
            this.counter = 0;
            copyToPreviousKey(currentKey);
        }
        return Long.valueOf(++this.counter);
    }

    @Override
    public String getDisplayString(String[] currentKey) {
        return "RowNumber";
    }

    /**
     * Stores the argument inspectors and declares a Java {@code Long} result.
     *
     * @param ois inspectors for the arguments
     * @return the Java long object inspector
     * @throws UDFArgumentException declared by the overridden method; not thrown here
     */
    @Override
    public ObjectInspector initialize(ObjectInspector[] ois) throws UDFArgumentException {
        this.ois = ois;
        return PrimitiveObjectInspectorFactory.javaLongObjectInspector;
    }

    /**
     * Copies the objects from currentKey into previousKey as standard objects.
     * Does nothing when currentKey is null.
     *
     * @param currentKey the current row's arguments
     * @throws HiveException if an argument cannot be read
     */
    private void copyToPreviousKey(DeferredObject[] currentKey) throws HiveException {
        if (currentKey != null) {
            previousKey = new Object[currentKey.length];
            for (int index = 0; index < currentKey.length; index++) {
                previousKey[index] = ObjectInspectorUtils
                        .copyToStandardObject(currentKey[index].get(), this.ois[index]);
            }
        }
    }

    /**
     * Compares the currentKey and previousKey objects element by element.
     *
     * @param currentKey the current row's arguments
     * @return true if both keys are null, or both are non-null, of equal length and equal
     *         in every element; false otherwise
     * @throws HiveException if an argument cannot be read
     */
    private boolean sameGroupAsPrevious(DeferredObject[] currentKey) throws HiveException {
        if (currentKey == null || previousKey == null) {
            // Two missing keys count as the same group; one missing key never matches
            return currentKey == null && previousKey == null;
        }
        if (currentKey.length != previousKey.length) {
            return false;
        }
        for (int index = 0; index < currentKey.length; index++) {
            final Object current = currentKey[index].get();
            final Object previous = previousKey[index];
            if (previous == null) {
                // A null has no class to build a reflection inspector from; it only matches another null
                if (current != null) {
                    return false;
                }
                continue;
            }
            if (ObjectInspectorUtils.compare(current, this.ois[index],
                    previous,
                    ObjectInspectorFactory.getReflectionObjectInspector(previous.getClass(), ObjectInspectorOptions.JAVA)) != 0) {
                return false;
            }
        }
        return true;
    }

}
--------------------------------------------------------------------------------