├── LICENSE ├── README.md ├── pom.xml └── src ├── main └── java │ └── com │ └── hive │ └── bitmap │ ├── common │ └── BitmapUtil.java │ └── udf │ ├── BitmapAndUDF.java │ ├── BitmapContainsUDF.java │ ├── BitmapCountUDF.java │ ├── BitmapFromArrayUDF.java │ ├── BitmapIntersectUDAF.java │ ├── BitmapOrUDF.java │ ├── BitmapToArrayUDF.java │ ├── BitmapUnionUDAF.java │ ├── BitmapXorUDF.java │ └── ToBitmapUDAF.java └── test └── java └── com └── hive └── bitmap └── BitmapUDFTest.java /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 
29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 
61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # hive-bitmap-udf 2 | 3 | 在hive、spark中使用Roaring64Bitmap实现精确去重功能 4 | 主要目的: 5 | 1. 提升 hive、spark 中精确去重性能,代替hive或Spark 中的 count(distinct uuid); 6 | 2. 节省 hive 存储 ,使用 bitmap 对数据压缩 ,减少了存储成本; 7 | 3. 提供在 hive、spark 中 bitmap 的灵活运算 ,比如:交集、并集、差集运算 ,计算后的 bitmap 也可以直接写入 hive 表中; 8 | 9 | 如果方便的话,还请各位帮忙点个star,为开源项目加油! 10 | ## 1. 项目编译 11 | ```angular2html 12 | java 版本:1.8 13 | ``` 14 | ```angular2html 15 | mvn clean package 16 | ``` 17 | 编译完成后使用jar包:hive-bitmap-udf.jar 18 | ## 2. 
在hive中创建UDF 19 | 将 hive-bitmap-udf.jar 上传至HDFS系统或者放到本地,在spark或者hive中注册使用 20 | ``` 21 | add jar hdfs://node:9000/hive-bitmap-udf.jar; 22 | 23 | CREATE TEMPORARY FUNCTION to_bitmap AS 'com.hive.bitmap.udf.ToBitmapUDAF'; 24 | CREATE TEMPORARY FUNCTION bitmap_union AS 'com.hive.bitmap.udf.BitmapUnionUDAF'; 25 | 26 | CREATE TEMPORARY FUNCTION bitmap_count AS 'com.hive.bitmap.udf.BitmapCountUDF'; 27 | CREATE TEMPORARY FUNCTION bitmap_and AS 'com.hive.bitmap.udf.BitmapAndUDF'; 28 | CREATE TEMPORARY FUNCTION bitmap_or AS 'com.hive.bitmap.udf.BitmapOrUDF'; 29 | CREATE TEMPORARY FUNCTION bitmap_xor AS 'com.hive.bitmap.udf.BitmapXorUDF'; 30 | CREATE TEMPORARY FUNCTION bitmap_to_array AS 'com.hive.bitmap.udf.BitmapToArrayUDF'; 31 | CREATE TEMPORARY FUNCTION bitmap_from_array AS 'com.hive.bitmap.udf.BitmapFromArrayUDF'; 32 | CREATE TEMPORARY FUNCTION bitmap_contains AS 'com.hive.bitmap.udf.BitmapContainsUDF'; 33 | 34 | ``` 35 | 36 | ## 3. UDF说明 37 | 38 | | UDF | 描述 | 案例 | 结果类型 | 39 | |:-----------------:|:---------------------------:|:--------------------------------:|:-------------:| 40 | | to_bitmap | 将num(int或bigint) 转化为 bitmap | to_bitmap(num) | bitmap | 41 | | bitmap_union | 多个bitmap合并为一个bitmap(并集) | bitmap_union(bitmap) | bitmap | 42 | | bitmap_count | 计算bitmap中存储的num个数 | bitmap_count(bitmap) | bigint | 43 | | bitmap_and | 计算两个bitmap交集 | bitmap_and(bitmap1,bitmap2) | bitmap | 44 | | bitmap_or | 计算两个bitmap并集 | bitmap_or(bitmap1,bitmap2) | bitmap | 45 | | bitmap_xor | 计算两个bitmap对称差集(异或) | bitmap_xor(bitmap1,bitmap2) | bitmap | 46 | | bitmap_from_array | array 转化为bitmap | bitmap_from_array(array) | bitmap | 47 | | bitmap_to_array | bitmap转化为array | bitmap_to_array(bitmap) | array | 48 | | bitmap_contains | bitmap是否包含另一个bitmap全部元素 | bitmap_contains(bitmap1,bitmap2) | boolean | 49 | | bitmap_contains | bitmap是否包含某个元素 | bitmap_contains(bitmap,num) | boolean | 50 | | bitmap_intersect | 多个bitmap的交集 | bitmap_intersect(bitmap) | bitmap | 51 | 52 | ## 4. 
下载地址 53 | https://github.com/lihuigang/hive-bitmap-udf/releases 54 | ## 5. 在 hive 中创建 bitmap 类型表,导入数据并查询 55 | ``` 56 | CREATE TABLE IF NOT EXISTS `hive_bitmap_table` 57 | ( 58 | k int comment 'id', 59 | bitmap binary comment 'bitmap' 60 | ) comment 'hive bitmap 类型表' 61 | STORED AS ORC; 62 | 63 | -- 数据写入 64 | insert into table hive_bitmap_table select 1 as id,to_bitmap(1) as bitmap; 65 | insert into table hive_bitmap_table select 2 as id,to_bitmap(2) as bitmap; 66 | 67 | -- 查询 68 | 69 | select bitmap_union(bitmap) from hive_bitmap_table; 70 | select bitmap_count(bitmap_union(bitmap)) from hive_bitmap_table; 71 | 72 | select bitmap_contains(bitmap,1) from hive_bitmap_table; 73 | select bitmap_contains(bitmap,bitmap_from_array(array(1,2))) from hive_bitmap_table; 74 | 75 | 76 | 77 | ``` 78 | 79 | ## 5. 在 hive 中使用 bitmap 实现精确去重 80 | ``` 81 | CREATE TABLE IF NOT EXISTS `hive_table` 82 | ( 83 | k int comment 'id', 84 | uuid bigint comment '用户id' 85 | ) comment 'hive 普通类型表' 86 | STORED AS ORC; 87 | 88 | -- 普通查询(计算去重人数) 89 | 90 | select count(distinct uuid) from hive_table; 91 | 92 | -- bitmap查询(计算去重人数) 93 | 94 | select bitmap_count(to_bitmap(uuid)) from hive_table; 95 | 96 | ``` 97 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 18 | 20 | 4.0.0 21 | 22 | com.hive.bitmap 23 | hive-bitmap-udf 24 | jar 25 | 1.0 26 | 27 | 28 | 29 | org.apache.spark 30 | spark-hive_2.12 31 | 3.1.2 32 | test 33 | 34 | 35 | 36 | org.apache.hive 37 | hive-exec 38 | 2.3.7 39 | provided 40 | 41 | 42 | janino 43 | org.codehaus.janino 44 | 45 | 46 | commons-compiler 47 | org.codehaus.janino 48 | 49 | 50 | 51 | 52 | 53 | org.roaringbitmap 54 | RoaringBitmap 55 | 0.9.28 56 | 57 | 58 | 59 | junit 60 | junit 61 | 4.13.1 62 | test 63 | 64 | 65 | 66 | hive-bitmap-udf 67 | 68 | 69 | org.apache.maven.plugins 70 | maven-compiler-plugin 71 | 3.8.0 72 | 73 | 1.8 74 | 1.8 75 | 
UTF-8 76 | 77 | 78 | 79 | org.apache.maven.plugins 80 | maven-source-plugin 81 | 3.1.0 82 | 83 | true 84 | 85 | 86 | 87 | create-source-jar 88 | 89 | jar-no-fork 90 | 91 | 92 | 93 | 94 | 95 | 96 | org.apache.maven.plugins 97 | maven-shade-plugin 98 | 3.1.0 99 | 100 | 101 | package 102 | 103 | shade 104 | 105 | 106 | 107 | 108 | org.roaringbitmap 109 | org.bitmap.shade.roaringbitmap 110 | 111 | 112 | 113 | 114 | 115 | 116 | 117 | 118 | 119 | -------------------------------------------------------------------------------- /src/main/java/com/hive/bitmap/common/BitmapUtil.java: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 
17 | 18 | package com.hive.bitmap.common; 19 | 20 | import java.io.ByteArrayInputStream; 21 | import java.io.ByteArrayOutputStream; 22 | import java.io.DataInputStream; 23 | import java.io.DataOutputStream; 24 | import java.io.IOException; 25 | 26 | import org.roaringbitmap.longlong.Roaring64Bitmap; 27 | 28 | public class BitmapUtil { 29 | public static byte[] serializeToBytes(Roaring64Bitmap bitmap) throws IOException { 30 | ByteArrayOutputStream bos = new ByteArrayOutputStream(); 31 | DataOutputStream dos = new DataOutputStream(bos); 32 | bitmap.serialize(dos); 33 | dos.close(); 34 | return bos.toByteArray(); 35 | } 36 | 37 | public static Roaring64Bitmap deserializeToBitmap(byte[] bytes) throws IOException { 38 | Roaring64Bitmap bitmapValue = new Roaring64Bitmap(); 39 | if (bytes == null) { 40 | return bitmapValue; 41 | } 42 | DataInputStream in = new DataInputStream(new ByteArrayInputStream(bytes)); 43 | bitmapValue.deserialize(in); 44 | in.close(); 45 | return bitmapValue; 46 | } 47 | } -------------------------------------------------------------------------------- /src/main/java/com/hive/bitmap/udf/BitmapAndUDF.java: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. 
See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | package com.hive.bitmap.udf; 19 | 20 | import com.hive.bitmap.common.BitmapUtil; 21 | import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; 22 | import org.roaringbitmap.longlong.Roaring64Bitmap; 23 | 24 | import org.apache.hadoop.hive.ql.exec.Description; 25 | import org.apache.hadoop.hive.ql.exec.UDFArgumentException; 26 | import org.apache.hadoop.hive.ql.metadata.HiveException; 27 | import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; 28 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; 29 | import org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector; 30 | import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; 31 | 32 | import java.io.IOException; 33 | 34 | @Description(name = "bitmap_and", value = "a _FUNC_ b - Compute intersection of two or more input bitmaps, return the new bitmap") 35 | public class BitmapAndUDF extends GenericUDF { 36 | 37 | private transient BinaryObjectInspector inputOI0; 38 | private transient BinaryObjectInspector inputOI1; 39 | 40 | @Override 41 | public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { 42 | 43 | if (arguments.length != 2) { 44 | throw new UDFArgumentTypeException(arguments.length, "Exactly two argument is expected."); 45 | } 46 | 47 | ObjectInspector input0 = arguments[0]; 48 | ObjectInspector input1 = arguments[1]; 49 | if (!(input0 instanceof BinaryObjectInspector) || !(input1 instanceof BinaryObjectInspector)) { 50 | throw new UDFArgumentException("first and second argument must be a binary"); 51 | } 52 | 53 | this.inputOI0 = (BinaryObjectInspector) input0; 54 | this.inputOI1 = (BinaryObjectInspector) input1; 55 | 56 | return PrimitiveObjectInspectorFactory.javaByteArrayObjectInspector; 57 | } 58 | 59 | @Override 60 | public Object evaluate(DeferredObject[] 
args) throws HiveException { 61 | 62 | byte[] inputBytes0 = this.inputOI0.getPrimitiveJavaObject(args[0].get()); 63 | byte[] inputBytes1 = this.inputOI1.getPrimitiveJavaObject(args[1].get()); 64 | 65 | try { 66 | Roaring64Bitmap bitmap0 = BitmapUtil.deserializeToBitmap(inputBytes0); 67 | Roaring64Bitmap bitmap1 = BitmapUtil.deserializeToBitmap(inputBytes1); 68 | bitmap0.and(bitmap1); 69 | return BitmapUtil.serializeToBytes(bitmap0); 70 | } catch (IOException ioException) { 71 | ioException.printStackTrace(); 72 | throw new RuntimeException(ioException); 73 | } 74 | } 75 | 76 | @Override 77 | public String getDisplayString(String[] children) { 78 | return "Usage: bitmap_and(bitmap1,bitmap2)"; 79 | } 80 | } -------------------------------------------------------------------------------- /src/main/java/com/hive/bitmap/udf/BitmapContainsUDF.java: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 
17 | 18 | package com.hive.bitmap.udf; 19 | 20 | import com.hive.bitmap.common.BitmapUtil; 21 | import org.apache.hadoop.hive.ql.exec.Description; 22 | import org.apache.hadoop.hive.ql.exec.UDFArgumentException; 23 | import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; 24 | import org.apache.hadoop.hive.ql.metadata.HiveException; 25 | import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; 26 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; 27 | import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; 28 | import org.apache.hadoop.hive.serde2.objectinspector.primitive.*; 29 | import org.roaringbitmap.longlong.Roaring64Bitmap; 30 | 31 | import java.io.IOException; 32 | import java.util.Iterator; 33 | 34 | @Description(name = "bitmap_contains") 35 | public class BitmapContainsUDF extends GenericUDF { 36 | 37 | private transient BinaryObjectInspector inputOI01; 38 | private transient PrimitiveObjectInspector inputOI02; 39 | 40 | @Override 41 | public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { 42 | 43 | if (arguments.length != 2) { 44 | throw new UDFArgumentTypeException(arguments.length, "Exactly two argument is expected."); 45 | } 46 | 47 | ObjectInspector input0 = arguments[0]; 48 | ObjectInspector input1 = arguments[1]; 49 | if (!(input0 instanceof BinaryObjectInspector)) { 50 | throw new UDFArgumentException("first argument must be a binary"); 51 | } 52 | 53 | if (!(input1 instanceof IntObjectInspector || input1 instanceof LongObjectInspector || input1 instanceof BinaryObjectInspector)) { 54 | throw new UDFArgumentException("second argument must be a int or bigint or bitmap"); 55 | } 56 | 57 | this.inputOI01 = (BinaryObjectInspector) input0; 58 | this.inputOI02 = (PrimitiveObjectInspector) input1; 59 | 60 | return PrimitiveObjectInspectorFactory.javaBooleanObjectInspector; 61 | } 62 | 63 | @Override 64 | public Object evaluate(DeferredObject[] args) throws 
HiveException { 65 | Roaring64Bitmap bitmapValue = getBitmapFromBytes(args[0]); 66 | 67 | if (this.inputOI02 instanceof BinaryObjectInspector) { 68 | Roaring64Bitmap bitmap2 = getBitmapFromBytes(args[1]); 69 | return checkBitmapContains(bitmapValue, bitmap2); 70 | } else { 71 | long number = PrimitiveObjectInspectorUtils.getLong(args[1].get(), inputOI02); 72 | return bitmapValue.contains(number); 73 | } 74 | } 75 | 76 | @Override 77 | public String getDisplayString(String[] children) { 78 | return "Usage: bitmap_contains(bitmap,num)"; 79 | } 80 | 81 | private boolean checkBitmapContains(Roaring64Bitmap bitmap1, Roaring64Bitmap bitmap2) { 82 | 83 | if (bitmap2.isEmpty()) { 84 | return false; 85 | } 86 | Iterator iterator = bitmap2.iterator(); 87 | while (iterator.hasNext()) { 88 | if (!bitmap1.contains(iterator.next())) { 89 | return false; 90 | } 91 | } 92 | return true; 93 | } 94 | 95 | private Roaring64Bitmap getBitmapFromBytes(DeferredObject arg) throws HiveException { 96 | byte[] inputBytes = this.inputOI01.getPrimitiveJavaObject(arg.get()); 97 | try { 98 | return BitmapUtil.deserializeToBitmap(inputBytes); 99 | } catch (IOException ioException) { 100 | throw new HiveException(ioException); 101 | } 102 | } 103 | } 104 | -------------------------------------------------------------------------------- /src/main/java/com/hive/bitmap/udf/BitmapCountUDF.java: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. 
You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 17 | 18 | package com.hive.bitmap.udf; 19 | 20 | import com.hive.bitmap.common.BitmapUtil; 21 | import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; 22 | import org.roaringbitmap.longlong.Roaring64Bitmap; 23 | 24 | import org.apache.hadoop.hive.ql.exec.Description; 25 | import org.apache.hadoop.hive.ql.exec.UDFArgumentException; 26 | import org.apache.hadoop.hive.ql.metadata.HiveException; 27 | import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; 28 | import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; 29 | import org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector; 30 | import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; 31 | 32 | import java.io.IOException; 33 | 34 | @Description(name = "bitmap_count", value = "a _FUNC_ b - Returns the number of distinct integers added to the bitmap (e.g., number of bits set)") 35 | public class BitmapCountUDF extends GenericUDF { 36 | 37 | private transient BinaryObjectInspector inputOI; 38 | 39 | @Override 40 | public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { 41 | 42 | if (arguments.length != 1) { 43 | throw new UDFArgumentTypeException(arguments.length, "Exactly one argument is expected."); 44 | } 45 | 46 | ObjectInspector input = arguments[0]; 47 | if (!(input instanceof BinaryObjectInspector)) { 48 | throw new UDFArgumentException("first argument must be a binary"); 49 | } 50 | 51 | this.inputOI = (BinaryObjectInspector) input; 52 
| 53 | return PrimitiveObjectInspectorFactory.javaLongObjectInspector; 54 | } 55 | 56 | @Override 57 | public Object evaluate(DeferredObject[] args) throws HiveException { 58 | if (args[0] == null) { 59 | return 0; 60 | } 61 | byte[] inputBytes = this.inputOI.getPrimitiveJavaObject(args[0].get()); 62 | 63 | try { 64 | Roaring64Bitmap bitmapValue = BitmapUtil.deserializeToBitmap(inputBytes); 65 | return bitmapValue.getLongCardinality(); 66 | } catch (IOException ioException) { 67 | ioException.printStackTrace(); 68 | throw new HiveException(ioException); 69 | } 70 | } 71 | 72 | @Override 73 | public String getDisplayString(String[] children) { 74 | return "Usage: bitmap_count(bitmap)"; 75 | } 76 | } 77 | -------------------------------------------------------------------------------- /src/main/java/com/hive/bitmap/udf/BitmapFromArrayUDF.java: -------------------------------------------------------------------------------- 1 | // Licensed to the Apache Software Foundation (ASF) under one 2 | // or more contributor license agreements. See the NOTICE file 3 | // distributed with this work for additional information 4 | // regarding copyright ownership. The ASF licenses this file 5 | // to you under the Apache License, Version 2.0 (the 6 | // "License"); you may not use this file except in compliance 7 | // with the License. You may obtain a copy of the License at 8 | // 9 | // http://www.apache.org/licenses/LICENSE-2.0 10 | // 11 | // Unless required by applicable law or agreed to in writing, 12 | // software distributed under the License is distributed on an 13 | // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 14 | // KIND, either express or implied. See the License for the 15 | // specific language governing permissions and limitations 16 | // under the License. 
package com.hive.bitmap.udf;

import com.hive.bitmap.common.BitmapUtil;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.roaringbitmap.longlong.Roaring64Bitmap;

import java.io.IOException;
import java.util.List;

/**
 * Hive UDF {@code bitmap_from_array(array)}: adds every element of the input array to a
 * {@link Roaring64Bitmap} and returns the bytes produced by
 * {@code BitmapUtil.serializeToBytes}.
 */
@Description(name = "bitmap_from_array")
public class BitmapFromArrayUDF extends GenericUDF {

    private transient ListObjectInspector inputOI;

    /**
     * Validates that exactly one list-typed argument is supplied.
     *
     * @param arguments inspectors of the call arguments
     * @return the Java byte-array inspector describing the serialized bitmap result
     * @throws UDFArgumentException if the argument count is not one or the argument is not a list
     */
    @Override
    public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
        if (arguments.length != 1) {
            throw new UDFArgumentTypeException(arguments.length, "Exactly one argument is expected.");
        }

        ObjectInspector input = arguments[0];
        if (!(input instanceof ListObjectInspector)) {
            throw new UDFArgumentException("first argument must be a array");
        }
        this.inputOI = (ListObjectInspector) input;

        return PrimitiveObjectInspectorFactory.javaByteArrayObjectInspector;
    }

    /**
     * Builds the bitmap from the array elements.
     *
     * @param args the single deferred array argument
     * @return the serialized bitmap, or {@code null} when the array itself is NULL
     * @throws HiveException if an element cannot be parsed as a long or serialization fails
     */
    @Override
    public Object evaluate(DeferredObject[] args) throws HiveException {
        List<?> elements = this.inputOI.getList(args[0].get());
        if (elements == null) {
            // NULL array in, NULL out; previously this dereferenced the null list.
            return null;
        }

        Roaring64Bitmap bitmap = new Roaring64Bitmap();
        try {
            for (Object element : elements) {
                // NULL elements carry no value to add, so they are skipped.
                if (element != null) {
                    bitmap.add(Long.parseLong(element.toString()));
                }
            }
            return BitmapUtil.serializeToBytes(bitmap);
        } catch (NumberFormatException | IOException e) {
            throw new HiveException(e);
        }
    }

    @Override
    public String getDisplayString(String[] children) {
        return "Usage: bitmap_from_array(array)";
    }
}

--------------------------------------------------------------------------------
/src/main/java/com/hive/bitmap/udf/BitmapIntersectUDAF.java:
--------------------------------------------------------------------------------
package com.hive.bitmap.udf;

import com.hive.bitmap.common.BitmapUtil;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.udf.generic.AbstractGenericUDAFResolver;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.roaringbitmap.longlong.Roaring64Bitmap;

import java.io.IOException;

/**
 * Hive UDAF {@code bitmap_intersect(expr)}: intersects all serialized bitmaps of a group and
 * returns the serialized result.
 */
@Description(name = "bitmap_intersect", value = "_FUNC_(expr) - Calculate the grouped bitmap intersection , Returns an doris bitmap representation of a column.")
public class BitmapIntersectUDAF extends AbstractGenericUDAFResolver {

    /**
     * @param args type information of the call arguments
     * @return a new {@link IntersectEvaluator}
     * @throws SemanticException if the argument count is not exactly one
     */
    @Override
    public GenericUDAFEvaluator getEvaluator(TypeInfo[] args) throws SemanticException {
        if (args.length != 1) {
            throw new UDFArgumentException(String.format("Exactly one argument is expected, but get %d", args.length));
        }
        return new IntersectEvaluator();
    }

    /** Evaluator whose raw input and partial results are both serialized bitmaps. */
    public static class IntersectEvaluator extends GenericUDAFEvaluator {

        private transient BinaryObjectInspector binaryOI;

        /** Aggregation state: the running intersection and whether it has been seeded yet. */
        @AggregationType(estimable = true)
        static class BitmapAgg extends AbstractAggregationBuffer {
            Roaring64Bitmap bitmap;
            // An empty bitmap is a legitimate intersection result, so "no input seen yet"
            // has to be tracked separately from emptiness.
            boolean initialized;
        }

        @Override
        public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveException {
            // Let the base class record the mode, as BitmapUnionUDAF and ToBitmapUDAF do.
            super.init(m, parameters);
            this.binaryOI = (BinaryObjectInspector) parameters[0];
            return PrimitiveObjectInspectorFactory.javaByteArrayObjectInspector;
        }

        @Override
        public AggregationBuffer getNewAggregationBuffer() throws HiveException {
            BitmapAgg result = new BitmapAgg();
            reset(result);
            return result;
        }

        @Override
        public void reset(AggregationBuffer agg) throws HiveException {
            BitmapAgg buffer = (BitmapAgg) agg;
            buffer.bitmap = new Roaring64Bitmap();
            buffer.initialized = false;
        }

        @Override
        public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException {
            assert (parameters.length == 1);
            Object value = parameters[0];
            if (value != null) {
                merge(agg, value);
            }
        }

        /**
         * @return the serialized running intersection, or {@code null} when this buffer never
         *         received a bitmap, so that {@link #merge} can skip it instead of intersecting
         *         with an empty set
         */
        @Override
        public Object terminatePartial(AggregationBuffer agg) throws HiveException {
            if (!((BitmapAgg) agg).initialized) {
                return null;
            }
            return terminate(agg);
        }

        /**
         * @return the serialized intersection; an empty bitmap when no input was aggregated
         */
        @Override
        public Object terminate(AggregationBuffer agg) throws HiveException {
            BitmapAgg buffer = (BitmapAgg) agg;
            try {
                return BitmapUtil.serializeToBytes(buffer.bitmap);
            } catch (IOException e) {
                throw new HiveException(e);
            }
        }

        /**
         * Folds one serialized bitmap into the buffer: the first bitmap seeds the state, every
         * later one is intersected with it. NULL inputs are ignored.
         */
        @Override
        public void merge(AggregationBuffer agg, Object partial) throws HiveException {
            if (partial == null) {
                return;
            }
            byte[] partialResult = this.binaryOI.getPrimitiveJavaObject(partial);
            if (partialResult == null) {
                return;
            }

            BitmapAgg buffer = (BitmapAgg) agg;
            try {
                Roaring64Bitmap incoming = BitmapUtil.deserializeToBitmap(partialResult);
                if (buffer.initialized) {
                    buffer.bitmap.and(incoming);
                } else {
                    buffer.bitmap.or(incoming);
                    buffer.initialized = true;
                }
            } catch (IOException e) {
                throw new HiveException(e);
            }
        }
    }
}

--------------------------------------------------------------------------------
/src/main/java/com/hive/bitmap/udf/BitmapOrUDF.java:
--------------------------------------------------------------------------------
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.

package com.hive.bitmap.udf;

import com.hive.bitmap.common.BitmapUtil;
import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
import org.roaringbitmap.longlong.Roaring64Bitmap;

import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

import java.io.IOException;

/**
 * Hive UDF {@code bitmap_or(bitmap1, bitmap2)}: deserializes both binary arguments, ORs the
 * second bitmap into the first and returns the serialized result.
 */
@Description(name = "bitmap_or", value = "a _FUNC_ b - Compute union of two or more input bitmaps, returns the new bitmap")
public class BitmapOrUDF extends GenericUDF {

    private transient BinaryObjectInspector inputOI0;
    private transient BinaryObjectInspector inputOI1;

    /**
     * Validates that exactly two binary arguments are supplied.
     *
     * @param arguments inspectors of the call arguments
     * @return the Java byte-array inspector describing the serialized bitmap result
     * @throws UDFArgumentException if the argument count is not two or an argument is not binary
     */
    @Override
    public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
        if (arguments.length != 2) {
            throw new UDFArgumentTypeException(arguments.length, "Exactly two argument is expected.");
        }

        ObjectInspector input0 = arguments[0];
        ObjectInspector input1 = arguments[1];
        if (!(input0 instanceof BinaryObjectInspector) || !(input1 instanceof BinaryObjectInspector)) {
            throw new UDFArgumentException("first and second argument must be a binary");
        }
        this.inputOI0 = (BinaryObjectInspector) input0;
        this.inputOI1 = (BinaryObjectInspector) input1;

        return PrimitiveObjectInspectorFactory.javaByteArrayObjectInspector;
    }

    /**
     * Computes the union of the two bitmaps.
     *
     * @param args the two deferred binary arguments
     * @return the serialized union
     * @throws HiveException if a bitmap cannot be deserialized or the result cannot be serialized
     */
    @Override
    public Object evaluate(DeferredObject[] args) throws HiveException {
        // NOTE(review): NULL arguments reach BitmapUtil.deserializeToBitmap as null byte arrays;
        // confirm how that helper treats null before relying on NULL inputs here.
        byte[] inputBytes0 = this.inputOI0.getPrimitiveJavaObject(args[0].get());
        byte[] inputBytes1 = this.inputOI1.getPrimitiveJavaObject(args[1].get());

        try {
            Roaring64Bitmap bitmap0 = BitmapUtil.deserializeToBitmap(inputBytes0);
            Roaring64Bitmap bitmap1 = BitmapUtil.deserializeToBitmap(inputBytes1);
            bitmap0.or(bitmap1);
            return BitmapUtil.serializeToBytes(bitmap0);
        } catch (IOException ioException) {
            // Report through the declared checked exception, like the sibling UDFs, instead of
            // printing the stack trace and throwing an unchecked exception.
            throw new HiveException(ioException);
        }
    }

    @Override
    public String getDisplayString(String[] children) {
        return "Usage: bitmap_or(bitmap1,bitmap2)";
    }
}
--------------------------------------------------------------------------------
/src/main/java/com/hive/bitmap/udf/BitmapToArrayUDF.java:
--------------------------------------------------------------------------------
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.
// See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.

package com.hive.bitmap.udf;

import com.hive.bitmap.common.BitmapUtil;
import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.roaringbitmap.longlong.Roaring64Bitmap;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

/**
 * Hive UDF {@code bitmap_to_array(bitmap)}: deserializes a binary bitmap and returns its
 * members as an {@code array<bigint>}.
 */
@Description(name = "bitmap_to_array")
public class BitmapToArrayUDF extends GenericUDF {

    private transient BinaryObjectInspector inputOI;

    /**
     * Validates that exactly one binary argument is supplied.
     *
     * @param arguments inspectors of the call arguments
     * @return a standard list inspector over Java longs
     * @throws UDFArgumentException if the argument count is not one or the argument is not binary
     */
    @Override
    public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
        if (arguments.length != 1) {
            throw new UDFArgumentTypeException(arguments.length, "Exactly one argument is expected.");
        }

        ObjectInspector input = arguments[0];
        if (!(input instanceof BinaryObjectInspector)) {
            throw new UDFArgumentException("first argument must be a binary");
        }
        this.inputOI = (BinaryObjectInspector) input;

        return ObjectInspectorFactory.getStandardListObjectInspector(
                PrimitiveObjectInspectorFactory.javaLongObjectInspector);
    }

    /**
     * Expands the bitmap into a list of its members, in the bitmap's iteration order.
     *
     * @param args the single deferred binary argument
     * @return the members as a {@code List<Long>}, or {@code null} when the argument is NULL
     * @throws HiveException if the bitmap cannot be deserialized
     */
    @Override
    public Object evaluate(DeferredObject[] args) throws HiveException {
        // The NULL check must look at the deferred value, and the NULL result must match the
        // declared list inspector; the old guard returned an Integer 0 here.
        Object raw = args[0] == null ? null : args[0].get();
        if (raw == null) {
            return null;
        }
        byte[] inputBytes = this.inputOI.getPrimitiveJavaObject(raw);
        if (inputBytes == null) {
            return null;
        }

        try {
            Roaring64Bitmap bitmapValue = BitmapUtil.deserializeToBitmap(inputBytes);
            List<Long> values = new ArrayList<>();
            Iterator<Long> iterator = bitmapValue.iterator();
            while (iterator.hasNext()) {
                values.add(iterator.next());
            }
            return values;
        } catch (IOException ioException) {
            throw new HiveException(ioException);
        }
    }

    @Override
    public String getDisplayString(String[] children) {
        return "Usage: bitmap_to_array(bitmap)";
    }
}

--------------------------------------------------------------------------------
/src/main/java/com/hive/bitmap/udf/BitmapUnionUDAF.java:
--------------------------------------------------------------------------------
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.
// You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.

package com.hive.bitmap.udf;

import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.udf.generic.AbstractGenericUDAFResolver;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;

import java.io.IOException;

import org.roaringbitmap.longlong.Roaring64Bitmap;
import com.hive.bitmap.common.BitmapUtil;


/**
 * Hive UDAF {@code bitmap_union(expr)}: ORs together all serialized bitmaps of a group and
 * returns the serialized result.
 */
@Description(name = "bitmap_union", value = "_FUNC_(expr) - Calculate the grouped bitmap union , Returns an doris bitmap representation of a column.")
public class BitmapUnionUDAF extends AbstractGenericUDAFResolver {

    /**
     * @param parameters type information of the call arguments
     * @return a new {@link GenericEvaluate}
     * @throws SemanticException if the argument count is not exactly one
     */
    @Override
    public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters)
            throws SemanticException {
        if (parameters.length != 1) {
            throw new UDFArgumentTypeException(parameters.length - 1,
                    "Exactly one argument is expected.");
        }
        return new GenericEvaluate();
    }

    /** Evaluator whose raw input and partial results are both serialized bitmaps. */
    public static class GenericEvaluate extends GenericUDAFEvaluator {

        private transient BinaryObjectInspector binaryOI;

        @Override
        public ObjectInspector init(Mode m, ObjectInspector[] parameters)
                throws HiveException {
            super.init(m, parameters);
            this.binaryOI = (BinaryObjectInspector) parameters[0];
            return PrimitiveObjectInspectorFactory.javaByteArrayObjectInspector;
        }

        /** Aggregation state: the running union. */
        @AggregationType(estimable = true)
        static class BitmapAgg extends AbstractAggregationBuffer {
            Roaring64Bitmap bitmap;
        }

        @Override
        public void reset(AggregationBuffer agg) throws HiveException {
            ((BitmapAgg) agg).bitmap = new Roaring64Bitmap();
        }

        @Override
        public AggregationBuffer getNewAggregationBuffer() throws HiveException {
            BitmapAgg result = new BitmapAgg();
            reset(result);
            return result;
        }

        @Override
        public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException {
            assert (parameters.length == 1);
            Object value = parameters[0];
            if (value != null) {
                merge(agg, value);
            }
        }

        /**
         * @return the serialized union; an empty bitmap when no input was aggregated
         * @throws HiveException if the bitmap cannot be serialized
         */
        @Override
        public Object terminate(AggregationBuffer agg) throws HiveException {
            BitmapAgg buffer = (BitmapAgg) agg;
            try {
                return BitmapUtil.serializeToBytes(buffer.bitmap);
            } catch (IOException e) {
                throw new HiveException(e);
            }
        }

        /**
         * ORs one serialized bitmap into the buffer. NULL inputs contribute nothing to a union
         * and are skipped.
         *
         * @throws HiveException if the bitmap cannot be deserialized
         */
        @Override
        public void merge(AggregationBuffer agg, Object partial) throws HiveException {
            if (partial == null) {
                return;
            }
            byte[] partialResult = this.binaryOI.getPrimitiveJavaObject(partial);
            if (partialResult == null) {
                return;
            }

            BitmapAgg buffer = (BitmapAgg) agg;
            try {
                buffer.bitmap.or(BitmapUtil.deserializeToBitmap(partialResult));
            } catch (IOException e) {
                throw new HiveException(e);
            }
        }

        @Override
        public Object terminatePartial(AggregationBuffer agg) throws HiveException {
            return terminate(agg);
        }
    }
}

--------------------------------------------------------------------------------
/src/main/java/com/hive/bitmap/udf/BitmapXorUDF.java:
--------------------------------------------------------------------------------
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.
package com.hive.bitmap.udf;

import com.hive.bitmap.common.BitmapUtil;
import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
import org.roaringbitmap.longlong.Roaring64Bitmap;

import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

import java.io.IOException;

/**
 * Hive UDF {@code bitmap_xor(bitmap1, bitmap2)}: deserializes both binary arguments, XORs the
 * second bitmap into the first and returns the serialized result.
 */
@Description(name = "bitmap_xor", value = "a _FUNC_ b - Compute the symmetric difference of two or more input bitmaps, return the new bitmap")
public class BitmapXorUDF extends GenericUDF {

    private transient BinaryObjectInspector inputOI0;
    private transient BinaryObjectInspector inputOI1;

    /**
     * Validates that exactly two binary arguments are supplied.
     *
     * @param arguments inspectors of the call arguments
     * @return the Java byte-array inspector describing the serialized bitmap result
     * @throws UDFArgumentException if the argument count is not two or an argument is not binary
     */
    @Override
    public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
        if (arguments.length != 2) {
            throw new UDFArgumentTypeException(arguments.length, "Exactly two argument is expected.");
        }

        ObjectInspector input0 = arguments[0];
        ObjectInspector input1 = arguments[1];
        if (!(input0 instanceof BinaryObjectInspector) || !(input1 instanceof BinaryObjectInspector)) {
            throw new UDFArgumentException("first and second argument must be a binary");
        }
        this.inputOI0 = (BinaryObjectInspector) input0;
        this.inputOI1 = (BinaryObjectInspector) input1;

        return PrimitiveObjectInspectorFactory.javaByteArrayObjectInspector;
    }

    /**
     * Computes the symmetric difference of the two bitmaps.
     *
     * @param args the two deferred binary arguments
     * @return the serialized result
     * @throws HiveException if a bitmap cannot be deserialized or the result cannot be serialized
     */
    @Override
    public Object evaluate(DeferredObject[] args) throws HiveException {
        // NOTE(review): NULL arguments reach BitmapUtil.deserializeToBitmap as null byte arrays;
        // confirm how that helper treats null before relying on NULL inputs here.
        byte[] inputBytes0 = this.inputOI0.getPrimitiveJavaObject(args[0].get());
        byte[] inputBytes1 = this.inputOI1.getPrimitiveJavaObject(args[1].get());

        try {
            Roaring64Bitmap bitmap0 = BitmapUtil.deserializeToBitmap(inputBytes0);
            Roaring64Bitmap bitmap1 = BitmapUtil.deserializeToBitmap(inputBytes1);
            bitmap0.xor(bitmap1);
            return BitmapUtil.serializeToBytes(bitmap0);
        } catch (IOException ioException) {
            // Report through the declared checked exception, like the sibling UDFs, instead of
            // printing the stack trace and throwing an unchecked exception.
            throw new HiveException(ioException);
        }
    }

    @Override
    public String getDisplayString(String[] children) {
        return "Usage: bitmap_xor(bitmap1,bitmap2)";
    }
}
--------------------------------------------------------------------------------
/src/main/java/com/hive/bitmap/udf/ToBitmapUDAF.java:
--------------------------------------------------------------------------------
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements.  See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership.  The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License.  You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied.  See the License for the
// specific language governing permissions and limitations
// under the License.
package com.hive.bitmap.udf;

import com.hive.bitmap.common.BitmapUtil;
import org.roaringbitmap.longlong.Roaring64Bitmap;

import org.apache.hadoop.hive.ql.exec.Description;
import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.udf.generic.AbstractGenericUDAFResolver;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.BinaryObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;

import java.io.IOException;

/**
 * Hive UDAF {@code to_bitmap(expr)}: collects the values of a primitive column, read as
 * longs, into a {@link Roaring64Bitmap} and returns it in serialized form.
 */
@Description(name = "to_bitmap", value = "_FUNC_(expr) - Returns an doris bitmap representation of a column.")
public class ToBitmapUDAF extends AbstractGenericUDAFResolver {

    /**
     * @param parameters type information of the call arguments
     * @return a new {@link GenericEvaluate}
     * @throws SemanticException if the argument count is not exactly one
     */
    @Override
    public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters)
            throws SemanticException {
        if (parameters.length != 1) {
            throw new UDFArgumentTypeException(parameters.length - 1,
                    "Exactly one argument is expected.");
        }
        return new GenericEvaluate();
    }

    /** Evaluator that reads raw primitive values and exchanges serialized bitmaps as partials. */
    public static class GenericEvaluate extends GenericUDAFEvaluator {

        // For PARTIAL1 and COMPLETE: ObjectInspector for the original data
        private PrimitiveObjectInspector inputOI;

        // For PARTIAL2 and FINAL: ObjectInspector for partial aggregations (serialized bitmaps)
        private transient BinaryObjectInspector internalMergeOI;

        @Override
        public ObjectInspector init(Mode m, ObjectInspector[] parameters)
                throws HiveException {
            super.init(m, parameters);
            // Both PARTIAL1 and COMPLETE feed raw rows to iterate(); only PARTIAL2 and FINAL
            // receive serialized partial bitmaps. COMPLETE used to fall into the binary cast.
            if (m == Mode.PARTIAL1 || m == Mode.COMPLETE) {
                this.inputOI = (PrimitiveObjectInspector) parameters[0];
            } else {
                this.internalMergeOI = (BinaryObjectInspector) parameters[0];
            }
            // Partial and final results are both serialized bitmaps.
            return PrimitiveObjectInspectorFactory.javaByteArrayObjectInspector;
        }

        /** Aggregation state: the bitmap collected so far. */
        @AggregationType(estimable = true)
        static class BitmapAgg extends AbstractAggregationBuffer {
            Roaring64Bitmap bitmap;
        }

        @Override
        public void reset(AggregationBuffer agg) throws HiveException {
            ((BitmapAgg) agg).bitmap = new Roaring64Bitmap();
        }

        @Override
        public AggregationBuffer getNewAggregationBuffer() throws HiveException {
            BitmapAgg result = new BitmapAgg();
            reset(result);
            return result;
        }

        /**
         * Adds one raw value to the bitmap; NULL values are skipped.
         *
         * @throws HiveException if the value cannot be converted to a long
         */
        @Override
        public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveException {
            assert (parameters.length == 1);
            Object value = parameters[0];
            if (value == null) {
                return;
            }
            try {
                long row = PrimitiveObjectInspectorUtils.getLong(value, inputOI);
                addBitmap(row, (BitmapAgg) agg);
            } catch (NumberFormatException e) {
                throw new HiveException(e);
            }
        }

        /**
         * @return the serialized bitmap; an empty bitmap when no input was aggregated
         * @throws HiveException if the bitmap cannot be serialized
         */
        @Override
        public Object terminate(AggregationBuffer agg) throws HiveException {
            BitmapAgg buffer = (BitmapAgg) agg;
            try {
                return BitmapUtil.serializeToBytes(buffer.bitmap);
            } catch (IOException e) {
                throw new HiveException(e);
            }
        }

        /**
         * ORs one serialized partial bitmap into the buffer; NULL partials are skipped.
         *
         * @throws HiveException if the partial cannot be deserialized
         */
        @Override
        public void merge(AggregationBuffer agg, Object partial) throws HiveException {
            if (partial == null) {
                return;
            }
            byte[] partialResult = this.internalMergeOI.getPrimitiveJavaObject(partial);
            if (partialResult == null) {
                return;
            }

            BitmapAgg buffer = (BitmapAgg) agg;
            try {
                buffer.bitmap.or(BitmapUtil.deserializeToBitmap(partialResult));
            } catch (IOException e) {
                throw new HiveException(e);
            }
        }

        @Override
        public Object terminatePartial(AggregationBuffer agg) throws HiveException {
            return terminate(agg);
        }

        private void addBitmap(long newRow, BitmapAgg myagg) {
            myagg.bitmap.add(newRow);
        }
    }
}
--------------------------------------------------------------------------------
/src/test/java/com/hive/bitmap/BitmapUDFTest.java:
--------------------------------------------------------------------------------
package com.hive.bitmap;

import org.apache.spark.SparkConf;
import org.apache.spark.sql.SparkSession;
import org.junit.Before;
import org.junit.Test;


/**
 * Smoke tests that register the bitmap functions in a local Spark session with Hive support
 * and print the result of sample queries.
 */
public class BitmapUDFTest {

    /** Function name and implementing class for every function under test. */
    private static final String[][] FUNCTIONS = {
        {"to_bitmap", "com.hive.bitmap.udf.ToBitmapUDAF"},
        {"bitmap_union", "com.hive.bitmap.udf.BitmapUnionUDAF"},
        {"bitmap_count", "com.hive.bitmap.udf.BitmapCountUDF"},
        {"bitmap_and", "com.hive.bitmap.udf.BitmapAndUDF"},
        {"bitmap_or", "com.hive.bitmap.udf.BitmapOrUDF"},
        {"bitmap_xor", "com.hive.bitmap.udf.BitmapXorUDF"},
        {"bitmap_to_array", "com.hive.bitmap.udf.BitmapToArrayUDF"},
        {"bitmap_from_array", "com.hive.bitmap.udf.BitmapFromArrayUDF"},
        {"bitmap_contains", "com.hive.bitmap.udf.BitmapContainsUDF"},
        {"bitmap_intersect", "com.hive.bitmap.udf.BitmapIntersectUDAF"},
    };

    private SparkConf sparkConf = new SparkConf().setAppName("build job").set("log.level", "ERROR");

    private SparkSession spark = SparkSession.builder().enableHiveSupport()
            .master("local")
            .config(sparkConf).getOrCreate();

    /**
     * Registers every function before each test. {@code getOrCreate()} can hand back a session
     * that an earlier test already registered the functions in, so each one is dropped first
     * to keep the registration repeatable.
     */
    @Before
    public void init() {
        for (String[] function : FUNCTIONS) {
            spark.sql("DROP TEMPORARY FUNCTION IF EXISTS " + function[0]);
            spark.sql("CREATE TEMPORARY FUNCTION " + function[0] + " AS '" + function[1] + "'");
        }
    }

    @Test
    public void bitmapToArrayUDFTest() {
        spark.sql("select bitmap_count(bitmap_from_array(array(1,2,3,4,5))) AS `cnt=5`").show();
        spark.sql("select bitmap_to_array(bitmap_from_array(array(1,2,3,4,5)))").show();
        spark.sql("select bitmap_to_array(bitmap_and(bitmap_from_array(array(1,2,3,4,5)),bitmap_from_array(array(1,2))))").show();
        spark.sql("select bitmap_to_array(bitmap_or(bitmap_from_array(array(1,2,3)),bitmap_from_array(array(5))))").show();
        spark.sql("select bitmap_to_array(bitmap_xor(bitmap_from_array(array(1,2,3)),bitmap_from_array(array(3))))").show();
    }

    @Test
    public void bitmapContainsUDFTest() {
        spark.sql("select bitmap_contains(bitmap_from_array(array(1,2,3)),2)").show();
        spark.sql("select bitmap_contains(bitmap_from_array(array(1,2,3)),bitmap_from_array(array(1,2,3)))").show();
        spark.sql("select bitmap_contains(bitmap_from_array(array(1,2,3)),bitmap_from_array(array(1,2,3,4)))").show();
        spark.sql("select bitmap_contains(bitmap_from_array(array(1,2,3)),cast( null as binary))").show();
    }

    @Test
    public void bitmapUnionIntersectUDAFTest() {
        String s = "select\n" +
                "\tbitmap_to_array(bitmap_intersect(val)) as `r1=[3]`,\n" +
                "\tbitmap_to_array(bitmap_union(val)) as `r2=[1,2,3,5,6]`\n" +
                "from\n" +
                "(\n" +
                "\tselect bitmap_from_array(array(1,2,3)) as val\n" +
                "\tunion all\n" +
                "\tselect bitmap_from_array(array(1,3,5)) as val\n" +
                "\tunion all\n" +
                "\tselect bitmap_from_array(array(2,3,6)) as val\n" +
                ")t";
        spark.sql(s).show();
    }
}

--------------------------------------------------------------------------------