├── README.md
├── 商业组第一名
│   └── 易观OLAP-PingCAP.pptx
├── 开源组第一名
│   ├── 0001-Add-AggregateFunctionPath.patch
│   ├── README.md
│   ├── from_24s_to_0_5s.vectorlinex.pptx
│   ├── readme.txt
│   ├── tools
│   │   ├── README.md
│   │   ├── a.sql
│   │   ├── col.model
│   │   ├── createtable.go
│   │   ├── import.sh
│   │   ├── index.go
│   │   ├── process_data.sh
│   │   ├── q.sh
│   │   ├── query_sql.sh
│   │   ├── queryx.sh
│   │   └── to_csv.sh
│   └── 易观OLAP比赛源码-向量线科技.zip
└── 易观
    ├── AggregationLDCount.java
    └── AggregationLDSum.java

/README.md:
--------------------------------------------------------------------------------
1 | # olap
2 | Analysys (易观) OLAP contest
3 | 
4 | 
5 | Analysys currently runs the funnel on Presto: the ordered funnel conversion is implemented as custom UDAFs (see AggregationLDCount.java and AggregationLDSum.java). The contest test cases are implemented as follows:
6 | 
7 | 1. January 2017, 7-day time window, event sequence 10001, 10004, 10008; result [3999974, 3995900, 3608934], 21 s:
8 | SELECT ld_sum(xwho_state, 3)
9 | FROM (SELECT ld_count(xwhen, 7 * 86400000, xwhat_id, '10001,10004,10008') AS xwho_state
10 | FROM t_funnel_devicelog
11 | WHERE day >= '20170101'
12 | AND day <= '20170131'
13 | AND xwhat_id IN (10004, 10001, 10008)
14 | GROUP BY xwho
15 | ) a;
16 | 
17 | 2. January 2017, 3-day time window, event sequence 10004, 10008, 10010; result [3999422, 3573367, 697506], 11 s:
18 | SELECT ld_sum(xwho_state, 3)
19 | FROM (SELECT ld_count(xwhen, 3 * 86400000, xwhat_id, '10004,10008,10010') AS xwho_state
20 | FROM t_funnel_devicelog
21 | WHERE day >= '20170101'
22 | AND day <= '20170131'
23 | AND xwhat_id IN (10004, 10010, 10008)
24 | GROUP BY xwho
25 | ) a;
26 | 
27 | 3. January 2017, 3-day time window, event sequence 10004, 10007, 10009, 10010, where the brand property of event 10004 is 'Apple'; result [3639301, 2449480, 559517, 35795], 14 s:
28 | SELECT ld_sum(xwho_state, 4)
29 | FROM (SELECT ld_count(xwhen, 3 * 86400000, xwhat_id, '10004,10007,10009,10010') AS xwho_state
30 | FROM t_funnel_devicelog
31 | WHERE day >= '20170101'
32 | AND day <= '20170131'
33 | AND (xwhat_id IN (10007, 10009, 10010)
34 | OR xwhat_id = 10004
35 | AND view_brand = 'Apple')
36 | GROUP BY xwho
37 | ) a;
38 | 
--------------------------------------------------------------------------------
/商业组第一名/易观OLAP-PingCAP.pptx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/analysys/olap/c4a0078fbd363f2e5217d76aacecec26568e04d4/商业组第一名/易观OLAP-PingCAP.pptx
--------------------------------------------------------------------------------
/开源组第一名/0001-Add-AggregateFunctionPath.patch:
--------------------------------------------------------------------------------
1 | From 02075e1e1cbc023787d6f01f524d472f552ca051 Mon Sep 17 00:00:00 2001
2 | From: flow
3 | Date: Sun, 17 Sep 2017 15:49:46 +0800
4 | Subject: [PATCH] Add AggregateFunctionPath
5 | 
6 | ---
7 |  dbms/CMakeLists.txt | 2 +
8 |  .../AggregateFunctions/AggregateFunctionPath.cpp | 32 +++
9 |  .../src/AggregateFunctions/AggregateFunctionPath.h | 249 +++++++++++++++++++++
10 |  .../registerAggregateFunctions.cpp | 2 +
11 |  .../Storages/MergeTree/MergedBlockOutputStream.cpp | 8 +-
12 |  .../Storages/MergeTree/MergedBlockOutputStream.h | 8 +-
13 |  6 files changed, 296 insertions(+), 5 deletions(-)
14 |  create mode 100644 dbms/src/AggregateFunctions/AggregateFunctionPath.cpp
15 |  create mode 100644 dbms/src/AggregateFunctions/AggregateFunctionPath.h
16 | 
17 | diff --git a/dbms/CMakeLists.txt b/dbms/CMakeLists.txt
18 | index 0dcf288..e3cbbe9 100644
19 | --- a/dbms/CMakeLists.txt
20 | +++ b/dbms/CMakeLists.txt
21 | @@ -177,6 +177,7 @@ target_link_libraries (dbms
22 |      ${Boost_SYSTEM_LIBRARY}
23 |      ${Poco_Data_LIBRARY}
24 |      btrie
25 | +    daemon
26 |  )
27 | 
28 |  if (Poco_DataODBC_FOUND)
29 | @@ -212,6 +213,7 @@ target_include_directories
(dbms PUBLIC ${MYSQLXX_INCLUDE_DIR}) 30 | target_include_directories (dbms PRIVATE ${POCOEXT_INCLUDE_DIR}) 31 | target_include_directories (dbms PRIVATE ${COMMON_INCLUDE_DIR}) 32 | target_include_directories (dbms PUBLIC ${DBMS_INCLUDE_DIR}) 33 | +target_include_directories (dbms PUBLIC ${ClickHouse_SOURCE_DIR}/libs/libdaemon/include) 34 | 35 | if (ENABLE_TESTS) 36 | add_subdirectory (tests) 37 | diff --git a/dbms/src/AggregateFunctions/AggregateFunctionPath.cpp b/dbms/src/AggregateFunctions/AggregateFunctionPath.cpp 38 | new file mode 100644 39 | index 0000000..c3387f8 40 | --- /dev/null 41 | +++ b/dbms/src/AggregateFunctions/AggregateFunctionPath.cpp 42 | @@ -0,0 +1,32 @@ 43 | +#include 44 | +#include 45 | +#include 46 | + 47 | +namespace DB 48 | +{ 49 | + 50 | +namespace 51 | +{ 52 | + 53 | +AggregateFunctionPtr createAggregateFunctionPath(const std::string & name, const DataTypes & argument_types, const Array & params) 54 | +{ 55 | + 56 | + if (params.size() <= 0 || params.size() > 32) 57 | + throw Exception("Aggregate function " + name + " requires (1, 32] event ids.", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); 58 | + 59 | + AggregateFunctionPtr res(createWithNumericType(*argument_types[1])); 60 | + 61 | + if (!res) 62 | + throw Exception("Illegal type " + argument_types[1]->getName() + " of argument 2 for aggregate function " + name, ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); 63 | + 64 | + return res; 65 | +} 66 | + 67 | +} 68 | + 69 | +void registerAggregateFunctionPath(AggregateFunctionFactory & factory) 70 | +{ 71 | + factory.registerFunction("path", createAggregateFunctionPath, AggregateFunctionFactory::CaseInsensitive); 72 | +} 73 | + 74 | +} 75 | diff --git a/dbms/src/AggregateFunctions/AggregateFunctionPath.h b/dbms/src/AggregateFunctions/AggregateFunctionPath.h 76 | new file mode 100644 77 | index 0000000..14d9265 78 | --- /dev/null 79 | +++ b/dbms/src/AggregateFunctions/AggregateFunctionPath.h 80 | @@ -0,0 +1,249 @@ 81 | +#pragma once 82 | + 83 | +#include 84 | +#include 85 | +#include 86 | +#include 87 | + 88 | +#include 89 | + 90 | +#include 91 | +#include 92 | + 93 | +#include 94 | + 95 | + 96 | +namespace DB 97 | +{ 98 | +struct ComparePairFirst final 99 | +{ 100 | + template 101 | + bool operator()(const std::pair & lhs, const std::pair & rhs) const 102 | + { 103 | + return lhs.first < rhs.first; 104 | + } 105 | +}; 106 | + 107 | +struct AggregateFunctionPathData final 108 | +{ 109 | + using Allocator = MixedArenaAllocator<4096>; 110 | + using TimestampEvent = std::pair; 111 | + using TimestampEvents = PODArray; 112 | + using Comparator = ComparePairFirst; 113 | + 114 | + bool done = false; 115 | + UInt32 level = 0; 116 | + 117 | + bool sorted = true; 118 | + TimestampEvents timestamp_events; 119 | + 120 | + void add(UInt64 timestamp, UInt16 event, Arena * arena) 121 | + { 122 | + if (done) 123 | + throw Exception("This group is done! Looks like you forgot to correctly manage your data among nodes"); 124 | + 125 | + // Since most events should have already been sorted by timestamp. 
126 | + if (sorted && timestamp_events.size() > 0 && timestamp_events.back().first > timestamp) 127 | + sorted = false; 128 | + timestamp_events.push_back(std::make_pair(timestamp, event), arena); 129 | + } 130 | + 131 | + void merge(const AggregateFunctionPathData & other, Arena * arena) 132 | + { 133 | + done = done || other.done; 134 | + level = std::max(level, other.level); 135 | + if (done) 136 | + return; 137 | + 138 | + const auto size = timestamp_events.size(); 139 | + 140 | + timestamp_events.insert(std::begin(other.timestamp_events), std::end(other.timestamp_events), arena); 141 | + 142 | + /// either sort whole container or do so partially merging ranges afterwards 143 | + if (!sorted && !other.sorted) 144 | + std::sort(std::begin(timestamp_events), std::end(timestamp_events), Comparator{}); 145 | + else 146 | + { 147 | + const auto begin = std::begin(timestamp_events); 148 | + const auto middle = std::next(begin, size); 149 | + const auto end = std::end(timestamp_events); 150 | + 151 | + if (!sorted) 152 | + std::sort(begin, middle, Comparator{}); 153 | + 154 | + if (!other.sorted) 155 | + std::sort(middle, end, Comparator{}); 156 | + 157 | + std::inplace_merge(begin, middle, end, Comparator{}); 158 | + } 159 | + 160 | + sorted = true; 161 | + } 162 | + 163 | + void sort() 164 | + { 165 | + if (!sorted) 166 | + { 167 | + std::sort(std::begin(timestamp_events), std::end(timestamp_events), Comparator{}); 168 | + sorted = true; 169 | + } 170 | + } 171 | +}; 172 | + 173 | + 174 | +template 175 | +class AggregateFunctionPath final : public IBinaryAggregateFunction> 176 | +{ 177 | +private: 178 | + using Events = UInt16[32]; 179 | + 180 | + UInt64 window; 181 | + Events check_events; 182 | + size_t check_events_size; 183 | + 184 | + // return the index + 1 of event 185 | + inline size_t findEventLevel(UInt16 event) const 186 | + { 187 | + for (size_t i = 0; i < check_events_size; i++) 188 | + { 189 | + if (event == check_events[i]) 190 | + { 191 | + return i + 1; 192 | + } 193 | + } 194 | + return 0xFFFF; 195 | + } 196 | + 197 | + UInt32 match(const AggregateFunctionPathData & data) const 198 | + { 199 | + if (data.done) 200 | + return data.level; 201 | + 202 | + if (check_events_size == 1) 203 | + return 1; 204 | + 205 | + const_cast(data).sort(); 206 | + 207 | + auto total_len = data.timestamp_events.size(); 208 | + size_t max_level = 0; 209 | + for (size_t i = total_len; i > 0; i--) 210 | + { 211 | + auto event = (data.timestamp_events)[i - 1].second; 212 | + auto event_level = findEventLevel(event); 213 | + if (event_level <= max_level) 214 | + continue; 215 | + 216 | + if (search(data, i, event_level)) 217 | + { 218 | + max_level = event_level; 219 | + if (max_level == check_events_size) 220 | + break; 221 | + } 222 | + } 223 | + 224 | + return max_level; 225 | + } 226 | + 227 | + 228 | + inline bool search(const AggregateFunctionPathData & data, size_t end_event_pos, size_t end_event_level) const 229 | + { 230 | + if (end_event_level == 1) 231 | + { 232 | + return true; 233 | + } 234 | + auto edge_time = (data.timestamp_events)[end_event_pos - 1].first - window; 235 | + auto event_level = end_event_level; 236 | + for (size_t i = end_event_pos; i > 0; i--) 237 | + { 238 | + auto time_event = (data.timestamp_events)[i - 1]; 239 | + if (time_event.first < edge_time) 240 | + return false; 241 | + if (check_events[event_level - 1] == time_event.second) 242 | + { 243 | + event_level--; 244 | + if (event_level == 0) 245 | + return true; 246 | + } 247 | + } 248 | + return false; 249 | 
+ } 250 | + 251 | +public: 252 | + String getName() const override 253 | + { 254 | + return "path"; 255 | + } 256 | + 257 | + DataTypePtr getReturnType() const override 258 | + { 259 | + return std::make_shared(); 260 | + } 261 | + 262 | + void setParameters(const Array & params) override 263 | + { 264 | + if (params.size() <= 1 || params.size() > 33) 265 | + throw Exception("Aggregate function " + getName() + " requires (windows_in_seconds, 1_to_32_event_ids).", 266 | + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); 267 | + 268 | + // Only support up to seconds in param, and timestamp is in milliseconds 269 | + window = params[0].safeGet() * 1000; 270 | + 271 | + check_events_size = params.size() - 1; 272 | + for (size_t i = 1; i < params.size(); i++) 273 | + { 274 | + UInt64 p = params[i].safeGet(); 275 | + check_events[i - 1] = (UInt16)p; 276 | + } 277 | + } 278 | + 279 | + void setArgumentsImpl(const DataTypes & arguments) 280 | + { 281 | + DataTypePtr timestampType = arguments[0]; 282 | + DataTypePtr eventType = arguments[1]; 283 | + 284 | + if (!(timestampType->getName() == "UInt64")) 285 | + throw Exception("Illegal type " + timestampType->getName() + " of argument for aggregate function " + getName() 286 | + + " (1 arg, timestamp: UInt64)", 287 | + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); 288 | + if (!(eventType->isNumeric())) 289 | + throw Exception( 290 | + "Illegal type " + eventType->getName() + " of argument for aggregate function " + getName() + " (2 arg, event id: numeric)", 291 | + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); 292 | + } 293 | + 294 | + void addImpl( 295 | + AggregateDataPtr place, const IColumn & column_timestamp, const IColumn & column_event, size_t row_num, Arena * arena) const 296 | + { 297 | + this->data(place).add( // 298 | + static_cast &>(column_timestamp).getData()[row_num], 299 | + static_cast &>(column_event).getData()[row_num], 300 | + arena); 301 | + } 302 | + 303 | + void merge(AggregateDataPtr place, ConstAggregateDataPtr rhs, Arena * arena) const override 304 | + { 305 | + this->data(place).merge(this->data(rhs), arena); 306 | + } 307 | + 308 | + void serialize(ConstAggregateDataPtr place, WriteBuffer & buf) const override 309 | + { 310 | + writeVarUInt(match(this->data(place)), buf); 311 | + } 312 | + 313 | + void deserialize(AggregateDataPtr place, ReadBuffer & buf, Arena *) const override 314 | + { 315 | + readVarUInt(this->data(place).level, buf); 316 | + this->data(place).done = true; 317 | + } 318 | + 319 | + void insertResultInto(ConstAggregateDataPtr place, IColumn & to) const override 320 | + { 321 | + static_cast(to).getData().push_back(match(this->data(place))); 322 | + } 323 | + 324 | + bool allocatesMemoryInArena() const override 325 | + { 326 | + return true; 327 | + } 328 | +}; 329 | +} 330 | diff --git a/dbms/src/AggregateFunctions/registerAggregateFunctions.cpp b/dbms/src/AggregateFunctions/registerAggregateFunctions.cpp 331 | index 5c8646f..f055e18 100644 332 | --- a/dbms/src/AggregateFunctions/registerAggregateFunctions.cpp 333 | +++ b/dbms/src/AggregateFunctions/registerAggregateFunctions.cpp 334 | @@ -5,6 +5,7 @@ 335 | namespace DB 336 | { 337 | 338 | +void registerAggregateFunctionPath(AggregateFunctionFactory & factory); 339 | void registerAggregateFunctionAvg(AggregateFunctionFactory & factory); 340 | void registerAggregateFunctionCount(AggregateFunctionFactory & factory); 341 | void registerAggregateFunctionGroupArray(AggregateFunctionFactory & factory); 342 | @@ -30,6 +31,7 @@ void registerAggregateFunctions() 343 | { 344 | 
auto & factory = AggregateFunctionFactory::instance();
345 | 
346 | +    registerAggregateFunctionPath(factory);
347 |      registerAggregateFunctionAvg(factory);
348 |      registerAggregateFunctionCount(factory);
349 |      registerAggregateFunctionGroupArray(factory);
350 | 
diff --git a/dbms/src/Storages/MergeTree/MergedBlockOutputStream.cpp b/dbms/src/Storages/MergeTree/MergedBlockOutputStream.cpp
351 | index 4936cd8..6e0b05f 100644
352 | --- a/dbms/src/Storages/MergeTree/MergedBlockOutputStream.cpp
353 | +++ b/dbms/src/Storages/MergeTree/MergedBlockOutputStream.cpp
354 | @@ -37,7 +37,7 @@ IMergedBlockOutputStream::IMergedBlockOutputStream(
355 |      min_compress_block_size(min_compress_block_size_),
356 |      max_compress_block_size(max_compress_block_size_),
357 |      aio_threshold(aio_threshold_),
358 | -    compression_method(compression_method_)
359 | +    _compression_method(compression_method_)
360 |  {
361 |  }
362 | 
363 | @@ -69,7 +69,7 @@ void IMergedBlockOutputStream::addStream(
364 |              path + escaped_column_name, NULL_MAP_EXTENSION,
365 |              path + escaped_column_name, NULL_MARKS_FILE_EXTENSION,
366 |              max_compress_block_size,
367 | -            compression_method,
368 | +            compression_method(name),
369 |              estimated_size,
370 |              aio_threshold);
371 | 
372 | @@ -91,7 +91,7 @@ void IMergedBlockOutputStream::addStream(
373 |              path + escaped_size_name, DATA_FILE_EXTENSION,
374 |              path + escaped_size_name, MARKS_FILE_EXTENSION,
375 |              max_compress_block_size,
376 | -            compression_method,
377 | +            compression_method(name),
378 |              estimated_size,
379 |              aio_threshold);
380 |      }
381 | @@ -105,7 +105,7 @@ void IMergedBlockOutputStream::addStream(
382 |          path + escaped_column_name, DATA_FILE_EXTENSION,
383 |          path + escaped_column_name, MARKS_FILE_EXTENSION,
384 |          max_compress_block_size,
385 | -        compression_method,
386 | +        compression_method(name),
387 |          estimated_size,
388 |          aio_threshold);
389 |  }
390 | diff --git a/dbms/src/Storages/MergeTree/MergedBlockOutputStream.h b/dbms/src/Storages/MergeTree/MergedBlockOutputStream.h
391 | index 92a0dda..2749980 100644
392 | --- a/dbms/src/Storages/MergeTree/MergedBlockOutputStream.h
393 | +++ b/dbms/src/Storages/MergeTree/MergedBlockOutputStream.h
394 | @@ -81,9 +81,15 @@ protected:
395 | 
396 |      size_t aio_threshold;
397 | 
398 | -    CompressionMethod compression_method;
399 | +    CompressionMethod compression_method(const String & name){
400 | +        if(endsWith(name, "_nc")){
401 | +            return CompressionMethod::NONE;
402 | +        }
403 | +        return _compression_method;
404 | +    }
405 | 
406 |  private:
407 | +    CompressionMethod _compression_method;
408 |      /// Internal version of writeData.
409 |      void writeDataImpl(const String & name, const IDataType & type, const IColumn & column,
410 |          OffsetColumns & offset_columns, size_t level, bool write_array_data, bool skip_offsets);
411 | -- 
412 | 2.10.1 (Apple Git-78)
413 | 
414 | 
--------------------------------------------------------------------------------
/开源组第一名/README.md:
--------------------------------------------------------------------------------
1 | ## Usage
2 | 
3 | Pull the ClickHouse source from GitHub, then apply the patch:
4 | 
5 | ```
6 | git clone git@github.com:yandex/ClickHouse.git
7 | cd ClickHouse
8 | git checkout ab7672f329f7736756542268178e6f9f7e32325a
9 | git checkout -b path
10 | git apply 0001-Add-AggregateFunctionPath.patch
11 | ```
12 | 
13 | 
14 | ## Build
15 | Install all dependencies following the docs: https://clickhouse.yandex/docs/en/development/build.html
16 | 
17 | Then build the clickhouse binary with:
18 | 
19 | ```
20 | mkdir build
21 | cd build
22 | cmake ..
23 | make -j 8 clickhouse
24 | ls dbms/src/Server/clickhouse
25 | ```
26 | 
27 | The resulting executable:
28 | dbms/src/Server/clickhouse
29 | 
30 | 
31 | 
32 | 
33 | 
34 | ## Deployment
35 | 
36 | 
37 | 
38 | Install the dependencies on every target node:
39 | 
40 | ```
41 | sudo yum -y install rpm-build redhat-rpm-config gcc-c++ readline-devel\
42 | unixODBC-devel subversion python-devel git wget openssl-devel m4 createrepo\
43 | libicu-devel zlib-devel libtool-ltdl-devel
44 | ```
45 | 
46 | Then put the clickhouse binary into the /data/ccc/ directory, named ccc:
47 | 
48 | ```
49 | mkdir -p /data/ccc/
50 | cp clickhouse /data/ccc/ccc
51 | ```
52 | 
53 | Copy dbms/src/Server/config.xml from the source tree into /data/ccc/ and adjust the relevant settings:
54 | 
55 | ```
56 | /var/lib/clickhouse/        (data directory)
57 | /var/lib/clickhouse/tmp/    (temporary directory)
58 | The listen port and addresses:
59 | 9000
60 | 
61 | 
62 | ::1
63 | 127.0.0.1
64 | ```
65 | 
66 | Start the server:
67 | `./ccc --server --config-file=/data/ccc/config.xml`
68 | 
69 | Start the client:
70 | `./ccc --client --host 127.0.0.1 --port 9000`
71 | 
72 | 
73 | 
74 | ## Data import
75 | 
76 | Create the local table on every ClickHouse node:
77 | 
78 | ```
79 | CREATE TABLE event (
80 | user_id UInt32,
81 | timestamp_nc UInt64,
82 | event_id_nc UInt32,
83 | event_name String,
84 | event_tag_brand String,
85 | event_tag_content String,
86 | event_tag_how Int32,
87 | event_tag_page_num Int32,
88 | event_tag_price Int32,
89 | event_tag_price_all Int32,
90 | event_date_nc Date)
91 | ENGINE = MergeTree(event_date_nc, (user_id, timestamp_nc, event_date_nc), 8192);
92 | ```
93 | 
94 | Then create the distributed table on every ClickHouse node:
95 | 
96 | ```
97 | CREATE TABLE dist_event (
98 | user_id UInt32,
99 | timestamp_nc UInt64,
100 | event_id_nc UInt32,
101 | event_name String,
102 | event_tag_brand String,
103 | event_tag_content String,
104 | event_tag_how Int32,
105 | event_tag_page_num Int32,
106 | event_tag_price Int32,
107 | event_tag_price_all Int32,
108 | event_date_nc Date)
109 | ENGINE = Distributed(default, default, event, user_id);
110 | ```
111 | 
112 | The Distributed table needs a cluster definition added to the config file; see https://clickhouse.yandex/docs/en/table_engines/distributed.html (a minimal sketch follows this README).
113 | 
114 | Then use the tools in the tools folder to preprocess the data files and load them into the dist_event table; see `tools/README.md`.
115 | 
116 | 
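The `default` cluster named in the Distributed engine above is not defined anywhere in this repo. A minimal sketch of the config.xml fragment, assuming the four hosts listed in tools/q.sh each carry one shard (hosts and ports are taken from this repo's scripts; the single-replica layout is an assumption, not part of the original submission):

```
<remote_servers>
    <default>
        <shard><replica><host>10.9.161.77</host><port>9000</port></replica></shard>
        <shard><replica><host>10.9.113.205</host><port>9000</port></replica></shard>
        <shard><replica><host>10.9.83.235</host><port>9000</port></replica></shard>
        <shard><replica><host>10.9.169.253</host><port>9000</port></replica></shard>
    </default>
</remote_servers>
```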
--------------------------------------------------------------------------------
/开源组第一名/from_24s_to_0_5s.vectorlinex.pptx:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/analysys/olap/c4a0078fbd363f2e5217d76aacecec26568e04d4/开源组第一名/from_24s_to_0_5s.vectorlinex.pptx
--------------------------------------------------------------------------------
/开源组第一名/readme.txt:
--------------------------------------------------------------------------------
1 | Please download the zip archive for the full source.
--------------------------------------------------------------------------------
/开源组第一名/tools/README.md:
--------------------------------------------------------------------------------
1 | Data import and testing guide
2 | ---
3 | 
4 | #### Prerequisites
5 | - Build and deploy the ClickHouse service as described in the README one directory up
6 | - Install a Go environment for the data-processing tools
7 | 
8 | #### Data processing
9 | 
10 | - Install the third-party JSON package to speed up parsing
11 | ```
12 | ## third-party JSON package for faster JSON parsing
13 | go get -u github.com/json-iterator/go
14 | 
15 | ```
16 | 
17 | - Scan the data files once to detect the dynamic schema and generate the SQL and the model file
18 | ```
19 | go run createtable.go -files=`ls 2017*` | tee create_table.sql
20 | ```
21 | 
22 | - Process the data with the model file and write the result to the output directory
23 | ```
24 | ## If one node cannot hold all the data, spread it across machines by month and adjust `ls 2017XXX` accordingly
25 | output="`pwd`/output"
26 | mkdir -p $output
27 | for f in `ls 2017*`;do
28 |     go run index.go -file="$f" -out="`pwd`/output"
29 | done
30 | ```
31 | 
32 | - Generate the CSV files
33 | ```
34 | ## For the real data the "id" prefix must be stripped from user_id to speed up GROUP BY
35 | ## The prefix argument (2017) selects data files by name prefix; adjust it when several machines share the work
36 | sh to_csv.sh 2017
37 | ```
38 | 
39 | 
40 | - Import the data
41 | ```
42 | ## The prefix argument (2017) selects data files by name prefix; adjust it when several machines share the work
43 | sh import.sh 2017
44 | ```
45 | 
46 | - Merge data parts by month to speed up queries
47 | ```
48 | # https://clickhouse.yandex/docs/en/query_language/queries.html#optimize
49 | # run the following in each node's SQL shell to merge the parts month by month
50 | OPTIMIZE TABLE event PARTITION 201706 FINAL;
51 | OPTIMIZE TABLE event PARTITION 201707 FINAL;
52 | OPTIMIZE TABLE event PARTITION 201708 FINAL;
53 | ```
54 | 
55 | - Queries
56 |   - Test SQL: funnel for July and August, conversion path 10004, 10008, 10009, 10010, where the brand tag of event 10004 is Apple or LianX, with a 30-day window
57 |   - Set the clients variable in q.sh to the hosts of your nodes
58 |   - Using the a.sql example, run: `sh query_sql.sh a.sql`
59 |   - Result: `4000000 4000000 3999994 3999936`
60 | 
61 | 
62 | 
63 | 
64 | 
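a.sql (the next file) is the full contest query; at its core is the path() aggregate added by the patch, called as path(window_in_seconds, event_ids...)(timestamp, event_id). A minimal sketch of the call shape against the tables from the README above (the 7-day window of 604800 s and the two event ids are illustrative values, not contest parameters):

```
-- max funnel level reached per user for 10004 -> 10008 within a 7-day window
SELECT user_id,
       path(604800, 10004, 10008)(timestamp_nc, event_id_nc) AS level
FROM event
GROUP BY user_id
LIMIT 10;
```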
--------------------------------------------------------------------------------
/开源组第一名/tools/a.sql:
--------------------------------------------------------------------------------
1 | SELECT
2 |     sumIf(c, level >= 1) AS _1,
3 |     sumIf(c, level >= 2) AS _2,
4 |     sumIf(c, level >= 3) AS _3,
5 |     sumIf(c, level >= 4) AS _4
6 | FROM
7 | (
8 |     SELECT
9 |         level,
10 |         count(*) AS c
11 |     FROM
12 |     (
13 |         SELECT
14 |             user_id,
15 |             path(2592000, 10004, 10008, 10009, 10010)(timestamp_nc, event_id_nc) AS level
16 |         FROM event
17 |         WHERE ( (event_date_nc >= toDate('2017-07-01')) AND (event_date_nc <= toDate('2017-08-31')) AND ( (event_id_nc IN (10008, 10009, 10010)) OR
18 |             (event_id_nc = 10004 AND (event_tag_brand = 'Apple' or event_tag_brand = 'LianX') ) ) )
19 |         GROUP BY user_id
20 |     )
21 |     GROUP BY level
22 |     ORDER BY level ASC
23 | );
--------------------------------------------------------------------------------
/开源组第一名/tools/col.model:
--------------------------------------------------------------------------------
1 | {"price":"Int32","content":"String","page_num":"Int32","price_all":"Int32","how":"Int32","brand":"String"}
--------------------------------------------------------------------------------
/开源组第一名/tools/createtable.go:
--------------------------------------------------------------------------------
1 | package main
2 | 
3 | import (
4 |     "bufio"
5 |     "bytes"
6 |     "flag"
7 |     "fmt"
8 |     "log"
9 |     "os"
10 |     "sort"
11 |     "strings"
12 |     "sync"
13 |     "unsafe"
14 | 
15 |     json "github.com/json-iterator/go"
16 | )
17 | 
18 | // This script scans the data of all input files and generates the modelFile in the current directory.
19 | var (
20 |     files  string
21 |     tagMap = make(map[string]string)
22 |     lock   sync.Mutex
23 |     index  = 4
24 | 
25 |     emptyjs   = []byte("{}")
26 |     tabBs     = []byte("\t")
27 |     modelFile = "col.model"
28 |     keys      = []string{}
29 | )
30 | 
31 | func init() {
32 |     flag.StringVar(&files, "files", "", "file to load")
33 | }
34 | 
35 | func main() {
36 |     flag.Parse()
37 |     var wg sync.WaitGroup
38 |     for _, f := range strings.Split(files, " ") {
39 |         wg.Add(1)
40 |         go func(f string) { // pass f explicitly: the range variable is reused across iterations
41 |             process(f)
42 |             wg.Done()
43 |         }(f)
44 |     }
45 |     wg.Wait()
46 | 
47 |     saveModel()
48 |     sql := getSql()
49 |     fmt.Println(sql)
50 | }
51 | 
52 | func process(f string) {
53 |     r, err := os.Open(f)
54 |     if err != nil {
55 |         panic(err)
56 |     }
57 |     sc := bufio.NewScanner(r)
58 |     for sc.Scan() {
59 |         js := bytes.Split(sc.Bytes(), tabBs)[index]
60 |         if bytes.Equal(js, emptyjs) {
61 |             continue
62 |         }
63 |         it := make(map[string]interface{})
64 |         err := json.Unmarshal(js, &it)
65 |         if err != nil {
66 |             log.Print(err.Error())
67 |         } else {
68 |             setTag(it)
69 |         }
70 |     }
71 | }
72 | 
73 | func setTag(it map[string]interface{}) {
74 |     for k, v := range it {
75 |         if _, ok := tagMap[k]; !ok {
76 |             lock.Lock()
77 |             str := "Int32"
78 |             switch v.(type) {
79 |             case string:
80 |                 str = "String"
81 |             }
82 |             tagMap[k] = str
83 |             lock.Unlock()
84 |         }
85 |     }
86 | }
87 | 
88 | func saveModel() {
89 |     for k := range tagMap {
90 |         keys = append(keys, k)
91 |     }
92 |     sort.Strings(keys)
93 | 
94 |     f, err := os.OpenFile(modelFile, os.O_WRONLY|os.O_TRUNC|os.O_CREATE, 0660)
95 |     if err != nil {
96 |         panic(err.Error())
97 |     }
98 |     bs, _ := json.Marshal(tagMap)
99 |     f.Write(bs)
100 |     f.Close()
101 | }
102 | 
103 | func getSql() string {
104 |     sqlTemplate := `CREATE TABLE trend_event
105 | (
106 |     user_id UInt32,
107 |     timestamp UInt64,
108 |     event_id UInt32,
109 |     event_name String,
110 | 
111 | %s
112 |     event_date Date
113 | ) engine = MergeTree(event_date, (user_id, timestamp, event_date), 8192);
114 | `
115 |     bs := bytes.NewBuffer([]byte{})
116 |     for _, k := range keys {
117 |         v := tagMap[k]
118 |         bs.WriteString("    ")
119 |         bs.WriteString("event_tag_" + k)
120 |         bs.WriteString(" ")
121 |         bs.WriteString(v)
122 |         bs.WriteString(",\n")
123 |     }
124 |     return fmt.Sprintf(sqlTemplate, bs.String())
125 | }
126 | 
127 | func String2Bytes(s string) []byte {
128 |     x := (*[2]uintptr)(unsafe.Pointer(&s))
129 |     h := [3]uintptr{x[0], x[1], x[1]}
130 |     return *(*[]byte)(unsafe.Pointer(&h))
131 | }
132 | 
133 | func Bytes2String(b []byte) string {
134 |     return *(*string)(unsafe.Pointer(&b))
135 | }
136 | 
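For reference, running createtable.go over files that yield the col.model shown above should print roughly the following statement (reconstructed from the template in getSql() and the sorted model keys; exact whitespace may differ):

```
CREATE TABLE trend_event
(
    user_id UInt32,
    timestamp UInt64,
    event_id UInt32,
    event_name String,

    event_tag_brand String,
    event_tag_content String,
    event_tag_how Int32,
    event_tag_page_num Int32,
    event_tag_price Int32,
    event_tag_price_all Int32,
    event_date Date
) engine = MergeTree(event_date, (user_id, timestamp, event_date), 8192);
```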
--------------------------------------------------------------------------------
/开源组第一名/tools/import.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | 
3 | prefix=$1
4 | echo ${prefix}
5 | for f in `ls csv/${prefix}*`;do
6 |     # stream each processed file into the distributed table created in the README
7 |     cat ${f} | /data/ccc/ccc --client --query="INSERT INTO dist_event FORMAT TabSeparated"
8 | done
--------------------------------------------------------------------------------
/开源组第一名/tools/index.go:
--------------------------------------------------------------------------------
1 | package main
2 | 
3 | import (
4 |     "bufio"
5 |     "bytes"
6 |     "flag"
7 |     "fmt"
8 |     "os"
9 |     "path/filepath"
10 |     "sort"
11 |     "sync"
12 |     "unsafe"
13 | 
14 |     json "github.com/json-iterator/go"
15 | )
16 | 
17 | // Flattens the tag JSON of each row into fixed columns. The "id" prefix of user_id is stripped later, e.g.:
18 | // head output/20170501 | awk -F"\t" 'OFS="\t"{gsub("id", "",$1);$1=$1;print $0}'
19 | var (
20 |     file       string
21 |     outDir     string
22 |     tagMap     = make(map[string]string)
23 |     lock       sync.Mutex
24 |     index      = 4
25 |     emptyjs    = []byte("{}")
26 |     modelFile  = "col.model"
27 |     tabBs      = []byte("\t")
28 |     keys       []string
29 |     valueTypes []int
30 | )
31 | 
32 | const (
33 |     stringType = 1
34 |     intType    = 2
35 | )
36 | 
37 | func init() {
38 |     flag.StringVar(&file, "file", "", "file to load")
39 |     flag.StringVar(&outDir, "out", "/data/yiguan/output", "outdir path")
40 | }
41 | 
42 | func main() {
43 |     flag.Parse()
44 |     readModel()
45 |     process(file)
46 | }
47 | 
48 | func process(f string) {
49 |     r, err := os.Open(f)
50 |     if err != nil {
51 |         panic(err)
52 |     }
53 |     info, _ := r.Stat()
54 |     sc := bufio.NewScanner(r)
55 |     of, _ := os.OpenFile(filepath.Join(outDir, info.Name()), os.O_TRUNC|os.O_CREATE|os.O_RDWR, 0660)
56 |     output := bufio.NewWriter(of)
57 | 
58 |     var i = 0
59 |     for sc.Scan() {
60 |         i++
61 |         bs := bytes.Split(sc.Bytes(), tabBs)
62 |         // write fields 0-3 through unchanged
63 | 
64 |         output.Write(bs[0])
65 |         output.WriteRune('\t')
66 | 
67 |         output.Write(bs[1])
68 |         output.WriteRune('\t')
69 | 
70 |         output.Write(bs[2])
71 |         output.WriteRune('\t')
72 | 
73 |         output.Write(bs[3])
74 |         output.WriteRune('\t')
75 | 
76 |         it := make(map[string]interface{})
77 |         if !bytes.Equal(bs[4], emptyjs) {
78 |             err := json.Unmarshal(bs[4], &it)
79 |             if err != nil {
80 |                 panic(err)
81 |             }
82 |         }
83 |         for i, k := range keys {
84 |             v := valueTypes[i]
85 |             if v == stringType {
86 |                 if _, ok := it[k]; !ok {
87 |                     output.WriteRune(' ')
88 |                     output.WriteRune('\t')
89 |                     continue
90 |                 }
91 |                 output.WriteString(it[k].(string))
92 |             } else {
93 |                 if _, ok := it[k]; !ok {
94 |                     output.WriteRune('0')
95 |                     output.WriteRune('\t')
96 |                     continue
97 |                 }
98 |                 output.WriteString(fmt.Sprintf("%v", it[k]))
99 |             }
100 |             output.WriteRune('\t')
101 |         }
102 | 
103 |         // field 5: rewrite the yyyymmdd date, e.g. 20160707 -> 2016-07-07
104 |         dd := bs[5]
105 | 
106 |         output.Write(dd[:4])
107 |         output.WriteRune('-')
108 |         output.Write(dd[4:6])
109 |         output.WriteRune('-')
110 |         output.Write(dd[6:8])
111 |         output.WriteRune('\n')
112 |         if i%1000000 == 0 {
113 |             output.Flush()
114 |         }
115 |     }
116 |     output.Flush()
117 | 
118 |     fmt.Println("done => ", f)
119 | }
120 | 
121 | func readModel() {
122 |     f, _ := os.Open(modelFile)
123 |     err := json.NewDecoder(f).Decode(&tagMap)
124 |     if err != nil {
125 |         panic(err)
126 |     }
127 |     for k := range tagMap {
128 |         keys = append(keys, k)
129 |     }
130 |     sort.Strings(keys)
131 |     for _, k := range keys {
132 |         if tagMap[k] == "String" {
133 |             valueTypes = append(valueTypes, stringType)
134 |         } else {
135 |             valueTypes = append(valueTypes, intType)
136 |         }
137 |     }
138 | }
139 | 
140 | func String2Bytes(s string) []byte {
141 |     x := (*[2]uintptr)(unsafe.Pointer(&s))
142 |     h := [3]uintptr{x[0], x[1], x[1]}
143 |     return *(*[]byte)(unsafe.Pointer(&h))
144 | }
145 | 
146 | func Bytes2String(b []byte) string {
147 |     return *(*string)(unsafe.Pointer(&b))
148 | }
149 | 
150 | func cout(b []byte) {
151 |     fmt.Print(Bytes2String(b))
152 | }
153 | 
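A hypothetical illustration of the transformation index.go (above) performs; every field value below is invented, only the layout is taken from the code (tab-separated: user id, timestamp in ms, event id, event name, tag JSON, yyyymmdd date). Missing String tags become a single space (shown as <sp>), missing Int32 tags become 0, in col.model key order (brand, content, how, page_num, price, price_all), and the date is rewritten with dashes; the "id" prefix on user_id is stripped later by to_csv.sh:

```
input:   id10086  1499068800000  10004  pay_order  {"brand":"Apple","price":49}  20170703
output:  id10086  1499068800000  10004  pay_order  Apple  <sp>  0  0  49  0  2017-07-03
```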
--------------------------------------------------------------------------------
/开源组第一名/tools/process_data.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | # 2631201798 rows in total
3 | # 668726242 668904531 637996737 655574288
4 | 
5 | prefix=$1
6 | output="`pwd`/output"
7 | mkdir -p $output
8 | for f in `ls /data/zhaoshu/${prefix}*`;do
9 |     ./index -file="$f" -out="`pwd`/output"
10 | done
11 | 
--------------------------------------------------------------------------------
/开源组第一名/tools/q.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | sql=$1
3 | clients="10.9.161.77 10.9.113.205 10.9.83.235 10.9.169.253"
4 | 
5 | function fetch(){
6 |     echo "$sql" | /data/ccc/ccc --client -m --host $1
7 | }
8 | for c in $clients;do
9 |     fetch "$c" &
10 | done
11 | 
12 | wait
--------------------------------------------------------------------------------
/开源组第一名/tools/query_sql.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | sql=$1
3 | sh q.sh "`cat $sql`" | awk '{for(i=1;i<=NF;i++){a[i] += $i}}END{ for(j=1;j<=length(a);j++) {printf a[j]" "
4 | };printf "\n" }'
--------------------------------------------------------------------------------
/开源组第一名/tools/queryx.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | sql=$1
3 | sh q.sh "$sql" | awk '{for(i=1;i<=NF;i++){a[i] += $i}}END{ for(j=1;j<=length(a);j++) {printf a[j]" "
4 | };printf "\n" }'
--------------------------------------------------------------------------------
/开源组第一名/tools/to_csv.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | ## strip the "id" prefix from user_id
3 | 
4 | prefix=$1
5 | echo ${prefix}
6 | 
7 | cd output
8 | for f in `ls ${prefix}*`;do
9 |     awk -F"\t" 'OFS="\t"{gsub("id", "",$1);$1=$1;print $0}' ${f} > ../csv/${f}
10 | done
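query_sql.sh above fans a query out to every node via q.sh and then sums the per-node result columns with awk. A quick standalone check of that summing step (the same awk program; note that length() on an array relies on gawk):

```
printf '2 1\n3 2\n' | awk '{for(i=1;i<=NF;i++){a[i] += $i}}END{ for(j=1;j<=length(a);j++) {printf a[j]" "};printf "\n" }'
# prints: 5 3
```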
--------------------------------------------------------------------------------
/开源组第一名/易观OLAP比赛源码-向量线科技.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/analysys/olap/c4a0078fbd363f2e5217d76aacecec26568e04d4/开源组第一名/易观OLAP比赛源码-向量线科技.zip
--------------------------------------------------------------------------------
/易观/AggregationLDCount.java:
--------------------------------------------------------------------------------
1 | package aggregation;
2 | 
3 | import com.facebook.presto.spi.block.BlockBuilder;
4 | import com.facebook.presto.spi.function.*;
5 | import com.facebook.presto.spi.type.StandardTypes;
6 | 
7 | import io.airlift.slice.Slice;
8 | import io.airlift.slice.Slices;
9 | import state.SliceState;
10 | 
11 | import java.util.*;
12 | 
13 | /*
14 | Funnel aggregate function, step one; it can also retain the matched users.
15 | 
16 | Goal: funnel over the 20 days from Dec 1 to Dec 20, 7-day window, 3 events:
17 | select xwho, ld_count(xwhen, 7*86400000, xwhat, 'A,B,C') as xwho_state
18 | from tablename
19 | where ds >= '2016-12-01' and ds < '2016-12-21' and xwhat in ('A', 'B', 'C')
20 | group by xwho;
21 | 
22 | Output:
23 | 0001 2
24 | 0002 1
25 | 0003 2
26 | */
27 | @AggregationFunction("ld_count")
28 | public class AggregationLDCount extends AggregationBase {
29 | 
30 |     private static final int COUNT_FLAG_LENGTH = 10; // the state starts with 3 header fields (1 + 1 + 8 bytes)
31 |     private static final int COUNT_ONE_LENGTH = 8; // bytes per (event, timestamp) entry in the state (one long)
32 | 
33 |     @InputFunction
34 |     public static void input(SliceState state, // per-user state
35 |                              @SqlType(StandardTypes.BIGINT) long xwhen, // timestamp of the current event
36 |                              @SqlType(StandardTypes.BIGINT) long windows, // time window of the current query
37 |                              @SqlType(StandardTypes.VARCHAR) Slice xwhat, // name of the current event: A, B, or C
38 |                              @SqlType(StandardTypes.VARCHAR) Slice events) { // all events of the query, comma-separated
39 |         // fetch the state
40 |         Slice slice = state.getSlice();
41 | 
42 |         // initialize the event dictionary if necessary
43 |         if (!event_pos_dict.containsKey(events)) {
44 |             init_events(events, 0);
45 |         }
46 | 
47 |         // initialize the slice
48 |         if (null == slice) {
49 |             slice = Slices.allocate(COUNT_FLAG_LENGTH);
50 | 
51 |             // header: {saw event A (byte), number of events (byte), window size (long)}
52 |             slice.setByte(0, 0);
53 |             slice.setByte(1, event_pos_dict.get(events).size());
54 |             slice.setLong(2, windows);
55 | 
56 |         }
57 | 
58 |         // allocate a new slice and copy the old state into it
59 |         int slice_length = slice.length();
60 |         Slice new_slice = Slices.allocate(slice_length + COUNT_ONE_LENGTH);
61 |         new_slice.setBytes(0, slice.getBytes());
62 | 
63 |         // update the state
64 |         byte xwhat_index = event_pos_dict.get(events).get(xwhat);
65 |         if (xwhat_index == 0) {
66 |             new_slice.setByte(0, 1);
67 |         }
68 |         new_slice.setLong(slice_length, xwhen * 10 + xwhat_index); // pack: timestamp * 10 + event index
69 | 
70 |         // store the result
71 |         state.setSlice(new_slice);
72 |     }
73 | 
74 |     @CombineFunction
75 |     public static void combine(SliceState state, SliceState otherState) {
76 |         // fetch the states
77 |         Slice slice = state.getSlice();
78 |         Slice otherslice = otherState.getSlice();
79 | 
80 |         // merge the two states
81 |         if (null == slice) {
82 |             state.setSlice(otherslice);
83 |         } else {
84 |             int length1 = slice.length();
85 |             int length2 = otherslice.length();
86 | 
87 |             // allocate
88 |             Slice slice_new = Slices.allocate(length1 + length2 - COUNT_FLAG_LENGTH);
89 | 
90 |             // copy
91 |             slice_new.setBytes(0, slice.getBytes());
92 |             slice_new.setBytes(length1, otherslice.getBytes(), COUNT_FLAG_LENGTH, length2 - COUNT_FLAG_LENGTH);
93 |             if (otherslice.getByte(0) == 1) {
94 |                 slice_new.setByte(0, 1);
95 |             }
96 | 
97 |             // store the result
98 |             state.setSlice(slice_new);
99 |         }
100 |     }
101 | 
102 |     @OutputFunction(StandardTypes.INTEGER)
103 |     public static void output(SliceState state, BlockBuilder out) {
104 |         // fetch the state
105 |         Slice slice = state.getSlice();
106 | 
107 |         // empty state, or event A never occurred: return 0
108 |         if ((null == slice) || (slice.getByte(0) == 0)) {
109 |             out.writeInt(0);
110 |             out.closeEntry();
111 |             return;
112 |         }
113 | 
114 |         // build the list of packed entries in preparation for sorting
115 |         List<Long> time_array = new ArrayList<>();
116 |         for (int index = COUNT_FLAG_LENGTH; index < slice.length(); index += COUNT_ONE_LENGTH) {
117 |             time_array.add(slice.getLong(index));
118 |         }
119 | 
120 |         // sort the array (this can be expensive)
121 |         Collections.sort(time_array);
122 | 
123 |         // read the header fields
124 |         byte events_count = slice.getByte(1);
125 |         long windows = slice.getLong(2);
126 | 
127 |         // walk the events in timestamp order and compute the funnel depth
128 |         int max_xwhat_index = 0;
129 |         List<long[]> temp = new ArrayList<>();
130 |         for (long xwhen_xwhat: time_array) {
131 |             // unpack the ordered entry
132 |             long timestamp = xwhen_xwhat / 10;
133 |             byte xwhat = (byte) (xwhen_xwhat % 10);
134 | 
135 |             if (xwhat == 0) {
136 |                 // start a candidate: (timestamp of event A, index of the latest matched event)
137 |                 long[] flag = {timestamp, xwhat};
138 |                 temp.add(flag);
139 |             } else {
140 |                 // update the candidates from newest to oldest, breaking early where possible
141 |                 for (int i = temp.size() - 1; i >= 0; --i) {
142 |                     long[] flag = temp.get(i);
143 |                     if ((timestamp - flag[0]) >= windows) {
144 |                         // this candidate (and every older one) is outside the time window: stop
145 |                         break;
146 |                     } else if (xwhat == (flag[1] + 1)) {
147 |                         // the current event is the next step of this candidate: advance it and stop
148 |                         flag[1] = xwhat;
149 |                         if (max_xwhat_index < xwhat) {
150 |                             max_xwhat_index = xwhat;
151 |                         }
152 |                         break;
153 |                     }
154 |                 }
155 | 
156 |                 // the whole funnel is already complete: exit early
157 |                 if ((max_xwhat_index + 1) == events_count) {
158 |                     break;
159 |                 }
160 |             }
161 |         }
162 | 
163 |         // emit the result
164 |         out.writeInt(max_xwhat_index + 1);
165 |         out.closeEntry();
166 |     }
167 | }
168 | 
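AggregationLDCount packs each row into one long as xwhen * 10 + xwhat_index, so entries stay sortable by timestamp while the event index survives in the last decimal digit; the modulo-10 decode also means a single query can address at most 10 distinct events. A tiny standalone illustration (the timestamp is an invented sample value):

```
public class PackingDemo {
    public static void main(String[] args) {
        long xwhen = 1483228800000L;     // 2017-01-01T00:00:00Z in ms (sample value)
        byte xwhatIndex = 2;             // third event of the funnel
        long packed = xwhen * 10 + xwhatIndex;
        System.out.println(packed / 10); // 1483228800000 -> timestamp
        System.out.println(packed % 10); // 2             -> event index
    }
}
```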
--------------------------------------------------------------------------------
/易观/AggregationLDSum.java:
--------------------------------------------------------------------------------
1 | package aggregation;
2 | 
3 | // import com.facebook.presto.operator.aggregation.state.SliceState;
4 | 
5 | import com.facebook.presto.spi.block.BlockBuilder;
6 | import com.facebook.presto.spi.block.BlockBuilderStatus;
7 | import com.facebook.presto.spi.function.*;
8 | import com.facebook.presto.spi.type.BigintType;
9 | import com.facebook.presto.spi.type.StandardTypes;
10 | import io.airlift.slice.Slice;
11 | import io.airlift.slice.Slices;
12 | import state.SliceState;
13 | 
14 | /*
15 | Funnel aggregate function, step two.
16 | */
17 | @AggregationFunction("ld_sum")
18 | public class AggregationLDSum extends AggregationBase {
19 | 
20 |     @InputFunction
21 |     public static void input(SliceState state,
22 |                              @SqlType(StandardTypes.INTEGER) long xwho_state, // funnel depth of this user (ld_count output)
23 |                              @SqlType(StandardTypes.INTEGER) long events_count) { // number of events in the query
24 |         // fetch the state
25 |         Slice slice = state.getSlice();
26 | 
27 |         // initialize the state: events_count ints
28 |         if (null == slice) {
29 |             slice = Slices.allocate((int) events_count * 4);
30 |         }
31 | 
32 |         // count this user towards every step it reached
33 |         for (int status = 0; status < xwho_state; ++status) {
34 |             int index = status * 4;
35 |             slice.setInt(index, slice.getInt(index) + 1);
36 |         }
37 | 
38 |         // store the state
39 |         state.setSlice(slice);
40 |     }
41 | 
42 |     @CombineFunction
43 |     public static void combine(SliceState state, SliceState otherState) {
44 |         // fetch the states
45 |         Slice slice = state.getSlice();
46 |         Slice otherslice = otherState.getSlice();
47 | 
48 |         // merge the two states
49 |         if (null == slice) {
50 |             state.setSlice(otherslice);
51 |         } else {
52 |             for (int index = 0; index < slice.length(); index += 4) {
53 |                 slice.setInt(index, slice.getInt(index) + otherslice.getInt(index));
54 |             }
55 |             state.setSlice(slice);
56 |         }
57 |     }
58 | 
59 |     @OutputFunction("array(" + StandardTypes.BIGINT + ")")
60 |     public static void output(SliceState state, BlockBuilder out) {
61 |         // fetch the state
62 |         Slice slice = state.getSlice();
63 | 
64 |         // empty state: return an empty array
65 |         if (null == slice) {
66 |             BlockBuilder blockBuilder = BigintType.BIGINT.createBlockBuilder(new BlockBuilderStatus(), 0);
67 |             out.writeObject(blockBuilder.build());
68 |             out.closeEntry();
69 |             return;
70 |         }
71 | 
72 |         // build the result: [A:100, B:50, C:10, ...]
73 |         BlockBuilder blockBuilder = BigintType.BIGINT.createBlockBuilder(new BlockBuilderStatus(), slice.length() / 4);
74 |         for (int index = 0; index < slice.length(); index += 4) {
75 |             BigintType.BIGINT.writeLong(blockBuilder, slice.getInt(index));
76 |         }
77 | 
78 |         // emit the result
79 |         out.writeObject(blockBuilder.build());
80 |         out.closeEntry();
81 |     }
82 | 
83 | }
84 | 
--------------------------------------------------------------------------------
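A hypothetical follow-up, not part of the contest queries: ld_sum returns cumulative per-step user counts, so step conversion rates are one division away. A sketch in Presto SQL, reusing the first query from the top-level README (Presto arrays are 1-based):

```
SELECT
    funnel,
    CAST(funnel[2] AS double) / funnel[1] AS step2_rate,
    CAST(funnel[3] AS double) / funnel[2] AS step3_rate
FROM (
    SELECT ld_sum(xwho_state, 3) AS funnel
    FROM (SELECT ld_count(xwhen, 7 * 86400000, xwhat_id, '10001,10004,10008') AS xwho_state
          FROM t_funnel_devicelog
          WHERE day >= '20170101' AND day <= '20170131'
            AND xwhat_id IN (10001, 10004, 10008)
          GROUP BY xwho) a
) b;
```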