├── .clang-format ├── .clang-tidy ├── .gitignore ├── 3rdparty_install.sh ├── CMakeLists.txt ├── LICENSE.txt ├── README.md ├── cmake ├── FindAvroCPP.cmake ├── FindGRPC.cmake ├── FindPackageHandleStandardArgs.cmake ├── FindPackageMessage.cmake ├── FindPostgreSQL.cmake ├── FindRocksDB.cmake ├── FindSnappy.cmake └── Findrapidjson.cmake ├── docker ├── Dockerfile ├── Dockerfile.build ├── Dockerfile.build3rdparty ├── build.sh └── build_3rdparty.sh ├── examples ├── CMakeLists.txt ├── example1-partition-topology │ ├── CMakeLists.txt │ └── example1-partition-topology.cpp ├── example10-avro │ ├── CMakeLists.txt │ └── example10-avro.cpp ├── example12-generic-avro │ ├── CMakeLists.txt │ └── example12-generic-avro.cpp ├── example13-zk │ ├── CMakeLists.txt │ └── example13-zk.cpp ├── example14-raw-sink │ ├── CMakeLists.txt │ └── example14-raw-sink.cpp ├── example2-join │ ├── CMakeLists.txt │ └── example2-join.cpp ├── example3-count │ ├── CMakeLists.txt │ └── example3-count.cpp ├── example4-count-large │ ├── CMakeLists.txt │ └── example4-count-large.cpp ├── example5-repartition │ ├── CMakeLists.txt │ └── example5-repartition.cpp ├── example6-filter │ ├── CMakeLists.txt │ └── example6-filter.cpp ├── example7-token-bucket │ ├── CMakeLists.txt │ └── example7-token-bucket.cpp ├── example8-ktable-mem │ ├── CMakeLists.txt │ └── example8-ktable-mem.cpp ├── example9-ktable-rocksdb │ ├── CMakeLists.txt │ └── example9-ktable-rocksdb.cpp └── example_setup │ ├── CMakeLists.txt │ └── example_setup.cpp ├── include └── kspp │ ├── avro │ ├── avro_text.h │ ├── avro_utils.h │ └── generic_avro.h │ ├── cluster_config.h │ ├── cluster_metadata.h │ ├── connect │ ├── connection_params.h │ ├── generic_avro_sink.h │ └── generic_producer.h │ ├── event_consumer.h │ ├── features │ └── aws │ │ ├── aws.h │ │ └── s3_offset_storage_provider.h │ ├── internal │ ├── commit_chain.h │ ├── event_queue.h │ ├── queue.h │ ├── rd_kafka_utils.h │ ├── rocksdb │ │ └── rocksdb_operators.h │ ├── sinks │ │ └── kafka_producer.h │ 
└── sources │ │ └── kafka_consumer.h │ ├── kevent.h │ ├── krecord.h │ ├── kspp.h │ ├── metrics │ ├── metrics.h │ └── prometheus_pushgateway_reporter.h │ ├── processors │ ├── count.h │ ├── count_by_value.h │ ├── delay.h │ ├── filter.h │ ├── flat_map.h │ ├── join.h │ ├── ktable.h │ ├── merge.h │ ├── rate_limiter.h │ ├── repartition.h │ ├── thoughput_limiter.h │ ├── transform.h │ └── visitor.h │ ├── schema_registry │ ├── confluent_http_proxy.h │ └── schema_registry_client.h │ ├── serdes │ ├── avro_serdes.h │ ├── binary_serdes.h │ ├── json_serdes.h │ ├── proto_serdes.h │ └── text_serdes.h │ ├── sinks │ ├── array_sink.h │ ├── avro_file_sink.h │ ├── avro_s3_sink.h │ ├── kafka_partition_sink.h │ ├── kafka_sink.h │ ├── null_sink.h │ ├── sink_defs.h │ └── stream_sink.h │ ├── sources │ ├── avro_file_source.h │ ├── kafka_source.h │ └── mem_stream_source.h │ ├── state_stores │ ├── mem_counter_store.h │ ├── mem_store.h │ ├── mem_token_bucket_store.h │ ├── mem_windowed_store.h │ ├── rocksdb_counter_store.h │ ├── rocksdb_store.h │ ├── rocksdb_windowed_store.h │ └── state_store.h │ ├── topology.h │ ├── topology_builder.h │ ├── type_name.h │ ├── typedefs.h │ └── utils │ ├── async.h │ ├── cluster_uri.h │ ├── concurrent_queue.h │ ├── destructor_callback.h │ ├── duration.h │ ├── env.h │ ├── http_client.h │ ├── kafka_utils.h │ ├── kspp_utils.h │ ├── offset_storage_provider.h │ ├── spinlock.h │ ├── string_utils.h │ ├── url.h │ └── url_parser.h ├── install.sh ├── libraries ├── CMakeLists.txt ├── kspp-bitbouncer │ ├── CMakeLists.txt │ ├── grpc │ │ ├── grpc_utils.cpp │ │ └── grpc_utils.h │ ├── grpc_avro_consumer.h │ ├── grpc_avro_schema_resolver.h │ ├── grpc_avro_serdes.h │ ├── grpc_avro_source.h │ ├── proto │ │ └── bb_streaming.proto │ └── tools │ │ ├── CMakeLists.txt │ │ └── bitbouncer │ │ ├── CMakeLists.txt │ │ ├── bb2avro.cpp │ │ ├── bb2console.cpp │ │ └── bb2pg.cpp ├── kspp-es │ ├── CMakeLists.txt │ ├── elasticsearch_generic_avro_sink.h │ ├── elasticsearch_producer.h │ ├── 
elasticsearch_utils.cpp │ ├── elasticsearch_utils.h │ └── tools │ │ ├── CMakeLists.txt │ │ └── kafka2es │ │ ├── CMakeLists.txt │ │ └── kafka2es.cpp ├── kspp-influx │ ├── CMakeLists.txt │ ├── influx_sink.cpp │ ├── influx_sink.h │ └── tools │ │ ├── CMakeLists.txt │ │ └── kafka2influx │ │ ├── CMakeLists.txt │ │ └── kafka2influxdb.cpp ├── kspp-kinesis │ ├── CMakeLists.txt │ ├── examples │ │ ├── aws-iot-sample │ │ │ ├── CMakeLists.txt │ │ │ └── aws-iot-sample.cpp │ │ ├── aws-kinesis-consumer │ │ │ ├── CMakeLists.txt │ │ │ └── aws-kinesis-consumer.cpp │ │ └── kinesis-test │ │ │ ├── CMakeLists.txt │ │ │ └── kinesis-test.cpp │ ├── kinesis_consumer.cpp │ ├── kinesis_consumer.h │ └── kinesis_source.h ├── kspp-mqtt │ ├── CMakeLists.txt │ ├── examples │ │ ├── CMakeLists.txt │ │ └── kspp-mqtt-producer-example │ │ │ ├── CMakeLists.txt │ │ │ └── kspp-mqtt-producer-example.cpp │ ├── mqtt_consumer.cpp │ ├── mqtt_consumer.h │ ├── mqtt_producer.cpp │ ├── mqtt_producer.h │ ├── mqtt_sink.h │ └── mqtt_source.h ├── kspp-pg │ ├── CMakeLists.txt │ ├── postgres_avro_utils.cpp │ ├── postgres_avro_utils.h │ ├── postgres_connection.cpp │ ├── postgres_connection.h │ ├── postgres_consumer.cpp │ ├── postgres_consumer.h │ ├── postgres_generic_avro_sink.h │ ├── postgres_generic_avro_source.cpp │ ├── postgres_generic_avro_source.h │ ├── postgres_producer.cpp │ ├── postgres_producer.h │ ├── postgres_read_cursor.cpp │ ├── postgres_read_cursor.h │ └── tools │ │ ├── CMakeLists.txt │ │ ├── avro2pg │ │ ├── CMakeLists.txt │ │ └── avro2pg.cpp │ │ └── postgres_connect │ │ ├── CMakeLists.txt │ │ ├── kafka2postgres.cpp │ │ └── postgres2kafka.cpp └── kspp-tds │ ├── CMakeLists.txt │ ├── tds_avro_utils.cpp │ ├── tds_avro_utils.h │ ├── tds_connection.cpp │ ├── tds_connection.h │ ├── tds_consumer.cpp │ ├── tds_consumer.h │ ├── tds_generic_avro_source.cpp │ ├── tds_generic_avro_source.h │ ├── tds_read_cursor.cpp │ ├── tds_read_cursor.h │ └── tools │ ├── CMakeLists.txt │ └── tds_connect │ ├── CMakeLists.txt │ ├── 
README.md │ └── tds2kafka.cpp ├── rebuild.sh ├── scripts ├── create_test_topics.bat ├── insert_large_test_text.bat ├── list_topics.bat └── pg10.txt ├── src ├── avro │ ├── avro_utils.cpp │ └── generic_avro.cpp ├── cluster_config.cpp ├── cluster_metadata.cpp ├── features │ └── aws │ │ ├── aws.cpp │ │ └── s3_offset_storage_provider.cpp ├── internal │ ├── commit_chain.cpp │ ├── rocksdb │ │ └── rocksdb_operators.cpp │ ├── sinks │ │ └── kafka_producer.cpp │ ├── sources │ │ ├── avro_file_source.cpp │ │ └── kafka_consumer.cpp │ └── utils │ │ ├── kspp_utils.cpp │ │ └── rd_kafka_utils.cpp ├── kspp.cpp ├── metrics │ └── prometheus_pushgateway_reporter.cpp ├── schema_registry │ ├── confluent_http_proxy.cpp │ └── schema_registy_client.cpp ├── serdes │ └── proto_serdes.cpp ├── topology.cpp └── utils │ ├── cluster_uri.cpp │ ├── env.cpp │ ├── http_client.cpp │ ├── kafka_utils.cpp │ ├── offset_storage_provider.cpp │ ├── string_utils.cpp │ ├── url.cpp │ └── url_parser.cpp ├── tests ├── CMakeLists.txt ├── run_all.sh ├── test1.cpp ├── test10_http.cpp ├── test11_metrics.cpp ├── test12_s3.cpp ├── test13_mem.cpp ├── test14_async.cpp ├── test2_mem_counter_store.cpp ├── test2_mem_store.cpp ├── test2_mem_windowed_store.cpp ├── test2_rocksdb_counter_store.cpp ├── test2_rocksdb_store.cpp ├── test2_rocksdb_windowed_store.cpp ├── test3_mem_token_bucket.cpp ├── test4_kafka_consumer.cpp ├── test5_kafka_source_sink.cpp ├── test6_repartition.cpp ├── test7_cluster_uri.cpp ├── test7_url_vector.cpp ├── test8_join.cpp └── test9_kafka_rest.cpp ├── tools ├── CMakeLists.txt ├── csv2avro │ ├── CMakeLists.txt │ └── csv2avro.cpp ├── kafka2avro │ ├── CMakeLists.txt │ └── kafka2avro.cpp ├── kafka2rocksdb │ ├── CMakeLists.txt │ └── main.cpp ├── kspp_avrogencpp │ ├── CMakeLists.txt │ └── kspp_avrogencpp.cpp ├── kspp_protobuf_register_schema │ ├── CMakeLists.txt │ └── kspp_protobuf_register_schema.cpp └── mqtt2kafka │ ├── CMakeLists.txt │ └── mqtt2kafka.cpp ├── utils_to_fix ├── source_provider.cpp └── 
source_provider.h └── windows ├── README.md ├── rebuild_windows_vs14-all-options.bat ├── rebuild_windows_vs14-minimal.bat └── rebuild_windows_vs14-test.bat /.gitignore: -------------------------------------------------------------------------------- 1 | cmake-build-* 2 | bin 3 | build 4 | lib 5 | tmp 6 | .idea 7 | *~ 8 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Boost Software License - Version 1.0 - August 17th, 2003 2 | 3 | Permission is hereby granted, free of charge, to any person or organization 4 | obtaining a copy of the software and accompanying documentation covered by 5 | this license (the "Software") to use, reproduce, display, distribute, 6 | execute, and transmit the Software, and to prepare derivative works of the 7 | Software, and to permit third-parties to whom the Software is furnished to 8 | do so, all subject to the following: 9 | 10 | The copyright notices in the Software and this entire statement, including 11 | the above license grant, this restriction and the following disclaimer, 12 | must be included in all copies of the Software, in whole or in part, and 13 | all derivative works of the Software, unless such copies or derivative 14 | works are solely in the form of machine-executable object code generated by 15 | a source language processor. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT 20 | SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE 21 | FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE, 22 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 23 | DEALINGS IN THE SOFTWARE. 
-------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | kspp 2 | ========= 3 | 4 | [![Join the chat at https://gitter.im/kspp/Lobby](https://badges.gitter.im/kspp/Lobby.svg)](https://gitter.im/kspp/Lobby?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge) 5 | 6 | A high performance / realtime C++17 stream-processing framework with avro support. The design is influenced by apache 7 | kafka streams library. Change data capture for a postgres and sql server. Export to kafka, mqtt, postgres, elastic 8 | search, influxdb and avrofiles 9 | 10 | Sources: 11 | 12 | - kafka (uses librdkafka) 13 | - aws kinesis (experimental, using aws sdk) 14 | - postgres (uses libpq) 15 | - microsoft sqlserver (uses freetds) 16 | - memory stream 17 | 18 | Offset storage: 19 | 20 | - kafka 21 | - file 22 | - S3 (uses aws sdk) 23 | 24 | Sinks: 25 | 26 | - kafka (using librdkafka) 27 | - mqtt (using paho libraries) 28 | - postgres (uses libpq) 29 | - influxdb 30 | - elastic search 31 | - files (avro) 32 | - S3 (avro) 33 | - memory stream 34 | 35 | Statestores: 36 | 37 | - rocksdb 38 | - memory 39 | 40 | Codecs: 41 | 42 | - avro with confluent schema registry 43 | - protobuf with confluent schema registry 44 | - text 45 | - json 46 | 47 | Metrics: 48 | 49 | - prometheus 50 | 51 | It is intended to be run in kubernetes but works equally well standalone 52 | 53 | Platforms: Linux (Windows and Mac build are outdated) 54 | 55 | ## Ubuntu 22.04 x64: 56 | 57 | Remove stuff that you should not have... 
58 | 59 | ``` 60 | sudo apt-get purge libprotobuf-dev libgrpc++-dev protobuf-compiler 61 | sudo apt-get purge openssl-dev 62 | ``` 63 | 64 | Install build tools 65 | 66 | ``` 67 | 68 | sudo apt-get install -y software-properties-common 69 | sudo apt-get update 70 | sudo apt-get install -y g++ sudo pax-utils automake autogen shtool libtool git wget cmake unzip build-essential pkg-config sed bison flex 71 | ``` 72 | 73 | Install build deps 74 | 75 | ``` 76 | sudo apt-get install -y python-dev autotools-dev libicu-dev zlib1g-dev openssl libssl-dev libbz2-dev libsnappy-dev liblz4-dev libzstd-dev \ 77 | libgflags-dev libcurl4-openssl-dev libc-ares-dev liblzma-dev libpq-dev freetds-dev libxml2-dev \ 78 | libfmt-dev libpcre2-dev libhttp-parser-dev 79 | 80 | # libgoogle-glog-dev missing packet for u22.04 81 | 82 | ``` 83 | 84 | install 3rd party deps we need to build from source (you can hack versions - see the file) 85 | 86 | ``` 87 | ./3rdparty_install.sh 88 | 89 | ``` 90 | 91 | build kspp 92 | 93 | ``` 94 | mkdir build && cd build 95 | cmake -DCMAKE_BUILD_TYPE=Release -DBUILD_SHARED_LIBS=ON -DLINK_SHARED=ON .. 96 | make -j "$(getconf _NPROCESSORS_ONLN)" 97 | sudo make install 98 | cd .. 99 | ``` 100 | 101 | build docker image / ubuntu 22.04 102 | 103 | ``` 104 | cd docker-ubuntu 105 | ./build_3rdparty.sh 106 | ./build.sh 107 | ``` 108 | 109 | -------------------------------------------------------------------------------- /cmake/FindAvroCPP.cmake: -------------------------------------------------------------------------------- 1 | # 2 | # Copyright 2013 Produban 3 | # 4 | # Licensed under the Apache License, Version 2.0 (the "License"); 5 | # you may not use this file except in compliance with the License. 
6 | # You may obtain a copy of the License at 7 | # 8 | # http://www.apache.org/licenses/LICENSE-2.0 9 | # 10 | # Unless required by applicable law or agreed to in writing, software 11 | # distributed under the License is distributed on an "AS IS" BASIS, 12 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | # See the License for the specific language governing permissions and 14 | # limitations under the License. 15 | # 16 | # Tries to find Avro headers and libraries. 17 | # 18 | # Usage of this module as follows: 19 | # 20 | # find_package(Avro) 21 | # 22 | # Variables used by this module, they can change the default behaviour and need 23 | # to be set before calling find_package: 24 | # 25 | # AVRO_ROOT_DIR Set this variable to the root installation of 26 | # Avro C++ if the module has problems finding 27 | # the proper installation path. 28 | # 29 | # Variables defined by this module: 30 | # 31 | # AVRO_FOUND System has Avro C++ libs/headers 32 | # AVRO_LIBRARIES The Avro C++ libraries 33 | # AVRO_INCLUDE_DIRS The location of Avro C++ headers 34 | 35 | message("\nLooking for Avro C++ headers and libraries") 36 | 37 | if (AVRO_ROOT_DIR) 38 | message(STATUS "Root dir: ${AVRO_ROOT_DIR}") 39 | endif () 40 | 41 | find_package(PkgConfig) 42 | pkg_check_modules(PC_AVRO avro-cpp) 43 | set(AVRO_DEFINITIONS ${PC_AVRO_CFLAGS_OTHER}) 44 | 45 | find_path(AVRO_INCLUDE_DIR 46 | NAMES 47 | Encoder.hh 48 | HINTS 49 | ${AVRO_ROOT_DIR}/include 50 | ${PC_AVRO_INCLUDEDIR} 51 | ${PC_AVRO_INCLUDE_DIRS} 52 | PATH_SUFFIXES 53 | avro 54 | ) 55 | 56 | if (AVRO_LINK_STATIC) 57 | set(AVRO_LOOK_FOR_LIB_NAMES avrocpp_s avrocpp) 58 | else () 59 | set(AVRO_LOOK_FOR_LIB_NAMES avrocpp) 60 | endif () 61 | 62 | find_library(AVRO_LIBRARY 63 | NAMES 64 | ${AVRO_LOOK_FOR_LIB_NAMES} 65 | PATHS 66 | ${AVRO_ROOT_DIR}/lib 67 | ${PC_AVRO_LIBDIR} 68 | ${PC_AVRO_LIBRARY_DIRS} 69 | ) 70 | 71 | include(FindPackageHandleStandardArgs) 72 | 73 | # handle the QUIETLY and REQUIRED 
arguments and set Avro_FOUND to TRUE 74 | # if all listed variables are TRUE 75 | find_package_handle_standard_args(Avro 76 | DEFAULT_MSG 77 | AVRO_LIBRARY 78 | AVRO_INCLUDE_DIR 79 | ) 80 | 81 | mark_as_advanced(AVRO_INCLUDE_DIR AVRO_LIBRARY) 82 | 83 | if (AVRO_FOUND) 84 | set(AVRO_LIBRARIES ${AVRO_LIBRARY}) 85 | set(AVRO_INCLUDE_DIRS ${AVRO_INCLUDE_DIR}) 86 | 87 | get_filename_component(AVRO_LIBRARY_DIR ${AVRO_LIBRARY} PATH) 88 | get_filename_component(AVRO_LIBRARY_NAME ${AVRO_LIBRARY} NAME_WE) 89 | 90 | mark_as_advanced(AVRO_LIBRARY_DIR AVRO_LIBRARY_NAME) 91 | 92 | message(STATUS "Include directories: ${AVRO_INCLUDE_DIRS}") 93 | message(STATUS "Libraries: ${AVRO_LIBRARIES}") 94 | endif () -------------------------------------------------------------------------------- /cmake/FindPackageMessage.cmake: -------------------------------------------------------------------------------- 1 | # Distributed under the OSI-approved BSD 3-Clause License. See accompanying 2 | # file Copyright.txt or https://cmake.org/licensing for details. 3 | 4 | #.rst: 5 | # FindPackageMessage 6 | # ------------------ 7 | # 8 | # 9 | # 10 | # FIND_PACKAGE_MESSAGE( "message for user" "find result details") 11 | # 12 | # This macro is intended to be used in FindXXX.cmake modules files. It 13 | # will print a message once for each unique find result. This is useful 14 | # for telling the user where a package was found. The first argument 15 | # specifies the name (XXX) of the package. The second argument 16 | # specifies the message to display. The third argument lists details 17 | # about the find result so that if they change the message will be 18 | # displayed again. The macro also obeys the QUIET argument to the 19 | # find_package command. 20 | # 21 | # Example: 22 | # 23 | # :: 24 | # 25 | # if(X11_FOUND) 26 | # FIND_PACKAGE_MESSAGE(X11 "Found X11: ${X11_X11_LIB}" 27 | # "[${X11_X11_LIB}][${X11_INCLUDE_DIR}]") 28 | # else() 29 | # ... 
30 | # endif() 31 | 32 | function(FIND_PACKAGE_MESSAGE pkg msg details) 33 | # Avoid printing a message repeatedly for the same find result. 34 | if (NOT ${pkg}_FIND_QUIETLY) 35 | string(REPLACE "\n" "" details "${details}") 36 | set(DETAILS_VAR FIND_PACKAGE_MESSAGE_DETAILS_${pkg}) 37 | if (NOT "${details}" STREQUAL "${${DETAILS_VAR}}") 38 | # The message has not yet been printed. 39 | message(STATUS "${msg}") 40 | 41 | # Save the find details in the cache to avoid printing the same 42 | # message again. 43 | set("${DETAILS_VAR}" "${details}" 44 | CACHE INTERNAL "Details about finding ${pkg}") 45 | endif () 46 | endif () 47 | endfunction() 48 | -------------------------------------------------------------------------------- /cmake/FindRocksDB.cmake: -------------------------------------------------------------------------------- 1 | # Try to find RocksDB headers and library. 2 | # 3 | # Usage of this module as follows: 4 | # 5 | # find_package(RocksDB) 6 | # 7 | # Variables used by this module, they can change the default behaviour and need 8 | # to be set before calling find_package: 9 | # 10 | # ROCKSDB_ROOT_DIR Set this variable to the root installation of 11 | # RocksDB if the module has problems finding the 12 | # proper installation path. 13 | # 14 | # Variables defined by this module: 15 | # 16 | # ROCKSDB_FOUND System has RocksDB library/headers. 17 | # ROCKSDB_LIBRARIES The RocksDB library. 18 | # ROCKSDB_INCLUDE_DIRS The location of RocksDB headers. 
19 | 20 | find_path(ROCKSDB_ROOT_DIR 21 | NAMES include/rocksdb/db.h 22 | ) 23 | 24 | find_library(ROCKSDB_LIBRARIES 25 | NAMES rocksdb 26 | HINTS ${ROCKSDB_ROOT_DIR}/lib 27 | ) 28 | 29 | find_path(ROCKSDB_INCLUDE_DIRS 30 | NAMES rocksdb/db.h 31 | HINTS ${ROCKSDB_ROOT_DIR}/include 32 | ) 33 | 34 | include(FindPackageHandleStandardArgs) 35 | find_package_handle_standard_args(RocksDB DEFAULT_MSG 36 | ROCKSDB_LIBRARIES 37 | ROCKSDB_INCLUDE_DIRS 38 | ) 39 | 40 | mark_as_advanced( 41 | ROCKSDB_ROOT_DIR 42 | ROCKSDB_LIBRARIES 43 | ROCKSDB_INCLUDE_DIRS 44 | ) -------------------------------------------------------------------------------- /cmake/FindSnappy.cmake: -------------------------------------------------------------------------------- 1 | # 2 | # Licensed to the Apache Software Foundation (ASF) under one 3 | # or more contributor license agreements. See the NOTICE file 4 | # distributed with this work for additional information 5 | # regarding copyright ownership. The ASF licenses this file 6 | # to you under the Apache License, Version 2.0 (the 7 | # "License"); you may not use this file except in compliance 8 | # with the License. You may obtain a copy of the License at 9 | # 10 | # http://www.apache.org/licenses/LICENSE-2.0 11 | # 12 | # Unless required by applicable law or agreed to in writing, 13 | # software distributed under the License is distributed on an 14 | # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 | # KIND, either express or implied. See the License for the 16 | # specific language governing permissions and limitations 17 | # under the License. 18 | # 19 | # Tries to find Snappy headers and libraries. 
20 | # 21 | # Usage of this module as follows: 22 | # 23 | # find_package(Snappy) 24 | # 25 | # Variables used by this module, they can change the default behaviour and need 26 | # to be set before calling find_package: 27 | # 28 | # SNAPPY_ROOT_DIR Set this variable to the root installation of 29 | # Snappy if the module has problems finding 30 | # the proper installation path. 31 | # 32 | # Variables defined by this module: 33 | # 34 | # SNAPPY_FOUND System has Snappy libs/headers 35 | # SNAPPY_LIBRARIES The Snappy libraries 36 | # SNAPPY_INCLUDE_DIR The location of Snappy headers 37 | 38 | find_path(SNAPPY_INCLUDE_DIR 39 | NAMES snappy.h 40 | HINTS ${SNAPPY_ROOT_DIR}/include) 41 | 42 | find_library(SNAPPY_LIBRARIES 43 | NAMES snappy 44 | HINTS ${SNAPPY_ROOT_DIR}/lib) 45 | 46 | include(FindPackageHandleStandardArgs) 47 | find_package_handle_standard_args(Snappy DEFAULT_MSG 48 | SNAPPY_LIBRARIES 49 | SNAPPY_INCLUDE_DIR) 50 | 51 | mark_as_advanced( 52 | SNAPPY_ROOT_DIR 53 | SNAPPY_LIBRARIES 54 | SNAPPY_INCLUDE_DIR) 55 | -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:22.04 2 | 3 | WORKDIR /root/ 4 | 5 | COPY extract/bin /usr/local/bin 6 | COPY extract/lib /usr/local/lib 7 | COPY extract/lib64 /usr/local/lib64 8 | COPY extract/runDeps . 
9 | 10 | RUN runDeps=$(cat runDeps) && \ 11 | echo $runDeps && \ 12 | apt-get update && apt-get install -y $runDeps bash 13 | 14 | -------------------------------------------------------------------------------- /docker/Dockerfile.build: -------------------------------------------------------------------------------- 1 | FROM kspp-build3rdparty:latest 2 | WORKDIR /src 3 | 4 | MAINTAINER sk svante.karlsson@csi.se 5 | 6 | ENV TZ=GMT 7 | RUN ln -snf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone 8 | 9 | 10 | COPY cmake cmake 11 | COPY examples examples 12 | COPY include include 13 | COPY src src 14 | COPY tests tests 15 | COPY tools tools 16 | COPY libraries libraries 17 | COPY CMakeLists.txt . 18 | 19 | RUN mkdir build && \ 20 | cd build && \ 21 | cmake -DCMAKE_BUILD_TYPE=Release -DENABLE_ROCKSDB=ON -DENABLE_POSTGRES=ON -DENABLE_TDS=ON -DENABLE_ELASTICSEARCH=ON -DENABLE_INFLUXDB=ON -DENABLE_POSTGRES=ON -DENABLE_MQTT=ON -DBUILD_TOOLS=ON -DBUILD_SAMPLES=OFF -DBUILD_TESTS=OFF -DBUILD_STATIC_LIBS=ON -DBUILD_SHARED_LIBS=ON -DLINK_SHARED=ON .. && \ 22 | make -j "$(getconf _NPROCESSORS_ONLN)" && \ 23 | make install && \ 24 | strip --strip-all /usr/local/lib/*.so* && \ 25 | strip --strip-unneeded /usr/local/bin/* && \ 26 | cd .. 
&& rm -rf build 27 | 28 | RUN runDeps="$( \ 29 | scanelf --needed --nobanner --recursive /usr/local \ 30 | | awk '{ gsub(/,/, "\n", $2); print $2 }' \ 31 | | sort -u \ 32 | | xargs -r dpkg -S | cut -d : -f 1 \ 33 | | sort -u \ 34 | )" && \ 35 | echo "$runDeps" > runDeps 36 | 37 | 38 | -------------------------------------------------------------------------------- /docker/build.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -ef 3 | 4 | IMAGE_TAG=${1:-latest} 5 | 6 | export BUILD_CONTAINER_NAME=kspp-build:${IMAGE_TAG} 7 | export EXTRACT_CONTAINER=kspp-build-extract-${IMAGE_TAG} 8 | export TAG_NAME=kspp-sample:${IMAGE_TAG} 9 | 10 | 11 | rm -rf ./extract 12 | mkdir -p ./extract/bin 13 | mkdir -p ./extract/lib 14 | mkdir -p ./extract/lib64 15 | echo "removing old extract container" 16 | docker rm -f $EXTRACT_CONTAINER || true 17 | 18 | pushd .. 19 | docker build --build-arg IMAGE_TAG=${IMAGE_TAG} --file docker/Dockerfile.build --tag $BUILD_CONTAINER_NAME . 20 | popd 21 | 22 | docker create --name $EXTRACT_CONTAINER $BUILD_CONTAINER_NAME 23 | 24 | docker cp $EXTRACT_CONTAINER:/usr/local/lib ./extract 25 | echo $PWD 26 | find ./extract -name "*.a" -exec rm -rf {} \; 27 | 28 | docker cp $EXTRACT_CONTAINER:/usr/local/bin/kafka2es ./extract/bin 29 | docker cp $EXTRACT_CONTAINER:/usr/local/bin/kafka2influxdb ./extract/bin 30 | docker cp $EXTRACT_CONTAINER:/usr/local/bin/kafka2postgres ./extract/bin 31 | docker cp $EXTRACT_CONTAINER:/usr/local/bin/postgres2kafka ./extract/bin 32 | docker cp $EXTRACT_CONTAINER:/usr/local/bin/tds2kafka ./extract/bin 33 | docker cp $EXTRACT_CONTAINER:/usr/local/bin/mqtt2kafka ./extract/bin 34 | docker cp $EXTRACT_CONTAINER:/usr/local/bin/kspp_protobuf_register_schema ./extract/bin 35 | 36 | docker cp $EXTRACT_CONTAINER:/src/runDeps ./extract/runDeps 37 | 38 | docker rm -f $EXTRACT_CONTAINER 39 | 40 | docker build -f Dockerfile --no-cache -t$TAG_NAME . 
41 | 42 | rm -rf ./extract 43 | 44 | -------------------------------------------------------------------------------- /docker/build_3rdparty.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | IMAGE_TAG=${1:-latest} 4 | 5 | pushd .. 6 | docker build --file docker/Dockerfile.build3rdparty --tag kspp-build3rdparty:${IMAGE_TAG} . 7 | popd 8 | 9 | 10 | 11 | 12 | -------------------------------------------------------------------------------- /examples/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_subdirectory(example_setup) 2 | add_subdirectory(example1-partition-topology) 3 | add_subdirectory(example2-join) 4 | add_subdirectory(example3-count) 5 | add_subdirectory(example4-count-large) 6 | add_subdirectory(example5-repartition) 7 | add_subdirectory(example6-filter) 8 | add_subdirectory(example7-token-bucket) 9 | add_subdirectory(example8-ktable-mem) 10 | 11 | if (ENABLE_ROCKSDB) 12 | add_subdirectory(example9-ktable-rocksdb) 13 | endif () 14 | 15 | add_subdirectory(example10-avro) 16 | add_subdirectory(example12-generic-avro) 17 | 18 | add_subdirectory(example14-raw-sink) 19 | 20 | -------------------------------------------------------------------------------- /examples/example1-partition-topology/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | project(example1-partition-topology) 2 | 3 | SET(SRCS 4 | example1-partition-topology.cpp 5 | ) 6 | 7 | add_executable(example1-partition-topology ${SRCS}) 8 | target_link_libraries(example1-partition-topology ${ALL_LIBS}) 9 | -------------------------------------------------------------------------------- /examples/example10-avro/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | project(example10-avro) 2 | 3 | SET(SRCS 4 | example10-avro.cpp 5 | ) 6 | 7 | add_executable(example10-avro ${SRCS}) 8 | 
target_link_libraries(example10-avro ${ALL_LIBS}) 9 | -------------------------------------------------------------------------------- /examples/example12-generic-avro/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | SET(SRCS 3 | example12-generic-avro.cpp 4 | ) 5 | 6 | add_executable(example12-generic-avro ${SRCS}) 7 | target_link_libraries(example12-generic-avro ${ALL_LIBS}) 8 | -------------------------------------------------------------------------------- /examples/example13-zk/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | 2 | SET(SRCS 3 | example13-zk.cpp 4 | ) 5 | 6 | add_executable(example13-zk ${SRCS}) 7 | target_link_libraries(example13-zk ${ALL_LIBS}) 8 | -------------------------------------------------------------------------------- /examples/example14-raw-sink/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | project(example14-raw-sink) 2 | 3 | SET(SRCS 4 | example14-raw-sink.cpp 5 | ) 6 | 7 | add_executable(example14-raw-sink ${SRCS}) 8 | target_link_libraries(example14-raw-sink ${ALL_LIBS}) 9 | -------------------------------------------------------------------------------- /examples/example14-raw-sink/example14-raw-sink.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | using namespace std::chrono_literals; 8 | 9 | static boost::uuids::uuid to_uuid(int64_t x) { 10 | boost::uuids::uuid uuid; 11 | memset(uuid.data, 0, 16); 12 | memcpy(uuid.data, &x, 8); 13 | return uuid; 14 | } 15 | 16 | // How to use a sink without a topology 17 | int main(int argc, char **argv) { 18 | FLAGS_logtostderr = 1; 19 | google::InitGoogleLogging(argv[0]); 20 | 21 | std::string consumer_group("kspp-examples"); 22 | auto config = std::make_shared(consumer_group); 23 | config->load_config_from_env(); 24 | 
config->validate(); // optional 25 | 26 | auto avro_stream = std::make_shared>( 27 | config, 28 | "kspp_test14_raw", 29 | config->avro_serdes(), 30 | config->avro_serdes()); 31 | 32 | std::vector ids; 33 | for (int i = 0; i != 10; ++i) 34 | ids.push_back(to_uuid(i)); 35 | 36 | std::cerr << "creating " << avro_stream->log_name() << std::endl; 37 | for (int64_t update_nr = 0; update_nr != 10; ++update_nr) { 38 | for (auto &i: ids) 39 | insert(*avro_stream, i, update_nr); 40 | } 41 | 42 | avro_stream->flush(); 43 | 44 | //wait for all messages to flush.. 45 | //while(avro_stream->) 46 | 47 | return 0; 48 | } 49 | -------------------------------------------------------------------------------- /examples/example2-join/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | project(example2-join) 2 | 3 | SET(SRCS 4 | example2-join.cpp 5 | ) 6 | 7 | add_executable(example2-join ${SRCS}) 8 | target_link_libraries(example2-join ${ALL_LIBS}) 9 | -------------------------------------------------------------------------------- /examples/example3-count/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | project(example3-count) 2 | 3 | SET(SRCS 4 | example3-count.cpp 5 | ) 6 | 7 | add_executable(example3-count ${SRCS}) 8 | target_link_libraries(example3-count ${ALL_LIBS}) 9 | -------------------------------------------------------------------------------- /examples/example3-count/example3-count.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | #include 16 | 17 | using namespace std::chrono_literals; 18 | 19 | #define TOPIC_NAME "kspp_TextInput" 20 | 21 | int main(int argc, char **argv) { 22 | FLAGS_logtostderr = 1; 23 | google::InitGoogleLogging(argv[0]); 24 | 25 
| std::string consumer_group("kspp-examples"); 26 | auto config = std::make_shared(consumer_group); 27 | config->load_config_from_env(); 28 | config->validate(); 29 | config->log(); 30 | 31 | kspp::topology_builder builder(config); 32 | { 33 | auto topology = builder.create_topology(); 34 | auto sink = topology->create_sink>(TOPIC_NAME); 35 | sink->push_back("hello kafka streams"); 36 | } 37 | 38 | auto partitions = kspp::kafka::get_number_partitions(config, TOPIC_NAME); 39 | auto partition_list = kspp::get_partition_list(partitions); 40 | 41 | { 42 | auto topology = builder.create_topology(); 43 | auto source = topology->create_processors>( 44 | partition_list, TOPIC_NAME); 45 | std::regex rgx("\\s+"); 46 | auto word_stream = topology->create_processors>( 47 | source, 48 | [&rgx](const auto record, auto stream) { 49 | std::sregex_token_iterator iter(record.value()->begin(), record.value()->end(), rgx, -1); 50 | std::sregex_token_iterator end; 51 | for (; iter != end; ++iter) 52 | insert(stream, (std::string) *iter); 53 | }); 54 | 55 | auto word_counts = topology->create_processors>( 56 | word_stream, 2s); 57 | 58 | 59 | auto merged = topology->create_processor>(); 60 | 61 | for (auto &i: word_counts) 62 | merged->add(*i); 63 | 64 | auto sink = topology->create_processor>(merged, &std::cerr); 65 | 66 | topology->start(kspp::OFFSET_BEGINNING); 67 | topology->flush(); 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /examples/example4-count-large/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | project(example4-count-large) 2 | 3 | SET(SRCS 4 | example4-count-large.cpp 5 | ) 6 | 7 | add_executable(example4-count-large ${SRCS}) 8 | target_link_libraries(example4-count-large ${ALL_LIBS}) 9 | -------------------------------------------------------------------------------- /examples/example5-repartition/CMakeLists.txt: 
-------------------------------------------------------------------------------- 1 | project(example5-repartition) 2 | 3 | SET(SRCS 4 | example5-repartition.cpp 5 | ) 6 | 7 | add_executable(example5-repartition ${SRCS}) 8 | target_link_libraries(example5-repartition ${ALL_LIBS}) 9 | -------------------------------------------------------------------------------- /examples/example6-filter/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | project(example6-filter) 2 | 3 | SET(SRCS 4 | example6-filter.cpp 5 | ) 6 | 7 | add_executable(example6-filter ${SRCS}) 8 | target_link_libraries(example6-filter ${ALL_LIBS}) 9 | -------------------------------------------------------------------------------- /examples/example6-filter/example6-filter.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #include 12 | #include 13 | #include 14 | #include 15 | 16 | using namespace kspp; 17 | using namespace std::chrono_literals; 18 | 19 | #define TOPIC_NAME "kspp_TextInput" 20 | 21 | int main(int argc, char **argv) { 22 | FLAGS_logtostderr = 1; 23 | google::InitGoogleLogging(argv[0]); 24 | 25 | std::string consumer_group("kspp-examples"); 26 | auto config = std::make_shared(consumer_group); 27 | config->load_config_from_env(); 28 | config->validate();// optional 29 | config->log(); // optional 30 | 31 | kspp::topology_builder builder(config); 32 | { 33 | auto topology = builder.create_topology(); 34 | auto sink = topology->create_sink>(TOPIC_NAME); 35 | sink->push_back("hello kafka streams"); 36 | } 37 | 38 | { 39 | auto partitions = kspp::kafka::get_number_partitions(config, TOPIC_NAME); 40 | auto partition_list = kspp::get_partition_list(partitions); 41 | 42 | 43 | auto topology = builder.create_topology(); 44 | auto sources = 
topology->create_processors>(partition_list, 45 | TOPIC_NAME); 46 | 47 | std::regex rgx("\\s+"); 48 | auto word_streams = topology->create_processors>(sources, [&rgx]( 49 | const auto record, auto stream) { 50 | std::sregex_token_iterator iter(record.value()->begin(), record.value()->end(), rgx, -1); 51 | std::sregex_token_iterator end; 52 | for (; iter != end; ++iter) 53 | insert(stream, (std::string) *iter); 54 | }); 55 | 56 | auto filtered_streams = topology->create_processors>(word_streams, 57 | [](const auto record) -> bool { 58 | return (record.key() != 59 | "hello"); 60 | }); 61 | 62 | auto mypipes = topology->create_processors>(filtered_streams); 63 | auto sinks = topology->create_processors>(mypipes, &std::cerr); 64 | for (auto i: mypipes) 65 | i->push_back("extra message injected"); 66 | topology->start(kspp::OFFSET_BEGINNING); 67 | topology->flush(); 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /examples/example7-token-bucket/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | project(example7-token-bucket) 2 | 3 | SET(SRCS 4 | example7-token-bucket.cpp 5 | ) 6 | 7 | add_executable(example7-token-bucket ${SRCS}) 8 | target_link_libraries(example7-token-bucket ${ALL_LIBS}) 9 | -------------------------------------------------------------------------------- /examples/example8-ktable-mem/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | project(example8-ktable-mem) 2 | 3 | SET(SRCS 4 | example8-ktable-mem.cpp 5 | ) 6 | 7 | add_executable(example8-ktable-mem ${SRCS}) 8 | target_link_libraries(example8-ktable-mem ${ALL_LIBS}) 9 | -------------------------------------------------------------------------------- /examples/example9-ktable-rocksdb/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | project(example9-ktable-rocksdb) 2 | 3 | SET(SRCS 4 | 
example9-ktable-rocksdb.cpp 5 | ) 6 | 7 | add_executable(example9-ktable-rocksdb ${SRCS}) 8 | target_link_libraries(example9-ktable-rocksdb ${ALL_LIBS}) 9 | -------------------------------------------------------------------------------- /examples/example_setup/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | SET(SRCS 2 | example_setup.cpp 3 | ) 4 | 5 | add_executable(example_setup ${SRCS}) 6 | target_link_libraries(example_setup ${ALL_LIBS}) 7 | -------------------------------------------------------------------------------- /examples/example_setup/example_setup.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | using namespace std::chrono_literals; 11 | 12 | static boost::uuids::uuid to_uuid(int64_t x) { 13 | boost::uuids::uuid uuid; 14 | memset(uuid.data, 0, 16); 15 | memcpy(uuid.data, &x, 8); 16 | return uuid; 17 | } 18 | 19 | int main(int argc, char **argv) { 20 | FLAGS_logtostderr = 1; 21 | google::InitGoogleLogging(argv[0]); 22 | 23 | std::string consumer_group("kspp-examples"); 24 | auto config = std::make_shared(consumer_group); 25 | config->load_config_from_env(); 26 | config->validate(); // optional 27 | config->log();// optional 28 | 29 | kspp::topology_builder builder(config); 30 | auto topology = builder.create_topology(); 31 | auto table_stream = topology->create_sink>( 32 | "kspp_test0_table"); 33 | auto event_stream = topology->create_sink>( 34 | "kspp_test0_eventstream"); 35 | 36 | std::vector ids; 37 | for (int i = 0; i != 10000; ++i) 38 | ids.push_back(to_uuid(i)); 39 | 40 | std::cerr << "creating " << table_stream->log_name() << std::endl; 41 | for (int64_t update_nr = 0; update_nr != 100; ++update_nr) { 42 | for (auto &i: ids) { 43 | table_stream->push_back(i, update_nr); 44 | } 45 | } 46 | 47 | std::cerr << "creating " << event_stream->log_name() 
<< std::endl; 48 | for (int64_t event_nr = 0; event_nr != 100; ++event_nr) { 49 | for (auto &i: ids) { 50 | event_stream->push_back(i, event_nr); 51 | } 52 | } 53 | 54 | topology->flush(); 55 | 56 | return 0; 57 | } 58 | -------------------------------------------------------------------------------- /include/kspp/avro/avro_text.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #pragma once 10 | 11 | namespace kspp { 12 | template <> 13 | inline size_t text_serdes::encode(const generic_avro &src, std::ostream &dst) { 14 | if (src.valid_schema() == nullptr) 15 | return 0; 16 | 17 | /* JSON encoder */ 18 | avro::EncoderPtr json_encoder = avro::jsonEncoder(*src.valid_schema()); 19 | 20 | /* JSON output stream */ 21 | auto json_os = avro::ostreamOutputStream(dst); 22 | 23 | try { 24 | /* Encode Avro datum to JSON */ 25 | json_encoder->init(*json_os.get()); 26 | avro::encode(*json_encoder, *src.generic_datum()); 27 | json_encoder->flush(); 28 | 29 | } catch (const avro::Exception &e) { 30 | LOG(ERROR) << "Binary to JSON transformation failed: " << e.what(); 31 | return 0; 32 | } 33 | return json_os->byteCount(); 34 | } 35 | } // namespace kspp 36 | -------------------------------------------------------------------------------- /include/kspp/cluster_metadata.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #pragma once 10 | 11 | namespace kspp { 12 | class cluster_config; 13 | 14 | class cluster_metadata { 15 | public: 16 | cluster_metadata(const cluster_config *); 17 | 18 | ~cluster_metadata(); 19 | 20 | void close(); 21 | 22 | void validate(); 23 | 24 | uint32_t get_number_partitions(std::string topic); 25 | 26 | bool consumer_group_exists(std::string consumer_group, std::chrono::seconds timeout) const; // 
uses rd kafka c api 27 | 28 | bool wait_for_topic_partition(std::string topic, int32_t partition, std::chrono::seconds timeout) const; 29 | 30 | bool wait_for_topic_leaders(std::string, std::chrono::seconds timeout) const; 31 | 32 | private: 33 | struct topic_data { 34 | inline bool available() const { 35 | return nr_of_partitions == available_parititions.size(); 36 | } 37 | 38 | uint32_t nr_of_partitions; 39 | std::vector available_parititions; 40 | }; 41 | 42 | mutable std::mutex mutex_; 43 | std::unique_ptr rk_handle_; 44 | mutable std::set available_consumer_groups_; 45 | mutable std::set missing_consumer_groups_; 46 | mutable std::map topic_data_; 47 | }; 48 | } -------------------------------------------------------------------------------- /include/kspp/connect/connection_params.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #pragma once 5 | 6 | namespace kspp { 7 | namespace connect { 8 | 9 | enum rescrape_policy_t { RESCRAPE_OFF, LAST_QUERY_TS, CLIENT_TS }; 10 | 11 | struct connection_params { 12 | 13 | std::string url; // where relevant 14 | 15 | std::string host; // where relevant 16 | int port; 17 | 18 | // authentication 19 | std::string user; 20 | std::string password; 21 | 22 | // resource id database in postgres & tds - not used in elastic 23 | std::string database_name; 24 | 25 | std::string http_header; 26 | 27 | // drop deletes of never-seen id's 28 | bool assume_beginning_of_stream = false; 29 | }; 30 | 31 | struct table_params { 32 | std::chrono::seconds poll_intervall = std::chrono::seconds(60); 33 | size_t max_items_in_fetch = 30000; 34 | rescrape_policy_t rescrape_policy = RESCRAPE_OFF; 35 | uint32_t rescrape_ticks = 1; 36 | std::string offset_storage; 37 | std::string query; 38 | std::string ts_column; 39 | int ts_multiplier = 1; // normally 1 or 1000 (ie ms or s) 40 | int ts_utc_offset = 0; 41 | std::string id_column; 42 | }; 43 | } // namespace connect 44 | } // 
namespace kspp 45 | -------------------------------------------------------------------------------- /include/kspp/connect/generic_avro_sink.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #pragma once 11 | 12 | namespace kspp { 13 | class generic_avro_sink : 14 | public topic_sink { 15 | public: 16 | generic_avro_sink( 17 | std::shared_ptr config, 18 | std::shared_ptr> 19 | impl) : 20 | impl_(impl) {} 21 | 22 | ~generic_avro_sink() override { close(); } 23 | 24 | bool good() const { return impl_->good(); } 25 | 26 | void register_metrics(kspp::processor *parent) { 27 | impl_->register_metrics(parent); 28 | } 29 | 30 | void close() override { 31 | /*if (!exit_) { 32 | exit_ = true; 33 | } 34 | */ 35 | impl_->close(); 36 | } 37 | 38 | bool eof() const override { return this->queue_.size() == 0 && impl_->eof(); } 39 | 40 | size_t queue_size() const override { return this->queue_.size(); } 41 | 42 | size_t outbound_queue_len() const override { 43 | return this->impl_->queue_size(); 44 | } 45 | 46 | int64_t next_event_time() const override { 47 | return this->queue_.next_event_time(); 48 | } 49 | 50 | size_t process(int64_t tick) override { 51 | if (this->queue_.empty()) 52 | return 0; 53 | size_t processed = 0; 54 | while (!this->queue_.empty()) { 55 | auto p = this->queue_.front(); 56 | if (p == nullptr || p->event_time() > tick) 57 | return processed; 58 | this->queue_.pop_front(); 59 | impl_->insert(p); 60 | ++(this->processed_count_); 61 | ++processed; 62 | this->lag_.add_event_time(tick, p->event_time()); 63 | } 64 | return processed; 65 | } 66 | 67 | std::string topic() const override { return impl_->topic(); } 68 | 69 | void poll(int timeout) override { impl_->poll(); } 70 | 71 | void flush() override { 72 | while (!eof()) { 73 | process(kspp::milliseconds_since_epoch()); 74 | poll(0); 75 | 
std::this_thread::sleep_for(std::chrono::milliseconds( 76 | 10)); // TODO the deletable messages should be deleted when poill gets 77 | // called an not from background thread 3rd queue is needed... 78 | } 79 | 80 | while (true) { 81 | int ec = 0; // TODO fixme 82 | // auto ec = _impl.flush(1000); 83 | if (ec == 0) 84 | break; 85 | } 86 | } 87 | 88 | protected: 89 | // bool exit_= false; 90 | std::shared_ptr> 91 | impl_; 92 | }; 93 | } // namespace kspp 94 | -------------------------------------------------------------------------------- /include/kspp/connect/generic_producer.h: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #pragma once 4 | 5 | namespace kspp { 6 | template class generic_producer { 7 | public: 8 | generic_producer() {} 9 | 10 | virtual ~generic_producer() {} 11 | 12 | virtual bool good() const = 0; 13 | 14 | virtual void register_metrics(kspp::processor *parent) = 0; 15 | 16 | virtual void close() = 0; 17 | 18 | virtual bool eof() const = 0; 19 | 20 | virtual void insert(std::shared_ptr>) = 0; 21 | 22 | virtual size_t queue_size() const = 0; 23 | 24 | virtual void poll() = 0; 25 | 26 | virtual std::string topic() const = 0; 27 | }; 28 | } // namespace kspp 29 | -------------------------------------------------------------------------------- /include/kspp/features/aws/aws.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | namespace kspp { 4 | void init_aws(); 5 | } -------------------------------------------------------------------------------- /include/kspp/features/aws/s3_offset_storage_provider.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | namespace kspp { 6 | class s3_offset_storage : public offset_storage { 7 | public: 8 | static std::shared_ptr create(kspp::url); 9 | 10 | s3_offset_storage(std::string host, std::string s3_bucket, std::string key, 
std::string access_key, 11 | std::string secret_key); 12 | 13 | ~s3_offset_storage() override; 14 | 15 | private: 16 | int64_t load_offset(int timeout_ms) override; 17 | 18 | void persist_offset(int64_t offset, int timeout_ms) override; 19 | 20 | const std::string s3_bucket_; 21 | const std::string s3_object_name_; 22 | std::shared_ptr s3_client_; 23 | }; 24 | } -------------------------------------------------------------------------------- /include/kspp/internal/commit_chain.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #pragma once 9 | 10 | namespace kspp { 11 | 12 | void autocommit_marker_gc(); 13 | 14 | class commit_chain { 15 | public: 16 | 17 | class autocommit_marker : public event_done_marker { 18 | public: 19 | autocommit_marker(std::function callback) 20 | : event_done_marker(callback) { 21 | } 22 | 23 | ~autocommit_marker() override; 24 | 25 | void init(int64_t offset, std::shared_ptr next) { 26 | event_done_marker::init(offset); 27 | //_offset = offset; 28 | next_ = next; 29 | } 30 | 31 | private: 32 | std::shared_ptr next_; 33 | }; 34 | 35 | public: 36 | commit_chain(std::string topic, int32_t partition); 37 | 38 | std::shared_ptr create(int64_t offset); 39 | 40 | // nr of outstanding requests 41 | inline size_t size() const { 42 | return size_; 43 | } 44 | 45 | inline int64_t last_good_offset() const { 46 | return last_good_offset_; 47 | } 48 | 49 | // first error code 50 | inline int32_t first_ec() const { 51 | return first_ec_; 52 | } 53 | 54 | private: 55 | void handle_result(int64_t offset, int32_t ec); 56 | 57 | const std::string topic_; 58 | const int32_t partition_; 59 | mutable spinlock spinlock_; 60 | volatile size_t size_; 61 | int64_t last_good_offset_; 62 | int32_t first_ec_; 63 | std::shared_ptr next_; 64 | }; 65 | } -------------------------------------------------------------------------------- 
/include/kspp/internal/event_queue.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #pragma once 8 | 9 | namespace kspp { 10 | 11 | /** 12 | normal use is single producer / single consumer 13 | but for topic sinks this will be multiproducer / single consumer 14 | TBD optimize for this usecase 15 | */ 16 | template 17 | class event_queue { 18 | public: 19 | event_queue() 20 | : next_event_time_(INT64_MAX) { 21 | } 22 | 23 | inline size_t size() const { 24 | spinlock::scoped_lock xxx(spinlock_); 25 | { return queue_.size(); } 26 | } 27 | 28 | inline int64_t next_event_time() const { 29 | return next_event_time_; 30 | } 31 | 32 | inline bool empty() const { 33 | return next_event_time_ == INT64_MAX; // this is faster than locking.. 34 | } 35 | 36 | //normal usage 37 | inline void push_back(std::shared_ptr> p) { 38 | if (p) { 39 | spinlock::scoped_lock xxx(spinlock_); 40 | { 41 | if (queue_.size() == 0) 42 | next_event_time_ = p->event_time(); 43 | queue_.push_back(p); 44 | } 45 | } 46 | } 47 | 48 | // used for error handling 49 | inline void push_front(std::shared_ptr> p) { 50 | if (p) { 51 | spinlock::scoped_lock xxx(spinlock_); 52 | { 53 | next_event_time_ = p->event_time(); 54 | queue_.push_front(p); 55 | } 56 | } 57 | } 58 | 59 | 60 | inline std::shared_ptr> front() { 61 | spinlock::scoped_lock xxx(spinlock_); 62 | return queue_.front(); 63 | } 64 | 65 | inline std::shared_ptr> back() { 66 | spinlock::scoped_lock xxx(spinlock_); 67 | return queue_.back(); 68 | } 69 | 70 | inline void pop_front() { 71 | spinlock::scoped_lock xxx(spinlock_); 72 | { 73 | queue_[0].reset(); 74 | queue_.pop_front(); 75 | if (queue_.size() == 0) 76 | next_event_time_ = INT64_MAX; 77 | else 78 | next_event_time_ = queue_[0]->event_time(); 79 | } 80 | } 81 | 82 | // used for erro handling 83 | inline void pop_back() { 84 | spinlock::scoped_lock xxx(spinlock_); 85 | { 86 | 
queue_[queue_.size() - 1].reset(); 87 | queue_.pop_back(); 88 | if (queue_.size() == 0) 89 | next_event_time_ = INT64_MAX; 90 | } 91 | } 92 | 93 | 94 | inline std::shared_ptr> pop_front_and_get() { 95 | if (empty()) 96 | return nullptr; 97 | 98 | spinlock::scoped_lock xxx(spinlock_); 99 | { 100 | auto p = queue_.front(); 101 | queue_[0].reset(); 102 | queue_.pop_front(); 103 | 104 | if (queue_.size() == 0) 105 | next_event_time_ = INT64_MAX; 106 | else 107 | next_event_time_ = queue_[0]->event_time(); 108 | return p; 109 | } 110 | } 111 | 112 | private: 113 | std::deque>> queue_; 114 | int64_t next_event_time_; 115 | mutable spinlock spinlock_; 116 | }; 117 | 118 | } -------------------------------------------------------------------------------- /include/kspp/internal/queue.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #pragma once 7 | 8 | namespace kspp { 9 | template 10 | class queue { 11 | public: 12 | queue() : empty_(true) { 13 | } 14 | 15 | inline size_t size() const { 16 | spinlock::scoped_lock xxx(spinlock_); 17 | return queue_.size(); 18 | } 19 | 20 | inline bool empty() const { 21 | return empty_; 22 | } 23 | 24 | inline void push_back(ITEM i) { 25 | spinlock::scoped_lock xxx(spinlock_); 26 | { 27 | empty_ = false; 28 | queue_.push_back(i); 29 | } 30 | } 31 | 32 | inline void push_front(ITEM i) { 33 | spinlock::scoped_lock xxx(spinlock_); 34 | { 35 | empty_ = false; 36 | queue_.push_front(i); 37 | } 38 | } 39 | 40 | inline ITEM front() { 41 | spinlock::scoped_lock xxx(spinlock_); 42 | return queue_.front(); 43 | } 44 | 45 | inline void pop_front() { 46 | spinlock::scoped_lock xxx(spinlock_); 47 | { 48 | queue_.pop_front(); 49 | if (queue_.size() == 0) 50 | empty_ = true; 51 | } 52 | } 53 | 54 | inline ITEM pop_and_get() { 55 | spinlock::scoped_lock xxx(spinlock_); 56 | { 57 | auto p = queue_.front(); 58 | queue_.pop_front(); 59 | 60 | if (queue_.size() == 0) 
61 | empty_ = true; 62 | return p; 63 | } 64 | } 65 | 66 | private: 67 | std::deque queue_; 68 | bool empty_; 69 | mutable spinlock spinlock_; 70 | }; 71 | 72 | } -------------------------------------------------------------------------------- /include/kspp/internal/rd_kafka_utils.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #pragma once 6 | 7 | void set_config(RdKafka::Conf *conf, std::string key, std::string value); 8 | 9 | void set_config(RdKafka::Conf *conf, std::string key, RdKafka::Conf *topic_conf); 10 | 11 | void set_config(RdKafka::Conf *conf, std::string key, RdKafka::DeliveryReportCb *callback); 12 | 13 | void set_config(RdKafka::Conf *conf, std::string key, RdKafka::PartitionerCb *partitioner_cb); 14 | 15 | void set_config(RdKafka::Conf *conf, std::string key, RdKafka::EventCb *event_cb); 16 | 17 | void set_broker_config(RdKafka::Conf *rd_conf, const kspp::cluster_config *config); 18 | 19 | 20 | -------------------------------------------------------------------------------- /include/kspp/internal/rocksdb/rocksdb_operators.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #pragma once 5 | 6 | namespace rocksdb { 7 | std::shared_ptr CreateInt64AddOperator(); 8 | } 9 | 10 | namespace Int64AddOperator { 11 | inline int64_t Deserialize(const rocksdb::Slice &slice) { 12 | int64_t value = 0; 13 | if (slice.size() != sizeof(int64_t)) { 14 | LOG(ERROR) << "int64 value corruption, size: " << slice.size() << ", expected:" << sizeof(int64_t); 15 | return value; 16 | } 17 | memcpy((void *) &value, slice.data(), sizeof(int64_t)); 18 | return value; 19 | } 20 | 21 | inline int64_t Deserialize(const std::string &src) { 22 | int64_t value = 0; 23 | if (src.size() != sizeof(int64_t)) { 24 | LOG(ERROR) << "int64 value corruption, size: " << src.size() << ", expected:" << sizeof(int64_t); 25 | return value; 26 | } 27 | 
memcpy((void *) &value, src.data(), sizeof(int64_t)); 28 | return value; 29 | } 30 | 31 | inline std::string Serialize(int64_t val) { 32 | std::string result; 33 | result.resize(sizeof(int64_t)); 34 | memcpy((void *) result.data(), &val, sizeof(int64_t)); 35 | return result; 36 | } 37 | } 38 | 39 | -------------------------------------------------------------------------------- /include/kspp/internal/sinks/kafka_producer.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #pragma once 9 | 10 | namespace kspp { 11 | class kafka_producer { 12 | 13 | public: 14 | enum memory_management_mode { 15 | FREE = 1, COPY = 2 16 | }; 17 | 18 | kafka_producer(std::shared_ptr config, std::string topic); 19 | 20 | ~kafka_producer(); 21 | 22 | void close(); 23 | 24 | /** 25 | produce a message to partition -> (partition_hash % partition_cnt) 26 | */ 27 | int 28 | produce(uint32_t partition_hash, memory_management_mode mode, void *key, size_t keysz, void *value, size_t valuesz, 29 | int64_t timestamp, std::shared_ptr autocommit_marker); 30 | 31 | inline std::string topic() const { 32 | return topic_; 33 | } 34 | 35 | inline size_t queue_size() const { 36 | return closed_ ? 0 : producer_->outq_len(); 37 | } 38 | 39 | inline void poll(int timeout) { 40 | producer_->poll(timeout); 41 | } 42 | 43 | inline bool good() const { 44 | return (delivery_report_cb_.status() == RdKafka::ErrorCode::ERR_NO_ERROR); 45 | } 46 | 47 | inline size_t nr_of_partitions() { 48 | // this one does not seem to be assigned in cb?? 49 | return nr_of_partitions_; 50 | } 51 | 52 | inline int32_t flush(int timeout_ms) { 53 | return (queue_size() == 0) ? 
0 : producer_->flush(timeout_ms); 54 | } 55 | 56 | private: 57 | class MyHashPartitionerCb : public RdKafka::PartitionerCb { 58 | public: 59 | int32_t 60 | partitioner_cb(const RdKafka::Topic *topic, const std::string *key, int32_t partition_cnt, void *msg_opaque); 61 | }; 62 | 63 | // better to have a static config of nr of parititions 64 | class MyDeliveryReportCb : public RdKafka::DeliveryReportCb { 65 | public: 66 | MyDeliveryReportCb(); 67 | 68 | virtual void dr_cb(RdKafka::Message &message); 69 | 70 | inline RdKafka::ErrorCode status() const { 71 | return _status; 72 | } 73 | 74 | private: 75 | RdKafka::ErrorCode _status; 76 | }; 77 | 78 | 79 | class MyEventCb : public RdKafka::EventCb { 80 | public: 81 | void event_cb(RdKafka::Event &event); 82 | }; 83 | 84 | 85 | const std::string topic_; 86 | std::unique_ptr rd_topic_; 87 | std::unique_ptr producer_; 88 | bool closed_ = false; 89 | size_t nr_of_partitions_ = 0; 90 | uint64_t msg_cnt_=0; // TODO move to metrics 91 | uint64_t msg_bytes_=0; // TODO move to metrics 92 | MyHashPartitionerCb default_partitioner_; 93 | MyDeliveryReportCb delivery_report_cb_; 94 | MyEventCb event_cb_; 95 | }; 96 | } // namespace 97 | 98 | 99 | -------------------------------------------------------------------------------- /include/kspp/internal/sources/kafka_consumer.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #pragma once 6 | 7 | namespace kspp { 8 | class cluster_config; 9 | 10 | class kafka_consumer { 11 | public: 12 | kafka_consumer(std::shared_ptr config, std::string topic, int32_t partition, 13 | std::string consumer_group, bool check_cluster = true); 14 | 15 | ~kafka_consumer(); 16 | 17 | void close(); 18 | 19 | std::unique_ptr consume(int librdkafka_timeout = 0); 20 | 21 | inline bool eof() const { 22 | return eof_; 23 | } 24 | 25 | inline std::string topic() const { 26 | return topic_; 27 | } 28 | 29 | inline int32_t partition() const 
{ 30 | return partition_; 31 | } 32 | 33 | void start(int64_t offset); 34 | 35 | void stop(); 36 | 37 | int32_t commit(int64_t offset, bool flush = false); 38 | 39 | inline int64_t commited() const { 40 | return can_be_committed_; 41 | } 42 | 43 | int update_eof(); 44 | 45 | bool consumer_group_exists(std::string consumer_group, std::chrono::seconds timeout) const; 46 | 47 | private: 48 | class MyEventCb : public RdKafka::EventCb { 49 | public: 50 | void event_cb(RdKafka::Event &event); 51 | }; 52 | 53 | std::shared_ptr config_; 54 | const std::string topic_; 55 | const int32_t partition_; 56 | const std::string consumer_group_; 57 | std::vector topic_partition_; 58 | std::unique_ptr consumer_; 59 | int64_t can_be_committed_ = -1; 60 | int64_t last_committed_ = -1; 61 | size_t max_pending_commits_ = 5000; 62 | uint64_t msg_cnt_ = 0 ; // TODO move to metrics 63 | uint64_t msg_bytes_ = 0; // TODO move to metrics 64 | bool eof_ = false; 65 | bool closed_ = false; 66 | MyEventCb event_cb_; 67 | }; 68 | } 69 | 70 | -------------------------------------------------------------------------------- /include/kspp/metrics/prometheus_pushgateway_reporter.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #pragma once 9 | 10 | namespace kspp { 11 | class prometheus_pushgateway_reporter { 12 | public: 13 | prometheus_pushgateway_reporter(std::string job_name, std::string uri, bool verbose = false); 14 | 15 | ~prometheus_pushgateway_reporter(); 16 | 17 | void add_metrics(std::shared_ptr p); 18 | 19 | private: 20 | bool run_ = false; 21 | prometheus::Gateway gateway_; 22 | bool verbose_; 23 | std::shared_ptr thread_; 24 | }; 25 | 26 | std::shared_ptr 27 | operator<<(std::shared_ptr reporter, std::shared_ptr t); 28 | 29 | std::shared_ptr 30 | operator<<(std::shared_ptr reporter, std::vector> v); 31 | } 32 | 
-------------------------------------------------------------------------------- /include/kspp/processors/delay.h: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #pragma once 4 | namespace kspp { 5 | template 6 | class delay : public event_consumer, public partition_source { 7 | static constexpr const char *PROCESSOR_NAME = "delay"; 8 | public: 9 | typedef std::function> record)> predicate; // return true to keep 10 | 11 | delay(std::shared_ptr config, std::shared_ptr> source, 12 | std::chrono::milliseconds delaytime) 13 | : event_consumer(), partition_source(source.get(), source->partition()), source_(source), 14 | delay_(delaytime.count()) { 15 | source_->add_sink([this](auto r) { this->queue_.push_back(r); }); 16 | this->add_metrics_label(KSPP_PROCESSOR_TYPE_TAG, "delay"); 17 | this->add_metrics_label(KSPP_PARTITION_TAG, std::to_string(source->partition())); 18 | } 19 | 20 | ~delay() { 21 | close(); 22 | } 23 | 24 | std::string log_name() const override { 25 | return PROCESSOR_NAME; 26 | } 27 | 28 | void start(int64_t offset) override { 29 | source_->start(offset); 30 | } 31 | 32 | void close() override { 33 | source_->close(); 34 | } 35 | 36 | size_t process(int64_t tick) override { 37 | source_->process(tick); 38 | 39 | size_t processed = 0; 40 | while (this->queue_.next_event_time() <= tick) { 41 | auto r = this->queue_.front(); 42 | if (r->event_time() + delay_ <= tick) { 43 | this->_lag.add_event_time(tick, r->event_time()); 44 | ++(this->processed_count_); 45 | this->queue_.pop_front(); 46 | this->send_to_sinks(r); 47 | ++processed; 48 | } else { 49 | break; 50 | } 51 | } 52 | return processed; 53 | } 54 | 55 | void commit(bool flush) override { 56 | source_->commit(flush); 57 | } 58 | 59 | size_t queue_size() const override { 60 | return this->queue_.size(); 61 | } 62 | 63 | int64_t next_event_time() const override { 64 | return event_consumer::next_event_time(); 65 | } 66 | 67 | bool eof() const 
override { 68 | return ((queue_size() == 0) && source_->eof()); 69 | } 70 | 71 | private: 72 | std::shared_ptr> source_; 73 | int delay_; 74 | }; 75 | } // namespace -------------------------------------------------------------------------------- /include/kspp/processors/filter.h: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #pragma once 4 | namespace kspp { 5 | template 6 | class filter : public event_consumer, public partition_source { 7 | static constexpr const char *PROCESSOR_NAME = "filter"; 8 | public: 9 | //typedef std::function> record)> predicate; // return true to keep 10 | typedef std::function &record)> predicate; // return true to keep 11 | 12 | filter(std::shared_ptr config, std::shared_ptr> source, predicate f) 13 | : event_consumer(), partition_source(source.get(), source->partition()), source_(source), 14 | predicate_(f), predicate_false_("predicate_false", "msg") { 15 | source_->add_sink([this](auto r) { 16 | this->queue_.push_back(r); 17 | }); 18 | this->add_metric(&predicate_false_); 19 | this->add_metrics_label(KSPP_PROCESSOR_TYPE_TAG, "filter"); 20 | this->add_metrics_label(KSPP_PARTITION_TAG, std::to_string(source->partition())); 21 | } 22 | 23 | ~filter() { 24 | close(); 25 | } 26 | 27 | std::string log_name() const override { 28 | return PROCESSOR_NAME; 29 | } 30 | 31 | void start(int64_t offset) override { 32 | source_->start(offset); 33 | } 34 | 35 | void close() override { 36 | source_->close(); 37 | } 38 | 39 | size_t process(int64_t tick) override { 40 | source_->process(tick); 41 | size_t processed = 0; 42 | 43 | while (this->queue_.next_event_time() <= tick) { 44 | auto trans = this->queue_.pop_front_and_get(); 45 | ++processed; 46 | this->lag_.add_event_time(tick, trans->event_time()); 47 | ++(this->processed_count_); 48 | if (trans->record()) { 49 | if (predicate_(*trans->record())) { 50 | this->send_to_sinks(trans); 51 | } else { 52 | ++predicate_false_; 53 | } 54 | } 55 | } 
56 | return processed; 57 | } 58 | 59 | void commit(bool flush) override { 60 | source_->commit(flush); 61 | } 62 | 63 | bool eof() const override { 64 | return ((queue_size() == 0) && source_->eof()); 65 | } 66 | 67 | size_t queue_size() const override { 68 | return event_consumer::queue_size(); 69 | } 70 | 71 | int64_t next_event_time() const override { 72 | return event_consumer::next_event_time(); 73 | } 74 | 75 | private: 76 | std::shared_ptr> source_; 77 | predicate predicate_; 78 | metric_counter predicate_false_; 79 | }; 80 | } // namespace -------------------------------------------------------------------------------- /include/kspp/processors/rate_limiter.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #pragma once 5 | 6 | // this should be a template on storage type 7 | // ie mem_bucket or rocksdb version 8 | // or virtual ptr to storage to be passed in constructor 9 | // right now this is processing time rate limiting 10 | // how do we swap betweeen processing and event time??? 
TBD 11 | namespace kspp { 12 | template 13 | class rate_limiter : public event_consumer, public partition_source { 14 | static constexpr const char *PROCESSOR_NAME = "rate_limiter"; 15 | public: 16 | rate_limiter(std::shared_ptr config, std::shared_ptr> source, 17 | std::chrono::milliseconds agetime, size_t capacity) 18 | : event_consumer(), partition_source(source.get(), source->partition()), source_(source), 19 | token_bucket_(std::make_shared>(agetime, capacity)), 20 | rejection_count_("rejection_count", "msg") { 21 | this->add_metrics_label(KSPP_PROCESSOR_TYPE_TAG, "rate_limiter"); 22 | this->add_metrics_label(KSPP_PARTITION_TAG, std::to_string(source->partition())); 23 | source_->add_sink([this](auto r) { 24 | this->queue_.push_back(r); 25 | }); 26 | this->add_metric(&rejection_count_); 27 | } 28 | 29 | ~rate_limiter() { 30 | close(); 31 | } 32 | 33 | std::string log_name() const override { 34 | return PROCESSOR_NAME; 35 | } 36 | 37 | void start(int64_t offset) override { 38 | source_->start(offset); 39 | if (offset == kspp::OFFSET_BEGINNING) 40 | token_bucket_->clear(); 41 | } 42 | 43 | void close() override { 44 | source_->close(); 45 | } 46 | 47 | size_t process(int64_t tick) override { 48 | source_->process(tick); 49 | size_t processed = 0; 50 | while (this->queue_.next_event_time() <= tick) { 51 | auto trans = this->queue_.pop_front_and_get(); 52 | ++processed; 53 | ++(this->processed_count_); 54 | this->lag_.add_event_time(tick, trans->event_time()); 55 | // milliseconds_since_epoch for processing time limiter 56 | // 57 | if (token_bucket_->consume(trans->record()->key(), trans->event_time())) { // TBD tick??? 
58 | this->send_to_sinks(trans); 59 | } else { 60 | ++rejection_count_; 61 | } 62 | } 63 | return processed; 64 | } 65 | 66 | void commit(bool flush) override { 67 | source_->commit(flush); 68 | } 69 | 70 | bool eof() const override { 71 | return (source_->eof() && (queue_size() == 0)); 72 | } 73 | 74 | size_t queue_size() const override { 75 | return event_consumer::queue_size(); 76 | } 77 | 78 | int64_t next_event_time() const override { 79 | return event_consumer::next_event_time(); 80 | } 81 | 82 | 83 | private: 84 | std::shared_ptr> source_; 85 | std::shared_ptr> token_bucket_; 86 | metric_counter rejection_count_; 87 | }; 88 | } // namespace -------------------------------------------------------------------------------- /include/kspp/processors/thoughput_limiter.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #pragma once 5 | 6 | // this should be a template on storage type 7 | // ie mem_bucket or rocksdb version 8 | // or virtual ptr to storage to be passed in constructor 9 | // right now this is processing time rate limiting 10 | // how do we swap betweeen processing and event time??? 
TBD 11 | namespace kspp { 12 | template 13 | class thoughput_limiter : public event_consumer, public partition_source { 14 | static constexpr const char *PROCESSOR_NAME = "thoughput_limiter"; 15 | public: 16 | thoughput_limiter(std::shared_ptr config, std::shared_ptr> source, 17 | double messages_per_sec) 18 | : event_consumer(), partition_source(source.get(), source->partition()), source_(source), 19 | token_bucket_(std::make_shared> 20 | (std::chrono::milliseconds(( 21 | int) (1000.0 / messages_per_sec)), 1)) { 22 | source_->add_sink([this](auto r) { 23 | this->queue_.push_back(r); 24 | }); 25 | this->add_metrics_label(KSPP_PROCESSOR_TYPE_TAG, "thoughput_limiter"); 26 | this->add_metrics_label(KSPP_PARTITION_TAG, std::to_string(source->partition())); 27 | } 28 | 29 | ~thoughput_limiter() { 30 | close(); 31 | } 32 | 33 | std::string log_name() const override { 34 | return PROCESSOR_NAME; 35 | } 36 | 37 | void start(int64_t offset) override { 38 | source_->start(offset); 39 | if (offset == kspp::OFFSET_BEGINNING) 40 | token_bucket_->clear(); 41 | } 42 | 43 | void close() override { 44 | source_->close(); 45 | } 46 | 47 | size_t process(int64_t tick) override { 48 | source_->process(tick); 49 | 50 | size_t processed = 0; 51 | while (this->queue_.next_event_time() <= tick) { 52 | auto trans = this->queue_.front(); 53 | if (token_bucket_->consume(0, tick)) { 54 | this->lag_.add_event_time(tick, trans->event_time()); 55 | ++(this->processed_count_); 56 | ++processed; 57 | this->queue_.pop_front(); 58 | this->send_to_sinks(trans); 59 | } else { 60 | break; 61 | } 62 | } 63 | return processed; 64 | } 65 | 66 | void commit(bool flush) override { 67 | source_->commit(flush); 68 | } 69 | 70 | bool eof() const override { 71 | return (source_->eof() && (queue_size() == 0)); 72 | } 73 | 74 | size_t queue_size() const override { 75 | return event_consumer::queue_size(); 76 | } 77 | 78 | int64_t next_event_time() const override { 79 | return event_consumer::next_event_time(); 80 
| } 81 | 82 | private: 83 | std::shared_ptr> source_; 84 | std::shared_ptr> token_bucket_; 85 | }; 86 | } // namespace -------------------------------------------------------------------------------- /include/kspp/processors/visitor.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #pragma once 6 | 7 | namespace kspp { 8 | template 9 | class visitor : public partition_sink { 10 | static constexpr const char *PROCESSOR_NAME = "visitor"; 11 | public: 12 | typedef std::function &record)> extractor; 13 | 14 | visitor(std::shared_ptr config, std::shared_ptr> source, extractor f) 15 | : partition_sink(source->partition()), source_(source), extractor_(f) { 16 | source_->add_sink([this](auto r) { this->queue_.push_back(r); }); 17 | this->add_metrics_label(KSPP_PROCESSOR_TYPE_TAG, PROCESSOR_NAME); 18 | this->add_metrics_label(KSPP_PARTITION_TAG, std::to_string(source->partition())); 19 | } 20 | 21 | ~visitor() { 22 | close(); 23 | } 24 | 25 | std::string log_name() const override { 26 | return PROCESSOR_NAME; 27 | } 28 | 29 | void start(int64_t offset) override { 30 | source_->start(offset); 31 | } 32 | 33 | void close() override { 34 | source_->close(); 35 | } 36 | 37 | size_t process(int64_t tick) override { 38 | source_->process(tick); 39 | size_t processed = 0; 40 | while (this->queue_.next_event_time() <= tick) { 41 | auto trans = this->queue_.pop_front_and_get(); 42 | ++processed; 43 | this->_lag.add_event_time(tick, trans->event_time()); 44 | ++(this->processed_count_); 45 | if (trans->record()) 46 | extractor_(*trans->record()); 47 | } 48 | return processed; 49 | } 50 | 51 | void commit(bool flush) override { 52 | source_->commit(flush); 53 | } 54 | 55 | bool eof() const override { 56 | return ((queue_size() == 0) && source_->eof()); 57 | } 58 | 59 | size_t queue_size() const override { 60 | return event_consumer::queue_size(); 61 | } 62 | 63 | int64_t next_event_time() const override { 
64 | return event_consumer::next_event_time(); 65 | } 66 | 67 | private: 68 | std::shared_ptr> source_; 69 | extractor extractor_; 70 | }; 71 | } 72 | -------------------------------------------------------------------------------- /include/kspp/schema_registry/schema_registry_client.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #pragma once 11 | 12 | namespace kspp { 13 | class cluster_config; 14 | 15 | class schema_registry_client { 16 | public: 17 | schema_registry_client(const kspp::cluster_config &config); 18 | ~schema_registry_client(); 19 | bool validate(); 20 | int32_t put_schema(std::string subject, std::shared_ptr schema); 21 | std::shared_ptr get_avro_schema(int32_t schema_id); 22 | 23 | 24 | int32_t put_schema(std::string subject, const nlohmann::json& schema); 25 | nlohmann::json verify_schema(std::string subject, const nlohmann::json& schema); 26 | std::shared_ptr get_json_schema(int32_t schema_id); 27 | std::shared_ptr get_json_schema(std::string subject); 28 | 29 | private: 30 | kspp::spinlock spinlock_; 31 | boost::asio::io_service ios_; 32 | std::unique_ptr work_; 33 | bool fail_fast_; 34 | std::shared_ptr proxy_; 35 | std::map> avro_cache_; 36 | std::thread thread_; 37 | }; 38 | 39 | nlohmann::json protobuf_register_schema(std::shared_ptr registry, std::string subject, const google::protobuf::FileDescriptor* file_descriptor); 40 | 41 | template 42 | nlohmann::json protobuf_register_schema(std::shared_ptr registry, std::string subject){ 43 | //if (std::shared_ptr schema = registry->get_json_schema(subject)) 44 | // return *schema; 45 | PROTO dummy; 46 | return protobuf_register_schema(registry, subject, dummy.descriptor()->file()); 47 | } 48 | } // kspp -------------------------------------------------------------------------------- /include/kspp/serdes/json_serdes.h: 
#pragma once

namespace kspp {
  /**
   * json_serdes — serdes for json payloads.  Concrete types must supply
   * encode/decode via template specialization; instantiating the primary
   * templates is a compile-time error.  Does not talk to a schema registry.
   */
  class json_serdes {
    // dependent false_type so the static_asserts below fire only when the
    // primary template is actually instantiated
    template<typename T>
    struct fake_dependency : public std::false_type {
    };

  public:
    json_serdes() {}

    static std::string name() { return "kspp::json"; }

    // no schema registry involved — "registration" always succeeds with id 0
    template<class T>
    int32_t register_schema(std::string name, const T &dummy) {
      return 0;
    }

    template<class T>
    size_t encode(const T &src, std::ostream &dst) {
      static_assert(fake_dependency<T>::value, "you must use specialization to provide a encode for T");
    }

    template<class T>
    size_t decode(std::istream &src, T &dst) {
      static_assert(fake_dependency<T>::value, "you must use specialization to provide a decode for T");
    }
  };
}
(process(kspp::milliseconds_since_epoch()) > 0) { ; // noop 38 | } 39 | } 40 | 41 | bool eof() const override { 42 | return this->queue_.size(); 43 | } 44 | 45 | size_t process(int64_t tick) override { 46 | size_t processed = 0; 47 | 48 | //forward up this timestamp 49 | while (this->queue_.next_event_time() <= tick) { 50 | auto r = this->queue_.pop_front_and_get(); 51 | this->lag_.add_event_time(tick, r->event_time()); 52 | ++(this->processed_count_); 53 | array_->push_back((r->record())); 54 | ++processed; 55 | } 56 | 57 | return processed; 58 | } 59 | 60 | protected: 61 | std::vector>> *array_; 62 | }; 63 | } 64 | -------------------------------------------------------------------------------- /include/kspp/sinks/null_sink.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #pragma once 5 | 6 | // null sink - useful for testing 7 | namespace kspp { 8 | template 9 | class null_sink : public topic_sink { 10 | static constexpr const char *PROCESSOR_NAME = "genric_sink"; 11 | public: 12 | typedef std::function> record)> handler; 13 | 14 | null_sink(std::shared_ptr config, handler f = nullptr) 15 | : topic_sink(), handler_(f) { 16 | this->add_metrics_label(KSPP_PROCESSOR_TYPE_TAG, "null_sink"); 17 | } 18 | 19 | ~null_sink() override { 20 | this->flush(); 21 | } 22 | 23 | void close() override { 24 | } 25 | 26 | std::string log_name() const override { 27 | return PROCESSOR_NAME; 28 | } 29 | 30 | size_t queue_size() const override { 31 | return event_consumer::queue_size(); 32 | } 33 | 34 | void flush() override { 35 | while (process(kspp::milliseconds_since_epoch()) > 0) { ; // noop 36 | } 37 | } 38 | 39 | bool eof() const override { 40 | return this->queue_.size(); 41 | } 42 | 43 | size_t process(int64_t tick) override { 44 | size_t processed = 0; 45 | 46 | //forward up this timestamp 47 | while (this->queue_.next_event_time() <= tick) { 48 | auto r = this->queue_.pop_front_and_get(); 49 | 
this->lag_.add_event_time(tick, r->event_time()); 50 | ++(this->processed_count_); 51 | if (handler_) 52 | handler_(r->record()); 53 | ++processed; 54 | } 55 | return processed; 56 | } 57 | 58 | protected: 59 | handler handler_; 60 | }; 61 | } -------------------------------------------------------------------------------- /include/kspp/sinks/sink_defs.h: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #pragma once 4 | 5 | namespace kspp { 6 | template 7 | class kafka_partitioner_base { 8 | public: 9 | using partitioner = typename std::function; 10 | }; 11 | 12 | template<> 13 | class kafka_partitioner_base { 14 | public: 15 | using partitioner = typename std::function; 16 | }; 17 | } 18 | 19 | 20 | 21 | -------------------------------------------------------------------------------- /include/kspp/sinks/stream_sink.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #pragma once 9 | 10 | namespace kspp { 11 | template 12 | void stream_sink_print(std::shared_ptr> ev, std::ostream &os_, kspp::text_serdes *_codec) { 13 | os_ << "ts: " << ev->event_time() << " "; 14 | _codec->encode(ev->record()->key(), os_); 15 | os_ << ":"; 16 | if (ev->record()->value()) 17 | _codec->encode(*ev->record()->value(), os_); 18 | else 19 | os_ << ""; 20 | os_ << std::endl; 21 | } 22 | 23 | template 24 | void stream_sink_print(std::shared_ptr> ev, std::ostream &os_, kspp::text_serdes *_codec) { 25 | os_ << "ts: " << ev->event_time() << " "; 26 | _codec->encode(*ev->record()->value(), os_); 27 | os_ << std::endl; 28 | } 29 | 30 | template 31 | void stream_sink_print(std::shared_ptr> ev, std::ostream &os_, kspp::text_serdes *_codec) { 32 | os_ << "ts: " << ev->event_time() << " "; 33 | _codec->encode(ev->record()->key(), os_); 34 | os_ << std::endl; 35 | } 36 | 37 | template 38 | class stream_sink : public partition_sink 
{ 39 | static constexpr const char *PROCESSOR_NAME = "stream_sink"; 40 | public: 41 | enum { 42 | MAX_KEY_SIZE = 1000 43 | }; 44 | 45 | stream_sink(std::shared_ptr config, std::shared_ptr> source, 46 | std::ostream *os) 47 | : partition_sink(source->partition()), os_(*os), codec_(std::make_shared()) { 48 | source->add_sink(this); 49 | this->add_metrics_label(KSPP_PROCESSOR_TYPE_TAG, "stream_sink"); 50 | } 51 | 52 | ~stream_sink() override { 53 | this->flush();; 54 | } 55 | 56 | std::string log_name() const override { 57 | return PROCESSOR_NAME; 58 | } 59 | 60 | size_t queue_size() const override { 61 | return event_consumer::queue_size(); 62 | } 63 | 64 | void commit(bool flush) override { 65 | // noop 66 | } 67 | 68 | size_t process(int64_t tick) override { 69 | size_t processed = 0; 70 | while (this->queue_.size()) { 71 | auto ev = this->queue_.front(); 72 | stream_sink_print(ev, os_, codec_.get()); 73 | this->queue_.pop_front(); 74 | ++(this->processed_count_); 75 | this->lag_.add_event_time(kspp::milliseconds_since_epoch(), ev->event_time()); // move outside loop 76 | ++processed; 77 | } 78 | return processed; 79 | } 80 | 81 | std::ostream &os_; 82 | std::shared_ptr codec_; 83 | }; 84 | } -------------------------------------------------------------------------------- /include/kspp/sources/avro_file_source.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #pragma once 6 | 7 | namespace kspp { 8 | class generic_avro_file_source : public partition_source { 9 | static constexpr const char *PROCESSOR_NAME = "avro_file_source"; 10 | public: 11 | generic_avro_file_source(std::shared_ptr config, int32_t partition, std::string source); 12 | 13 | ~generic_avro_file_source() override; 14 | 15 | std::string log_name() const override { 16 | return PROCESSOR_NAME; 17 | } 18 | 19 | void start(int64_t offset) override; 20 | 21 | void commit(bool flush) override; 22 | 23 | void close() override; 
24 | 25 | bool eof() const override; 26 | 27 | size_t queue_size() const override; 28 | 29 | int64_t next_event_time() const override; 30 | 31 | size_t process(int64_t tick) override; 32 | 33 | std::string topic() const override; 34 | 35 | protected: 36 | void thread_f(); 37 | 38 | size_t max_incomming_queue_size_ = 1000; 39 | bool started_ = false; 40 | bool exit_ = false; 41 | bool eof_ = false; 42 | std::thread thread_; 43 | std::string source_; 44 | int64_t messages_in_file_ = 0; 45 | event_queue incomming_msg_; 46 | }; 47 | } -------------------------------------------------------------------------------- /include/kspp/state_stores/state_store.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #pragma once 9 | 10 | // this should inherit from a state-store base class... 11 | namespace kspp { 12 | template 13 | class state_store { 14 | public: 15 | using sink_function = typename std::function>)>; 16 | 17 | virtual ~state_store() {} 18 | 19 | /** 20 | * garbage collects elements if they should be deleted 21 | * @param tick now 22 | */ 23 | virtual void garbage_collect(int64_t tick) {} 24 | 25 | /** 26 | * forces one of the oldest elements through garbage collection event if it's retention says it should be kept 27 | * NOTE not guaranteed to be the oldest (just one of the oldest...) 
28 | * * @param tick now 29 | */ 30 | virtual void garbage_collect_one(int64_t tick) {} 31 | 32 | virtual void close() = 0; 33 | 34 | /** 35 | * Put or delete a record 36 | */ 37 | inline void insert(std::shared_ptr> record, int64_t offset) { 38 | _insert(record, offset); 39 | } 40 | 41 | /** 42 | * commits the offset 43 | */ 44 | virtual void commit(bool flush) = 0; 45 | 46 | /** 47 | * returns last offset 48 | */ 49 | virtual int64_t offset() const = 0; 50 | 51 | virtual void start(int64_t offset) = 0; 52 | 53 | virtual size_t aprox_size() const = 0; 54 | 55 | virtual size_t exact_size() const = 0; 56 | 57 | // TBD really needed for counter store 58 | virtual void clear() = 0; 59 | 60 | void set_sink(sink_function f) { 61 | sink_ = f; 62 | } 63 | 64 | /** 65 | * Returns a key-value pair with the given key 66 | */ 67 | virtual std::shared_ptr> get(const K &key) const = 0; 68 | 69 | virtual typename kspp::materialized_source::iterator begin() const = 0; 70 | 71 | virtual typename kspp::materialized_source::iterator end() const = 0; 72 | 73 | protected: 74 | virtual void _insert(std::shared_ptr> record, int64_t offset) = 0; 75 | 76 | sink_function sink_; 77 | }; 78 | } 79 | -------------------------------------------------------------------------------- /include/kspp/topology_builder.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #pragma once 8 | 9 | namespace kspp { 10 | class topology_builder { 11 | public: 12 | topology_builder(std::shared_ptr cluster_config) 13 | : cluster_config_(cluster_config) { 14 | } 15 | 16 | std::shared_ptr create_topology() { 17 | return std::make_shared(cluster_config_, std::to_string(next_topology_id_++)); 18 | } 19 | 20 | std::shared_ptr create_internal_topology() { 21 | return std::make_shared(cluster_config_, std::to_string(next_topology_id_++), true); 22 | } 23 | 24 | private: 25 | std::shared_ptr cluster_config_; 26 | 
size_t next_topology_id_=0; 27 | }; 28 | } 29 | -------------------------------------------------------------------------------- /include/kspp/type_name.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #pragma once 6 | 7 | namespace kspp { 8 | // default implementation 9 | template 10 | struct type_name { 11 | static std::string get() { 12 | return typeid(T).name(); 13 | } 14 | }; 15 | 16 | template<> 17 | struct type_name { 18 | static inline const std::string get() { return "bool"; } 19 | }; 20 | 21 | template<> 22 | struct type_name { 23 | static inline const std::string get() { return "int32_t"; } 24 | }; 25 | 26 | template<> 27 | struct type_name { 28 | static inline const std::string get() { return "int64_t"; } 29 | }; 30 | 31 | template<> 32 | struct type_name { 33 | static inline const std::string get() { return "size_t"; } 34 | }; 35 | 36 | template<> 37 | struct type_name { 38 | static inline const std::string get() { return "string"; } 39 | }; 40 | 41 | template<> 42 | struct type_name { 43 | static inline const std::string get() { return "uuid"; } 44 | }; 45 | } // namespace 46 | -------------------------------------------------------------------------------- /include/kspp/typedefs.h: -------------------------------------------------------------------------------- 1 | #pragma once 2 | 3 | namespace kspp { 4 | enum start_offset_t { 5 | OFFSET_BEGINNING = -2, OFFSET_END = -1, OFFSET_STORED = -1000 6 | }; 7 | } 8 | 9 | -------------------------------------------------------------------------------- /include/kspp/utils/cluster_uri.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #pragma once 5 | 6 | /** 7 | * Class representing a cluster URI. 
/**
 * concurrent_queue — unbounded multi-producer/multi-consumer FIFO protected
 * by a mutex and condition variable.  Copying is disabled.
 */
template<class T>
class concurrent_queue {
public:
  concurrent_queue() = default;

  concurrent_queue(const concurrent_queue &) = delete;

  concurrent_queue &operator=(const concurrent_queue &) = delete;

  // enqueue one item and wake a single waiting consumer
  void push(const T &item) {
    std::unique_lock<std::mutex> lk(cv_m_);
    queue_.push(item);
    lk.unlock(); // release before notify so the woken thread can take the lock immediately
    cv_.notify_one();
  }

  inline bool empty() const {
    std::unique_lock<std::mutex> lk(cv_m_);
    return queue_.empty();
  }

  // blocking pop, returns the item by value
  T pop() {
    std::unique_lock<std::mutex> lk(cv_m_);
    while (queue_.empty())
      cv_.wait(lk); // loop guards against spurious wakeups
    auto val = queue_.front();
    queue_.pop();
    return val;
  }

  // blocking pop into an out-parameter
  void pop(T &item) {
    std::unique_lock<std::mutex> lk(cv_m_);
    while (queue_.empty())
      cv_.wait(lk);
    item = queue_.front();
    queue_.pop();
  }

  // non-blocking pop; returns false when the queue is empty
  bool try_pop(T &item) {
    std::unique_lock<std::mutex> lk(cv_m_);
    if (queue_.empty())
      return false;
    item = queue_.front();
    queue_.pop();
    return true;
  }

  // pop with timeout; returns false if nothing arrived within rel_time
  template<class Rep, class Period>
  bool try_pop(T &item, const std::chrono::duration<Rep, Period> &rel_time) {
    std::unique_lock<std::mutex> lk(cv_m_);
    while (queue_.empty()) {
      if (cv_.wait_for(lk, rel_time) == std::cv_status::timeout)
        return false;
    }
    item = queue_.front();
    queue_.pop();
    return true;
  }

private:
  std::queue<T> queue_;
  // BUG FIX: must be `mutable` — empty() is a const member function but has
  // to lock the mutex; a non-mutable member cannot be locked there
  mutable std::mutex cv_m_;
  std::condition_variable cv_;
};
namespace kspp {
  namespace async {
    /**
     * destructor_callback — holds a value and invokes a user callback on that
     * value exactly once, when this object is destroyed (scope-exit hook).
     */
    template<class Value>
    class destructor_callback {
    public:
      destructor_callback(std::function<void(Value &val)> callback)
          : cb_(callback) {}

      destructor_callback(const Value &initial_value, std::function<void(Value &val)> callback)
          : cb_(callback), val_(initial_value) {} // init order matches declaration order (cb_ first)

      ~destructor_callback() {
        if (cb_) {
          try {
            cb_(val_); // BUG FIX: was `_cb(val_)` — the member is named `cb_`
          }
          catch (...) {
            // ignore this — a destructor must never throw; assert in debug builds
            assert(false);
          }
        }
      }

      inline Value &value() {
        return val_;
      }

      inline const Value &value() const {
        return val_;
      }

    private:
      std::function<void(Value &val)> cb_;
      Value val_;
    };
  } // namespace
} // namespace
/*
 * https://stackoverflow.com/questions/2808398/easily-measure-elapsed-time
 * Nikos Athanasiou
 */
#pragma once
namespace kspp {
  /**
   * measure — tiny helper for timing a callable against std::chrono's
   * steady_clock.  TimeT selects the duration unit (default milliseconds).
   */
  template<typename TimeT = std::chrono::milliseconds>
  struct measure {
    // runs func(args...) and returns the elapsed time as a raw tick count
    template<typename F, typename... Args>
    static typename TimeT::rep execution(F &&func, Args &&... args) {
      auto t0 = std::chrono::steady_clock::now();
      std::forward<F>(func)(std::forward<Args>(args)...);
      auto t1 = std::chrono::steady_clock::now();
      return std::chrono::duration_cast<TimeT>(t1 - t0).count();
    }

    // runs func(args...) and returns the elapsed time as a duration object,
    // so the caller can defer .count() until needed (eg IO)
    template<typename F, typename... Args>
    static auto duration(F &&func, Args &&... args) {
      auto t0 = std::chrono::steady_clock::now();
      std::forward<F>(func)(std::forward<Args>(args)...);
      return std::chrono::duration_cast<TimeT>(std::chrono::steady_clock::now() - t0);
    }
  };
}

// usage:
// std::cout << measure<>::execution(functor(dummy)) << std::endl;
//
// call .count() manually later when needed (eg IO)
// auto avg = (measure<>::duration(func) + measure<>::duration(func)) / 2.0;
require_topic_leaders(std::shared_ptr config, std::string topic, std::chrono::seconds timeout); 19 | 20 | void require_topic_leaders(std::shared_ptr config, std::string topic); 21 | } 22 | } 23 | 24 | -------------------------------------------------------------------------------- /include/kspp/utils/kspp_utils.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #pragma once 6 | 7 | namespace kspp { 8 | std::string sanitize_filename(std::string s); 9 | 10 | std::vector parse_partition_list(std::string s); 11 | 12 | std::string partition_list_to_string(std::vector v); 13 | 14 | std::vector get_partition_list(int32_t nr_of_partitions); 15 | 16 | std::vector parse_string_array(std::string s, std::string regexp = "[\\,]"); 17 | 18 | std::string to_string(std::vector v); 19 | 20 | std::string to_string(std::set v); 21 | } 22 | -------------------------------------------------------------------------------- /include/kspp/utils/offset_storage_provider.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #pragma once 7 | 8 | namespace kspp { 9 | class offset_storage { 10 | public: 11 | enum { 12 | UNKNOWN_OFFSET = -4242 13 | }; 14 | 15 | offset_storage() {}; 16 | 17 | virtual ~offset_storage() {} 18 | 19 | int64_t start(int64_t offset); 20 | 21 | virtual void commit(int64_t offset, bool flush) { 22 | last_commited_offset_ = offset; 23 | if (flush || (last_commited_offset_ - last_flushed_offset_) > max_pending_offsets_) { 24 | if (last_commited_offset_ > last_flushed_offset_) 25 | persist_offset(last_commited_offset_, flush_offset_timeout_ms_); 26 | } 27 | } 28 | 29 | void set_max_pending_offsets(int64_t max_pending) { 30 | max_pending_offsets_ = max_pending; 31 | } 32 | 33 | void set_flush_offset_timeout(int flush_offset_timeout_ms) { 34 | flush_offset_timeout_ms_ = flush_offset_timeout_ms; 35 | } 36 | 37 
| protected: 38 | virtual int64_t load_offset(int timeout_ms) = 0; 39 | 40 | virtual void persist_offset(int64_t offset, int timeout_ms) = 0; 41 | 42 | int64_t last_commited_offset_ = UNKNOWN_OFFSET; 43 | int64_t last_flushed_offset_ = UNKNOWN_OFFSET - 1; 44 | int64_t max_pending_offsets_ = 3600000; 45 | int flush_offset_timeout_ms_ = 1000; 46 | }; 47 | 48 | class fs_offset_storage : public offset_storage { 49 | public: 50 | fs_offset_storage(std::string path); 51 | 52 | ~fs_offset_storage() override; 53 | 54 | private: 55 | int64_t load_offset(int timeout_ms) override; 56 | 57 | void persist_offset(int64_t offset, int timeout_ms) override; 58 | 59 | std::filesystem::path offset_storage_path_; 60 | }; 61 | 62 | class null_offset_storage : public offset_storage { 63 | public: 64 | null_offset_storage() {}; 65 | 66 | ~null_offset_storage() override {} 67 | 68 | private: 69 | int64_t load_offset(int timeout_ms) override { return OFFSET_END; } 70 | 71 | void persist_offset(int64_t offset, int timeout_ms) override {} // noop 72 | }; 73 | 74 | 75 | /* 76 | * uri is one of [file: s3: null:] 77 | */ 78 | std::shared_ptr get_offset_provider(std::string uri); 79 | } 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | -------------------------------------------------------------------------------- /include/kspp/utils/spinlock.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #pragma once 6 | 7 | namespace kspp { 8 | class spinlock { 9 | std::atomic_flag locked = ATOMIC_FLAG_INIT; 10 | public: 11 | using scoped_lock = std::unique_lock; 12 | 13 | spinlock() {} 14 | 15 | inline void lock() { 16 | while (locked.test_and_set(std::memory_order_acquire)) { ; } 17 | } 18 | 19 | inline void unlock() { 20 | locked.clear(std::memory_order_release); 21 | } 22 | 23 | private: 24 | spinlock(spinlock const &) = delete; 25 | 26 | spinlock &operator=(spinlock const &) = delete; 27 | }; 28 | 29 | } // namespace 30 | 
-------------------------------------------------------------------------------- /include/kspp/utils/string_utils.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #pragma once 7 | 8 | namespace kspp { 9 | std::string escape_json(const std::string &s); 10 | 11 | std::string escape_sql(std::string s); 12 | 13 | inline void ltrim(std::string &s) { 14 | s.erase(s.begin(), std::find_if(s.begin(), s.end(), std::not1(std::ptr_fun(std::isspace)))); 15 | } 16 | 17 | // trim from end (in place) 18 | inline void rtrim(std::string &s) { 19 | s.erase(std::find_if(s.rbegin(), s.rend(), std::not1(std::ptr_fun(std::isspace))).base(), s.end()); 20 | } 21 | 22 | // trim from both ends (in place) 23 | inline void trim(std::string &s) { 24 | ltrim(s); 25 | rtrim(s); 26 | } 27 | 28 | kspp::start_offset_t to_offset(std::string); 29 | 30 | std::string to_string(start_offset_t); 31 | 32 | std::chrono::seconds to_duration(std::string s); 33 | 34 | std::string to_string(std::chrono::seconds s); 35 | } 36 | -------------------------------------------------------------------------------- /include/kspp/utils/url.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #pragma once 5 | 6 | /** 7 | 8 | * Consider http://host1.domain.com:2110/23987462 9 | * 10 | * The url is broker down in into its parts: scheme ("http"), authority (ie. host and port), 11 | * path ("/foo/bar"), 12 | * The scheme is lower-cased. 
13 | */ 14 | 15 | namespace kspp { 16 | class url { 17 | public: 18 | //accepts a url without a scheme if given a non empty default_scheme 19 | url(std::string s, std::string default_scheme = ""); 20 | 21 | bool good() const { return good_; } 22 | 23 | std::string scheme() const { return scheme_; } 24 | 25 | std::string authority() const { return authority_; } 26 | 27 | std::string path() const { return path_; } 28 | 29 | std::string str() const { return scheme_ + "://" + authority_ + path_; } 30 | 31 | private: 32 | bool good_; 33 | std::string scheme_; 34 | std::string authority_; 35 | std::string path_; 36 | }; 37 | } 38 | -------------------------------------------------------------------------------- /include/kspp/utils/url_parser.h: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #pragma once 4 | 5 | namespace kspp { 6 | std::vector split_url_list(std::string s, std::string default_scheme = ""); 7 | } 8 | -------------------------------------------------------------------------------- /install.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -e 3 | 4 | rm -rf build bin lib 5 | mkdir build 6 | cd build 7 | sudo rm -rf /usr/local/include/kspp 8 | sudo rm -rf /usr/local/lib/libkspp*.so 9 | cmake -DCMAKE_BUILD_TYPE=Release -DBUILD_STATIC_LIBS=ON -DBUILD_SHARED_LIBS=OFF -DLINK_SHARED=OFF -DBUILD_TESTS=OFF -DBUILD_SAMPLES=OFF .. 
10 | make -j "$(getconf _NPROCESSORS_ONLN)" 11 | sudo make install 12 | 13 | -------------------------------------------------------------------------------- /libraries/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | include_directories("${CMAKE_CURRENT_SOURCE_DIR}") 2 | 3 | IF (ENABLE_MQTT) 4 | add_subdirectory(kspp-mqtt) 5 | ENDIF () 6 | 7 | IF (ENABLE_ELASTICSEARCH) 8 | add_subdirectory(kspp-es) 9 | ENDIF () 10 | 11 | IF (ENABLE_INFLUXDB) 12 | add_subdirectory(kspp-influx) 13 | ENDIF () 14 | 15 | IF (ENABLE_POSTGRES) 16 | add_subdirectory(kspp-pg) 17 | ENDIF () 18 | 19 | IF (ENABLE_TDS) 20 | add_subdirectory(kspp-tds) 21 | ENDIF () 22 | 23 | IF (ENABLE_ROCKSDB) 24 | # add_subdirectory(kafka2rocksdb) 25 | ENDIF () 26 | 27 | if (ENABLE_BITBOUNCER_STREAMING) 28 | add_subdirectory(kspp-bitbouncer) 29 | ENDIF () 30 | 31 | 32 | 33 | 34 | -------------------------------------------------------------------------------- /libraries/kspp-bitbouncer/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | SET(SIMPLE_NAME kspp-bitbouncer) 2 | 3 | find_package(Protobuf REQUIRED) 4 | message(STATUS "Using protobuf ${protobuf_VERSION}") 5 | find_package(GRPC REQUIRED) 6 | 7 | SET(PROTOS ./proto/bb_streaming.proto) 8 | set(PROTO_SRC_DIR ${CMAKE_CURRENT_BINARY_DIR}) 9 | 10 | PROTOBUF_GENERATE_CPP(PROTO_SRCS PROTO_HDRS ${PROTOS}) 11 | 12 | # Print path to generated files 13 | message("PROTO_SRCS = ${PROTO_SRCS}") 14 | message("PROTO_HDRS = ${PROTO_HDRS}") 15 | 16 | grpc_generate_cpp(GRPC_SRCS GRPC_HDRS ${PROTO_SRC_DIR} ${PROTOS}) 17 | message("GRPC_SRCS = ${GRPC_SRCS}") 18 | message("GRPC_HDRS = ${GRPC_HDRS}") 19 | 20 | SET(BB_GRPC_LIBS_STATIC kspp_grpc_s gRPC::grpc++_reflection gRPC::grpc++ gRPC::grpc protobuf::libprotobuf) 21 | SET(BB_GRPC_LIBS_SHARED kspp_grpc gRPC::grpc++_reflection gRPC::grpc++ gRPC::grpc protobuf::libprotobuf) 22 | 23 | file(GLOB LIB_SRCS *.cpp grpc/*.cpp) 24 | 
25 | file(GLOB LIB_HEADERS *.h grpc/*.h) 26 | 27 | 28 | if (BUILD_STATIC_LIBS) 29 | ADD_LIBRARY(${SIMPLE_NAME}_s STATIC ${PROTO_SRCS} ${GRPC_SRCS} ${BITBOUNCER_GRPC_LIB_SRCS}) 30 | INSTALL(TARGETS ${SIMPLE_NAME}_s 31 | LIBRARY DESTINATION lib 32 | ARCHIVE DESTINATION lib 33 | ) 34 | endif () 35 | 36 | if (BUILD_SHARED_LIBS) 37 | ADD_LIBRARY(${SIMPLE_NAME} SHARED ${PROTO_SRCS} ${GRPC_SRCS} ${BITBOUNCER_GRPC_LIB_SRCS}) 38 | INSTALL(TARGETS ${SIMPLE_NAME} 39 | LIBRARY DESTINATION lib 40 | ARCHIVE DESTINATION lib 41 | ) 42 | endif () 43 | 44 | INSTALL(FILES ${LIB_HEADERS} DESTINATION include/${SIMPLE_NAME}) 45 | 46 | IF (BUILD_TOOLS) 47 | add_subdirectory(tools) 48 | ENDIF () 49 | 50 | 51 | -------------------------------------------------------------------------------- /libraries/kspp-bitbouncer/grpc/grpc_utils.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #define API_KEY_HEADER "api-key" 4 | #define SECRET_ACCESS_KEY_HEADER "secret-access-key" 5 | 6 | namespace kspp { 7 | void add_api_key(grpc::ClientContext &client_context, const std::string &api_key) { 8 | client_context.AddMetadata(API_KEY_HEADER, api_key); 9 | } 10 | 11 | void add_api_key_secret(grpc::ClientContext &client_context, const std::string &api_key, 12 | const std::string &secret_access_key) { 13 | client_context.AddMetadata(API_KEY_HEADER, api_key); 14 | client_context.AddMetadata(SECRET_ACCESS_KEY_HEADER, secret_access_key); 15 | } 16 | 17 | void set_channel_args(grpc::ChannelArguments &channelArgs) { 18 | //channelArgs.SetInt(GRPC_ARG_HTTP2_BDP_PROBE, 1); 19 | 20 | channelArgs.SetInt(GRPC_ARG_KEEPALIVE_TIME_MS, 1000); 21 | channelArgs.SetInt(GRPC_ARG_KEEPALIVE_TIMEOUT_MS, 1000); 22 | channelArgs.SetInt(GRPC_ARG_KEEPALIVE_PERMIT_WITHOUT_CALLS, 1); 23 | 24 | channelArgs.SetInt(GRPC_ARG_HTTP2_MAX_PINGS_WITHOUT_DATA, 0); // unlimited 25 | //channelArgs.SetInt(GRPC_ARG_HTTP2_MIN_RECV_PING_INTERVAL_WITHOUT_DATA_MS, 5000); // not applicable for 
client 26 | channelArgs.SetInt(GRPC_ARG_HTTP2_MIN_SENT_PING_INTERVAL_WITHOUT_DATA_MS, 1000); 27 | } 28 | } 29 | 30 | 31 | 32 | 33 | 34 | -------------------------------------------------------------------------------- /libraries/kspp-bitbouncer/grpc/grpc_utils.h: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #pragma once 4 | 5 | namespace kspp { 6 | void add_api_key(grpc::ClientContext &client_context, const std::string &api_key); 7 | 8 | void add_api_key_secret(grpc::ClientContext &client_context, const std::string &api_key, 9 | const std::string &secret_access_key); 10 | 11 | void set_channel_args(grpc::ChannelArguments &channelArgs); 12 | } 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /libraries/kspp-bitbouncer/grpc_avro_schema_resolver.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #pragma once 9 | 10 | namespace kspp { 11 | class grpc_avro_schema_resolver { 12 | public: 13 | grpc_avro_schema_resolver(std::shared_ptr channel, const std::string &api_key) 14 | : stub_(bitbouncer::streaming::streamprovider::NewStub(channel)), _api_key(api_key) { 15 | } 16 | 17 | std::shared_ptr get_schema(int32_t schema_id) { 18 | if (schema_id == 0) 19 | return nullptr; 20 | 21 | { 22 | kspp::spinlock::scoped_lock xxx(_spinlock); 23 | auto item = _cache.find(schema_id); 24 | if (item != _cache.end()) { 25 | return item->second; 26 | } 27 | } 28 | 29 | grpc::ClientContext context; 30 | add_api_key(context, _api_key); 31 | bitbouncer::streaming::GetSchemaRequest request; 32 | request.set_schema_id(schema_id); 33 | bitbouncer::streaming::GetSchemaReply reply; 34 | grpc::Status status = stub_->GetSchema(&context, request, &reply); 35 | if (!status.ok()) { 36 | LOG_FIRST_N(ERROR, 10) << "avro_schema_resolver rpc failed, schema id: " << schema_id; 37 | 
return nullptr; 38 | } 39 | 40 | try { 41 | std::shared_ptr schema = std::make_shared(); 42 | std::istringstream stream(reply.schema()); 43 | avro::compileJsonSchema(stream, *schema); 44 | { 45 | kspp::spinlock::scoped_lock xxx(_spinlock); 46 | _cache[schema_id] = schema; 47 | } 48 | return schema; 49 | } catch (std::exception &e) { 50 | LOG(ERROR) << "failed to parse schema id:" << schema_id << ", " << e.what() << ", raw schema: " 51 | << reply.schema(); 52 | } 53 | return nullptr; 54 | } 55 | 56 | private: 57 | kspp::spinlock _spinlock; 58 | std::unique_ptr stub_; 59 | std::map> _cache; 60 | std::string _api_key; 61 | }; 62 | } // namespace 63 | -------------------------------------------------------------------------------- /libraries/kspp-bitbouncer/grpc_avro_source.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include "grpc_avro_consumer.h" 8 | 9 | #pragma once 10 | 11 | namespace kspp { 12 | template 13 | class grpc_avro_source : public partition_source { 14 | static constexpr const char *PROCESSOR_NAME = "bb_grpc_avro_source"; 15 | public: 16 | grpc_avro_source(std::shared_ptr config, 17 | int32_t partition, 18 | std::string topic, 19 | std::shared_ptr offset_store, 20 | std::shared_ptr channel, 21 | std::string api_key, 22 | std::string secret_access_key) 23 | : partition_source(nullptr, partition), 24 | _impl(partition, topic, offset_store, channel, api_key, secret_access_key) { 25 | } 26 | 27 | virtual ~grpc_avro_source() { 28 | close(); 29 | } 30 | 31 | std::string log_name() const override { 32 | return PROCESSOR_NAME; 33 | } 34 | 35 | void start(int64_t offset) override { 36 | _impl.start(offset); 37 | } 38 | 39 | void close() override { 40 | /* 41 | * if (_commit_chain.last_good_offset() >= 0 && _impl.commited() < _commit_chain.last_good_offset()) 42 | _impl.commit(_commit_chain.last_good_offset(), true); 43 | */ 44 | _impl.close(); 
45 | } 46 | 47 | bool eof() const override { 48 | return _impl.eof(); 49 | } 50 | 51 | void commit(bool flush) override { 52 | _impl.commit(flush); 53 | } 54 | 55 | inline int64_t offset() const { 56 | return _impl.offset(); 57 | } 58 | 59 | inline bool good() const { 60 | return _impl.good(); 61 | } 62 | 63 | // TBD if we store last offset and end of stream offset we can use this... 64 | size_t queue_size() const override { 65 | return _impl.queue().size(); 66 | } 67 | 68 | int64_t next_event_time() const override { 69 | return _impl.queue().next_event_time(); 70 | } 71 | 72 | size_t process(int64_t tick) override { 73 | if (_impl.queue().size() == 0) 74 | return 0; 75 | size_t processed = 0; 76 | while (!_impl.queue().empty()) { 77 | auto p = _impl.queue().front(); 78 | if (p == nullptr || p->event_time() > tick) 79 | return processed; 80 | _impl.queue().pop_front(); 81 | this->send_to_sinks(p); 82 | ++(this->processed_count_); 83 | ++processed; 84 | this->_lag.add_event_time(tick, p->event_time()); 85 | } 86 | return processed; 87 | } 88 | 89 | std::string topic() const override { 90 | return _impl.logical_name(); 91 | } 92 | 93 | protected: 94 | grpc_avro_consumer _impl; 95 | }; 96 | } 97 | 98 | -------------------------------------------------------------------------------- /libraries/kspp-bitbouncer/proto/bb_streaming.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto3"; 2 | 3 | option java_package = "com.bitbouncer.streaming.grpc"; 4 | option objc_class_prefix = "bitbouncer_streaming"; 5 | 6 | package bitbouncer.streaming; 7 | 8 | service streamprovider { 9 | rpc Subscribe (SubscriptionRequest) returns (stream SubscriptionBundle) {} 10 | rpc GetSchema (GetSchemaRequest) returns (GetSchemaReply) {} 11 | } 12 | 13 | message SubscriptionRequest { 14 | string topic = 1; 15 | int32 partition = 2; 16 | int64 offset = 3; 17 | int32 max_poll_time = 4; 18 | } 19 | 20 | message SubscriptionBundle { 21 | repeated 
SubscriptionData data = 1; 22 | bool eof = 2; 23 | } 24 | 25 | message SubscriptionData { 26 | int32 key_schema=1; 27 | int32 value_schema=2; 28 | bytes key = 3; 29 | bytes value = 4; 30 | int64 offset = 5; 31 | int64 timestamp = 6; 32 | } 33 | 34 | // The request message containing the schema id 35 | message GetSchemaRequest { 36 | int32 schema_id = 1; 37 | } 38 | 39 | // json encoded schema 40 | message GetSchemaReply { 41 | string schema = 1; 42 | } 43 | 44 | 45 | 46 | -------------------------------------------------------------------------------- /libraries/kspp-bitbouncer/tools/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_subdirectory(bitbouncer) 2 | 3 | 4 | 5 | 6 | -------------------------------------------------------------------------------- /libraries/kspp-bitbouncer/tools/bitbouncer/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_executable(bb2pg bb2pg.cpp) 2 | 3 | if (LINK_SHARED) 4 | target_link_libraries(bb2pg ${BBSTREAMING_LIBS_SHARED} ${ALL_LIBS}) 5 | else () 6 | target_link_libraries(bb2pg ${BBSTREAMING_LIBS_STATIC} ${ALL_LIBS}) 7 | endif () 8 | 9 | INSTALL(TARGETS bb2pg RUNTIME DESTINATION bin) 10 | 11 | 12 | add_executable(bb2console bb2console.cpp) 13 | 14 | if (LINK_SHARED) 15 | target_link_libraries(bb2console ${BBSTREAMING_LIBS_SHARED} ${ALL_LIBS}) 16 | else () 17 | target_link_libraries(bb2console ${BBSTREAMING_LIBS_STATIC} ${ALL_LIBS}) 18 | endif () 19 | 20 | INSTALL(TARGETS bb2console RUNTIME DESTINATION bin) 21 | 22 | 23 | add_executable(bb2avro bb2avro.cpp) 24 | 25 | if (LINK_SHARED) 26 | target_link_libraries(bb2avro ${BBSTREAMING_LIBS_SHARED} ${ALL_LIBS}) 27 | else () 28 | target_link_libraries(bb2avro ${BBSTREAMING_LIBS_STATIC} ${ALL_LIBS}) 29 | endif () 30 | 31 | INSTALL(TARGETS bb2avro RUNTIME DESTINATION bin) 32 | 33 | -------------------------------------------------------------------------------- 
/libraries/kspp-es/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | SET(SIMPLE_NAME kspp-es) 2 | 3 | file(GLOB LIB_SRCS *.cpp) 4 | file(GLOB LIB_HEADERS *.h) 5 | 6 | if (BUILD_STATIC_LIBS) 7 | ADD_LIBRARY(${SIMPLE_NAME}_s STATIC ${LIB_SRCS} ${LIB_HEADERS}) 8 | INSTALL(TARGETS ${SIMPLE_NAME}_s 9 | LIBRARY DESTINATION lib 10 | ARCHIVE DESTINATION lib) 11 | endif () 12 | 13 | 14 | if (BUILD_SHARED_LIBS) 15 | ADD_LIBRARY(${SIMPLE_NAME} STATIC ${LIB_SRCS} ${LIB_HEADERS}) 16 | INSTALL(TARGETS ${SIMPLE_NAME} 17 | LIBRARY DESTINATION lib 18 | ARCHIVE DESTINATION lib) 19 | endif () 20 | 21 | INSTALL(FILES ${LIB_HEADERS} DESTINATION include/${SIMPLE_NAME}) 22 | 23 | IF (BUILD_TOOLS) 24 | add_subdirectory(tools) 25 | ENDIF () 26 | 27 | #SET(MQTT_LIBS paho-mqttpp3 paho-mqtt3cs) 28 | -------------------------------------------------------------------------------- /libraries/kspp-es/elasticsearch_generic_avro_sink.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #pragma once 6 | 7 | namespace kspp { 8 | class elasticsearch_generic_avro_sink : public generic_avro_sink { 9 | static constexpr const char *PROCESSOR_NAME = "elasticsearch_avro_sink"; 10 | public: 11 | elasticsearch_generic_avro_sink(std::shared_ptr config, std::string remote_write_url, 12 | std::string username, std::string password, size_t max_http_connection = 20) 13 | : generic_avro_sink(config, std::make_shared>( 14 | remote_write_url, 15 | username, 16 | password, 17 | [](const generic_avro &key) { 18 | return avro_2_raw_column_value(*key.generic_datum()); 19 | }, 20 | [](const generic_avro &value) { 21 | return avro2elastic_json(*value.valid_schema(), *value.generic_datum()); 22 | }, 23 | max_http_connection)) { 24 | this->add_metrics_label(KSPP_PROCESSOR_TYPE_TAG, PROCESSOR_NAME); 25 | this->add_metrics_label(KSPP_TOPIC_TAG, remote_write_url); 26 | 27 | // register sub component 
metrics 28 | this->register_metrics(this); 29 | } 30 | 31 | /*bool good() const override { 32 | return this->good(); 33 | }*/ 34 | 35 | std::string log_name() const override { 36 | return PROCESSOR_NAME; 37 | } 38 | }; 39 | } 40 | -------------------------------------------------------------------------------- /libraries/kspp-es/elasticsearch_utils.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #pragma once 6 | 7 | namespace kspp { 8 | // decorated as json - ie "" around strings and [] around arrays 9 | std::string avro_2_json_simple_column_value(const avro::GenericDatum &datum); 10 | 11 | //no decoration 12 | std::string avro_2_raw_column_value(const avro::GenericDatum &column); 13 | 14 | std::string 15 | avro2elastic_key_values(const avro::ValidSchema &schema, const std::string &key, const avro::GenericDatum &datum); 16 | 17 | std::string avro2elastic_json(const avro::ValidSchema &schema, const avro::GenericDatum &datum); 18 | 19 | class avro2elastic_IsChars { 20 | public: 21 | avro2elastic_IsChars(const char *charsToRemove) : chars(charsToRemove) {}; 22 | 23 | inline bool operator()(char c) { 24 | for (const char *testChar = chars; *testChar != 0; ++testChar) { 25 | if (*testChar == c) { return true; } 26 | } 27 | return false; 28 | } 29 | 30 | private: 31 | const char *chars; 32 | }; 33 | } 34 | -------------------------------------------------------------------------------- /libraries/kspp-es/tools/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_subdirectory(kafka2es) 2 | 3 | 4 | -------------------------------------------------------------------------------- /libraries/kspp-es/tools/kafka2es/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_executable(kafka2es kafka2es.cpp) 2 | 3 | target_link_libraries(kafka2es kspp-es_s ${ALL_LIBS}) 4 | 5 | INSTALL(TARGETS kafka2es 
RUNTIME DESTINATION bin) 6 | 7 | 8 | 9 | -------------------------------------------------------------------------------- /libraries/kspp-influx/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | SET(SIMPLE_NAME kspp-influx) 2 | 3 | file(GLOB LIB_SRCS *.cpp) 4 | file(GLOB LIB_HEADERS *.h) 5 | 6 | if (BUILD_STATIC_LIBS) 7 | ADD_LIBRARY(${SIMPLE_NAME}_s STATIC ${LIB_SRCS} ${LIB_HEADERS}) 8 | INSTALL(TARGETS ${SIMPLE_NAME}_s 9 | LIBRARY DESTINATION lib 10 | ARCHIVE DESTINATION lib) 11 | endif () 12 | 13 | if (BUILD_SHARED_LIBS) 14 | ADD_LIBRARY(${SIMPLE_NAME} STATIC ${LIB_SRCS} ${LIB_HEADERS}) 15 | INSTALL(TARGETS ${SIMPLE_NAME} 16 | LIBRARY DESTINATION lib 17 | ARCHIVE DESTINATION lib) 18 | endif () 19 | 20 | INSTALL(FILES ${LIB_HEADERS} DESTINATION include/${SIMPLE_NAME}) 21 | 22 | IF (BUILD_TOOLS) 23 | add_subdirectory(tools) 24 | ENDIF () 25 | 26 | -------------------------------------------------------------------------------- /libraries/kspp-influx/influx_sink.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #pragma once 8 | 9 | namespace kspp { 10 | class influx_sink : 11 | public kspp::topic_sink { 12 | public: 13 | influx_sink(std::shared_ptr config, 14 | const kspp::connect::connection_params &cp, 15 | int32_t http_batch_size, 16 | std::chrono::milliseconds http_timeout); 17 | 18 | ~influx_sink() override; 19 | 20 | std::string log_name() const override; 21 | 22 | bool eof() const override; 23 | 24 | size_t process(int64_t tick) override; 25 | 26 | void close() override; 27 | 28 | void flush() override; 29 | 30 | private: 31 | void _thread(); 32 | 33 | boost::asio::io_service ios_; 34 | std::unique_ptr work_; 35 | bool exit_=false; 36 | bool start_running_=false; 37 | bool closed_=false; 38 | std::atomic batch_in_progress_ = false; 39 | event_queue pending_for_delete_; 40 | const 
kspp::connect::connection_params cp_; 41 | kspp::http::client http_handler_; 42 | const size_t batch_size_; 43 | const std::chrono::milliseconds http_timeout_; 44 | kspp::metric_streaming_lag lag_; 45 | kspp::metric_counter http_requests_; 46 | kspp::metric_counter http_timeouts_; 47 | kspp::metric_counter http_error_; 48 | kspp::metric_counter http_ok_; 49 | kspp::metric_counter http_bytes_; 50 | std::thread asio_thread_; // internal to http client 51 | std::thread bg_; // performs the send loop 52 | }; 53 | } // namespace -------------------------------------------------------------------------------- /libraries/kspp-influx/tools/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_subdirectory(kafka2influx) 2 | 3 | 4 | -------------------------------------------------------------------------------- /libraries/kspp-influx/tools/kafka2influx/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_executable(kafka2influxdb kafka2influxdb.cpp) 2 | 3 | target_link_libraries(kafka2influxdb kspp-influx_s ${ALL_LIBS}) 4 | 5 | INSTALL(TARGETS kafka2influxdb RUNTIME DESTINATION bin) 6 | -------------------------------------------------------------------------------- /libraries/kspp-kinesis/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | file(GLOB LIB_SRCS *.cpp) 2 | 3 | file(GLOB LIB_HEADERS *.h) 4 | 5 | SET(SIMPLE_NAME kspp-kinesis) 6 | 7 | if (BUILD_STATIC_LIBS) 8 | ADD_LIBRARY(${SIMPLE_NAME}_s STATIC ${LIB_SRCS} ${LIB_HEADERS}) 9 | INSTALL(TARGETS ${SIMPLE_NAME}_s 10 | LIBRARY DESTINATION lib 11 | ARCHIVE DESTINATION lib) 12 | endif () 13 | 14 | 15 | if (BUILD_SHARED_LIBS) 16 | ADD_LIBRARY(${SIMPLE_NAME}_s STATIC ${LIB_SRCS} ${LIB_HEADERS}) 17 | INSTALL(TARGETS ${SIMPLE_NAME}_s 18 | LIBRARY DESTINATION lib 19 | ARCHIVE DESTINATION lib) 20 | endif () 21 | 22 | INSTALL(FILES ${LIB_HEADERS} DESTINATION 
include/${SIMPLE_NAME}) 23 | 24 | IF (BUILD_EXAMPLES) 25 | add_subdirectory(examples) 26 | ENDIF () 27 | 28 | #SET(MQTT_LIBS paho-mqttpp3 paho-mqtt3cs) 29 | -------------------------------------------------------------------------------- /libraries/kspp-kinesis/examples/aws-iot-sample/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | SET(SRCS 2 | aws-iot-sample.cpp 3 | ) 4 | 5 | add_executable(aws-iot-sample ${SRCS}) 6 | target_link_libraries(aws-iot-sample ${AWS_KINESIS_LIBS} ${ALL_LIBS}) 7 | -------------------------------------------------------------------------------- /libraries/kspp-kinesis/examples/aws-kinesis-consumer/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | SET(SRCS 2 | aws-kinesis-consumer.cpp 3 | ) 4 | 5 | add_executable(aws-kinesis-consumer ${SRCS}) 6 | target_link_libraries(aws-kinesis-consumer ${AWS_KINESIS_LIBS} ${ALL_LIBS}) 7 | -------------------------------------------------------------------------------- /libraries/kspp-kinesis/examples/aws-kinesis-consumer/aws-kinesis-consumer.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include 7 | 8 | using namespace std::chrono_literals; 9 | using namespace kspp; 10 | using json = nlohmann::json; 11 | 12 | static bool run = true; 13 | 14 | static void sigterm(int sig) { 15 | run = false; 16 | } 17 | 18 | int main(int argc, char **argv) { 19 | if (argc != 2) { 20 | std::cerr << "usage: " << argv[0] << " stream_name"; 21 | return -1; 22 | } 23 | 24 | FLAGS_logtostderr = 1; 25 | google::InitGoogleLogging(argv[0]); 26 | 27 | std::string stream_name = argv[1]; 28 | 29 | /*auto nr_of_partitions = kspp::kafka::get_number_partitions(config, topic); 30 | if (partition_list.size() == 0 || partition_list[0] == -1) 31 | partition_list = kspp::get_partition_list(nr_of_partitions); 32 | LOG(INFO) << 
"partition_list : " << kspp::partition_list_to_string(partition_list); 33 | */ 34 | 35 | auto config = std::make_shared("", 0); 36 | kspp::topology_builder generic_builder(config); 37 | 38 | auto t = generic_builder.create_topology(); 39 | auto source0 = t->create_processors({0}, stream_name); 40 | auto vistor = t->create_processors>(source0, [](const auto record) { 41 | if (record.value()) { 42 | json j = json::parse(*record.value()); 43 | double t0 = j["ts"]; 44 | auto now = kspp::milliseconds_since_epoch(); 45 | int64_t kinesis_lag = now - record.event_time(); 46 | int64_t total_lag = now - t0; 47 | LOG(INFO) << *record.value() << " kinesis ts: " << record.event_time() << ", kinesis lag: " << kinesis_lag 48 | << " total_lag: " << total_lag; 49 | } 50 | }); 51 | 52 | std::signal(SIGINT, sigterm); 53 | std::signal(SIGTERM, sigterm); 54 | std::signal(SIGPIPE, SIG_IGN); 55 | 56 | LOG(INFO) << "status is up"; 57 | 58 | t->start(kspp::OFFSET_END); // only this is implemneted in kinesis 59 | 60 | while (run) { 61 | if (t->process(kspp::milliseconds_since_epoch()) == 0) { 62 | std::this_thread::sleep_for(10ms); 63 | t->commit(false); 64 | } 65 | } 66 | 67 | LOG(INFO) << "status is down"; 68 | 69 | return 0; 70 | } 71 | 72 | 73 | -------------------------------------------------------------------------------- /libraries/kspp-kinesis/examples/kinesis-test/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | SET(SRCS 2 | kinesis-test.cpp 3 | ) 4 | 5 | add_executable(kinesis-test ${SRCS}) 6 | target_link_libraries(kinesis-test ${AWS_KINESIS_LIBS} ${ALL_LIBS}) 7 | -------------------------------------------------------------------------------- /libraries/kspp-kinesis/kinesis_consumer.h: -------------------------------------------------------------------------------- 1 | #ifdef KSPP_KINESIS 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #pragma once 11 | 12 | 
namespace kspp { 13 | class kinesis_consumer { 14 | public: 15 | kinesis_consumer(int32_t partition, std::string stream_name); 16 | 17 | ~kinesis_consumer(); 18 | 19 | bool initialize(); 20 | 21 | void close(); 22 | 23 | inline bool eof() const { 24 | return (incomming_msg_.size() == 0) && eof_; 25 | } 26 | 27 | inline std::string logical_name() const { 28 | return stream_name_; 29 | } 30 | 31 | inline int32_t partition() const { 32 | return partition_; 33 | } 34 | 35 | void start(int64_t offset); 36 | 37 | //void subscribe(); 38 | 39 | inline event_queue& queue(){ 40 | return incomming_msg_; 41 | }; 42 | 43 | inline const event_queue& queue() const { 44 | return incomming_msg_; 45 | }; 46 | 47 | void commit(bool flush) { 48 | int64_t offset = commit_chain_.last_good_offset(); 49 | if (offset>0) 50 | offset_storage_->commit(offset, flush); 51 | } 52 | 53 | private: 54 | //int parse_response(std::shared_ptr); 55 | void _thread(); 56 | bool exit_=false; 57 | bool start_running_=false; 58 | bool eof_=false; 59 | bool closed_=false; 60 | std::thread bg_; 61 | std::unique_ptr client_; 62 | Aws::String shard_iterator_; 63 | 64 | //postgres_read_cursor read_cursor_; 65 | commit_chain commit_chain_; 66 | const int32_t partition_; 67 | const std::string stream_name_; 68 | std::shared_ptr offset_storage_; 69 | event_queue incomming_msg_; 70 | // move 71 | uint64_t msg_cnt_=0; // TODO move to metrics 72 | }; 73 | } 74 | #endif 75 | -------------------------------------------------------------------------------- /libraries/kspp-kinesis/kinesis_source.h: -------------------------------------------------------------------------------- 1 | #ifdef KSPP_KINESIS 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include "kinesis_consumer.h" 9 | #pragma once 10 | 11 | namespace kspp { 12 | class kinesis_string_source : public partition_source { 13 | static constexpr const char *PROCESSOR_NAME = "kinesis source"; 14 | public: 15 | 
kinesis_string_source(std::shared_ptr config, 16 | int32_t partition, 17 | std::string logical_name) 18 | :partition_source(nullptr, partition) 19 | , _impl(partition, logical_name){ 20 | this->add_metrics_label(KSPP_PROCESSOR_TYPE_TAG, PROCESSOR_NAME); 21 | this->add_metrics_label(KSPP_TOPIC_TAG, logical_name); 22 | this->add_metrics_label(KSPP_PARTITION_TAG, std::to_string(partition)); 23 | } 24 | 25 | virtual ~kinesis_string_source() { 26 | close(); 27 | } 28 | 29 | std::string log_name() const override { 30 | return PROCESSOR_NAME; 31 | } 32 | 33 | void start(int64_t offset) override { 34 | _impl.start(offset); 35 | } 36 | 37 | void close() override { 38 | /* 39 | * if (_commit_chain.last_good_offset() >= 0 && _impl.commited() < _commit_chain.last_good_offset()) 40 | _impl.commit(_commit_chain.last_good_offset(), true); 41 | */ 42 | _impl.close(); 43 | } 44 | 45 | bool eof() const override { 46 | return _impl.eof(); 47 | } 48 | 49 | void commit(bool flush) override { 50 | _impl.commit(flush); 51 | } 52 | 53 | // TBD if we store last offset and end of stream offset we can use this... 
54 | size_t queue_size() const override { 55 | return _impl.queue().size(); 56 | } 57 | 58 | int64_t next_event_time() const override { 59 | return _impl.queue().next_event_time(); 60 | } 61 | 62 | size_t process(int64_t tick) override { 63 | if (_impl.queue().size() == 0) 64 | return 0; 65 | size_t processed = 0; 66 | while (!_impl.queue().empty()) { 67 | auto p = _impl.queue().front(); 68 | if (p == nullptr || p->event_time() > tick) 69 | return processed; 70 | _impl.queue().pop_front(); 71 | this->send_to_sinks(p); 72 | ++(this->processed_count_); 73 | ++processed; 74 | this->_lag.add_event_time(tick, p->event_time()); 75 | } 76 | return processed; 77 | } 78 | 79 | std::string topic() const override { 80 | return _impl.logical_name(); 81 | } 82 | 83 | protected: 84 | kinesis_consumer _impl; 85 | }; 86 | } 87 | #endif 88 | -------------------------------------------------------------------------------- /libraries/kspp-mqtt/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | SET(SIMPLE_NAME kspp-mqtt) 2 | 3 | file(GLOB LIB_SRCS *.cpp) 4 | file(GLOB LIB_HEADERS *.h) 5 | 6 | if (BUILD_STATIC_LIBS) 7 | ADD_LIBRARY(${SIMPLE_NAME}_s STATIC ${LIB_SRCS} ${LIB_HEADERS}) 8 | INSTALL(TARGETS ${SIMPLE_NAME}_s 9 | LIBRARY DESTINATION lib 10 | ARCHIVE DESTINATION lib) 11 | endif () 12 | 13 | 14 | if (BUILD_SHARED_LIBS) 15 | ADD_LIBRARY(${SIMPLE_NAME} STATIC ${LIB_SRCS} ${LIB_HEADERS}) 16 | INSTALL(TARGETS ${SIMPLE_NAME} 17 | LIBRARY DESTINATION lib 18 | ARCHIVE DESTINATION lib) 19 | endif () 20 | 21 | INSTALL(FILES ${LIB_HEADERS} DESTINATION include/${SIMPLE_NAME}) 22 | 23 | SET(PAHO_LIBS_STATIC paho-mqttpp3 paho-mqtt3cs paho-mqtt3as) 24 | SET(PAHO_LIBS_SHARED paho-mqttpp3 paho-mqtt3c paho-mqtt3a) 25 | 26 | IF (BUILD_SAMPLES) 27 | add_subdirectory(examples) 28 | ENDIF () 29 | 30 | 31 | -------------------------------------------------------------------------------- /libraries/kspp-mqtt/examples/CMakeLists.txt: 
-------------------------------------------------------------------------------- 1 | add_subdirectory(kspp-mqtt-producer-example) 2 | 3 | 4 | #SET(MQTT_LIBS paho-mqttpp3 paho-mqtt3cs) 5 | -------------------------------------------------------------------------------- /libraries/kspp-mqtt/examples/kspp-mqtt-producer-example/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | SET(SRCS 2 | kspp-mqtt-producer-example.cpp 3 | ) 4 | 5 | add_executable(kspp-mqtt-producer-example ${SRCS}) 6 | 7 | target_link_libraries(kspp-mqtt-producer-example kspp-mqtt_s ${PAHO_LIBS_STATIC} ${ALL_LIBS}) 8 | -------------------------------------------------------------------------------- /libraries/kspp-mqtt/mqtt_consumer.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #pragma once 8 | 9 | namespace kspp { 10 | class mqtt_consumer { 11 | public: 12 | mqtt_consumer(std::string mqtt_endpoint, 13 | mqtt::connect_options connect_options); 14 | 15 | ~mqtt_consumer(); 16 | 17 | void start() { 18 | start_ = true; 19 | } 20 | 21 | inline bool good() const { 22 | return good_; 23 | } 24 | 25 | void close(); 26 | 27 | inline bool eof() const { 28 | return (incomming_msg_.size() == 0) && eof_; 29 | } 30 | 31 | 32 | void register_metrics(kspp::processor *parent); 33 | 34 | inline event_queue& queue(){ 35 | return incomming_msg_; 36 | }; 37 | 38 | inline const event_queue& queue() const { 39 | return incomming_msg_; 40 | }; 41 | 42 | 43 | private: 44 | class callback : public virtual mqtt::callback { 45 | public: 46 | callback(class mqtt_consumer *parent) : parent_(parent) {} 47 | 48 | protected: 49 | // (Re)connection success 50 | void connected(const std::string &cause) override { 51 | // do nothing 52 | } 53 | 54 | // Callback for when the connection is lost. 
55 | void connection_lost(const std::string &cause) override { 56 | parent_->on_connection_lost(); 57 | } 58 | 59 | // when a message arrives. 60 | void message_arrived(mqtt::const_message_ptr msg) override { 61 | // do nothing - should never happen 62 | } 63 | 64 | // what does this one do?? - doe not seem to be called - we use publish listener instead 65 | void delivery_complete(mqtt::delivery_token_ptr token) override { 66 | // do nothing - is never called??? 67 | } 68 | 69 | private: 70 | mqtt_consumer *parent_ = nullptr; 71 | }; 72 | 73 | void on_connection_lost(); // from callback 74 | 75 | 76 | 77 | void thread(); 78 | 79 | bool exit_ = false; 80 | bool start_ = false; 81 | bool good_ = true; 82 | bool eof_=false; 83 | bool closed_ = false; 84 | bool connected_ = false; // ?? 85 | 86 | std::unique_ptr client_; 87 | std::string mqtt_endpoint_; 88 | mqtt::connect_options connect_options_; 89 | callback action_listener_; 90 | 91 | //const int32_t partition_; 92 | const std::string stream_name_; 93 | //std::shared_ptr offset_storage_; 94 | event_queue incomming_msg_; 95 | // move 96 | 97 | metric_counter connection_errors_; 98 | metric_counter msg_cnt_; 99 | metric_counter msg_bytes_; 100 | 101 | std::thread thread_; 102 | }; 103 | } 104 | -------------------------------------------------------------------------------- /libraries/kspp-mqtt/mqtt_sink.h: -------------------------------------------------------------------------------- 1 | #include "mqtt_producer.h" 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #pragma once 10 | 11 | namespace kspp { 12 | class mqtt_sink : public topic_sink { 13 | public: 14 | mqtt_sink(std::shared_ptr config, std::string mqtt_endpoint, 15 | mqtt::connect_options connect_options) { 16 | impl_ = std::make_unique(mqtt_endpoint, connect_options); 17 | impl_->register_metrics(this); 18 | impl_->start(); 19 | } 20 | 21 | ~mqtt_sink() override { 22 | close(); 23 | } 24 | 25 | std::string log_name() 
const override { 26 | return "mqtt_sink"; 27 | } 28 | 29 | bool good() const { 30 | return impl_->good(); 31 | } 32 | 33 | void register_metrics(kspp::processor *parent) { 34 | impl_->register_metrics(parent); 35 | } 36 | 37 | void close() override { 38 | impl_->close(); 39 | } 40 | 41 | bool eof() const override { 42 | return this->queue_.size() == 0 && impl_->eof(); 43 | } 44 | 45 | size_t queue_size() const override { 46 | return this->queue_.size(); 47 | } 48 | 49 | size_t outbound_queue_len() const override { 50 | return this->impl_->queue_size(); 51 | } 52 | 53 | int64_t next_event_time() const override { 54 | return this->queue_.next_event_time(); 55 | } 56 | 57 | size_t process(int64_t tick) override { 58 | if (this->queue_.empty()) 59 | return 0; 60 | size_t processed = 0; 61 | while (!this->queue_.empty()) { 62 | auto p = this->queue_.front(); 63 | if (p == nullptr || p->event_time() > tick) 64 | return processed; 65 | this->queue_.pop_front(); 66 | impl_->insert(p); 67 | ++(this->processed_count_); 68 | ++processed; 69 | this->lag_.add_event_time(tick, p->event_time()); 70 | } 71 | return processed; 72 | } 73 | 74 | std::string topic() const override { 75 | return impl_->topic(); 76 | } 77 | 78 | void poll(int timeout) override { 79 | impl_->poll(); 80 | } 81 | 82 | void flush() override { 83 | while (!eof()) { 84 | process(kspp::milliseconds_since_epoch()); 85 | poll(0); 86 | std::this_thread::sleep_for(std::chrono::milliseconds( 87 | 10)); // TODO the deletable messages should be deleted when poill gets called an not from background thread 3rd queue is needed... 
88 | } 89 | 90 | while (true) { 91 | int ec = 0; // TODO fixme 92 | //auto ec = _impl.flush(1000); 93 | if (ec == 0) 94 | break; 95 | } 96 | } 97 | 98 | protected: 99 | std::unique_ptr impl_; 100 | }; 101 | } 102 | 103 | 104 | -------------------------------------------------------------------------------- /libraries/kspp-mqtt/mqtt_source.h: -------------------------------------------------------------------------------- 1 | #include "mqtt_consumer.h" 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #pragma once 9 | 10 | namespace kspp { 11 | class mqtt_source : public partition_source { 12 | public: 13 | mqtt_source(std::shared_ptr config, std::string mqtt_endpoint, 14 | mqtt::connect_options connect_options, int32_t partition) : 15 | partition_source(nullptr, partition){ 16 | impl_ = std::make_unique(mqtt_endpoint, connect_options); 17 | impl_->register_metrics(this); 18 | impl_->start(); 19 | } 20 | 21 | ~mqtt_source() override { 22 | close(); 23 | } 24 | 25 | std::string log_name() const override { 26 | return "mqtt_source"; 27 | } 28 | 29 | bool good() const { 30 | return impl_->good(); 31 | } 32 | 33 | void register_metrics(kspp::processor *parent) { 34 | impl_->register_metrics(parent); 35 | } 36 | 37 | void close() override { 38 | impl_->close(); 39 | } 40 | 41 | bool eof() const override { 42 | return impl_->eof(); 43 | } 44 | 45 | void commit(bool flush) override { 46 | //impl_->commit(flush); 47 | } 48 | 49 | // TBD if we store last offset and end of stream offset we can use this... 
50 | size_t queue_size() const override { 51 | return impl_->queue().size(); 52 | } 53 | 54 | int64_t next_event_time() const override { 55 | return impl_->queue().next_event_time(); 56 | } 57 | 58 | size_t process(int64_t tick) override { 59 | if (impl_->queue().size() == 0) 60 | return 0; 61 | size_t processed = 0; 62 | while (!impl_->queue().empty()) { 63 | auto p = impl_->queue().front(); 64 | if (p == nullptr || p->event_time() > tick) 65 | return processed; 66 | impl_->queue().pop_front(); 67 | this->send_to_sinks(p); 68 | ++(this->processed_count_); 69 | ++processed; 70 | this->lag_.add_event_time(tick, p->event_time()); 71 | } 72 | return processed; 73 | } 74 | 75 | std::string topic() const override { 76 | return "mqtt-dummy"; 77 | //return impl_->logical_name(); 78 | } 79 | 80 | protected: 81 | std::unique_ptr impl_; 82 | }; 83 | } 84 | 85 | 86 | -------------------------------------------------------------------------------- /libraries/kspp-pg/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | SET(SIMPLE_NAME kspp-pg) 2 | file(GLOB LIB_SRCS *.cpp) 3 | file(GLOB LIB_HEADERS *.h) 4 | 5 | find_package(PostgreSQL REQUIRED) 6 | include_directories(${PostgreSQL_INCLUDE_DIRS}) 7 | 8 | if (BUILD_STATIC_LIBS) 9 | ADD_LIBRARY(${SIMPLE_NAME}_s STATIC ${LIB_SRCS} ${LIB_HEADERS}) 10 | INSTALL(TARGETS ${SIMPLE_NAME}_s 11 | LIBRARY DESTINATION lib 12 | ARCHIVE DESTINATION lib) 13 | endif () 14 | 15 | 16 | if (BUILD_SHARED_LIBS) 17 | ADD_LIBRARY(${SIMPLE_NAME} STATIC ${LIB_SRCS} ${LIB_HEADERS}) 18 | INSTALL(TARGETS ${SIMPLE_NAME} 19 | LIBRARY DESTINATION lib 20 | ARCHIVE DESTINATION lib) 21 | endif () 22 | 23 | INSTALL(FILES ${LIB_HEADERS} DESTINATION include/${SIMPLE_NAME}) 24 | 25 | IF (BUILD_TOOLS) 26 | add_subdirectory(tools) 27 | ENDIF () 28 | 29 | #SET(MQTT_LIBS paho-mqttpp3 paho-mqtt3cs) 30 | -------------------------------------------------------------------------------- /libraries/kspp-pg/postgres_connection.h: 
-------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #pragma once 8 | 9 | //inspiration 10 | //http://www.freetds.org/software.html 11 | 12 | namespace kspp_postgres { 13 | class connection : public std::enable_shared_from_this { 14 | public: 15 | connection(std::string trace_id = ""); 16 | 17 | ~connection(); 18 | 19 | void close(); 20 | 21 | int connect(const kspp::connect::connection_params &cp); 22 | 23 | void disconnect(); 24 | 25 | inline int connected() const { 26 | return (pg_conn_ && (PQstatus(pg_conn_) == CONNECTION_OK)); 27 | } 28 | 29 | int set_client_encoding(std::string s); 30 | 31 | const std::string &trace_id() const; 32 | 33 | void set_warning_timeout(uint32_t ms); 34 | 35 | std::pair> exec(std::string statement); 36 | 37 | std::string last_error() const { 38 | return PQerrorMessage(pg_conn_); 39 | } 40 | 41 | private: 42 | PGconn *pg_conn_ = nullptr; 43 | std::string trace_id_; 44 | int32_t warn_timeout_ = 60000; 45 | }; 46 | } 47 | -------------------------------------------------------------------------------- /libraries/kspp-pg/postgres_generic_avro_sink.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #pragma once 11 | namespace kspp { 12 | class postgres_generic_avro_sink : public generic_avro_sink { 13 | static constexpr const char *PROCESSOR_NAME = "postgres_avro_sink"; 14 | public: 15 | postgres_generic_avro_sink(std::shared_ptr config, 16 | std::string table, 17 | const kspp::connect::connection_params &cp, 18 | std::vector keys, 19 | std::string client_encoding = "UTF8", 20 | size_t max_items_in_insert = 1000, 21 | bool skip_delete = false) 22 | : generic_avro_sink(config, std::make_shared(table, cp, keys, client_encoding, 23 | max_items_in_insert, skip_delete)) { 24 | 
this->add_metrics_label(KSPP_PROCESSOR_TYPE_TAG, PROCESSOR_NAME); 25 | this->add_metrics_label(KSPP_TOPIC_TAG, table); 26 | 27 | // register sub component metrics 28 | this->register_metrics(this); 29 | } 30 | 31 | std::string log_name() const override { 32 | return PROCESSOR_NAME; 33 | } 34 | 35 | protected: 36 | // std::shared_ptr schema_; 37 | // int32_t schema_id_; 38 | }; 39 | } 40 | 41 | -------------------------------------------------------------------------------- /libraries/kspp-pg/postgres_generic_avro_source.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | using namespace std::chrono_literals; 5 | 6 | namespace kspp { 7 | postgres_generic_avro_source::postgres_generic_avro_source(std::shared_ptr config, 8 | int32_t partition, 9 | std::string logical_name, 10 | const kspp::connect::connection_params &cp, 11 | kspp::connect::table_params tp, 12 | std::string query, 13 | std::string id_column, 14 | std::string ts_column, 15 | std::shared_ptr registry) 16 | : partition_source(nullptr, partition), 17 | impl_(partition, logical_name, cp, tp, query, id_column, ts_column, registry) { 18 | this->add_metrics_label(KSPP_PROCESSOR_TYPE_TAG, PROCESSOR_NAME); 19 | this->add_metrics_label(KSPP_TOPIC_TAG, logical_name); 20 | this->add_metrics_label(KSPP_PARTITION_TAG, std::to_string(partition)); 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /libraries/kspp-pg/postgres_generic_avro_source.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #pragma once 10 | 11 | namespace kspp { 12 | class postgres_generic_avro_source : public partition_source { 13 | static constexpr const char *PROCESSOR_NAME = "postgres_avro_source"; 14 | public: 15 | postgres_generic_avro_source(std::shared_ptr config, 16 | int32_t partition, 17 | 
std::string logical_name, 18 | const kspp::connect::connection_params &cp, 19 | kspp::connect::table_params tp, 20 | std::string query, 21 | std::string id_column, 22 | std::string ts_column, 23 | std::shared_ptr); 24 | 25 | virtual ~postgres_generic_avro_source() { 26 | close(); 27 | } 28 | 29 | std::string log_name() const override { 30 | return PROCESSOR_NAME; 31 | } 32 | 33 | void start(int64_t offset) override { 34 | impl_.start(offset); 35 | } 36 | 37 | void close() override { 38 | /* 39 | * if (_commit_chain.last_good_offset() >= 0 && _impl.commited() < _commit_chain.last_good_offset()) 40 | _impl.commit(_commit_chain.last_good_offset(), true); 41 | */ 42 | impl_.close(); 43 | } 44 | 45 | bool eof() const override { 46 | return impl_.eof(); 47 | } 48 | 49 | void commit(bool flush) override { 50 | impl_.commit(flush); 51 | } 52 | 53 | // TBD if we store last offset and end of stream offset we can use this... 54 | size_t queue_size() const override { 55 | return impl_.queue().size(); 56 | } 57 | 58 | int64_t next_event_time() const override { 59 | return impl_.queue().next_event_time(); 60 | } 61 | 62 | size_t process(int64_t tick) override { 63 | if (impl_.queue().size() == 0) 64 | return 0; 65 | size_t processed = 0; 66 | while (!impl_.queue().empty()) { 67 | auto p = impl_.queue().front(); 68 | if (p == nullptr || p->event_time() > tick) 69 | return processed; 70 | impl_.queue().pop_front(); 71 | this->send_to_sinks(p); 72 | ++(this->processed_count_); 73 | ++processed; 74 | this->lag_.add_event_time(tick, p->event_time()); 75 | } 76 | return processed; 77 | } 78 | 79 | std::string topic() const override { 80 | return impl_.logical_name(); 81 | } 82 | 83 | protected: 84 | postgres_consumer impl_; 85 | }; 86 | } 87 | 88 | -------------------------------------------------------------------------------- /libraries/kspp-pg/postgres_producer.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 
4 | #include 5 | #include 6 | #include 7 | 8 | #pragma once 9 | 10 | namespace kspp { 11 | class postgres_producer : public generic_producer { 12 | public: 13 | enum { 14 | MAX_ERROR_BEFORE_BAD = 200 15 | }; 16 | 17 | postgres_producer(std::string table, 18 | const kspp::connect::connection_params &cp, 19 | std::vector keys, 20 | std::string client_encoding, 21 | size_t max_items_in_insert, 22 | bool skip_delete = false); 23 | 24 | ~postgres_producer(); 25 | 26 | void register_metrics(kspp::processor *parent) override; 27 | 28 | void close() override; 29 | 30 | bool good() const { 31 | return (current_error_streak_ < MAX_ERROR_BEFORE_BAD); 32 | } 33 | 34 | bool eof() const override { 35 | return (incomming_msg_.empty() && done_.empty()); 36 | } 37 | 38 | std::string topic() const override { 39 | return table_; 40 | } 41 | 42 | void stop(); 43 | 44 | void insert(std::shared_ptr> p) override { 45 | incomming_msg_.push_back(p); 46 | } 47 | 48 | void poll() override; 49 | 50 | size_t queue_size() const override { 51 | auto sz0 = incomming_msg_.size(); 52 | auto sz1 = done_.size(); 53 | return sz0 + sz1; 54 | } 55 | 56 | private: 57 | bool initialize(); 58 | 59 | bool check_table_exists(); 60 | 61 | void _thread(); 62 | 63 | bool exit_=false; 64 | bool start_running_=false; 65 | bool closed_=false; 66 | std::unique_ptr connection_; 67 | const std::string table_; 68 | const kspp::connect::connection_params cp_; 69 | const std::vector id_columns_; 70 | const std::string client_encoding_; 71 | event_queue incomming_msg_; 72 | event_queue done_; // waiting to be deleted in poll(); 73 | const size_t max_items_in_insert_; 74 | bool table_checked_ = false; 75 | bool table_exists_ = false; 76 | const bool skip_delete2_; 77 | size_t current_error_streak_=0; 78 | metric_counter connection_errors_; 79 | metric_counter insert_errors_; 80 | metric_counter msg_cnt_; 81 | metric_counter msg_bytes_; 82 | metric_summary request_time_; 83 | std::thread bg_; 84 | }; 85 | } 86 | 87 | 
-------------------------------------------------------------------------------- /libraries/kspp-pg/postgres_read_cursor.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #pragma once 9 | 10 | namespace kspp { 11 | class postgres_read_cursor { 12 | public: 13 | postgres_read_cursor(kspp::connect::table_params tp, std::string id_column, std::string ts_column); 14 | 15 | void init(std::shared_ptr result); 16 | 17 | void start(int64_t ts); 18 | 19 | void parse(std::shared_ptr result); 20 | 21 | std::string get_where_clause() const; 22 | 23 | std::string last_ts() const { return last_ts_; } 24 | 25 | inline int64_t last_tick() const { return last_ts_ticks_; } 26 | 27 | inline int64_t last_ts_ms() const { 28 | return (last_ts_ticks_ >= 0) ? ts_utc_offset_ + (last_ts_ticks_ * ts_multiplier_) : 0; 29 | } 30 | 31 | void set_eof(bool state) { eof_ = state; } 32 | 33 | private: 34 | 35 | std::string parse_id(std::shared_ptr result); 36 | 37 | std::string parse_ts(std::shared_ptr result); 38 | 39 | const kspp::connect::table_params tp_; 40 | bool eof_ = false; 41 | const std::string id_column_; 42 | const std::string ts_column_; 43 | const std::string order_by_; 44 | int ts_column_index_ = -1; 45 | int id_column_index_ = -1; 46 | int64_t last_ts_ticks_ = INT64_MIN; 47 | std::string last_ts_; 48 | std::string last_id_; 49 | 50 | int ts_multiplier_ = 0; 51 | int ts_utc_offset_ = 0; 52 | }; 53 | } -------------------------------------------------------------------------------- /libraries/kspp-pg/tools/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_subdirectory(postgres_connect) 2 | add_subdirectory(avro2pg) 3 | 4 | -------------------------------------------------------------------------------- /libraries/kspp-pg/tools/avro2pg/CMakeLists.txt: 
-------------------------------------------------------------------------------- 1 | add_executable(avro2pg avro2pg.cpp) 2 | 3 | target_link_libraries(avro2pg kspp-pg_s ${ALL_LIBS}) 4 | 5 | INSTALL(TARGETS avro2pg RUNTIME DESTINATION bin) 6 | 7 | 8 | -------------------------------------------------------------------------------- /libraries/kspp-pg/tools/postgres_connect/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_executable(postgres2kafka postgres2kafka.cpp) 2 | target_link_libraries(postgres2kafka kspp-pg_s ${ALL_LIBS} rdkafka) 3 | INSTALL(TARGETS postgres2kafka RUNTIME DESTINATION bin) 4 | 5 | add_executable(kafka2postgres kafka2postgres.cpp) 6 | target_link_libraries(kafka2postgres kspp-pg_s ${ALL_LIBS}) 7 | INSTALL(TARGETS kafka2postgres RUNTIME DESTINATION bin) 8 | -------------------------------------------------------------------------------- /libraries/kspp-tds/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | SET(SIMPLE_NAME kspp-tds) 2 | 3 | file(GLOB LIB_SRCS *.cpp) 4 | file(GLOB LIB_HEADERS *.h) 5 | 6 | if (BUILD_STATIC_LIBS) 7 | ADD_LIBRARY(${SIMPLE_NAME}_s STATIC ${LIB_SRCS} ${LIB_HEADERS}) 8 | INSTALL(TARGETS ${SIMPLE_NAME}_s 9 | LIBRARY DESTINATION lib 10 | ARCHIVE DESTINATION lib) 11 | endif () 12 | 13 | 14 | if (BUILD_SHARED_LIBS) 15 | ADD_LIBRARY(${SIMPLE_NAME} STATIC ${LIB_SRCS} ${LIB_HEADERS}) 16 | INSTALL(TARGETS ${SIMPLE_NAME} 17 | LIBRARY DESTINATION lib 18 | ARCHIVE DESTINATION lib) 19 | endif () 20 | 21 | INSTALL(FILES ${LIB_HEADERS} DESTINATION include/${SIMPLE_NAME}) 22 | 23 | IF (BUILD_TOOLS) 24 | add_subdirectory(tools) 25 | ENDIF () 26 | 27 | #SET(MQTT_LIBS paho-mqttpp3 paho-mqtt3cs) 28 | -------------------------------------------------------------------------------- /libraries/kspp-tds/tds_avro_utils.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 
4 | 5 | #pragma once 6 | 7 | namespace kspp { 8 | namespace tds { 9 | enum TDS_OIDS { 10 | SYBCHAR = 47, 11 | SYBBIT = 50, 12 | SYBINT2 = 52, 13 | SYBINT4 = 56, 14 | SYBFLT8 = 62, 15 | // MS only types 16 | SYBUNIQUE = 36, 17 | SYBMSDATETIME2 = 42, 18 | SYBINT8 = 127, 19 | SYBMSUDT = 240 20 | }; 21 | 22 | std::shared_ptr schema_for_oid(TDS_OIDS typid); 23 | 24 | std::shared_ptr schema_for_table_row(std::string schema_name, DBPROCESS *context); 25 | 26 | std::shared_ptr 27 | schema_for_table_key(std::string schema_name, const std::vector &keys, DBPROCESS *context); 28 | 29 | std::string simple_column_name(std::string column_name); 30 | 31 | int find_column_by_name(DBPROCESS *stream, const std::string &name); 32 | } 33 | } -------------------------------------------------------------------------------- /libraries/kspp-tds/tds_connection.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #pragma once 9 | 10 | //inspiration 11 | //http://www.freetds.org/software.html 12 | 13 | namespace kspp_tds { 14 | class connection : public std::enable_shared_from_this { 15 | public: 16 | connection(std::string trace_id = ""); 17 | 18 | ~connection(); 19 | 20 | void close(); 21 | 22 | int connect(const kspp::connect::connection_params &cp); 23 | 24 | void disconnect(); 25 | 26 | inline bool connected() const { 27 | return (dbproc_ != nullptr); 28 | } 29 | 30 | bool set_client_encoding(std::string s); 31 | 32 | std::string trace_id() const; 33 | 34 | void set_warning_timeout(uint32_t ms); 35 | 36 | std::pair exec(std::string statement); 37 | 38 | private: 39 | LOGINREC *login_ = nullptr; 40 | DBPROCESS *dbproc_ = nullptr; 41 | std::string trace_id_; 42 | std::string current_statement_; 43 | int32_t warn_timeout_ = 60000; 44 | }; 45 | } 46 | -------------------------------------------------------------------------------- 
/libraries/kspp-tds/tds_generic_avro_source.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | using namespace std::chrono_literals; 5 | 6 | namespace kspp { 7 | tds_generic_avro_source::tds_generic_avro_source(std::shared_ptr config, 8 | int32_t partition, 9 | std::string logical_name, 10 | const kspp::connect::connection_params &cp, 11 | kspp::connect::table_params tp, 12 | std::string query, 13 | std::string id_column, 14 | std::string ts_column, 15 | std::shared_ptr registry) 16 | : partition_source(nullptr, partition), 17 | impl_(partition, logical_name, cp, tp, query, id_column, ts_column, registry) { 18 | this->add_metrics_label(KSPP_PROCESSOR_TYPE_TAG, PROCESSOR_NAME); 19 | this->add_metrics_label(KSPP_TOPIC_TAG, logical_name); 20 | this->add_metrics_label(KSPP_PARTITION_TAG, std::to_string(partition)); 21 | } 22 | } 23 | -------------------------------------------------------------------------------- /libraries/kspp-tds/tds_generic_avro_source.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | #pragma once 10 | 11 | namespace kspp { 12 | class tds_generic_avro_source : public partition_source { 13 | static constexpr const char *PROCESSOR_NAME = "tds_avro_source"; 14 | public: 15 | tds_generic_avro_source(std::shared_ptr config, 16 | int32_t partition, 17 | std::string logical_name, 18 | const kspp::connect::connection_params &cp, 19 | kspp::connect::table_params tp, 20 | std::string query, 21 | std::string id_column, 22 | std::string ts_column, 23 | std::shared_ptr); 24 | 25 | virtual ~tds_generic_avro_source() { 26 | close(); 27 | } 28 | 29 | std::string log_name() const override { 30 | return PROCESSOR_NAME; 31 | } 32 | 33 | void start(int64_t offset) override { 34 | impl_.start(offset); 35 | } 36 | 37 | void close() override { 38 | // TODO should this be 
here ?? 39 | /* 40 | * if (_commit_chain.last_good_offset() >= 0 && _impl.commited() < _commit_chain.last_good_offset()) 41 | _impl.commit(_commit_chain.last_good_offset(), true); 42 | */ 43 | impl_.close(); 44 | } 45 | 46 | bool eof() const override { 47 | return impl_.eof(); 48 | } 49 | 50 | void commit(bool flush) override { 51 | impl_.commit(flush); 52 | } 53 | 54 | // TBD if we store last offset and end of stream offset we can use this... 55 | size_t queue_size() const override { 56 | return impl_.queue().size(); 57 | } 58 | 59 | int64_t next_event_time() const override { 60 | return impl_.queue().next_event_time(); 61 | } 62 | 63 | size_t process(int64_t tick) override { 64 | if (impl_.queue().size() == 0) 65 | return 0; 66 | size_t processed = 0; 67 | while (!impl_.queue().empty()) { 68 | auto p = impl_.queue().front(); 69 | if (p == nullptr || p->event_time() > tick) 70 | return processed; 71 | impl_.queue().pop_front(); 72 | this->send_to_sinks(p); 73 | ++(this->processed_count_); 74 | ++processed; 75 | this->lag_.add_event_time(tick, p->event_time()); 76 | } 77 | return processed; 78 | } 79 | 80 | std::string topic() const override { 81 | return impl_.logical_name(); 82 | } 83 | 84 | protected: 85 | tds_consumer impl_; 86 | }; 87 | } 88 | 89 | -------------------------------------------------------------------------------- /libraries/kspp-tds/tds_read_cursor.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #pragma once 9 | 10 | namespace kspp { 11 | class tds_read_cursor { 12 | public: 13 | tds_read_cursor(kspp::connect::table_params tp, std::string id_column, std::string ts_column); 14 | 15 | void init(DBPROCESS *stream); 16 | 17 | void start(int64_t ts); 18 | 19 | void parse(DBPROCESS *stream); 20 | 21 | std::string get_where_clause() const; 22 | 23 | std::string last_ts() const { return last_ts_; } 24 | 25 | inline int64_t 
last_tick() const { return last_ts_ticks_; } 26 | 27 | inline int64_t last_ts_ms() const { 28 | return (last_ts_ticks_ >= 0) ? ts_utc_offset_ + (last_ts_ticks_ * ts_multiplier_) : 0; 29 | } 30 | 31 | void set_eof(bool state) { eof_ = state; } 32 | 33 | private: 34 | 35 | int64_t parse_id(DBPROCESS *stream); 36 | 37 | int64_t parse_ts(DBPROCESS *stream); 38 | 39 | const kspp::connect::table_params tp_; 40 | bool eof_ = false; 41 | const std::string id_column_; 42 | const std::string ts_column_; 43 | const std::string order_by_; 44 | int ts_column_index_ = -1; 45 | int id_column_index_ = -1; 46 | int64_t last_ts_ticks_ = INT64_MIN; 47 | std::string last_ts_; 48 | std::string last_id_; 49 | 50 | int ts_multiplier_ = 0; 51 | int ts_utc_offset_ = 0; 52 | 53 | }; 54 | } -------------------------------------------------------------------------------- /libraries/kspp-tds/tools/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_subdirectory(tds_connect) 2 | 3 | -------------------------------------------------------------------------------- /libraries/kspp-tds/tools/tds_connect/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_executable(tds2kafka tds2kafka.cpp) 2 | target_link_libraries(tds2kafka kspp-tds_s sybdb ${ALL_LIBS}) 3 | INSTALL(TARGETS tds2kafka RUNTIME DESTINATION bin) 4 | 5 | 6 | 7 | -------------------------------------------------------------------------------- /libraries/kspp-tds/tools/tds_connect/README.md: -------------------------------------------------------------------------------- 1 | wget ftp://ftp.freetds.org/pub/freetds/stable/freetds-patched.tar.gz 2 | tar xvf freetds-patched.tar.gz 3 | cd freetds-1.00.86 4 | ./configure 5 | make -j8 6 | sudo make install 7 | sudo cp -r include/freetds /usr/local/include 8 | 9 | http://www.freetds.org/userguide/confirminstall.htm#TSQL 10 | src/apps/tsql.c 11 | 12 | sudo apt-get install unixodbc unixodbc-dev 
freetds-dev freetds-bin tdsodbc 13 | 14 | sudo apt-get remove freetds-dev freetds-bin 15 | ( 16 | tsql -H hostmachine -p port -U username -P password 17 | 18 | tsql -H 10.1.46.42 -p 1433 -U SA -P 2Secrets 19 | 20 | -------------------------------------------------------------------------------- /rebuild.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | rm -rf build bin lib 3 | mkdir build 4 | cd build 5 | cmake -DCMAKE_BUILD_TYPE=Release .. 6 | make -j "$(getconf _NPROCESSORS_ONLN)" 7 | 8 | 9 | -------------------------------------------------------------------------------- /scripts/create_test_topics.bat: -------------------------------------------------------------------------------- 1 | REM kafka-topics.bat must be in you path.. it's somewhere like C:\???\kafka_2.11-0.10.0.1\bin\windows\kafka-topics.bat 2 | call kafka-topics.bat --create --zookeeper localhost:2181 --replication-factor 1 --partitions 8 --topic kspp_test0_table --config cleanup.policy=compact 3 | call kafka-topics.bat --create --zookeeper localhost:2181 --replication-factor 1 --partitions 8 --topic kspp_test0_eventstream --config retention.ms=86400000 4 | call kafka-topics.bat --create --zookeeper localhost:2181 --replication-factor 1 --partitions 8 --topic kspp_test0_eventstream_out --config retention.ms=86400000 5 | call kafka-topics.bat --create --zookeeper localhost:2181 --replication-factor 1 --partitions 8 --topic kspp_PageViews 6 | call kafka-topics.bat --create --zookeeper localhost:2181 --replication-factor 1 --partitions 8 --topic kspp_UserProfile 7 | call kafka-topics.bat --create --zookeeper localhost:2181 --replication-factor 1 --partitions 8 --topic kspp_ViewCountsByUser 8 | call kafka-topics.bat --create --zookeeper localhost:2181 --replication-factor 1 --partitions 8 --topic kspp_PageViewByRegion 9 | call kafka-topics.bat --create --zookeeper localhost:2181 --replication-factor 1 --partitions 8 --topic kspp_UserCountByRegion 10 
| call kafka-topics.bat --create --zookeeper localhost:2181 --replication-factor 1 --partitions 8 --topic kspp_PageCounts 11 | call kafka-topics.bat --create --zookeeper localhost:2181 --replication-factor 1 --partitions 8 --topic kspp_PageViewsDecorated 12 | call kafka-topics.bat --create --zookeeper localhost:2181 --replication-factor 1 --partitions 8 --topic kspp_TextInput 13 | call kafka-topics.bat --create --zookeeper localhost:2181 --replication-factor 1 --partitions 8 --topic kspp_WordCount 14 | call kafka-topics.bat --create --zookeeper localhost:2181 --replication-factor 1 --partitions 8 --topic kspp_example5_usernames 15 | call kafka-topics.bat --create --zookeeper localhost:2181 --replication-factor 1 --partitions 8 --topic kspp_example5_user_channel 16 | call kafka-topics.bat --create --zookeeper localhost:2181 --replication-factor 1 --partitions 8 --topic kspp_example5_channel_names 17 | call kafka-topics.bat --create --zookeeper localhost:2181 --replication-factor 1 --partitions 8 --topic kspp_example5_usernames.per-channel 18 | -------------------------------------------------------------------------------- /scripts/insert_large_test_text.bat: -------------------------------------------------------------------------------- 1 | call kafka-topics.bat --create --zookeeper localhost:2181 --replication-factor 1 --partitions 8 --topic kspp_test_text 2 | call kafka-topics.bat --create --zookeeper localhost:2181 --replication-factor 1 --partitions 8 --topic kspp_test_words 3 | 4 | call kafka-console-producer.bat --broker-list localhost:9092 --topic kspp_test_text < pg10.txt 5 | 6 | 7 | -------------------------------------------------------------------------------- /scripts/list_topics.bat: -------------------------------------------------------------------------------- 1 | kafka-topics.bat --list --zookeeper localhost -------------------------------------------------------------------------------- /src/avro/avro_utils.cpp: 
-------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | namespace kspp { 5 | namespace avro_utils { 6 | std::string normalize(const avro::ValidSchema &vs) { 7 | std::stringstream ss; 8 | vs.toJson(ss); 9 | std::string s = ss.str(); 10 | // TBD we should strip type : string to string 11 | // strip whitespace 12 | s.erase(remove_if(s.begin(), s.end(), ::isspace), s.end()); // c version does not use locale... 13 | return s; 14 | } 15 | 16 | std::string to_string(avro::Type t) { 17 | switch (t) { 18 | case avro::AVRO_STRING: 19 | return "AVRO_STRING"; 20 | case avro::AVRO_BYTES: 21 | return "AVRO_BYTES"; 22 | case avro::AVRO_INT: 23 | return "AVRO_INT"; 24 | case avro::AVRO_LONG: 25 | return "AVRO_LONG"; 26 | case avro::AVRO_FLOAT: 27 | return "AVRO_FLOAT"; 28 | case avro::AVRO_DOUBLE: 29 | return "AVRO_DOUBLE"; 30 | case avro::AVRO_BOOL: 31 | return "AVRO_BOOL"; 32 | case avro::AVRO_NULL: 33 | return "AVRO_NULL"; 34 | case avro::AVRO_RECORD: 35 | return "AVRO_RECORD"; 36 | case avro::AVRO_ENUM: 37 | return "AVRO_ENUM"; 38 | case avro::AVRO_ARRAY: 39 | return "AVRO_ARRAY"; 40 | case avro::AVRO_UNION: 41 | return "AVRO_UNION"; 42 | case avro::AVRO_FIXED: 43 | return "AVRO_FIXED"; 44 | case avro::AVRO_NUM_TYPES: 45 | return "AVRO_NUM_TYPES"; 46 | default: 47 | return "AVRO_UNKNOWN"; 48 | }; 49 | } 50 | 51 | static bool is_valid_avro_name(char c) { 52 | return ((c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || c == '_'); 53 | } 54 | 55 | static bool is_invalid_avro_name(char c) { 56 | return !is_valid_avro_name(c); 57 | } 58 | 59 | std::string sanitize_schema_name(std::string s) { 60 | // to survive avro classes it seems it must follow some rules 61 | //start with [A-Za-z_] 62 | //subsequently contain only [A-Za-z0-9_] 63 | std::replace(s.begin(), s.end(), '-', '_'); // replace - with _ 64 | s.erase(std::remove_if(s.begin(), s.end(), is_invalid_avro_name), s.end()); 65 | return s; 66 | 
} 67 | } 68 | } -------------------------------------------------------------------------------- /src/avro/generic_avro.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | 6 | namespace kspp { 7 | std::string generic_avro::generic_record::to_json() const { 8 | return "not implemented"; 9 | } 10 | } 11 | 12 | std::string to_json(const kspp::generic_avro &src) { 13 | /* JSON encoder */ 14 | avro::EncoderPtr json_encoder = avro::jsonEncoder(*src.valid_schema()); 15 | std::stringstream ss; 16 | 17 | /* JSON output stream */ 18 | auto json_os = avro::ostreamOutputStream(ss); 19 | 20 | try { 21 | /* Encode Avro datum to JSON */ 22 | json_encoder->init(*json_os.get()); 23 | avro::encode(*json_encoder, *src.generic_datum()); 24 | json_encoder->flush(); 25 | 26 | } 27 | catch (const avro::Exception &e) { 28 | LOG(ERROR) << "Binary to JSON transformation failed: " << e.what(); 29 | return 0; 30 | } 31 | return ss.str(); 32 | } 33 | -------------------------------------------------------------------------------- /src/features/aws/aws.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | namespace kspp { 5 | void init_aws() { 6 | static bool is_init = false; 7 | if (!is_init) { 8 | is_init = true; 9 | Aws::SDKOptions options; 10 | options.loggingOptions.logLevel = Aws::Utils::Logging::LogLevel::Info; 11 | Aws::InitAPI(options); 12 | } 13 | } 14 | } 15 | 16 | -------------------------------------------------------------------------------- /src/internal/commit_chain.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | namespace kspp { 6 | 7 | static std::deque> s_pending_delete; 8 | 9 | commit_chain::autocommit_marker::~autocommit_marker() { 10 | //_cb(_offset, _ec); 11 | //now we could delete everting that is waiting for us - but if this is 100k objects 
the callstack is kind of large and we will segfault 12 | //so we check if we're the last holder of next - if so let someone else delete next object 13 | if (next_.use_count() == 1) { 14 | if (next_) 15 | s_pending_delete.push_back(next_); 16 | next_.reset(); 17 | } 18 | } 19 | 20 | void autocommit_marker_gc() { 21 | while (s_pending_delete.size()) 22 | s_pending_delete.pop_front(); 23 | } 24 | 25 | commit_chain::commit_chain(std::string topic, int32_t partition) 26 | : topic_(topic), partition_(partition), size_(0), last_good_offset_(-1), first_ec_(0), 27 | next_(std::make_shared([this](int64_t offset, int32_t ec) { 28 | handle_result(offset, ec); 29 | })) { 30 | } 31 | 32 | std::shared_ptr commit_chain::create(int64_t offset) { 33 | spinlock::scoped_lock xxx(spinlock_); 34 | { 35 | ++size_; 36 | } 37 | auto next = std::make_shared([this](int64_t offset, int32_t ec) { 38 | handle_result(offset, ec); 39 | }); 40 | 41 | next_->init(offset, next); 42 | auto res = next_; 43 | next_ = next; 44 | return res; 45 | } 46 | 47 | // tbd we might want to have several error handling algoritms 48 | // fatal as below or just a warning and skip? 
49 | void commit_chain::handle_result(int64_t offset, int32_t ec) { 50 | if (offset >= 0) { // the "next" object with -1 is invalid 51 | if (first_ec_) // we never continue after first failure 52 | return; 53 | if (!ec) { 54 | spinlock::scoped_lock xxx(spinlock_); 55 | { 56 | --size_; 57 | } 58 | last_good_offset_ = offset; 59 | } else { 60 | first_ec_ = ec; 61 | LOG(FATAL) << "commit_chain failed, topic " << topic_ << ":" << partition_ 62 | << ", failure at offset:" << offset << ", ec:" << ec; 63 | } 64 | } 65 | } 66 | } -------------------------------------------------------------------------------- /src/internal/rocksdb/rocksdb_operators.cpp: -------------------------------------------------------------------------------- 1 | #ifdef KSPP_ROCKSDB 2 | 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | // A 'model' merge operator with uint64 addition semantics 10 | // Implemented as an AssociativeMergeOperator for simplicity and example. 11 | namespace rocksdb { 12 | 13 | static inline void PutFixed64(std::string *dst, int64_t val) { 14 | dst->resize(sizeof(int64_t)); 15 | memcpy((void *) dst->data(), &val, sizeof(int64_t)); 16 | } 17 | 18 | 19 | class Int64AddOperator : public rocksdb::AssociativeMergeOperator { 20 | public: 21 | virtual bool Merge(const Slice & /*key*/, const Slice *existing_value, 22 | const Slice &value, std::string *new_value, 23 | Logger *logger) const override { 24 | assert(new_value); 25 | int64_t orig_value = 0; 26 | if (existing_value) { 27 | orig_value = ::Int64AddOperator::Deserialize(*existing_value); 28 | } 29 | int64_t operand = ::Int64AddOperator::Deserialize(value); 30 | int64_t updated_value = orig_value + operand; 31 | new_value->resize(sizeof(int64_t)); 32 | memcpy((void *) new_value->data(), &updated_value, sizeof(int64_t)); 33 | return true; // Return true always since corruption will be treated as 0 34 | } 35 | 36 | virtual const char *Name() const override { 37 | return "Int64AddOperator"; 38 | } 39 | 
}; 40 | } 41 | 42 | 43 | namespace rocksdb { 44 | std::shared_ptr CreateInt64AddOperator() { 45 | return std::make_shared(); 46 | } 47 | } 48 | #endif // KSPP_ROCKSDB 49 | -------------------------------------------------------------------------------- /src/internal/utils/kspp_utils.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | namespace kspp { 5 | static bool is_number(const std::string &s) { 6 | return !s.empty() && std::all_of(s.begin(), s.end(), ::isdigit); 7 | } 8 | 9 | std::string sanitize_filename(std::string s) { 10 | auto e = std::regex("[/?<>\\:*|\"]"); 11 | s = std::regex_replace(s, e, "_"); 12 | return s; 13 | } 14 | 15 | std::vector parse_partition_list(std::string s) { 16 | std::vector result; 17 | auto begin = s.find_first_of("["); 18 | auto end = s.find_first_of("]"); 19 | if (begin == std::string::npos || end == std::string::npos || end - begin < 2) 20 | return result; 21 | auto sz = (end - begin) - 1; 22 | auto s2 = s.substr(begin + 1, sz); 23 | { 24 | std::regex rgx("[,\\s+]"); 25 | std::sregex_token_iterator iter(s2.begin(), s2.end(), rgx, -1); 26 | std::sregex_token_iterator end; 27 | for (; iter != end; ++iter) { 28 | if (is_number(*iter)) 29 | result.push_back(stoi(*iter)); 30 | } 31 | } 32 | return result; 33 | } 34 | 35 | std::vector parse_string_array(std::string s, std::string regexp) { 36 | std::vector result; 37 | auto begin = s.find_first_of("["); 38 | auto end = s.find_first_of("]"); 39 | if (begin == std::string::npos || end == std::string::npos || end - begin < 2) 40 | return result; 41 | auto sz = (end - begin) - 1; 42 | auto s2 = s.substr(begin + 1, sz); 43 | { 44 | //std::regex rgx("[,\\s+]"); 45 | //std::regex rgx("[\\,]"); 46 | std::regex rgx(regexp); 47 | std::sregex_token_iterator iter(s2.begin(), s2.end(), rgx, -1); 48 | std::sregex_token_iterator end; 49 | for (; iter != end; ++iter) { 50 | result.push_back(*iter); 51 | } 52 | } 53 | return result; 54 
| } 55 | 56 | std::vector get_partition_list(int32_t nr_of_partitions) { 57 | std::vector res; 58 | for (int32_t i = 0; i != nr_of_partitions; ++i) 59 | res.push_back(i); 60 | return res; 61 | } 62 | 63 | std::string partition_list_to_string(std::vector v) { 64 | std::string s = "["; 65 | for (auto i = v.begin(); i != v.end(); ++i) 66 | s += std::to_string(*i) + ((i != v.end() - 1) ? ", " : "]"); 67 | return s; 68 | } 69 | 70 | std::string to_string(std::vector v) { 71 | std::string s = "["; 72 | for (auto i = v.begin(); i != v.end(); ++i) 73 | s += *i + ((i != v.end() - 1) ? ", " : "]"); 74 | return s; 75 | } 76 | 77 | std::string to_string(std::set v) { 78 | std::string s = "["; 79 | size_t count = 0; 80 | for (auto i = v.begin(); i != v.end(); ++i, ++count) 81 | s += *i + ((count != v.size() - 1) ? ", " : "]"); 82 | return s; 83 | } 84 | 85 | } -------------------------------------------------------------------------------- /src/kspp.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | namespace kspp { 4 | } 5 | 6 | -------------------------------------------------------------------------------- /src/metrics/prometheus_pushgateway_reporter.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | using namespace std::chrono_literals; 5 | namespace kspp { 6 | static std::string hostname_part(std::string s) { 7 | return s.substr(0, s.find(":")); 8 | } 9 | 10 | static std::string port_part(std::string s) { 11 | auto i = s.find(":"); 12 | if (i == std::string::npos) 13 | return "9091"; 14 | return s.substr(i + 1); 15 | } 16 | 17 | prometheus_pushgateway_reporter::prometheus_pushgateway_reporter(std::string job_name, std::string uri, bool verbose) 18 | : gateway_(hostname_part(uri), port_part(uri), job_name) 19 | , verbose_(verbose) { 20 | thread_ = std::make_shared([this, uri]() { 21 | int64_t next_time_to_send = kspp::milliseconds_since_epoch() + 10 * 
1000; 22 | while (run_) { 23 | //time for report 24 | 25 | if (next_time_to_send <= kspp::milliseconds_since_epoch()) { 26 | uint64_t measurement_time = milliseconds_since_epoch(); 27 | int http_result = gateway_.Push(); 28 | uint64_t push_time = milliseconds_since_epoch(); 29 | if (http_result != 200) { 30 | LOG(WARNING) << "metrics push failed, uri:" << uri << ", elapsed: " << push_time - measurement_time; 31 | } else { 32 | if (verbose_) 33 | LOG(INFO) << "metrics sent OK, elapsed: " << push_time - measurement_time; 34 | } 35 | //schedule nex reporting event 36 | next_time_to_send += 10000; 37 | // if we are really out of sync lets sleep at least 10 more seconds 38 | if (next_time_to_send <= kspp::milliseconds_since_epoch()) 39 | next_time_to_send = kspp::milliseconds_since_epoch() + 10000; 40 | } 41 | std::this_thread::sleep_for(100ms); 42 | } // while 43 | });//thread 44 | 45 | run_ = true; 46 | } 47 | 48 | prometheus_pushgateway_reporter::~prometheus_pushgateway_reporter() { 49 | run_ = false; 50 | thread_->join(); 51 | } 52 | 53 | void prometheus_pushgateway_reporter::add_metrics(std::shared_ptr p) { 54 | gateway_.RegisterCollectable(p->get_prometheus_registry()); 55 | } 56 | 57 | std::shared_ptr 58 | operator<<(std::shared_ptr reporter, std::shared_ptr t) { 59 | reporter->add_metrics(t); 60 | return reporter; 61 | } 62 | 63 | std::shared_ptr 64 | operator<<(std::shared_ptr reporter, std::vector> v) { 65 | for (const auto &i: v) 66 | reporter->add_metrics(i); 67 | return reporter; 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /src/utils/cluster_uri.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | namespace kspp { 6 | cluster_uri::cluster_uri(std::string s) 7 | : good_(true) { 8 | std::string::size_type pos0 = s.find("://"); 9 | if (pos0 == std::string::npos) { 10 | good_ = false; 11 | return; 12 | } 13 | scheme_ = 
s.substr(0, pos0); 14 | std::string::size_type pos1 = s.find('/', pos0 + 3); 15 | if (pos1 != std::string::npos) { 16 | authority_ = s.substr(pos0 + 3, pos1 - (pos0 + 3)); 17 | path_ = s.substr(pos1, std::string::npos); 18 | } else { 19 | authority_ = s.substr(pos0 + 3, std::string::npos); 20 | } 21 | 22 | // remove internal whitespaces in authority_ 23 | authority_.erase(remove_if(authority_.begin(), authority_.end(), isspace), authority_.end()); 24 | 25 | std::transform(scheme_.begin(), scheme_.end(), scheme_.begin(), ::tolower); 26 | } 27 | 28 | cluster_uri::cluster_uri(std::string s, std::string default_scheme) 29 | : good_(true) { 30 | std::string::size_type pos0 = s.find("://"); 31 | if (pos0 == std::string::npos) { 32 | scheme_ = default_scheme; 33 | std::string::size_type pos1 = s.find('/', 0); 34 | if (pos1 != std::string::npos) { 35 | authority_ = s.substr(0, pos1); 36 | path_ = s.substr(pos1, std::string::npos); 37 | } else { 38 | authority_ = s.substr(0, std::string::npos); 39 | } 40 | } else { 41 | scheme_ = s.substr(0, pos0); 42 | 43 | std::string::size_type pos1 = s.find('/', pos0 + 3); 44 | if (pos1 != std::string::npos) { 45 | authority_ = s.substr(pos0 + 3, pos1 - (pos0 + 3)); 46 | path_ = s.substr(pos1, std::string::npos); 47 | } else { 48 | authority_ = s.substr(pos0 + 3, std::string::npos); 49 | } 50 | } 51 | // remove internal whitespaces in authority_ 52 | authority_.erase(remove_if(authority_.begin(), authority_.end(), isspace), authority_.end()); 53 | std::transform(scheme_.begin(), scheme_.end(), scheme_.begin(), ::tolower); 54 | } 55 | 56 | std::vector cluster_uri::split_authority() const { 57 | std::vector result; 58 | std::regex rgx("[,\\s+]"); 59 | std::sregex_token_iterator iter(authority_.begin(), authority_.end(), rgx, -1); 60 | std::sregex_token_iterator end; 61 | for (; iter != end; ++iter) 62 | result.push_back(*iter); 63 | return result; 64 | } 65 | } 
-------------------------------------------------------------------------------- /src/utils/env.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | namespace kspp { 7 | std::string get_env_and_log(const char *env, std::string default_value) { 8 | const char *env_p = std::getenv(env); 9 | if (env_p) { 10 | LOG(INFO) << "env: " << env << " -> " << env_p; 11 | return std::string(env_p); 12 | } else { 13 | if (default_value.size()) 14 | LOG(INFO) << "env: " << env << " - not defined, using default: " << default_value; 15 | else 16 | LOG(INFO) << "env: " << env << " - not defined"; 17 | return default_value; 18 | } 19 | } 20 | 21 | std::string get_env_and_log_hidden(const char *env, std::string default_value) { 22 | const char *env_p = std::getenv(env); 23 | if (env_p) { 24 | LOG(INFO) << "env: " << env << " -> [hidden]"; 25 | return std::string(env_p); 26 | } else { 27 | LOG(INFO) << "env: " << env << " - not defined, using default: [hidden]"; 28 | return default_value; 29 | } 30 | } 31 | 32 | std::string default_kafka_broker_uri() { 33 | return get_env_and_log("KSPP_KAFKA_BROKER_URL", "plaintext://localhost:9092"); 34 | } 35 | 36 | std::string default_kafka_rest_uri() { 37 | return get_env_and_log("KSPP_KAFKA_REST_URL", "http://localhost:8082"); 38 | } 39 | 40 | std::string default_schema_registry_uri() { 41 | return get_env_and_log("KSPP_SCHEMA_REGISTRY_URL", "http://localhost:8081"); 42 | } 43 | 44 | std::string default_statestore_root() { 45 | return get_env_and_log("KSPP_STATE_STORE_ROOT", 46 | std::filesystem::temp_directory_path().generic_string() + "/kspp"); 47 | } 48 | 49 | std::string default_pushgateway_uri() { 50 | return get_env_and_log("KSPP_PUSHGATEWAY_URL", "http://localhost:9091"); 51 | } 52 | 53 | std::string default_ca_cert_path() { 54 | return get_env_and_log("KSPP_CA_CERT", "/etc/kspp/credentials/cacert.pem"); 55 | } 56 | 57 | std::string 
default_client_cert_path() { 58 | return get_env_and_log("KSPP_CLIENT_CERT", "/etc/kspp/credentials/client.pem"); 59 | } 60 | 61 | std::string default_client_key_path() { 62 | return get_env_and_log("KSPP_CLIENT_KEY", "/etc/kspp/credentials/client.key"); 63 | } 64 | 65 | std::string default_client_key_passphrase() { 66 | return get_env_and_log_hidden("KSPP_CLIENT_KEY_PASSPHRASE", ""); 67 | } 68 | 69 | std::string default_hostname() { 70 | auto host_name = boost::asio::ip::host_name(); 71 | return host_name; 72 | } 73 | } 74 | 75 | -------------------------------------------------------------------------------- /src/utils/kafka_utils.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | using namespace std::chrono_literals; 10 | 11 | namespace kspp { 12 | namespace kafka { 13 | 14 | int32_t get_number_partitions(std::shared_ptr config, std::string topic) { 15 | return config->get_cluster_metadata()->get_number_partitions(topic); 16 | } 17 | 18 | bool wait_for_consumer_group(std::shared_ptr config, std::string group_id, 19 | std::chrono::seconds timeout) { 20 | if (config->get_cluster_metadata()->consumer_group_exists(group_id, timeout)) { 21 | LOG(INFO) << "wait_for_consumer_group: \"" << group_id << "\" - OK"; 22 | return true; 23 | } else { 24 | LOG(ERROR) << "wait_for_consumer_group: \"" << group_id << "\" - FAILED"; 25 | return false; 26 | } 27 | } 28 | 29 | void 30 | require_topic_leaders(std::shared_ptr config, std::string topic, std::chrono::seconds timeout) { 31 | if (config->get_cluster_metadata()->wait_for_topic_leaders(topic, timeout)) 32 | LOG(INFO) << "require_topic_leaders: \"" << topic << "\" - OK"; 33 | else 34 | LOG(FATAL) << "require_topic_leaders: \"" << topic << "\" - FAILED"; 35 | } 36 | 37 | void require_topic_leaders(std::shared_ptr config, std::string topic) { 38 | require_topic_leaders(config, topic, 
config->get_cluster_state_timeout()); 39 | } 40 | 41 | std::vector 42 | get_partition_list(std::shared_ptr config, std::string topic, std::string partitions) { 43 | auto partition_list = parse_partition_list(partitions); 44 | if (partition_list.size() == 0 || partition_list[0] == -1) { 45 | auto nr_of_partitions = kspp::kafka::get_number_partitions(config, topic); 46 | partition_list = kspp::get_partition_list(nr_of_partitions); 47 | } 48 | return partition_list; 49 | } 50 | }//namespace kafka 51 | } // kspp 52 | 53 | 54 | -------------------------------------------------------------------------------- /src/utils/url.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | namespace kspp { 5 | 6 | /* 7 | file://localhost/etc/fstab 8 | file:///etc/fstab 9 | */ 10 | 11 | url::url(std::string s, std::string default_scheme) 12 | : good_(true) { 13 | std::string::size_type pos0 = s.find("://"); 14 | if (pos0 == std::string::npos) { 15 | if (default_scheme.size() == 0) { 16 | good_ = false; 17 | return; 18 | } 19 | scheme_ = default_scheme; 20 | std::string::size_type pos1 = s.find('/', 0); 21 | if (pos1 != std::string::npos) { 22 | authority_ = s.substr(0, pos1); 23 | path_ = s.substr(pos1, std::string::npos); 24 | } else { 25 | authority_ = s.substr(0, std::string::npos); 26 | } 27 | } else { 28 | scheme_ = s.substr(0, pos0); 29 | 30 | std::string::size_type pos1 = s.find('/', pos0 + 3); 31 | if (pos1 != std::string::npos) { 32 | authority_ = s.substr(pos0 + 3, pos1 - (pos0 + 3)); 33 | path_ = s.substr(pos1, std::string::npos); 34 | } else { 35 | authority_ = s.substr(pos0 + 3, std::string::npos); 36 | } 37 | } 38 | std::transform(scheme_.begin(), scheme_.end(), scheme_.begin(), ::tolower); 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /src/utils/url_parser.cpp: -------------------------------------------------------------------------------- 1 | #include 
2 | #include 3 | 4 | namespace kspp { 5 | std::vector split_url_list(std::string s, std::string default_scheme) { 6 | // remove internal whitespaces in s to make sure we do not create empty urls later... 7 | s.erase(remove_if(s.begin(), s.end(), isspace), s.end()); 8 | 9 | std::vector splitted_urls; 10 | std::regex rgx("[,\\s+]"); 11 | std::sregex_token_iterator iter(s.begin(), s.end(), rgx, -1); 12 | std::sregex_token_iterator end; 13 | for (; iter != end; ++iter) 14 | splitted_urls.push_back(*iter); 15 | 16 | std::vector result; 17 | for (auto str: splitted_urls) { 18 | url a_url(str, default_scheme); 19 | if (a_url.good()) 20 | result.push_back(a_url); 21 | } 22 | return result; 23 | } 24 | } 25 | -------------------------------------------------------------------------------- /tests/run_all.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | set -ef 3 | 4 | cd ../cmake-build-debug/tests/bin 5 | 6 | echo "test1" 7 | ./test1 8 | 9 | 10 | echo "test2 mem_counter_store" 11 | ./test2_mem_counter_store 12 | 13 | echo "test2_mem_store" 14 | ./test2_mem_store 15 | 16 | echo "test2_mem_windowed_store" 17 | ./test2_mem_windowed_store 18 | 19 | echo "test2_rocksdb_counter_store" 20 | ./test2_rocksdb_counter_store 21 | 22 | echo "test2_rocksdb_store" 23 | ./test2_rocksdb_store 24 | 25 | 26 | echo "test2_rocksdb_windowed_store" 27 | ./test2_rocksdb_windowed_store 28 | 29 | echo "test3_mem_token_bucket" 30 | ./test3_mem_token_bucket 31 | 32 | echo "test4_kafka_consumer" 33 | ./test4_kafka_consumer 34 | 35 | echo "test5_kafka_source_sink" 36 | ./test5_kafka_source_sink 37 | 38 | echo "test6_repartition" 39 | ./test6_repartition 40 | 41 | 42 | echo "test7_cluster_uri" 43 | ./test7_cluster_uri 44 | 45 | echo "test7_url_vector" 46 | ./test7_url_vector 47 | 48 | echo "test8_join" 49 | ./test8_join 50 | 51 | echo "tests OK" 52 | 53 | 54 | 55 | 56 | 57 | 58 | 
-------------------------------------------------------------------------------- /tests/test1.cpp: -------------------------------------------------------------------------------- 1 | 2 | 3 | int main(int argc, char **argv) { 4 | //assert(... == 0); 5 | return 0; 6 | } 7 | 8 | 9 | -------------------------------------------------------------------------------- /tests/test10_http.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | class my_http_client { 11 | public: 12 | my_http_client() 13 | : work_(new boost::asio::io_service::work(ios_)) 14 | , bg_([this]() { ios_.run(); }) 15 | , http_handler_(ios_, 10) { 16 | //curl_global_init(CURL_GLOBAL_NOTHING); /* minimal */ 17 | } 18 | 19 | ~my_http_client() { 20 | http_handler_.close(); 21 | work_.reset(); 22 | bg_.join(); 23 | } 24 | 25 | std::string get(std::string url) { 26 | std::vector headers = {""}; 27 | auto request = std::make_shared( 28 | kspp::http::GET, 29 | url, 30 | headers); 31 | auto r2 = http_handler_.perform(request); 32 | std::string s = r2->rx_content(); 33 | return s; 34 | } 35 | 36 | private: 37 | boost::asio::io_service ios_; 38 | std::unique_ptr work_; 39 | std::thread bg_; 40 | kspp::http::client http_handler_; 41 | }; 42 | 43 | 44 | int main(int argc, char **argv) { 45 | curl_global_init(CURL_GLOBAL_NOTHING); /* minimal */ 46 | LOG(INFO) << "starting test"; 47 | { 48 | my_http_client client; 49 | auto s = client.get("www.di.se"); 50 | LOG(INFO) << s; 51 | } 52 | LOG(INFO) << "after destructor"; 53 | return 0; 54 | } 55 | 56 | -------------------------------------------------------------------------------- /tests/test12_s3.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | int main(int argc, char **argv) { 5 | auto p = 
kspp::get_offset_provider("s3://10.1.47.180:9000/kspp-dev/tests/test12_s3.offset"); 6 | p->commit(0, true); 7 | auto offset = p->start(kspp::OFFSET_STORED); 8 | assert(offset == 0); 9 | return 0; 10 | } 11 | 12 | -------------------------------------------------------------------------------- /tests/test13_mem.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | using namespace std::chrono_literals; 8 | 9 | 10 | /* Exit flag for main loop */ 11 | static bool run = true; 12 | 13 | static void sigterm(int sig) { 14 | run = false; 15 | } 16 | 17 | //valgrind --leak-check=yes --leak-check=full --show-leak-kinds=all 18 | 19 | int main(int argc, char **argv) { 20 | auto config = std::make_shared("", kspp::cluster_config::NONE); 21 | kspp::topology_builder builder(config); 22 | auto topology = builder.create_topology(); 23 | auto source = topology->create_processor>(0); 24 | auto sink = topology->create_sink>(source); 25 | 26 | std::signal(SIGINT, sigterm); 27 | std::signal(SIGTERM, sigterm); 28 | std::signal(SIGPIPE, SIG_IGN); 29 | 30 | topology->start(kspp::OFFSET_END); // has to be something - since we feed events from web totally irrelevant 31 | 32 | std::thread t([topology]() { 33 | while (run) { 34 | if (topology->process(kspp::milliseconds_since_epoch()) == 0) { 35 | std::this_thread::sleep_for(10ms); 36 | topology->commit(false); 37 | } 38 | } 39 | LOG(INFO) << "flushing events.."; 40 | topology->flush(true, 10000); // 10sec max 41 | LOG(INFO) << "flushing events done"; 42 | }); 43 | 44 | 45 | for (int i = 0; i != 10; ++i) { 46 | for (int j = 0; j != 1000; ++j) 47 | insert(*source, std::string("nisse"), std::string("was here")); 48 | std::this_thread::sleep_for(1000ms); 49 | } 50 | LOG(INFO) << "exiting test"; 51 | 52 | run = false; 53 | t.join(); 54 | LOG(INFO) << "joined"; 55 | 56 | 57 | config->load_config_from_env(); 58 | 
config->set_producer_buffering_time(1000ms); 59 | config->set_consumer_buffering_time(500ms); 60 | config->validate(); 61 | config->log(); 62 | config->get_schema_registry(); 63 | 64 | 65 | gflags::ShutDownCommandLineFlags(); 66 | return 0; 67 | } 68 | 69 | -------------------------------------------------------------------------------- /tests/test14_async.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include // std::cout 4 | #include // std::async, std::future 5 | #include 6 | #include 7 | #include 8 | 9 | using namespace std::chrono_literals; 10 | 11 | void async_sleep(boost::asio::io_service &ios, int64_t ms, std::function cb) { 12 | auto h = std::make_shared(ios); 13 | h->expires_from_now(boost::posix_time::milliseconds(ms)); 14 | LOG(INFO) << "sleeping for " << " (time : " << ms << " ms)"; 15 | h->async_wait([h, ms, cb](const boost::system::error_code &ec) { 16 | LOG(INFO) << "sleep is over : " << " (time : " << ms << " ms)"; 17 | cb(!ec ? 
0 : -1); 18 | }); 19 | } 20 | 21 | 22 | void test1() { 23 | std::cout << "start test 1" << std::endl; 24 | boost::asio::io_service io_service; 25 | std::unique_ptr keepalive_work = std::make_unique( 26 | io_service); 27 | std::thread asio_thread([&] { io_service.run(); }); 28 | auto work = std::make_shared>(kspp::async::SEQUENTIAL, kspp::async::FIRST_FAIL); 29 | for (auto delay: {5, 4, 3, 2, 1}) { 30 | work->push_back([&io_service, delay](kspp::async::work::callback cb) { 31 | async_sleep(io_service, delay * 1000, cb); 32 | }); 33 | } 34 | 35 | work->async_call([](int64_t duration, int ec) { 36 | LOG(INFO) << "SEQUENTIAL work done ec: " << ec << " (time : " << duration << " ms)"; 37 | }); 38 | 39 | keepalive_work.reset(); 40 | asio_thread.join(); 41 | LOG(INFO) << "exiting test1"; 42 | } 43 | 44 | void test2() { 45 | LOG(INFO) << "start test 2"; 46 | boost::asio::io_service io_service; 47 | std::unique_ptr keepalive_work = std::make_unique( 48 | io_service); 49 | std::thread asio_thread([&] { io_service.run(); }); 50 | 51 | auto work = std::make_shared>(kspp::async::PARALLEL, kspp::async::FIRST_FAIL); 52 | for (auto delay: {5, 4, 3, 2, 1}) { 53 | work->push_back([&io_service, delay](kspp::async::work::callback cb) { 54 | async_sleep(io_service, delay * 1000, cb); 55 | }); 56 | } 57 | 58 | 59 | auto result = work->call(); 60 | LOG(INFO) << "PARALLEL work done ec: " << result << " (time : " << "???" 
<< " ms)"; 61 | //work->async_call([](int64_t duration, int ec) { 62 | // LOG(INFO) << "PARALLEL work done ec: " << ec << " (time : " << " ms)"; 63 | //}); 64 | 65 | LOG(INFO) << "resetting dummy work"; 66 | keepalive_work.reset(); 67 | asio_thread.join(); 68 | LOG(INFO) << "exiting test2"; 69 | } 70 | 71 | 72 | int main() { 73 | test1(); 74 | test2(); 75 | //gflags::ShutDownCommandLineFlags(); 76 | return 0; 77 | } 78 | 79 | -------------------------------------------------------------------------------- /tests/test2_mem_counter_store.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | using namespace std::chrono_literals; 5 | 6 | int main(int argc, char **argv) { 7 | FLAGS_logtostderr = 1; 8 | google::InitGoogleLogging(argv[0]); 9 | 10 | // insert 3 check size 11 | kspp::mem_counter_store store(""); 12 | auto t0 = kspp::milliseconds_since_epoch(); 13 | store.insert(std::make_shared>(0, 1, t0), -1); 14 | store.insert(std::make_shared>(1, 1, t0), -1); 15 | store.insert(std::make_shared>(2, 1, t0), -1); 16 | assert(store.exact_size() == 3); 17 | 18 | // update existing key with new value 19 | { 20 | store.insert(std::make_shared>(2, 1, t0 + 10), -1); 21 | assert(store.exact_size() == 3); 22 | auto record = store.get(2); 23 | assert(record != nullptr); 24 | assert(record->key() == 2); 25 | assert(record->value() != nullptr); 26 | assert(*record->value() == 2); 27 | assert(record->event_time() == t0 + 10); 28 | } 29 | 30 | // update existing key with new value but old timestamp 31 | // this should be ok since this is an aggregation 32 | { 33 | store.insert(std::make_shared>(2, 2, t0), -1); 34 | assert(store.exact_size() == 3); 35 | auto record = store.get(2); 36 | assert(record != nullptr); 37 | assert(record->key() == 2); 38 | assert(record->value() != nullptr); 39 | assert(*record->value() == 4); 40 | assert(record->event_time() == t0 + 10); // keep biggest timestamp - not latest 41 | } 42 | 43 | // 
update existing key with new negative value 44 | { 45 | store.insert(std::make_shared>(0, -2, t0), -1); 46 | assert(store.exact_size() == 3); 47 | auto record = store.get(0); 48 | assert(record != nullptr); 49 | assert(record->key() == 0); 50 | assert(record->value() != nullptr); 51 | assert(*record->value() == -1); 52 | } 53 | 54 | // delete existing key with to old timestamp 55 | // should be forbidden 56 | { 57 | store.insert(std::make_shared>(2, nullptr, t0), -1); 58 | assert(store.exact_size() == 3); 59 | auto record = store.get(2); 60 | assert(record != nullptr); 61 | assert(record->key() == 2); 62 | assert(record->value() != nullptr); 63 | assert(*record->value() == 4); 64 | } 65 | 66 | // delete existing key with new timestamp 67 | { 68 | store.insert(std::make_shared>(2, nullptr, t0 + 30), -1); 69 | assert(store.exact_size() == 2); 70 | auto record = store.get(2); 71 | assert(record == nullptr); 72 | } 73 | return 0; 74 | } 75 | 76 | 77 | -------------------------------------------------------------------------------- /tests/test2_mem_store.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | using namespace std::chrono_literals; 5 | 6 | int main(int argc, char **argv) { 7 | { 8 | FLAGS_logtostderr = 1; 9 | google::InitGoogleLogging(argv[0]); 10 | 11 | // insert 3 check size 12 | kspp::mem_store store(""); 13 | auto t0 = kspp::milliseconds_since_epoch(); 14 | store.insert(std::make_shared>(0, "value0", t0), -1); 15 | store.insert(std::make_shared>(1, "value1", t0), -1); 16 | store.insert(std::make_shared>(2, "value2", t0), -1); 17 | assert(store.exact_size() == 3); 18 | 19 | // update existing key with new value 20 | { 21 | store.insert(std::make_shared>(2, "value2updated", t0 + 10), -1); 22 | assert(store.exact_size() == 3); 23 | auto record = store.get(2); 24 | assert(record != nullptr); 25 | assert(record->key() == 2); 26 | assert(record->value() != nullptr); 27 | assert(*record->value() 
== "value2updated"); 28 | } 29 | 30 | // update existing key with new value but old timestamp 31 | { 32 | store.insert(std::make_shared>(2, "to_old", t0), -1); 33 | assert(store.exact_size() == 3); 34 | auto record = store.get(2); 35 | assert(record != nullptr); 36 | assert(record->key() == 2); 37 | assert(record->value() != nullptr); 38 | assert(*record->value() == "value2updated"); 39 | } 40 | 41 | // delete existing key with to old timestamp 42 | { 43 | store.insert(std::make_shared>(2, nullptr, t0), -1); 44 | assert(store.exact_size() == 3); 45 | auto record = store.get(2); 46 | assert(record != nullptr); 47 | assert(record->key() == 2); 48 | assert(record->value() != nullptr); 49 | assert(*record->value() == "value2updated"); 50 | } 51 | 52 | // delete existing key with new timestamp 53 | { 54 | store.insert(std::make_shared>(2, nullptr, t0 + 30), -1); 55 | assert(store.exact_size() == 2); 56 | auto record = store.get(2); 57 | assert(record == nullptr); 58 | } 59 | } 60 | return 0; 61 | } 62 | 63 | 64 | -------------------------------------------------------------------------------- /tests/test2_rocksdb_store.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | using namespace std::chrono_literals; 7 | 8 | int main(int argc, char **argv) { 9 | FLAGS_logtostderr = 1; 10 | google::InitGoogleLogging(argv[0]); 11 | 12 | std::filesystem::path path = kspp::default_statestore_root(); 13 | path /= "test2_rocksdb_store"; 14 | 15 | if (std::filesystem::exists(path)) 16 | std::filesystem::remove_all(path); 17 | { 18 | // insert 3 check size 19 | kspp::rocksdb_store store(path); 20 | auto t0 = kspp::milliseconds_since_epoch(); 21 | store.insert(std::make_shared>(0, "value0", t0), -1); 22 | store.insert(std::make_shared>(1, "value1", t0), -1); 23 | store.insert(std::make_shared>(2, "value2", t0), -1); 24 | assert(store.exact_size() == 3); 25 | 26 | // update existing key with new 
value 27 | { 28 | store.insert(std::make_shared>(2, "value2updated", t0 + 10), -1); 29 | assert(store.exact_size() == 3); 30 | auto record = store.get(2); 31 | assert(record != nullptr); 32 | assert(record->key() == 2); 33 | assert(record->value() != nullptr); 34 | assert(*record->value() == "value2updated"); 35 | assert(record->event_time() == t0 + 10); 36 | } 37 | 38 | //// update existing key with new value but old timestamp 39 | //{ 40 | // store.insert(std::make_shared>(2, "to_old", t0)); 41 | // assert(exact_size(store) == 3); 42 | // auto record = store.get(2); 43 | // assert(record != nullptr); 44 | // assert(record->key() == 2); 45 | // assert(record->value != nullptr); 46 | // assert(*record->value == "value2updated"); 47 | //} 48 | 49 | //// delete existing key with to old timestamp 50 | //{ 51 | // store.insert(std::make_shared>(2, nullptr, t0)); 52 | // assert(exact_size(store) == 3); 53 | // auto record = store.get(2); 54 | // assert(record != nullptr); 55 | // assert(record->key() == 2); 56 | // assert(record->value != nullptr); 57 | // assert(*record->value == "value2updated"); 58 | //} 59 | 60 | // delete existing key with new timestamp 61 | { 62 | store.insert(std::make_shared>(2, nullptr, t0 + 30), -1); 63 | assert(store.exact_size() == 2); 64 | auto record = store.get(2); 65 | assert(record == nullptr); 66 | } 67 | } 68 | // cleanup 69 | std::filesystem::remove_all(path); 70 | return 0; 71 | } 72 | 73 | 74 | -------------------------------------------------------------------------------- /tests/test3_mem_token_bucket.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | using namespace std::chrono_literals; 5 | 6 | int main(int argc, char **argv) { 7 | FLAGS_logtostderr = 1; 8 | google::InitGoogleLogging(argv[0]); 9 | 10 | { 11 | // insert 3 check size 12 | kspp::mem_token_bucket_store store(100ms, 2); 13 | auto t0 = kspp::milliseconds_since_epoch(); 14 | assert(store.consume(0, 
t0) == true); 15 | assert(store.consume(1, t0) == true); 16 | assert(store.consume(2, t0) == true); 17 | assert(store.exact_size() == 3); 18 | 19 | assert(store.exact_size() == 3); // tests iterators 20 | 21 | // consume existing key 22 | { 23 | assert(store.consume(2, t0 + 10) == true); 24 | assert(store.exact_size() == 3); 25 | auto res = store.get(2); 26 | assert(res); 27 | assert(res->key() == 2); 28 | assert(res->value()); 29 | assert(*res->value() == 0); 30 | assert(res->event_time() == t0); // less than one item so not incremented 31 | } 32 | 33 | // consume existing key to fast 34 | { 35 | assert(store.consume(2, t0 + 20) == false); 36 | assert(store.exact_size() == 3); 37 | auto res = store.get(2); 38 | assert(res); 39 | assert(res->key() == 2); 40 | assert(res->value()); 41 | assert(*res->value() == 0); 42 | assert(res->event_time() == t0);// less than one item so not incremented 43 | } 44 | 45 | // consume existing key after one should be available 46 | { 47 | assert(store.consume(2, t0 + 101) == true); 48 | assert(store.exact_size() == 3); 49 | auto res = store.get(2); 50 | assert(res); 51 | assert(res->key() == 2); 52 | assert(res->value()); 53 | assert(*res->value() == 1); 54 | assert(res->event_time() == t0 + 101);// more than full time period so reset 55 | } 56 | 57 | // delete existing key 58 | { 59 | store.del(1); 60 | assert(store.exact_size() == 2); 61 | auto res = store.get(1); 62 | assert(res); 63 | assert(res->key() == 1); 64 | assert(res->value()); 65 | assert(*res->value() == 2); 66 | assert(res->event_time() == -1); 67 | } 68 | } 69 | return 0; 70 | } 71 | 72 | 73 | -------------------------------------------------------------------------------- /tests/test6_repartition.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | using namespace std::chrono_literals; 9 | 10 | int main(int argc, char **argv) { 11 | 
FLAGS_logtostderr = 1; 12 | google::InitGoogleLogging(argv[0]); 13 | 14 | return 0; 15 | } 16 | 17 | -------------------------------------------------------------------------------- /tests/test7_cluster_uri.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | int main(int argc, char **argv) { 6 | 7 | // lets test the trivial case 8 | { 9 | kspp::cluster_uri uri("Zk://127.0.0.1:2181,192.168.100.44:2181/nisseGul"); 10 | assert(uri.good()); 11 | assert(uri.scheme() == "zk"); // should be lower case 12 | assert(uri.authority() == "127.0.0.1:2181,192.168.100.44:2181"); 13 | assert(uri.path() == "/nisseGul"); 14 | assert(uri.str() == "zk://127.0.0.1:2181,192.168.100.44:2181/nisseGul"); 15 | } 16 | 17 | { 18 | kspp::cluster_uri uri("Zk://127.0.0.1:2181,192.168.100.44:2181"); 19 | assert(uri.good()); 20 | assert(uri.scheme() == "zk"); // should be lower case 21 | assert(uri.authority() == "127.0.0.1:2181,192.168.100.44:2181"); 22 | assert(uri.path() == ""); 23 | assert(uri.str() == "zk://127.0.0.1:2181,192.168.100.44:2181"); 24 | } 25 | 26 | //ip v6.... 
27 | //https://[2001:db8:85a3:0:0:8a2e:370:7334]:4321,[::1]:5555,[0:0:0:0:0:0:0:1]:1432 28 | { 29 | kspp::cluster_uri uri("https://[2001:db8:85a3:0:0:8a2e:370:7334]:4321,[::1]:5555,[0:0:0:0:0:0:0:1]:1432"); 30 | assert(uri.good()); 31 | assert(uri.scheme() == "https"); 32 | assert(uri.authority() == "[2001:db8:85a3:0:0:8a2e:370:7334]:4321,[::1]:5555,[0:0:0:0:0:0:0:1]:1432"); 33 | assert(uri.path() == ""); 34 | assert(uri.str() == "https://[2001:db8:85a3:0:0:8a2e:370:7334]:4321,[::1]:5555,[0:0:0:0:0:0:0:1]:1432"); 35 | } 36 | 37 | // with some whitespace 38 | { 39 | kspp::cluster_uri uri("https://[2001:db8:85a3:0:0:8a2e:370:7334]:4321, [::1]:5555 ,[0:0:0:0:0:0:0:1]:1432"); 40 | assert(uri.good()); 41 | assert(uri.scheme() == "https"); 42 | assert(uri.authority() == "[2001:db8:85a3:0:0:8a2e:370:7334]:4321,[::1]:5555,[0:0:0:0:0:0:0:1]:1432"); 43 | assert(uri.path() == ""); 44 | assert(uri.str() == "https://[2001:db8:85a3:0:0:8a2e:370:7334]:4321,[::1]:5555,[0:0:0:0:0:0:0:1]:1432"); 45 | } 46 | 47 | //https://[::1]:5555 48 | //http://[2001:0db8:85a3:0000:0000:8a2e:0370:7334]:1234 49 | //http://[0:0:0:0:0:0:0:1]:1432 (edited) 50 | 51 | return 0; 52 | } 53 | 54 | -------------------------------------------------------------------------------- /tests/test7_url_vector.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | int main(int argc, char **argv) { 6 | 7 | // lets test the trivial case 8 | { 9 | std::string s( 10 | "Https://[2001:db8:85a3:0:0:8a2e:370:7334]:4321/hfadsjkh, https://[2001:db8:85a3:0:0:8a2e:370:7334]:4321/hfadsjkh"); 11 | auto v = kspp::split_url_list(s); 12 | assert(v.size() == 2); 13 | for (auto url: v) { 14 | assert(url.good()); 15 | assert(url.scheme() == "https"); // should be lower case 16 | assert(url.authority() == "[2001:db8:85a3:0:0:8a2e:370:7334]:4321"); 17 | assert(url.path() == "/hfadsjkh"); 18 | assert(url.str() == 
"https://[2001:db8:85a3:0:0:8a2e:370:7334]:4321/hfadsjkh"); 19 | } 20 | } 21 | 22 | // test default scheme 23 | { 24 | std::string s( 25 | "Https://[2001:db8:85a3:0:0:8a2e:370:7334]:4321/hfadsjkh, [2001:db8:85a3:0:0:8a2e:370:7334]:4321/hfadsjkh"); 26 | auto v = kspp::split_url_list(s, "https"); 27 | assert(v.size() == 2); 28 | for (auto url: v) { 29 | assert(url.good()); 30 | assert(url.scheme() == "https"); // should be lower case 31 | assert(url.authority() == "[2001:db8:85a3:0:0:8a2e:370:7334]:4321"); 32 | assert(url.path() == "/hfadsjkh"); 33 | assert(url.str() == "https://[2001:db8:85a3:0:0:8a2e:370:7334]:4321/hfadsjkh"); 34 | } 35 | } 36 | 37 | return 0; 38 | } 39 | 40 | -------------------------------------------------------------------------------- /tests/test9_kafka_rest.cpp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bitbouncer/kspp/8539f359e32bd3dd1360ac4616eab88e79aab607/tests/test9_kafka_rest.cpp -------------------------------------------------------------------------------- /tools/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_subdirectory(kspp_avrogencpp) 2 | add_subdirectory(kafka2avro) 3 | add_subdirectory(csv2avro) 4 | 5 | IF (ENABLE_ROCKSDB) 6 | add_subdirectory(kafka2rocksdb) 7 | ENDIF () 8 | 9 | add_subdirectory(kspp_protobuf_register_schema) 10 | 11 | IF (ENABLE_MQTT) 12 | add_subdirectory(mqtt2kafka) 13 | ENDIF () 14 | -------------------------------------------------------------------------------- /tools/csv2avro/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_executable(csv2avro csv2avro.cpp) 2 | 3 | target_link_libraries(csv2avro ${ALL_LIBS}) 4 | 5 | INSTALL(TARGETS csv2avro RUNTIME DESTINATION bin) 6 | 7 | -------------------------------------------------------------------------------- /tools/kafka2avro/CMakeLists.txt: 
-------------------------------------------------------------------------------- 1 | add_executable(kafka2avro kafka2avro.cpp) 2 | 3 | target_link_libraries(kafka2avro ${ALL_LIBS}) 4 | 5 | INSTALL(TARGETS kafka2avro RUNTIME DESTINATION bin) 6 | 7 | -------------------------------------------------------------------------------- /tools/kafka2rocksdb/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_executable(kafka2rocksdb main.cpp) 2 | 3 | target_link_libraries(kafka2rocksdb ${ALL_LIBS}) 4 | 5 | INSTALL(TARGETS kafka2rocksdb RUNTIME DESTINATION bin) 6 | -------------------------------------------------------------------------------- /tools/kspp_avrogencpp/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_executable(kspp_avrogencpp kspp_avrogencpp.cpp) 2 | 3 | target_link_libraries(kspp_avrogencpp ${ALL_LIBS}) 4 | 5 | INSTALL(TARGETS kspp_avrogencpp RUNTIME DESTINATION bin) 6 | -------------------------------------------------------------------------------- /tools/kspp_protobuf_register_schema/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | add_executable(kspp_protobuf_register_schema kspp_protobuf_register_schema.cpp) 2 | 3 | target_link_libraries(kspp_protobuf_register_schema ${ALL_LIBS}) 4 | 5 | INSTALL(TARGETS kspp_protobuf_register_schema RUNTIME DESTINATION bin) 6 | -------------------------------------------------------------------------------- /tools/kspp_protobuf_register_schema/kspp_protobuf_register_schema.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | using namespace google::protobuf::compiler; 9 | 10 | class MyMultiFileErrorCollector : public MultiFileErrorCollector { 11 | public: 12 | MyMultiFileErrorCollector(){} 13 | virtual void AddError(const std::string & filename, 
int line, int column, const std::string & message){ 14 | LOG(ERROR) << filename << " line:" << line << ":" << column << " " << message; 15 | } 16 | }; 17 | 18 | int main(int argc, char** argv) { 19 | using namespace google::protobuf; 20 | using namespace google::protobuf::io; 21 | using namespace google::protobuf::compiler; 22 | 23 | if (argc != 4) { 24 | std::cerr << "usage " << argv[0] 25 | << "source_root_path " 26 | ".protofile " 27 | "subject" 28 | << std::endl; 29 | return -1; 30 | } 31 | 32 | std::string source_path = argv[1]; 33 | std::string proto_source = argv[2]; 34 | std::string subject = argv[3]; 35 | 36 | MyMultiFileErrorCollector ec; 37 | DiskSourceTree source_tree; 38 | source_tree.MapPath("", source_path); 39 | source_tree.MapPath("", "/usr/local/include/"); 40 | source_tree.MapPath("", "/usr/include/"); 41 | Importer importer(&source_tree, &ec); 42 | auto file_descriptor = importer.Import(proto_source); 43 | //LOG(INFO) << file_descriptor->DebugString(); 44 | auto config = std::make_shared("", kspp::cluster_config::SCHEMA_REGISTRY); 45 | //auto config = std::make_shared(""); 46 | config->load_config_from_env(); 47 | try { 48 | nlohmann::json json = kspp::protobuf_register_schema( 49 | config->get_schema_registry(), subject, file_descriptor); 50 | } 51 | catch (std::exception& e){ 52 | LOG(ERROR) << "failed to register schema: " << e.what() << std::endl; 53 | return -1; 54 | } 55 | 56 | //catch (std::runtime_error& e){ 57 | return 0; 58 | } -------------------------------------------------------------------------------- /tools/mqtt2kafka/CMakeLists.txt: -------------------------------------------------------------------------------- 1 | SET(SRCS 2 | mqtt2kafka.cpp 3 | ) 4 | 5 | #SET(PAHO_LIBS_STATIC paho-mqttpp3 paho-mqtt3cs paho-mqtt3as) 6 | SET(PAHO_LIBS_SHARED paho-mqttpp3 paho-mqtt3c paho-mqtt3a) 7 | 8 | add_executable(mqtt2kafka.cpp ${SRCS}) 9 | target_link_libraries(mqtt2kafka.cpp ${PAHO_LIBS_SHARED} ${ALL_LIBS}) 10 | 11 | INSTALL(TARGETS 
mqtt2kafka.cpp RUNTIME DESTINATION bin) 12 | 13 | -------------------------------------------------------------------------------- /utils_to_fix/source_provider.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | namespace kspp { 5 | source_parts split_url_parts(std::string uri) { 6 | //if (uri.size() >5 && boost::iequals(uri.substr(0, 5), ("s3://"))) 7 | // return S3; 8 | source_parts parts; 9 | parts.protocol = source_parts::NONE; 10 | 11 | 12 | if (uri.size() > 8 && boost::iequals(uri.substr(0, 8), ("kafka://"))) { 13 | parts.protocol = source_parts::KAFKA; 14 | const size_t topic_start = 8; 15 | //parts.host = uri.substr(host_start, uri.find("/", host_start) - host_start); 16 | parts.topic = uri.substr(topic_start, uri.find(":", topic_start) - topic_start); 17 | parts.partition = 0; // fixme 18 | } 19 | 20 | if (uri.size() > 7 && boost::iequals(uri.substr(0, 7), ("avro://"))) { 21 | parts.protocol = source_parts::AVRO; 22 | } 23 | 24 | if (uri.size() > 6 && boost::iequals(uri.substr(0, 6), ("tds://"))) { 25 | parts.protocol = source_parts::TDS; 26 | } 27 | 28 | if (uri.size() > 11 && boost::iequals(uri.substr(0, 11), ("postgres://"))) { 29 | parts.protocol = source_parts::POSTGRES; 30 | } 31 | 32 | if (uri.size() > 5 && boost::iequals(uri.substr(0, 5), ("bb://"))) { 33 | const size_t host_start = 5; 34 | parts.protocol = source_parts::BB_GRPC; 35 | parts.host = uri.substr(host_start, uri.find("/", host_start) - host_start); 36 | const size_t topic_start = host_start + parts.host.size() + 1; 37 | parts.topic = uri.substr(topic_start, uri.find(":", topic_start) - topic_start); 38 | 39 | parts.partition = 0; // fixme 40 | 41 | if (parts.topic.empty()) { 42 | LOG(ERROR) << "bad topic"; 43 | //return nullptr; 44 | } 45 | 46 | if (parts.host.empty()) { 47 | LOG(ERROR) << "bad host"; 48 | //throw 49 | } 50 | } 51 | 52 | return parts; 53 | } 54 | } 
-------------------------------------------------------------------------------- /utils_to_fix/source_provider.h: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #ifdef KSPP_GRPC 7 | #include 8 | #endif 9 | 10 | #ifdef KSPP_POSTGRES 11 | #include <../libraries/kspp-pg/postgres_consumer.h> 12 | #endif 13 | 14 | #pragma once 15 | 16 | namespace kspp { 17 | struct source_parts { 18 | enum protocol_type_t { 19 | NONE, KAFKA, AVRO, TDS, POSTGRES, BB_GRPC 20 | }; 21 | 22 | protocol_type_t protocol; 23 | std::string host; 24 | int port; 25 | std::string topic; 26 | int partition; 27 | }; 28 | 29 | source_parts split_url_parts(std::string uri); 30 | 31 | template 32 | std::shared_ptr > 33 | make_source(std::shared_ptr config, std::shared_ptr offset_store, std::string uri) { 34 | auto parts = split_url_parts(uri); 35 | 36 | switch (parts.protocol) { 37 | case source_parts::NONE: 38 | return nullptr; 39 | case source_parts::KAFKA: { 40 | // kafka://topic:partition 41 | std::make_shared>(config, parts.partition, parts.topic, 42 | config->avro_serdes(), config->avro_serdes()); 43 | } 44 | case source_parts::AVRO: 45 | return nullptr; 46 | case source_parts::TDS: 47 | return nullptr; 48 | #ifdef KSPP_POSTGRES 49 | case source_parts::POSTGRES: { 50 | std::string consumer_group = ""; 51 | kspp::connect::connection_params connection_params; 52 | kspp::connect::table_params table_params; 53 | std::string query; 54 | std::string id_column; 55 | std::string ts_column; 56 | //std::make_shared>(parts.partition, parts.topic, consumer_group, connection_params, table_params, query, id_column, ts_column, config->get_schema_registry()); 57 | } 58 | return nullptr; 59 | #endif 60 | 61 | #ifdef KSPP_GRPC 62 | case source_parts::BB_GRPC: { 63 | // bb://host:port/topic:partition 64 | std::string uri; 65 | std::string api_key; 66 | std::string secret_access_key; 67 | return std::make_shared>(config, 
parts.partition, parts.topic, offset_store, uri, api_key, secret_access_key); 68 | } 69 | #endif 70 | } 71 | } 72 | } 73 | -------------------------------------------------------------------------------- /windows/rebuild_windows_vs14-all-options.bat: -------------------------------------------------------------------------------- 1 | set VISUALSTUDIO_VERSION_MAJOR=14 2 | ECHO ===== CMake for 64-bit ====== 3 | call "C:\Program Files (x86)\Microsoft Visual Studio %VISUALSTUDIO_VERSION_MAJOR%.0\VC\vcvarsall.bat" amd64 4 | 5 | 6 | echo BUILDING 7 | rmdir /s /q build 8 | mkdir build 9 | cd build 10 | cmake -G "Visual Studio %VISUALSTUDIO_VERSION_MAJOR% Win64" -DENABLE_AVRO=ON -DENABLE_ROCKSDB=ON -DBUILD_SAMPLES=ON -DBUILD_TESTS=ON .. 11 | msbuild ALL_BUILD.vcxproj /p:Configuration=Debug /p:Platform=x64 /maxcpucount:12 12 | msbuild ALL_BUILD.vcxproj /p:Configuration=Release /p:Platform=x64 /maxcpucount:12 13 | cd .. 14 | -------------------------------------------------------------------------------- /windows/rebuild_windows_vs14-minimal.bat: -------------------------------------------------------------------------------- 1 | set VISUALSTUDIO_VERSION_MAJOR=14 2 | ECHO ===== CMake for 64-bit ====== 3 | call "C:\Program Files (x86)\Microsoft Visual Studio %VISUALSTUDIO_VERSION_MAJOR%.0\VC\vcvarsall.bat" amd64 4 | 5 | 6 | echo BUILDING 7 | rmdir /s /q build 8 | mkdir build 9 | cd build 10 | cmake -G "Visual Studio %VISUALSTUDIO_VERSION_MAJOR% Win64" .. 11 | msbuild ALL_BUILD.vcxproj /p:Configuration=Debug /p:Platform=x64 /maxcpucount:12 12 | msbuild ALL_BUILD.vcxproj /p:Configuration=Release /p:Platform=x64 /maxcpucount:12 13 | cd .. 
14 | -------------------------------------------------------------------------------- /windows/rebuild_windows_vs14-test.bat: -------------------------------------------------------------------------------- 1 | set VISUALSTUDIO_VERSION_MAJOR=14 2 | ECHO ===== CMake for 64-bit ====== 3 | call "C:\Program Files (x86)\Microsoft Visual Studio %VISUALSTUDIO_VERSION_MAJOR%.0\VC\vcvarsall.bat" amd64 4 | 5 | 6 | echo BUILDING 7 | rmdir /s /q build 8 | mkdir build 9 | cd build 10 | cmake -G "Visual Studio %VISUALSTUDIO_VERSION_MAJOR% Win64" -DENABLE_AVRO=ON -DENABLE_ROCKSDB=ON -DBUILD_SAMPLES=OFF -DBUILD_TESTS=ON .. 11 | msbuild ALL_BUILD.vcxproj /p:Configuration=Debug /p:Platform=x64 /maxcpucount:12 12 | msbuild ALL_BUILD.vcxproj /p:Configuration=Release /p:Platform=x64 /maxcpucount:12 13 | cd .. 14 | --------------------------------------------------------------------------------