├── bin ├── kwack └── kwack-run-class ├── .gitignore ├── .editorconfig ├── src ├── test │ ├── proto │ │ ├── Simple.proto │ │ └── Complex.proto │ ├── resources │ │ ├── log4j.properties │ │ ├── ConfluentMetric.proto │ │ └── schema_registry_key.json │ └── java │ │ └── io │ │ └── kcache │ │ └── kwack │ │ ├── AbstractSchemaTest.java │ │ ├── util │ │ ├── LocalClusterTestHarness.java │ │ └── RestApp.java │ │ ├── ProtobufTest.java │ │ ├── ProtobufNoSRTest.java │ │ ├── AvroBenchmark.java │ │ ├── JsonNoSchemaTest.java │ │ ├── AvroKeyTest.java │ │ ├── AvroTest.java │ │ └── JsonSchemaTest.java └── main │ ├── java │ └── io │ │ └── kcache │ │ └── kwack │ │ ├── transformer │ │ ├── Transformer.java │ │ ├── Context.java │ │ ├── avro │ │ │ └── AvroTransformer.java │ │ └── json │ │ │ └── JsonTransformer.java │ │ ├── sqlline │ │ ├── KwackApplication.java │ │ └── KwackPromptHandler.java │ │ ├── schema │ │ ├── ColumnDefsContainer.java │ │ ├── ListColumnDef.java │ │ ├── DecimalColumnDef.java │ │ ├── MapColumnDef.java │ │ ├── EnumColumnDef.java │ │ ├── ColumnStrategy.java │ │ ├── ColumnDef.java │ │ ├── UnionColumnDef.java │ │ └── StructColumnDef.java │ │ ├── util │ │ └── Jackson.java │ │ └── KwackMain.java │ └── assembly │ ├── jar-with-dependencies.xml │ └── package.xml ├── findbugs-exclude.xml ├── .github ├── dependabot.yml └── workflows │ └── build.yml ├── config ├── kwack.properties └── log4j.properties ├── BENCHMARK.md ├── README.md └── LICENSE /bin/kwack: -------------------------------------------------------------------------------- 1 | exec $(dirname $0)/kwack-run-class io.kcache.kwack.KwackMain "$@" 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | *.iml 3 | lib_managed 4 | src_managed 5 | target 6 | *.ipr 7 | *.iws 8 | *.swp 9 | .DS_Store 10 | dependency-reduced-pom.xml 11 | htmlReport 12 | logs 13 | -------------------------------------------------------------------------------- /.editorconfig: -------------------------------------------------------------------------------- 1 | # http://editorconfig.org 2 | 3 | root = true 4 | 5 | [*] 6 | charset = utf-8 7 | indent_style = space 8 | 9 | [*.java] 10 | indent_style = space 11 | indent_size = 4 12 | 13 | [*.md] 14 | indent_style = space 15 | indent_size = 2 16 | 17 | [*.xml] 18 | indent_style = space 19 | indent_size = 4 20 | -------------------------------------------------------------------------------- /src/test/proto/Simple.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto3"; 2 | 3 | package io.kcache.kwack.proto; 4 | 5 | option java_package = "io.kcache.kwack.proto"; 6 | option java_outer_classname = "SimpleProto"; 7 | option java_multiple_files = false; 8 | 9 | message Simple { 10 | int32 id = 1; 11 | optional string name = 2; 12 | } -------------------------------------------------------------------------------- /src/test/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | log4j.rootLogger=INFO, stdout 2 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender 3 | log4j.appender.stdout.Target=System.out 4 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 5 | log4j.appender.stdout.layout.ConversionPattern=%d{yyyy-MM-dd HH:mm:ss} %-5p %c - %m%n 6 | log4j.logger.io.kcache=INFO, stdout 7 | log4j.additivity.io.kcache=false 8 | 
-------------------------------------------------------------------------------- /findbugs-exclude.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /src/main/java/io/kcache/kwack/transformer/Transformer.java: -------------------------------------------------------------------------------- 1 | package io.kcache.kwack.transformer; 2 | 3 | import io.confluent.kafka.schemaregistry.ParsedSchema; 4 | import io.kcache.kwack.schema.ColumnDef; 5 | 6 | public interface Transformer { 7 | ColumnDef schemaToColumnDef(Context ctx, ParsedSchema parsedSchema); 8 | 9 | Object messageToColumn( 10 | Context ctx, ParsedSchema parsedSchema, Object message, ColumnDef columnDef); 11 | } 12 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: maven 4 | directory: "/" 5 | schedule: 6 | interval: daily 7 | open-pull-requests-limit: 10 8 | ignore: 9 | - dependency-name: io.netty:netty-tcnative-boringssl-static 10 | versions: 11 | - 2.0.36.Final 12 | - 2.0.38.Final 13 | - dependency-name: org.mockito:mockito-core 14 | versions: 15 | - 3.7.7 16 | - 3.8.0 17 | - dependency-name: org.scala-lang:scala-library 18 | versions: 19 | - 2.13.4 20 | -------------------------------------------------------------------------------- /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: build 2 | 3 | on: [push] 4 | 5 | jobs: 6 | build: 7 | runs-on: ${{ matrix.os }} 8 | 9 | strategy: 10 | matrix: 11 | os: [ubuntu-22.04, ubuntu-latest] 12 | 13 | steps: 14 | - uses: actions/checkout@v2 15 | - name: Set up JDK 17 16 | uses: actions/setup-java@v2 17 | with: 18 | java-version: '17' 19 | distribution: 'adopt' 20 | - name: Build with Maven 21 | run: mvn -B package --file pom.xml 22 | -------------------------------------------------------------------------------- /config/kwack.properties: -------------------------------------------------------------------------------- 1 | # Topics to manage 2 | topics=topic1 3 | 4 | # Key serdes (default is binary) 5 | key.serdes=topic1=string 6 | 7 | # Value serdes (default is latest) 8 | value.serdes=topic1=latest 9 | 10 | # The Schema Registry URL 11 | schema.registry.url=http://localhost:8081 12 | #basic.auth.credentials.source=USER_INFO 13 | #basic.auth.user.info={{ SR_API_KEY }}:{{ SR_API_SECRET }} 14 | 15 | # The bootstrap servers for your Kafka cluster 16 | bootstrap.servers=localhost:9092 17 | #security.protocol=SASL_SSL 18 | #sasl.jaas.config=org.apache.kafka.common.security.plain.PlainLoginModule required username='{{ CLUSTER_API_KEY }}' password='{{ CLUSTER_API_SECRET }}'; 19 | #sasl.mechanism=PLAIN -------------------------------------------------------------------------------- /config/log4j.properties: -------------------------------------------------------------------------------- 1 | log4j.rootLogger=WARN, stdout, file 2 | 3 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender 4 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout 5 | log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c:%L)%n 6 | 7 | log4j.logger.kafka=ERROR, stdout 8 | log4j.logger.org.apache.kafka=ERROR, stdout 9 | log4j.additivity.kafka.server=false 10 | 11 | 
log4j.appender.file=org.apache.log4j.RollingFileAppender 12 | log4j.appender.file.maxBackupIndex=10 13 | log4j.appender.file.maxFileSize=100MB 14 | log4j.appender.file.File=${kwack.log.dir}/kwack.log 15 | log4j.appender.file.layout=org.apache.log4j.PatternLayout 16 | log4j.appender.file.layout.ConversionPattern=[%d] %p %m (%c)%n 17 | -------------------------------------------------------------------------------- /src/main/assembly/jar-with-dependencies.xml: -------------------------------------------------------------------------------- 1 | <assembly xmlns="http://maven.apache.org/ASSEMBLY/2.0.0" 2 | xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" 3 | xsi:schemaLocation="http://maven.apache.org/ASSEMBLY/2.0.0 http://maven.apache.org/xsd/assembly-2.0.0.xsd"> 4 | 5 | <id>jar-with-dependencies</id> 6 | <formats> 7 | <format>jar</format> 8 | </formats> 9 | <includeBaseDirectory>false</includeBaseDirectory> 10 | <dependencySets> 11 | <dependencySet> 12 | <outputDirectory>/</outputDirectory> 13 | <useProjectArtifact>true</useProjectArtifact> 14 | <unpack>true</unpack> 15 | <scope>runtime</scope> 16 | </dependencySet> 17 | </dependencySets> 18 | 19 | <containerDescriptorHandlers> 20 | <containerDescriptorHandler> 21 | <handlerName>metaInf-services</handlerName> 22 | </containerDescriptorHandler> 23 | </containerDescriptorHandlers> 24 | </assembly> -------------------------------------------------------------------------------- /src/main/java/io/kcache/kwack/sqlline/KwackApplication.java: -------------------------------------------------------------------------------- 1 | package io.kcache.kwack.sqlline; 2 | 3 | import sqlline.Application; 4 | import sqlline.BuiltInProperty; 5 | import sqlline.PromptHandler; 6 | import sqlline.SqlLine; 7 | import sqlline.SqlLineOpts; 8 | 9 | public class KwackApplication extends Application { 10 | 11 | public KwackApplication() { 12 | super(); 13 | } 14 | 15 | @Override 16 | public SqlLineOpts getOpts(SqlLine sqlline) { 17 | SqlLineOpts opts = super.getOpts(sqlline); 18 | opts.set(BuiltInProperty.CONNECT_INTERACTION_MODE, "notAskCredentials"); 19 | opts.set(BuiltInProperty.MAX_WIDTH, 120); 20 | return opts; 21 | } 22 | 23 | @Override 24 | public PromptHandler getPromptHandler(SqlLine sqlLine) { 25 | return new KwackPromptHandler(sqlLine); 26 | } 27 | 28 | @Override 29 | public String getInfoMessage() { 30 | return "Welcome to kwack!\n" 31 | + "Enter \"!help\" for usage hints.\n\n" 32 | + " ___(.)>\n" 33 | + "~~~~~~\\___)~~~~~~\n"; 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /src/main/java/io/kcache/kwack/schema/ColumnDefsContainer.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to you under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License.
16 | */ 17 | package io.kcache.kwack.schema; 18 | 19 | import java.util.LinkedHashMap; 20 | 21 | public interface ColumnDefsContainer { 22 | LinkedHashMap<String, ColumnDef> getColumnDefs(); 23 | } 24 | -------------------------------------------------------------------------------- /src/main/java/io/kcache/kwack/sqlline/KwackPromptHandler.java: -------------------------------------------------------------------------------- 1 | package io.kcache.kwack.sqlline; 2 | 3 | import sqlline.PromptHandler; 4 | import sqlline.SqlLine; 5 | 6 | public class KwackPromptHandler extends PromptHandler { 7 | 8 | public KwackPromptHandler(SqlLine sqlline) { 9 | super(sqlline); 10 | } 11 | 12 | @Override 13 | protected String getDefaultPrompt(int connectionIndex, String url, String defaultPrompt) { 14 | if (url != null && !url.isEmpty()) { 15 | if (url.contains(";")) { 16 | url = url.substring(0, url.indexOf(";")); 17 | } 18 | 19 | if (url.contains("?")) { 20 | url = url.substring(0, url.indexOf("?")); 21 | } 22 | 23 | //String resultPrompt = connectionIndex + ": " + url; 24 | String resultPrompt = url; 25 | if (resultPrompt.length() > 45) { 26 | resultPrompt = resultPrompt.substring(0, 45); 27 | } 28 | 29 | return resultPrompt + "> "; 30 | } else { 31 | return defaultPrompt; 32 | } 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /src/test/proto/Complex.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto3"; 2 | 3 | package io.kcache.kwack.proto; 4 | 5 | import "confluent/meta.proto"; 6 | import "confluent/type/decimal.proto"; 7 | import "google/protobuf/timestamp.proto"; 8 | import "google/type/date.proto"; 9 | import "google/type/timeofday.proto"; 10 | 11 | option java_package = "io.kcache.kwack.proto"; 12 | option java_outer_classname = "ComplexProto"; 13 | option java_multiple_files = false; 14 | 15 | message Complex { 16 | optional string name = 1; 17 | string mystring = 2; 18 | bytes mybytes = 3; 19 | int32 myint = 4; 20 | uint32 myuint = 5; 21 | int64 mylong = 6; 22 | uint64 myulong = 7; 23 | float myfloat = 8; 24 | double mydouble = 9; 25 | bool myboolean = 10; 26 | Kind kind = 11; 27 | oneof myoneof { 28 | string myoneofstring = 12; 29 | int32 myoneofint = 13; 30 | } 31 | repeated string str_array = 14; 32 | repeated Data data_array = 15; 33 | map<string, Data> data_map = 16; 34 | confluent.type.Decimal decimal = 17 [(confluent.field_meta) = { params: [ 35 | { key: "precision", value: "5" }, 36 | { key: "scale", value: "2" } 37 | ]}]; 38 | google.type.Date date = 18; 39 | google.type.TimeOfDay time = 19; 40 | google.protobuf.Timestamp timestamp = 20; 41 | } 42 | 43 | message Data { 44 | string data = 1; 45 | } 46 | 47 | enum Kind { 48 | ZERO = 0; 49 | ONE = 1; 50 | TWO = 2; 51 | } -------------------------------------------------------------------------------- /src/main/assembly/package.xml: -------------------------------------------------------------------------------- 1 | <assembly xmlns="http://maven.apache.org/ASSEMBLY/2.0.0" 2 | xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" 3 | xsi:schemaLocation="http://maven.apache.org/ASSEMBLY/2.0.0 4 | http://maven.apache.org/xsd/assembly-2.0.0.xsd"> 5 | 6 | <id>package</id> 7 | <formats> 8 | <format>dir</format> 9 | <format>zip</format> 10 | <format>tar.gz</format> 11 | </formats> 12 | <baseDirectory>kwack-${version}</baseDirectory> 13 | <includeBaseDirectory>true</includeBaseDirectory> 14 | <fileSets> 15 | <fileSet> 16 | <directory>${project.build.directory}</directory> 17 | <outputDirectory>lib</outputDirectory> 18 | <includes> 19 | <include>*.jar</include> 20 | </includes> 21 | </fileSet> 22 | <fileSet> 23 | <directory>${project.basedir}/bin</directory> 24 | <outputDirectory>bin</outputDirectory> 25 | <includes> 26 | <include>*</include> 27 | </includes> 28 | </fileSet> 29 | <fileSet> 30 | <directory>${project.basedir}/config</directory> 31 | <outputDirectory>config</outputDirectory> 32 | <includes> 33 | <include>*</include> 34 | </includes> 35 | </fileSet> 36 | </fileSets> 37 | 38 | </assembly> -------------------------------------------------------------------------------- /src/main/java/io/kcache/kwack/schema/ListColumnDef.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the
Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to you under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | package io.kcache.kwack.schema; 18 | 19 | import io.kcache.kwack.transformer.Context; 20 | import java.util.Objects; 21 | import org.duckdb.DuckDBColumnType; 22 | 23 | public class ListColumnDef extends ColumnDef { 24 | private final ColumnDef itemDef; 25 | 26 | public ListColumnDef(ColumnDef itemDef) { 27 | this(itemDef, ColumnStrategy.NOT_NULL_STRATEGY); 28 | } 29 | 30 | public ListColumnDef(ColumnDef itemDef, ColumnStrategy columnStrategy) { 31 | super(DuckDBColumnType.LIST, columnStrategy); 32 | this.itemDef = itemDef; 33 | } 34 | 35 | public ColumnDef getItemDef() { 36 | return itemDef; 37 | } 38 | 39 | @Override 40 | public String toDdl(Context ctx) { 41 | return itemDef.toDdl(ctx) + "[]"; 42 | } 43 | 44 | @Override 45 | public boolean equals(Object o) { 46 | if (this == o) { 47 | return true; 48 | } 49 | if (o == null || getClass() != o.getClass()) { 50 | return false; 51 | } 52 | if (!super.equals(o)) { 53 | return false; 54 | } 55 | ListColumnDef that = (ListColumnDef) o; 56 | return Objects.equals(itemDef, that.itemDef); 57 | } 58 | 59 | @Override 60 | public int hashCode() { 61 | return Objects.hash(super.hashCode(), itemDef); 62 | } 63 | } 64 | -------------------------------------------------------------------------------- /src/main/java/io/kcache/kwack/schema/DecimalColumnDef.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to you under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | package io.kcache.kwack.schema; 18 | 19 | import io.kcache.kwack.transformer.Context; 20 | import java.util.Objects; 21 | import org.duckdb.DuckDBColumnType; 22 | 23 | public class DecimalColumnDef extends ColumnDef { 24 | private final int precision; 25 | private final int scale; 26 | 27 | public DecimalColumnDef(int precision, int scale) { 28 | this(precision, scale, ColumnStrategy.NOT_NULL_STRATEGY); 29 | } 30 | 31 | public DecimalColumnDef(int precision, int scale, ColumnStrategy columnStrategy) { 32 | super(DuckDBColumnType.DECIMAL, columnStrategy); 33 | this.precision = precision; 34 | this.scale = scale; 35 | } 36 | 37 | public int getPrecision() { 38 | return precision; 39 | } 40 | 41 | public int getScale() { 42 | return scale; 43 | } 44 | 45 | @Override 46 | public String toDdl(Context ctx) { 47 | return columnType.name() + "(" + precision + ", " + scale + ")"; 48 | } 49 | 50 | @Override 51 | public boolean equals(Object o) { 52 | if (this == o) { 53 | return true; 54 | } 55 | if (o == null || getClass() != o.getClass()) { 56 | return false; 57 | } 58 | if (!super.equals(o)) { 59 | return false; 60 | } 61 | DecimalColumnDef that = (DecimalColumnDef) o; 62 | return precision == that.precision && scale == that.scale; 63 | } 64 | 65 | @Override 66 | public int hashCode() { 67 | return Objects.hash(super.hashCode(), precision, scale); 68 | } 69 | } 70 | -------------------------------------------------------------------------------- /src/test/java/io/kcache/kwack/AbstractSchemaTest.java: -------------------------------------------------------------------------------- 1 | package io.kcache.kwack; 2 | 3 | import io.kcache.kwack.util.LocalClusterTestHarness; 4 | import java.nio.ByteBuffer; 5 | import java.util.Properties; 6 | import org.apache.kafka.clients.producer.KafkaProducer; 7 | import org.apache.kafka.clients.producer.ProducerConfig; 8 | import org.apache.kafka.clients.producer.ProducerRecord; 9 | import org.apache.kafka.common.utils.Bytes; 10 | 11 | public abstract class AbstractSchemaTest extends LocalClusterTestHarness { 12 | 13 | private static final String SCHEMA_REGISTRY_URL = "schema.registry.url"; 14 | 15 | protected Properties createProducerProps(String schemaRegistryUrl) { 16 | Properties props = new Properties(); 17 | props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokerList); 18 | props.put(SCHEMA_REGISTRY_URL, schemaRegistryUrl); 19 | props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, getKeySerializer()); 20 | props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, getValueSerializer()); 21 | return props; 22 | } 23 | 24 | protected abstract String getTopic(); 25 | 26 | protected Class<?> getKeySerializer() { 27 | return org.apache.kafka.common.serialization.BytesSerializer.class; 28 | } 29 | 30 | protected abstract Class<?> getValueSerializer(); 31 | 32 | protected KafkaProducer<Object, Object> createProducer(Properties props) { 33 | return new KafkaProducer<>(props); 34 | } 35 | 36 | protected void produce(KafkaProducer<Object, Object> producer, String topic, Object[] objects) { 37 | produce(producer, topic, null, objects); 38 | } 39 | 40 | protected void produce(KafkaProducer<Object, Object> producer, String topic, Object[] keys, Object[] values) { 41 | ProducerRecord<Object, Object> record; 42 | for (int i = 0; i < values.length; i++) { 43 | Object value = values[i]; 44 | Object key; 45 | if (keys != null) { 46 | key = keys[i]; 47 | } else { 48 | key = Bytes.wrap(ByteBuffer.allocate(4).putInt(value.hashCode()).array()); 49 | } 50 | record = new ProducerRecord<>(topic, key, value); 51 | producer.send(record); 52 |
} 53 | } 54 | 55 | @Override 56 | protected void injectKwackProperties(Properties props) { 57 | super.injectKwackProperties(props); 58 | String topic = getTopic(); 59 | props.put(KwackConfig.TOPICS_CONFIG, topic); 60 | props.put(KwackConfig.QUERY_CONFIG, "select * from '" + topic + "'"); 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /src/main/java/io/kcache/kwack/schema/MapColumnDef.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to you under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | package io.kcache.kwack.schema; 18 | 19 | import io.kcache.kwack.transformer.Context; 20 | import java.util.Objects; 21 | import org.duckdb.DuckDBColumnType; 22 | 23 | public class MapColumnDef extends ColumnDef { 24 | private final ColumnDef keyDef; 25 | private final ColumnDef valueDef; 26 | 27 | public MapColumnDef(ColumnDef keyDef, ColumnDef valueDef) { 28 | this(keyDef, valueDef, ColumnStrategy.NOT_NULL_STRATEGY); 29 | } 30 | 31 | public MapColumnDef(ColumnDef keyDef, ColumnDef valueDef, ColumnStrategy columnStrategy) { 32 | super(DuckDBColumnType.MAP, columnStrategy); 33 | this.keyDef = keyDef; 34 | this.valueDef = valueDef; 35 | } 36 | 37 | public ColumnDef getKeyDef() { 38 | return keyDef; 39 | } 40 | 41 | public ColumnDef getValueDef() { 42 | return valueDef; 43 | } 44 | 45 | @Override 46 | public String toDdl(Context ctx) { 47 | return columnType.name() + "(" + keyDef.toDdl(ctx) + ", " + valueDef.toDdl(ctx) + ")"; 48 | } 49 | 50 | @Override 51 | public boolean equals(Object o) { 52 | if (this == o) { 53 | return true; 54 | } 55 | if (o == null || getClass() != o.getClass()) { 56 | return false; 57 | } 58 | if (!super.equals(o)) { 59 | return false; 60 | } 61 | MapColumnDef that = (MapColumnDef) o; 62 | return Objects.equals(keyDef, that.keyDef) 63 | && Objects.equals(valueDef, that.valueDef); 64 | } 65 | 66 | @Override 67 | public int hashCode() { 68 | return Objects.hash(super.hashCode(), keyDef, valueDef); 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /src/main/java/io/kcache/kwack/schema/EnumColumnDef.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to you under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. 
You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | package io.kcache.kwack.schema; 18 | 19 | import io.kcache.kwack.transformer.Context; 20 | import java.util.List; 21 | import java.util.Objects; 22 | import org.duckdb.DuckDBColumnType; 23 | 24 | public class EnumColumnDef extends ColumnDef { 25 | private final List<String> enums; 26 | 27 | public EnumColumnDef(List<String> enums) { 28 | this(enums, ColumnStrategy.NULL_STRATEGY); 29 | } 30 | 31 | public EnumColumnDef(List<String> enums, ColumnStrategy columnStrategy) { 32 | super(DuckDBColumnType.ENUM, columnStrategy); 33 | this.enums = enums; 34 | } 35 | 36 | public List<String> getEnums() { 37 | return enums; 38 | } 39 | 40 | @Override 41 | public String toDdl(Context ctx) { 42 | StringBuilder sb = new StringBuilder(columnType.name()); 43 | sb.append(" ("); 44 | for (int i = 0; i < enums.size(); i++) { 45 | sb.append("'"); 46 | sb.append(enums.get(i)); 47 | sb.append("'"); 48 | if (i < enums.size() - 1) { 49 | sb.append(", "); 50 | } 51 | } 52 | sb.append(")"); 53 | return sb.toString(); 54 | } 55 | 56 | @Override 57 | public boolean equals(Object o) { 58 | if (this == o) { 59 | return true; 60 | } 61 | if (o == null || getClass() != o.getClass()) { 62 | return false; 63 | } 64 | if (!super.equals(o)) { 65 | return false; 66 | } 67 | EnumColumnDef that = (EnumColumnDef) o; 68 | return Objects.equals(enums, that.enums); 69 | } 70 | 71 | @Override 72 | public int hashCode() { 73 | return Objects.hash(super.hashCode(), enums); 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /src/main/java/io/kcache/kwack/schema/ColumnStrategy.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to you under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License.
16 | */ 17 | package io.kcache.kwack.schema; 18 | 19 | import io.kcache.kwack.transformer.Context; 20 | 21 | public interface ColumnStrategy { 22 | 23 | ColumnStrategy NOT_NULL_STRATEGY = new NotNullStrategy(); 24 | 25 | ColumnStrategy NULL_STRATEGY = new NullStrategy(); 26 | 27 | StrategyType getType(); 28 | 29 | default Object getDefaultValue() { 30 | return null; 31 | } 32 | 33 | enum StrategyType { 34 | NOT_NULL, 35 | NULL, 36 | DEFAULT 37 | } 38 | 39 | String toDdl(Context ctx); 40 | 41 | class NotNullStrategy implements ColumnStrategy { 42 | @Override 43 | public StrategyType getType() { 44 | return StrategyType.NOT_NULL; 45 | } 46 | 47 | @Override 48 | public String toDdl(Context ctx) { 49 | return "NOT NULL"; 50 | } 51 | } 52 | 53 | class NullStrategy implements ColumnStrategy { 54 | @Override 55 | public StrategyType getType() { 56 | return StrategyType.NULL; 57 | } 58 | 59 | @Override 60 | public String toDdl(Context ctx) { 61 | return "NULL"; 62 | } 63 | } 64 | 65 | class DefaultStrategy implements ColumnStrategy { 66 | private final Object defaultValue; 67 | 68 | public DefaultStrategy(Object defaultValue) { 69 | this.defaultValue = defaultValue; 70 | } 71 | 72 | @Override 73 | public StrategyType getType() { 74 | return StrategyType.DEFAULT; 75 | } 76 | 77 | @Override 78 | public Object getDefaultValue() { 79 | return defaultValue; 80 | } 81 | 82 | @Override 83 | public String toDdl(Context ctx) { 84 | return "DEFAULT " + defaultValue; 85 | } 86 | 87 | } 88 | } 89 | -------------------------------------------------------------------------------- /src/main/java/io/kcache/kwack/schema/ColumnDef.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to you under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 
16 | */ 17 | package io.kcache.kwack.schema; 18 | 19 | import io.kcache.kwack.transformer.Context; 20 | import java.util.Objects; 21 | import org.duckdb.DuckDBColumnType; 22 | 23 | public class ColumnDef { 24 | protected final DuckDBColumnType columnType; 25 | protected ColumnStrategy columnStrategy; 26 | 27 | public ColumnDef(DuckDBColumnType columnType) { 28 | this(columnType, ColumnStrategy.NOT_NULL_STRATEGY); 29 | } 30 | 31 | public ColumnDef(DuckDBColumnType columnType, ColumnStrategy columnStrategy) { 32 | this.columnType = columnType; 33 | this.columnStrategy = columnStrategy; 34 | } 35 | 36 | public DuckDBColumnType getColumnType() { 37 | return columnType; 38 | } 39 | 40 | public ColumnStrategy getColumnStrategy() { 41 | return columnStrategy; 42 | } 43 | 44 | public void setColumnStrategy(ColumnStrategy columnStrategy) { 45 | this.columnStrategy = columnStrategy; 46 | } 47 | 48 | public String toDdl(Context ctx) { 49 | return columnType.name(); 50 | } 51 | 52 | public String toDdlWithStrategy(Context ctx) { 53 | String ddl = toDdl(ctx); 54 | if (columnStrategy != null) { 55 | // TODO fix default? 56 | return ddl + " " + columnStrategy.toDdl(ctx); 57 | } else { 58 | return ddl; 59 | } 60 | } 61 | 62 | @Override 63 | public boolean equals(Object o) { 64 | if (this == o) { 65 | return true; 66 | } 67 | if (o == null || getClass() != o.getClass()) { 68 | return false; 69 | } 70 | ColumnDef columnDef = (ColumnDef) o; 71 | return columnType == columnDef.columnType 72 | && Objects.equals(columnStrategy, columnDef.columnStrategy); 73 | } 74 | 75 | @Override 76 | public int hashCode() { 77 | return Objects.hash(columnType, columnStrategy); 78 | } 79 | } 80 | -------------------------------------------------------------------------------- /src/main/java/io/kcache/kwack/transformer/Context.java: -------------------------------------------------------------------------------- 1 | package io.kcache.kwack.transformer; 2 | 3 | import io.kcache.kwack.schema.ColumnDef; 4 | import io.kcache.kwack.schema.UnionColumnDef; 5 | import java.sql.Array; 6 | import java.sql.SQLException; 7 | import java.sql.Struct; 8 | import java.util.Collections; 9 | import java.util.HashMap; 10 | import java.util.IdentityHashMap; 11 | import java.util.Map; 12 | import java.util.Set; 13 | import org.duckdb.DuckDBConnection; 14 | 15 | public class Context { 16 | private final boolean isKey; 17 | private final DuckDBConnection conn; 18 | private final Map<Object, ColumnDef> columnDefs; 19 | private final Map<UnionColumnDef, String> unionBranches; 20 | private final Set<ColumnDef> visited; 21 | private Object originalMessage; 22 | 23 | public Context(boolean isKey, DuckDBConnection conn) { 24 | this.isKey = isKey; 25 | this.conn = conn; 26 | this.columnDefs = new IdentityHashMap<>(); 27 | this.unionBranches = new IdentityHashMap<>(); 28 | this.visited = Collections.newSetFromMap(new IdentityHashMap<>()); 29 | } 30 | 31 | public boolean isKey() { 32 | return isKey; 33 | } 34 | 35 | public void put(Object key, ColumnDef value) { 36 | columnDefs.put(key, value); 37 | } 38 | 39 | public ColumnDef get(Object key) { 40 | return columnDefs.get(key); 41 | } 42 | 43 | public void putUnionBranch(UnionColumnDef key, String value) { 44 | unionBranches.put(key, value); 45 | } 46 | 47 | public String getUnionBranch(UnionColumnDef key) { 48 | return unionBranches.get(key); 49 | } 50 | 51 | public DuckDBConnection getConnection() { 52 | return conn; 53 | } 54 | 55 | public Array createArrayOf(String typeName, Object[] attributes) { 56 | try { 57 | return conn.createArrayOf(typeName,
attributes); 58 | } catch (SQLException e) { 59 | throw new RuntimeException(e); 60 | } 61 | } 62 | 63 | public Map<?, ?> createMap(String typeName, Map<?, ?> map) { 64 | return conn.createMap(typeName, map); 65 | } 66 | 67 | public Struct createStruct(String typeName, Object[] attributes) { 68 | try { 69 | return conn.createStruct(typeName, attributes); 70 | } catch (SQLException e) { 71 | throw new RuntimeException(e); 72 | } 73 | } 74 | 75 | public boolean visit(ColumnDef columnDef) { 76 | return visited.add(columnDef); 77 | } 78 | 79 | public boolean leave(ColumnDef columnDef) { 80 | return visited.remove(columnDef); 81 | } 82 | 83 | public Object getOriginalMessage() { 84 | return originalMessage; 85 | } 86 | 87 | public void setOriginalMessage(Object originalMessage) { 88 | this.originalMessage = originalMessage; 89 | } 90 | } 91 | -------------------------------------------------------------------------------- BENCHMARK.md: -------------------------------------------------------------------------------- 1 | # Kwack Avro Benchmark 2 | 3 | ## Quick Start 4 | 5 | ### Run benchmark with default record count (100,000 records): 6 | ```bash 7 | mvn test-compile exec:java -Dexec.classpathScope=test \ 8 | -Dexec.mainClass=io.kcache.kwack.KwackAvroReadBenchmark 9 | ``` 10 | 11 | ### Run benchmark with fewer records (faster for testing): 12 | ```bash 13 | mvn test-compile exec:java -Dexec.classpathScope=test \ 14 | -Dexec.mainClass=io.kcache.kwack.KwackAvroReadBenchmark \ 15 | -Dexec.args="-p recordCount=1000" 16 | ``` 17 | 18 | ### Run with multiple record counts: 19 | ```bash 20 | mvn test-compile exec:java -Dexec.classpathScope=test \ 21 | -Dexec.mainClass=io.kcache.kwack.KwackAvroReadBenchmark \ 22 | -Dexec.args="-p recordCount=1000,10000,100000" 23 | ``` 24 | 25 | ## Understanding the Output 26 | 27 | After the benchmark completes, you'll see output like: 28 | 29 | ``` 30 | Benchmark (recordCount) Mode Cnt Score Error Units 31 | KwackAvroReadBenchmark.readRecordsWithKwack 100000 thrpt 2 0.123 ops/s 32 | ``` 33 | 34 | Where: 35 | - **Mode**: `thrpt` = throughput (operations per second) 36 | - **Cnt**: Number of measurement iterations 37 | - **Score**: Operations per second - complete init+read+close cycles (higher is better) 38 | - **Units**: ops/s = complete Kwack lifecycle operations per second 39 | 40 | For example, a score of `0.123 ops/s` means Kwack can complete the full cycle (initialize, read 100K records, close) about once every 8 seconds. 41 | 42 | ## Benchmark Configuration 43 | 44 | - **Warmup**: 1 iteration × 3 seconds (JVM warmup phase) 45 | - **Measurement**: 2 iterations × 5 seconds (actual performance measurement) 46 | - **Setup**: Each test starts an embedded Kafka cluster, Schema Registry, and produces N records 47 | - **What is Measured**: The full Kwack lifecycle for each operation (see the sketch after this list): 48 | 1. Get KwackEngine instance 49 | 2. Configure engine 50 | 3. Initialize engine (`init()`) 51 | 4. Start and read all N records from Kafka 52 | 5. Close engine instance 53 | - **Result**: Operations per second - how many complete init+read+close cycles can be performed 54 |
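For reference, one measured operation corresponds roughly to the sketch below. The `getInstance`, `configure`, and `closeInstance` calls are taken from the test harness elsewhere in this repo; the read step is left as a comment since its exact API is not shown here.

```java
KwackEngine engine = KwackEngine.getInstance();
engine.configure(new KwackConfig(props)); // props point at the embedded Kafka cluster and Schema Registry
engine.init();
// ... start the engine and consume all N records from the topic ...
KwackEngine.closeInstance(); // the engine is torn down and recreated on every operation
```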
55 | ## Customizing the Benchmark 56 | 57 | You can customize the benchmark using JMH command-line options: 58 | 59 | ```bash 60 | # Run with more iterations for more accurate results 61 | mvn test-compile exec:java -Dexec.classpathScope=test \ 62 | -Dexec.mainClass=io.kcache.kwack.KwackAvroReadBenchmark \ 63 | -Dexec.args="-p recordCount=1000 -wi 2 -i 5" 64 | ``` 65 | 66 | Where: 67 | - `-p recordCount=X`: Set record count 68 | - `-wi N`: Number of warmup iterations 69 | - `-i N`: Number of measurement iterations 70 | - `-w Ns`: Warmup time per iteration (e.g., `-w 5s`) 71 | - `-r Ns`: Measurement time per iteration (e.g., `-r 10s`) 72 | 73 | ## Notes 74 | 75 | - The benchmark uses `@Fork(0)` which runs in the same JVM process 76 | - First run may be slower due to Maven dependency resolution 77 | - Results may vary based on system resources and JVM state 78 | 79 | -------------------------------------------------------------------------------- /src/main/java/io/kcache/kwack/util/Jackson.java: -------------------------------------------------------------------------------- 1 | package io.kcache.kwack.util; 2 | 3 | import static com.fasterxml.jackson.databind.DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES; 4 | 5 | import com.fasterxml.jackson.core.JsonFactory; 6 | import com.fasterxml.jackson.core.json.JsonReadFeature; 7 | import com.fasterxml.jackson.databind.DeserializationFeature; 8 | import com.fasterxml.jackson.databind.ObjectMapper; 9 | import com.fasterxml.jackson.databind.json.JsonMapper; 10 | import com.fasterxml.jackson.databind.node.JsonNodeFactory; 11 | import com.fasterxml.jackson.databind.node.ObjectNode; 12 | import java.util.TreeMap; 13 | 14 | /** 15 | * A utility class for Jackson. 16 | */ 17 | public class Jackson { 18 | private Jackson() { 19 | /* singleton */ 20 | } 21 | 22 | /** 23 | * Creates a new {@link ObjectMapper}. 24 | * 25 | * @return an object mapper 26 | */ 27 | public static ObjectMapper newObjectMapper() { 28 | return newObjectMapper(false); 29 | } 30 | 31 | /** 32 | * Creates a new {@link ObjectMapper}. 33 | * 34 | * @param sorted whether to sort object properties 35 | * @return an object mapper 36 | */ 37 | public static ObjectMapper newObjectMapper(boolean sorted) { 38 | final ObjectMapper mapper = JsonMapper.builder() 39 | .enable(JsonReadFeature.ALLOW_NON_NUMERIC_NUMBERS) 40 | .enable(JsonReadFeature.ALLOW_UNQUOTED_FIELD_NAMES) // for simpler ref specifications 41 | .build(); 42 | 43 | return configure(mapper, sorted); 44 | } 45 | 46 | /** 47 | * Creates a new {@link ObjectMapper} with a custom 48 | * {@link JsonFactory}. 49 | * 50 | * @param jsonFactory instance of {@link JsonFactory} to use 51 | * for the created {@link ObjectMapper} instance. 52 | * @return an object mapper 53 | */ 54 | public static ObjectMapper newObjectMapper(JsonFactory jsonFactory) { 55 | final ObjectMapper mapper = JsonMapper.builder(jsonFactory) 56 | .enable(JsonReadFeature.ALLOW_NON_NUMERIC_NUMBERS) 57 | .enable(JsonReadFeature.ALLOW_UNQUOTED_FIELD_NAMES) 58 | .build(); 59 | 60 | return configure(mapper, false); 61 | } 62 | 63 | private static ObjectMapper configure(ObjectMapper mapper, boolean sorted) { 64 | mapper.enable(DeserializationFeature.USE_BIG_DECIMAL_FOR_FLOATS); 65 | mapper.enable(DeserializationFeature.USE_LONG_FOR_INTS); 66 | mapper.disable(FAIL_ON_UNKNOWN_PROPERTIES); 67 | mapper.setNodeFactory(sorted 68 | ? 
new SortingNodeFactory(true) 69 | : JsonNodeFactory.withExactBigDecimals(true)); 70 | 71 | return mapper; 72 | } 73 | 74 | static class SortingNodeFactory extends JsonNodeFactory { 75 | public SortingNodeFactory(boolean bigDecimalExact) { 76 | super(bigDecimalExact); 77 | } 78 | 79 | @Override 80 | public ObjectNode objectNode() { 81 | return new ObjectNode(this, new TreeMap<>()); 82 | } 83 | } 84 | } 85 |
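A quick usage sketch for the sorted mapper above (a hypothetical demo class, not part of the repo): because `SortingNodeFactory` backs each `ObjectNode` with a `TreeMap`, object keys are re-serialized in sorted order.

```java
import com.fasterxml.jackson.databind.ObjectMapper;
import io.kcache.kwack.util.Jackson;

public class SortedMapperDemo {
    public static void main(String[] args) throws Exception {
        ObjectMapper sorted = Jackson.newObjectMapper(true);
        // readTree builds ObjectNodes via SortingNodeFactory, so keys come back ordered
        System.out.println(sorted.writeValueAsString(sorted.readTree("{\"b\":1,\"a\":2}")));
        // prints {"a":2,"b":1}
    }
}
```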
-------------------------------------------------------------------------------- /src/main/java/io/kcache/kwack/schema/UnionColumnDef.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to you under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | package io.kcache.kwack.schema; 18 | 19 | import io.kcache.kwack.transformer.Context; 20 | import java.util.LinkedHashMap; 21 | import java.util.Map; 22 | import java.util.Objects; 23 | import org.duckdb.DuckDBColumnType; 24 | 25 | public class UnionColumnDef extends ColumnDef implements ColumnDefsContainer { 26 | private final LinkedHashMap<String, ColumnDef> columnDefs; 27 | 28 | public UnionColumnDef(LinkedHashMap<String, ColumnDef> columnDefs) { 29 | this(columnDefs, ColumnStrategy.NOT_NULL_STRATEGY); 30 | } 31 | 32 | public UnionColumnDef( 33 | LinkedHashMap<String, ColumnDef> columnDefs, ColumnStrategy columnStrategy) { 34 | super(DuckDBColumnType.UNION, columnStrategy); 35 | this.columnDefs = columnDefs; 36 | } 37 | 38 | @Override 39 | public LinkedHashMap<String, ColumnDef> getColumnDefs() { 40 | return columnDefs; 41 | } 42 | 43 | @Override 44 | public String toDdl(Context ctx) { 45 | StringBuilder sb = new StringBuilder(columnType.name()); 46 | sb.append("("); 47 | int i = 0; 48 | for (Map.Entry<String, ColumnDef> entry : columnDefs.entrySet()) { 49 | String name = entry.getKey(); 50 | ColumnDef columnDef = entry.getValue(); 51 | sb.append("\""); 52 | sb.append(name); 53 | sb.append("\" "); 54 | sb.append(columnDef.toDdl(ctx)); 55 | if (i < columnDefs.size() - 1) { 56 | sb.append(", "); 57 | } 58 | i++; 59 | } 60 | sb.append(")"); 61 | return sb.toString(); 62 | } 63 | 64 | @Override 65 | public boolean equals(Object o) { 66 | if (this == o) { 67 | return true; 68 | } 69 | if (o == null || getClass() != o.getClass()) { 70 | return false; 71 | } 72 | if (!super.equals(o)) { 73 | return false; 74 | } 75 | UnionColumnDef that = (UnionColumnDef) o; 76 | return Objects.equals(columnDefs, that.columnDefs); 77 | } 78 | 79 | @Override 80 | public int hashCode() { 81 | return Objects.hash(super.hashCode(), columnDefs); 82 | } 83 | } 84 | -------------------------------------------------------------------------------- /src/main/java/io/kcache/kwack/schema/StructColumnDef.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed to the Apache Software Foundation (ASF) under one or more 3 | * contributor license agreements. See the NOTICE file distributed with 4 | * this work for additional information regarding copyright ownership. 5 | * The ASF licenses this file to you under the Apache License, Version 2.0 6 | * (the "License"); you may not use this file except in compliance with 7 | * the License. You may obtain a copy of the License at 8 | * 9 | * http://www.apache.org/licenses/LICENSE-2.0 10 | * 11 | * Unless required by applicable law or agreed to in writing, software 12 | * distributed under the License is distributed on an "AS IS" BASIS, 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | * See the License for the specific language governing permissions and 15 | * limitations under the License. 16 | */ 17 | package io.kcache.kwack.schema; 18 | 19 | import io.kcache.kwack.transformer.Context; 20 | import java.util.LinkedHashMap; 21 | import java.util.Map; 22 | import java.util.Objects; 23 | import org.duckdb.DuckDBColumnType; 24 | 25 | public class StructColumnDef extends ColumnDef implements ColumnDefsContainer { 26 | private final LinkedHashMap<String, ColumnDef> columnDefs; 27 | 28 | public StructColumnDef(LinkedHashMap<String, ColumnDef> columnDefs) { 29 | this(columnDefs, ColumnStrategy.NOT_NULL_STRATEGY); 30 | } 31 | 32 | public StructColumnDef( 33 | LinkedHashMap<String, ColumnDef> columnDefs, ColumnStrategy columnStrategy) { 34 | super(DuckDBColumnType.STRUCT, columnStrategy); 35 | this.columnDefs = columnDefs; 36 | } 37 | 38 | @Override 39 | public LinkedHashMap<String, ColumnDef> getColumnDefs() { 40 | return columnDefs; 41 | } 42 | 43 | @Override 44 | public String toDdl(Context ctx) { 45 | if (columnDefs.isEmpty()) { 46 | throw new IllegalArgumentException("Struct column definitions cannot be empty"); 47 | } 48 | if (!ctx.visit(this)) { 49 | throw new IllegalArgumentException("Struct column definitions cannot be recursive"); 50 | } 51 | try { 52 | StringBuilder sb = new StringBuilder(columnType.name()); 53 | sb.append("("); 54 | int i = 0; 55 | for (Map.Entry<String, ColumnDef> entry : columnDefs.entrySet()) { 56 | String name = entry.getKey(); 57 | ColumnDef columnDef = entry.getValue(); 58 | sb.append("\""); 59 | sb.append(name); 60 | sb.append("\" "); 61 | sb.append(columnDef.toDdl(ctx)); 62 | if (i < columnDefs.size() - 1) { 63 | sb.append(", "); 64 | } 65 | i++; 66 | } 67 | sb.append(")"); 68 | return sb.toString(); 69 | } finally { 70 | ctx.leave(this); 71 | } 72 | } 73 | 74 | @Override 75 | public boolean equals(Object o) { 76 | if (this == o) { 77 | return true; 78 | } 79 | if (o == null || getClass() != o.getClass()) { 80 | return false; 81 | } 82 | if (!super.equals(o)) { 83 | return false; 84 | } 85 | StructColumnDef that = (StructColumnDef) o; 86 | return Objects.equals(columnDefs, that.columnDefs); 87 | } 88 | 89 | @Override 90 | public int hashCode() { 91 | return Objects.hash(super.hashCode(), columnDefs); 92 | } 93 | } 94 |
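For illustration, a small hypothetical driver (not part of the repo) showing the DDL these column defs emit; a connection-less `Context` suffices here because `toDdl` only consults the visited set, not the connection.

```java
import io.kcache.kwack.schema.ColumnDef;
import io.kcache.kwack.schema.ColumnStrategy;
import io.kcache.kwack.schema.StructColumnDef;
import io.kcache.kwack.transformer.Context;
import java.util.LinkedHashMap;
import org.duckdb.DuckDBColumnType;

public class DdlDemo {
    public static void main(String[] args) {
        LinkedHashMap<String, ColumnDef> fields = new LinkedHashMap<>();
        fields.put("id", new ColumnDef(DuckDBColumnType.INTEGER));
        fields.put("name", new ColumnDef(DuckDBColumnType.VARCHAR, ColumnStrategy.NULL_STRATEGY));
        StructColumnDef struct = new StructColumnDef(fields);
        // prints: STRUCT("id" INTEGER, "name" VARCHAR)
        System.out.println(struct.toDdl(new Context(false, null)));
    }
}
```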
-d "$LOG_DIR" ]; then 19 | mkdir -p "$LOG_DIR" 20 | fi 21 | 22 | # logj4 settings 23 | if [ "x$KWACK_LOG4J_OPTS" = "x" ]; then 24 | # Test for files from dev -> packages so this will work as expected in dev if you have packages 25 | # installed 26 | if [ -e "$base_dir/config/log4j.properties" ]; then # Dev environment 27 | LOG4J_DIR="$base_dir/config/log4j.properties" 28 | fi 29 | 30 | # If Cygwin is detected, LOG4J_DIR is converted to Windows format. 31 | (( CYGWIN )) && LOG4J_DIR=$(cygpath --path --mixed "${LOG4J_DIR}") 32 | 33 | KWACK_LOG4J_OPTS="-Dlog4j.configuration=file:${LOG4J_DIR}" 34 | fi 35 | 36 | # If Cygwin is detected, LOG_DIR is converted to Windows format. 37 | (( CYGWIN )) && LOG_DIR=$(cygpath --path --mixed "${LOG_DIR}") 38 | 39 | KWACK_LOG4J_OPTS="-Dkwack.log.dir=$LOG_DIR $KWACK_LOG4J_OPTS" 40 | 41 | # Generic jvm settings you want to add 42 | if [ -z "$KWACK_OPTS" ]; then 43 | KWACK_OPTS="--add-opens java.base/java.nio=ALL-UNNAMED" 44 | fi 45 | 46 | # Which java to use 47 | if [ -z "$JAVA_HOME" ]; then 48 | JAVA="java" 49 | else 50 | JAVA="$JAVA_HOME/bin/java" 51 | fi 52 | 53 | # Memory options 54 | if [ -z "$KWACK_HEAP_OPTS" ]; then 55 | KWACK_HEAP_OPTS="-Xmx4G" 56 | fi 57 | 58 | # JVM performance options 59 | if [ -z "$KWACK_JVM_PERFORMANCE_OPTS" ]; then 60 | KWACK_JVM_PERFORMANCE_OPTS="-server -XX:+UseG1GC -XX:MaxGCPauseMillis=20 -XX:InitiatingHeapOccupancyPercent=35 -XX:+ExplicitGCInvokesConcurrent -Djava.awt.headless=true" 61 | fi 62 | 63 | MAIN=$1 64 | shift 65 | 66 | # GC options 67 | GC_FILE_SUFFIX='-gc.log' 68 | GC_LOG_FILE_NAME='' 69 | if [ "x$GC_LOG_ENABLED" = "xtrue" ]; then 70 | GC_LOG_FILE_NAME=$DAEMON_NAME$GC_FILE_SUFFIX 71 | 72 | # The first segment of the version number, which is '1' for releases before Java 9 73 | # it then becomes '9', '10', ... 74 | # Some examples of the first line of `java --version`: 75 | # 8 -> java version "1.8.0_152" 76 | # 9.0.4 -> java version "9.0.4" 77 | # 10 -> java version "10" 2018-03-20 78 | # 10.0.1 -> java version "10.0.1" 2018-04-17 79 | # We need to match to the end of the line to prevent sed from printing the characters that do not match 80 | JAVA_MAJOR_VERSION=$($JAVA -version 2>&1 | sed -E -n 's/.* version "([0-9]*).*$/\1/p') 81 | if [[ "$JAVA_MAJOR_VERSION" -ge "9" ]] ; then 82 | KWACK_GC_LOG_OPTS="-Xlog:gc*:file=$LOG_DIR/$GC_LOG_FILE_NAME:time,tags:filecount=10,filesize=102400" 83 | else 84 | KWACK_GC_LOG_OPTS="-Xloggc:$LOG_DIR/$GC_LOG_FILE_NAME -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCDateStamps -XX:+PrintGCTimeStamps -XX:+UseGCLogFileRotation -XX:NumberOfGCLogFiles=10 -XX:GCLogFileSize=100M" 85 | fi 86 | fi 87 | 88 | # If Cygwin is detected, classpath is converted to Windows format. 
89 | (( CYGWIN )) && CLASSPATH=$(cygpath --path --mixed "${CLASSPATH}") 90 | 91 | # Launch mode 92 | if [ "x$DAEMON_MODE" = "xtrue" ]; then 93 | CONSOLE_OUTPUT_FILE=${CONSOLE_OUTPUT_FILE:-${LOG_DIR}/kwack-console.out} 94 | nohup $JAVA $KWACK_HEAP_OPTS $KWACK_JVM_PERFORMANCE_OPTS $KWACK_GC_LOG_OPTS $KWACK_JMX_OPTS $KWACK_LOG4J_OPTS -cp $CLASSPATH $KWACK_OPTS "$MAIN" "$@" > "${CONSOLE_OUTPUT_FILE}" 2>&1 < /dev/null & 95 | else 96 | exec "$JAVA" $KWACK_HEAP_OPTS $KWACK_JVM_PERFORMANCE_OPTS $KWACK_GC_LOG_OPTS $KWACK_JMX_OPTS $KWACK_LOG4J_OPTS -cp $CLASSPATH $KWACK_OPTS "$MAIN" "$@" 97 | fi 98 | -------------------------------------------------------------------------------- /src/test/resources/ConfluentMetric.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto3"; 2 | package metrics; 3 | 4 | option java_package = "io.confluent.metrics.record"; 5 | 6 | message YammerMetricName { 7 | string group = 1; 8 | string name = 2; 9 | string type = 3; 10 | string scope = 4; 11 | string mBeanName = 5; 12 | } 13 | 14 | message YammerGauge { 15 | YammerMetricName metricName = 1; 16 | string value = 2; 17 | oneof numericValue { 18 | int64 longValue = 3; 19 | double doubleValue = 4; 20 | } 21 | } 22 | 23 | message YammerMeter { 24 | YammerMetricName metricName = 1; 25 | int64 count = 2; 26 | double oneMinuteRate = 3; 27 | double fiveMinuteRate = 4; 28 | double fifteenMinuteRate = 5; 29 | double meanRate = 6; 30 | int64 deltaCount = 7; 31 | } 32 | 33 | message YammerHistogram { 34 | YammerMetricName metricName = 1; 35 | int64 count = 2; // number of values recorded 36 | double max = 3; 37 | double min = 4; 38 | double mean = 5; 39 | double stdDev = 6; 40 | double sum = 7; 41 | double median = 8; 42 | double percentile75th = 9; 43 | double percentile95th = 10; 44 | double percentile98th = 11; 45 | double percentile99th = 12; 46 | double percentile999th = 13; 47 | int32 size = 14; // number of items in the snapshot 48 | int64 deltaCount = 15; 49 | } 50 | 51 | message YammerTimer { 52 | YammerMetricName metricName = 1; 53 | int64 count = 2; // number of values recorded 54 | double max = 3; 55 | double min = 4; 56 | double mean = 5; 57 | double stdDev = 6; 58 | double sum = 7; 59 | double variance = 8; 60 | double median = 9; 61 | double percentile75th = 10; 62 | double percentile95th = 11; 63 | double percentile98th = 12; 64 | double percentile99th = 13; 65 | double percentile999th = 14; 66 | int32 size = 15; // number of items in the snapshot 67 | double oneMinuteRate = 16; 68 | double fiveMinuteRate = 17; 69 | double fifteenMinuteRate = 18; 70 | double meanRate = 19; 71 | int64 deltaCount = 20; 72 | } 73 | 74 | message KafkaMetricName { 75 | string group = 1; 76 | string name = 2; 77 | map<string, string> tags = 3; 78 | } 79 | 80 | message KafkaMeasurable { 81 | KafkaMetricName metricName = 1; 82 | double value = 2; 83 | } 84 | 85 | enum MetricType { 86 | UNKNOWN = 0; 87 | BROKER = 1; 88 | PRODUCER = 2; 89 | CONSUMER = 3; 90 | } 91 | 92 | message MetricsMessage { 93 | MetricType metricType = 1; 94 | int64 timestamp = 2; 95 | // an array of Yammer gauges 96 | repeated YammerGauge yammerGauge = 3; 97 | // an array of Yammer meters 98 | repeated YammerMeter yammerMeter = 4; 99 | // an array of Yammer histograms 100 | repeated YammerHistogram yammerHistogram = 5; 101 | // an array of Yammer timers 102 | repeated YammerTimer yammerTimer = 6; 103 | // an array of Kafka measurables 104 | repeated KafkaMeasurable kafkaMeasurable = 7; 105 | string clusterId = 8; 106 | int32
brokerId = 9; 107 | // Client Id for consumers and producers 108 | string clientId = 10; 109 | // Group Id for consumers 110 | string groupId = 11; 111 | // System-level metrics 112 | SystemMetrics systemMetrics = 12; 113 | 114 | string version = 13; 115 | string commitId = 14; 116 | // an array of the kafka process roles defined by the KafkaConfig.ProcessRolesProp 117 | // The values are sanitized and sorted alphabetically 118 | repeated string processRoles = 15; 119 | } 120 | 121 | message SystemMetrics { 122 | // Volume metrics 123 | repeated VolumeMetrics volumes = 1; 124 | } 125 | 126 | message VolumeMetrics { 127 | string name = 1; 128 | int64 usableBytes = 2; 129 | int64 totalBytes = 3; 130 | repeated LogDir logDirs = 4; 131 | } 132 | 133 | message LogDir { 134 | string path = 1; 135 | } 136 | -------------------------------------------------------------------------------- /src/test/java/io/kcache/kwack/util/LocalClusterTestHarness.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | *

6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | *

8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 13 | */ 14 | 15 | package io.kcache.kwack.util; 16 | 17 | import io.confluent.kafka.schemaregistry.SchemaProvider; 18 | import io.confluent.kafka.schemaregistry.avro.AvroSchemaProvider; 19 | import io.confluent.kafka.schemaregistry.client.SchemaRegistryClient; 20 | import io.confluent.kafka.schemaregistry.json.JsonSchemaProvider; 21 | import io.confluent.kafka.schemaregistry.protobuf.ProtobufSchemaProvider; 22 | import io.confluent.kafka.serializers.AbstractKafkaSchemaSerDeConfig; 23 | import io.kcache.kwack.KwackConfig; 24 | import io.kcache.kwack.KwackEngine; 25 | import java.util.Arrays; 26 | import java.util.Collections; 27 | import java.util.List; 28 | import java.util.Properties; 29 | import org.junit.jupiter.api.AfterEach; 30 | import org.junit.jupiter.api.BeforeEach; 31 | import org.slf4j.Logger; 32 | import org.slf4j.LoggerFactory; 33 | 34 | /** 35 | * Test harness to run against a real, local Kafka cluster. This is essentially 36 | * Kafka's ZookeeperTestHarness and KafkaServerTestHarness traits combined. 37 | */ 38 | public abstract class LocalClusterTestHarness extends ClusterTestHarness { 39 | 40 | private static final Logger LOG = LoggerFactory.getLogger(LocalClusterTestHarness.class); 41 | 42 | protected static final String MOCK_URL = "mock://kwack"; 43 | 44 | protected Properties props; 45 | 46 | protected Integer serverPort; 47 | protected KwackEngine engine; 48 | 49 | public LocalClusterTestHarness() { 50 | super(); 51 | } 52 | 53 | public LocalClusterTestHarness(int numBrokers) { 54 | super(numBrokers); 55 | } 56 | 57 | public KwackEngine getEngine() { 58 | return engine; 59 | } 60 | 61 | @Override 62 | protected void setUp() throws Exception { 63 | super.setUp(); 64 | 65 | Thread.sleep(1000); 66 | 67 | setUpServer(); 68 | List<SchemaProvider> providers = Arrays.asList( 69 | new AvroSchemaProvider(), new JsonSchemaProvider(), new ProtobufSchemaProvider() 70 | ); 71 | SchemaRegistryClient schemaRegistry = KwackEngine.createSchemaRegistry( 72 | Collections.singletonList(MOCK_URL), providers, null); 73 | registerInitialSchemas(schemaRegistry); 74 | } 75 | 76 | private void setUpServer() { 77 | try { 78 | props = new Properties(); 79 | injectKwackProperties(props); 80 | 81 | KwackConfig config = new KwackConfig(props); 82 | 83 | engine = KwackEngine.getInstance(); 84 | engine.configure(config); 85 | } catch (Exception e) { 86 | LOG.error("Server died unexpectedly", e); 87 | System.exit(1); 88 | } 89 | } 90 | 91 | protected void registerInitialSchemas(SchemaRegistryClient schemaRegistry) throws Exception { 92 | } 93 | 94 | protected void injectKwackProperties(Properties props) { 95 | props.put(KwackConfig.KAFKACACHE_BOOTSTRAP_SERVERS_CONFIG, brokerList); 96 | props.put(KwackConfig.KAFKACACHE_TOPIC_REPLICATION_FACTOR_CONFIG, 1); 97 | props.put(KwackConfig.SCHEMA_REGISTRY_URL_CONFIG, MOCK_URL); 98 | props.put(KwackConfig.DB_CONFIG, ":memory:?cache=private"); 99 | props.put(AbstractKafkaSchemaSerDeConfig.AUTO_REGISTER_SCHEMAS, "true"); 100 | } 101 | 102 | @AfterEach 103 | public void tearDown() throws Exception { 104 | try { 105 | KwackEngine.closeInstance(); 106 | } catch (Exception e) { 107 | LOG.warn("Exception during tearDown", e); 108 | } 109 |
super.tearDown(); 110 | } 111 | } 112 | -------------------------------------------------------------------------------- /src/test/java/io/kcache/kwack/util/RestApp.java: -------------------------------------------------------------------------------- 1 | /* 2 | * Licensed under the Apache License, Version 2.0 (the "License"); 3 | * you may not use this file except in compliance with the License. 4 | * You may obtain a copy of the License at 5 | *

6 | * http://www.apache.org/licenses/LICENSE-2.0 7 | *

8 | * Unless required by applicable law or agreed to in writing, software 9 | * distributed under the License is distributed on an "AS IS" BASIS, 10 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 11 | * See the License for the specific language governing permissions and 12 | * limitations under the License. 13 | */ 14 | 15 | package io.kcache.kwack.util; 16 | 17 | 18 | import io.confluent.kafka.schemaregistry.CompatibilityLevel; 19 | import io.confluent.kafka.schemaregistry.client.rest.RestService; 20 | import io.confluent.kafka.schemaregistry.exceptions.SchemaRegistryException; 21 | import io.confluent.kafka.schemaregistry.rest.SchemaRegistryConfig; 22 | import io.confluent.kafka.schemaregistry.rest.SchemaRegistryRestApplication; 23 | import io.confluent.kafka.schemaregistry.storage.SchemaRegistry; 24 | import io.confluent.kafka.schemaregistry.storage.SchemaRegistryIdentity; 25 | import java.util.Properties; 26 | import org.eclipse.jetty.server.Server; 27 | 28 | public class RestApp { 29 | 30 | public final Properties prop; 31 | public RestService restClient; 32 | public SchemaRegistryRestApplication restApp; 33 | public Server restServer; 34 | public String restConnect; 35 | 36 | public RestApp(int port, String zkConnect, String kafkaTopic) { 37 | this(port, zkConnect, kafkaTopic, CompatibilityLevel.NONE.name, null); 38 | } 39 | 40 | public RestApp(int port, String zkConnect, String kafkaTopic, String compatibilityType, Properties schemaRegistryProps) { 41 | this(port, zkConnect, null, kafkaTopic, compatibilityType, true, schemaRegistryProps); 42 | } 43 | 44 | public RestApp(int port, 45 | String zkConnect, String kafkaTopic, 46 | String compatibilityType, boolean leaderEligibility, Properties schemaRegistryProps) { 47 | this(port, zkConnect, null, kafkaTopic, compatibilityType, 48 | leaderEligibility, schemaRegistryProps); 49 | } 50 | 51 | public RestApp(int port, 52 | String zkConnect, String bootstrapBrokers, 53 | String kafkaTopic, String compatibilityType, boolean leaderEligibility, 54 | Properties schemaRegistryProps) { 55 | prop = new Properties(); 56 | if (schemaRegistryProps != null) { 57 | prop.putAll(schemaRegistryProps); 58 | } 59 | prop.setProperty(SchemaRegistryConfig.PORT_CONFIG, ((Integer) port).toString()); 60 | if (bootstrapBrokers != null) { 61 | prop.setProperty(SchemaRegistryConfig.KAFKASTORE_BOOTSTRAP_SERVERS_CONFIG, bootstrapBrokers); 62 | } 63 | prop.put(SchemaRegistryConfig.KAFKASTORE_TOPIC_CONFIG, kafkaTopic); 64 | prop.put(SchemaRegistryConfig.SCHEMA_COMPATIBILITY_CONFIG, compatibilityType); 65 | prop.put(SchemaRegistryConfig.LEADER_ELIGIBILITY, leaderEligibility); 66 | } 67 | 68 | public void start() throws Exception { 69 | restApp = new SchemaRegistryRestApplication(prop); 70 | restServer = restApp.createServer(); 71 | restServer.start(); 72 | restApp.postServerStart(); 73 | restConnect = restServer.getURI().toString(); 74 | if (restConnect.endsWith("/")) 75 | restConnect = restConnect.substring(0, restConnect.length()-1); 76 | // For testing with Apache Http Client 77 | // restClient = new RestService(restConnect, false, true); 78 | restClient = new RestService(restConnect); 79 | } 80 | 81 | public void stop() throws Exception { 82 | if (restClient != null) { 83 | restClient.close(); 84 | restClient = null; 85 | } 86 | if (restServer != null) { 87 | restServer.stop(); 88 | restServer.join(); 89 | } 90 | } 91 | 92 | /** 93 | * This method must be called before calling {@code RestApp.start()} 94 | * for the additional properties 
to take effect. 95 | * 96 | * @param props the additional properties to set 97 | */ 98 | public void addConfigs(Properties props) { 99 | prop.putAll(props); 100 | } 101 | 102 | public boolean isLeader() { 103 | return restApp.schemaRegistry().isLeader(); 104 | } 105 | 106 | public void setLeader(SchemaRegistryIdentity schemaRegistryIdentity) 107 | throws SchemaRegistryException { 108 | restApp.schemaRegistry().setLeader(schemaRegistryIdentity); 109 | } 110 | 111 | public SchemaRegistryIdentity myIdentity() { 112 | return restApp.schemaRegistry().myIdentity(); 113 | } 114 | 115 | public SchemaRegistryIdentity leaderIdentity() { 116 | return restApp.schemaRegistry().leaderIdentity(); 117 | } 118 | 119 | public SchemaRegistry schemaRegistry() { 120 | return restApp.schemaRegistry(); 121 | } 122 | } 123 | -------------------------------------------------------------------------------- /src/test/java/io/kcache/kwack/ProtobufTest.java: -------------------------------------------------------------------------------- 1 | package io.kcache.kwack; 2 | 3 | import static org.junit.jupiter.api.Assertions.assertEquals; 4 | 5 | import com.google.common.collect.ImmutableList; 6 | import com.google.common.collect.Lists; 7 | import com.google.protobuf.ByteString; 8 | import com.google.protobuf.Timestamp; 9 | import com.google.type.Date; 10 | import com.google.type.TimeOfDay; 11 | import io.confluent.protobuf.type.utils.DecimalUtils; 12 | import io.kcache.kwack.proto.ComplexProto.Data; 13 | import io.kcache.kwack.proto.ComplexProto.Kind; 14 | import io.kcache.kwack.proto.ComplexProto.Complex; 15 | import io.kcache.kwack.proto.SimpleProto.Simple; 16 | import io.reactivex.rxjava3.core.Observable; 17 | import java.io.IOException; 18 | import java.math.BigDecimal; 19 | import java.math.BigInteger; 20 | import java.time.Instant; 21 | import java.time.LocalDate; 22 | import java.time.LocalTime; 23 | import java.util.ArrayList; 24 | import java.util.Base64; 25 | import java.util.HashMap; 26 | import java.util.List; 27 | import java.util.Map; 28 | import java.util.Properties; 29 | import org.apache.kafka.clients.producer.KafkaProducer; 30 | import org.junit.jupiter.api.Test; 31 | 32 | public class ProtobufTest extends AbstractSchemaTest { 33 | 34 | private Simple createSimpleObj() { 35 | return Simple.newBuilder().setId(123).setName("hi").build(); 36 | } 37 | 38 | private Complex createComplexObj() { 39 | return Complex.newBuilder() 40 | .setName("test") 41 | .setMystring("testUser") 42 | .setMybytes(ByteString.copyFrom(new byte[]{0, 1, 2})) 43 | .setMyint(1) 44 | .setMyuint(2) 45 | .setMylong(2L) 46 | .setMyulong(3L) 47 | .setMyfloat(3.0f) 48 | .setMydouble(4.0d) 49 | .setMyboolean(true) 50 | .setKind(Kind.ONE) 51 | .setMyoneofint(5) 52 | .addStrArray("hi") 53 | .addStrArray("there") 54 | .addDataArray(Data.newBuilder().setData("hi").build()) 55 | .addDataArray(Data.newBuilder().setData("there").build()) 56 | .putDataMap("bye", Data.newBuilder().setData("there").build()) 57 | .setDecimal(DecimalUtils.fromBigDecimal(new BigDecimal("123.45"))) 58 | .setDate(Date.newBuilder().setYear(2024).setMonth(1).setDay(1).build()) 59 | .setTime(TimeOfDay.newBuilder().setHours(12).setMinutes(30).setSeconds(30).build()) 60 | .setTimestamp(Timestamp.newBuilder().setSeconds(1234567890L).build()) 61 | .build(); 62 | } 63 | 64 | @Test 65 | public void testSimple() throws IOException { 66 | Simple obj = createSimpleObj(); 67 | Properties producerProps = createProducerProps(MOCK_URL); 68 | KafkaProducer<String, Object> producer = 
createProducer(producerProps); 69 | produce(producer, getTopic(), new Object[] { obj }); 70 | producer.close(); 71 | 72 | engine.init(); 73 | Observable<Map<String, Object>> obs = engine.start(); 74 | List<Map<String, Object>> lm = Lists.newArrayList(obs.blockingIterable().iterator()); 75 | Map<String, Object> m = lm.get(0); 76 | assertEquals("hi", m.get("name")); 77 | assertEquals(123, m.get("id")); 78 | } 79 | 80 | @Test 81 | public void testComplex() throws IOException { 82 | Complex obj = createComplexObj(); 83 | Properties producerProps = createProducerProps(MOCK_URL); 84 | KafkaProducer<String, Object> producer = createProducer(producerProps); 85 | produce(producer, getTopic(), new Object[] { obj }); 86 | producer.close(); 87 | 88 | engine.init(); 89 | Observable<Map<String, Object>> obs = engine.start(); 90 | List<Map<String, Object>> lm = Lists.newArrayList(obs.blockingIterable().iterator()); 91 | Map<String, Object> m = lm.get(0); 92 | assertEquals("test", m.get("name")); 93 | assertEquals("testUser", m.get("mystring")); 94 | assertEquals(Base64.getEncoder().encodeToString(new byte[]{0, 1, 2}), m.get("mybytes")); 95 | assertEquals(1, m.get("myint")); 96 | assertEquals(2L, m.get("myuint")); 97 | assertEquals(2L, m.get("mylong")); 98 | assertEquals(new BigInteger("3"), m.get("myulong")); 99 | assertEquals(3.0f, m.get("myfloat")); 100 | assertEquals(4.0d, m.get("mydouble")); 101 | assertEquals(true, m.get("myboolean")); 102 | assertEquals("ONE", m.get("kind")); 103 | assertEquals(5, m.get("myoneof")); 104 | assertEquals(ImmutableList.of("hi", "there"), m.get("str_array")); 105 | Map<String, Object> m1 = new HashMap<>(); 106 | m1.put("data", "hi"); 107 | Map<String, Object> m2 = new HashMap<>(); 108 | m2.put("data", "there"); 109 | List<Map<String, Object>> a1 = new ArrayList<>(); 110 | a1.add(m1); 111 | a1.add(m2); 112 | assertEquals(a1, m.get("data_array")); 113 | Map<String, Map<String, Object>> m4 = new HashMap<>(); 114 | m4.put("bye", m2); 115 | assertEquals(m4, m.get("data_map")); 116 | assertEquals(new BigDecimal("123.45"), m.get("decimal")); 117 | assertEquals(LocalDate.of(2024, 1, 1), m.get("date")); 118 | assertEquals(LocalTime.of(12, 30, 30), m.get("time")); 119 | assertEquals(java.sql.Timestamp.from(Instant.ofEpochSecond(1234567890L)), m.get("timestamp")); 120 | } 121 | @Override 122 | protected String getTopic() { 123 | return "test-proto"; 124 | } 125 | 126 | @Override 127 | protected Class<?> getValueSerializer() { 128 | return io.confluent.kafka.serializers.protobuf.KafkaProtobufSerializer.class; 129 | } 130 | } 131 | -------------------------------------------------------------------------------- /src/test/resources/schema_registry_key.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "http://json-schema.org/draft-07/schema#", 3 | "title": "Schema Registry Key", 4 | "oneOf": [ 5 | { 6 | "$ref": "#/definitions/ConfigKey" 7 | }, 8 | { 9 | "$ref": "#/definitions/SchemaKey" 10 | }, 11 | { 12 | "$ref": "#/definitions/ModeKey" 13 | }, 14 | { 15 | "$ref": "#/definitions/NoopKey" 16 | }, 17 | { 18 | "$ref": "#/definitions/DeleteSubjectKey" 19 | }, 20 | { 21 | "$ref": "#/definitions/ClearSubjectKey" 22 | }, 23 | { 24 | "$ref": "#/definitions/ContextKey" 25 | } 26 | ], 27 | "definitions": { 28 | "ConfigKey": { 29 | "type": "object", 30 | "additionalProperties": true, 31 | "properties": { 32 | "keytype": { 33 | "type": "string", 34 | "enum": [ 35 | "CONFIG" 36 | ], 37 | "default": "CONFIG" 38 | }, 39 | "subject": { 40 | "oneOf": [ 41 | { 42 | "type": "null", 43 | "title": "Not included" 44 | }, 45 | { 46 | "type": "string" 47 | } 48 | ] 49 | }, 50 | "magic": { 51 | "type": "integer", 52 | "minimum": 0 53 | } 54 | }, 55 | 
"title": "CONFIG", 56 | "required": [ 57 | "keytype", 58 | "magic" 59 | ] 60 | }, 61 | "SchemaKey": { 62 | "type": "object", 63 | "additionalProperties": true, 64 | "properties": { 65 | "keytype": { 66 | "type": "string", 67 | "enum": [ 68 | "SCHEMA" 69 | ], 70 | "default": "SCHEMA" 71 | }, 72 | "subject": { 73 | "oneOf": [ 74 | { 75 | "type": "null", 76 | "title": "Not included" 77 | }, 78 | { 79 | "type": "string" 80 | } 81 | ] 82 | }, 83 | "version": { 84 | "type": "integer", 85 | "minimum": 1 86 | }, 87 | "magic": { 88 | "type": "integer", 89 | "minimum": 0 90 | } 91 | }, 92 | "title": "SCHEMA", 93 | "required": [ 94 | "keytype", 95 | "version", 96 | "magic" 97 | ] 98 | }, 99 | "ModeKey": { 100 | "type": "object", 101 | "additionalProperties": true, 102 | "properties": { 103 | "keytype": { 104 | "type": "string", 105 | "enum": [ 106 | "MODE" 107 | ], 108 | "default": "MODE" 109 | }, 110 | "subject": { 111 | "oneOf": [ 112 | { 113 | "type": "null", 114 | "title": "Not included" 115 | }, 116 | { 117 | "type": "string" 118 | } 119 | ] 120 | }, 121 | "magic": { 122 | "type": "integer", 123 | "minimum": 0 124 | } 125 | }, 126 | "title": "MODE", 127 | "required": [ 128 | "keytype", 129 | "magic" 130 | ] 131 | }, 132 | "NoopKey": { 133 | "type": "object", 134 | "additionalProperties": true, 135 | "properties": { 136 | "keytype": { 137 | "type": "string", 138 | "enum": [ 139 | "NOOP" 140 | ], 141 | "default": "NOOP" 142 | }, 143 | "magic": { 144 | "type": "integer", 145 | "minimum": 0 146 | } 147 | }, 148 | "title": "NOOP", 149 | "required": [ 150 | "keytype", 151 | "magic" 152 | ] 153 | }, 154 | "DeleteSubjectKey": { 155 | "type": "object", 156 | "additionalProperties": true, 157 | "properties": { 158 | "keytype": { 159 | "type": "string", 160 | "enum": [ 161 | "DELETE_SUBJECT" 162 | ], 163 | "default": "DELETE_SUBJECT" 164 | }, 165 | "subject": { 166 | "oneOf": [ 167 | { 168 | "type": "null", 169 | "title": "Not included" 170 | }, 171 | { 172 | "type": "string" 173 | } 174 | ] 175 | }, 176 | "magic": { 177 | "type": "integer", 178 | "minimum": 0 179 | } 180 | }, 181 | "title": "DELETE_SUBJECT", 182 | "required": [ 183 | "keytype", 184 | "magic" 185 | ] 186 | }, 187 | "ClearSubjectKey": { 188 | "type": "object", 189 | "additionalProperties": true, 190 | "properties": { 191 | "keytype": { 192 | "type": "string", 193 | "enum": [ 194 | "CLEAR_SUBJECT" 195 | ], 196 | "default": "CLEAR_SUBJECT" 197 | }, 198 | "subject": { 199 | "oneOf": [ 200 | { 201 | "type": "null", 202 | "title": "Not included" 203 | }, 204 | { 205 | "type": "string" 206 | } 207 | ] 208 | }, 209 | "magic": { 210 | "type": "integer", 211 | "minimum": 0 212 | } 213 | }, 214 | "title": "CLEAR_SUBJECT", 215 | "required": [ 216 | "keytype", 217 | "magic" 218 | ] 219 | }, 220 | "ContextKey": { 221 | "type": "object", 222 | "additionalProperties": true, 223 | "properties": { 224 | "keytype": { 225 | "type": "string", 226 | "enum": [ 227 | "CONTEXT" 228 | ], 229 | "default": "CONTEXT" 230 | }, 231 | "tenant": { 232 | "oneOf": [ 233 | { 234 | "type": "null", 235 | "title": "Not included" 236 | }, 237 | { 238 | "type": "string" 239 | } 240 | ] 241 | }, 242 | "context": { 243 | "oneOf": [ 244 | { 245 | "type": "null", 246 | "title": "Not included" 247 | }, 248 | { 249 | "type": "string" 250 | } 251 | ] 252 | }, 253 | "magic": { 254 | "type": "integer", 255 | "minimum": 0 256 | } 257 | }, 258 | "title": "CONTEXT", 259 | "required": [ 260 | "keytype", 261 | "magic" 262 | ] 263 | } 264 | } 265 | } 266 | 
-------------------------------------------------------------------------------- /src/test/java/io/kcache/kwack/ProtobufNoSRTest.java: -------------------------------------------------------------------------------- 1 | package io.kcache.kwack; 2 | 3 | import static org.junit.jupiter.api.Assertions.assertEquals; 4 | 5 | import com.google.common.collect.ImmutableList; 6 | import com.google.common.collect.Lists; 7 | import com.google.protobuf.ByteString; 8 | import com.google.protobuf.Timestamp; 9 | import com.google.type.Date; 10 | import com.google.type.TimeOfDay; 11 | import io.confluent.kafka.schemaregistry.ParsedSchema; 12 | import io.confluent.kafka.schemaregistry.protobuf.ProtobufSchema; 13 | import io.confluent.protobuf.type.utils.DecimalUtils; 14 | import io.kcache.kwack.proto.ComplexProto.Complex; 15 | import io.kcache.kwack.proto.ComplexProto.Data; 16 | import io.kcache.kwack.proto.ComplexProto.Kind; 17 | import io.reactivex.rxjava3.core.Observable; 18 | import java.io.IOException; 19 | import java.math.BigDecimal; 20 | import java.math.BigInteger; 21 | import java.time.Instant; 22 | import java.time.LocalDate; 23 | import java.time.LocalTime; 24 | import java.util.ArrayList; 25 | import java.util.Base64; 26 | import java.util.HashMap; 27 | import java.util.List; 28 | import java.util.Map; 29 | import java.util.Properties; 30 | import org.apache.kafka.clients.producer.KafkaProducer; 31 | import org.apache.kafka.common.serialization.ByteArraySerializer; 32 | import org.junit.jupiter.api.Test; 33 | 34 | public class ProtobufNoSRTest extends AbstractSchemaTest { 35 | 36 | private ParsedSchema createComplexSchema() { 37 | String schemaStr = "syntax = \"proto3\";\n" 38 | + "\n" 39 | + "package io.kcache.kwack.proto;\n" 40 | + "\n" 41 | + "import \"confluent/meta.proto\";\n" 42 | + "import \"confluent/type/decimal.proto\";\n" 43 | + "import \"google/protobuf/timestamp.proto\";\n" 44 | + "import \"google/type/date.proto\";\n" 45 | + "import \"google/type/timeofday.proto\";\n" 46 | + "\n" 47 | + "option java_package = \"io.kcache.kwack.proto\";\n" 48 | + "option java_outer_classname = \"ComplexProto\";\n" 49 | + "option java_multiple_files = false;\n" 50 | + "\n" 51 | + "message Complex {\n" 52 | + " optional string name = 1;\n" 53 | + " string mystring = 2;\n" 54 | + " bytes mybytes = 3;\n" 55 | + " int32 myint = 4;\n" 56 | + " uint32 myuint = 5;\n" 57 | + " int64 mylong = 6;\n" 58 | + " uint64 myulong = 7;\n" 59 | + " float myfloat = 8;\n" 60 | + " double mydouble = 9;\n" 61 | + " bool myboolean = 10;\n" 62 | + " Kind kind = 11;\n" 63 | + " oneof myoneof {\n" 64 | + " string myoneofstring = 12;\n" 65 | + " int32 myoneofint = 13;\n" 66 | + " }\n" 67 | + " repeated string str_array = 14;\n" 68 | + " repeated Data data_array = 15;\n" 69 | + " map<string, Data> data_map = 16;\n" 70 | + " confluent.type.Decimal decimal = 17 [(confluent.field_meta) = { params: [\n" 71 | + " { key: \"precision\", value: \"5\" },\n" 72 | + " { key: \"scale\", value: \"2\" }\n" 73 | + " ]}];\n" 74 | + " google.type.Date date = 18;\n" 75 | + " google.type.TimeOfDay time = 19;\n" 76 | + " google.protobuf.Timestamp timestamp = 20;\n" 77 | + "}\n" 78 | + "\n" 79 | + "message Data {\n" 80 | + " string data = 1;\n" 81 | + "}\n" 82 | + "\n" 83 | + "enum Kind {\n" 84 | + " ZERO = 0;\n" 85 | + " ONE = 1;\n" 86 | + " TWO = 2;\n" 87 | + "}"; 88 | return new ProtobufSchema(schemaStr); 89 | } 90 | 91 | private Complex createComplexObj() { 92 | return Complex.newBuilder() 93 | .setName("test") 94 | .setMystring("testUser") 95 | 
.setMybytes(ByteString.copyFrom(new byte[]{0, 1, 2})) 96 | .setMyint(1) 97 | .setMyuint(2) 98 | .setMylong(2L) 99 | .setMyulong(3L) 100 | .setMyfloat(3.0f) 101 | .setMydouble(4.0d) 102 | .setMyboolean(true) 103 | .setKind(Kind.ONE) 104 | .setMyoneofint(5) 105 | .addStrArray("hi") 106 | .addStrArray("there") 107 | .addDataArray(Data.newBuilder().setData("hi").build()) 108 | .addDataArray(Data.newBuilder().setData("there").build()) 109 | .putDataMap("bye", Data.newBuilder().setData("there").build()) 110 | .setDecimal(DecimalUtils.fromBigDecimal(new BigDecimal("123.45"))) 111 | .setDate(Date.newBuilder().setYear(2024).setMonth(1).setDay(1).build()) 112 | .setTime(TimeOfDay.newBuilder().setHours(12).setMinutes(30).setSeconds(30).build()) 113 | .setTimestamp(Timestamp.newBuilder().setSeconds(1234567890L).build()) 114 | .build(); 115 | } 116 | 117 | @Test 118 | public void testComplex() throws IOException { 119 | Complex obj = createComplexObj(); 120 | Properties producerProps = createProducerProps(MOCK_URL); 121 | KafkaProducer<String, Object> producer = createProducer(producerProps); 122 | produce(producer, getTopic(), new Object[] { obj.toByteArray() }); 123 | producer.close(); 124 | 125 | engine.init(); 126 | Observable<Map<String, Object>> obs = engine.start(); 127 | List<Map<String, Object>> lm = Lists.newArrayList(obs.blockingIterable().iterator()); 128 | Map<String, Object> m = lm.get(0); 129 | assertEquals("test", m.get("name")); 130 | assertEquals("testUser", m.get("mystring")); 131 | assertEquals(Base64.getEncoder().encodeToString(new byte[]{0, 1, 2}), m.get("mybytes")); 132 | assertEquals(1, m.get("myint")); 133 | assertEquals(2L, m.get("myuint")); 134 | assertEquals(2L, m.get("mylong")); 135 | assertEquals(new BigInteger("3"), m.get("myulong")); 136 | assertEquals(3.0f, m.get("myfloat")); 137 | assertEquals(4.0d, m.get("mydouble")); 138 | assertEquals(true, m.get("myboolean")); 139 | assertEquals("ONE", m.get("kind")); 140 | assertEquals(5, m.get("myoneof")); 141 | assertEquals(ImmutableList.of("hi", "there"), m.get("str_array")); 142 | Map<String, Object> m1 = new HashMap<>(); 143 | m1.put("data", "hi"); 144 | Map<String, Object> m2 = new HashMap<>(); 145 | m2.put("data", "there"); 146 | List<Map<String, Object>> a1 = new ArrayList<>(); 147 | a1.add(m1); 148 | a1.add(m2); 149 | assertEquals(a1, m.get("data_array")); 150 | Map<String, Map<String, Object>> m4 = new HashMap<>(); 151 | m4.put("bye", m2); 152 | assertEquals(m4, m.get("data_map")); 153 | assertEquals(new BigDecimal("123.45"), m.get("decimal")); 154 | assertEquals(LocalDate.of(2024, 1, 1), m.get("date")); 155 | assertEquals(LocalTime.of(12, 30, 30), m.get("time")); 156 | assertEquals(java.sql.Timestamp.from(Instant.ofEpochSecond(1234567890L)), m.get("timestamp")); 157 | } 158 | 159 | @Override 160 | protected String getTopic() { 161 | return "test-proto"; 162 | } 163 | 164 | @Override 165 | protected Class<?> getValueSerializer() { 166 | return ByteArraySerializer.class; 167 | } 168 | 169 | @Override 170 | protected void injectKwackProperties(Properties props) { 171 | super.injectKwackProperties(props); 172 | props.put(KwackConfig.VALUE_SERDES_CONFIG, 173 | "'" + getTopic() + "=proto:" + createComplexSchema().canonicalString() + "'"); 174 | } 175 | } 176 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | # kwack - In-Memory Analytics for Kafka using DuckDB 3 | 4 | [![Build Status][github-actions-shield]][github-actions-link] 5 | 6 | [github-actions-shield]: https://github.com/rayokota/kwack/actions/workflows/build.yml/badge.svg?branch=master 7 
| [github-actions-link]: https://github.com/rayokota/kwack/actions 8 | 9 | kwack supports in-memory analytics for Kafka data using DuckDB. 10 | 11 | ## Getting Started 12 | 13 | Note that kwack requires Java 17 or higher. 14 | 15 | To run kwack, download a [release](https://github.com/rayokota/kwack/releases) and unpack it. 16 | Then change to the `kwack-${version}` directory and run the following to see the command-line options: 17 | 18 | ```bash 19 | $ bin/kwack -h 20 | 21 | Usage: kwack [-hV] [-t=<topic>]... [-p=<partition>]... [-b=<broker>]... 22 | [-m=<ms>] [-F=<config-file>] [-o=<offset>] [-k=<topic=serde>]... 23 | [-v=<topic=serde>]... [-r=<url>] [-q=<query>] [-a=<attr>]... 24 | [-d=<db>] [-X=<prop=val>]... 25 | In-Memory Analytics for Kafka using DuckDB. 26 | -t, --topic=<topic> Topic(s) to consume from and produce to 27 | -p, --partition=<partition> Partition(s) 28 | -b, --bootstrap-server=<broker> Bootstrap broker(s) (host:[port]) 29 | -m, --metadata-timeout=<ms> Metadata (et.al.) request timeout 30 | -F, --file=<config-file> Read configuration properties from file 31 | -o, --offset=<offset> Offset to start consuming from: 32 | beginning | end | 33 | <value> (absolute offset) | 34 | -<value> (relative offset from end) 35 | @<value> (timestamp in ms to start at) 36 | Default: beginning 37 | -k, --key-serde=<topic=serde> (De)serialize keys using <serde> 38 | -v, --value-serde=<topic=serde> (De)serialize values using <serde> 39 | Available serdes: 40 | short | int | long | float | 41 | double | string | json | binary | 42 | avro:<schema|@file> | 43 | json:<schema|@file> | 44 | proto:<schema|@file> | 45 | latest (use latest version in SR) | 46 | <id> (use schema id from SR) 47 | Default for key: binary 48 | Default for value: latest 49 | The proto/latest/<id> serde formats can 50 | also take a message type name, e.g. 51 | proto:<schema|@file>;msg:<msg-name> 52 | in case multiple message types exist 53 | -r, --schema-registry-url=<url> SR (Schema Registry) URL 54 | -q, --query=<query> SQL query to execute. If none is specified, 55 | interactive sqlline mode is used 56 | -a, --row-attribute=<attr> Row attribute(s) to show: 57 | none 58 | rowkey (record key) 59 | ksi (key schema id) 60 | vsi (value schema id) 61 | top (topic) 62 | par (partition) 63 | off (offset) 64 | ts (timestamp) 65 | tst (timestamp type) 66 | epo (leadership epoch) 67 | hdr (headers) 68 | Default: rowkey,ksi,vsi,par,off,ts,hdr 69 | -d, --db=<db> DuckDB db, appended to 'jdbc:duckdb:' 70 | Default: :memory: 71 | -x, --skip-bytes=<bytes> Extra bytes to skip when deserializing with 72 | an external schema 73 | -X, --property=<prop=val> Set configuration property. 74 | -h, --help Show this help message and exit. 75 | -V, --version Print version information and exit. 76 | ``` 77 | 78 | kwack shares many command-line options with [kcat](https://github.com/edenhill/kcat) (formerly kafkacat). 79 | In addition, a file containing configuration properties can be used. The available configuration properties 80 | are listed [here](https://github.com/rayokota/kwack/blob/master/src/main/java/io/kcache/kwack/KwackConfig.java). 81 | 82 | Simply modify `config/kwack.properties` to point to an existing Kafka broker and Schema 83 | Registry. Then run the following: 84 | 85 | ```bash 86 | # Run with properties file 87 | $ bin/kwack -F config/kwack.properties 88 | ``` 89 | 90 | Starting kwack is as easy as specifying a Kafka broker, topic, and Schema Registry URL: 91 | 92 | ```bash 93 | $ bin/kwack -b mybroker -t mytopic -r http://schema-registry-url:8081 94 | Welcome to kwack! 95 | Enter "!help" for usage hints. 96 | 97 | ___(.)> 98 | ~~~~~~\___)~~~~~~ 99 | 100 | jdbc:duckdb::memory:> 101 | ``` 102 | 103 | When kwack starts, it will enter interactive mode, where you can enter SQL queries 104 | to analyze Kafka data. 
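kwack can also be embedded and driven programmatically. A minimal sketch, based on the `KwackEngine` API used by `KwackMain` and the tests in this repository (the topic, broker, and registry addresses below are placeholders):

```java
import io.kcache.kwack.KwackConfig;
import io.kcache.kwack.KwackEngine;
import io.reactivex.rxjava3.core.Observable;
import java.util.Map;
import java.util.Properties;

public class EmbeddedKwackExample {
    public static void main(String[] args) throws Exception {
        Properties props = new Properties();
        props.put(KwackConfig.TOPICS_CONFIG, "mytopic");
        props.put(KwackConfig.KAFKACACHE_BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
        props.put(KwackConfig.SCHEMA_REGISTRY_URL_CONFIG, "http://localhost:8081");
        props.put(KwackConfig.QUERY_CONFIG, "SELECT * FROM mytopic");

        KwackEngine engine = KwackEngine.getInstance();
        engine.configure(new KwackConfig(props));
        engine.init();
        // Each row arrives as a Map keyed by column name.
        Observable<Map<String, Object>> rows = engine.start();
        rows.blockingForEach(System.out::println);
        KwackEngine.closeInstance();
    }
}
```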
For non-interactive mode, specify a query on the command line: 105 | 106 | ```bash 107 | $ bin/kwack -b mybroker -t mytopic -r http://schema-registry-url:8081 -q "SELECT * FROM mytopic" 108 | ``` 109 | 110 | The output of the above command will be in JSON, and so can be piped to other commands like jq. 111 | 112 | One can load multiple topics, and then perform a query that joins the resulting tables on a common 113 | column: 114 | 115 | ```bash 116 | $ bin/kwack -b mybroker -t mytopic -t mytopic2 -r http://schema-registry-url:8081 -q "SELECT * FROM mytopic JOIN mytopic2 USING (col1)" 117 | ``` 118 | 119 | One can convert Kafka data into Parquet format by using the COPY command in DuckDB: 120 | 121 | ```bash 122 | $ bin/kwack -b mybroker -t mytopic -r http://schema-registry-url:8081 -q "COPY mytopic to 'mytopic.parquet' (FORMAT 'parquet')" 123 | ``` 124 | 125 | If not using Confluent Schema Registry, one can pass an external schema: 126 | 127 | ```bash 128 | $ bin/kwack -b mybroker -t mytopic -v mytopic=proto:@/path/to/myschema.proto 129 | ``` 130 | 131 | For a given schema, kwack will create DuckDB columns based on 132 | the appropriate Avro, Protobuf, or JSON Schema as follows: 133 | 134 | |Avro | Protobuf | JSON Schema | DuckDB | 135 | |-----|----------|-------------|--------| 136 | |boolean | boolean | boolean | BOOLEAN | 137 | |int | int32, sint32, sfixed32 || INTEGER | 138 | || uint32, fixed32 || UINTEGER | 139 | |long | int64, sint64, sfixed64 | integer | BIGINT | 140 | || uint64, fixed64 || UBIGINT | 141 | |float | float || FLOAT | 142 | |double | double | number | DOUBLE | 143 | |string | string | string | VARCHAR | 144 | |bytes, fixed | bytes || BLOB | 145 | |enum | enum | enum | ENUM | 146 | |record | message | object | STRUCT | 147 | |array | repeated | array | LIST | 148 | |map | map || MAP | 149 | |union | oneof | oneOf,anyOf | UNION | 150 | |decimal | confluent.type.Decimal || DECIMAL | 151 | |date | google.type.Date || DATE | 152 | |time-millis, time-micros | google.type.TimeOfDay || TIME | 153 | |timestamp-millis ||| TIMESTAMP_MS | 154 | |timestamp-micros ||| TIMESTAMP | 155 | |timestamp-nanos | google.protobuf.Timestamp || TIMESTAMP_NS | 156 | |duration | google.protobuf.Duration || INTERVAL | 157 | |uuid ||| UUID | 158 | 159 | For more on how to use kwack, see this [blog](https://yokota.blog/2024/07/11/in-memory-analytics-for-kafka-using-duckdb/). 
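The nested DuckDB types in the last column (STRUCT, LIST, MAP) are ordinary SQL values and can be queried like any other column. A standalone sketch (not from the kwack codebase; assumes only the `org.duckdb:duckdb_jdbc` driver on the classpath) of what those types look like over the same JDBC interface kwack uses:

```java
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.Statement;

public class DuckDbNestedTypesExample {
    public static void main(String[] args) throws Exception {
        // "jdbc:duckdb:" with no path opens an in-memory database,
        // the same default kwack uses (-d :memory:).
        try (Connection conn = DriverManager.getConnection("jdbc:duckdb:");
             Statement stmt = conn.createStatement();
             ResultSet rs = stmt.executeQuery(
                 "SELECT {'id': 1, 'name': 'hi'} AS rec, " // STRUCT (record/message/object)
                     + "[1, 2, 3] AS arr, "                // LIST (array/repeated)
                     + "MAP(['k'], ['v']) AS m")) {        // MAP (map)
            while (rs.next()) {
                System.out.println(rs.getObject("rec") + " "
                    + rs.getObject("arr") + " " + rs.getObject("m"));
            }
        }
    }
}
```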
160 | -------------------------------------------------------------------------------- /src/test/java/io/kcache/kwack/AvroBenchmark.java: -------------------------------------------------------------------------------- 1 | package io.kcache.kwack; 2 | 3 | import com.google.common.collect.Lists; 4 | import io.confluent.kafka.serializers.KafkaAvroSerializerConfig; 5 | import io.kcache.kwack.util.ClusterTestHarness.DefaultTestInfo; 6 | import io.kcache.kwack.util.LocalClusterTestHarness; 7 | import io.reactivex.rxjava3.core.Observable; 8 | import java.util.Collections; 9 | import java.util.Optional; 10 | import org.apache.avro.Schema; 11 | import org.apache.avro.generic.GenericData; 12 | import org.apache.avro.generic.GenericRecord; 13 | import org.apache.avro.generic.IndexedRecord; 14 | import org.apache.kafka.clients.producer.KafkaProducer; 15 | import org.apache.kafka.clients.producer.ProducerConfig; 16 | import org.apache.kafka.clients.producer.ProducerRecord; 17 | import org.apache.kafka.common.utils.Bytes; 18 | import org.openjdk.jmh.annotations.Benchmark; 19 | import org.openjdk.jmh.annotations.BenchmarkMode; 20 | import org.openjdk.jmh.annotations.Fork; 21 | import org.openjdk.jmh.annotations.Level; 22 | import org.openjdk.jmh.annotations.Measurement; 23 | import org.openjdk.jmh.annotations.Mode; 24 | import org.openjdk.jmh.annotations.OutputTimeUnit; 25 | import org.openjdk.jmh.annotations.Param; 26 | import org.openjdk.jmh.annotations.Scope; 27 | import org.openjdk.jmh.annotations.Setup; 28 | import org.openjdk.jmh.annotations.State; 29 | import org.openjdk.jmh.annotations.TearDown; 30 | import org.openjdk.jmh.annotations.Warmup; 31 | import org.openjdk.jmh.runner.Runner; 32 | import org.openjdk.jmh.runner.RunnerException; 33 | import org.openjdk.jmh.runner.options.Options; 34 | import org.openjdk.jmh.runner.options.OptionsBuilder; 35 | 36 | import java.io.IOException; 37 | import java.nio.ByteBuffer; 38 | import java.util.List; 39 | import java.util.Map; 40 | import java.util.Properties; 41 | import java.util.Random; 42 | import java.util.concurrent.TimeUnit; 43 | 44 | /** 45 | * JMH Benchmark for producing and reading Kafka records with Avro schema using Kwack. 46 | * 47 | * This benchmark measures the throughput and performance of: 48 | * 1. Producing a configurable number of Kafka records with a simple Avro schema 49 | * 2. 
Reading and processing those records using the Kwack engine 50 | * 51 | * To run the benchmark: 52 | * 53 | * Quick test with just one record count (fastest, ~1 minute): 54 | * mvn test-compile exec:java -Dexec.classpathScope=test \ 55 | * -Dexec.mainClass=io.kcache.kwack.AvroBenchmark \ 56 | * -Dexec.args="-p recordCount=1000" 57 | * 58 | * Full benchmark with all record counts (~2 minutes): 59 | * mvn test-compile exec:java -Dexec.classpathScope=test \ 60 | * -Dexec.mainClass=io.kcache.kwack.AvroBenchmark 61 | * 62 | * Customize parameters: 63 | * -Dexec.args="-p recordCount=100,1000 -wi 1 -i 2" 64 | */ 65 | @BenchmarkMode(Mode.Throughput) 66 | @OutputTimeUnit(TimeUnit.SECONDS) 67 | @Warmup(iterations = 1, time = 3, timeUnit = TimeUnit.SECONDS) 68 | @Measurement(iterations = 2, time = 5, timeUnit = TimeUnit.SECONDS) 69 | @Fork(0) // Disable forking to avoid classpath issues when running from Maven 70 | @State(Scope.Benchmark) 71 | public class AvroBenchmark { 72 | 73 | @Param({"100", "1000", "10000"}) 74 | private int recordCount; 75 | 76 | private BenchmarkTestHarness testHarness; 77 | private String topic; 78 | private Properties kwackProps; 79 | private static final String SCHEMA_REGISTRY_URL = "schema.registry.url"; 80 | private static final String MOCK_URL = "mock://kwack"; 81 | 82 | /** 83 | * Creates a simple Avro schema for the benchmark. 84 | * Schema contains: id (int), name (string), value (double), timestamp (long) 85 | */ 86 | private Schema createBenchmarkSchema() { 87 | return new Schema.Parser().parse( 88 | "{\"namespace\": \"io.kcache.kwack.benchmark\",\n" 89 | + " \"type\": \"record\",\n" 90 | + " \"name\": \"BenchmarkRecord\",\n" 91 | + " \"fields\": [\n" 92 | + " {\"name\": \"id\", \"type\": \"int\"},\n" 93 | + " {\"name\": \"name\", \"type\": \"string\"},\n" 94 | + " {\"name\": \"value\", \"type\": \"double\"},\n" 95 | + " {\"name\": \"timestamp\", \"type\": \"long\"}\n" 96 | + "]\n" 97 | + "}"); 98 | } 99 | 100 | /** 101 | * Creates a simple Avro record with the given id. 102 | */ 103 | private IndexedRecord createBenchmarkRecord(int id, Random random) { 104 | Schema schema = createBenchmarkSchema(); 105 | GenericRecord avroRecord = new GenericData.Record(schema); 106 | avroRecord.put("id", id); 107 | avroRecord.put("name", "record_" + id); 108 | avroRecord.put("value", random.nextDouble() * 1000.0); 109 | avroRecord.put("timestamp", System.currentTimeMillis()); 110 | return avroRecord; 111 | } 112 | 113 | /** 114 | * Inner class to configure the test harness for benchmarking. 115 | */ 116 | private class BenchmarkTestHarness extends LocalClusterTestHarness { 117 | @Override 118 | protected void injectKwackProperties(Properties props) { 119 | super.injectKwackProperties(props); 120 | props.put(KwackConfig.TOPICS_CONFIG, topic); 121 | props.put(KwackConfig.QUERY_CONFIG, "select * from '" + topic + "'"); 122 | } 123 | 124 | public String getBrokerList() { 125 | return brokerList; 126 | } 127 | 128 | public Properties getKwackProperties() { 129 | Properties props = new Properties(); 130 | injectKwackProperties(props); 131 | return props; 132 | } 133 | } 134 | 135 | /** 136 | * Setup the test environment before the benchmark. 137 | * This includes starting Kafka, Schema Registry, and producing test records. 138 | * Note: KwackEngine initialization is NOT done here - it's measured in the benchmark. 
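* Records are produced once per trial (Level.Trial), using the count from the recordCount @Param above.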
139 | */ 140 | @Setup(Level.Trial) 141 | public void setup() throws Exception { 142 | topic = "benchmark-avro-" + System.currentTimeMillis(); 143 | 144 | // Initialize the test harness (Kafka + Schema Registry) without starting engine 145 | testHarness = new BenchmarkTestHarness(); 146 | testHarness.setUpTest(new DefaultTestInfo( 147 | "AvroBenchmark", Collections.EMPTY_SET, Optional.empty(), Optional.empty())); 148 | 149 | // Store kwack properties for use in benchmark iterations 150 | kwackProps = testHarness.getKwackProperties(); 151 | 152 | // Produce records 153 | Properties producerProps = createProducerProps(MOCK_URL, testHarness.getBrokerList()); 154 | KafkaProducer producer = new KafkaProducer<>(producerProps); 155 | 156 | Random random = new Random(); 157 | for (int i = 0; i < recordCount; i++) { 158 | IndexedRecord record = createBenchmarkRecord(i, random); 159 | Object key = Bytes.wrap(ByteBuffer.allocate(4).putInt(i).array()); 160 | ProducerRecord producerRecord = new ProducerRecord<>(topic, key, record); 161 | producer.send(producerRecord); 162 | } 163 | producer.flush(); 164 | producer.close(); 165 | } 166 | 167 | /** 168 | * Cleanup after the benchmark. 169 | * Note: KwackEngine is closed in each benchmark iteration, not here. 170 | */ 171 | @TearDown(Level.Trial) 172 | public void tearDown() throws Exception { 173 | // Close the instance one final time to ensure cleanup 174 | KwackEngine.closeInstance(); 175 | 176 | if (testHarness != null) { 177 | testHarness.tearDown(); 178 | } 179 | } 180 | 181 | /** 182 | * Creates producer properties for Kafka with Avro serialization. 183 | */ 184 | private Properties createProducerProps(String schemaRegistryUrl, String brokerList) { 185 | Properties props = new Properties(); 186 | props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, brokerList); 187 | props.put(SCHEMA_REGISTRY_URL, schemaRegistryUrl); 188 | props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, 189 | org.apache.kafka.common.serialization.BytesSerializer.class); 190 | props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, 191 | io.confluent.kafka.serializers.KafkaAvroSerializer.class); 192 | props.put(KafkaAvroSerializerConfig.AVRO_USE_LOGICAL_TYPE_CONVERTERS_CONFIG, true); 193 | return props; 194 | } 195 | 196 | /** 197 | * Benchmark: Initialize Kwack engine, read and process all records, then close. 198 | * This measures the full cycle including initialization time. 199 | */ 200 | @Benchmark 201 | public int readRecordsWithKwack() throws IOException { 202 | // Get a fresh KwackEngine instance 203 | KwackEngine engine = KwackEngine.getInstance(); 204 | 205 | try { 206 | // Configure the engine 207 | KwackConfig config = new KwackConfig(kwackProps); 208 | engine.configure(config); 209 | 210 | // Initialize the engine (this is measured) 211 | engine.init(); 212 | 213 | // Start and read records (this is measured) 214 | Observable<Map<String, Object>> obs = engine.start(); 215 | List<Map<String, Object>> results = Lists.newArrayList(obs.blockingIterable().iterator()); 216 | 217 | // Verify we got all records 218 | if (results.size() != recordCount) { 219 | throw new RuntimeException("Expected " + recordCount + " records but got " + results.size()); 220 | } 221 | 222 | return results.size(); 223 | } finally { 224 | // Close the engine instance for the next iteration 225 | KwackEngine.closeInstance(false); 226 | } 227 | } 228 | 229 | /** 230 | * Main method to run the benchmark standalone. 
231 | */ 232 | public static void main(String[] args) throws RunnerException { 233 | Options opt = new OptionsBuilder() 234 | .include(AvroBenchmark.class.getSimpleName()) 235 | .forks(0) // Use annotation-based fork value 236 | .build(); 237 | 238 | new Runner(opt).run(); 239 | } 240 | } 241 | 242 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "{}" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright {yyyy} {name of copyright owner} 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 
202 | 203 | -------------------------------------------------------------------------------- /src/main/java/io/kcache/kwack/transformer/avro/AvroTransformer.java: -------------------------------------------------------------------------------- 1 | package io.kcache.kwack.transformer.avro; 2 | 3 | import static io.kcache.kwack.schema.ColumnStrategy.NOT_NULL_STRATEGY; 4 | import static io.kcache.kwack.schema.ColumnStrategy.NULL_STRATEGY; 5 | 6 | import io.confluent.kafka.schemaregistry.ParsedSchema; 7 | import io.kcache.kwack.schema.ColumnDef; 8 | import io.kcache.kwack.schema.DecimalColumnDef; 9 | import io.kcache.kwack.schema.EnumColumnDef; 10 | import io.kcache.kwack.schema.ListColumnDef; 11 | import io.kcache.kwack.schema.MapColumnDef; 12 | import io.kcache.kwack.schema.StructColumnDef; 13 | import io.kcache.kwack.schema.UnionColumnDef; 14 | import io.kcache.kwack.transformer.Context; 15 | import io.kcache.kwack.transformer.Transformer; 16 | import java.nio.ByteBuffer; 17 | import java.sql.Timestamp; 18 | import java.time.Instant; 19 | import java.util.Arrays; 20 | import java.util.LinkedHashMap; 21 | import java.util.List; 22 | import java.util.Map; 23 | import java.util.UUID; 24 | import java.util.stream.Collectors; 25 | import java.util.stream.StreamSupport; 26 | import org.apache.avro.Schema; 27 | import org.apache.avro.Schema.Type; 28 | import org.apache.avro.generic.GenericData; 29 | import org.apache.avro.generic.GenericFixed; 30 | import org.apache.avro.generic.GenericRecord; 31 | import org.apache.avro.reflect.ReflectData; 32 | import org.apache.avro.specific.SpecificData; 33 | import org.apache.avro.specific.SpecificRecord; 34 | import org.apache.avro.util.Utf8; 35 | import org.duckdb.DuckDBColumnType; 36 | 37 | public class AvroTransformer implements Transformer { 38 | @Override 39 | public ColumnDef schemaToColumnDef(Context ctx, ParsedSchema parsedSchema) { 40 | Schema schema = (Schema) parsedSchema.rawSchema(); 41 | return schemaToColumnDef(ctx, schema); 42 | } 43 | 44 | private ColumnDef schemaToColumnDef(Context ctx, Schema schema) { 45 | String logicalType = schema.getProp("logicalType"); 46 | LinkedHashMap columnDefs = new LinkedHashMap<>(); 47 | switch (schema.getType()) { 48 | case RECORD: 49 | StructColumnDef structColumnDef = new StructColumnDef(columnDefs); 50 | for (Schema.Field field : schema.getFields()) { 51 | columnDefs.put(field.name(), schemaToColumnDef(ctx, field.schema())); 52 | } 53 | return structColumnDef; 54 | case ENUM: 55 | return new EnumColumnDef(schema.getEnumSymbols()); 56 | case ARRAY: 57 | ColumnDef itemDef = schemaToColumnDef(ctx, schema.getElementType()); 58 | return new ListColumnDef(itemDef); 59 | case MAP: 60 | ColumnDef valueDef = schemaToColumnDef(ctx, schema.getValueType()); 61 | return new MapColumnDef(new ColumnDef(DuckDBColumnType.VARCHAR), valueDef); 62 | case UNION: 63 | Schema singletonUnion = flattenSingletonUnion(schema); 64 | if (singletonUnion != null) { 65 | ColumnDef colDef = schemaToColumnDef(ctx, singletonUnion); 66 | if (schema.getTypes().size() > 1) { 67 | colDef.setColumnStrategy(NULL_STRATEGY); 68 | } 69 | return colDef; 70 | } 71 | int i = 0; 72 | boolean nullable = false; 73 | for (Schema subSchema : schema.getTypes()) { 74 | if (subSchema.getType() == Schema.Type.NULL) { 75 | nullable = true; 76 | } else { 77 | columnDefs.put("u" + i, schemaToColumnDef(ctx, subSchema)); 78 | } 79 | i++; 80 | } 81 | return new UnionColumnDef(columnDefs, nullable 82 | ? 
NULL_STRATEGY 83 | : NOT_NULL_STRATEGY); 84 | case FIXED: 85 | return new ColumnDef(DuckDBColumnType.BLOB); 86 | case STRING: 87 | if ("uuid".equals(logicalType)) { 88 | return new ColumnDef(DuckDBColumnType.UUID); 89 | } 90 | return new ColumnDef(DuckDBColumnType.VARCHAR); 91 | case BYTES: 92 | if ("decimal".equals(logicalType)) { 93 | Object scaleNode = schema.getObjectProp("scale"); 94 | // In Avro the scale is optional and should default to 0 95 | int scale = scaleNode instanceof Number ? ((Number) scaleNode).intValue() : 0; 96 | Object precisionNode = schema.getObjectProp("precision"); 97 | int precision = ((Number) precisionNode).intValue(); 98 | return new DecimalColumnDef(precision, scale); 99 | } 100 | return new ColumnDef(DuckDBColumnType.BLOB); 101 | case INT: 102 | if ("date".equals(logicalType)) { 103 | return new ColumnDef(DuckDBColumnType.DATE); 104 | } else if ("time-millis".equals(logicalType)) { 105 | return new ColumnDef(DuckDBColumnType.TIME); 106 | } 107 | return new ColumnDef(DuckDBColumnType.INTEGER); 108 | case LONG: 109 | if ("time-micros".equals(logicalType)) { 110 | return new ColumnDef(DuckDBColumnType.TIME); 111 | } else if ("timestamp-millis".equals(logicalType)) { 112 | return new ColumnDef(DuckDBColumnType.TIMESTAMP_MS); 113 | } else if ("timestamp-micros".equals(logicalType)) { 114 | return new ColumnDef(DuckDBColumnType.TIMESTAMP); 115 | } else if ("timestamp-nanos".equals(logicalType)) { 116 | return new ColumnDef(DuckDBColumnType.TIMESTAMP_NS); 117 | } 118 | return new ColumnDef(DuckDBColumnType.BIGINT); 119 | case FLOAT: 120 | return new ColumnDef(DuckDBColumnType.FLOAT); 121 | case DOUBLE: 122 | return new ColumnDef(DuckDBColumnType.DOUBLE); 123 | case BOOLEAN: 124 | return new ColumnDef(DuckDBColumnType.BOOLEAN); 125 | case NULL: 126 | return new ColumnDef(DuckDBColumnType.BLOB, NULL_STRATEGY); 127 | default: 128 | break; 129 | } 130 | throw new IllegalArgumentException("Unsupported Avro type: " + schema.getType()); 131 | } 132 | 133 | private Schema flattenSingletonUnion(Schema schema) { 134 | if (schema.getType() != Type.UNION) { 135 | return null; 136 | } 137 | List<Schema> types = schema.getTypes(); 138 | int size = types.size(); 139 | if (size == 1) { 140 | return types.get(0); 141 | } else if (size == 2) { 142 | if (types.get(0).getType() == Type.NULL) { 143 | return types.get(1); 144 | } else if (types.get(1).getType() == Type.NULL) { 145 | return types.get(0); 146 | } 147 | } 148 | return null; 149 | } 150 | 151 | @Override 152 | public Object messageToColumn( 153 | Context ctx, ParsedSchema parsedSchema, Object message, ColumnDef columnDef) { 154 | Schema schema = (Schema) parsedSchema.rawSchema(); 155 | return messageToColumn(ctx, schema, message, columnDef); 156 | } 157 | 158 | private Object messageToColumn( 159 | Context ctx, Schema schema, Object message, ColumnDef columnDef) { 160 | if (message == null) { 161 | return null; 162 | } 163 | GenericData data; 164 | switch (schema.getType()) { 165 | case RECORD: 166 | StructColumnDef structColumnDef = (StructColumnDef) columnDef; 167 | data = getData(message); 168 | Object[] attributes = new Object[schema.getFields().size()]; 169 | int i = 0; 170 | for (Schema.Field field : schema.getFields()) { 171 | ColumnDef fieldColumnDef = structColumnDef.getColumnDefs().get(field.name()); 172 | Object value = data.getField(message, field.name(), field.pos()); 173 | Object newValue = messageToColumn(ctx, field.schema(), value, fieldColumnDef); 174 | attributes[i++] = newValue; 175 | } 176 | return Arrays.asList(attributes); 177 | case ENUM: 178 | return 
message.toString(); 179 | case ARRAY: 180 | if (!(message instanceof Iterable)) { 181 | return message; 182 | } 183 | ListColumnDef listColumnDef = (ListColumnDef) columnDef; 184 | ColumnDef itemDef = listColumnDef.getItemDef(); 185 | return StreamSupport.stream(((Iterable) message).spliterator(), false) 186 | .map(it -> messageToColumn(ctx, schema.getElementType(), it, itemDef)) 187 | .collect(Collectors.toList()); 188 | case MAP: 189 | if (!(message instanceof Map)) { 190 | return message; 191 | } 192 | MapColumnDef mapColumnDef = (MapColumnDef) columnDef; 193 | ColumnDef valueDef = mapColumnDef.getValueDef(); 194 | return ((Map) message).entrySet().stream() 195 | .collect(Collectors.toMap( 196 | e -> e.getKey().toString(), 197 | e -> messageToColumn(ctx, schema.getValueType(), e.getValue(), valueDef), 198 | (e1, e2) -> e1)); 199 | case UNION: 200 | Schema singletonUnion = flattenSingletonUnion(schema); 201 | if (singletonUnion != null) { 202 | return messageToColumn(ctx, singletonUnion, message, columnDef); 203 | } 204 | if (columnDef.getColumnType() == DuckDBColumnType.UNION) { 205 | UnionColumnDef unionColumnDef = (UnionColumnDef) columnDef; 206 | data = getData(message); 207 | int unionIndex = data.resolveUnion(schema, message); 208 | String unionBranch = "u" + unionIndex; 209 | ctx.putUnionBranch(unionColumnDef, unionBranch); 210 | return messageToColumn(ctx, schema.getTypes().get(unionIndex), message, 211 | unionColumnDef.getColumnDefs().get(unionBranch)); 212 | } 213 | break; 214 | case FIXED: 215 | case BYTES: 216 | if (message instanceof ByteBuffer) { 217 | message = ((ByteBuffer) message).array(); 218 | } else if (message instanceof GenericFixed) { 219 | message = ((GenericFixed) message).bytes(); 220 | } 221 | break; 222 | case STRING: 223 | // NOTE: DuckDB fails when passing a UUID instance in the test 224 | if (message instanceof Utf8 || message instanceof UUID) { 225 | message = message.toString(); 226 | } 227 | break; 228 | case INT: 229 | break; 230 | case LONG: 231 | if (message instanceof Instant) { 232 | message = Timestamp.from((Instant) message); 233 | } 234 | break; 235 | case FLOAT: 236 | case DOUBLE: 237 | case BOOLEAN: 238 | case NULL: 239 | default: 240 | break; 241 | } 242 | return message; 243 | } 244 | 245 | private static GenericData getData(Object message) { 246 | if (message instanceof SpecificRecord) { 247 | return SpecificData.get(); 248 | } else if (message instanceof GenericRecord) { 249 | return GenericData.get(); 250 | } else { 251 | return ReflectData.get(); 252 | } 253 | } 254 | } 255 | -------------------------------------------------------------------------------- /src/main/java/io/kcache/kwack/KwackMain.java: -------------------------------------------------------------------------------- 1 | package io.kcache.kwack; 2 | 3 | import static io.kcache.kwack.KwackEngine.MOCK_SR; 4 | 5 | import com.fasterxml.jackson.databind.ObjectMapper; 6 | import io.kcache.KafkaCacheConfig; 7 | import io.kcache.kwack.KwackConfig.ListPropertyParser; 8 | import io.kcache.kwack.KwackConfig.MapPropertyParser; 9 | import io.kcache.kwack.KwackConfig.RowAttribute; 10 | import io.kcache.kwack.util.Jackson; 11 | import io.reactivex.rxjava3.core.Observable; 12 | import io.reactivex.rxjava3.disposables.Disposable; 13 | import java.io.PrintWriter; 14 | import java.util.EnumSet; 15 | import org.apache.kafka.common.config.ConfigException; 16 | import org.slf4j.Logger; 17 | import org.slf4j.LoggerFactory; 18 | import picocli.CommandLine; 19 | import picocli.CommandLine.Command; 
20 | import picocli.CommandLine.Option; 21 | 22 | import java.io.File; 23 | import java.io.IOException; 24 | import java.net.URL; 25 | import java.util.Enumeration; 26 | import java.util.HashMap; 27 | import java.util.List; 28 | import java.util.Map; 29 | import java.util.concurrent.Callable; 30 | import java.util.jar.Attributes; 31 | import java.util.jar.Manifest; 32 | import java.util.stream.Collectors; 33 | 34 | @Command(name = "kwack", mixinStandardHelpOptions = true, 35 | versionProvider = KwackMain.ManifestVersionProvider.class, 36 | description = "In-Memory Analytics for Kafka using DuckDB.", 37 | sortOptions = false, sortSynopsis = false) 38 | public class KwackMain implements Callable<Integer> { 39 | 40 | private static final Logger LOG = LoggerFactory.getLogger(KwackMain.class); 41 | 42 | private static final ObjectMapper MAPPER = Jackson.newObjectMapper(); 43 | 44 | private static final ListPropertyParser listPropertyParser = new ListPropertyParser(); 45 | private static final MapPropertyParser mapPropertyParser = new MapPropertyParser(); 46 | 47 | private KwackConfig config; 48 | 49 | @Option(names = {"-t", "--topic"}, 50 | description = "Topic(s) to consume from and produce to", paramLabel = "<topic>") 51 | private List<String> topics; 52 | 53 | @Option(names = {"-p", "--partition"}, 54 | description = "Partition(s)", paramLabel = "<partition>") 55 | private List<Integer> partitions; 56 | 57 | @Option(names = {"-b", "--bootstrap-server"}, 58 | description = "Bootstrap broker(s) (host:[port])", paramLabel = "<broker>") 59 | private List<String> bootstrapBrokers; 60 | 61 | @Option(names = {"-m", "--metadata-timeout"}, 62 | description = "Metadata (et al.) request timeout", paramLabel = "<ms>") 63 | private Integer initTimeout; 64 | 65 | @Option(names = {"-F", "--file"}, 66 | description = "Read configuration properties from file", paramLabel = "<config-file>") 67 | private File configFile; 68 | 69 | @Option(names = {"-o", "--offset"}, 70 | description = "Offset to start consuming from:\n" 71 | + "  beginning | end |\n" 72 | + "  <value>  (absolute offset) |\n" 73 | + "  -<value> (relative offset from end)\n" 74 | + "  @<value> (timestamp in ms to start at)\n" 75 | + "  Default: beginning") 76 | private KafkaCacheConfig.Offset offset; 77 | 78 | @Option(names = {"-k", "--key-serde"}, 79 | description = "(De)serialize keys using <serde>", paramLabel = "<topic=serde>") 80 | private Map<String, KwackConfig.Serde> keySerdes; 81 | 82 | @Option(names = {"-v", "--value-serde"}, 83 | description = "(De)serialize values using <serde>\n" 84 | + "Available serdes:\n" 85 | + "  short | int | long | float |\n" 86 | + "  double | string | json | binary |\n" 87 | + "  avro:<schema|@file> |\n" 88 | + "  json:<schema|@file> |\n" 89 | + "  proto:<schema|@file> |\n" 90 | + "  latest (use latest version in SR) |\n" 91 | + "  <id> (use schema id from SR)\n" 92 | + "  Default for key: binary\n" 93 | + "  Default for value: latest\n" 94 | + "The proto/latest/<id> serde formats can\n" 95 | + "also take a message type name, e.g.\n" 96 | + "  proto:<schema|@file>;msg:<msg-type-name>\n" 97 | + "in case multiple message types exist", 98 | 99 | paramLabel = "<topic=serde>") 100 | private Map<String, KwackConfig.Serde> valueSerdes; 101 | 102 | @Option(names = {"-r", "--schema-registry-url"}, 103 | description = "SR (Schema Registry) URL", paramLabel = "<url>") 104 | private String schemaRegistryUrl; 105 | 106 | @Option(names = {"-q", "--query"}, 107 | description = "SQL query to execute. If none is specified, interactive sqlline mode is used", 108 | paramLabel = "<query>") 109 | private String query; 110 | 111 | @Option(names = {"-a", "--row-attribute"}, 112 | description = "Row attribute(s) to show:\n" 113 | + "  none\n" 114 | + "  rowkey (record key)\n" 115 | + "  ksi (key schema id)\n" 116 | + "  vsi (value schema id)\n" 117 | + "  top (topic)\n" 118 | + "  par (partition)\n" 119 | + "  off (offset)\n" 120 | + "  ts (timestamp)\n" 121 | + "  tst (timestamp type)\n" 122 | + "  epo (leadership epoch)\n" 123 | + "  hdr (headers)\n" 124 | + "  Default: rowkey,ksi,vsi,par,off,ts,hdr", paramLabel = "<attr>") 125 | private EnumSet<RowAttribute> rowAttrs; 126 | 127 | @Option(names = {"-d", "--db"}, 128 | description = "DuckDB db, appended to 'jdbc:duckdb:' Default: :memory:", paramLabel = "<db>") 129 | private String db; 130 | 131 | @Option(names = {"-x", "--skip-bytes"}, 132 | description = "Extra bytes to skip when deserializing with an external schema", 133 | paramLabel = "<bytes>") 134 | private Integer bytesToSkip; 135 | 136 | @Option(names = {"-X", "--property"}, 137 | description = "Set configuration property.", paramLabel = "<prop=val>") 138 | private Map<String, String> properties; 139 | 140 | public KwackMain() { 141 | } 142 | 143 | public KwackMain(KwackConfig config) { 144 | this.config = config; 145 | } 146 | 147 | @Override 148 | public Integer call() throws Exception { 149 | if (configFile != null) { 150 | config = new KwackConfig(configFile); 151 | } 152 | config = updateConfig(); 153 | 154 | KwackEngine engine = KwackEngine.getInstance(); 155 | engine.configure(config); 156 | engine.init(); 157 | Observable<Map<String, Object>> obs = engine.start(); 158 | PrintWriter pw = new PrintWriter(System.out); 159 | Disposable d = obs.map(MAPPER::writeValueAsString).subscribe( 160 | pw::println, 161 | pw::println, 162 | pw::flush 163 | ); 164 | return 0; 165 | } 166 | 167 | private KwackConfig updateConfig() { 168 | Map<String, String> props = new HashMap<>(); 169 | if (config != null) { 170 | props.putAll(config.originalsStrings()); 171 | } 172 | if (topics != null) { 173 | props.put(KwackConfig.TOPICS_CONFIG, String.join(",", topics)); 174 | } 175 | if (partitions != null) { 176 | props.put(KwackConfig.KAFKACACHE_TOPIC_PARTITIONS_CONFIG, partitions.stream() 177 | .map(Object::toString) 178 | .collect(Collectors.joining(","))); 179 | } 180 | if (bootstrapBrokers != null) { 181 | props.put( 182 | KwackConfig.KAFKACACHE_BOOTSTRAP_SERVERS_CONFIG, String.join(",", bootstrapBrokers)); 183 | } 184 | if (initTimeout != null) { 185 | props.put(KwackConfig.KAFKACACHE_INIT_TIMEOUT_CONFIG, String.valueOf(initTimeout)); 186 | } 187 | if (offset != null) { 188 | props.put(KwackConfig.KAFKACACHE_TOPIC_PARTITIONS_OFFSET_CONFIG, offset.toString()); 189 | } 190 | if (keySerdes != null) { 191 | props.put(KwackConfig.KEY_SERDES_CONFIG, 192 | mapPropertyParser.asString(keySerdes.entrySet().stream() 193 | .collect(Collectors.toMap( 194 | Map.Entry::getKey, 195 | e -> e.getValue().toString())) 196 | )); 197 | } 198 | if (valueSerdes != null) { 199 | props.put(KwackConfig.VALUE_SERDES_CONFIG, 200 | mapPropertyParser.asString(valueSerdes.entrySet().stream() 201 | .collect(Collectors.toMap( 202 | Map.Entry::getKey, 203 | e -> e.getValue().toString())) 204 | )); 205 | } 206 | if (query != null) { 207 | props.put(KwackConfig.QUERY_CONFIG, query); 208 | } 209 | if (rowAttrs != null) { 210 | props.put(KwackConfig.ROW_ATTRIBUTES_CONFIG, rowAttrs.stream() 211 | .map(Enum::name) 212 | .collect(Collectors.joining(","))); 213 | } 214 | if (db != null) { 215 | props.put(KwackConfig.DB_CONFIG, db); 216 | } 217 | if (bytesToSkip != null) { 218 | props.put(KwackConfig.SKIP_BYTES_CONFIG, String.valueOf(bytesToSkip)); 219 | } 220 | if (schemaRegistryUrl != null) { 221 | props.put(KwackConfig.SCHEMA_REGISTRY_URL_CONFIG, schemaRegistryUrl); 222 | } else { 223 | props.put(KwackConfig.SCHEMA_REGISTRY_URL_CONFIG, MOCK_SR); 224 | } 225 | if (properties != null) { 226 | props.putAll(properties); 227 | } 228 | return new KwackConfig(props); 229 | } 230 | 231 | static class OffsetConverter implements CommandLine.ITypeConverter<KafkaCacheConfig.Offset> { 232 | @Override 233 | public KafkaCacheConfig.Offset convert(String value) { 234 | try { 235 | return new KafkaCacheConfig.Offset(value); 236 | } catch (ConfigException e) { 237 | throw new CommandLine.TypeConversionException("expected one of [beginning, end, " 238 | + "<value>, -<value>, @<value>] but was '" + value + "'"); 239 | } 240 | } 241 | } 242 | 243 | static class SerdeConverter implements CommandLine.ITypeConverter<KwackConfig.Serde> { 244 | @Override 245 | public KwackConfig.Serde convert(String value) { 246 | try { 247 | return new KwackConfig.Serde(value); 248 | } catch (ConfigException e) { 249 | throw new CommandLine.TypeConversionException(e.getMessage()); 250 | } 251 | } 252 | } 253 | 254 | static class ManifestVersionProvider implements CommandLine.IVersionProvider { 255 | public String[] getVersion() throws Exception { 256 | Enumeration<URL> resources = CommandLine.class.getClassLoader().getResources("META-INF/MANIFEST.MF"); 257 | while (resources.hasMoreElements()) { 258 | URL url = resources.nextElement(); 259 | try { 260 | Manifest manifest = new Manifest(url.openStream()); 261 | if (isApplicableManifest(manifest)) { 262 | Attributes attr = manifest.getMainAttributes(); 263 | return new String[]{ 264 | "kwack - In-Memory Analytics for Kafka using DuckDB", 265 | "https://github.com/rayokota/kwack", 266 | "Copyright (c) 2024, Robert Yokota", 267 | "Version " + get(attr, "Implementation-Version") 268 | }; 269 | } 270 | } catch (IOException ex) { 271 | return new String[]{"Unable to read from " + url + ": " + ex}; 272 | } 273 | } 274 | return new String[0]; 275 | } 276 | 277 | private boolean isApplicableManifest(Manifest manifest) { 278 | Attributes attributes = manifest.getMainAttributes(); 279 | return "kwack".equals(get(attributes, "Implementation-Title")); 280 | } 281 | 282 | private static Object get(Attributes attributes, String key) { 283 | return attributes.get(new Attributes.Name(key)); 284 | } 285 | } 286 | 287 | public static void main(String[] args) { 288 | CommandLine commandLine = new CommandLine(new KwackMain()); 289 | if (args.length == 0) { 290 | commandLine.usage(System.out); 291 | System.exit(1); 292 | } 293 | commandLine.registerConverter(KafkaCacheConfig.Offset.class, new OffsetConverter()); 294 | commandLine.registerConverter(KwackConfig.Serde.class, new SerdeConverter()); 295 | commandLine.setCaseInsensitiveEnumValuesAllowed(true); 296 | commandLine.setUsageHelpLongOptionsMaxWidth(30); 297 | int exitCode = commandLine.execute(args); 298 | System.exit(exitCode); 299 | } 300 | } 301 | -------------------------------------------------------------------------------- /src/test/java/io/kcache/kwack/JsonNoSchemaTest.java: -------------------------------------------------------------------------------- 1 | package io.kcache.kwack; 2 | 3 | import static org.junit.jupiter.api.Assertions.assertEquals; 4 | 5 | import com.fasterxml.jackson.annotation.JsonSubTypes; 6 | import com.fasterxml.jackson.annotation.JsonTypeInfo; 7 | import com.fasterxml.jackson.databind.JsonNode; 8 | import 
com.fasterxml.jackson.databind.ObjectMapper; 9 | import com.google.common.collect.ImmutableList; 10 | import com.google.common.collect.ImmutableMap; 11 | import com.google.common.collect.Lists; 12 | import io.reactivex.rxjava3.core.Observable; 13 | import java.io.IOException; 14 | import java.util.ArrayList; 15 | import java.util.HashMap; 16 | import java.util.List; 17 | import java.util.Map; 18 | import java.util.Objects; 19 | import java.util.Properties; 20 | import org.apache.kafka.clients.producer.KafkaProducer; 21 | import org.apache.kafka.common.serialization.StringSerializer; 22 | import org.junit.jupiter.api.Test; 23 | 24 | public class JsonNoSchemaTest extends AbstractSchemaTest { 25 | 26 | private static ObjectMapper objectMapper = new ObjectMapper(); 27 | 28 | private Simple createSimpleObj() { 29 | Simple simple = new Simple(); 30 | simple.setId(123); 31 | simple.setName("hi"); 32 | return simple; 33 | } 34 | 35 | private Complex createComplexObj() { 36 | Complex obj = new Complex("test"); 37 | obj.setMystring("testUser"); 38 | obj.setMyint(1); 39 | obj.setMylong(2L); 40 | obj.setMyfloat(3.0f); 41 | obj.setMydouble(4.0d); 42 | obj.setMyboolean(true); 43 | obj.setMyenum(Color.GREEN); 44 | obj.setMykind(new Kind2("kind2")); 45 | obj.setArray(ImmutableList.of(new Data("hi"), new Data("there"))); 46 | obj.setMap(ImmutableMap.of("bye", new Data("there"))); 47 | return obj; 48 | } 49 | 50 | @Test 51 | public void testSimple() throws IOException { 52 | Simple obj = createSimpleObj(); 53 | Properties producerProps = createProducerProps(MOCK_URL); 54 | KafkaProducer producer = createProducer(producerProps); 55 | produce(producer, getTopic(), new Object[]{objectMapper.writeValueAsString(obj)}); 56 | producer.close(); 57 | 58 | engine.init(); 59 | Observable<Map<String, Object>> obs = engine.start(); 60 | List<Map<String, Object>> lm = Lists.newArrayList(obs.blockingIterable().iterator()); 61 | Map<String, Object> m = lm.get(0); 62 | JsonNode node = objectMapper.readTree(m.get("rowval").toString()); 63 | assertEquals("hi", node.get("name").asText()); 64 | assertEquals(123L, node.get("id").asLong()); 65 | } 66 | 67 | @Test 68 | public void testComplex() throws IOException { 69 | Complex obj = createComplexObj(); 70 | Properties producerProps = createProducerProps(MOCK_URL); 71 | KafkaProducer producer = createProducer(producerProps); 72 | produce(producer, getTopic(), new Object[]{objectMapper.writeValueAsString(obj)}); 73 | producer.close(); 74 | 75 | engine.init(); 76 | Observable<Map<String, Object>> obs = engine.start(); 77 | List<Map<String, Object>> lm = Lists.newArrayList(obs.blockingIterable().iterator()); 78 | Map<String, Object> m = lm.get(0); 79 | JsonNode node = objectMapper.readTree(m.get("rowval").toString()); 80 | assertEquals("test", node.get("name").asText()); 81 | assertEquals("testUser", node.get("mystring").asText()); 82 | assertEquals(1L, node.get("myint").asLong()); 83 | assertEquals(2L, node.get("mylong").asLong()); 84 | assertEquals(3.0d, node.get("myfloat").asDouble()); 85 | assertEquals(4.0d, node.get("mydouble").asDouble()); 86 | assertEquals(true, node.get("myboolean").asBoolean()); 87 | assertEquals("GREEN", node.get("myenum").asText()); 88 | JsonNode node2 = node.get("mykind"); 89 | assertEquals("kind2", node2.get("kind2String").asText()); 90 | assertEquals("kind2", node2.get("type").asText()); 91 | JsonNode node3 = node.get("array"); 92 | JsonNode node4 = node3.get(0); 93 | assertEquals("hi", node4.get("data").asText()); 94 | JsonNode node5 = node3.get(1); 95 | assertEquals("there", node5.get("data").asText()); 96 | JsonNode node6 = node.get("map"); 97 | JsonNode node7 
= node6.get("bye"); 98 | assertEquals("there", node7.get("data").asText()); 99 | } 100 | 101 | @Override 102 | protected String getTopic() { 103 | return "test-json"; 104 | } 105 | 106 | @Override 107 | protected Class getValueSerializer() { 108 | return StringSerializer.class; 109 | } 110 | 111 | @Override 112 | protected void injectKwackProperties(Properties props) { 113 | super.injectKwackProperties(props); 114 | props.put(KwackConfig.VALUE_SERDES_CONFIG, getTopic() + "=json"); 115 | } 116 | 117 | public static class Simple { 118 | 119 | private int id; 120 | private String name; 121 | 122 | public Simple() { 123 | } 124 | 125 | public int getId() { 126 | return id; 127 | } 128 | 129 | public void setId(int id) { 130 | this.id = id; 131 | } 132 | 133 | public String getName() { 134 | return name; 135 | } 136 | 137 | public void setName(String name) { 138 | this.name = name; 139 | } 140 | 141 | @Override 142 | public boolean equals(Object o) { 143 | if (this == o) { 144 | return true; 145 | } 146 | if (o == null || getClass() != o.getClass()) { 147 | return false; 148 | } 149 | Simple simple = (Simple) o; 150 | return id == simple.id && Objects.equals(name, simple.name); 151 | } 152 | 153 | @Override 154 | public int hashCode() { 155 | return Objects.hash(id, name); 156 | } 157 | } 158 | 159 | public enum Color { 160 | RED, AMBER, GREEN 161 | } 162 | 163 | public static class Complex { 164 | 165 | private String name; 166 | private String mystring; 167 | private int myint; 168 | private long mylong; 169 | private float myfloat; 170 | private double mydouble; 171 | private boolean myboolean; 172 | private Color myenum; 173 | private Kind mykind; 174 | private List array = new ArrayList<>(); 175 | private Map map = new HashMap<>(); 176 | 177 | public Complex() { 178 | } 179 | 180 | public Complex(String name) { 181 | this.name = name; 182 | } 183 | 184 | public String getName() { 185 | return name; 186 | } 187 | 188 | public void setName(String name) { 189 | this.name = name; 190 | } 191 | 192 | public String getMystring() { 193 | return mystring; 194 | } 195 | 196 | public void setMystring(String mystring) { 197 | this.mystring = mystring; 198 | } 199 | 200 | public int getMyint() { 201 | return myint; 202 | } 203 | 204 | public void setMyint(int myint) { 205 | this.myint = myint; 206 | } 207 | 208 | public long getMylong() { 209 | return mylong; 210 | } 211 | 212 | public void setMylong(long mylong) { 213 | this.mylong = mylong; 214 | } 215 | 216 | public float getMyfloat() { 217 | return myfloat; 218 | } 219 | 220 | public void setMyfloat(float myfloat) { 221 | this.myfloat = myfloat; 222 | } 223 | 224 | public double getMydouble() { 225 | return mydouble; 226 | } 227 | 228 | public void setMydouble(double mydouble) { 229 | this.mydouble = mydouble; 230 | } 231 | 232 | public boolean isMyboolean() { 233 | return myboolean; 234 | } 235 | 236 | public void setMyboolean(boolean myboolean) { 237 | this.myboolean = myboolean; 238 | } 239 | 240 | public Color getMyenum() { 241 | return myenum; 242 | } 243 | 244 | public void setMyenum(Color myenum) { 245 | this.myenum = myenum; 246 | } 247 | 248 | public Kind getMykind() { 249 | return mykind; 250 | } 251 | 252 | public void setMykind(Kind mykind) { 253 | this.mykind = mykind; 254 | } 255 | 256 | public List getArray() { 257 | return array; 258 | } 259 | 260 | public void setArray(List array) { 261 | this.array = array; 262 | } 263 | 264 | public Map getMap() { 265 | return map; 266 | } 267 | 268 | public void setMap(Map map) { 269 | this.map 
= map; 270 | } 271 | 272 | @Override 273 | public boolean equals(Object o) { 274 | if (this == o) { 275 | return true; 276 | } 277 | if (o == null || getClass() != o.getClass()) { 278 | return false; 279 | } 280 | Complex obj = (Complex) o; 281 | return myint == obj.myint 282 | && mylong == obj.mylong 283 | && Float.compare(myfloat, obj.myfloat) == 0 284 | && Double.compare(mydouble, obj.mydouble) == 0 285 | && myboolean == obj.myboolean 286 | && myenum == obj.myenum 287 | && Objects.equals(name, obj.name) 288 | && Objects.equals(mystring, obj.mystring) 289 | && Objects.equals(array, obj.array) 290 | && Objects.equals(map, obj.map); 291 | } 292 | 293 | @Override 294 | public int hashCode() { 295 | return Objects.hash( 296 | name, mystring, myint, mylong, myfloat, mydouble, myboolean, myenum, array, map); 297 | } 298 | } 299 | 300 | public static class Data { 301 | 302 | private String data; 303 | 304 | public Data() { 305 | } 306 | 307 | public Data(String data) { 308 | this.data = data; 309 | } 310 | 311 | public String getData() { 312 | return data; 313 | } 314 | 315 | @Override 316 | public boolean equals(Object o) { 317 | if (this == o) { 318 | return true; 319 | } 320 | if (o == null || getClass() != o.getClass()) { 321 | return false; 322 | } 323 | Data data1 = (Data) o; 324 | return Objects.equals(data, data1.data); 325 | } 326 | 327 | @Override 328 | public int hashCode() { 329 | return Objects.hashCode(data); 330 | } 331 | } 332 | 333 | @JsonTypeInfo( 334 | use = JsonTypeInfo.Id.NAME, 335 | include = JsonTypeInfo.As.PROPERTY, 336 | property = "type") 337 | @JsonSubTypes({ 338 | @JsonSubTypes.Type(value = Kind1.class, name = "kind1"), 339 | @JsonSubTypes.Type(value = Kind2.class, name = "kind2")}) 340 | public abstract class Kind { 341 | } 342 | 343 | public class Kind1 extends Kind { 344 | public Kind1(String kind1String) { 345 | this.kind1String = kind1String; 346 | } 347 | 348 | public final String kind1String; 349 | } 350 | 351 | public class Kind2 extends Kind { 352 | public Kind2(String kind2String) { 353 | this.kind2String = kind2String; 354 | } 355 | 356 | public final String kind2String; 357 | } 358 | 359 | public static class BadNameContainer { 360 | 361 | private int id; 362 | private BadName badName; 363 | 364 | public BadNameContainer(int id, BadName badName) { 365 | this.id = id; 366 | this.badName = badName; 367 | } 368 | 369 | public int getId() { 370 | return id; 371 | } 372 | 373 | public BadName getBadName() { 374 | return badName; 375 | } 376 | 377 | @Override 378 | public boolean equals(Object o) { 379 | if (this == o) { 380 | return true; 381 | } 382 | if (o == null || getClass() != o.getClass()) { 383 | return false; 384 | } 385 | BadNameContainer that = (BadNameContainer) o; 386 | return id == that.id && Objects.equals(badName, that.badName); 387 | } 388 | 389 | @Override 390 | public int hashCode() { 391 | return Objects.hash(id, badName); 392 | } 393 | } 394 | 395 | public static class BadName { 396 | private String name; 397 | private int group; 398 | private long order; 399 | 400 | public BadName(String name, int group, long order) { 401 | this.name = name; 402 | this.group = group; 403 | this.order = order; 404 | } 405 | 406 | public String getName() { 407 | return name; 408 | } 409 | 410 | public int getGroup() { 411 | return group; 412 | } 413 | 414 | public long getOrder() { 415 | return order; 416 | } 417 | 418 | @Override 419 | public boolean equals(Object o) { 420 | if (this == o) { 421 | return true; 422 | } 423 | if (o == null || getClass() != 
o.getClass()) { 424 | return false; 425 | } 426 | BadName badName = (BadName) o; 427 | return group == badName.group 428 | && order == badName.order 429 | && Objects.equals(name, badName.name); 430 | } 431 | 432 | @Override 433 | public int hashCode() { 434 | return Objects.hash(name, group, order); 435 | } 436 | } 437 | } -------------------------------------------------------------------------------- /src/test/java/io/kcache/kwack/AvroKeyTest.java: -------------------------------------------------------------------------------- 1 | package io.kcache.kwack; 2 | 3 | import static org.junit.jupiter.api.Assertions.assertEquals; 4 | import static org.junit.jupiter.api.Assertions.assertNull; 5 | 6 | import com.google.common.collect.ImmutableList; 7 | import com.google.common.collect.ImmutableMap; 8 | import com.google.common.collect.Lists; 9 | import io.confluent.kafka.serializers.KafkaAvroSerializerConfig; 10 | import io.reactivex.rxjava3.core.Observable; 11 | import java.io.IOException; 12 | import java.math.BigDecimal; 13 | import java.nio.ByteBuffer; 14 | import java.sql.Timestamp; 15 | import java.time.Instant; 16 | import java.time.LocalDate; 17 | import java.time.LocalTime; 18 | import java.util.Base64; 19 | import java.util.List; 20 | import java.util.Map; 21 | import java.util.Properties; 22 | import java.util.UUID; 23 | import org.apache.avro.Schema; 24 | import org.apache.avro.generic.GenericData; 25 | import org.apache.avro.generic.GenericRecord; 26 | import org.apache.avro.generic.IndexedRecord; 27 | import org.apache.kafka.clients.producer.KafkaProducer; 28 | import org.junit.jupiter.api.Test; 29 | 30 | public class AvroKeyTest extends AbstractSchemaTest { 31 | 32 | @Override 33 | protected Properties createProducerProps(String schemaRegistryUrl) { 34 | Properties props = super.createProducerProps(schemaRegistryUrl); 35 | props.put(KafkaAvroSerializerConfig.AVRO_USE_LOGICAL_TYPE_CONVERTERS_CONFIG, true); 36 | return props; 37 | } 38 | 39 | private Schema createSimpleSchema() { 40 | return new Schema.Parser().parse( 41 | "{\"namespace\": \"namespace\",\n" 42 | + " \"type\": \"record\",\n" 43 | + " \"name\": \"test\",\n" 44 | + " \"fields\": [\n" 45 | + " {\"name\": \"f1\", \"type\": \"string\"},\n" 46 | + " {\"name\": \"f2\", \"type\": \"int\"}\n" 47 | + "]\n" 48 | + "}"); 49 | } 50 | 51 | private IndexedRecord createSimpleRecord() { 52 | return createSimpleRecord(123); 53 | } 54 | 55 | private IndexedRecord createSimpleRecord(int f2) { 56 | Schema schema = createSimpleSchema(); 57 | GenericRecord avroRecord = new GenericData.Record(schema); 58 | avroRecord.put("f1", "hi"); 59 | avroRecord.put("f2", f2); 60 | return avroRecord; 61 | } 62 | 63 | private Schema createEnumSchema() { 64 | String enumSchema = "{\"name\": \"Kind\",\"namespace\": \"example.avro\",\n" 65 | + " \"type\": \"enum\",\n" 66 | + " \"symbols\" : [\"ONE\", \"TWO\", \"THREE\"]\n" 67 | + "}"; 68 | Schema.Parser parser = new Schema.Parser(); 69 | return parser.parse(enumSchema); 70 | } 71 | 72 | private Schema createFixedSchema() { 73 | String fixedSchema = "{\"name\": \"Fixed\",\n" 74 | + " \"type\": \"fixed\",\n" 75 | + " \"size\" : 4\n" 76 | + "}"; 77 | Schema.Parser parser = new Schema.Parser(); 78 | return parser.parse(fixedSchema); 79 | } 80 | 81 | private Schema createComplexSchema() { 82 | return new Schema.Parser().parse( 83 | "{\"namespace\": \"namespace\",\n" 84 | + " \"type\": \"record\",\n" 85 | + " \"name\": \"test\",\n" 86 | + " \"fields\": [\n" 87 | + " {\"name\": \"null\", \"type\": \"null\"},\n" 88 
| + " {\"name\": \"boolean\", \"type\": \"boolean\"},\n" 89 | + " {\"name\": \"int\", \"type\": \"int\"},\n" 90 | + " {\"name\": \"long\", \"type\": \"long\"},\n" 91 | + " {\"name\": \"float\", \"type\": \"float\"},\n" 92 | + " {\"name\": \"double\", \"type\": \"double\"},\n" 93 | + " {\"name\": \"bytes\", \"type\": \"bytes\"},\n" 94 | + " {\"name\": \"string\", \"type\": \"string\", \"aliases\": [\"string_alias\"]},\n" 95 | + " {\"name\": \"enum\",\n" 96 | + " \"type\": {\n" 97 | + " \"name\": \"Kind\",\n" 98 | + " \"type\": \"enum\",\n" 99 | + " \"symbols\" : [\"ONE\", \"TWO\", \"THREE\"]\n" 100 | + " }\n" 101 | + " },\n" 102 | + " {\"name\": \"array\",\n" 103 | + " \"type\": {\n" 104 | + " \"type\": \"array\",\n" 105 | + " \"items\" : \"string\"\n" 106 | + " }\n" 107 | + " },\n" 108 | + " {\"name\": \"map\",\n" 109 | + " \"type\": {\n" 110 | + " \"type\": \"map\",\n" 111 | + " \"values\" : \"string\"\n" 112 | + " }\n" 113 | + " },\n" 114 | + " {\"name\": \"nullable_string\", \"type\": [\"null\", \"string\"]},\n" 115 | + " {\"name\": \"union\", \"type\": [\"null\", \"string\", \"int\"]},\n" 116 | + " {\"name\": \"fixed\",\n" 117 | + " \"type\": {\n" 118 | + " \"name\": \"Fixed\",\n" 119 | + " \"type\": \"fixed\",\n" 120 | + " \"size\" : 4\n" 121 | + " }\n" 122 | + " },\n" 123 | + " {\"name\": \"decimal\", \"type\": {\"type\": \"bytes\",\n" 124 | + " \"logicalType\": \"decimal\", \"precision\": 5, \"scale\": 2}},\n" 125 | + " {\"name\": \"uuid\", \"type\": {\"type\": \"string\", \"logicalType\": \"uuid\"}},\n" 126 | + " {\"name\": \"date\", \"type\": {\"type\": \"int\", \"logicalType\": \"date\"}},\n" 127 | + " {\"name\": \"time\", \"type\": {\"type\": \"int\", \"logicalType\": \"time-millis\"}},\n" 128 | + " {\"name\": \"timestamp\", \"type\": {\"type\": \"long\", \"logicalType\": \"timestamp-millis\"}}\n" 129 | + "]\n" 130 | + "}"); 131 | } 132 | 133 | private IndexedRecord createComplexRecord() { 134 | Schema enumSchema = createEnumSchema(); 135 | Schema fixedSchema = createFixedSchema(); 136 | Schema schema = createComplexSchema(); 137 | GenericRecord avroRecord = new GenericData.Record(schema); 138 | avroRecord.put("null", null); 139 | avroRecord.put("boolean", true); 140 | avroRecord.put("int", 1); 141 | avroRecord.put("long", 2L); 142 | avroRecord.put("float", 3.0f); 143 | avroRecord.put("double", 4.0d); 144 | avroRecord.put("bytes", ByteBuffer.wrap(new byte[]{0, 1, 2})); 145 | avroRecord.put("string", "testUser"); 146 | avroRecord.put("enum", new GenericData.EnumSymbol(enumSchema, "ONE")); 147 | avroRecord.put("array", ImmutableList.of("hi", "there")); 148 | avroRecord.put("map", ImmutableMap.of("bye", "there")); 149 | avroRecord.put("nullable_string", "zap"); 150 | avroRecord.put("union", 123); 151 | avroRecord.put("fixed", new GenericData.Fixed(fixedSchema, new byte[]{0, 0, 0, 0})); 152 | avroRecord.put("decimal", new BigDecimal("123.45")); 153 | avroRecord.put("uuid", UUID.fromString("d21998e8-8737-432e-a83c-13768dabd821")); 154 | avroRecord.put("date", LocalDate.of(2024, 1, 1)); 155 | avroRecord.put("time", LocalTime.of(8, 30, 30)); 156 | avroRecord.put("timestamp", Instant.ofEpochSecond(1234567890L)); 157 | return avroRecord; 158 | } 159 | 160 | @Test 161 | public void testComplexKey() throws IOException { 162 | IndexedRecord key = createComplexRecord(); 163 | IndexedRecord value = createSimpleRecord(); 164 | Properties producerProps = createProducerProps(MOCK_URL); 165 | KafkaProducer producer = createProducer(producerProps); 166 | produce(producer, getTopic(), new Object[] 
{ key }, new Object[] { value }); 167 | producer.close(); 168 | 169 | engine.init(); 170 | Observable> obs = engine.start(); 171 | List> lm = Lists.newArrayList(obs.blockingIterable().iterator()); 172 | Map row = lm.get(0); 173 | Map m = (Map) row.get("rowkey"); 174 | assertNull(m.get("null")); 175 | assertEquals(true, m.get("boolean")); 176 | assertEquals(1, m.get("int")); 177 | assertEquals(2L, m.get("long")); 178 | assertEquals(3.0f, m.get("float")); 179 | assertEquals(4.0d, m.get("double")); 180 | assertEquals(Base64.getEncoder().encodeToString(new byte[]{0, 1, 2}), m.get("bytes")); 181 | assertEquals("testUser", m.get("string")); 182 | assertEquals("ONE", m.get("enum")); 183 | assertEquals(ImmutableList.of("hi", "there"), m.get("array")); 184 | assertEquals(ImmutableMap.of("bye", "there"), m.get("map")); 185 | assertEquals("zap", m.get("nullable_string")); 186 | assertEquals(123, m.get("union")); 187 | assertEquals(Base64.getEncoder().encodeToString(new byte[]{0, 0, 0, 0}), m.get("fixed")); 188 | assertEquals(new BigDecimal("123.45"), m.get("decimal")); 189 | assertEquals(UUID.fromString("d21998e8-8737-432e-a83c-13768dabd821"), m.get("uuid")); 190 | assertEquals(LocalDate.of(2024, 1, 1), m.get("date")); 191 | assertEquals(LocalTime.of(8, 30, 30), m.get("time")); 192 | assertEquals(Timestamp.from(Instant.ofEpochSecond(1234567890L)), m.get("timestamp")); 193 | } 194 | 195 | @Test 196 | public void testNullKey() throws IOException { 197 | IndexedRecord value = createSimpleRecord(); 198 | Properties producerProps = createProducerProps(MOCK_URL); 199 | KafkaProducer producer = createProducer(producerProps); 200 | produce(producer, getTopic(), new Object[] { null }, new Object[] { value }); 201 | producer.close(); 202 | 203 | engine.init(); 204 | Observable> obs = engine.start(); 205 | List> lm = Lists.newArrayList(obs.blockingIterable().iterator()); 206 | Map m = lm.get(0); 207 | assertEquals("hi", m.get("f1")); 208 | assertEquals(123, m.get("f2")); 209 | } 210 | 211 | @Test 212 | public void testNullValue() throws IOException { 213 | IndexedRecord key = createComplexRecord(); 214 | Properties producerProps = createProducerProps(MOCK_URL); 215 | KafkaProducer producer = createProducer(producerProps); 216 | produce(producer, getTopic(), new Object[] { key }, new Object[] { null }); 217 | producer.close(); 218 | 219 | engine.init(); 220 | Observable> obs = engine.start(); 221 | List> lm = Lists.newArrayList(obs.blockingIterable().iterator()); 222 | Map row = lm.get(0); 223 | Map m = (Map) row.get("rowkey"); 224 | assertNull(m.get("null")); 225 | assertEquals(true, m.get("boolean")); 226 | assertEquals(1, m.get("int")); 227 | assertEquals(2L, m.get("long")); 228 | assertEquals(3.0f, m.get("float")); 229 | assertEquals(4.0d, m.get("double")); 230 | assertEquals(Base64.getEncoder().encodeToString(new byte[]{0, 1, 2}), m.get("bytes")); 231 | assertEquals("testUser", m.get("string")); 232 | assertEquals("ONE", m.get("enum")); 233 | assertEquals(ImmutableList.of("hi", "there"), m.get("array")); 234 | assertEquals(ImmutableMap.of("bye", "there"), m.get("map")); 235 | assertEquals("zap", m.get("nullable_string")); 236 | assertEquals(123, m.get("union")); 237 | assertEquals(Base64.getEncoder().encodeToString(new byte[]{0, 0, 0, 0}), m.get("fixed")); 238 | assertEquals(new BigDecimal("123.45"), m.get("decimal")); 239 | assertEquals(UUID.fromString("d21998e8-8737-432e-a83c-13768dabd821"), m.get("uuid")); 240 | assertEquals(LocalDate.of(2024, 1, 1), m.get("date")); 241 | assertEquals(LocalTime.of(8, 30, 
30), m.get("time")); 242 | assertEquals(Timestamp.from(Instant.ofEpochSecond(1234567890L)), m.get("timestamp")); 243 | } 244 | 245 | @Test 246 | public void testNullKeyAndValue() throws IOException { 247 | Properties producerProps = createProducerProps(MOCK_URL); 248 | KafkaProducer producer = createProducer(producerProps); 249 | produce(producer, getTopic(), new Object[] { null }, new Object[] { null }); 250 | producer.close(); 251 | 252 | engine.init(); 253 | Observable> obs = engine.start(); 254 | List> lm = Lists.newArrayList(obs.blockingIterable().iterator()); 255 | Map row = lm.get(0); 256 | Map m = (Map) row.get("rowkey"); 257 | assertNull(m); 258 | } 259 | 260 | @Test 261 | public void testTombstone() throws IOException { 262 | IndexedRecord key = createComplexRecord(); 263 | IndexedRecord value = createSimpleRecord(); 264 | Properties producerProps = createProducerProps(MOCK_URL); 265 | KafkaProducer producer = createProducer(producerProps); 266 | produce(producer, getTopic(), new Object[] { key, key }, new Object[] { value, null }); 267 | producer.close(); 268 | 269 | engine.init(); 270 | Observable> obs = engine.start(); 271 | List> lm = Lists.newArrayList(obs.blockingIterable().iterator()); 272 | Map m = lm.get(0); 273 | assertEquals("hi", m.get("f1")); 274 | assertEquals(123, m.get("f2")); 275 | } 276 | 277 | @Override 278 | protected String getTopic() { 279 | return "test-avro"; 280 | } 281 | 282 | @Override 283 | protected Class getKeySerializer() { 284 | return io.confluent.kafka.serializers.KafkaAvroSerializer.class; 285 | } 286 | 287 | @Override 288 | protected Class getValueSerializer() { 289 | return io.confluent.kafka.serializers.KafkaAvroSerializer.class; 290 | } 291 | 292 | @Override 293 | protected void injectKwackProperties(Properties props) { 294 | super.injectKwackProperties(props); 295 | props.put(KwackConfig.KEY_SERDES_CONFIG, getTopic() + "=latest"); 296 | } 297 | } 298 | -------------------------------------------------------------------------------- /src/test/java/io/kcache/kwack/AvroTest.java: -------------------------------------------------------------------------------- 1 | package io.kcache.kwack; 2 | 3 | import static org.junit.jupiter.api.Assertions.assertEquals; 4 | import static org.junit.jupiter.api.Assertions.assertNull; 5 | 6 | import com.google.common.collect.ImmutableList; 7 | import com.google.common.collect.ImmutableMap; 8 | import com.google.common.collect.Lists; 9 | import io.confluent.kafka.serializers.KafkaAvroSerializerConfig; 10 | import io.reactivex.rxjava3.core.Observable; 11 | import java.io.IOException; 12 | import java.math.BigDecimal; 13 | import java.nio.ByteBuffer; 14 | import java.sql.Timestamp; 15 | import java.time.Instant; 16 | import java.time.LocalDate; 17 | import java.time.LocalTime; 18 | import java.util.Base64; 19 | import java.util.List; 20 | import java.util.Map; 21 | import java.util.Properties; 22 | import java.util.Random; 23 | import java.util.UUID; 24 | import org.apache.avro.Schema; 25 | import org.apache.avro.generic.GenericData; 26 | import org.apache.avro.generic.GenericRecord; 27 | import org.apache.avro.generic.IndexedRecord; 28 | import org.apache.kafka.clients.producer.KafkaProducer; 29 | import org.junit.jupiter.api.Test; 30 | 31 | public class AvroTest extends AbstractSchemaTest { 32 | 33 | @Override 34 | protected Properties createProducerProps(String schemaRegistryUrl) { 35 | Properties props = super.createProducerProps(schemaRegistryUrl); 36 | 
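// Setting avro.use.logical.type.converters=true makes the Avro serializer accept
// Java types such as BigDecimal, UUID, LocalDate, LocalTime and Instant directly
// for the corresponding Avro logical types (see createComplexRecord below).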
props.put(KafkaAvroSerializerConfig.AVRO_USE_LOGICAL_TYPE_CONVERTERS_CONFIG, true); 37 | return props; 38 | } 39 | 40 | private Schema createSimpleSchema() { 41 | return new Schema.Parser().parse( 42 | "{\"namespace\": \"namespace\",\n" 43 | + " \"type\": \"record\",\n" 44 | + " \"name\": \"test\",\n" 45 | + " \"fields\": [\n" 46 | + " {\"name\": \"f1\", \"type\": \"string\"},\n" 47 | + " {\"name\": \"f2\", \"type\": \"int\"}\n" 48 | + "]\n" 49 | + "}"); 50 | } 51 | 52 | private IndexedRecord createSimpleRecord() { 53 | return createSimpleRecord(123); 54 | } 55 | 56 | private IndexedRecord createSimpleRecord(int f2) { 57 | Schema schema = createSimpleSchema(); 58 | GenericRecord avroRecord = new GenericData.Record(schema); 59 | avroRecord.put("f1", "hi"); 60 | avroRecord.put("f2", f2); 61 | return avroRecord; 62 | } 63 | 64 | private Schema createSimpleExtendedSchema() { 65 | return new Schema.Parser().parse( 66 | "{\"namespace\": \"namespace\",\n" 67 | + " \"type\": \"record\",\n" 68 | + " \"name\": \"test\",\n" 69 | + " \"fields\": [\n" 70 | + " {\"name\": \"f1\", \"type\": \"string\"},\n" 71 | + " {\"name\": \"f2\", \"type\": \"int\"},\n" 72 | + " {\"name\": \"f3\", \"type\": \"string\", \"default\": \"hithere\"}\n" 73 | + "]\n" 74 | + "}"); 75 | } 76 | 77 | private IndexedRecord createSimpleExtendedRecord() { 78 | Schema schema = createSimpleExtendedSchema(); 79 | GenericRecord avroRecord = new GenericData.Record(schema); 80 | avroRecord.put("f1", "hi"); 81 | avroRecord.put("f2", 123); 82 | avroRecord.put("f3", "bye"); 83 | return avroRecord; 84 | } 85 | 86 | private Schema createEnumSchema() { 87 | String enumSchema = "{\"name\": \"Kind\",\"namespace\": \"example.avro\",\n" 88 | + " \"type\": \"enum\",\n" 89 | + " \"symbols\" : [\"ONE\", \"TWO\", \"THREE\"]\n" 90 | + "}"; 91 | Schema.Parser parser = new Schema.Parser(); 92 | return parser.parse(enumSchema); 93 | } 94 | 95 | private Schema createFixedSchema() { 96 | String fixedSchema = "{\"name\": \"Fixed\",\n" 97 | + " \"type\": \"fixed\",\n" 98 | + " \"size\" : 4\n" 99 | + "}"; 100 | Schema.Parser parser = new Schema.Parser(); 101 | return parser.parse(fixedSchema); 102 | } 103 | 104 | private Schema createComplexSchema() { 105 | return new Schema.Parser().parse( 106 | "{\"namespace\": \"namespace\",\n" 107 | + " \"type\": \"record\",\n" 108 | + " \"name\": \"test\",\n" 109 | + " \"fields\": [\n" 110 | + " {\"name\": \"null\", \"type\": \"null\"},\n" 111 | + " {\"name\": \"boolean\", \"type\": \"boolean\"},\n" 112 | + " {\"name\": \"int\", \"type\": \"int\"},\n" 113 | + " {\"name\": \"long\", \"type\": \"long\"},\n" 114 | + " {\"name\": \"float\", \"type\": \"float\"},\n" 115 | + " {\"name\": \"double\", \"type\": \"double\"},\n" 116 | + " {\"name\": \"bytes\", \"type\": \"bytes\"},\n" 117 | + " {\"name\": \"string\", \"type\": \"string\", \"aliases\": [\"string_alias\"]},\n" 118 | + " {\"name\": \"enum\",\n" 119 | + " \"type\": {\n" 120 | + " \"name\": \"Kind\",\n" 121 | + " \"type\": \"enum\",\n" 122 | + " \"symbols\" : [\"ONE\", \"TWO\", \"THREE\"]\n" 123 | + " }\n" 124 | + " },\n" 125 | + " {\"name\": \"array\",\n" 126 | + " \"type\": {\n" 127 | + " \"type\": \"array\",\n" 128 | + " \"items\" : \"string\"\n" 129 | + " }\n" 130 | + " },\n" 131 | + " {\"name\": \"map\",\n" 132 | + " \"type\": {\n" 133 | + " \"type\": \"map\",\n" 134 | + " \"values\" : \"string\"\n" 135 | + " }\n" 136 | + " },\n" 137 | + " {\"name\": \"nullable_string\", \"type\": [\"null\", \"string\"]},\n" 138 | + " {\"name\": \"union\", \"type\": [\"null\", 
\"string\", \"int\"]},\n" 139 | + " {\"name\": \"fixed\",\n" 140 | + " \"type\": {\n" 141 | + " \"name\": \"Fixed\",\n" 142 | + " \"type\": \"fixed\",\n" 143 | + " \"size\" : 4\n" 144 | + " }\n" 145 | + " },\n" 146 | + " {\"name\": \"decimal\", \"type\": {\"type\": \"bytes\",\n" 147 | + " \"logicalType\": \"decimal\", \"precision\": 5, \"scale\": 2}},\n" 148 | + " {\"name\": \"uuid\", \"type\": {\"type\": \"string\", \"logicalType\": \"uuid\"}},\n" 149 | + " {\"name\": \"date\", \"type\": {\"type\": \"int\", \"logicalType\": \"date\"}},\n" 150 | + " {\"name\": \"time\", \"type\": {\"type\": \"int\", \"logicalType\": \"time-millis\"}},\n" 151 | + " {\"name\": \"timestamp\", \"type\": {\"type\": \"long\", \"logicalType\": \"timestamp-millis\"}}\n" 152 | + "]\n" 153 | + "}"); 154 | } 155 | 156 | private IndexedRecord createComplexRecord() { 157 | Schema enumSchema = createEnumSchema(); 158 | Schema fixedSchema = createFixedSchema(); 159 | Schema schema = createComplexSchema(); 160 | GenericRecord avroRecord = new GenericData.Record(schema); 161 | avroRecord.put("null", null); 162 | avroRecord.put("boolean", true); 163 | avroRecord.put("int", 1); 164 | avroRecord.put("long", 2L); 165 | avroRecord.put("float", 3.0f); 166 | avroRecord.put("double", 4.0d); 167 | avroRecord.put("bytes", ByteBuffer.wrap(new byte[]{0, 1, 2})); 168 | avroRecord.put("string", "testUser"); 169 | avroRecord.put("enum", new GenericData.EnumSymbol(enumSchema, "ONE")); 170 | avroRecord.put("array", ImmutableList.of("hi", "there")); 171 | avroRecord.put("map", ImmutableMap.of("bye", "there")); 172 | avroRecord.put("nullable_string", "zap"); 173 | avroRecord.put("union", 123); 174 | avroRecord.put("fixed", new GenericData.Fixed(fixedSchema, new byte[]{0, 0, 0, 0})); 175 | avroRecord.put("decimal", new BigDecimal("123.45")); 176 | avroRecord.put("uuid", UUID.fromString("d21998e8-8737-432e-a83c-13768dabd821")); 177 | avroRecord.put("date", LocalDate.of(2024, 1, 1)); 178 | avroRecord.put("time", LocalTime.of(8, 30, 30)); 179 | avroRecord.put("timestamp", Instant.ofEpochSecond(1234567890L)); 180 | return avroRecord; 181 | } 182 | 183 | private Schema createNullableSchema() { 184 | return new Schema.Parser().parse("{\n" 185 | + " \"type\": \"record\",\n" 186 | + " \"name\": \"testRecord\",\n" 187 | + " \"namespace\": \"com.example.test\",\n" 188 | + " \"fields\": [\n" 189 | + " {\"name\": \"id\", \"type\": \"long\"},\n" 190 | + " {\"name\": \"title\", \"type\": [\"null\",\"string\"], \"default\": null},\n" 191 | + " {\"name\": \"year\", \"type\": [\"null\",\"int\"], \"default\": null},\n" 192 | + " {\"name\": \"sales_number\", \"type\": [\"null\",\"long\"], \"default\": null},\n" 193 | + " {\"name\": \"sales_amount\", \"type\": [\"null\",\"float\"], \"default\": null},\n" 194 | + " {\"name\": \"is_first_publish\", \"type\": [\"null\",\"boolean\"], \"default\": null},\n" 195 | + " {\"name\": \"operation_type\", \"type\": [\"null\",\"string\"], \"default\": null}\n" 196 | + " ]\n" 197 | + "}"); 198 | } 199 | 200 | private IndexedRecord createNullableRecord() { 201 | Schema schema = createNullableSchema(); 202 | GenericRecord avroRecord = new GenericData.Record(schema); 203 | avroRecord.put("id", 123456789L); 204 | avroRecord.put("title", "John"); 205 | avroRecord.put("year", 2021); 206 | avroRecord.put("sales_number", 123456789L); 207 | avroRecord.put("sales_amount", 1.23456792e8f); 208 | avroRecord.put("is_first_publish", true); 209 | avroRecord.put("operation_type", "INSERT"); 210 | return avroRecord; 211 | } 212 | 213 | @Test 
214 | public void testSimple() throws IOException { 215 | IndexedRecord record = createSimpleRecord(); 216 | Properties producerProps = createProducerProps(MOCK_URL); 217 | KafkaProducer producer = createProducer(producerProps); 218 | produce(producer, getTopic(), new Object[] { record }); 219 | producer.close(); 220 | 221 | engine.init(); 222 | Observable<Map<String, Object>> obs = engine.start(); 223 | List<Map<String, Object>> lm = Lists.newArrayList(obs.blockingIterable().iterator()); 224 | Map<String, Object> m = lm.get(0); 225 | assertEquals("hi", m.get("f1")); 226 | assertEquals(123, m.get("f2")); 227 | } 228 | 229 | @Test 230 | public void testSimpleEvolved() throws IOException { 231 | IndexedRecord record = createSimpleRecord(); 232 | IndexedRecord record2 = createSimpleExtendedRecord(); 233 | Properties producerProps = createProducerProps(MOCK_URL); 234 | KafkaProducer producer = createProducer(producerProps); 235 | produce(producer, getTopic(), new Object[] { record, record2 }); 236 | producer.close(); 237 | 238 | engine.init(); 239 | Observable<Map<String, Object>> obs = engine.start(); 240 | List<Map<String, Object>> lm = Lists.newArrayList(obs.blockingIterable().iterator()); 241 | Map<String, Object> m = lm.get(0); 242 | assertEquals("hi", m.get("f1")); 243 | assertEquals(123, m.get("f2")); 244 | m = lm.get(1); 245 | assertEquals("hi", m.get("f1")); 246 | assertEquals(123, m.get("f2")); 247 | assertEquals("bye", m.get("f3")); 248 | } 249 | 250 | @Test 251 | public void testSimpleMany() throws IOException { 252 | int count = 10000; 253 | Random random = new Random(); 254 | Properties producerProps = createProducerProps(MOCK_URL); 255 | KafkaProducer producer = createProducer(producerProps); 256 | for (int i = 0; i < count; i++) { 257 | produce(producer, getTopic(), new Object[] { createSimpleRecord(random.nextInt()) }); 258 | } 259 | producer.close(); 260 | 261 | engine.init(); 262 | Observable<Map<String, Object>> obs = engine.start(); 263 | List<Map<String, Object>> lm = Lists.newArrayList(obs.blockingIterable().iterator()); 264 | assertEquals(count, lm.size()); 265 | } 266 | 267 | @Test 268 | public void testComplex() throws IOException { 269 | IndexedRecord record = createComplexRecord(); 270 | Properties producerProps = createProducerProps(MOCK_URL); 271 | KafkaProducer producer = createProducer(producerProps); 272 | produce(producer, getTopic(), new Object[] { record }); 273 | producer.close(); 274 | 275 | engine.init(); 276 | Observable<Map<String, Object>> obs = engine.start(); 277 | List<Map<String, Object>> lm = Lists.newArrayList(obs.blockingIterable().iterator()); 278 | Map<String, Object> m = lm.get(0); 279 | assertNull(m.get("null")); 280 | assertEquals(true, m.get("boolean")); 281 | assertEquals(1, m.get("int")); 282 | assertEquals(2L, m.get("long")); 283 | assertEquals(3.0f, m.get("float")); 284 | assertEquals(4.0d, m.get("double")); 285 | assertEquals(Base64.getEncoder().encodeToString(new byte[]{0, 1, 2}), m.get("bytes")); 286 | assertEquals("testUser", m.get("string")); 287 | assertEquals("ONE", m.get("enum")); 288 | assertEquals(ImmutableList.of("hi", "there"), m.get("array")); 289 | assertEquals(ImmutableMap.of("bye", "there"), m.get("map")); 290 | assertEquals("zap", m.get("nullable_string")); 291 | assertEquals(123, m.get("union")); 292 | assertEquals(Base64.getEncoder().encodeToString(new byte[]{0, 0, 0, 0}), m.get("fixed")); 293 | assertEquals(new BigDecimal("123.45"), m.get("decimal")); 294 | assertEquals(UUID.fromString("d21998e8-8737-432e-a83c-13768dabd821"), m.get("uuid")); 295 | assertEquals(LocalDate.of(2024, 1, 1), m.get("date")); 296 | assertEquals(LocalTime.of(8, 30, 30), m.get("time")); 297 | 
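// An Avro timestamp-millis value surfaces as java.sql.Timestamp in the row,
// mirroring the Instant -> Timestamp conversion in AvroTransformer's LONG case.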
assertEquals(Timestamp.from(Instant.ofEpochSecond(1234567890L)), m.get("timestamp")); 298 | } 299 | 300 | @Test 301 | public void testNullable() throws IOException { 302 | IndexedRecord record = createNullableRecord(); 303 | Properties producerProps = createProducerProps(MOCK_URL); 304 | KafkaProducer producer = createProducer(producerProps); 305 | produce(producer, getTopic(), new Object[] { record }); 306 | producer.close(); 307 | 308 | engine.init(); 309 | Observable> obs = engine.start(); 310 | List> lm = Lists.newArrayList(obs.blockingIterable().iterator()); 311 | Map m = lm.get(0); 312 | assertEquals(123456789L, m.get("id")); 313 | } 314 | 315 | @Override 316 | protected String getTopic() { 317 | return "test-avro"; 318 | } 319 | 320 | @Override 321 | protected Class getValueSerializer() { 322 | return io.confluent.kafka.serializers.KafkaAvroSerializer.class; 323 | } 324 | } 325 | -------------------------------------------------------------------------------- /src/main/java/io/kcache/kwack/transformer/json/JsonTransformer.java: -------------------------------------------------------------------------------- 1 | package io.kcache.kwack.transformer.json; 2 | 3 | import static io.kcache.kwack.schema.ColumnStrategy.NOT_NULL_STRATEGY; 4 | import static io.kcache.kwack.schema.ColumnStrategy.NULL_STRATEGY; 5 | 6 | import com.fasterxml.jackson.databind.JsonNode; 7 | import com.fasterxml.jackson.databind.node.ArrayNode; 8 | import com.fasterxml.jackson.databind.node.ObjectNode; 9 | import io.confluent.kafka.schemaregistry.ParsedSchema; 10 | import io.confluent.kafka.schemaregistry.json.JsonSchema; 11 | import io.kcache.kwack.schema.ColumnDef; 12 | import io.kcache.kwack.schema.EnumColumnDef; 13 | import io.kcache.kwack.schema.ListColumnDef; 14 | import io.kcache.kwack.schema.MapColumnDef; 15 | import io.kcache.kwack.schema.StructColumnDef; 16 | import io.kcache.kwack.schema.UnionColumnDef; 17 | import io.kcache.kwack.transformer.Context; 18 | import io.kcache.kwack.transformer.Transformer; 19 | import io.vavr.Tuple; 20 | import io.vavr.Tuple2; 21 | import java.util.Arrays; 22 | import java.util.Collection; 23 | import java.util.Collections; 24 | import java.util.HashMap; 25 | import java.util.HashSet; 26 | import java.util.IdentityHashMap; 27 | import java.util.Iterator; 28 | import java.util.LinkedHashMap; 29 | import java.util.Map; 30 | import java.util.Map.Entry; 31 | import java.util.Set; 32 | import java.util.stream.Collectors; 33 | import org.duckdb.DuckDBColumnType; 34 | import org.everit.json.schema.ArraySchema; 35 | import org.everit.json.schema.BooleanSchema; 36 | import org.everit.json.schema.CombinedSchema; 37 | import org.everit.json.schema.ConstSchema; 38 | import org.everit.json.schema.EnumSchema; 39 | import org.everit.json.schema.NullSchema; 40 | import org.everit.json.schema.NumberSchema; 41 | import org.everit.json.schema.ObjectSchema; 42 | import org.everit.json.schema.ReferenceSchema; 43 | import org.everit.json.schema.Schema; 44 | import org.everit.json.schema.StringSchema; 45 | 46 | public class JsonTransformer implements Transformer { 47 | @Override 48 | public ColumnDef schemaToColumnDef(Context ctx, ParsedSchema parsedSchema) { 49 | Schema schema = (Schema) parsedSchema.rawSchema(); 50 | return schemaToColumnDef(ctx, schema); 51 | } 52 | 53 | private ColumnDef schemaToColumnDef(Context ctx, Schema schema) { 54 | LinkedHashMap columnDefs = new LinkedHashMap<>(); 55 | if (schema instanceof BooleanSchema) { 56 | return new ColumnDef(DuckDBColumnType.BOOLEAN); 57 | } 
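// Rough JSON Schema -> DuckDB mapping implemented below: integer -> BIGINT,
// number -> DOUBLE, string -> VARCHAR, const/enum -> ENUM, oneOf/anyOf -> UNION,
// array -> LIST, object -> STRUCT (or MAP when only additionalProperties is set),
// $ref -> the referred schema's column definition.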
else if (schema instanceof NumberSchema) { 58 | NumberSchema numberSchema = (NumberSchema) schema; 59 | return new ColumnDef(numberSchema.requiresInteger() 60 | ? DuckDBColumnType.BIGINT 61 | : DuckDBColumnType.DOUBLE); 62 | } else if (schema instanceof StringSchema) { 63 | return new ColumnDef(DuckDBColumnType.VARCHAR); 64 | } else if (schema instanceof ConstSchema) { 65 | ConstSchema constSchema = (ConstSchema) schema; 66 | return new EnumColumnDef( 67 | Collections.singletonList(constSchema.getPermittedValue().toString())); 68 | } else if (schema instanceof EnumSchema) { 69 | EnumSchema enumSchema = (EnumSchema) schema; 70 | return new EnumColumnDef(enumSchema.getPossibleValues().stream() 71 | .map(Object::toString).collect(Collectors.toList())); 72 | } else if (schema instanceof CombinedSchema) { 73 | CombinedSchema combinedSchema = (CombinedSchema) schema; 74 | Schema singletonUnion = flattenSingletonUnion(combinedSchema); 75 | if (singletonUnion != null) { 76 | ColumnDef colDef = schemaToColumnDef(ctx, singletonUnion); 77 | if (combinedSchema.getSubschemas().size() > 1) { 78 | colDef.setColumnStrategy(NULL_STRATEGY); 79 | } 80 | return colDef; 81 | } 82 | CombinedSchema.ValidationCriterion criterion = combinedSchema.getCriterion(); 83 | if (criterion == CombinedSchema.ALL_CRITERION) { 84 | return allOfToConnectSchema(ctx, combinedSchema)._2; 85 | } 86 | int i = 0; 87 | boolean nullable = false; 88 | for (Schema subSchema : combinedSchema.getSubschemas()) { 89 | if (subSchema instanceof NullSchema) { 90 | nullable = true; 91 | } else { 92 | columnDefs.put("u" + i, schemaToColumnDef(ctx, subSchema)); 93 | } 94 | i++; 95 | } 96 | return new UnionColumnDef(columnDefs, nullable 97 | ? NULL_STRATEGY 98 | : NOT_NULL_STRATEGY); 99 | } else if (schema instanceof ArraySchema) { 100 | ArraySchema arraySchema = (ArraySchema) schema; 101 | return new ListColumnDef(schemaToColumnDef(ctx, arraySchema.getAllItemSchema())); 102 | } else if (schema instanceof ObjectSchema) { 103 | ObjectSchema objectSchema = (ObjectSchema) schema; 104 | if (objectSchema.getSchemaOfAdditionalProperties() != null) { 105 | // mbknor uses schema of additionalProperties to represent a map 106 | return new MapColumnDef(new ColumnDef(DuckDBColumnType.VARCHAR), 107 | schemaToColumnDef(ctx, objectSchema.getSchemaOfAdditionalProperties())); 108 | } 109 | Map properties = objectSchema.getPropertySchemas(); 110 | StructColumnDef structColumnDef = new StructColumnDef(columnDefs); 111 | ctx.put(schema, structColumnDef); 112 | for (Map.Entry entry : properties.entrySet()) { 113 | columnDefs.put(entry.getKey(), schemaToColumnDef(ctx, entry.getValue())); 114 | } 115 | return structColumnDef; 116 | } else if (schema instanceof ReferenceSchema) { 117 | ReferenceSchema referenceSchema = (ReferenceSchema) schema; 118 | Schema referredSchema = referenceSchema.getReferredSchema(); 119 | ColumnDef columnDef = ctx.get(referredSchema); 120 | if (columnDef != null) { 121 | return columnDef; 122 | } 123 | return schemaToColumnDef(ctx, referredSchema); 124 | } 125 | throw new IllegalArgumentException( 126 | "Unsupported schema type " + schema.getClass().getName()); 127 | } 128 | 129 | private Tuple2 allOfToConnectSchema(Context ctx, CombinedSchema combinedSchema) { 130 | ConstSchema constSchema = null; 131 | EnumSchema enumSchema = null; 132 | NumberSchema numberSchema = null; 133 | StringSchema stringSchema = null; 134 | CombinedSchema combinedSubschema = null; 135 | ReferenceSchema referenceSchema = null; 136 | Map properties = new 
LinkedHashMap<>(); 137 | Map required = new HashMap<>(); 138 | for (Schema subSchema : combinedSchema.getSubschemas()) { 139 | if (subSchema instanceof ConstSchema) { 140 | constSchema = (ConstSchema) subSchema; 141 | } else if (subSchema instanceof EnumSchema) { 142 | enumSchema = (EnumSchema) subSchema; 143 | } else if (subSchema instanceof NumberSchema) { 144 | numberSchema = (NumberSchema) subSchema; 145 | } else if (subSchema instanceof StringSchema) { 146 | stringSchema = (StringSchema) subSchema; 147 | } else if (subSchema instanceof CombinedSchema) { 148 | combinedSubschema = (CombinedSchema) subSchema; 149 | } else if (subSchema instanceof ReferenceSchema) { 150 | referenceSchema = (ReferenceSchema) subSchema; 151 | } 152 | collectPropertySchemas( 153 | subSchema, properties, required, Collections.newSetFromMap(new IdentityHashMap<>())); 154 | } 155 | if (!properties.isEmpty()) { 156 | LinkedHashMap columnDefs = new LinkedHashMap<>(); 157 | StructColumnDef structColumnDef = new StructColumnDef(columnDefs); 158 | ctx.put(combinedSchema, structColumnDef); 159 | for (Map.Entry property : properties.entrySet()) { 160 | String subFieldName = property.getKey(); 161 | Schema subSchema = property.getValue(); 162 | ColumnDef columnDef = schemaToColumnDef(ctx, subSchema); 163 | if (!required.get(subFieldName)) { 164 | columnDef.setColumnStrategy(NULL_STRATEGY); 165 | } 166 | columnDefs.put(subFieldName, columnDef); 167 | } 168 | return Tuple.of(combinedSchema, structColumnDef); 169 | } else if (combinedSubschema != null) { 170 | // Any combined subschema takes precedence over primitive subschemas 171 | return Tuple.of(combinedSubschema, schemaToColumnDef(ctx, combinedSubschema)); 172 | } else if (constSchema != null) { 173 | if (stringSchema != null) { 174 | // Ignore the const, return the string 175 | return Tuple.of(stringSchema, schemaToColumnDef(ctx, stringSchema)); 176 | } else if (numberSchema != null) { 177 | // Ignore the const, return the number or integer 178 | return Tuple.of(numberSchema, schemaToColumnDef(ctx, numberSchema)); 179 | } 180 | } else if (enumSchema != null) { 181 | if (stringSchema != null) { 182 | // Return a string enum 183 | return Tuple.of(enumSchema, schemaToColumnDef(ctx, enumSchema)); 184 | } else if (numberSchema != null) { 185 | // Ignore the enum, return the number or integer 186 | return Tuple.of(numberSchema, schemaToColumnDef(ctx, numberSchema)); 187 | } 188 | } else if (stringSchema != null && stringSchema.getFormatValidator() != null) { 189 | if (numberSchema != null) { 190 | // This is a number or integer with a format 191 | return Tuple.of(numberSchema, schemaToColumnDef(ctx, numberSchema)); 192 | } 193 | return Tuple.of(stringSchema, schemaToColumnDef(ctx, stringSchema)); 194 | } else if (referenceSchema != null) { 195 | Schema referredSchema = referenceSchema.getReferredSchema(); 196 | ColumnDef columnDef = ctx.get(referredSchema); 197 | if (columnDef != null) { 198 | return Tuple.of(referredSchema, columnDef); 199 | } 200 | return Tuple.of(referredSchema, schemaToColumnDef(ctx, referredSchema)); 201 | } 202 | throw new IllegalArgumentException("Unsupported criterion " 203 | + combinedSchema.getCriterion() + " for " + combinedSchema); 204 | } 205 | 206 | private void collectPropertySchemas( 207 | Schema schema, 208 | Map properties, 209 | Map required, 210 | Set visited) { 211 | if (visited.contains(schema)) { 212 | return; 213 | } else { 214 | visited.add(schema); 215 | } 216 | if (schema instanceof CombinedSchema) { 217 | CombinedSchema 
combinedSchema = (CombinedSchema) schema; 218 | if (combinedSchema.getCriterion() == CombinedSchema.ALL_CRITERION) { 219 | for (Schema subSchema : combinedSchema.getSubschemas()) { 220 | collectPropertySchemas(subSchema, properties, required, visited); 221 | } 222 | } 223 | } else if (schema instanceof ObjectSchema) { 224 | ObjectSchema objectSchema = (ObjectSchema) schema; 225 | for (Map.Entry entry : objectSchema.getPropertySchemas().entrySet()) { 226 | String fieldName = entry.getKey(); 227 | properties.put(fieldName, entry.getValue()); 228 | required.put(fieldName, objectSchema.getRequiredProperties().contains(fieldName)); 229 | } 230 | } else if (schema instanceof ReferenceSchema) { 231 | ReferenceSchema refSchema = (ReferenceSchema) schema; 232 | collectPropertySchemas(refSchema.getReferredSchema(), properties, required, visited); 233 | } 234 | } 235 | 236 | private Schema flattenSingletonUnion(CombinedSchema schema) { 237 | Collection subschemas = schema.getSubschemas(); 238 | int size = subschemas.size(); 239 | if (size == 1) { 240 | return subschemas.iterator().next(); 241 | } else if (size == 2) { 242 | boolean nullable = false; 243 | Schema notNullable = null; 244 | for (Schema subSchema : subschemas) { 245 | if (subSchema instanceof NullSchema) { 246 | nullable = true; 247 | } else { 248 | notNullable = subSchema; 249 | } 250 | } 251 | if (nullable && notNullable != null) { 252 | return notNullable; 253 | } 254 | } 255 | return null; 256 | } 257 | 258 | @Override 259 | public Object messageToColumn( 260 | Context ctx, ParsedSchema parsedSchema, Object message, ColumnDef columnDef) { 261 | Schema schema = (Schema) parsedSchema.rawSchema(); 262 | return messageToColumn(ctx, schema, (JsonNode) message, columnDef); 263 | } 264 | 265 | private Object messageToColumn( 266 | Context ctx, Schema schema, JsonNode jsonNode, ColumnDef columnDef) { 267 | if (jsonNode == null) { 268 | return null; 269 | } 270 | if (schema instanceof BooleanSchema) { 271 | return jsonNode.asBoolean(); 272 | } else if (schema instanceof NumberSchema) { 273 | NumberSchema numberSchema = (NumberSchema) schema; 274 | if (numberSchema.requiresInteger()) { 275 | return jsonNode.asLong(); 276 | } else { 277 | return jsonNode.asDouble(); 278 | } 279 | } else if (schema instanceof StringSchema) { 280 | return jsonNode.asText(); 281 | } else if (schema instanceof ConstSchema) { 282 | return jsonNode.asText(); 283 | } else if (schema instanceof EnumSchema) { 284 | return jsonNode.asText(); 285 | } else if (schema instanceof CombinedSchema) { 286 | CombinedSchema combinedSchema = (CombinedSchema) schema; 287 | Schema singletonUnion = flattenSingletonUnion(combinedSchema); 288 | if (singletonUnion != null) { 289 | return messageToColumn(ctx, singletonUnion, jsonNode, columnDef); 290 | } 291 | if (combinedSchema.getCriterion() == CombinedSchema.ALL_CRITERION) { 292 | Schema subschema = allOfToConnectSchema(ctx, combinedSchema)._1; 293 | ColumnDef colDef = allOfToConnectSchema(ctx, combinedSchema)._2; 294 | return messageToColumn(ctx, subschema, jsonNode, colDef); 295 | } 296 | if (columnDef.getColumnType() == DuckDBColumnType.UNION) { 297 | UnionColumnDef unionColumnDef = (UnionColumnDef) columnDef; 298 | int unionIndex = 0; 299 | for (Schema subschema : combinedSchema.getSubschemas()) { 300 | boolean valid = false; 301 | try { 302 | JsonSchema.validate(subschema, jsonNode); 303 | valid = true; 304 | } catch (Exception e) { 305 | // noop 306 | } 307 | if (valid) { 308 | String unionBranch = "u" + unionIndex; 309 | 
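// The branch name ("u" + index) keys into unionColumnDef.getColumnDefs();
// recording it in the Context lets downstream code tag the DuckDB UNION value
// with the member that actually matched.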
ctx.putUnionBranch(unionColumnDef, unionBranch); 310 | return messageToColumn(ctx, subschema, jsonNode, 311 | unionColumnDef.getColumnDefs().get(unionBranch)); 312 | } 313 | unionIndex++; 314 | } 315 | } 316 | } else if (schema instanceof ArraySchema) { 317 | ArraySchema arraySchema = (ArraySchema) schema; 318 | ArrayNode arrayNode = (ArrayNode) jsonNode; 319 | ListColumnDef listColumnDef = (ListColumnDef) columnDef; 320 | ColumnDef itemDef = listColumnDef.getItemDef(); 321 | Object[] items = new Object[arrayNode.size()]; 322 | for (int i = 0; i < arrayNode.size(); i++) { 323 | items[i] = messageToColumn( 324 | ctx, arraySchema.getAllItemSchema(), arrayNode.get(i), itemDef); 325 | } 326 | return Arrays.asList(items); 327 | } else if (schema instanceof ObjectSchema) { 328 | ObjectSchema objectSchema = (ObjectSchema) schema; 329 | ObjectNode objectNode = (ObjectNode) jsonNode; 330 | if (columnDef.getColumnType() == DuckDBColumnType.MAP) { 331 | MapColumnDef mapColumnDef = (MapColumnDef) columnDef; 332 | Map map = new HashMap<>(); 333 | Iterator> entries = objectNode.fields(); 334 | while (entries.hasNext()) { 335 | Map.Entry entry = entries.next(); 336 | String name = entry.getKey(); 337 | Object newValue = messageToColumn( 338 | ctx, 339 | objectSchema.getSchemaOfAdditionalProperties(), 340 | entry.getValue(), 341 | mapColumnDef.getValueDef() 342 | ); 343 | map.put(name, newValue); 344 | } 345 | return map; 346 | } 347 | StructColumnDef structColumnDef = (StructColumnDef) columnDef; 348 | Map properties = objectSchema.getPropertySchemas(); 349 | Object[] attributes = new Object[properties.size()]; 350 | int i = 0; 351 | for (Map.Entry entry : properties.entrySet()) { 352 | String name = entry.getKey(); 353 | ColumnDef fieldColumnDef = structColumnDef.getColumnDefs().get(name); 354 | Object newValue = messageToColumn( 355 | ctx, entry.getValue(), objectNode.get(name), fieldColumnDef); 356 | attributes[i++] = newValue; 357 | } 358 | return Arrays.asList(attributes); 359 | } else if (schema instanceof ReferenceSchema) { 360 | ReferenceSchema referenceSchema = (ReferenceSchema) schema; 361 | return messageToColumn(ctx, referenceSchema.getReferredSchema(), jsonNode, columnDef); 362 | } 363 | throw new IllegalArgumentException( 364 | "Unsupported schema type " + schema.getClass().getName()); 365 | } 366 | } 367 | -------------------------------------------------------------------------------- /src/test/java/io/kcache/kwack/JsonSchemaTest.java: -------------------------------------------------------------------------------- 1 | package io.kcache.kwack; 2 | 3 | import static org.junit.jupiter.api.Assertions.assertEquals; 4 | import static org.junit.jupiter.api.Assertions.assertThrows; 5 | 6 | import com.fasterxml.jackson.annotation.JsonSubTypes; 7 | import com.fasterxml.jackson.annotation.JsonTypeInfo; 8 | import com.google.common.collect.ImmutableList; 9 | import com.google.common.collect.ImmutableMap; 10 | import com.google.common.collect.Lists; 11 | import io.confluent.kafka.schemaregistry.json.JsonSchema; 12 | import io.reactivex.rxjava3.core.Observable; 13 | import java.io.IOException; 14 | import java.util.ArrayList; 15 | import java.util.HashMap; 16 | import java.util.List; 17 | import java.util.Map; 18 | import java.util.Objects; 19 | import java.util.Properties; 20 | import org.apache.kafka.clients.producer.KafkaProducer; 21 | import org.everit.json.schema.Schema; 22 | import org.junit.jupiter.api.Test; 23 | 24 | public class JsonSchemaTest extends AbstractSchemaTest { 25 | 26 | private 
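Note on the transformer above: flattenSingletonUnion collapses a oneOf of ["null", T] into a plain nullable T, so only genuinely heterogeneous unions become DuckDB UNION columns with branches named "u0", "u1", and so on (see messageToColumn). The following standalone sketch is illustrative only, not kwack code; it assumes the same org.everit.json.schema parser the transformer uses, and the class and method names are made up:

import org.everit.json.schema.CombinedSchema;
import org.everit.json.schema.NullSchema;
import org.everit.json.schema.Schema;
import org.everit.json.schema.loader.SchemaLoader;
import org.json.JSONObject;

public class UnionFlatteningSketch {

    // Returns the single non-null branch if the union is just ["null", T],
    // mirroring the flattenSingletonUnion rule above; null means a real union.
    static Schema flatten(CombinedSchema combined) {
        Schema notNullable = null;
        boolean nullable = false;
        for (Schema sub : combined.getSubschemas()) {
            if (sub instanceof NullSchema) {
                nullable = true;
            } else {
                notNullable = sub;
            }
        }
        return nullable && combined.getSubschemas().size() == 2 ? notNullable : null;
    }

    public static void main(String[] args) {
        Schema schema = SchemaLoader.load(new JSONObject(
            "{\"oneOf\":[{\"type\":\"null\"},{\"type\":\"string\"}]}"));
        // Prints the flattened string schema; kwack would emit a nullable
        // column here rather than a two-branch UNION type.
        System.out.println(flatten((CombinedSchema) schema));
    }
}

This is why, in the test file that follows, a "name" property declared as oneOf of null and string still comes back as an ordinary string value rather than a union struct.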
/src/test/java/io/kcache/kwack/JsonSchemaTest.java:
--------------------------------------------------------------------------------
1 | package io.kcache.kwack;
2 | 
3 | import static org.junit.jupiter.api.Assertions.assertEquals;
4 | import static org.junit.jupiter.api.Assertions.assertThrows;
5 | 
6 | import com.fasterxml.jackson.annotation.JsonSubTypes;
7 | import com.fasterxml.jackson.annotation.JsonTypeInfo;
8 | import com.google.common.collect.ImmutableList;
9 | import com.google.common.collect.ImmutableMap;
10 | import com.google.common.collect.Lists;
11 | import io.confluent.kafka.schemaregistry.json.JsonSchema;
12 | import io.reactivex.rxjava3.core.Observable;
13 | import java.io.IOException;
14 | import java.util.ArrayList;
15 | import java.util.HashMap;
16 | import java.util.List;
17 | import java.util.Map;
18 | import java.util.Objects;
19 | import java.util.Properties;
20 | import org.apache.kafka.clients.producer.KafkaProducer;
21 | import org.everit.json.schema.Schema;
22 | import org.junit.jupiter.api.Test;
23 | 
24 | public class JsonSchemaTest extends AbstractSchemaTest {
25 | 
26 |     private Schema createSimpleSchema() {
27 |         String schemaStr =
28 |             "{\"$schema\":\"http://json-schema.org/draft-07/schema#\",\"title\":\"Obj\",\"type\":\"object\",\"additionalProperties\":false,\"properties\":{\n"
29 |                 + "\"id\":{\"type\":\"integer\"},"
30 |                 + "\"name\":{\"oneOf\":[{\"type\":\"null\",\"title\":\"Not included\"},{\"type\":\"string\"}]}}}";
31 |         JsonSchema jsonSchema = new JsonSchema(schemaStr);
32 |         return jsonSchema.rawSchema();
33 |     }
34 | 
35 |     private Simple createSimpleObj() {
36 |         Simple simple = new Simple();
37 |         simple.setId(123);
38 |         simple.setName("hi");
39 |         return simple;
40 |     }
41 | 
42 |     private Schema createComplexSchema() {
43 |         String schemaStr =
44 |             "{\"$schema\":\"http://json-schema.org/draft-07/schema#\",\"title\":\"Obj\",\"type\":\"object\",\"additionalProperties\":false,\"properties\":{\n"
45 |                 + "\"name\":{\"oneOf\":[{\"type\":\"null\",\"title\":\"Not included\"},{\"type\":\"string\"}]},"
46 |                 + "\"mystring\":{\"type\":\"string\"},"
47 |                 + "\"myint\":{\"type\":\"integer\"},"
48 |                 + "\"mylong\":{\"type\":\"integer\"},"
49 |                 + "\"myfloat\":{\"type\":\"number\"},"
50 |                 + "\"mydouble\":{\"type\":\"number\"},"
51 |                 + "\"myboolean\":{\"type\":\"boolean\"},"
52 |                 + "\"myenum\":{\"enum\": [\"red\", \"amber\", \"green\"]},"
53 |                 + "\"array\":{\"oneOf\":[{\"type\":\"null\",\"title\":\"Not included\"},{\"type\":\"array\",\"items\":{\"$ref\":\"#/definitions/Data\"}}]},"
54 |                 + "\"map\":{\"oneOf\":[{\"type\":\"null\",\"title\":\"Not included\"},{\"type\":\"object\",\"additionalProperties\":{\"$ref\":\"#/definitions/Data\"}}]}},"
55 |                 + "\"definitions\":{\"Data\":{\"type\":\"object\",\"additionalProperties\":false,\"properties\":{"
56 |                 + "\"data\":{\"oneOf\":[{\"type\":\"null\",\"title\":\"Not included\"},{\"type\":\"string\"}]}}}}}";
57 |         JsonSchema jsonSchema = new JsonSchema(schemaStr);
58 |         return jsonSchema.rawSchema();
59 |     }
60 | 
61 |     private Complex createComplexObj() {
62 |         Complex obj = new Complex("test");
63 |         obj.setMystring("testUser");
64 |         obj.setMyint(1);
65 |         obj.setMylong(2L);
66 |         obj.setMyfloat(3.0f);
67 |         obj.setMydouble(4.0d);
68 |         obj.setMyboolean(true);
69 |         obj.setMyenum(Color.GREEN);
70 |         obj.setMykind(new Kind2("kind2"));
71 |         obj.setArray(ImmutableList.of(new Data("hi"), new Data("there")));
72 |         obj.setMap(ImmutableMap.of("bye", new Data("there")));
73 |         return obj;
74 |     }
75 | 
76 |     private Schema createRecursiveSchema() {
77 |         String schemaStr = "{\n"
78 |             + "  \"type\": \"object\",\n"
79 |             + "  \"title\": \"Task\",\n"
80 |             + "  \"description\": \"A task\",\n"
81 |             + "  \"id\": \"#id1\",\n"
82 |             + "  \"properties\": {\n"
83 |             + "    \"parent\": {\n"
84 |             + "      \"$ref\": \"#id1\"\n"
85 |             + "    },\n"
86 |             + "    \"title\": {\n"
87 |             + "      \"type\": \"string\",\n"
88 |             + "      \"description\": \"Task title\"\n"
89 |             + "    }\n"
90 |             + "  }\n"
91 |             + "}\n";
92 |         JsonSchema jsonSchema = new JsonSchema(schemaStr);
93 |         return jsonSchema.rawSchema();
94 |     }
95 | 
96 |     private Recursive createRecursiveObj() {
97 |         Recursive obj = new Recursive("test");
98 |         Recursive parent = new Recursive("parent");
99 |         obj.setParent(parent);
100 |         return obj;
101 |     }
102 | 
103 |     @Test
104 |     public void testSimple() throws IOException {
105 |         Simple obj = createSimpleObj();
106 |         Properties producerProps = createProducerProps(MOCK_URL);
107 |         KafkaProducer producer = createProducer(producerProps);
108 |         produce(producer, getTopic(), new Object[]{obj});
109 |         producer.close();
110 | 
111 |         engine.init();
112 |         Observable<Map<String, Object>> obs = engine.start();
113 |         List<Map<String, Object>> lm = Lists.newArrayList(obs.blockingIterable().iterator());
114 |         Map<String, Object> m = lm.get(0);
115 |         assertEquals("hi", m.get("name"));
116 |         assertEquals(123L, m.get("id"));
117 |     }
118 | 
119 |     @Test
120 |     public void testComplex() throws IOException {
121 |         Complex obj = createComplexObj();
122 |         Properties producerProps = createProducerProps(MOCK_URL);
123 |         KafkaProducer producer = createProducer(producerProps);
124 |         produce(producer, getTopic(), new Object[]{obj});
125 |         producer.close();
126 | 
127 |         engine.init();
128 |         Observable<Map<String, Object>> obs = engine.start();
129 |         List<Map<String, Object>> lm = Lists.newArrayList(obs.blockingIterable().iterator());
130 |         Map<String, Object> m = lm.get(0);
131 |         assertEquals("test", m.get("name"));
132 |         assertEquals("testUser", m.get("mystring"));
133 |         assertEquals(1L, m.get("myint"));
134 |         assertEquals(2L, m.get("mylong"));
135 |         assertEquals(3.0d, m.get("myfloat"));
136 |         assertEquals(4.0d, m.get("mydouble"));
137 |         assertEquals(true, m.get("myboolean"));
138 |         assertEquals("GREEN", m.get("myenum"));
139 |         assertEquals(ImmutableMap.of("kind2String", "kind2", "type", "kind2"), m.get("mykind"));
140 |         Map<String, Object> m1 = new HashMap<>();
141 |         m1.put("data", "hi");
142 |         Map<String, Object> m2 = new HashMap<>();
143 |         m2.put("data", "there");
144 |         List<Map<String, Object>> a1 = new ArrayList<>();
145 |         a1.add(m1);
146 |         a1.add(m2);
147 |         assertEquals(a1, m.get("array"));
148 |         Map<String, Map<String, Object>> m4 = new HashMap<>();
149 |         m4.put("bye", m2);
150 |         assertEquals(m4, m.get("map"));
151 |     }
152 | 
153 |     @Test
154 |     public void testRecursive() throws IOException {
155 |         Recursive obj = createRecursiveObj();
156 |         Properties producerProps = createProducerProps(MOCK_URL);
157 |         KafkaProducer producer = createProducer(producerProps);
158 |         produce(producer, getTopic(), new Object[]{obj});
159 |         producer.close();
160 | 
161 |         assertThrows(IllegalArgumentException.class, () -> engine.init());
162 |     }
163 | 
164 |     @Test
165 |     public void testBadName() throws IOException {
166 |         BadName badName = new BadName("hi", 1, 2L);
167 |         BadNameContainer obj = new BadNameContainer(1, badName);
168 |         Properties producerProps = createProducerProps(MOCK_URL);
169 |         KafkaProducer producer = createProducer(producerProps);
170 |         produce(producer, getTopic(), new Object[]{obj});
171 |         producer.close();
172 | 
173 |         engine.init();
174 |         Observable<Map<String, Object>> obs = engine.start();
175 |         List<Map<String, Object>> lm = Lists.newArrayList(obs.blockingIterable().iterator());
176 |         Map<String, Object> m = lm.get(0);
177 |         Map<String, Object> bad = (Map<String, Object>) m.get("badName");
178 |         assertEquals("hi", bad.get("name"));
179 |         assertEquals(1L, bad.get("group"));
180 |         assertEquals(2L, bad.get("order"));
181 |     }
182 | 
183 |     @Override
184 |     protected String getTopic() {
185 |         return "test-json";
186 |     }
187 | 
188 |     @Override
189 |     protected Class<?> getValueSerializer() {
190 |         return io.confluent.kafka.serializers.json.KafkaJsonSchemaSerializer.class;
191 |     }
192 | 
193 |     public static class Simple {
194 | 
195 |         private int id;
196 |         private String name;
197 | 
198 |         public Simple() {
199 |         }
200 | 
201 |         public int getId() {
202 |             return id;
203 |         }
204 | 
205 |         public void setId(int id) {
206 |             this.id = id;
207 |         }
208 | 
209 |         public String getName() {
210 |             return name;
211 |         }
212 | 
213 |         public void setName(String name) {
214 |             this.name = name;
215 |         }
216 | 
217 |         @Override
218 |         public boolean equals(Object o) {
219 |             if (this == o) {
220 |                 return true;
221 |             }
222 |             if (o == null || getClass() != o.getClass()) {
223 |                 return false;
224 |             }
225 |             Simple simple = (Simple) o;
226 |             return id == simple.id && Objects.equals(name, simple.name);
227 |         }
228 | 
229 |         @Override
230 |         public int hashCode() {
231 |             return Objects.hash(id, name);
232 |         }
233 |     }
234 | 
235 |     public enum Color {
236 |         RED, AMBER, GREEN
237 |     }
238 | 
239 |     public static class Complex {
240 | 
241 |         private String name;
242 |         private String mystring;
243 |         private int myint;
244 |         private long mylong;
245 |         private float myfloat;
246 |         private double mydouble;
247 |         private boolean myboolean;
248 |         private Color myenum;
249 |         private Kind mykind;
250 |         private List<Data> array = new ArrayList<>();
251 |         private Map<String, Data> map = new HashMap<>();
252 | 
253 |         public Complex() {
254 |         }
255 | 
256 |         public Complex(String name) {
257 |             this.name = name;
258 |         }
259 | 
260 |         public String getName() {
261 |             return name;
262 |         }
263 | 
264 |         public void setName(String name) {
265 |             this.name = name;
266 |         }
267 | 
268 |         public String getMystring() {
269 |             return mystring;
270 |         }
271 | 
272 |         public void setMystring(String mystring) {
273 |             this.mystring = mystring;
274 |         }
275 | 
276 |         public int getMyint() {
277 |             return myint;
278 |         }
279 | 
280 |         public void setMyint(int myint) {
281 |             this.myint = myint;
282 |         }
283 | 
284 |         public long getMylong() {
285 |             return mylong;
286 |         }
287 | 
288 |         public void setMylong(long mylong) {
289 |             this.mylong = mylong;
290 |         }
291 | 
292 |         public float getMyfloat() {
293 |             return myfloat;
294 |         }
295 | 
296 |         public void setMyfloat(float myfloat) {
297 |             this.myfloat = myfloat;
298 |         }
299 | 
300 |         public double getMydouble() {
301 |             return mydouble;
302 |         }
303 | 
304 |         public void setMydouble(double mydouble) {
305 |             this.mydouble = mydouble;
306 |         }
307 | 
308 |         public boolean isMyboolean() {
309 |             return myboolean;
310 |         }
311 | 
312 |         public void setMyboolean(boolean myboolean) {
313 |             this.myboolean = myboolean;
314 |         }
315 | 
316 |         public Color getMyenum() {
317 |             return myenum;
318 |         }
319 | 
320 |         public void setMyenum(Color myenum) {
321 |             this.myenum = myenum;
322 |         }
323 | 
324 |         public Kind getMykind() {
325 |             return mykind;
326 |         }
327 | 
328 |         public void setMykind(Kind mykind) {
329 |             this.mykind = mykind;
330 |         }
331 | 
332 |         public List<Data> getArray() {
333 |             return array;
334 |         }
335 | 
336 |         public void setArray(List<Data> array) {
337 |             this.array = array;
338 |         }
339 | 
340 |         public Map<String, Data> getMap() {
341 |             return map;
342 |         }
343 | 
344 |         public void setMap(Map<String, Data> map) {
345 |             this.map = map;
346 |         }
347 | 
348 |         @Override
349 |         public boolean equals(Object o) {
350 |             if (this == o) {
351 |                 return true;
352 |             }
353 |             if (o == null || getClass() != o.getClass()) {
354 |                 return false;
355 |             }
356 |             Complex obj = (Complex) o;
357 |             return myint == obj.myint
358 |                 && mylong == obj.mylong
359 |                 && Float.compare(myfloat, obj.myfloat) == 0
360 |                 && Double.compare(mydouble, obj.mydouble) == 0
361 |                 && myboolean == obj.myboolean
362 |                 && myenum == obj.myenum
363 |                 && Objects.equals(name, obj.name)
364 |                 && Objects.equals(mystring, obj.mystring)
365 |                 && Objects.equals(array, obj.array)
366 |                 && Objects.equals(map, obj.map);
367 |         }
368 | 
369 |         @Override
370 |         public int hashCode() {
371 |             return Objects.hash(
372 |                 name, mystring, myint, mylong, myfloat, mydouble, myboolean, myenum, array, map);
373 |         }
374 |     }
375 | 
376 |     public static class Data {
377 | 
378 |         private String data;
379 | 
380 |         public Data() {
381 |         }
382 | 
383 |         public Data(String data) {
384 |             this.data = data;
385 |         }
386 | 
387 |         public String getData() {
388 |             return data;
389 |         }
390 | 
391 |         @Override
392 |         public boolean equals(Object o) {
393 |             if (this == o) {
394 |                 return true;
395 |             }
396 |             if (o == null || getClass() != o.getClass()) {
397 |                 return false;
398 |             }
399 |             Data data1 = (Data) o;
400 |             return Objects.equals(data, data1.data);
401 |         }
402 | 
403 |         @Override
404 |         public int hashCode() {
405 |             return Objects.hashCode(data);
406 |         }
407 |     }
408 | 
409 |     @JsonTypeInfo(
410 |         use = JsonTypeInfo.Id.NAME,
411 |         include = JsonTypeInfo.As.PROPERTY,
412 |         property = "type")
413 |     @JsonSubTypes({
414 |         @JsonSubTypes.Type(value = Kind1.class, name = "kind1"),
415 |         @JsonSubTypes.Type(value = Kind2.class, name = "kind2")})
416 |     public abstract class Kind {
417 |     }
418 | 
419 |     public class Kind1 extends Kind {
420 |         public Kind1(String kind1String) {
421 |             this.kind1String = kind1String;
422 |         }
423 | 
424 |         public final String kind1String;
425 |     }
426 | 
427 |     public class Kind2 extends Kind {
428 |         public Kind2(String kind2String) {
429 |             this.kind2String = kind2String;
430 |         }
431 | 
432 |         public final String kind2String;
433 |     }
434 | 
435 |     public static class BadNameContainer {
436 | 
437 |         private int id;
438 |         private BadName badName;
439 | 
440 |         public BadNameContainer(int id, BadName badName) {
441 |             this.id = id;
442 |             this.badName = badName;
443 |         }
444 | 
445 |         public int getId() {
446 |             return id;
447 |         }
448 | 
449 |         public BadName getBadName() {
450 |             return badName;
451 |         }
452 | 
453 |         @Override
454 |         public boolean equals(Object o) {
455 |             if (this == o) {
456 |                 return true;
457 |             }
458 |             if (o == null || getClass() != o.getClass()) {
459 |                 return false;
460 |             }
461 |             BadNameContainer that = (BadNameContainer) o;
462 |             return id == that.id && Objects.equals(badName, that.badName);
463 |         }
464 | 
465 |         @Override
466 |         public int hashCode() {
467 |             return Objects.hash(id, badName);
468 |         }
469 |     }
470 | 
471 |     public static class Recursive {
472 | 
473 |         private Recursive parent;
474 |         private String title;
475 | 
476 |         public Recursive(String title) {
477 |             this.title = title;
478 |         }
479 | 
480 |         public Recursive getParent() {
481 |             return parent;
482 |         }
483 | 
484 |         public void setParent(Recursive parent) {
485 |             this.parent = parent;
486 |         }
487 | 
488 |         public String getTitle() {
489 |             return title;
490 |         }
491 | 
492 |         public void setTitle(String title) {
493 |             this.title = title;
494 |         }
495 | 
496 |         @Override
497 |         public boolean equals(Object o) {
498 |             if (this == o) {
499 |                 return true;
500 |             }
501 |             if (o == null || getClass() != o.getClass()) {
502 |                 return false;
503 |             }
504 |             Recursive recursive = (Recursive) o;
505 |             return Objects.equals(title, recursive.title)
506 |                 && Objects.equals(parent, recursive.parent);
507 |         }
508 | 
509 |         @Override
510 |         public int hashCode() {
511 |             return Objects.hash(title, parent);
512 |         }
513 |     }
514 | 
515 |     public static class BadName {
516 |         private String name;
517 |         private int group;
518 |         private long order;
519 | 
520 |         public BadName(String name, int group, long order) {
521 |             this.name = name;
522 |             this.group = group;
523 |             this.order = order;
524 |         }
525 | 
526 |         public String getName() {
527 |             return name;
528 |         }
529 | 
530 |         public int getGroup() {
531 |             return group;
532 |         }
533 | 
534 |         public long getOrder() {
535 |             return order;
536 |         }
537 | 
538 |         @Override
539 |         public boolean equals(Object o) {
540 |             if (this == o) {
541 |                 return true;
542 |             }
543 |             if (o == null || getClass() != o.getClass()) {
544 |                 return false;
545 |             }
546 |             BadName badName = (BadName) o;
547 |             return group == badName.group
548 |                 && order == badName.order
549 |                 && Objects.equals(name, badName.name);
550 |         }
551 | 
552 |         @Override
553 |         public int hashCode() {
554 |             return Objects.hash(name, group, order);
555 |         }
556 |     }
557 | }
--------------------------------------------------------------------------------
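A closing note on testRecursive: the transformer's collectPropertySchemas walks schema graphs with an identity-based visited set (Collections.newSetFromMap(new IdentityHashMap<>())), so a self-referencing schema like the Task schema above cannot send it into endless recursion, and engine.init() is expected to reject it with an IllegalArgumentException, presumably because a recursive schema has no fixed column shape. A minimal standalone sketch of that identity-based cycle detection (illustrative only, not kwack code; Node and hasCycle are made-up names):

import java.util.Collections;
import java.util.IdentityHashMap;
import java.util.Set;

public class CycleDetectionSketch {

    static final class Node {
        Node child;
    }

    // Identity-based membership: structurally equal nodes at different
    // positions are still visited, but revisiting the exact same object
    // signals a back-reference, i.e. a cycle.
    static boolean hasCycle(Node node, Set<Node> visited) {
        if (node == null) {
            return false;
        }
        if (!visited.add(node)) {
            return true;
        }
        return hasCycle(node.child, visited);
    }

    public static void main(String[] args) {
        Node task = new Node();
        task.child = task; // a self-reference, like "$ref": "#id1" in the test schema
        System.out.println(
            hasCycle(task, Collections.newSetFromMap(new IdentityHashMap<>())));
    }
}

IdentityHashMap compares keys by reference rather than equals, which is what makes this safe even when distinct subschemas compare equal structurally.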