├── src └── main │ ├── resources │ ├── kafka-connect-milvus.properties │ ├── images │ │ ├── message.png │ │ ├── add_config.png │ │ ├── add_plugin.png │ │ ├── with_schema.png │ │ ├── produce_message.png │ │ ├── scheme_registry.png │ │ ├── collection_schema.png │ │ └── insearted_entities.png │ ├── kafka-connect-milvus.json │ └── sample_message.json │ └── java │ └── com │ └── milvus │ └── io │ └── kafka │ ├── client │ ├── response │ │ ├── GetLoadStateResp.java │ │ ├── HasCollectionResp.java │ │ ├── RestfulResponse.java │ │ └── DescribeCollectionResp.java │ ├── request │ │ └── UpsertReq.java │ ├── common │ │ ├── JsonUtils.java │ │ ├── ConsistencyLevel.java │ │ ├── DataType.java │ │ └── IndexParam.java │ └── MilvusRestClient.java │ ├── helper │ └── MilvusClientHelper.java │ ├── utils │ ├── VersionUtil.java │ ├── Utils.java │ └── DataConverter.java │ ├── MilvusSinkConnectorConfig.java │ ├── MilvusSinkConnector.java │ └── MilvusSinkTask.java ├── .gitattributes ├── assets └── zilliz_logo.png ├── .gitignore ├── README_OSS.md ├── README.md ├── pom.xml └── LICENSE /src/main/resources/kafka-connect-milvus.properties: -------------------------------------------------------------------------------- 1 | version=${project.version} -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /assets/zilliz_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zilliztech/kafka-connect-milvus/HEAD/assets/zilliz_logo.png -------------------------------------------------------------------------------- /src/main/resources/images/message.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zilliztech/kafka-connect-milvus/HEAD/src/main/resources/images/message.png -------------------------------------------------------------------------------- /src/main/resources/images/add_config.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zilliztech/kafka-connect-milvus/HEAD/src/main/resources/images/add_config.png -------------------------------------------------------------------------------- /src/main/resources/images/add_plugin.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zilliztech/kafka-connect-milvus/HEAD/src/main/resources/images/add_plugin.png -------------------------------------------------------------------------------- /src/main/resources/images/with_schema.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zilliztech/kafka-connect-milvus/HEAD/src/main/resources/images/with_schema.png -------------------------------------------------------------------------------- /src/main/resources/images/produce_message.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zilliztech/kafka-connect-milvus/HEAD/src/main/resources/images/produce_message.png -------------------------------------------------------------------------------- /src/main/resources/images/scheme_registry.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/zilliztech/kafka-connect-milvus/HEAD/src/main/resources/images/scheme_registry.png -------------------------------------------------------------------------------- /src/main/resources/images/collection_schema.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zilliztech/kafka-connect-milvus/HEAD/src/main/resources/images/collection_schema.png -------------------------------------------------------------------------------- /src/main/resources/images/insearted_entities.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zilliztech/kafka-connect-milvus/HEAD/src/main/resources/images/insearted_entities.png -------------------------------------------------------------------------------- /src/main/resources/kafka-connect-milvus.json: -------------------------------------------------------------------------------- 1 | { 2 | "public.endpoint": "https://:port", 3 | "token": "*****************************************", 4 | "collection.name": "topic_0", 5 | "topics": "topic_0" 6 | } -------------------------------------------------------------------------------- /src/main/java/com/milvus/io/kafka/client/response/GetLoadStateResp.java: -------------------------------------------------------------------------------- 1 | package com.milvus.io.kafka.client.response; 2 | 3 | import lombok.Data; 4 | 5 | @Data 6 | public class GetLoadStateResp { 7 | private String loadState; 8 | private String loadProgress; 9 | private String message; 10 | } 11 | -------------------------------------------------------------------------------- /src/main/java/com/milvus/io/kafka/client/response/HasCollectionResp.java: -------------------------------------------------------------------------------- 1 | package com.milvus.io.kafka.client.response; 2 | 3 | import lombok.AllArgsConstructor; 4 | import lombok.Data; 5 | import lombok.NoArgsConstructor; 6 | 7 | @Data 8 | @AllArgsConstructor 9 | @NoArgsConstructor 10 | public class HasCollectionResp { 11 | private Boolean has; 12 | } 13 | -------------------------------------------------------------------------------- /src/main/resources/sample_message.json: -------------------------------------------------------------------------------- 1 | { 2 | "id": 0, 3 | "title": "The Reported Mortality Rate of Coronavirus Is Not Important", 4 | "title_vector": [ 5 | 0.041732933, 6 | 0.013779674, 7 | -0.027564144, 8 | -0.013061441, 9 | 0.009748648, 10 | 0.00082446384, 11 | -0.00071647146, 12 | 0.048612226 13 | ], 14 | "link": "https://medium.com/swlh/the-reported-mortality-rate-of-coronavirus-is-not-important-369989c8d912" 15 | } -------------------------------------------------------------------------------- /src/main/java/com/milvus/io/kafka/helper/MilvusClientHelper.java: -------------------------------------------------------------------------------- 1 | package com.milvus.io.kafka.helper; 2 | 3 | import com.milvus.io.kafka.MilvusSinkConnectorConfig; 4 | import com.milvus.io.kafka.client.MilvusRestClient; 5 | import com.milvus.io.kafka.utils.Utils; 6 | 7 | public class MilvusClientHelper { 8 | public MilvusRestClient createMilvusClient(MilvusSinkConnectorConfig config) { 9 | return new MilvusRestClient(config.getUrl(), Utils.decryptToken(config.getToken().value()), config.getDatabaseName()); 10 | } 11 | } 12 | 
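A quick usage sketch for the helper above, mirroring how `MilvusSinkTask` wires things up at startup (the endpoint and token are placeholders, and `MilvusSinkConnectorConfig`, `Utils`, and `MilvusRestClient` appear later in this repo):

```java
import java.util.HashMap;
import java.util.Map;

import com.milvus.io.kafka.MilvusSinkConnectorConfig;
import com.milvus.io.kafka.client.MilvusRestClient;
import com.milvus.io.kafka.helper.MilvusClientHelper;
import com.milvus.io.kafka.utils.Utils;

public class ClientWiringSketch {
    public static void main(String[] args) {
        Map<String, String> props = new HashMap<>();
        props.put("public.endpoint", "https://example.endpoint:19530"); // placeholder
        // The sink task stores the token encrypted in-memory;
        // MilvusClientHelper decrypts it again when building the client.
        props.put("token", Utils.encryptToken("db_admin:example-password")); // placeholder
        props.put("collection.name", "topic_0");

        MilvusSinkConnectorConfig config = new MilvusSinkConnectorConfig(props);
        MilvusRestClient client = new MilvusClientHelper().createMilvusClient(config);
        System.out.println(client.hasCollection(config.getCollectionName()));
    }
}
```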
-------------------------------------------------------------------------------- /.gitignore: --------------------------------------------------------------------------------
1 | target/
2 | pom.xml.tag
3 | pom.xml.releaseBackup
4 | pom.xml.versionsBackup
5 | pom.xml.next
6 | release.properties
7 | dependency-reduced-pom.xml
8 | buildNumber.properties
9 | .mvn/timing.properties
10 | # https://github.com/takari/maven-wrapper#usage-without-binary-jar
11 | .mvn/wrapper/maven-wrapper.jar
12 | 
13 | # Eclipse m2e generated files
14 | # Eclipse Core
15 | .project
16 | # JDT-specific (Eclipse Java Development Tools)
17 | .classpath
18 | 
19 | *.DS_Store
20 | /.idea/
21 | /.vscode/
22 | /src/main/resources/kafka-connect-milvus-test.json
23 | 
-------------------------------------------------------------------------------- /src/main/java/com/milvus/io/kafka/client/request/UpsertReq.java: --------------------------------------------------------------------------------
1 | package com.milvus.io.kafka.client.request;
2 | 
3 | import com.google.gson.JsonObject;
4 | import lombok.AllArgsConstructor;
5 | import lombok.Data;
6 | import lombok.NoArgsConstructor;
7 | import lombok.experimental.SuperBuilder;
8 | 
9 | import java.util.List;
10 | 
11 | @Data
12 | @SuperBuilder
13 | @AllArgsConstructor
14 | @NoArgsConstructor
15 | public class UpsertReq {
16 | private String dbName;
17 | private String collectionName;
18 | private String partitionName;
19 | private List<JsonObject> data;
20 | }
21 | 
-------------------------------------------------------------------------------- /src/main/java/com/milvus/io/kafka/utils/VersionUtil.java: --------------------------------------------------------------------------------
1 | package com.milvus.io.kafka.utils;
2 | 
3 | import java.io.IOException;
4 | import java.io.InputStream;
5 | import java.util.Properties;
6 | 
7 | public final class VersionUtil {
8 | private static final String VERSION;
9 | 
10 | static {
11 | Properties prop = new Properties();
12 | try (InputStream in = VersionUtil.class.getResourceAsStream("/kafka-connect-milvus.properties")) {
13 | prop.load(in);
14 | VERSION = prop.getProperty("version", "0.0.0.0");
15 | } catch (IOException e) {
16 | throw new ExceptionInInitializerError(e);
17 | }
18 | }
19 | 
20 | public static String getVersion() {
21 | return VERSION;
22 | }
23 | }
24 | 
-------------------------------------------------------------------------------- /src/main/java/com/milvus/io/kafka/client/common/JsonUtils.java: --------------------------------------------------------------------------------
1 | package com.milvus.io.kafka.client.common;
2 | 
3 | import com.google.gson.Gson;
4 | import com.google.gson.GsonBuilder;
5 | import com.google.gson.ToNumberPolicy;
6 | 
7 | import java.lang.reflect.Type;
8 | 
9 | public class JsonUtils {
10 | private static final Gson GSON_INSTANCE;
11 | 
12 | static {
13 | GSON_INSTANCE = (new GsonBuilder()).serializeNulls().setObjectToNumberStrategy(ToNumberPolicy.LONG_OR_DOUBLE).create();
14 | }
15 | 
16 | public JsonUtils() {
17 | }
18 | 
19 | public static <T> T fromJson(String jsonStr, Type typeOfT) {
20 | return GSON_INSTANCE.fromJson(jsonStr, typeOfT);
21 | }
22 | 
23 | public static String toJson(Object object) {
24 | return GSON_INSTANCE.toJson(object);
25 | }
26 | }
27 | 
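A minimal sketch of building an `UpsertReq` (defined above) and serializing it with `JsonUtils`, the same way the REST client does; the row contents are made up for illustration:

```java
import java.util.Collections;

import com.google.gson.JsonObject;
import com.milvus.io.kafka.client.common.JsonUtils;
import com.milvus.io.kafka.client.request.UpsertReq;

public class UpsertReqSketch {
    public static void main(String[] args) {
        JsonObject row = new JsonObject();
        row.addProperty("id", 0);
        row.addProperty("title", "example title");

        UpsertReq req = UpsertReq.builder()          // builder generated by @SuperBuilder
                .collectionName("topic_0")
                .data(Collections.singletonList(row))
                .build();
        System.out.println(JsonUtils.toJson(req));   // serialized request body
    }
}
```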
Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 | 
20 | package com.milvus.io.kafka.client.common;
21 | 
22 | import lombok.Getter;
23 | 
24 | @Getter
25 | public enum ConsistencyLevel {
26 | STRONG("Strong", 0),
27 | SESSION("Session", 1),
28 | BOUNDED("Bounded", 2),
29 | EVENTUALLY("Eventually", 3),
30 | ;
31 | private final String name;
32 | private final int code;
33 | 
34 | ConsistencyLevel(String name, int code) {
35 | this.name = name;
36 | this.code = code;
37 | }
38 | }
39 | 
-------------------------------------------------------------------------------- /src/main/java/com/milvus/io/kafka/client/response/RestfulResponse.java: --------------------------------------------------------------------------------
1 | /*
2 | * Licensed to the Apache Software Foundation (ASF) under one
3 | * or more contributor license agreements. See the NOTICE file
4 | * distributed with this work for additional information
5 | * regarding copyright ownership. The ASF licenses this file
6 | * to you under the Apache License, Version 2.0 (the
7 | * "License"); you may not use this file except in compliance
8 | * with the License. You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 | 
20 | package com.milvus.io.kafka.client.response;
21 | 
22 | import lombok.AllArgsConstructor;
23 | import lombok.Builder;
24 | import lombok.Data;
25 | import lombok.NoArgsConstructor;
26 | 
27 | import java.io.Serializable;
28 | 
29 | @Data
30 | @Builder
31 | @AllArgsConstructor
32 | @NoArgsConstructor
33 | public class RestfulResponse<T> implements Serializable {
34 | private static final long serialVersionUID = -7162743560382861611L;
35 | 
36 | private int code;
37 | 
38 | private String message;
39 | 
40 | private T data;
41 | }
42 | 
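Every v2 REST call returns a `RestfulResponse` envelope; a hedged sketch of unwrapping one with `JsonUtils` (the JSON body here is a made-up example):

```java
import com.google.gson.reflect.TypeToken;
import com.milvus.io.kafka.client.common.JsonUtils;
import com.milvus.io.kafka.client.response.RestfulResponse;

public class EnvelopeSketch {
    public static void main(String[] args) {
        String body = "{\"code\": 0, \"message\": \"\", \"data\": {\"has\": true}}";
        RestfulResponse<Object> resp = JsonUtils.fromJson(
                body, new TypeToken<RestfulResponse<Object>>() {}.getType());
        // a non-zero code signals a server-side error, as MilvusRestClient.doPost checks
        System.out.println(resp.getCode() + " -> " + JsonUtils.toJson(resp.getData()));
    }
}
```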
You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 | 
20 | package com.milvus.io.kafka.client.common;
21 | 
22 | import lombok.Getter;
23 | 
24 | @Getter
25 | public enum DataType {
26 | None(0),
27 | Bool(1),
28 | Int8(2),
29 | Int16(3),
30 | Int32(4),
31 | Int64(5),
32 | 
33 | Float(10),
34 | Double(11),
35 | 
36 | String(20),
37 | VarChar(21), // variable-length strings with a specified maximum length
38 | Array(22),
39 | JSON(23),
40 | 
41 | BinaryVector(100),
42 | FloatVector(101),
43 | Float16Vector(102),
44 | BFloat16Vector(103),
45 | SparseFloatVector(104);
46 | 
47 | private final int code;
48 | 
49 | DataType(int code) {
50 | this.code = code;
51 | }
52 | 
53 | }
54 | 
-------------------------------------------------------------------------------- /src/main/java/com/milvus/io/kafka/MilvusSinkConnectorConfig.java: --------------------------------------------------------------------------------
1 | package com.milvus.io.kafka;
2 | 
3 | import org.apache.kafka.common.config.AbstractConfig;
4 | import org.apache.kafka.common.config.ConfigDef;
5 | import org.apache.kafka.common.config.types.Password;
6 | 
7 | import java.util.Map;
8 | 
9 | public class MilvusSinkConnectorConfig extends AbstractConfig {
10 | protected static final String URL = "public.endpoint";
11 | protected static final String TOKEN = "token";
12 | protected static final String DATABASE_NAME = "database.name";
13 | protected static final String COLLECTION_NAME = "collection.name";
14 | 
15 | public MilvusSinkConnectorConfig(ConfigDef config, Map<String, String> parsedConfig) {
16 | super(config, parsedConfig);
17 | }
18 | 
19 | public MilvusSinkConnectorConfig(Map<String, String> parsedConfig) {
20 | this(conf(), parsedConfig);
21 | }
22 | 
23 | public static ConfigDef conf() {
24 | return new ConfigDef()
25 | .define(URL, ConfigDef.Type.STRING, "", ConfigDef.Importance.MEDIUM, "Public Endpoint")
26 | .define(TOKEN, ConfigDef.Type.PASSWORD, "db_admin:****", ConfigDef.Importance.HIGH, "Token to connect to Milvus")
27 | .define(DATABASE_NAME, ConfigDef.Type.STRING, "default", ConfigDef.Importance.MEDIUM, "Database name to save the topic messages")
28 | .define(COLLECTION_NAME, ConfigDef.Type.STRING, "", ConfigDef.Importance.MEDIUM, "Collection name to save the topic messages");
29 | }
30 | 
31 | public String getUrl() {
32 | return getString(URL);
33 | }
34 | 
35 | public Password getToken() {
36 | return getPassword(TOKEN);
37 | }
38 | 
39 | public String getDatabaseName() {
40 | return getString(DATABASE_NAME);
41 | }
42 | 
43 | public String getCollectionName() {
44 | return getString(COLLECTION_NAME);
45 | }
46 | }
47 | 
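The config class applies the defaults declared in `conf()`; a small sketch (all values are placeholders):

```java
import java.util.HashMap;
import java.util.Map;

import com.milvus.io.kafka.MilvusSinkConnectorConfig;

public class ConfigSketch {
    public static void main(String[] args) {
        Map<String, String> props = new HashMap<>();
        props.put("public.endpoint", "https://example.endpoint"); // placeholder
        props.put("token", "db_admin:example");                   // placeholder
        props.put("collection.name", "topic_0");

        MilvusSinkConnectorConfig config = new MilvusSinkConnectorConfig(props);
        System.out.println(config.getDatabaseName());  // "default" when not set
        System.out.println(config.getToken().value()); // Password type keeps the value out of toString()
    }
}
```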
-------------------------------------------------------------------------------- /src/main/java/com/milvus/io/kafka/MilvusSinkConnector.java: --------------------------------------------------------------------------------
1 | package com.milvus.io.kafka;
2 | 
3 | import com.milvus.io.kafka.utils.VersionUtil;
4 | import org.apache.kafka.common.config.ConfigDef;
5 | import org.apache.kafka.common.config.ConfigException;
6 | import org.apache.kafka.connect.connector.Task;
7 | import org.apache.kafka.connect.sink.SinkConnector;
8 | import org.slf4j.Logger;
9 | import org.slf4j.LoggerFactory;
10 | 
11 | import java.util.ArrayList;
12 | import java.util.List;
13 | import java.util.Map;
14 | 
15 | public class MilvusSinkConnector extends SinkConnector {
16 | 
17 | private static final Logger log = LoggerFactory.getLogger(MilvusSinkConnector.class);
18 | private Map<String, String> configProperties;
19 | 
20 | @Override
21 | public void start(Map<String, String> props) {
22 | try {
23 | log.info("Starting Milvus Sink Connector");
24 | configProperties = props;
25 | // validation
26 | new MilvusSinkConnectorConfig(props);
27 | } catch (ConfigException e) {
28 | throw new ConfigException("Couldn't start MilvusSinkConnector due to configuration error", e);
29 | }
30 | }
31 | 
32 | @Override
33 | public Class<? extends Task> taskClass() {
34 | return MilvusSinkTask.class;
35 | }
36 | 
37 | @Override
38 | public List<Map<String, String>> taskConfigs(int maxTasks) {
39 | final List<Map<String, String>> taskConfigs = new ArrayList<>(maxTasks);
40 | for (int i = 0; i < maxTasks; i++) {
41 | taskConfigs.add(configProperties);
42 | }
43 | return taskConfigs;
44 | }
45 | 
46 | @Override
47 | public void stop() {
48 | 
49 | }
50 | 
51 | @Override
52 | public ConfigDef config() {
53 | return MilvusSinkConnectorConfig.conf();
54 | }
55 | 
56 | @Override
57 | public String version() {
58 | return VersionUtil.getVersion();
59 | }
60 | }
61 | 
-------------------------------------------------------------------------------- /src/main/java/com/milvus/io/kafka/utils/Utils.java: --------------------------------------------------------------------------------
1 | package com.milvus.io.kafka.utils;
2 | 
3 | import org.slf4j.Logger;
4 | import org.slf4j.LoggerFactory;
5 | 
6 | import javax.crypto.Cipher;
7 | import javax.crypto.KeyGenerator;
8 | import javax.crypto.SecretKey;
9 | import java.nio.charset.StandardCharsets;
10 | import java.security.NoSuchAlgorithmException;
11 | import java.security.SecureRandom;
12 | import java.util.Base64;
13 | 
14 | public class Utils {
15 | private static final Logger log = LoggerFactory.getLogger(Utils.class);
16 | private static final SecretKey SECRET_KEY = generateSecretKey();
17 | 
18 | public static String encryptToken(String token) {
19 | try {
20 | Cipher cipher = Cipher.getInstance("AES/ECB/PKCS5Padding");
21 | cipher.init(Cipher.ENCRYPT_MODE, SECRET_KEY);
22 | 
23 | byte[] encryptedBytes = cipher.doFinal(token.getBytes(StandardCharsets.UTF_8));
24 | return Base64.getEncoder().encodeToString(encryptedBytes);
25 | } catch (Exception e) {
26 | // Handle encryption errors
27 | log.error("encryption error: {}", e.getMessage());
28 | return null;
29 | }
30 | }
31 | 
32 | public static String decryptToken(String token) {
33 | try {
34 | Cipher cipher = Cipher.getInstance("AES/ECB/PKCS5Padding");
35 | cipher.init(Cipher.DECRYPT_MODE, SECRET_KEY);
36 | 
37 | byte[] encryptedBytes = Base64.getDecoder().decode(token);
38 | byte[] decryptedBytes = cipher.doFinal(encryptedBytes);
39 | 
40 | return new String(decryptedBytes, StandardCharsets.UTF_8);
41 | } catch (Exception e) {
42 | // Handle decryption errors
43 | log.error("decryption error: {}", e.getMessage());
44 | return null;
45 | }
46 | }
47 | 
48 | public static SecretKey generateSecretKey() {
49 | try {
50 | KeyGenerator keyGenerator = KeyGenerator.getInstance("AES");
51 | SecureRandom secureRandom = new SecureRandom();
52 | keyGenerator.init(128, secureRandom);
53 | return keyGenerator.generateKey();
54 | } catch (NoSuchAlgorithmException e) {
55 | log.error(e.getMessage());
56 | return null;
57 | }
58 | }
59 | 
60 | }
61 | 
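Note that `generateSecretKey()` creates a fresh AES key per process, so ciphertexts are only valid within the same JVM; a round-trip sketch (the secret is a placeholder):

```java
import com.milvus.io.kafka.utils.Utils;

public class TokenRoundTripSketch {
    public static void main(String[] args) {
        String cipherText = Utils.encryptToken("db_admin:example-password"); // placeholder secret
        String plainText = Utils.decryptToken(cipherText);
        // true within this JVM; the key is regenerated on restart,
        // so encrypted tokens must never be persisted
        System.out.println("db_admin:example-password".equals(plainText));
    }
}
```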
-------------------------------------------------------------------------------- /src/main/java/com/milvus/io/kafka/client/response/DescribeCollectionResp.java: --------------------------------------------------------------------------------
1 | package com.milvus.io.kafka.client.response;
2 | 
3 | import com.google.gson.annotations.SerializedName;
4 | import com.milvus.io.kafka.client.common.ConsistencyLevel;
5 | import lombok.Data;
6 | import lombok.NoArgsConstructor;
7 | 
8 | import java.util.List;
9 | import java.util.Map;
10 | import java.util.Objects;
11 | 
12 | @Data
13 | @NoArgsConstructor
14 | public class DescribeCollectionResp {
15 | 
16 | @SerializedName("collectionName")
17 | private String collectionName;
18 | 
19 | @SerializedName("description")
20 | private String description = "";
21 | 
22 | @SerializedName("autoId")
23 | private Boolean autoID;
24 | 
25 | @SerializedName("enableDynamicField")
26 | private Boolean enableDynamicField;
27 | 
28 | @SerializedName("consistencyLevel")
29 | private ConsistencyLevel consistencyLevel;
30 | 
31 | @SerializedName("collectionID")
32 | private Long collectionID;
33 | 
34 | @SerializedName("fields")
35 | private List<FieldSchema> fields;
36 | 
37 | @SerializedName("indexes")
38 | private List<IndexSchema> indexes;
39 | 
40 | @SerializedName("load")
41 | private String loadState;
42 | 
43 | @SerializedName("partitionsNum")
44 | private Integer partitionsNum;
45 | 
46 | @SerializedName("shardsNum")
47 | private Integer shardsNum;
48 | 
49 | @SerializedName("properties")
50 | private List<Map<String, String>> properties;
51 | 
52 | public FieldSchema getField(String name) {
53 | for (FieldSchema field : fields) {
54 | if (Objects.equals(field.name, name)) {
55 | return field;
56 | }
57 | }
58 | return null;
59 | }
60 | 
61 | // Nested FieldSchema Class
62 | @Data
63 | @NoArgsConstructor
64 | public static class FieldSchema {
65 | @SerializedName("id")
66 | private Integer id;
67 | 
68 | @SerializedName("name")
69 | private String name;
70 | 
71 | @SerializedName("type")
72 | private String type;
73 | 
74 | @SerializedName("description")
75 | private String description;
76 | 
77 | @SerializedName("primaryKey")
78 | private Boolean primaryKey;
79 | 
80 | @SerializedName("partitionKey")
81 | private Boolean partitionKey;
82 | 
83 | @SerializedName("autoId")
84 | private Boolean autoId;
85 | 
86 | @SerializedName("clusteringKey")
87 | private Boolean clusteringKey;
88 | 
89 | @SerializedName("params")
90 | private List<Map<String, String>> params;
91 | }
92 | 
93 | // Nested IndexSchema Class
94 | @Data
95 | @NoArgsConstructor
96 | public static class IndexSchema {
97 | @SerializedName("fieldName")
98 | private String fieldName;
99 | 
100 | @SerializedName("indexName")
101 | private String indexName;
102 | 
103 | @SerializedName("metricType")
104 | private String metricType;
105 | }
106 | }
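A sketch of how the schema lookup above is typically used (the collection and field names are placeholders; `client` is a `MilvusRestClient`, defined below in this repo):

```java
import com.milvus.io.kafka.client.MilvusRestClient;
import com.milvus.io.kafka.client.response.DescribeCollectionResp;

public class SchemaLookupSketch {
    static void printFieldType(MilvusRestClient client) {
        DescribeCollectionResp resp = client.describeCollection("topic_0"); // placeholder name
        DescribeCollectionResp.FieldSchema field = resp.getField("title_vector");
        if (field != null) {
            // DataConverter uses this type string to pick a conversion, e.g. "FloatVector"
            System.out.println(field.getName() + " -> " + field.getType());
        }
    }
}
```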
You may obtain a copy of the License at
9 | *
10 | * http://www.apache.org/licenses/LICENSE-2.0
11 | *
12 | * Unless required by applicable law or agreed to in writing,
13 | * software distributed under the License is distributed on an
14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 | * KIND, either express or implied. See the License for the
16 | * specific language governing permissions and limitations
17 | * under the License.
18 | */
19 | 
20 | package com.milvus.io.kafka.client.common;
21 | 
22 | import lombok.Builder;
23 | import lombok.Data;
24 | import lombok.Getter;
25 | import lombok.NonNull;
26 | import lombok.experimental.SuperBuilder;
27 | 
28 | import java.util.Map;
29 | 
30 | @Data
31 | @SuperBuilder
32 | public class IndexParam {
33 | @NonNull
34 | private String fieldName;
35 | private String indexName;
36 | @Builder.Default
37 | private IndexType indexType = IndexType.AUTOINDEX;
38 | private MetricType metricType;
39 | private Map<String, Object> extraParams;
40 | 
41 | public String getIndexName() {
42 | if (indexName == null) {
43 | return fieldName;
44 | }
45 | return indexName;
46 | }
47 | 
48 | public enum MetricType {
49 | INVALID,
50 | // Only for float vectors
51 | L2,
52 | IP,
53 | COSINE,
54 | 
55 | // Only for binary vectors
56 | HAMMING,
57 | JACCARD,
58 | 
59 | // Only for sparse vector with BM25
60 | BM25,
61 | }
62 | 
63 | @Getter
64 | public enum IndexType {
65 | None(0),
66 | // Only supported for float vectors
67 | FLAT(1),
68 | IVF_FLAT(2),
69 | IVF_SQ8(3),
70 | IVF_PQ(4),
71 | HNSW(5),
72 | DISKANN(10),
73 | AUTOINDEX(11),
74 | SCANN(12),
75 | 
76 | // GPU indexes only for float vectors
77 | GPU_IVF_FLAT(50),
78 | GPU_IVF_PQ(51),
79 | GPU_BRUTE_FORCE(52),
80 | GPU_CAGRA(53),
81 | 
82 | // Only supported for binary vectors
83 | BIN_FLAT(80),
84 | BIN_IVF_FLAT(81),
85 | 
86 | // Only for varchar type field
87 | TRIE("Trie", 100),
88 | // Only for scalar type field
89 | STL_SORT(200), // only for numeric type field
90 | INVERTED(201), // works for all scalar fields except JSON type field
91 | BITMAP(202), // works for all scalar fields except JSON, FLOAT and DOUBLE type fields
92 | 
93 | // Only for sparse vectors
94 | SPARSE_INVERTED_INDEX(300),
95 | SPARSE_WAND(301);
96 | 
97 | private final String name;
98 | private final int code;
99 | 
100 | IndexType() {
101 | this.name = this.toString();
102 | this.code = this.ordinal();
103 | }
104 | 
105 | IndexType(int code) {
106 | this.name = this.toString();
107 | this.code = code;
108 | }
109 | 
110 | IndexType(String name, int code) {
111 | this.name = name;
112 | this.code = code;
113 | }
114 | }
115 | }
116 | 
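A builder sketch for `IndexParam` (the field name is a placeholder); note that `getIndexName()` falls back to the field name and `indexType` defaults to `AUTOINDEX`:

```java
import com.milvus.io.kafka.client.common.IndexParam;

public class IndexParamSketch {
    public static void main(String[] args) {
        IndexParam param = IndexParam.builder()
                .fieldName("title_vector")                 // placeholder field
                .metricType(IndexParam.MetricType.COSINE)
                .build();
        System.out.println(param.getIndexName());  // "title_vector" (fallback)
        System.out.println(param.getIndexType());  // AUTOINDEX (default)
    }
}
```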
-------------------------------------------------------------------------------- /src/main/java/com/milvus/io/kafka/client/MilvusRestClient.java: --------------------------------------------------------------------------------
1 | package com.milvus.io.kafka.client;
2 | 
3 | import com.google.gson.reflect.TypeToken;
4 | import com.milvus.io.kafka.client.common.JsonUtils;
5 | import com.milvus.io.kafka.client.request.UpsertReq;
6 | import com.milvus.io.kafka.client.response.DescribeCollectionResp;
7 | import com.milvus.io.kafka.client.response.GetLoadStateResp;
8 | import com.milvus.io.kafka.client.response.HasCollectionResp;
9 | import com.milvus.io.kafka.client.response.RestfulResponse;
10 | import kong.unirest.HttpResponse;
11 | import kong.unirest.Unirest;
12 | import org.slf4j.Logger;
13 | import org.slf4j.LoggerFactory;
14 | 
15 | import java.util.HashMap;
16 | import java.util.Map;
17 | 
18 | public class MilvusRestClient {
19 | private static final Logger log = LoggerFactory.getLogger(MilvusRestClient.class);
20 | 
21 | private final String url;
22 | private final String token;
23 | private final String database;
24 | 
25 | public MilvusRestClient(String url, String token, String database) {
26 | this.url = url;
27 | this.token = token;
28 | this.database = database;
29 | }
30 | 
31 | public Boolean hasCollection(String collectionName) {
32 | String endpoint = url + "/v2/vectordb/collections/has";
33 | Map<String, Object> params = createBaseParams(collectionName);
34 | 
35 | String response = doPost(endpoint, params);
36 | HasCollectionResp restfulResponse = JsonUtils.fromJson(response, new TypeToken<HasCollectionResp>() {}.getType());
37 | return restfulResponse.getHas();
38 | }
39 | 
40 | public DescribeCollectionResp describeCollection(String collectionName) {
41 | String endpoint = url + "/v2/vectordb/collections/describe";
42 | Map<String, Object> params = createBaseParams(collectionName);
43 | 
44 | String response = doPost(endpoint, params);
45 | return JsonUtils.fromJson(response, new TypeToken<DescribeCollectionResp>() {}.getType());
46 | }
47 | 
48 | public GetLoadStateResp getLoadState(String collectionName) {
49 | String endpoint = url + "/v2/vectordb/collections/get_load_state";
50 | Map<String, Object> params = createBaseParams(collectionName);
51 | 
52 | String response = doPost(endpoint, params);
53 | return JsonUtils.fromJson(response, new TypeToken<GetLoadStateResp>() {}.getType());
54 | }
55 | 
56 | public void upsert(UpsertReq upsertReq) {
57 | upsertReq.setDbName(database);
58 | String endpoint = url + "/v2/vectordb/entities/upsert";
59 | doPost(endpoint, upsertReq);
60 | }
61 | 
62 | private String doPost(String endpoint, Object params) {
63 | try {
64 | HttpResponse<String> response = Unirest.post(endpoint)
65 | .header("Authorization", "Bearer " + token)
66 | .header("Content-Type", "application/json")
67 | .body(JsonUtils.toJson(params))
68 | .asString();
69 | 
70 | if (response.getStatus() != 200) {
71 | log.error("HTTP Error {}: {}", response.getStatus(), response.getStatusText());
72 | throw new RuntimeException("Failed to call Milvus server");
73 | }
74 | 
75 | RestfulResponse<Object> restfulResponse = JsonUtils.fromJson(response.getBody(), new TypeToken<RestfulResponse<Object>>() {}.getType());
76 | 
77 | if (restfulResponse.getCode() != 0) {
78 | log.error("Milvus API Error: {}", restfulResponse.getMessage());
79 | throw new RuntimeException("Milvus server returned an error: " + restfulResponse.getMessage());
80 | }
81 | 
82 | return JsonUtils.toJson(restfulResponse.getData());
83 | } catch (Exception e) {
84 | log.error("Error calling Milvus server at {}: {}", endpoint, e.getMessage());
85 | throw new RuntimeException("Failed to call Milvus server", e);
86 | }
87 | }
88 | 
89 | private Map<String, Object> createBaseParams(String collectionName) {
90 | Map<String, Object> params = new HashMap<>();
91 | params.put("dbName", database);
92 | params.put("collectionName", collectionName);
93 | return params;
94 | }
95 | }
-------------------------------------------------------------------------------- /README_OSS.md: --------------------------------------------------------------------------------
1 | # Quick Start
2 | In this quick start guide, we show how to set up open-source Kafka and Zilliz
3 | Cloud to ingest vector data.
4 | ## Step 1: Download the kafka-connect-milvus plugin
5 | 
6 | Complete the following steps to download the kafka-connect-milvus plugin.
7 | 
8 | 1. Download the latest plugin zip file `zilliz-kafka-connect-milvus-xxx.zip` from [here](https://github.com/zilliztech/kafka-connect-milvus/releases).
9 | 
10 | ## Step 2: Download Kafka
11 | 1. Download the latest Kafka from [here](https://kafka.apache.org/downloads).
12 | 2. Unzip the downloaded file and go to the Kafka directory.
13 | ```shell
14 | $ tar -xzf kafka_2.13-3.6.1.tgz
15 | $ cd kafka_2.13-3.6.1
16 | ```
17 | ## Step 3: Start the Kafka Environment
18 | NOTE: Your local environment must have Java 8+ installed.
19 | 
20 | Run the following commands to start all services in the correct order:
21 | 
22 | 1. Start the ZooKeeper service
23 | ```shell
24 | $ bin/zookeeper-server-start.sh config/zookeeper.properties
25 | ```
26 | Open another terminal session and run:
27 | 
28 | 2. Start the Kafka broker service
29 | ```shell
30 | $ bin/kafka-server-start.sh config/server.properties
31 | ```
32 | Once all services have successfully launched, you will have a basic Kafka environment running and ready to use.
33 | 
34 | - See the official Kafka quick start guide for details: https://kafka.apache.org/quickstart
35 | 
36 | ## Step 4: Configure Kafka and Zilliz Cloud
37 | 
38 | Ensure you have Kafka and Zilliz Cloud set up and properly configured.
39 | 1. If you don't already have a topic, create one (e.g. `topic_0`) in Kafka.
40 | ```shell
41 | $ bin/kafka-topics.sh --create --topic topic_0 --bootstrap-server localhost:9092
42 | ```
43 | 2. If you don't already have a collection in Zilliz Cloud, create one with a vector field (in this example the vector has `dimension=8`). You can use the following example schema on Zilliz Cloud:
44 | 
45 | 
46 | 
47 | Note: Make sure the schemas on both sides match: the collection contains exactly one vector field, and every field name is identical on both sides.
48 | ## Step 5: Load the kafka-connect-milvus plugin to the Kafka Instance
49 | 1. Unzip the `zilliz-kafka-connect-milvus-xxx.zip` file you downloaded in Step 1.
50 | 2. Copy the `zilliz-kafka-connect-milvus` directories to the `libs` directory of your Kafka installation.
51 | 3. Modify the `connect-standalone.properties` file in the `config` directory of your Kafka installation.
52 | ```properties
53 | key.converter.schemas.enable=false
54 | value.converter.schemas.enable=false
55 | plugin.path=libs/zilliz-kafka-connect-milvus-xxx
56 | ```
57 | 4. Create and configure a `milvus-sink-connector.properties` file in the `config` directory of your Kafka installation.
58 | ```properties
59 | name=zilliz-kafka-connect-milvus
60 | connector.class=com.milvus.io.kafka.MilvusSinkConnector
61 | public.endpoint=https://:port
62 | token=*****************************************
63 | collection.name=topic_0
64 | topics=topic_0
65 | ```
66 | ## Step 6: Launch the connector
67 | 1. Start the connector with the configuration file you just created
68 | ```shell
69 | $ bin/connect-standalone.sh config/connect-standalone.properties config/milvus-sink-connector.properties
70 | ```
71 | 2. Try producing a message to the Kafka topic you just created
72 | ```shell
73 | bin/kafka-console-producer.sh --topic topic_0 --bootstrap-server localhost:9092
74 | >{"id": 0, "title": "The Reported Mortality Rate of Coronavirus Is Not Important", "title_vector": [0.041732933, 0.013779674, -0.027564144, -0.013061441, 0.009748648, 0.00082446384, -0.00071647146, 0.048612226], "link": "https://medium.com/swlh/the-reported-mortality-rate-of-coronavirus-is-not-important-369989c8d912"}
75 | ```
76 | 
77 | 3. Check if the entity has been inserted into the collection in Zilliz Cloud (a programmatic check is sketched below).
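If you prefer a programmatic check, here is a minimal sketch using Unirest (already a dependency of this connector) against what we believe is the v2 REST query endpoint; the endpoint, token, and filter are placeholders:

```java
import kong.unirest.HttpResponse;
import kong.unirest.Unirest;

public class VerifyInsertSketch {
    public static void main(String[] args) {
        String endpoint = "https://YOUR_PUBLIC_ENDPOINT"; // placeholder
        String token = "YOUR_TOKEN";                      // placeholder

        HttpResponse<String> resp = Unirest.post(endpoint + "/v2/vectordb/entities/query")
                .header("Authorization", "Bearer " + token)
                .header("Content-Type", "application/json")
                .body("{\"collectionName\": \"topic_0\", \"filter\": \"id >= 0\", "
                        + "\"outputFields\": [\"id\", \"title\"], \"limit\": 10}")
                .asString();
        System.out.println(resp.getBody()); // expect code 0 and the row produced above
    }
}
```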
Here is what it looks like on Zilliz Cloud if the insertion succeeds:
78 | 
79 | 
80 | 
81 | ### Support
82 | 
83 | If you require any assistance or have questions regarding the Kafka Connect Milvus Connector, please feel free to reach out to our support team: **Email:** [support@zilliz.com](mailto:support@zilliz.com)
84 | 
-------------------------------------------------------------------------------- /src/main/java/com/milvus/io/kafka/MilvusSinkTask.java: --------------------------------------------------------------------------------
1 | package com.milvus.io.kafka;
2 | 
3 | import com.google.gson.JsonObject;
4 | import static com.milvus.io.kafka.MilvusSinkConnectorConfig.TOKEN;
5 | import com.milvus.io.kafka.client.MilvusRestClient;
6 | import com.milvus.io.kafka.client.request.UpsertReq;
7 | import com.milvus.io.kafka.client.response.DescribeCollectionResp;
8 | import com.milvus.io.kafka.client.response.GetLoadStateResp;
9 | import com.milvus.io.kafka.helper.MilvusClientHelper;
10 | import com.milvus.io.kafka.utils.DataConverter;
11 | import com.milvus.io.kafka.utils.Utils;
12 | import com.milvus.io.kafka.utils.VersionUtil;
13 | import org.apache.kafka.connect.sink.SinkRecord;
14 | import org.apache.kafka.connect.sink.SinkTask;
15 | import org.slf4j.Logger;
16 | import org.slf4j.LoggerFactory;
17 | 
18 | import java.util.ArrayList;
19 | import java.util.Collection;
20 | import java.util.List;
21 | import java.util.Map;
22 | import java.util.Objects;
23 | 
24 | public class MilvusSinkTask extends SinkTask {
25 | 
26 | private static final Logger log = LoggerFactory.getLogger(MilvusSinkTask.class);
27 | private MilvusSinkConnectorConfig config;
28 | private MilvusRestClient myMilvusClient;
29 | private DataConverter converter;
30 | private DescribeCollectionResp response;
31 | 
32 | @Override
33 | public String version() {
34 | return VersionUtil.getVersion();
35 | }
36 | 
37 | @Override
38 | public void start(Map<String, String> props) {
39 | start(props, null);
40 | }
41 | 
42 | // made visible for tests
43 | protected void start(Map<String, String> props, MilvusRestClient milvusClient) {
44 | log.info("Starting MilvusSinkTask.");
45 | props.put(TOKEN, Utils.encryptToken(props.get(TOKEN)));
46 | this.config = new MilvusSinkConnectorConfig(props);
47 | this.converter = new DataConverter(config);
48 | this.myMilvusClient = milvusClient == null ? new MilvusClientHelper().createMilvusClient(config) : milvusClient;
49 | log.info("Started MilvusSinkTask, connecting to cluster: {}", config.getUrl());
50 | preValidate();
51 | }
52 | 
53 | private void preValidate() {
54 | // check if the collection exists
55 | if (!myMilvusClient.hasCollection(config.getCollectionName())) {
56 | log.error("Collection does not exist: {}", config.getCollectionName());
57 | throw new RuntimeException("Collection does not exist: " + config.getCollectionName());
58 | }
59 | // check if the collection is loaded
60 | GetLoadStateResp getLoadStateResp = myMilvusClient.getLoadState(config.getCollectionName());
61 | if (!Objects.equals(getLoadStateResp.getLoadState(), "LoadStateLoaded")) {
62 | log.error("Collection not loaded: {}", config.getCollectionName());
63 | throw new RuntimeException("Collection not loaded: " + config.getCollectionName());
64 | }
65 | this.response = myMilvusClient.describeCollection(config.getCollectionName());
66 | }
67 | 
68 | @Override
69 | public void put(Collection<SinkRecord> records) {
70 | log.info("Putting {} records to Milvus.", records.size());
71 | if (records.isEmpty()) {
72 | log.info("No records to put.");
73 | return;
74 | }
75 | 
76 | // dynamic schema is not supported for now; supporting it would require packing the extra fields into a JSON object
77 | List<JsonObject> datas = new ArrayList<>();
78 | for (SinkRecord record : records) {
79 | log.debug("Writing {} to Milvus.", record);
80 | if (record.value() == null) {
81 | log.warn("Skipping record with null value.");
82 | continue;
83 | }
84 | try {
85 | JsonObject data = converter.convertRecord(record, response);
86 | datas.add(data);
87 | } catch (Exception e) {
88 | log.error("Failed to convert record to JsonObject, skipping it", e);
89 | }
90 | }
91 | 
92 | if (!response.getAutoID()) {
93 | // default to upsert; collections with autoID enabled are not written here
94 | UpsertReq upsertReq = UpsertReq.builder()
95 | .collectionName(config.getCollectionName())
96 | .data(datas)
97 | .build();
98 | log.info("Upserting data to collection: {} with data: {}", config.getCollectionName(), datas);
99 | myMilvusClient.upsert(upsertReq);
100 | }
101 | 
102 | }
103 | 
104 | @Override
105 | public void stop() {
106 | log.info("Stopping Milvus client.");
107 | }
108 | }
109 | 
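The protected `start(props, milvusClient)` overload above is the test seam; a hedged sketch of exercising it with Mockito (this repo already declares mockito-core for tests; the names and values are placeholders):

```java
import java.util.HashMap;
import java.util.Map;

import org.mockito.Mockito;

import com.milvus.io.kafka.MilvusSinkTask;
import com.milvus.io.kafka.client.MilvusRestClient;
import com.milvus.io.kafka.client.response.DescribeCollectionResp;
import com.milvus.io.kafka.client.response.GetLoadStateResp;

public class MilvusSinkTaskTestSketch extends MilvusSinkTask {
    public static void main(String[] args) {
        MilvusRestClient mock = Mockito.mock(MilvusRestClient.class);
        Mockito.when(mock.hasCollection("topic_0")).thenReturn(true);

        GetLoadStateResp loaded = new GetLoadStateResp();
        loaded.setLoadState("LoadStateLoaded"); // the state preValidate expects
        Mockito.when(mock.getLoadState("topic_0")).thenReturn(loaded);
        Mockito.when(mock.describeCollection("topic_0")).thenReturn(new DescribeCollectionResp());

        Map<String, String> props = new HashMap<>();
        props.put("token", "db_admin:example"); // placeholder
        props.put("collection.name", "topic_0");

        new MilvusSinkTaskTestSketch().start(props, mock); // protected hook, hence the subclass
    }
}
```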
-------------------------------------------------------------------------------- /src/main/java/com/milvus/io/kafka/utils/DataConverter.java: --------------------------------------------------------------------------------
1 | package com.milvus.io.kafka.utils;
2 | 
3 | import com.google.gson.Gson;
4 | import com.google.gson.JsonObject;
5 | import com.milvus.io.kafka.MilvusSinkConnectorConfig;
6 | import com.milvus.io.kafka.client.common.DataType;
7 | import com.milvus.io.kafka.client.response.DescribeCollectionResp;
8 | import org.apache.kafka.connect.data.Struct;
9 | import org.apache.kafka.connect.sink.SinkRecord;
10 | import org.slf4j.Logger;
11 | import org.slf4j.LoggerFactory;
12 | 
13 | import java.nio.ByteBuffer;
14 | import java.util.ArrayList;
15 | import java.util.HashMap;
16 | import java.util.List;
17 | 
18 | public class DataConverter {
19 | 
20 | private static final Logger log = LoggerFactory.getLogger(DataConverter.class);
21 | private final MilvusSinkConnectorConfig config;
22 | 
23 | public DataConverter(MilvusSinkConnectorConfig config) {
24 | this.config = config;
25 | }
26 | 
27 | /*
28 | * Convert SinkRecord to JsonObject
29 | */
30 | public JsonObject convertRecord(SinkRecord sr, DescribeCollectionResp describeCollectionResp) {
31 | // parse sinkRecord to get field name and value
32 | if (sr.value() instanceof Struct) {
33 | return parseValue((Struct) sr.value(), describeCollectionResp);
34 | } else if (sr.value() instanceof HashMap) {
35 | return parseValue((HashMap<?, ?>) sr.value(), describeCollectionResp);
36 | } else {
37 | throw new RuntimeException("Unsupported SinkRecord data type: " + sr.value());
38 | }
39 | }
40 | 
41 | private JsonObject parseValue(HashMap<?, ?> mapValue, DescribeCollectionResp collectionSchema) {
42 | JsonObject fields = new JsonObject();
43 | Gson gson = new Gson();
44 | mapValue.forEach((field, value) -> {
45 | if (collectionSchema.getField(field.toString()) != null) {
46 | // if the key exists in the collection, store the value by collectionSchema DataType
47 | Object object = convertValueByMilvusType(value, collectionSchema.getField(field.toString()).getType());
48 | fields.add(field.toString(), gson.toJsonTree(object));
49 | } else {
50 | log.warn("Field {} does not exist in collection", field);
51 | }
52 | });
53 | return fields;
54 | }
55 | 
56 | private JsonObject parseValue(Struct structValue, DescribeCollectionResp collectionSchema) {
57 | JsonObject fields = new JsonObject();
58 | Gson gson = new Gson();
59 | structValue.schema().fields().forEach(field -> {
60 | if (collectionSchema.getField(field.name()) != null) {
61 | // if the key exists in the collection, store the value by collectionSchema DataType
62 | Object object = convertValueByMilvusType(structValue.get(field.name()), collectionSchema.getField(field.name()).getType());
63 | fields.add(field.name(), gson.toJsonTree(object));
64 | } else {
65 | log.warn("Field {} does not exist in collection", field);
66 | }
67 | });
68 | 
69 | return fields;
70 | }
71 | 
72 | private Object convertValueByMilvusType(Object value, String dataType) {
73 | DataType type = DataType.valueOf(dataType);
74 | Gson gson = new Gson();
75 | switch (type) {
76 | case Bool:
77 | return Boolean.parseBoolean(value.toString());
78 | case Int8:
79 | case Int16:
80 | return Short.parseShort(value.toString());
81 | case Int32:
82 | return Integer.parseInt(value.toString());
83 | case Int64:
84 | return Long.parseLong(value.toString());
85 | case Float:
86 | return Float.parseFloat(value.toString());
87 | case Double:
88 | return Double.parseDouble(value.toString());
89 | case VarChar:
90 | case String:
91 | return value.toString();
92 | case JSON:
93 | return gson.toJson(value);
94 | case BinaryVector:
95 | return parseBinaryVectorField(value.toString());
96 | case FloatVector:
97 | return parseFloatVectorField(value.toString());
98 | case SparseFloatVector:
99 | return gson.toJsonTree(value).getAsJsonObject();
100 | default:
101 | throw new RuntimeException("Unsupported data type: " + dataType);
102 | }
103 | }
104 | 
105 | protected List<Float> parseFloatVectorField(String vectors) {
106 | try {
107 | log.debug("parse float vectors: {}", vectors);
108 | 
109 | String[] vectorArrays = vectors.replaceAll("\\[", "").replaceAll("\\]", "")
110 | .replaceAll(" ", "").split(",");
111 | 
112 | List<Float> floatList = new ArrayList<>();
113 | for (String vector : vectorArrays) {
114 | floatList.add(Float.valueOf(vector));
115 | }
116 | 
117 | return floatList;
118 | } catch (Exception e) {
119 | throw new RuntimeException("parse float vector field error: " + e.getMessage() + " " + vectors);
120 | }
121 | }
122 | 
123 | protected ByteBuffer parseBinaryVectorField(String vectors) {
124 | try {
125 | log.debug("parse binary vectors: {}", vectors);
126 | 
127 | String[] vectorArrays = vectors.replaceAll("\\[", "").replaceAll("\\]", "")
128 | .replaceAll(" ", "").split(",");
129 | 
130 | ByteBuffer buffer = ByteBuffer.allocate(vectorArrays.length);
131 | for (String vectorArray : vectorArrays) {
132 | int vector = Integer.parseInt(vectorArray);
133 | buffer.put((byte) vector);
134 | }
135 | 
136 | return buffer;
137 | } catch (Exception e) {
138 | throw new RuntimeException("parse binary vector field error: " + e.getMessage() + " " + vectors);
139 | }
140 | }
141 | }
-------------------------------------------------------------------------------- /README.md: --------------------------------------------------------------------------------
1 | # Kafka Connect Milvus Connector
2 | 
3 | This is a Kafka sink connector for Milvus. It allows you to stream vector data from Kafka to Milvus.
4 | 
5 | This current version supports connection from
6 | [Confluent Cloud](https://www.confluent.io/confluent-cloud/) (hosted Kafka) and Open-Source Kafka to Milvus (self-hosted or
7 | [Zilliz Cloud](https://zilliz.com/cloud)).
8 | 
9 | Zilliz Cloud and Milvus are vector databases where you can ingest, store and search vector data.
10 | An entity in Zilliz Cloud or Milvus contains one vector field and multiple scalar fields such
11 | as string, integer and float. To stream data, the Kafka message schema must match the schema of
12 | the Milvus collection, containing one and only one vector field, and zero to many scalar fields.
13 | The name of each field must be exactly the same on both sides.
14 | 
15 | # Quick Start
16 | 
17 | In this quick start guide, we show how to set up Confluent Cloud and Zilliz Cloud to ingest vector data.
18 | For self-hosted Milvus the setup is similar.
19 | 
20 | - If you want to use this connector with open-source Kafka, see [README_OSS.md](README_OSS.md)
21 | 
22 | ## Step 1: Download the kafka-connect-milvus plugin
23 | 
24 | Complete the following steps to download the kafka-connect-milvus plugin.
25 | 
26 | 1. Download the latest plugin zip file `zilliz-kafka-connect-milvus-xxx.zip` from [here](https://github.com/zilliztech/kafka-connect-milvus/releases).
27 | 
28 | ## Step 2: Configure Confluent Cloud and Zilliz Cloud
29 | 
30 | Ensure you have Confluent Cloud and Zilliz Cloud set up and properly configured.
31 | 1. If you don't already have a topic, create one (e.g. `topic_0`) in Confluent Cloud.
32 | 2. If you don't already have a collection in Zilliz Cloud, create one with a vector field (in this example the vector has `dimension=8`). You can use the following example schema on Zilliz Cloud:
33 | 
34 | 
35 | 
36 | Note: Make sure the schemas on both sides match: the collection contains exactly one vector field, and every field name is identical on both sides.
37 | 
38 | ## Step 3: Load the kafka-connect-milvus plugin to a Confluent Cloud instance
39 | 1. Go to the Connectors section in your Confluent Cloud cluster.
40 | 2. Click on `Add Plugin`.
41 | 3. Upload the `zilliz-kafka-connect-milvus-xxx.zip` file you downloaded in Step 1.
42 | 
43 | 
44 | 
45 | - Fill in the Connector plugin details
46 | - Set `com.milvus.io.kafka.MilvusSinkConnector` as the Connector class.
47 | - For Sensitive properties, fill in the config fields you want to hide, such as the `token` field. Please see [custom-connector](https://docs.confluent.io/cloud/current/connectors/bring-your-connector/custom-connector-qs.html) for more details.
48 | 
49 | 
50 | ## Step 4: Configure the kafka-connect-milvus Connector
51 | 
52 | 1. Go to the `Connectors` section in your Confluent Cloud cluster.
53 | 2. Click on `Get Started`.
54 | 3. Enter your Confluent Cloud API Key and Secret.
55 | 
4. Add Configuration for the connector
56 | 
57 | Provide the Milvus connector config as follows:
58 | 
59 | ```json
60 | {
61 | "public.endpoint": "https://:port",
62 | "token": "*****************************************",
63 | "collection.name": "topic_0",
64 | "topics": "topic_0"
65 | }
66 | ```
67 | 
68 | - The token field is either the API token or a `user:password` pair, depending on the instance type of your collection in Milvus or Zilliz Cloud.
69 | 
70 | 5. Enter the connection endpoint, which is the public endpoint of your Milvus or Zilliz Cloud instance, e.g. `in01-034b8444ab99cab.aws-us-west-2.vectordb.zillizcloud.com`
71 | 
72 | 6. Choose the size, i.e. the number of tasks to run
73 | 
74 | 7. Review and launch
75 | 
76 | #### Sample Config
77 | 
78 | 
79 | 
80 | 
81 | 
82 | #### Kafka Topic Converter
83 | We support the JSON, JSON Schema, Avro, and Protobuf formats for Kafka topics.
84 | If your topic has an output format other than plain JSON, choose the corresponding converter type in `Auto Configure Schema Registry`.
85 | 
86 | 
87 | 
88 | 
89 | - Sample config for a Kafka topic with schema; check [here](https://docs.confluent.io/platform/current/schema-registry/connect.html) for more details
90 | 
91 | 
92 | 
93 | ## Step 5: Launch the connector
94 | 
95 | Start the connector to begin streaming data from Kafka to Milvus.
96 | 
97 | 1. Try producing a message to the Kafka topic you just created in Confluent Cloud (a Java producer sketch follows at the end of this README)
98 | ```json
99 | {
100 | "id": 0,
101 | "title": "The Reported Mortality Rate of Coronavirus Is Not Important",
102 | "title_vector": [0.041732933, 0.013779674, -0.027564144, -0.013061441, 0.009748648, 0.00082446384, -0.00071647146, 0.048612226],
103 | "link": "https://medium.com/swlh/the-reported-mortality-rate-of-coronavirus-is-not-important-369989c8d912"
104 | }
105 | ```
106 | - Topic Page on Confluent Cloud
107 | 
108 | 
109 | 
110 | 2. Check if the entity has been inserted into the collection in Zilliz Cloud. Here is what it looks like on Zilliz Cloud if the insertion succeeds:
111 | 
112 | 
113 | 
114 | 
115 | ### Support
116 | 
117 | If you require any assistance or have questions regarding the Kafka Connect Milvus Connector, please feel free to reach out to our support team: **Email:** [support@zilliz.com](mailto:support@zilliz.com)
118 | 
119 | ### Disclaimer
120 | 
121 | Regarding Kafka hosting, currently this connector only supports Confluent Cloud. If used with other forms of Kafka hosting, including but not limited to Confluent Platform and self-hosted Kafka instances, the security and usability of this connector have not been fully verified. Please use it at your discretion, or contact us if you intend to use it in such settings.
122 | 
123 | Regarding Milvus, both self-hosted Milvus and Zilliz Cloud are supported.
124 | 
125 | 
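For reference, the test message from Step 5 can also be produced from Java; a minimal sketch assuming the `kafka-clients` library is on the classpath (the broker address is a placeholder):

```java
import java.util.Properties;

import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;

public class ProduceSampleMessage {
    public static void main(String[] args) {
        Properties props = new Properties();
        props.put("bootstrap.servers", "localhost:9092"); // placeholder broker
        props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");

        // same payload as src/main/resources/sample_message.json
        String value = "{\"id\": 0, \"title\": \"The Reported Mortality Rate of Coronavirus Is Not Important\", "
                + "\"title_vector\": [0.041732933, 0.013779674, -0.027564144, -0.013061441, "
                + "0.009748648, 0.00082446384, -0.00071647146, 0.048612226], "
                + "\"link\": \"https://medium.com/swlh/the-reported-mortality-rate-of-coronavirus-is-not-important-369989c8d912\"}";

        try (KafkaProducer<String, String> producer = new KafkaProducer<>(props)) {
            producer.send(new ProducerRecord<>("topic_0", value));
        }
    }
}
```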
-------------------------------------------------------------------------------- /pom.xml: --------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
3 |     <modelVersion>4.0.0</modelVersion>
4 | 
5 |     <groupId>com.milvus.io</groupId>
6 |     <artifactId>kafka-connect-milvus</artifactId>
7 |     <version>${project-version}</version>
8 | 
9 |     <name>kafka-connect-milvus</name>
10 |     <url>https://github.com/zilliztech/kafka-connect-milvus</url>
11 | 
12 |     <properties>
13 |         <project-version>1.0.1</project-version>
14 |         <confluent.maven.repo>https://packages.confluent.io/maven/</confluent.maven.repo>
15 |     </properties>
16 | 
17 |     <repositories>
18 |         <repository>
19 |             <id>confluent</id>
20 |             <name>Confluent</name>
21 |             <url>${confluent.maven.repo}</url>
22 |         </repository>
23 |     </repositories>
24 | 
25 |     <dependencies>
26 |         <dependency>
27 |             <groupId>org.apache.kafka</groupId>
28 |             <artifactId>connect-api</artifactId>
29 |             <version>3.9.0</version>
30 |         </dependency>
31 |         <dependency>
32 |             <groupId>com.konghq</groupId>
33 |             <artifactId>unirest-java</artifactId>
34 |             <version>3.14.5</version>
35 |         </dependency>
36 |         <dependency>
37 |             <groupId>org.projectlombok</groupId>
38 |             <artifactId>lombok</artifactId>
39 |             <version>1.18.36</version>
40 |         </dependency>
41 |         <dependency>
42 |             <groupId>com.google.code.gson</groupId>
43 |             <artifactId>gson</artifactId>
44 |             <version>2.11.0</version>
45 |         </dependency>
46 |         <dependency>
47 |             <groupId>org.slf4j</groupId>
48 |             <artifactId>slf4j-api</artifactId>
49 |             <version>2.0.7</version>
50 |         </dependency>
51 |         <dependency>
52 |             <groupId>ch.qos.logback</groupId>
53 |             <artifactId>logback-classic</artifactId>
54 |             <version>1.5.13</version>
55 |         </dependency>
56 |         <dependency>
57 |             <groupId>org.mockito</groupId>
58 |             <artifactId>mockito-core</artifactId>
59 |             <version>5.6.0</version>
60 |             <scope>test</scope>
61 |         </dependency>
62 |         <dependency>
63 |             <groupId>org.junit.jupiter</groupId>
64 |             <artifactId>junit-jupiter</artifactId>
65 |             <version>5.8.1</version>
66 |             <scope>test</scope>
67 |         </dependency>
68 |     </dependencies>
69 | 
70 |     <build>
71 |         <plugins>
72 |             <plugin>
73 |                 <groupId>io.confluent</groupId>
74 |                 <artifactId>kafka-connect-maven-plugin</artifactId>
75 |                 <version>0.12.0</version>
76 |                 <executions>
77 |                     <execution>
78 |                         <goals>
79 |                             <goal>kafka-connect</goal>
80 |                         </goals>
81 |                         <configuration>
82 |                             <title>Milvus Sink Connector</title>
83 |                             <documentationUrl>https://github.com/zilliztech/kafka-connect-milvus/tree/main</documentationUrl>
84 |                             <description>
85 |                                 This is a Kafka sink connector for Milvus. It allows you to stream vector data from Kafka to Milvus. This current version supports connection from Confluent Cloud (hosted Kafka) and Open-Source Kafka to Milvus (self-hosted or Zilliz Cloud).
86 | 
87 |                                 Zilliz Cloud and Milvus are vector databases where you can ingest, store and search vector data. A data record in Zilliz Cloud or Milvus contains one vector field and multiple scalar fields such as string, integer, and float. To stream data, the Kafka message schema must match the schema of the Milvus collection, containing one and only one vector field, and zero to many scalar fields. Each field name must be exactly the same on both sides.
88 |                             </description>
89 |                             <ownerUsername>zilliz</ownerUsername>
90 |                             <ownerType>organization</ownerType>
91 |                             <ownerName>Zilliz, Inc.</ownerName>
92 |                             <ownerUrl>https://zilliz.com/</ownerUrl>
93 |                             <ownerLogo>assets/zilliz_logo.png</ownerLogo>
94 |                             <supportProviderName>Zilliz, Inc.</supportProviderName>
95 |                             <supportSummary>If you require any assistance or have questions regarding the Kafka Connect Milvus Connector, please feel free to reach out to our support team: support@zilliz.com</supportSummary>
96 |                             <supportUrl>https://zilliz.com</supportUrl>
97 |                             <supportLogo>assets/zilliz_logo.png</supportLogo>
98 |                             <componentTypes>
99 |                                 <componentType>sink</componentType>
100 |                             </componentTypes>
101 |                             <tags>
102 |                                 <tag>Milvus</tag>
103 |                                 <tag>Zilliz Cloud</tag>
104 |                                 <tag>vector database</tag>
105 |                                 <tag>similarity search</tag>
106 |                             </tags>
107 |                             <confluentControlCenterIntegration>true</confluentControlCenterIntegration>
108 |                         </configuration>
109 |                     </execution>
110 |                 </executions>
111 |             </plugin>
112 |             <plugin>
113 |                 <groupId>org.apache.maven.plugins</groupId>
114 |                 <artifactId>maven-failsafe-plugin</artifactId>
115 |                 <version>3.0.0-M5</version>
116 |                 <executions>
117 |                     <execution>
118 |                         <goals>
119 |                             <goal>integration-test</goal>
120 |                             <goal>verify</goal>
121 |                         </goals>
122 |                     </execution>
123 |                 </executions>
124 |             </plugin>
125 |             <plugin>
126 |                 <groupId>org.apache.maven.plugins</groupId>
127 |                 <artifactId>maven-resources-plugin</artifactId>
128 |                 <version>3.3.1</version>
129 |                 <configuration>
130 |                     <encoding>UTF-8</encoding>
131 |                 </configuration>
132 |             </plugin>
133 |             <plugin>
134 |                 <groupId>org.owasp</groupId>
135 |                 <artifactId>dependency-check-maven</artifactId>
136 |                 <version>11.1.0</version>
137 |                 <executions>
138 |                     <execution>
139 |                         <goals>
140 |                             <goal>check</goal>
141 |                         </goals>
142 |                     </execution>
143 |                 </executions>
144 |                 <configuration>
145 |                     <failOnError>false</failOnError>
146 |                     <nvdApiKey>05865c08-9d5c-4ba0-82b8-f3d8030f3683</nvdApiKey>
147 |                 </configuration>
148 |             </plugin>
149 |         </plugins>
150 | 
151 |         <resources>
152 |             <resource>
153 |                 <directory>src/main/resources</directory>
154 |                 <filtering>true</filtering>
155 |             </resource>
156 |         </resources>
157 |     </build>
158 | </project>
-------------------------------------------------------------------------------- /LICENSE: --------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 | 
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 | 
7 | 1. Definitions.
8 | 
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 
134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 
193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | --------------------------------------------------------------------------------