├── .gitignore ├── CHANGELOG.md ├── Emakefile ├── README.md ├── bin ├── clear_kafkerl_test_topics.sh ├── create_test_topics.sh ├── server0.properties ├── server1.properties ├── server2.properties ├── start_broker.sh ├── start_zk.sh ├── stop_all_brokers.sh ├── stop_broker.sh ├── stop_zk.sh └── zookeeper.properties ├── config └── sys.config ├── elvis.config ├── include └── kafkerl.hrl ├── rebar.config ├── rebar.lock ├── relx.config ├── run.sh ├── src ├── kafkerl.app.src ├── kafkerl.erl ├── kafkerl_app.erl ├── kafkerl_broker_connection.erl ├── kafkerl_connector.erl ├── kafkerl_error.erl ├── kafkerl_protocol.erl ├── kafkerl_sup.erl └── kafkerl_utils.erl └── test └── kafkerl_meta_SUITE.erl /.gitignore: -------------------------------------------------------------------------------- 1 | hexer.config 2 | .eunit 3 | deps 4 | *.o 5 | *.beam 6 | *.plt 7 | erl_crash.dump 8 | ebin 9 | log 10 | *.log 11 | *.bak 12 | rebar 13 | .rebar 14 | *.dump 15 | bin/tmp 16 | doc 17 | _build 18 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Change Log 2 | 3 | ## [1.0.0](https://github.com/inaka/kafkerl/tree/1.0.0) (2016-08-16) 4 | **Closed issues:** 5 | 6 | - Move from erlang.mk to rebar3 [\#26](https://github.com/inaka/kafkerl/issues/26) 7 | 8 | **Merged pull requests:** 9 | 10 | - Euen.26.rebar3 [\#27](https://github.com/inaka/kafkerl/pull/27) ([Euen](https://github.com/Euen)) 11 | - Parity [\#23](https://github.com/inaka/kafkerl/pull/23) ([HernanRivasAcosta](https://github.com/HernanRivasAcosta)) 12 | - Parity [\#22](https://github.com/inaka/kafkerl/pull/22) ([HernanRivasAcosta](https://github.com/HernanRivasAcosta)) 13 | - Parity [\#21](https://github.com/inaka/kafkerl/pull/21) ([HernanRivasAcosta](https://github.com/HernanRivasAcosta)) 14 | - Parity [\#20](https://github.com/inaka/kafkerl/pull/20) ([HernanRivasAcosta](https://github.com/HernanRivasAcosta)) 15 | - Parity [\#19](https://github.com/inaka/kafkerl/pull/19) ([HernanRivasAcosta](https://github.com/HernanRivasAcosta)) 16 | - Parity [\#18](https://github.com/inaka/kafkerl/pull/18) ([HernanRivasAcosta](https://github.com/HernanRivasAcosta)) 17 | - Parity [\#17](https://github.com/inaka/kafkerl/pull/17) ([HernanRivasAcosta](https://github.com/HernanRivasAcosta)) 18 | - Par [\#16](https://github.com/inaka/kafkerl/pull/16) ([HernanRivasAcosta](https://github.com/HernanRivasAcosta)) 19 | - Parity [\#15](https://github.com/inaka/kafkerl/pull/15) ([HernanRivasAcosta](https://github.com/HernanRivasAcosta)) 20 | - Parity [\#14](https://github.com/inaka/kafkerl/pull/14) ([HernanRivasAcosta](https://github.com/HernanRivasAcosta)) 21 | - Parity [\#13](https://github.com/inaka/kafkerl/pull/13) ([HernanRivasAcosta](https://github.com/HernanRivasAcosta)) 22 | - Parity [\#12](https://github.com/inaka/kafkerl/pull/12) ([HernanRivasAcosta](https://github.com/HernanRivasAcosta)) 23 | - Parity [\#11](https://github.com/inaka/kafkerl/pull/11) ([HernanRivasAcosta](https://github.com/HernanRivasAcosta)) 24 | - Parity [\#10](https://github.com/inaka/kafkerl/pull/10) ([HernanRivasAcosta](https://github.com/HernanRivasAcosta)) 25 | - Parity [\#9](https://github.com/inaka/kafkerl/pull/9) ([HernanRivasAcosta](https://github.com/HernanRivasAcosta)) 26 | - Parity [\#8](https://github.com/inaka/kafkerl/pull/8) ([HernanRivasAcosta](https://github.com/HernanRivasAcosta)) 27 | - Parity, again [\#7](https://github.com/inaka/kafkerl/pull/7) 
([HernanRivasAcosta](https://github.com/HernanRivasAcosta)) 28 | - Parity [\#6](https://github.com/inaka/kafkerl/pull/6) ([HernanRivasAcosta](https://github.com/HernanRivasAcosta)) 29 | - Added the subscription to kafkerl events API [\#5](https://github.com/inaka/kafkerl/pull/5) ([HernanRivasAcosta](https://github.com/HernanRivasAcosta)) 30 | - Parity [\#4](https://github.com/inaka/kafkerl/pull/4) ([HernanRivasAcosta](https://github.com/HernanRivasAcosta)) 31 | - Parity [\#3](https://github.com/inaka/kafkerl/pull/3) ([HernanRivasAcosta](https://github.com/HernanRivasAcosta)) 32 | - Fix for port\(\) [\#2](https://github.com/inaka/kafkerl/pull/2) ([elbrujohalcon](https://github.com/elbrujohalcon)) 33 | 34 | 35 | 36 | \* *This Change Log was automatically generated by [github_changelog_generator](https://github.com/skywinder/Github-Changelog-Generator)* -------------------------------------------------------------------------------- /Emakefile: -------------------------------------------------------------------------------- 1 | {"src/*", [warn_unused_vars, 2 | warn_export_all, 3 | warn_shadow_vars, 4 | warn_unused_import, 5 | warn_unused_function, 6 | warn_bif_clash, 7 | warn_unused_record, 8 | warn_deprecated_function, 9 | warn_obsolete_guard, 10 | strict_validation, 11 | report, 12 | warn_export_vars, 13 | warn_exported_vars, 14 | debug_info, 15 | {outdir, "/ebin"}, 16 | {i, "include"}]}. 17 | {"test/*", [warn_unused_vars, 18 | warn_export_all, 19 | warn_shadow_vars, 20 | warn_unused_import, 21 | warn_unused_function, 22 | warn_bif_clash, 23 | warn_unused_record, 24 | warn_deprecated_function, 25 | warn_obsolete_guard, 26 | strict_validation, 27 | report, 28 | warn_export_vars, 29 | warn_exported_vars, 30 | debug_info, 31 | {outdir, "/ebin"}, 32 | {i, "include"}]}. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | kafkerl v2.1.0 2 | ============== 3 | Apache Kafka 0.8.2 high performance producer/consumer for erlang. 4 | Developed thanks to the support and sponsorship of [TigerText](http://www.tigertext.com/) and [Inaka](https://github.com/inaka/). 5 | 6 | ## Features (aka, why kafkerl?) 7 | - Fast binary creation. 8 | - Caching requests to build more optimally compressed multi message TCP packages. 9 | - Highly concurrent, using @jaynel concurrency tools. 10 | - Messages are not lost but cached before sending to kafka. 11 | - Handles server side errors and broker/leadership changes. 12 | - Flexible API allows consumer of messages to define pids, funs or M:F pairs as callbacks for the received messages. 13 | - Simple yet flexible consumer API to retrieve the messages from Kafka. 14 | 15 | ## Missing features (aka, what I am working on but haven't finished yet) 16 | - There is no communication with Zookeeper. 17 | - Tests suites. 18 | 19 | Special thanks to [@nitzanharel](https://github.com/nitzanharel) who found some really nasty bugs and helped me understand the subtleties of kafka's design and to the rest of the [TigerText](http://www.tigertext.com/) and [Inaka](https://github.com/inaka/) teams for their support and code reviews. 
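## Example usage

A minimal sketch using only the functions exported by `src/kafkerl.erl`; the topic, partition, offset and payload values below are placeholders:

```erlang
%% Start the application and produce a binary payload.
ok = kafkerl:start(),
ok = kafkerl:produce(<<"test1">>, 0, <<"hello kafkerl">>),

%% Blocking consume: with no consumer callback the caller waits for the
%% fetched messages and the last offset.
{Messages, Offset} = kafkerl:consume(<<"test1">>, 0, [{offset, 0}]),

%% Non-blocking consume: deliver new messages to this process every 5 seconds.
ok = kafkerl:consume(<<"test1">>, 0, [{consumer, self()},
                                      {offset, Offset},
                                      {fetch_interval, 5000}]),
ok = kafkerl:stop_consuming(<<"test1">>, 0).
```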
20 | -------------------------------------------------------------------------------- /bin/clear_kafkerl_test_topics.sh: -------------------------------------------------------------------------------- 1 | rm -rf tmp/ -------------------------------------------------------------------------------- /bin/create_test_topics.sh: -------------------------------------------------------------------------------- 1 | set -e 2 | 3 | show_help() { 4 | echo "Usage:"; 5 | echo " -h, --help: display this message"; 6 | echo " -d, --kafka_path: The path to the kafka installation (required)"; 7 | exit 1; 8 | } 9 | 10 | while true ; do 11 | case "$1" in 12 | -h|--help) 13 | show_help ;; 14 | -d|--kafka-path) 15 | d=${2} ; 16 | shift 2 ;; 17 | *) 18 | break ; 19 | shift 2 ;; 20 | esac 21 | done 22 | 23 | # make sure the path is defined 24 | if [ ! -d "${d}" ]; then echo "invalid kafka path ${d}" ; exit 1 ; fi 25 | 26 | "${d}/bin/kafka-topics.sh" --zookeeper localhost:2181 --create --topic test1 --partitions 3 --replication-factor 3 27 | "${d}/bin/kafka-topics.sh" --zookeeper localhost:2181 --create --topic test2 --partitions 3 --replication-factor 3 28 | "${d}/bin/kafka-topics.sh" --zookeeper localhost:2181 --create --topic test3 --partitions 4 --replication-factor 3 -------------------------------------------------------------------------------- /bin/server0.properties: -------------------------------------------------------------------------------- 1 | broker.id=0 2 | port=9090 3 | 4 | num.network.threads=2 5 | num.io.threads=8 6 | socket.send.buffer.bytes=1048576 7 | socket.receive.buffer.bytes=1048576 8 | socket.request.max.bytes=104857600 9 | log.dirs=tmp/kafka-logs-0 10 | num.partitions=2 11 | log.retention.hours=168 12 | log.segment.bytes=536870912 13 | log.retention.check.interval.ms=60000 14 | log.cleaner.enable=false 15 | zookeeper.connect=localhost:2181 16 | zookeeper.connection.timeout.ms=1000000 17 | auto.create.topics.enable = true -------------------------------------------------------------------------------- /bin/server1.properties: -------------------------------------------------------------------------------- 1 | broker.id=1 2 | port=9091 3 | 4 | num.network.threads=2 5 | num.io.threads=8 6 | socket.send.buffer.bytes=1048576 7 | socket.receive.buffer.bytes=1048576 8 | socket.request.max.bytes=104857600 9 | log.dirs=tmp/kafka-logs-1 10 | num.partitions=2 11 | log.retention.hours=168 12 | log.segment.bytes=536870912 13 | log.retention.check.interval.ms=60000 14 | log.cleaner.enable=false 15 | zookeeper.connect=localhost:2181 16 | zookeeper.connection.timeout.ms=1000000 17 | auto.create.topics.enable = true -------------------------------------------------------------------------------- /bin/server2.properties: -------------------------------------------------------------------------------- 1 | broker.id=2 2 | port=9092 3 | 4 | num.network.threads=2 5 | num.io.threads=8 6 | socket.send.buffer.bytes=1048576 7 | socket.receive.buffer.bytes=1048576 8 | socket.request.max.bytes=104857600 9 | log.dirs=tmp/kafka-logs-2 10 | num.partitions=2 11 | log.retention.hours=168 12 | log.segment.bytes=536870912 13 | log.retention.check.interval.ms=60000 14 | log.cleaner.enable=false 15 | zookeeper.connect=localhost:2181 16 | zookeeper.connection.timeout.ms=1000000 17 | auto.create.topics.enable = true -------------------------------------------------------------------------------- /bin/start_broker.sh: -------------------------------------------------------------------------------- 1 | set -e 2 | 3 | 
show_help() { 4 | echo "Usage:"; 5 | echo " -h, --help: display this message"; 6 | echo " -d, --kafka_path: The path to the kafka installation (required)"; 7 | echo " -c, --config: the path of the configuration file"; 8 | exit 1; 9 | } 10 | 11 | cfg="server0.properties" 12 | 13 | while true ; do 14 | case "$1" in 15 | -h|--help) 16 | show_help ;; 17 | -d|--kafka-path) 18 | d=${2} ; 19 | shift 2 ;; 20 | -c|--config) 21 | cfg=${2} ; 22 | shift 2 ;; 23 | *) 24 | break ; 25 | shift 2 ;; 26 | esac 27 | done 28 | 29 | # make sure the path is defined 30 | if [ ! -d "${d}" ]; then echo "invalid kafka path ${d}" ; exit 1 ; fi 31 | 32 | "${d}/bin/kafka-server-start.sh" ${cfg} & 33 | disown 34 | 35 | exit 1 -------------------------------------------------------------------------------- /bin/start_zk.sh: -------------------------------------------------------------------------------- 1 | set -e 2 | 3 | show_help() { 4 | echo "Usage:"; 5 | echo " -h, --help: display this message"; 6 | echo " -d, --kafka_path: The path to the kafka installation (required)"; 7 | echo " -c, --config: the path of the configuration file"; 8 | exit 1; 9 | } 10 | 11 | cfg="zookeeper.properties" 12 | 13 | while true ; do 14 | case "$1" in 15 | -h|--help) 16 | show_help ;; 17 | -d|--kafka-path) 18 | d=${2} ; 19 | shift 2 ;; 20 | -c|--config) 21 | cfg=${2} ; 22 | shift 2 ;; 23 | *) 24 | break ; 25 | shift 2 ;; 26 | esac 27 | done 28 | 29 | # make sure the path is defined 30 | if [ ! -d "${d}" ]; then echo "invalid kafka path ${d}" ; exit 1 ; fi 31 | 32 | "${d}/bin/zookeeper-server-start.sh" ${cfg} & 33 | disown 34 | 35 | exit 1 -------------------------------------------------------------------------------- /bin/stop_all_brokers.sh: -------------------------------------------------------------------------------- 1 | ps ax | grep -i 'kafka\.Kafka' | grep java | grep -v grep | awk '{print $1}' | xargs kill -9 2 | -------------------------------------------------------------------------------- /bin/stop_broker.sh: -------------------------------------------------------------------------------- 1 | set -e 2 | 3 | show_help() { 4 | echo "Usage:"; 5 | echo " -h, --help: display this message";] 6 | echo " -c, --config: the path of the configuration file"; 7 | exit 1; 8 | } 9 | 10 | cfg="server0.properties" 11 | 12 | while true ; do 13 | case "$1" in 14 | -h|--help) 15 | show_help ;; 16 | -c|--config) 17 | cfg=${2} ; 18 | shift 2 ;; 19 | *) 20 | break ; 21 | shift 2 ;; 22 | esac 23 | done 24 | 25 | ps -ax | grep kafka | grep ${cfg} | awk '{print $1}' | xargs kill -15 26 | 27 | exit 1 -------------------------------------------------------------------------------- /bin/stop_zk.sh: -------------------------------------------------------------------------------- 1 | ps ax | grep -i 'zookeeper' | grep -v grep | awk '{print $1}' | xargs kill -9 2 | -------------------------------------------------------------------------------- /bin/zookeeper.properties: -------------------------------------------------------------------------------- 1 | dataDir=tmp/zookeeper 2 | clientPort=2181 3 | maxClientCnxns=0 4 | default.replication.factor=3 -------------------------------------------------------------------------------- /config/sys.config: -------------------------------------------------------------------------------- 1 | [{kafkerl, [%{gen_server_name, kafkerl_client}, 2 | {disabled, false}, 3 | {conn_config, [{brokers, [{"localhost", 9090}, 4 | {"localhost", 9091}, 5 | {"localhost", 9092}]}, 6 | {client_id, kafkerl_client}, % Sent to kafka 7 | 
{max_broker_retries, 2}, 8 | {broker_tcp_timeout, 1000}, 9 | {max_metadata_retries, -1}, 10 | {assume_autocreate_topics, true}, 11 | {metadata_tcp_timeout, 1000}, 12 | {max_queue_size, 20}, % In items, per topic/partition 13 | {max_time_queued, 5}, % In seconds 14 | {metadata_request_cooldown, 1500}, % In milliseconds 15 | {consumer_min_bytes, 1}, 16 | {consumer_max_wait, 1500}]}, 17 | {topics, [test1, test2, test3]}, 18 | {tests, [{kafka_installation, "~/kafka"}]}]}]. -------------------------------------------------------------------------------- /elvis.config: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | elvis, 4 | [ 5 | {config, 6 | [#{dirs => ["src", "test"], 7 | filter => "*.erl", 8 | rules => [ {elvis_style, invalid_dynamic_call, #{ignore => [kafkerl_utils]}} 9 | , {elvis_style, nesting_level, #{level => 4}} 10 | , {elvis_style, dont_repeat_yourself, #{min_complexity => 15}} 11 | ], 12 | ruleset => erl_files 13 | }, 14 | #{dirs => ["."], 15 | filter => "rebar.config", 16 | ruleset => rebar_config 17 | }, 18 | #{dirs => ["."], 19 | filter => "elvis.config", 20 | ruleset => elvis_config 21 | } 22 | ] 23 | } 24 | ] 25 | } 26 | ]. 27 | -------------------------------------------------------------------------------- /include/kafkerl.hrl: -------------------------------------------------------------------------------- 1 | %% Error codes 2 | -define(NO_ERROR, 0). 3 | -define(OFFSET_OUT_OF_RANGE, 1). 4 | -define(INVALID_MESSAGE, 2). 5 | -define(UNKNOWN_TOPIC_OR_PARTITION, 3). 6 | -define(INVALID_MESSAGE_SIZE, 4). 7 | -define(LEADER_NOT_AVAILABLE, 5). 8 | -define(NOT_LEADER_FOR_PARTITION, 6). 9 | -define(REQUEST_TIMEDOUT, 7). 10 | -define(BROKER_NOT_AVAILABLE, 8). 11 | -define(REPLICA_NOT_AVAILABLE, 9). 12 | -define(MESSAGE_SIZE_TOO_LARGE, 10). 13 | -define(STALE_CONTROLLER_EPOCH, 11). 14 | -define(OFFSET_METADATA_TOO_LARGE, 12). 15 | -define(OFFSETS_LOAD_IN_PROGRESS_CODE, 14). 16 | -define(CONSUMER_COORDINATOR_NOT_AVAILABLE_CODE, 15). 17 | -define(NOT_COORDINATOR_FOR_CONSUMER_CODE, 16). 18 | -define(UNKNOWN, -1). -------------------------------------------------------------------------------- /rebar.config: -------------------------------------------------------------------------------- 1 | %% -*- mode: erlang;erlang-indent-level: 2;indent-tabs-mode: nil -*- 2 | %% ex: ts=4 sw=4 ft=erlang et 3 | 4 | %% == Erlang Compiler == 5 | 6 | %% Erlang compiler options 7 | {erl_opts, [ warn_unused_vars 8 | , warnings_as_errors % 9 | , warn_export_all 10 | , warn_shadow_vars 11 | , warn_unused_import 12 | , warn_unused_function 13 | , warn_bif_clash 14 | , warn_unused_record 15 | , warn_deprecated_function 16 | , warn_obsolete_guard 17 | , strict_validation 18 | , warn_export_vars 19 | , warn_exported_vars 20 | , warn_missing_spec % 21 | , warn_untyped_record 22 | , debug_info % 23 | , {i, "include"} %% 24 | ]}. 25 | 26 | % {sub_dirs, ["rel", "deps"]}. 27 | 28 | {profiles, [ 29 | {test, [ 30 | {deps, [ {mixer, {git, "https://github.com/inaka/mixer.git", {tag, "0.1.5"}}} 31 | , {katana_test, "0.1.1"} 32 | ]} 33 | ]}, 34 | {shell, [ 35 | {deps, [ 36 | {sync, {git, "https://github.com/rustyio/sync.git", {ref, "9c78e7b"}}} 37 | ]} 38 | ]} 39 | ]}. 
40 | 41 | %% == Common Test == 42 | 43 | {ct_compile_opts, [ warn_unused_vars 44 | , warn_export_all 45 | , warn_shadow_vars 46 | , warn_unused_import 47 | , warn_unused_function 48 | , warn_bif_clash 49 | , warn_unused_record 50 | , warn_deprecated_function 51 | , warn_obsolete_guard 52 | , strict_validation 53 | , warn_export_vars 54 | , warn_exported_vars 55 | , warn_missing_spec 56 | , warn_untyped_record 57 | , debug_info]}. 58 | 59 | {ct_opts, []}. 60 | 61 | %% == Cover == 62 | 63 | {cover_enabled, true}. 64 | 65 | {cover_opts, [verbose]}. 66 | 67 | %% == Dependencies == 68 | 69 | {deps, [ {epocxy, {git, "https://github.com/duomark/epocxy", {tag, "1.1.0"}}} 70 | , {validerl, {git, "https://github.com/HernanRivasAcosta/validerl", {branch, "master"}}} 71 | ]}. 72 | 73 | %% == Dialyzer == 74 | 75 | {dialyzer, [ {warnings, [ underspecs 76 | , no_return 77 | , unmatched_returns 78 | , error_handling 79 | ]} 80 | , {get_warnings, true} 81 | , {plt_apps, top_level_deps} 82 | , {plt_extra_apps, []} 83 | , {plt_location, local} 84 | , {base_plt_apps, [stdlib, kernel]} 85 | , {base_plt_location, global}]}. 86 | 87 | %% == Shell == 88 | 89 | {shell, [{apps, [sync]}]}. -------------------------------------------------------------------------------- /rebar.lock: -------------------------------------------------------------------------------- 1 | [{<<"epocxy">>, 2 | {git,"https://github.com/duomark/epocxy", 3 | {ref,"665d79dfdb78df036bac3b3cb6db55e78406880e"}}, 4 | 0}, 5 | {<<"proper">>, 6 | {git,"https://github.com/manopapad/proper", 7 | {ref,"9f6a6501430479bed66d08cd795cd34d36ec83aa"}}, 8 | 1}, 9 | {<<"validerl">>, 10 | {git,"https://github.com/HernanRivasAcosta/validerl", 11 | {ref,"c2bb4c3ce83ce01a8004afe01bf226d052d8a5dd"}}, 12 | 0}]. 13 | -------------------------------------------------------------------------------- /relx.config: -------------------------------------------------------------------------------- 1 | {release, {kafkerl, "2.1.1"}, [kafkerl]}. 2 | {sys_config, "config/sys.config"}. 3 | {extended_start_script, true}. -------------------------------------------------------------------------------- /run.sh: -------------------------------------------------------------------------------- 1 | make shell -------------------------------------------------------------------------------- /src/kafkerl.app.src: -------------------------------------------------------------------------------- 1 | { application 2 | , kafkerl 3 | , [ {description, "Apache Kafka 0.8.2 high performance producer/consumer for erlang."} 4 | , {vsn, "2.1.1"} 5 | , {applications, [kernel, validerl, epocxy, stdlib]} 6 | , {mod, {kafkerl_app, []}} 7 | , {modules, [ kafkerl 8 | , kafkerl_app 9 | , kafkerl_broker_connection 10 | , kafkerl_connector 11 | , kafkerl_error 12 | , kafkerl_protocol 13 | , kafkerl_sup 14 | , kafkerl_utils]} 15 | , {maintainers, ["hernanrivasacosta@gmail.com", "Inaka"]} 16 | , {licenses, ["Apache Licence 2.0"]} 17 | , {links, [{"Github", "https://github.com/hernanrivasacosta/kafkerl"}]} 18 | , {build_tools, ["rebar3"]} 19 | ] 20 | }. 21 | -------------------------------------------------------------------------------- /src/kafkerl.erl: -------------------------------------------------------------------------------- 1 | -module(kafkerl). 2 | -author('hernanrivasacosta@gmail.com'). 3 | 4 | -export([start/0]). 
5 | -export([produce/3, produce/4, produce/5, 6 | consume/2, consume/3, consume/4, stop_consuming/2, stop_consuming/3, 7 | request_metadata/0, request_metadata/1, request_metadata/2, 8 | partitions/0, partitions/1]). 9 | -export([version/0]). 10 | 11 | %% Types 12 | -type offset() :: integer(). 13 | 14 | -type callback() :: pid() | 15 | fun() | 16 | {atom(), atom()} | 17 | {atom(), atom(), [any()]}. 18 | -type option() :: {buffer_size, integer() | infinity} | 19 | {dump_location, string()} | 20 | {consumer, callback()} | 21 | {min_bytes, integer()} | 22 | {max_wait, integer()} | 23 | {offset, offset()} | 24 | {fetch_interval, false | integer()}. 25 | -type options() :: [option()]. 26 | -type server_ref() :: atom() | pid(). 27 | 28 | -type error() :: {error, atom() | {atom(), any()}}. 29 | 30 | -type topic() :: binary(). 31 | -type partition() :: integer(). 32 | -type payload() :: binary() | [binary()]. 33 | -type basic_message() :: {topic(), partition(), payload()}. 34 | 35 | -export_type([server_ref/0, error/0, options/0, callback/0, 36 | topic/0, partition/0, payload/0, basic_message/0]). 37 | 38 | %%============================================================================== 39 | %% API 40 | %%============================================================================== 41 | -spec start() -> ok | {error, term()}. 42 | start() -> 43 | ok = application:load(?MODULE), 44 | application:start(?MODULE). 45 | 46 | %%============================================================================== 47 | %% Access API 48 | %%============================================================================== 49 | %% Produce API 50 | 51 | -spec produce(server_ref(), basic_message(), options()) -> ok; 52 | (topic(), partition(), payload()) -> ok. 53 | produce(_ServerRef, {Topic, Partition, Message}, Options) -> 54 | produce(?MODULE, Topic, Partition, Message, Options); 55 | produce(Topic, Partition, Message) -> 56 | produce(?MODULE, Topic, Partition, Message, []). 57 | 58 | -spec produce(server_ref(), topic(), partition(), payload()) -> ok; 59 | (topic(), partition(), payload(), options()) -> ok. 60 | produce(Topic, Partition, Message, Options) when is_list(Options) -> 61 | produce(?MODULE, {Topic, Partition, Message}, Options); 62 | produce(ServerRef, Topic, Partition, Message) -> 63 | produce(ServerRef, {Topic, Partition, Message}, []). 64 | 65 | -spec produce(server_ref(), topic(), partition(), payload(), options()) -> ok. 66 | produce(ServerRef, Topic, Partition, Message, Options) -> 67 | kafkerl_connector:send(ServerRef, {Topic, Partition, Message}, Options). 68 | 69 | %% Consume API 70 | -spec consume(topic(), partition()) -> ok | error(). 71 | consume(Topic, Partition) -> 72 | consume(?MODULE, Topic, Partition, []). 73 | 74 | -spec consume(topic(), partition(), options()) -> ok | error(); 75 | (server_ref(), topic(), partition()) -> ok | error(). 76 | consume(Topic, Partition, Options) when is_list(Options) -> 77 | consume(?MODULE, Topic, Partition, Options); 78 | consume(ServerRef, Topic, Partition) -> 79 | consume(ServerRef, Topic, Partition, []). 80 | 81 | -spec consume(server_ref(), topic(), partition(), options()) -> 82 | ok | {[payload()], offset()} | error(). 
83 | consume(ServerRef, Topic, Partition, Options) -> 84 | case {proplists:get_value(consumer, Options, undefined), 85 | proplists:get_value(fetch_interval, Options, false)} of 86 | {undefined, false} -> 87 | NewOptions = [{consumer, self()} | Options], 88 | ok = kafkerl_connector:fetch(ServerRef, Topic, Partition, NewOptions), 89 | kafkerl_utils:gather_consume_responses(); 90 | {undefined, _} -> 91 | {error, fetch_interval_specified_with_no_consumer}; 92 | _ -> 93 | kafkerl_connector:fetch(ServerRef, Topic, Partition, Options) 94 | end. 95 | 96 | -spec stop_consuming(topic(), partition()) -> ok. 97 | stop_consuming(Topic, Partition) -> 98 | stop_consuming(?MODULE, Topic, Partition). 99 | 100 | -spec stop_consuming(server_ref(), topic(), partition()) -> ok. 101 | stop_consuming(ServerRef, Topic, Partition) -> 102 | kafkerl_connector:stop_fetch(ServerRef, Topic, Partition). 103 | 104 | %% Metadata API 105 | -spec request_metadata() -> ok. 106 | request_metadata() -> 107 | request_metadata(?MODULE). 108 | 109 | -spec request_metadata(atom() | [topic()]) -> ok. 110 | request_metadata(Topics) when is_list(Topics) -> 111 | request_metadata(?MODULE, Topics); 112 | request_metadata(ServerRef) -> 113 | kafkerl_connector:request_metadata(ServerRef). 114 | 115 | -spec request_metadata(atom(), [topic()]) -> ok. 116 | request_metadata(ServerRef, Topics) -> 117 | kafkerl_connector:request_metadata(ServerRef, Topics). 118 | 119 | %% Partitions 120 | -spec partitions() -> [{topic(), [partition()]}] | error(). 121 | partitions() -> 122 | partitions(?MODULE). 123 | 124 | -spec partitions(server_ref()) -> [{topic(), [partition()]}] | error(). 125 | partitions(ServerRef) -> 126 | kafkerl_connector:get_partitions(ServerRef). 127 | 128 | %% Utils 129 | -spec version() -> {integer(), integer(), integer()}. 130 | version() -> 131 | {2, 0, 0}. -------------------------------------------------------------------------------- /src/kafkerl_app.erl: -------------------------------------------------------------------------------- 1 | -module(kafkerl_app). 2 | 3 | -behaviour(application). 4 | 5 | -export([start/2, stop/1]). 6 | 7 | -spec start(any(), any()) -> {ok, pid()}. 8 | start(_StartType, _StartArgs) -> 9 | kafkerl_sup:start_link(). 10 | 11 | -spec stop(any()) -> ok. 12 | stop(_State) -> 13 | ok. -------------------------------------------------------------------------------- /src/kafkerl_broker_connection.erl: -------------------------------------------------------------------------------- 1 | -module(kafkerl_broker_connection). 2 | -author('hernanrivasacosta@gmail.com'). 3 | 4 | -behaviour(gen_server). 5 | 6 | -include_lib("kernel/include/logger.hrl"). 7 | 8 | %% API 9 | -export([add_buffer/2, clear_buffers/1, fetch/4, stop_fetch/3]). 10 | % Only for internal use 11 | -export([connect/6]). 12 | % Supervisors 13 | -export([start_link/4]). 14 | % gen_server callbacks 15 | -export([init/1, terminate/2, code_change/3, 16 | handle_call/3, handle_cast/2, handle_info/2]). 17 | 18 | -include("kafkerl.hrl"). 19 | 20 | -type conn_idx() :: 0..1023. 21 | -type start_link_response() :: {ok, atom(), pid()} | ignore | {error, any()}. 22 | 23 | -record(fetch, {correlation_id = 0 :: kafkerl_protocol:correlation_id(), 24 | server_ref = undefined :: undefined | kafkerl:server_ref(), 25 | topic = undefined :: undefined | kafkerl:topic(), 26 | partition = undefined :: undefined | kafkerl:partition(), 27 | options = undefined :: undefined | kafkerl:options(), 28 | state = void :: kafkerl_protocol:fetch_state()}). 
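%% A #fetch{} tracks a single in-flight fetch request: the correlation id it
%% was sent with, the topic/partition being read, the options it was started
%% with (including the consumer callback) and the incremental parser state
%% used while the response is still arriving over the socket.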
29 | 30 | -record(state, {name = undefined :: undefined | atom(), 31 | buffers = [] :: [atom()], 32 | conn_idx = undefined :: undefined | conn_idx(), 33 | client_id = undefined :: undefined | binary(), 34 | socket = undefined :: undefined | port(), 35 | address = undefined :: undefined | 36 | kafkerl_connector:address(), 37 | connector = undefined :: undefined | pid(), 38 | tref = undefined :: undefined | any(), 39 | tcp_options = [] :: [any()], 40 | max_retries = 0 :: integer(), 41 | retry_interval = 0 :: integer(), 42 | request_number = 0 :: integer(), 43 | pending_requests = [] :: [integer()], 44 | max_time_queued = 0 :: integer(), 45 | ets = undefined :: undefined | atom(), 46 | fetches = [] :: [#fetch{}], 47 | current_fetch = void :: kafkerl_protocol:correlation_id() | 48 | void, 49 | scheduled_fetches = [] :: [{{kafkerl:topic(), 50 | kafkerl:partition()}, 51 | timer:tref()}]}). 52 | -type state() :: #state{}. 53 | 54 | %%============================================================================== 55 | %% API 56 | %%============================================================================== 57 | -spec start_link(conn_idx(), pid(), kafkerl_connector:address(), any()) -> 58 | start_link_response(). 59 | start_link(Id, Connector, Address, Config) -> 60 | NameStr = atom_to_list(?MODULE) ++ "_" ++ integer_to_list(Id), 61 | Name = list_to_atom(NameStr), 62 | Params = [Id, Connector, Address, Config, Name], 63 | case gen_server:start_link({local, Name}, ?MODULE, Params, []) of 64 | {ok, Pid} -> 65 | {ok, Name, Pid}; 66 | Other -> 67 | Other 68 | end. 69 | 70 | -spec add_buffer(kafkerl:server_ref(), atom()) -> ok. 71 | add_buffer(ServerRef, Buffer) -> 72 | gen_server:call(ServerRef, {add_buffer, Buffer}). 73 | 74 | -spec clear_buffers(kafkerl:server_ref()) -> ok. 75 | clear_buffers(ServerRef) -> 76 | gen_server:call(ServerRef, {clear_buffers}). 77 | 78 | -spec fetch(kafkerl:server_ref(), kafkerl:topic(), kafkerl:partition(), 79 | kafkerl:options()) -> ok | kafkerl:error(). 80 | fetch(ServerRef, Topic, Partition, Options) -> 81 | gen_server:call(ServerRef, {fetch, ServerRef, Topic, Partition, Options}). 82 | 83 | -spec stop_fetch(kafkerl:server_ref(), kafkerl:topic(), kafkerl:partition()) -> 84 | ok. 85 | stop_fetch(ServerRef, Topic, Partition) -> 86 | gen_server:call(ServerRef, {stop_fetch, Topic, Partition}). 87 | 88 | %%============================================================================== 89 | %% gen_server callbacks 90 | %%============================================================================== 91 | -spec handle_call(any(), any(), state()) -> {reply, ok, state()}. 92 | handle_call({add_buffer, Buffer}, _From, State = #state{buffers = Buffers}) -> 93 | {reply, ok, State#state{buffers = [Buffer| Buffers]}}; 94 | handle_call({clear_buffers}, _From, State) -> 95 | {reply, ok, State#state{buffers = []}}; 96 | handle_call({fetch, ServerRef, Topic, Partition, Options}, _From, State) -> 97 | handle_fetch(ServerRef, Topic, Partition, Options, State); 98 | handle_call({stop_fetch, Topic, Partition}, _From, State) -> 99 | handle_stop_fetch(Topic, Partition, State). 100 | 101 | -spec handle_info(any(), state()) -> {noreply, state()}. 
102 | handle_info({connected, Socket}, State) -> 103 | handle_flush(State#state{socket = Socket}); 104 | handle_info(connection_timeout, State) -> 105 | {stop, {error, unable_to_connect}, State}; 106 | handle_info({tcp_closed, _Socket}, State = #state{name = Name, 107 | address = {Host, Port}}) -> 108 | ok = ?LOG_WARNING("~p lost connection to ~p:~p", [Name, Host, Port]), 109 | NewState = handle_tcp_close(State), 110 | {noreply, NewState}; 111 | handle_info({tcp, _Socket, Bin}, State) -> 112 | case handle_tcp_data(Bin, State) of 113 | {ok, NewState} -> {noreply, NewState}; 114 | {error, Reason} -> {stop, {error, Reason}, State} 115 | end; 116 | handle_info({flush, Time}, State) -> 117 | {ok, _Tref} = queue_flush(Time), 118 | handle_flush(State); 119 | handle_info(Msg, State = #state{name = Name}) -> 120 | ok = ?LOG_NOTICE("~p got unexpected info message: ~p on ~p", [Name, Msg]), 121 | {noreply, State}. 122 | 123 | % Boilerplate 124 | -spec handle_cast(any(), state()) -> {noreply, state()}. 125 | handle_cast(_Msg, State) -> {noreply, State}. 126 | -spec terminate(atom(), state()) -> ok. 127 | terminate(_Reason, _State) -> ok. 128 | -spec code_change(string(), state(), any()) -> {ok, state()}. 129 | code_change(_OldVsn, State, _Extra) -> {ok, State}. 130 | 131 | %%============================================================================== 132 | %% Handlers 133 | %%============================================================================== 134 | -spec init([any()]) -> {ok, state()} | {stop, bad_config}. 135 | init([Id, Connector, Address, Config, Name]) -> 136 | Schema = [{tcp_options, [any], {default, []}}, 137 | {retry_interval, positive_integer, {default, 1000}}, 138 | {max_retries, positive_integer, {default, 3}}, 139 | {client_id, binary, {default, <<"kafkerl_client">>}}, 140 | {max_time_queued, positive_integer, required}], 141 | case normalizerl:normalize_proplist(Schema, Config) of 142 | {ok, [TCPOpts, RetryInterval, MaxRetries, ClientId, MaxTimeQueued]} -> 143 | NewTCPOpts = kafkerl_utils:get_tcp_options(TCPOpts), 144 | EtsName = list_to_atom(atom_to_list(Name) ++ "_ets"), 145 | _ = ets:new(EtsName, [named_table, public, {write_concurrency, true}, 146 | {read_concurrency, true}]), 147 | State = #state{conn_idx = Id, tcp_options = NewTCPOpts, address = Address, 148 | max_retries = MaxRetries, retry_interval = RetryInterval, 149 | connector = Connector, client_id = ClientId, name = Name, 150 | max_time_queued = MaxTimeQueued, ets = EtsName}, 151 | Params = [self(), Name, NewTCPOpts, Address, RetryInterval, MaxRetries], 152 | _Pid = spawn_link(?MODULE, connect, Params), 153 | {ok, _Tref} = queue_flush(MaxTimeQueued), 154 | {ok, State}; 155 | {errors, Errors} -> 156 | ok = lists:foreach(fun(E) -> 157 | ok = ?LOG_CRITICAL("configuration error: ~p", [E]) 158 | end, Errors), 159 | {stop, bad_config} 160 | end. 
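%% Illustrative values only: a Config proplist matching the schema above could
%% look like
%%   [{tcp_options, []},
%%    {retry_interval, 1000},
%%    {max_retries, 3},
%%    {client_id, <<"kafkerl_client">>},
%%    {max_time_queued, 5}]
%% where every key except the required max_time_queued falls back to its
%% default.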
161 | 162 | handle_flush(State = #state{socket = undefined}) -> 163 | {noreply, State}; 164 | handle_flush(State = #state{buffers = []}) -> 165 | {noreply, State}; 166 | handle_flush(State = #state{socket = Socket, ets = EtsName, buffers = Buffers, 167 | client_id = ClientId, connector = Connector, 168 | name = Name}) -> 169 | {ok, CorrelationId, NewState} = build_correlation_id(State), 170 | % TODO: Maybe buffer all this messages in case something goes wrong 171 | AllMessages = get_all_messages(Buffers), 172 | case kafkerl_utils:merge_messages(AllMessages) of 173 | [] -> 174 | {noreply, NewState}; 175 | MergedMessages -> 176 | Request = kafkerl_protocol:build_produce_request(MergedMessages, 177 | ClientId, 178 | CorrelationId), 179 | true = ets:insert_new(EtsName, {CorrelationId, MergedMessages}), 180 | ok = ?LOG_DEBUG("~p sending ~p", [Name, Request]), 181 | case gen_tcp:send(Socket, Request) of 182 | {error, Reason} -> 183 | ok = ?LOG_CRITICAL("~p was unable to write to socket, reason: ~p", 184 | [Name, Reason]), 185 | gen_tcp:close(Socket), 186 | ets:delete_all_objects(EtsName), 187 | ok = resend_messages(MergedMessages, Connector), 188 | {noreply, handle_tcp_close(NewState)}; 189 | ok -> 190 | ok = ?LOG_DEBUG("~p sent message ~p", [Name, CorrelationId]), 191 | {noreply, NewState} 192 | end 193 | end. 194 | 195 | handle_fetch(ServerRef, Topic, Partition, Options, 196 | State = #state{fetches = Fetches, client_id = ClientId, 197 | socket = Socket, name = Name, 198 | scheduled_fetches = ScheduledFetches}) -> 199 | Scheduled = proplists:get_bool(scheduled, Options), 200 | case {get_fetch(Topic, Partition, Fetches), 201 | lists:keytake({Topic, Partition}, 1, ScheduledFetches), 202 | Scheduled} of 203 | % An scheduled fetch we can't identify? We ignore it 204 | {_, false, true} -> 205 | ?LOG_WARNING("ignoring unknown scheduled fetch"), 206 | {reply, ok, State}; 207 | % We are already fetching that topic/partition pair 208 | {#fetch{}, _, false} -> 209 | {reply, {error, fetch_in_progress}, State}; 210 | % We have a scheduled fetch for that topic/partition pair and this is not an 211 | % scheduled fetch 212 | {not_found, Tuple, false} when is_tuple(Tuple) -> 213 | {reply, {error, fetch_in_progress}, State}; 214 | % We have a valid fetch request! 215 | {not_found, KeyTakeResult, Scheduled} -> 216 | {ok, CorrelationId, NewState} = build_correlation_id(State), 217 | Offset = proplists:get_value(offset, Options, 0), 218 | Request = {Topic, {Partition, Offset, 2147483647}}, 219 | MaxWait = proplists:get_value(max_wait, Options), 220 | MinBytes = proplists:get_value(min_bytes, Options), 221 | Payload = kafkerl_protocol:build_fetch_request(Request, 222 | ClientId, 223 | CorrelationId, 224 | MaxWait, 225 | MinBytes), 226 | case gen_tcp:send(Socket, Payload) of 227 | {error, Reason} -> 228 | ok = ?LOG_CRITICAL("~p was unable to write to socket, reason: ~p", 229 | [Name, Reason]), 230 | ok = gen_tcp:close(Socket), 231 | {reply, {error, no_connection}, handle_tcp_close(State)}; 232 | ok -> 233 | ok = ?LOG_DEBUG("~p sent request ~p", [Name, CorrelationId]), 234 | NewFetch = #fetch{correlation_id = CorrelationId, 235 | server_ref = ServerRef, 236 | topic = Topic, 237 | partition = Partition, 238 | options = Options}, 239 | NewScheduledFetches = case KeyTakeResult of 240 | false -> ScheduledFetches; 241 | {_, _, List} -> List 242 | end, 243 | {reply, ok, NewState#state{fetches = [NewFetch | Fetches], 244 | scheduled_fetches = NewScheduledFetches}} 245 | end 246 | end. 
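%% Scheduled fetches: when a fetch is started with a fetch_interval option,
%% handle_fetch_response/3 (further down) re-issues fetch/4 via
%% timer:apply_after/4 using the last seen offset and a {scheduled, true}
%% marker; the pending timer is kept in scheduled_fetches so that
%% handle_stop_fetch/3 below can cancel it.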
247 | 248 | handle_stop_fetch(Topic, Partition, State) -> 249 | % Cancel any timers we have for scheduled fetches 250 | case lists:keytake({Topic, Partition}, 1, State#state.scheduled_fetches) of 251 | false -> 252 | NewFetches = remove_fetch(Topic, Partition, false, State#state.fetches), 253 | {reply, ok, State#state{fetches = NewFetches}}; 254 | {value, {{Topic, Partition}, TRef}, NewScheduledFetches} -> 255 | _ = timer:cancel(TRef), 256 | NewFetches = remove_fetch(Topic, Partition, force, State#state.fetches), 257 | {reply, ok, State#state{fetches = NewFetches, 258 | scheduled_fetches = NewScheduledFetches}} 259 | end. 260 | 261 | remove_fetch(Topic, Partition, Force, CurrentFetches) -> 262 | remove_fetch(Topic, Partition, Force, CurrentFetches, []). 263 | 264 | remove_fetch(_Topic, _Partition, _Force, [], Acc) -> 265 | Acc; 266 | remove_fetch(Topic, Partition, force, 267 | [#fetch{topic = Topic, partition = Partition} | T], Acc) -> 268 | % If we are forcing the removal, just remove the fetch 269 | Acc ++ T; 270 | remove_fetch(Topic, Partition, _, 271 | [#fetch{topic = Topic, partition = Partition} = Fetch | T], Acc) -> 272 | % Clearing the fetch options ensures this fetch will stop sending any messages 273 | % since there is no consumer. This also removes the fetch_interval so it won't 274 | % be requested again. 275 | % Simply removing the fetch here doesn't work since we will still get a server 276 | % response, but we won't be able to handle it. 277 | [Fetch#fetch{options = []} | Acc] ++ T; 278 | remove_fetch(Topic, Partition, Force, [H | T], Acc) -> 279 | remove_fetch(Topic, Partition, Force, T, [H | Acc]). 280 | 281 | % TCP Handlers 282 | handle_tcp_close(State = #state{retry_interval = RetryInterval, 283 | tcp_options = TCPOpts, 284 | max_retries = MaxRetries, 285 | address = Address, 286 | name = Name}) -> 287 | Params = [self(), Name, TCPOpts, Address, RetryInterval, MaxRetries], 288 | _Pid = spawn_link(?MODULE, connect, Params), 289 | State#state{socket = undefined}. 290 | 291 | handle_tcp_data(Bin, State = #state{fetches = Fetches, 292 | current_fetch = CurrentFetch}) -> 293 | {ok, CorrelationId, _NewBin} = parse_correlation_id(Bin, CurrentFetch), 294 | case get_fetch(CorrelationId, Fetches) of 295 | Fetch = #fetch{} -> 296 | handle_fetch_response(Bin, Fetch, State); 297 | _ -> 298 | handle_produce_response(Bin, State) 299 | end. 300 | 301 | handle_fetch_response(Bin, Fetch, 302 | State = #state{fetches = Fetches, 303 | scheduled_fetches = ScheduledFetches}) -> 304 | Options = Fetch#fetch.options, 305 | Consumer = proplists:get_value(consumer, Options), 306 | case kafkerl_protocol:parse_fetch_response(Bin, Fetch#fetch.state) of 307 | {ok, _CorrelationId, [{_, [{{_, Offset}, Messages}]}]} -> 308 | % The messages can be empty, for example when there are no new messages in 309 | % this partition, if that happens, don't send anything and end the fetch. 
310 | ok = send_messages(Consumer, 311 | case Messages of 312 | [] -> []; 313 | _ -> [{consumed, Messages}, {offset, Offset}] 314 | end), 315 | NewFetches = lists:delete(Fetch, Fetches), 316 | NewState = State#state{current_fetch = void, fetches = NewFetches}, 317 | case proplists:get_value(fetch_interval, Options, false) of 318 | false -> 319 | {ok, NewState}; 320 | Interval -> 321 | NewOptions = kafkerl_utils:proplists_set(Options, [{scheduled, true}, 322 | {offset, Offset}]), 323 | Topic = Fetch#fetch.topic, 324 | Partition = Fetch#fetch.partition, 325 | ServerRef = Fetch#fetch.server_ref, 326 | Arguments = [ServerRef, Topic, Partition, NewOptions], 327 | {ok, Tref} = timer:apply_after(Interval, ?MODULE, fetch, Arguments), 328 | NewScheduledFetches = [{{Topic, Partition}, Tref} | ScheduledFetches], 329 | {ok, NewState#state{scheduled_fetches = NewScheduledFetches}} 330 | end; 331 | {incomplete, CorrelationId, Data, NewFetchState} -> 332 | ok = case Data of 333 | [{_, [{_, Messages = [_ | _]}]}] -> 334 | send_messages(Consumer, {consumed, Messages}); 335 | _ -> 336 | % On some cases, kafka will return an incomplete response with no 337 | % messages, but we shouldn't send the empty message list. 338 | ok 339 | end, 340 | {ok, State#state{fetches = [Fetch#fetch{state = NewFetchState} | 341 | lists:delete(Fetch, Fetches)], 342 | current_fetch = CorrelationId}}; 343 | Error -> 344 | ok = send_messages(Consumer, Error), 345 | NewFetches = lists:delete(Fetch, Fetches), 346 | {ok, State#state{current_fetch = void, fetches = NewFetches}} 347 | end. 348 | 349 | handle_produce_response(Bin, State = #state{connector = Connector, name = Name, 350 | ets = EtsName}) -> 351 | try 352 | {ok, CorrelationId, Topics} = kafkerl_protocol:parse_produce_response(Bin), 353 | case ets:lookup(EtsName, CorrelationId) of 354 | [{CorrelationId, Messages}] -> 355 | ets:delete(EtsName, CorrelationId), 356 | {Errors, Successes} = split_errors_and_successes(Topics), 357 | % First, send the offsets and messages that were delivered 358 | _ = spawn(fun() -> 359 | notify_success(Successes, Messages, Connector) 360 | end), 361 | % Then handle the errors 362 | case handle_errors(Errors, Messages, Name) of 363 | ignore -> 364 | {ok, State}; 365 | {request_metadata, MessagesToResend} -> 366 | kafkerl_connector:request_metadata(Connector), 367 | ok = resend_messages(MessagesToResend, Connector), 368 | {ok, State} 369 | end; 370 | _ -> 371 | ok = ?LOG_WARNING("~p was unable to get produce response", [Name]), 372 | {error, invalid_produce_response} 373 | end 374 | catch 375 | _:Er -> 376 | ok = ?LOG_CRITICAL("~p got unexpected response when parsing message: ~p", 377 | [Name, Er]), 378 | {ok, State} 379 | end. 380 | 381 | %%============================================================================== 382 | %% Utils 383 | %%============================================================================== 384 | resend_messages(Messages, Connector) -> 385 | F = fun(M) -> kafkerl_connector:send(Connector, M, []) end, 386 | lists:foreach(F, Messages). 387 | 388 | notify_success([], _Messages, _Pid) -> 389 | ok; 390 | notify_success([{Topic, Partition, Offset} | T], Messages, Pid) -> 391 | MergedMessages = kafkerl_utils:merge_messages(Messages), 392 | Partitions = partitions_in_topic(Topic, MergedMessages), 393 | M = messages_in_partition(Partition, Partitions), 394 | kafkerl_connector:produce_succeeded(Pid, {Topic, Partition, M, Offset}), 395 | notify_success(T, Messages, Pid). 
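%% Worked example for build_correlation_id/1 below: the top 10 bits of the
%% 32 bit correlation id hold the connection index (hence the 0..1023 range
%% of conn_idx()) and the low 22 bits hold a per-connection sequence number
%% that is reset once it exceeds 4194303 (2^22 - 1). For ConnIdx = 1 and
%% NextRequest = 5, the id is (1 bsl 22) bor 5 = 4194304 bor 5 = 4194309.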
396 | 397 | partitions_in_topic(Topic, Messages) -> 398 | lists:flatten([P || {T, P} <- Messages, T =:= Topic]). 399 | messages_in_partition(Partition, Messages) -> 400 | lists:flatten([M || {P, M} <- Messages, P =:= Partition]). 401 | 402 | build_correlation_id(State = #state{request_number = RequestNumber, 403 | conn_idx = ConnIdx}) -> 404 | % CorrelationIds are 32 bit integers, of those, the first 10 bits are used for 405 | % the connectionId (hence the 1023 limit on it) and the other 22 bits are used 406 | % for the sequential numbering, this magic number down here is actually 2^10-1 407 | NextRequest = case RequestNumber > 4194303 of 408 | true -> 0; 409 | false -> RequestNumber + 1 410 | end, 411 | CorrelationId = (ConnIdx bsl 22) bor NextRequest, 412 | {ok, CorrelationId, State#state{request_number = NextRequest}}. 413 | 414 | split_errors_and_successes(Topics) -> 415 | split_errors_and_successes(Topics, {[], []}). 416 | 417 | split_errors_and_successes([], Acc) -> 418 | Acc; 419 | split_errors_and_successes([{Topic, Partitions} | T], Acc) -> 420 | F = fun({Partition, ?NO_ERROR, Offset}, {E, S}) -> 421 | {E, [{Topic, Partition, Offset} | S]}; 422 | ({Partition, Error, _}, {E, S}) -> 423 | {[{Topic, Partition, Error} | E], S} 424 | end, 425 | split_errors_and_successes(T, lists:foldl(F, Acc, Partitions)). 426 | 427 | handle_errors([], _Messages, _Name) -> 428 | ignore; 429 | handle_errors(Errors, Messages, Name) -> 430 | F = fun(E) -> handle_error(E, Messages, Name) end, 431 | case lists:filtermap(F, Errors) of 432 | [] -> ignore; 433 | L -> {request_metadata, L} 434 | end. 435 | 436 | handle_error({Topic, Partition, Error}, Messages, Name) 437 | when Error =:= ?UNKNOWN_TOPIC_OR_PARTITION orelse 438 | Error =:= ?NOT_LEADER_FOR_PARTITION orelse 439 | Error =:= ?LEADER_NOT_AVAILABLE -> 440 | case get_message_for_error(Topic, Partition, Messages, Name) of 441 | undefined -> false; 442 | Message -> {true, Message} 443 | end; 444 | handle_error({Topic, Partition, Error}, _Messages, Name) -> 445 | ErrorName = kafkerl_error:get_error_name(Error), 446 | ok = ?LOG_ERROR("~p was unable to handle ~p error on topic ~p, partition ~p", 447 | [Name, ErrorName, Topic, Partition]), 448 | false. 449 | 450 | get_message_for_error(Topic, Partition, SavedMessages, Name) -> 451 | case lists:keyfind(Topic, 1, SavedMessages) of 452 | false -> 453 | print_error(Name, Topic, Partition), 454 | undefined; 455 | {Topic, Partitions} -> 456 | case lists:keyfind(Partition, 1, Partitions) of 457 | false -> 458 | print_error(Name, Topic, Partition), 459 | undefined; 460 | {Partition, Messages} -> 461 | {Topic, Partition, Messages} 462 | end 463 | end. 464 | 465 | print_error(Name, Topic, Partition) -> 466 | ok = ?LOG_ERROR("~p found no messages for topic ~p, partition ~p", 467 | [Name, Topic, Partition]). 468 | 469 | -spec connect(pid(), 470 | atom(), 471 | list(), 472 | kafkerl_connector:address(), 473 | any(), 474 | any()) -> any(). 475 | connect(Pid, Name, _TCPOpts, {Host, Port} = _Address, _Timeout, 0) -> 476 | ok = ?LOG_ERROR("~p was unable to connect to ~p:~p", [Name, Host, Port]), 477 | Pid ! connection_timeout; 478 | connect(Pid, Name, TCPOpts, {Host, Port} = Address, Timeout, Retries) -> 479 | ok = ?LOG_DEBUG("~p attempting connection to ~p:~p", [Name, Host, Port]), 480 | case gen_tcp:connect(Host, Port, TCPOpts, 5000) of 481 | {ok, Socket} -> 482 | ok = ?LOG_DEBUG("~p connnected to ~p:~p", [Name, Host, Port]), 483 | ok = gen_tcp:controlling_process(Socket, Pid), 484 | Pid ! 
{connected, Socket}; 485 | {error, Reason} -> 486 | NewRetries = Retries - 1, 487 | ok = ?LOG_WARNING("~p unable to connect to ~p:~p. Reason: ~p 488 | (~p retries left)", 489 | [Name, Host, Port, Reason, NewRetries]), 490 | timer:sleep(Timeout), 491 | connect(Pid, Name, TCPOpts, Address, Timeout, NewRetries) 492 | end. 493 | 494 | queue_flush(Time) -> 495 | timer:send_after(Time * 1000, {flush, Time}). 496 | 497 | get_all_messages(Buffers) -> 498 | get_all_messages(Buffers, []). 499 | 500 | get_all_messages([], Acc) -> 501 | Acc; 502 | get_all_messages([H | T], Acc) -> 503 | get_all_messages(T, Acc ++ get_messages_from(H, 20)). 504 | 505 | get_messages_from(Ets, Retries) -> 506 | case ets_buffer:read_all(Ets) of 507 | L when is_list(L) -> 508 | L; 509 | _Error when Retries > 0 -> 510 | get_messages_from(Ets, Retries - 1); 511 | _Error -> 512 | ok = ?LOG_WARNING("giving up on reading from the ETS buffer"), 513 | [] 514 | end. 515 | 516 | parse_correlation_id(Bin, void) -> 517 | kafkerl_protocol:parse_correlation_id(Bin); 518 | parse_correlation_id(Bin, CorrelationId) -> 519 | {ok, CorrelationId, Bin}. 520 | 521 | get_fetch(_CorrelationId, []) -> 522 | not_found; 523 | get_fetch(CorrelationId, [H = #fetch{correlation_id = CorrelationId} | _T]) -> 524 | H; 525 | get_fetch(CorrelationId, [_H | T]) -> 526 | get_fetch(CorrelationId, T). 527 | 528 | get_fetch(_Topic, _Partition, []) -> 529 | not_found; 530 | get_fetch(Topic, Partition, [H = #fetch{topic = Topic, 531 | partition = Partition} | _T]) -> 532 | H; 533 | get_fetch(Topic, Partition, [_H | T]) -> 534 | get_fetch(Topic, Partition, T). 535 | 536 | send_messages(_Consumer, []) -> 537 | ok; 538 | send_messages(Consumer, [Event | T]) -> 539 | case send_messages(Consumer, Event) of 540 | ok -> send_messages(Consumer, T); 541 | Error -> Error 542 | end; 543 | send_messages(Consumer, Event) -> 544 | kafkerl_utils:send_event(Consumer, Event). -------------------------------------------------------------------------------- /src/kafkerl_connector.erl: -------------------------------------------------------------------------------- 1 | -module(kafkerl_connector). 2 | -author('hernanrivasacosta@gmail.com'). 3 | 4 | -behaviour(gen_server). 5 | 6 | -include_lib("kernel/include/logger.hrl"). 7 | 8 | %% API 9 | % Metadata 10 | -export([request_metadata/1, request_metadata/2, request_metadata/3, 11 | get_partitions/1]). 12 | % Produce 13 | -export([send/3]). 14 | % Consume 15 | -export([fetch/4, stop_fetch/3]). 16 | % Common 17 | -export([subscribe/2, subscribe/3, unsubscribe/2]). 18 | % Only for internal use 19 | -export([do_request_metadata/6, make_metadata_request/1]). 20 | % Only for broker connections 21 | -export([produce_succeeded/2]). 22 | % Supervisors 23 | -export([start_link/2]). 24 | % gen_server callbacks 25 | -export([init/1, terminate/2, code_change/3, 26 | handle_call/3, handle_cast/2, handle_info/2]). 27 | 28 | -include("kafkerl.hrl"). 29 | 30 | -type kafler_host() :: string(). 31 | -type kafler_port() :: 1..65535. 32 | -type address() :: {kafler_host(), kafler_port()}. 33 | 34 | -type filters() :: all | [atom()]. 35 | 36 | -type broker_mapping_key() :: {kafkerl:topic(), kafkerl:partition()}. 37 | -type broker_mapping() :: {broker_mapping_key(), kafkerl:server_ref()}. 
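%% Each broker_mapping() entry pairs a {Topic, Partition} key with the
%% registered kafkerl_broker_connection process that handles that partition;
%% the mapping is rebuilt every time updated metadata arrives.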
38 | 39 | -record(state, {brokers = [] :: [address()], 40 | broker_mapping = void :: [broker_mapping()] | void, 41 | client_id = <<>> :: kafkerl_protocol:client_id(), 42 | max_metadata_retries = -1 :: integer(), 43 | retry_interval = 1 :: non_neg_integer(), 44 | config = [] :: {atom(), any()}, 45 | autocreate_topics = false :: boolean(), 46 | callbacks = [] :: [{filters(), kafkerl:callback()}], 47 | known_topics = [] :: [binary()], 48 | pending = [] :: [kafkerl:basic_message()], 49 | last_metadata_request = 0 :: integer(), 50 | metadata_request_cd = 0 :: integer(), 51 | last_dump_name = {"", 0} :: {string(), integer()}, 52 | default_fetch_options = [] :: kafkerl:options()}). 53 | -type state() :: #state{}. 54 | 55 | -export_type([address/0]). 56 | 57 | %%============================================================================== 58 | %% API 59 | %%============================================================================== 60 | -spec start_link(atom(), any()) -> {ok, pid()} | ignore | kafkerl:error(). 61 | start_link(Name, Config) -> 62 | gen_server:start_link({local, Name}, ?MODULE, [Config], []). 63 | 64 | -spec send(kafkerl:server_ref(), kafkerl:basic_message(), kafkerl:options()) -> 65 | ok | kafkerl:error(). 66 | send(ServerRef, {Topic, Partition, _Payload} = Message, Options) -> 67 | Buffer = kafkerl_utils:buffer_name(Topic, Partition), 68 | case ets_buffer:write(Buffer, Message) of 69 | NewSize when is_integer(NewSize) -> 70 | case lists:keyfind(buffer_size, 1, Options) of 71 | {buffer_size, MaxSize} when NewSize > MaxSize -> 72 | gen_server:call(ServerRef, {dump_buffer_to_disk, Buffer, Options}); 73 | _ -> 74 | ok 75 | end; 76 | Error -> 77 | ok = ?LOG_DEBUG("unable to write on ~p, reason: ~p", [Buffer, Error]), 78 | gen_server:call(ServerRef, {send, Message}) 79 | end. 80 | 81 | -spec fetch(kafkerl:server_ref(), kafkerl:topic(), kafkerl:partition(), 82 | kafkerl:options()) -> ok | kafkerl:error(). 83 | fetch(ServerRef, Topic, Partition, Options) -> 84 | gen_server:call(ServerRef, {fetch, Topic, Partition, Options}). 85 | 86 | -spec stop_fetch(kafkerl:server_ref(), kafkerl:topic(), kafkerl:partition()) -> 87 | ok. 88 | stop_fetch(ServerRef, Topic, Partition) -> 89 | gen_server:call(ServerRef, {stop_fetch, Topic, Partition}). 90 | 91 | -spec get_partitions(kafkerl:server_ref()) -> 92 | [{kafkerl:topic(), [kafkerl:partition()]}] | kafkerl:error(). 93 | get_partitions(ServerRef) -> 94 | case gen_server:call(ServerRef, {get_partitions}) of 95 | {ok, Mapping} -> 96 | get_partitions_from_mapping(Mapping); 97 | Error -> 98 | Error 99 | end. 100 | 101 | -spec subscribe(kafkerl:server_ref(), kafkerl:callback()) -> 102 | ok | kafkerl:error(). 103 | subscribe(ServerRef, Callback) -> 104 | subscribe(ServerRef, Callback, all). 105 | -spec subscribe(kafkerl:server_ref(), kafkerl:callback(), filters()) -> 106 | ok | kafkerl:error(). 107 | subscribe(ServerRef, Callback, Filter) -> 108 | gen_server:call(ServerRef, {subscribe, {Filter, Callback}}). 109 | -spec unsubscribe(kafkerl:server_ref(), kafkerl:callback()) -> ok. 110 | unsubscribe(ServerRef, Callback) -> 111 | gen_server:call(ServerRef, {unsubscribe, Callback}). 112 | 113 | -spec request_metadata(kafkerl:server_ref()) -> ok. 114 | request_metadata(ServerRef) -> 115 | gen_server:call(ServerRef, {request_metadata}). 116 | 117 | -spec request_metadata(kafkerl:server_ref(), [kafkerl:topic()] | boolean()) -> 118 | ok. 119 | request_metadata(ServerRef, TopicsOrForced) -> 120 | gen_server:call(ServerRef, {request_metadata, TopicsOrForced}). 
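%% A boolean() second argument requests a forced metadata refresh with no
%% additional topics, while a list is interpreted as the topics to include in
%% the metadata request.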
121 | 122 | -spec request_metadata(kafkerl:server_ref(), [kafkerl:topic()], boolean()) -> 123 | ok. 124 | request_metadata(ServerRef, Topics, Forced) -> 125 | gen_server:call(ServerRef, {request_metadata, Topics, Forced}). 126 | 127 | -spec produce_succeeded(kafkerl:server_ref(), {kafkerl:topic(), 128 | kafkerl:partition(), 129 | [binary()], 130 | integer()}) -> ok. 131 | produce_succeeded(ServerRef, Messages) -> 132 | gen_server:cast(ServerRef, {produce_succeeded, Messages}). 133 | 134 | %%============================================================================== 135 | %% gen_server callbacks 136 | %%============================================================================== 137 | -spec handle_call(any(), any(), state()) -> {reply, ok, state()} | 138 | {reply, {error, any()}, state()}. 139 | handle_call({dump_buffer_to_disk, Buffer, Options}, _From, State) -> 140 | {DumpNameStr, _} = DumpName = get_ets_dump_name(State#state.last_dump_name), 141 | AllMessages = ets_buffer:read_all(Buffer), 142 | FilePath = proplists:get_value(dump_location, Options, "") ++ DumpNameStr, 143 | _ = case file:write_file(FilePath, term_to_binary(AllMessages)) of 144 | ok -> ?LOG_DEBUG("Dumped unsent messages at ~p", [FilePath]); 145 | Error -> ?LOG_CRITICAL("Unable to save messages, reason: ~p", [Error]) 146 | end, 147 | {reply, ok, State#state{last_dump_name = DumpName}}; 148 | handle_call({send, Message}, _From, State) -> 149 | handle_send(Message, State); 150 | handle_call({fetch, Topic, Partition, Options}, _From, State) -> 151 | {reply, handle_fetch(Topic, Partition, Options, State), State}; 152 | handle_call({stop_fetch, Topic, Partition}, _From, State) -> 153 | {reply, handle_stop_fetch(Topic, Partition, State), State}; 154 | handle_call({request_metadata}, _From, State) -> 155 | {reply, ok, handle_request_metadata(State, [])}; 156 | handle_call({request_metadata, Forced}, _From, State) when is_boolean(Forced) -> 157 | {reply, ok, handle_request_metadata(State, [], true)}; 158 | handle_call({request_metadata, Topics}, _From, State) -> 159 | {reply, ok, handle_request_metadata(State, Topics)}; 160 | handle_call({request_metadata, Topics, Forced}, _From, State) -> 161 | {reply, ok, handle_request_metadata(State, Topics, Forced)}; 162 | handle_call({get_partitions}, _From, State) -> 163 | {reply, handle_get_partitions(State), State}; 164 | handle_call({subscribe, Callback}, _From, State) -> 165 | case send_mapping_to(Callback, State) of 166 | ok -> 167 | {reply, ok, State#state{callbacks = [Callback | State#state.callbacks]}}; 168 | _ -> 169 | {reply, {error, invalid_callback}, State} 170 | end; 171 | handle_call({unsubscribe, Callback}, _From, State) -> 172 | NewCallbacks = lists:keydelete(Callback, 2, State#state.callbacks), 173 | {reply, ok, State#state{callbacks = NewCallbacks}}. 174 | 175 | -spec handle_info(atom() | {atom(), [] | map()}, state()) -> 176 | {stop, {error, unable_to_retrieve_metadata}, state()} | 177 | {noreply, state()}. 
178 | handle_info(metadata_timeout, State) -> 179 | {stop, {error, unable_to_retrieve_metadata}, State}; 180 | handle_info({metadata_updated, []}, State) -> 181 | % If the metadata arrived empty request it again 182 | {noreply, handle_request_metadata(State#state{broker_mapping = []}, [])}; 183 | handle_info({metadata_updated, Mapping}, State) -> 184 | % Create the topic mapping (this also starts the broker connections) 185 | NewBrokerMapping = get_broker_mapping(Mapping, State), 186 | ok = ?LOG_DEBUG("Refreshed topic mapping: ~p", [NewBrokerMapping]), 187 | % Get the partition data to send to the subscribers and send it 188 | PartitionData = get_partitions_from_mapping(NewBrokerMapping), 189 | Callbacks = State#state.callbacks, 190 | NewCallbacks = send_event({partition_update, PartitionData}, Callbacks), 191 | % Add to the list of known topics 192 | NewTopics = lists:sort([T || {T, _P} <- PartitionData]), 193 | NewKnownTopics = lists:umerge(NewTopics, State#state.known_topics), 194 | ok = ?LOG_DEBUG("Known topics: ~p", [NewKnownTopics]), 195 | % Reverse the pending messages and try to send them again 196 | RPending = lists:reverse(State#state.pending), 197 | ok = lists:foreach(fun(P) -> send(self(), P, []) end, RPending), 198 | {noreply, State#state{broker_mapping = NewBrokerMapping, pending = [], 199 | callbacks = NewCallbacks, 200 | known_topics = NewKnownTopics}}; 201 | handle_info({'DOWN', Ref, process, _, normal}, State) -> 202 | true = demonitor(Ref), 203 | {noreply, State}; 204 | handle_info({'DOWN', Ref, process, _, Reason}, State) -> 205 | ok = ?LOG_ERROR("metadata request failed, reason: ~p", [Reason]), 206 | true = demonitor(Ref), 207 | {noreply, handle_request_metadata(State, [], true)}; 208 | handle_info(Msg, State) -> 209 | ok = ?LOG_NOTICE("Unexpected info message received: ~p on ~p", [Msg, State]), 210 | {noreply, State}. 211 | 212 | -spec handle_cast(any(), state()) -> {noreply, state()}. 213 | handle_cast({produce_succeeded, Messages}, State) -> 214 | Callbacks = State#state.callbacks, 215 | NewCallbacks = send_event({produced, Messages}, Callbacks), 216 | {noreply, State#state{callbacks = NewCallbacks}}. 217 | 218 | % Boilerplate 219 | -spec terminate(atom(), state()) -> ok. 220 | terminate(_Reason, _State) -> ok. 221 | -spec code_change(string(), state(), any()) -> {ok, state()}. 222 | code_change(_OldVsn, State, _Extra) -> {ok, State}. 223 | 224 | %%============================================================================== 225 | %% Handlers 226 | %%============================================================================== 227 | -spec init([{any(), any()}]) -> {ok, state()} | {stop, bad_config}. 
228 | init([Config]) -> 229 | Schema = [{brokers, [{string, {integer, {1, 65535}}}], required}, 230 | {max_metadata_retries, {integer, {-1, undefined}}, {default, -1}}, 231 | {client_id, binary, {default, <<"kafkerl_client">>}}, 232 | {topics, [binary], required}, 233 | {metadata_tcp_timeout, positive_integer, {default, 1500}}, 234 | {assume_autocreate_topics, boolean, {default, false}}, 235 | {metadata_request_cooldown, positive_integer, {default, 333}}, 236 | {consumer_min_bytes, positive_integer, {default, 1}}, 237 | {consumer_max_wait, positive_integer, {default, 1500}}], 238 | case normalizerl:normalize_proplist(Schema, Config) of 239 | {ok, [Brokers, MaxMetadataRetries, ClientId, Topics, RetryInterval, 240 | AutocreateTopics, MetadataRequestCooldown, MinBytes, MaxWait]} -> 241 | DefaultFetchOptions = [{min_bytes, MinBytes}, {max_wait, MaxWait}], 242 | State = #state{config = Config, 243 | known_topics = Topics, 244 | brokers = Brokers, 245 | client_id = ClientId, 246 | retry_interval = RetryInterval, 247 | autocreate_topics = AutocreateTopics, 248 | max_metadata_retries = MaxMetadataRetries, 249 | metadata_request_cd = MetadataRequestCooldown, 250 | default_fetch_options = DefaultFetchOptions}, 251 | {_Pid, _Ref} = make_metadata_request(State), 252 | {ok, State}; 253 | {errors, Errors} -> 254 | lists:foreach(fun(E) -> 255 | ok = ?LOG_CRITICAL("Connector config error ~p", [E]) 256 | end, Errors), 257 | {stop, bad_config} 258 | end. 259 | 260 | handle_send(Message, State = #state{autocreate_topics = false}) -> 261 | % The topic didn't exist, ignore 262 | {Topic, _Partition, Payload} = Message, 263 | ok = ?LOG_ERROR("Dropped ~p sent to non existing topic ~p", [Payload, Topic]), 264 | {reply, {error, non_existing_topic}, State}; 265 | handle_send(Message, State = #state{broker_mapping = void, 266 | pending = Pending}) -> 267 | % We should consider saving this to a new buffer instead of using the state. 268 | {reply, ok, State#state{pending = [Message | Pending]}}; 269 | handle_send(Message, State = #state{broker_mapping = Mapping, pending = Pending, 270 | known_topics = KnownTopics}) -> 271 | {Topic, Partition, Payload} = Message, 272 | case lists:any(fun({K, _}) -> K =:= {Topic, Partition} end, Mapping) of 273 | true -> 274 | % The ets takes some time to be available after being created, so we check 275 | % if the topic/partition pair is in the mapping and if it does, we know we 276 | % just need to send it again. The order is not guaranteed in this case, so 277 | % if that's a concern, don't rely on autocreate_topics (besides, don't use 278 | % autocreate_topics on production since it opens another can of worms). 279 | ok = send(self(), Message, []), 280 | {reply, ok, State}; 281 | false -> 282 | % However, if the topic/partition pair does not exist, we need to check if 283 | % the topic exists. If the topic exists, we drop the message because kafka 284 | % can't add partitions on the fly. 285 | case lists:any(fun({{T, _}, _}) -> T =:= Topic end, Mapping) of 286 | true -> 287 | ok = ?LOG_ERROR("Dropped ~p sent to topic ~p, partition ~p", 288 | [Payload, Topic, Partition]), 289 | {reply, {error, bad_partition}, State}; 290 | false -> 291 | NewKnownTopics = lists:umerge([Topic], KnownTopics), 292 | NewState = State#state{pending = [Message | Pending]}, 293 | {reply, ok, handle_request_metadata(NewState, NewKnownTopics)} 294 | end 295 | end. 
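% Illustrative note on the clauses above (values invented): each Message is a
% {Topic, Partition, Payload} tuple such as {<<"my_topic">>, 0, <<"hello">>}.
% With autocreate_topics = false the first clause replies
% {error, non_existing_topic}; otherwise the message is queued in `pending'
% while there is no broker mapping yet, handed back to send/3 when the
% {Topic, Partition} pair is already mapped to a broker connection, or used to
% trigger a metadata request when the topic is not known yet.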
296 | 297 | handle_fetch(_Topic, _Partition, _Options, #state{broker_mapping = void}) -> 298 | {error, not_connected}; 299 | handle_fetch(Topic, Partition, Options, State) -> 300 | case lists:keyfind({Topic, Partition}, 1, State#state.broker_mapping) of 301 | false -> 302 | {error, {no_broker, {Topic, Partition}}}; 303 | {_, Broker} -> 304 | NewOptions = Options ++ State#state.default_fetch_options, 305 | kafkerl_broker_connection:fetch(Broker, Topic, Partition, NewOptions) 306 | end. 307 | 308 | handle_stop_fetch(_Topic, _Partition, #state{broker_mapping = void}) -> 309 | % Ignore, there's no fetch in progress 310 | ok; 311 | handle_stop_fetch(Topic, Partition, State) -> 312 | case lists:keyfind({Topic, Partition}, 1, State#state.broker_mapping) of 313 | false -> 314 | % Ignore, there's no fetch in progress 315 | ok; 316 | {_, Broker} -> 317 | kafkerl_broker_connection:stop_fetch(Broker, Topic, Partition) 318 | end. 319 | 320 | handle_get_partitions(#state{broker_mapping = void}) -> 321 | {error, not_available}; 322 | handle_get_partitions(#state{broker_mapping = Mapping}) -> 323 | {ok, Mapping}. 324 | 325 | handle_request_metadata(State, Topics) -> 326 | handle_request_metadata(State, Topics, false). 327 | 328 | % Ignore it if the topic mapping is void, we are already requesting the metadata 329 | handle_request_metadata(State = #state{broker_mapping = void}, _, false) -> 330 | State; 331 | handle_request_metadata(State, NewTopics, _) -> 332 | SortedNewTopics = lists:sort(NewTopics), 333 | NewKnownTopics = lists:umerge(State#state.known_topics, SortedNewTopics), 334 | Now = get_timestamp(), 335 | LastRequest = State#state.last_metadata_request, 336 | Cooldown = State#state.metadata_request_cd, 337 | _ = case Cooldown - (Now - LastRequest) of 338 | Negative when Negative =< 0 -> 339 | _ = make_metadata_request(State); 340 | Time -> 341 | _ = timer:apply_after(Time, ?MODULE, request_metadata, [self(), true]) 342 | end, 343 | State#state{broker_mapping = void, known_topics = NewKnownTopics, 344 | last_metadata_request = Now}. 345 | 346 | %%============================================================================== 347 | %% Utils 348 | %%============================================================================== 349 | get_ets_dump_name({OldName, Counter}) -> 350 | {{Year, Month, Day}, {Hour, Minute, Second}} = calendar:local_time(), 351 | Ts = io_lib:format("~4.10.0B~2.10.0B~2.10.0B~2.10.0B~2.10.0B~2.10.0B_", 352 | [Year, Month, Day, Hour, Minute, Second]), 353 | PartialNewName = "kafkerl_messages_" ++ lists:flatten(Ts), 354 | case lists:prefix(PartialNewName, OldName) of 355 | true -> 356 | {PartialNewName ++ integer_to_list(Counter + 1) ++ ".dump", Counter + 1}; 357 | _ -> 358 | {PartialNewName ++ "0.dump", 0} 359 | end. 360 | 361 | get_metadata_tcp_options() -> 362 | kafkerl_utils:get_tcp_options([{active, false}, {packet, 4}]). 363 | 364 | -spec do_request_metadata(pid(), [address()], 365 | any(), 366 | non_neg_integer(), 367 | non_neg_integer(), 368 | iodata()) -> 369 | metadata_timeout | {metadata_updated, broker_mapping()}. 370 | do_request_metadata(Pid, _Brokers, _TCPOpts, 0, _RetryInterval, _Request) -> 371 | Pid ! metadata_timeout; 372 | do_request_metadata(Pid, Brokers, TCPOpts, Retries, RetryInterval, Request) -> 373 | case do_request_metadata(Brokers, TCPOpts, Request) of 374 | {ok, TopicMapping} -> 375 | Pid ! 
{metadata_updated, TopicMapping}; 376 | _Error -> 377 | timer:sleep(RetryInterval), 378 | NewRetries = case Retries of 379 | -1 -> -1; 380 | N -> N - 1 381 | end, 382 | do_request_metadata(Pid, Brokers, TCPOpts, NewRetries, RetryInterval, 383 | Request) 384 | end. 385 | 386 | do_request_metadata([], _TCPOpts, _Request) -> 387 | {error, all_down}; 388 | do_request_metadata([{Host, Port} = _Broker | T], TCPOpts, Request) -> 389 | ok = ?LOG_DEBUG("Attempting to connect to broker at ~s:~p", [Host, Port]), 390 | % Connect to the Broker 391 | case gen_tcp:connect(Host, Port, TCPOpts) of 392 | {error, Reason} -> 393 | warn_metadata_request(Host, Port, Reason), 394 | % Failed, try with the next one in the list 395 | do_request_metadata(T, TCPOpts, Request); 396 | {ok, Socket} -> 397 | % On success, send the metadata request 398 | case gen_tcp:send(Socket, Request) of 399 | {error, Reason} -> 400 | warn_metadata_request(Host, Port, Reason), 401 | % Unable to send request, try the next broker 402 | do_request_metadata(T, TCPOpts, Request); 403 | ok -> 404 | case gen_tcp:recv(Socket, 0, 6000) of 405 | {error, Reason} -> 406 | warn_metadata_request(Host, Port, Reason), 407 | gen_tcp:close(Socket), 408 | % Nothing received (probably a timeout), try the next broker 409 | do_request_metadata(T, TCPOpts, Request); 410 | {ok, Data} -> 411 | gen_tcp:close(Socket), 412 | parse_metadata_response(Data, Host, Port, T, TCPOpts, 413 | Request) 414 | end 415 | end 416 | end. 417 | 418 | parse_metadata_response(Data, Host, Port, T, TCPOpts, Request) -> 419 | case kafkerl_protocol:parse_metadata_response(Data) of 420 | {error, Reason} -> 421 | warn_metadata_request(Host, Port, Reason), 422 | % The parsing failed, try the next broker 423 | do_request_metadata(T, TCPOpts, Request); 424 | {ok, _CorrelationId, Metadata} -> 425 | % We received a metadata response, make sure it has brokers 426 | {ok, get_topic_mapping(Metadata)} 427 | end. 428 | 429 | send_event(Event, {all, Callback}) -> 430 | kafkerl_utils:send_event(Callback, Event); 431 | send_event({EventName, _Data} = Event, {Events, Callback}) -> 432 | case is_list(Events) andalso lists:member(EventName, Events) of 433 | true -> kafkerl_utils:send_event(Callback, Event); 434 | false -> ok 435 | end; 436 | send_event(Event, Callbacks) -> 437 | lists:filter(fun(Callback) -> 438 | send_event(Event, Callback) =:= ok 439 | end, Callbacks). 440 | 441 | %%============================================================================== 442 | %% Request building 443 | %%============================================================================== 444 | metadata_request(#state{client_id = ClientId}, [] = _NewTopics) -> 445 | kafkerl_protocol:build_metadata_request([], 0, ClientId); 446 | metadata_request(#state{known_topics = KnownTopics, client_id = ClientId}, 447 | NewTopics) -> 448 | AllTopics = lists:umerge(KnownTopics, NewTopics), 449 | kafkerl_protocol:build_metadata_request(AllTopics, 0, ClientId). 
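% Sketch with invented values: given known_topics = [<<"a">>, <<"c">>] and
% NewTopics = [<<"b">>, <<"c">>], lists:umerge/2 yields
% [<<"a">>, <<"b">>, <<"c">>], so the request built above always covers every
% topic this connector has seen so far. Both lists are expected to be sorted,
% which is why callers sort topics before storing them in known_topics.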
450 | 451 | %%============================================================================== 452 | %% Topic/broker mapping 453 | %%============================================================================== 454 | get_topic_mapping({BrokerMetadata, TopicMetadata}) -> 455 | % Converts [{ErrorCode, Topic, [Partion]}] to [{Topic, [Partition]}] 456 | Topics = lists:filtermap(fun expand_topic/1, TopicMetadata), 457 | % Converts [{Topic, [Partition]}] on [{{Topic, Partition}, BrokerId}] 458 | Partitions = lists:flatten(lists:filtermap(fun expand_partitions/1, Topics)), 459 | % Converts the BrokerIds from the previous array into socket addresses 460 | lists:filtermap(fun({{Topic, Partition}, BrokerId}) -> 461 | case lists:keyfind(BrokerId, 1, BrokerMetadata) of 462 | {BrokerId, HostData} -> 463 | {true, {{Topic, Partition, BrokerId}, HostData}}; 464 | _Any -> 465 | false 466 | end 467 | end, Partitions). 468 | 469 | expand_topic({?NO_ERROR, Topic, Partitions}) -> 470 | {true, {Topic, Partitions}}; 471 | expand_topic({Error = ?REPLICA_NOT_AVAILABLE, Topic, Partitions}) -> 472 | % Replica not available can be ignored, still, show a warning 473 | ok = ?LOG_WARNING("Ignoring ~p on metadata for topic ~p", 474 | [kafkerl_error:get_error_name(Error), Topic]), 475 | {true, {Topic, Partitions}}; 476 | expand_topic({Error, Topic, _Partitions}) -> 477 | ok = ?LOG_ERROR("Error ~p on metadata for topic ~p", 478 | [kafkerl_error:get_error_name(Error), Topic]), 479 | {true, {Topic, []}}. 480 | 481 | expand_partitions(Metadata) -> 482 | expand_partitions(Metadata, []). 483 | 484 | expand_partitions({_Topic, []}, Acc) -> 485 | {true, Acc}; 486 | expand_partitions({Topic, [{?NO_ERROR, Partition, Leader, _, _} | T]}, Acc) -> 487 | ExpandedPartition = {{Topic, Partition}, Leader}, 488 | expand_partitions({Topic, T}, [ExpandedPartition | Acc]); 489 | expand_partitions({Topic, [{Error = ?REPLICA_NOT_AVAILABLE, Partition, Leader, 490 | _, _} | T]}, Acc) -> 491 | ok = ?LOG_WARNING("Ignoring ~p on metadata for topic ~p, partition ~p", 492 | [kafkerl_error:get_error_name(Error), Topic, Partition]), 493 | ExpandedPartition = {{Topic, Partition}, Leader}, 494 | expand_partitions({Topic, T}, [ExpandedPartition | Acc]); 495 | expand_partitions({Topic, [{Error, Partition, _, _, _} | T]}, Acc) -> 496 | ok = ?LOG_ERROR("Error ~p on metadata for topic ~p, partition ~p", 497 | [kafkerl_error:get_error_name(Error), Topic, Partition]), 498 | expand_partitions({Topic, T}, Acc). 499 | 500 | get_broker_mapping(TopicMapping, State) -> 501 | get_broker_mapping(TopicMapping, State, 0, []). 502 | 503 | get_broker_mapping([], _State, _N, Acc) -> 504 | [{Key, Address} || {_ConnId, Key, Address} <- Acc]; 505 | get_broker_mapping([{{Topic, Partition, ConnId}, Address} | T], 506 | State = #state{config = Config}, N, Acc) -> 507 | Buffer = kafkerl_utils:buffer_name(Topic, Partition), 508 | _ = ets_buffer:create(Buffer, fifo), 509 | {Conn, NewN} = case lists:keyfind(ConnId, 1, Acc) of 510 | false -> 511 | {start_broker_connection(N, Address, Config), N + 1}; 512 | {ConnId, _, BrokerConnection} -> 513 | {BrokerConnection, N} 514 | end, 515 | 516 | Buffer = kafkerl_utils:buffer_name(Topic, Partition), 517 | _ = ets_buffer:create(Buffer, fifo), 518 | kafkerl_broker_connection:add_buffer(Conn, Buffer), 519 | 520 | NewMapping = {ConnId, {Topic, Partition}, Conn}, 521 | get_broker_mapping(T, State, NewN, [NewMapping | Acc]). 
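% Rough data-flow example for the mapping code above (ids and addresses are
% made up): metadata entries such as [{{<<"t">>, 0, 1}, {"localhost", 9092}}]
% end up as [{{<<"t">>, 0}, ConnName}], where ConnName identifies the
% kafkerl_broker_connection that owns the buffer for that topic/partition.
% The lists:keyfind/3 on ConnId is what lets several partitions hosted on the
% same broker share a single connection process.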
522 | 523 | start_broker_connection(N, Address, Config) -> 524 | case kafkerl_broker_connection:start_link(N, self(), Address, Config) of 525 | {ok, Name, _Pid} -> 526 | Name; 527 | {error, {already_started, Pid}} -> 528 | kafkerl_broker_connection:clear_buffers(Pid), 529 | Pid 530 | end. 531 | 532 | % This is used to return the available partitions for each topic 533 | get_partitions_from_mapping(Mapping) -> 534 | F = fun({{Topic, Partition}, _}, Acc) -> 535 | case lists:keytake(Topic, 1, Acc) of 536 | false -> 537 | [{Topic, [Partition]} | Acc]; 538 | {value, {Topic, Partitions}, NewAcc} -> 539 | [{Topic, [Partition | Partitions]} | NewAcc] 540 | end 541 | end, 542 | lists:foldl(F, [], Mapping). 543 | 544 | send_mapping_to(_NewCallback, #state{broker_mapping = void}) -> 545 | ok; 546 | send_mapping_to(NewCallback, #state{broker_mapping = Mapping}) -> 547 | Partitions = get_partitions_from_mapping(Mapping), 548 | send_event({partition_update, Partitions}, NewCallback). 549 | 550 | -spec make_metadata_request(state()) -> {pid(), reference()}. 551 | make_metadata_request(State = #state{brokers = Brokers, 552 | known_topics = Topics, 553 | max_metadata_retries = MaxMetadataRetries, 554 | retry_interval = RetryInterval}) -> 555 | Request = metadata_request(State, Topics), 556 | % Start requesting metadata 557 | Params = [self(), Brokers, get_metadata_tcp_options(), MaxMetadataRetries, 558 | RetryInterval, Request], 559 | spawn_monitor(?MODULE, do_request_metadata, Params). 560 | 561 | get_timestamp() -> 562 | {A, B, C} = erlang:timestamp(), 563 | (A * 1000000 + B) * 1000 + C div 1000. 564 | 565 | %%============================================================================== 566 | %% Error handling 567 | %%============================================================================== 568 | warn_metadata_request(Host, Port, Reason) -> 569 | ok = ?LOG_WARNING("Unable to retrieve metadata from ~s:~p, reason: ~p", 570 | [Host, Port, Reason]). -------------------------------------------------------------------------------- /src/kafkerl_error.erl: -------------------------------------------------------------------------------- 1 | -module(kafkerl_error). 2 | -author('hernanrivasacosta@gmail.com'). 3 | 4 | -export([get_error_name/1, get_error_description/1, get_error_tuple/1]). 5 | 6 | -include("kafkerl.hrl"). 7 | 8 | %%============================================================================== 9 | %% API 10 | %%============================================================================== 11 | -spec get_error_name(integer()) -> [1..255, ...]. 
12 | get_error_name(?NO_ERROR) -> 13 | "NoError"; 14 | get_error_name(?OFFSET_OUT_OF_RANGE) -> 15 | "OffsetOutOfRange"; 16 | get_error_name(?INVALID_MESSAGE) -> 17 | "InvalidMessage"; 18 | get_error_name(?UNKNOWN_TOPIC_OR_PARTITION) -> 19 | "UnknownTopicOrPartition"; 20 | get_error_name(?INVALID_MESSAGE_SIZE) -> 21 | "InvalidMessageSize"; 22 | get_error_name(?LEADER_NOT_AVAILABLE) -> 23 | "LeaderNotAvailable"; 24 | get_error_name(?NOT_LEADER_FOR_PARTITION) -> 25 | "NotLeaderForPartition"; 26 | get_error_name(?REQUEST_TIMEDOUT) -> 27 | "RequestTimedOut"; 28 | get_error_name(?BROKER_NOT_AVAILABLE) -> 29 | "BrokerNotAvailable"; 30 | get_error_name(?REPLICA_NOT_AVAILABLE) -> 31 | "ReplicaNotAvailable"; 32 | get_error_name(?MESSAGE_SIZE_TOO_LARGE) -> 33 | "MessageSizeTooLarge"; 34 | get_error_name(?STALE_CONTROLLER_EPOCH) -> 35 | "StaleControllerEpoch"; 36 | get_error_name(?OFFSET_METADATA_TOO_LARGE) -> 37 | "OffsetMetadataTooLarge"; 38 | get_error_name(?OFFSETS_LOAD_IN_PROGRESS_CODE) -> 39 | "OffsetsLoadInProgressCode"; 40 | get_error_name(?CONSUMER_COORDINATOR_NOT_AVAILABLE_CODE) -> 41 | "ConsumerCoordinatorNotAvailableCode"; 42 | get_error_name(?NOT_COORDINATOR_FOR_CONSUMER_CODE) -> 43 | "NotCoordinatorForConsumerCode"; 44 | get_error_name(?UNKNOWN) -> 45 | "Unknown". 46 | 47 | -spec get_error_description(integer()) -> [1..255, ...]. 48 | get_error_description(?NO_ERROR) -> 49 | "No error"; 50 | get_error_description(?OFFSET_OUT_OF_RANGE) -> 51 | "The requested offset is outside the range of offsets maintained by the " ++ 52 | "server for the given topic/partition."; 53 | get_error_description(?INVALID_MESSAGE) -> 54 | "If you specify a string larger than configured maximum for offset metadata."; 55 | get_error_description(?UNKNOWN_TOPIC_OR_PARTITION) -> 56 | "This request is for a topic or partition that does not exist on this broker"; 57 | get_error_description(?INVALID_MESSAGE_SIZE) -> 58 | "The message has a negative size."; 59 | get_error_description(?LEADER_NOT_AVAILABLE) -> 60 | "This error is thrown if we are in the middle of a leadership election " ++ 61 | "and there is currently no leader for this partition and hence it is " ++ 62 | "unavailable for writes."; 63 | get_error_description(?NOT_LEADER_FOR_PARTITION) -> 64 | "This error is thrown if the client attempts to send messages to a " ++ 65 | "replica that is not the leader for some partition. It indicates that the " ++ 66 | "clients metadata is out of date."; 67 | get_error_description(?REQUEST_TIMEDOUT) -> 68 | "This error is thrown if the request exceeds the user-specified time " ++ 69 | "limit in the request."; 70 | get_error_description(?BROKER_NOT_AVAILABLE) -> 71 | "This is not a client facing error and is used only internally by " ++ 72 | "intra-cluster broker communication."; 73 | get_error_description(?REPLICA_NOT_AVAILABLE) -> 74 | "Unused."; 75 | get_error_description(?MESSAGE_SIZE_TOO_LARGE) -> 76 | "The server has a configurable maximum message size to avoid unbounded " ++ 77 | "memory allocation. 
This error is thrown if the client attempt to produce " ++ 78 | "a message larger than this maximum."; 79 | get_error_description(?STALE_CONTROLLER_EPOCH) -> 80 | "Internal error code for broker-to-broker communication."; 81 | get_error_description(?OFFSET_METADATA_TOO_LARGE) -> 82 | "If you specify a string larger than configured maximum for offset metadata."; 83 | get_error_description(?OFFSETS_LOAD_IN_PROGRESS_CODE) -> 84 | "The broker returns this error code for an offset fetch request if it is " ++ 85 | "still loading offsets (after a leader change for that offsets topic " ++ 86 | "partition)."; 87 | get_error_description(?CONSUMER_COORDINATOR_NOT_AVAILABLE_CODE) -> 88 | "The broker returns this error code for consumer metadata requests or " ++ 89 | "offset commit requests if the offsets topic has not yet been created."; 90 | get_error_description(?NOT_COORDINATOR_FOR_CONSUMER_CODE) -> 91 | "The broker returns this error code if it receives an offset fetch or " ++ 92 | "commit request for a consumer group that it is not a coordinator for."; 93 | get_error_description(?UNKNOWN) -> 94 | "An unexpected server error". 95 | 96 | -spec get_error_tuple(integer()) -> {error, atom()}. 97 | get_error_tuple(?NO_ERROR) -> 98 | {error, no_error}; 99 | get_error_tuple(?OFFSET_OUT_OF_RANGE) -> 100 | {error, offset_out_of_range}; 101 | get_error_tuple(?INVALID_MESSAGE) -> 102 | {error, invalid_message}; 103 | get_error_tuple(?UNKNOWN_TOPIC_OR_PARTITION) -> 104 | {error, unknown_topic_or_partition}; 105 | get_error_tuple(?INVALID_MESSAGE_SIZE) -> 106 | {error, invalid_message_size}; 107 | get_error_tuple(?LEADER_NOT_AVAILABLE) -> 108 | {error, leader_not_available}; 109 | get_error_tuple(?NOT_LEADER_FOR_PARTITION) -> 110 | {error, not_leader_for_partition}; 111 | get_error_tuple(?REQUEST_TIMEDOUT) -> 112 | {error, request_timedout}; 113 | get_error_tuple(?BROKER_NOT_AVAILABLE) -> 114 | {error, broker_not_available}; 115 | get_error_tuple(?REPLICA_NOT_AVAILABLE) -> 116 | {error, replica_not_available}; 117 | get_error_tuple(?MESSAGE_SIZE_TOO_LARGE) -> 118 | {error, message_size_too_large}; 119 | get_error_tuple(?STALE_CONTROLLER_EPOCH) -> 120 | {error, stale_controller_epoch}; 121 | get_error_tuple(?OFFSET_METADATA_TOO_LARGE) -> 122 | {error, offset_metadata_too_large}; 123 | get_error_tuple(?OFFSETS_LOAD_IN_PROGRESS_CODE) -> 124 | {error, offsets_load_in_progress_code}; 125 | get_error_tuple(?CONSUMER_COORDINATOR_NOT_AVAILABLE_CODE) -> 126 | {error, consumer_coordinator_not_available_code}; 127 | get_error_tuple(?NOT_COORDINATOR_FOR_CONSUMER_CODE) -> 128 | {error, not_coordinator_for_consumer_code}; 129 | get_error_tuple(?UNKNOWN) -> 130 | {error, unknown}. -------------------------------------------------------------------------------- /src/kafkerl_protocol.erl: -------------------------------------------------------------------------------- 1 | -module(kafkerl_protocol). 2 | -author('hernanrivasacosta@gmail.com'). 3 | 4 | -include_lib("kernel/include/logger.hrl"). 5 | 6 | -export([build_produce_request/3, build_produce_request/4, 7 | build_fetch_request/5, 8 | build_metadata_request/3]). 9 | 10 | -export([parse_correlation_id/1, 11 | parse_produce_response/1, parse_fetch_response/1, 12 | parse_fetch_response/2, parse_metadata_response/1]). 13 | 14 | %% Common 15 | -type error_code() :: -1..16. 16 | -type correlation_id() :: non_neg_integer(). 17 | -type broker_id() :: integer(). 18 | -type broker() :: {broker_id(), kafkerl_connector:address()}. 
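% Note (an inference, not stated in this file): the -1..16 range above appears
% to mirror the protocol error codes that kafkerl_error translates, from
% ?UNKNOWN up to ?NOT_COORDINATOR_FOR_CONSUMER_CODE; codes outside that range
% would not match any get_error_name/1 clause.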
19 | 20 | %% Requests 21 | -type client_id() :: binary(). 22 | -type merged_message() :: kafkerl:basic_message() | 23 | {kafkerl:topic(), 24 | [{kafkerl:partition(), kafkerl:payload()}]} | 25 | [merged_message()]. 26 | -type fetch_offset() :: integer(). 27 | -type fetch_max_bytes() :: integer(). 28 | -type fetch_partition() :: {kafkerl:partition(), fetch_offset(), 29 | fetch_max_bytes()} | 30 | [fetch_partition()]. 31 | -type fetch_request() :: {kafkerl:topic(), fetch_partition()} | 32 | [fetch_request()]. 33 | 34 | %% Responses 35 | -type produce_partition() :: {kafkerl:partition(), error_code(), integer()}. 36 | -type produce_topic() :: {kafkerl:topic(), [produce_partition()]}. 37 | -type produce_response() :: {ok, correlation_id(), [produce_topic()]}. 38 | -type replica() :: integer(). 39 | -type isr() :: integer(). 40 | -type partition_metadata() :: {error_code(), kafkerl:partition(), broker_id(), 41 | [replica()], [isr()]}. 42 | -type topic_metadata() :: {error_code(), kafkerl:topic(), 43 | [partition_metadata()]}. 44 | -type metadata() :: {[broker()], [topic_metadata()]}. 45 | -type metadata_response() :: {ok, correlation_id(), metadata()} | 46 | kafkerl:error(). 47 | -type messages() :: [{kafkerl:topic(), 48 | [{{kafkerl:partition(), integer()}, 49 | [binary() | {binary(), binary()}]}]}]. 50 | -type fetch_state() :: {binary(), integer(), [any()]} | void. 51 | -type fetch_response() :: {ok, correlation_id(), messages()} | 52 | {incomplete, correlation_id(), messages(), 53 | fetch_state()} | 54 | kafkerl:error(). 55 | 56 | % Compression 57 | -define(COMPRESSION_NONE, none). 58 | -define(COMPRESSION_GZIP, gzip). 59 | -define(COMPRESSION_SNAPPY, snappy). 60 | -define(KAFKERL_COMPRESSION_TYPES, [?COMPRESSION_NONE, 61 | ?COMPRESSION_GZIP, 62 | ?COMPRESSION_SNAPPY]). 63 | 64 | %% Configuration 65 | -type compression() :: ?COMPRESSION_NONE | 66 | ?COMPRESSION_GZIP | 67 | ?COMPRESSION_SNAPPY. 68 | 69 | % API keys 70 | -define(PRODUCE_KEY, 0). 71 | -define(FETCH_KEY, 1). 72 | -define(OFFSET_KEY, 2). 73 | -define(METADATA_KEY, 3). 74 | 75 | % C style binary types 76 | -define(SHORT, 16/signed-integer). 77 | -define(INT, 32/signed-integer). 78 | -define(UCHAR, 8/unsigned-integer). 79 | -define(USHORT, 16/unsigned-integer). 80 | -define(UINT, 32/unsigned-integer). 81 | -define(ULONG, 64/unsigned-integer). 82 | 83 | % Type exports 84 | -export_type([merged_message/0, client_id/0, correlation_id/0, fetch_state/0]). 85 | 86 | %%============================================================================== 87 | %% API 88 | %%============================================================================== 89 | % Message building 90 | -spec build_produce_request(merged_message(), client_id(), correlation_id()) -> 91 | iodata(). 92 | build_produce_request(Data, ClientId, CorrelationId) -> 93 | build_produce_request(Data, ClientId, CorrelationId, ?COMPRESSION_NONE). 94 | -spec build_produce_request(merged_message(), client_id(), correlation_id(), 95 | compression()) -> iodata(). 96 | build_produce_request(Data, ClientId, CorrelationId, Compression) -> 97 | {Size, Request} = build_produce_request(Data, Compression), 98 | [build_request_header(ClientId, ?PRODUCE_KEY, CorrelationId, Size), Request]. 99 | 100 | -spec build_fetch_request(fetch_request(), client_id(), correlation_id(), 101 | integer(), integer()) -> iodata(). 
102 | build_fetch_request(Data, ClientId, CorrelationId, MaxWait, MinBytes) -> 103 | {Size, Request} = build_fetch_request(Data, MaxWait, MinBytes), 104 | [build_request_header(ClientId, ?FETCH_KEY, CorrelationId, Size), Request]. 105 | 106 | -spec build_metadata_request(kafkerl:topic() | [kafkerl:topic()], 107 | correlation_id(), 108 | client_id()) -> iodata(). 109 | build_metadata_request(Topics, CorrelationId, ClientId) -> 110 | {_Size, Request} = build_metadata_request(Topics), 111 | [build_request_header(ClientId, ?METADATA_KEY, CorrelationId), Request]. 112 | 113 | % Message parsing 114 | -spec parse_correlation_id(binary()) -> {ok, integer(), binary()}. 115 | parse_correlation_id(<<_Size:?UINT, 116 | CorrelationId:?UINT, 117 | Remainder/binary>>) -> 118 | {ok, CorrelationId, Remainder}. 119 | 120 | -spec parse_produce_response(binary()) -> produce_response(). 121 | parse_produce_response(<<_Size:?UINT, 122 | CorrelationId:?UINT, 123 | TopicCount:?UINT, 124 | TopicsBin/binary>>) -> 125 | {ok, Topics} = parse_produced_topics(TopicCount, TopicsBin), 126 | {ok, CorrelationId, Topics}. 127 | 128 | -spec parse_fetch_response(binary()) -> fetch_response(). 129 | parse_fetch_response(<<_Size:?UINT, 130 | CorrelationId:?UINT, 131 | TopicCount:?UINT, 132 | TopicsBin/binary>>) -> 133 | case parse_topics(TopicCount, TopicsBin) of 134 | {ok, Topics} -> 135 | {ok, CorrelationId, Topics}; 136 | {incomplete, Topics, {Bin, Steps}} -> 137 | {incomplete, CorrelationId, Topics, {Bin, CorrelationId, Steps}}; 138 | {error, _Reason} = Error -> 139 | Error 140 | end; 141 | parse_fetch_response(_Other) -> 142 | {error, unexpected_binary}. 143 | 144 | -spec parse_fetch_response(binary(), fetch_state()) -> fetch_response(). 145 | parse_fetch_response(Bin, void) -> 146 | parse_fetch_response(Bin); 147 | parse_fetch_response(Bin, {Remainder, CorrelationId, Steps}) -> 148 | NewBin = <>, 149 | parse_steps(NewBin, CorrelationId, Steps). 150 | 151 | -spec parse_metadata_response(binary()) -> metadata_response(). 152 | parse_metadata_response(<>) -> 155 | case parse_brokers(BrokerCount, BrokersBin) of 156 | {ok, Brokers, <>} -> 157 | case parse_topic_metadata(TopicCount, TopicsBin) of 158 | {ok, Metadata} -> 159 | {ok, CorrelationId, {Brokers, Metadata}}; 160 | {error, _Reason} = Error -> 161 | Error 162 | end; 163 | {error, _Reason} = Error -> 164 | Error 165 | end; 166 | parse_metadata_response(_Other) -> 167 | {error, unexpected_binary}. 168 | 169 | %%============================================================================== 170 | %% Message Building 171 | %%============================================================================== 172 | build_request_header(ClientId, ApiKey, CorrelationId) -> 173 | % Build the header (http://goo.gl/5SNNTV) 174 | ApiVersion = 0, % The version should be 0, it's not a placeholder 175 | ClientIdSize = byte_size(ClientId), 176 | [<>, 180 | ClientId]. 181 | 182 | build_request_header(ClientId, ApiKey, CorrelationId, RequestSize) -> 183 | % 10 is the size of the header 184 | MessageSize = byte_size(ClientId) + RequestSize + 10, 185 | [<>, 186 | build_request_header(ClientId, ApiKey, CorrelationId)]. 
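% Wire-format sketch for the header built above (field order and widths are
% taken from the 0.8.x protocol document linked in the comment, not from this
% source): 4-byte total size, 2-byte api key, 2-byte api version (always 0
% here), 4-byte correlation id, then the client id as a 2-byte length followed
% by that many bytes. Those fixed fields are the "10" added to
% byte_size(ClientId) in build_request_header/4: 2 + 2 + 4 + 2 = 10.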
187 | 188 | %% PRODUCE REQUEST 189 | build_produce_request([{Topic, Partition, Messages}], Compression) -> 190 | build_produce_request({Topic, Partition, Messages}, Compression); 191 | build_produce_request([{Topic, [{Partition, Messages}]}], Compression) -> 192 | build_produce_request({Topic, Partition, Messages}, Compression); 193 | build_produce_request({Topic, [{Partition, Messages}]}, Compression) -> 194 | build_produce_request({Topic, Partition, Messages}, Compression); 195 | build_produce_request({Topic, Partition, Messages}, Compression) -> 196 | % This is a fast version used when producing for a single topic and partition 197 | TopicSize = byte_size(Topic), 198 | {Size, MessageSet} = build_message_set(Messages, Compression), 199 | {Size + TopicSize + 24, 200 | [<<-1:?SHORT, 201 | -1:?INT, % Timeout 202 | 1:?UINT, % TopicCount 203 | TopicSize:?USHORT>>, 204 | Topic, 205 | <<1:?UINT, % PartitionCount 206 | Partition:?UINT, 207 | Size:?UINT>>, 208 | MessageSet]}; 209 | build_produce_request(Data, Compression) -> 210 | % Build the body of the request with multiple topics/partitions 211 | % (Docs at: http://goo.gl/J3C50c) 212 | TopicCount = length(Data), 213 | {TopicsSize, Topics} = build_topics(Data, Compression), 214 | % 10 is the size of the header 215 | {TopicsSize + 10, 216 | [<<-1:?SHORT, % RequiredAcks 217 | -1:?INT, % Timeout 218 | TopicCount:?UINT>>, 219 | Topics]}. 220 | 221 | build_topics(Topics, Compression) -> 222 | build_topics(Topics, Compression, {0, []}). 223 | 224 | build_topics([] = _Topics, _Compression, {Size, IOList}) -> 225 | {Size, lists:reverse(IOList)}; 226 | build_topics([H | T] = _Topics, Compression, {OldSize, IOList}) -> 227 | {Size, Topic} = build_topic(H, Compression), 228 | build_topics(T, Compression, {OldSize + Size, [Topic | IOList]}). 229 | 230 | build_topic({Topic, Partition, Value}, Compression) -> 231 | build_topic({Topic, [{Partition, Value}]}, Compression); 232 | build_topic({Topic, Partitions}, Compression) -> 233 | TopicSize = byte_size(Topic), 234 | PartitionCount = length(Partitions), 235 | {Size, BuiltPartitions} = build_partitions(Partitions, Compression), 236 | % 6 is the size of both the partition count int and the topic size int 237 | {Size + TopicSize + 6, 238 | [<>, 241 | BuiltPartitions]}. 242 | 243 | build_partitions(Partitions, Compression) -> 244 | build_partitions(Partitions, Compression, {0, []}). 245 | 246 | build_partitions([] = _Partitions, _Compression, {Size, IOList}) -> 247 | {Size, lists:reverse(IOList)}; 248 | build_partitions([H | T] = _Partitions, Compression, {OldSize, IOList}) -> 249 | {Size, Partition} = build_partition(H, Compression), 250 | build_partitions(T, Compression, {OldSize + Size, [Partition | IOList]}). 251 | 252 | build_partition({Partition, Message}, Compression) when is_binary(Message) -> 253 | build_partition({Partition, [Message]}, Compression); 254 | build_partition({Partition, Messages}, Compression) -> 255 | {Size, MessageSet} = build_message_set(Messages, Compression), 256 | % 8 is the size of the header, 4 bytes of the partition and 4 for the size 257 | {Size + 8, 258 | [<>, 260 | MessageSet]}. 261 | 262 | % Docs at http://goo.gl/4W7J0r 263 | build_message_set(Message, _Compression) when is_binary(Message) -> 264 | build_message(Message); 265 | build_message_set(Messages, Compression) -> 266 | build_message_set(Messages, Compression, {0, []}). 
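% The three clauses below accumulate one encoded message per payload and, when
% a compression other than 'none' is requested, wrap the reversed accumulator
% in a single enveloping message. Note that compress/2 further down currently
% returns its input unchanged for gzip and snappy as well, so enabling
% compression does not actually compress anything in this version.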
267 | 268 | build_message_set([] = _Messages, ?COMPRESSION_NONE, {Size, IOList}) -> 269 | {Size, lists:reverse(IOList)}; 270 | build_message_set([] = _Messages, Compression, {_Size, IOList}) -> 271 | Compressed = compress(Compression, lists:reverse(IOList)), 272 | CompressedSize = iolist_size(Compressed), 273 | Header = get_message_header(CompressedSize, Compression), 274 | {byte_size(Header) + CompressedSize, [Header, Compressed]}; 275 | build_message_set([H | T] = _Messages, Compression, {OldSize, IOList}) -> 276 | {Size, Message} = build_message(H), 277 | build_message_set(T, Compression, {OldSize + Size, [Message | IOList]}). 278 | 279 | build_message(Bin) -> 280 | % Docs at: http://goo.gl/xWrdPF 281 | BinSize = byte_size(Bin), 282 | Message = [get_message_header(BinSize, ?COMPRESSION_NONE), Bin], 283 | Offset = 0, % This number is completely irrelevant when sent from the producer 284 | Size = BinSize + 14, % 14 is the size of the header plus the Crc 285 | Crc = erlang:crc32(Message), 286 | % 12 is the size of the offset plus the size int itself 287 | {Size + 12, 288 | [<>, 291 | Message]}. 292 | 293 | get_message_header(MessageSize, Compression) -> 294 | MagicByte = 0, % Version id 295 | Attributes = compression_to_int(Compression), 296 | <>. 300 | 301 | compression_to_int(?COMPRESSION_NONE) -> 0; 302 | compression_to_int(?COMPRESSION_GZIP) -> 1; 303 | compression_to_int(?COMPRESSION_SNAPPY) -> 2. 304 | 305 | compress(?COMPRESSION_NONE, Data) -> Data; 306 | compress(?COMPRESSION_GZIP, Data) -> Data; 307 | compress(?COMPRESSION_SNAPPY, Data) -> Data. 308 | 309 | %% FETCH REQUEST 310 | build_fetch_request([{Topic, {Partition, Offset, MaxBytes}}], 311 | MaxWait, MinBytes) -> 312 | build_fetch_request({Topic, {Partition, Offset, MaxBytes}}, 313 | MaxWait, MinBytes); 314 | build_fetch_request([{Topic, [{Partition, Offset, MaxBytes}]}], 315 | MaxWait, MinBytes) -> 316 | build_fetch_request({Topic, {Partition, Offset, MaxBytes}}, 317 | MaxWait, MinBytes); 318 | build_fetch_request({Topic, {Partition, Offset, MaxBytes}}, 319 | MaxWait, MinBytes) -> 320 | TopicSize = byte_size(Topic), 321 | {TopicSize + 38, 322 | [<<-1:?INT, % ReplicaId 323 | MaxWait:?UINT, 324 | MinBytes:?UINT, 325 | 1:?UINT, % TopicCount 326 | TopicSize:?USHORT>>, 327 | Topic, 328 | <<1:?UINT, % PartitionCount 329 | Partition:?UINT, 330 | Offset:?ULONG, 331 | MaxBytes:?UINT>>]}; 332 | build_fetch_request(Data, MaxWait, MinBytes) -> 333 | ReplicaId = -1, % This should always be -1 334 | TopicCount = length(Data), 335 | {TopicSize, Topics} = build_fetch_topics(Data), 336 | % 16 is the size of the header 337 | {TopicSize + 16, 338 | [<>, 342 | Topics]}. 343 | 344 | build_fetch_topics(Topics) -> 345 | build_fetch_topics(Topics, {0, []}). 346 | 347 | build_fetch_topics([] = _Topics, {Size, IOList}) -> 348 | {Size, lists:reverse(IOList)}; 349 | build_fetch_topics([H | T] = _Topics, {OldSize, IOList}) -> 350 | {Size, Topic} = build_fetch_topic(H), 351 | build_fetch_topics(T, {OldSize + Size, [Topic | IOList]}). 352 | 353 | build_fetch_topic({Topic, Partition}) when is_tuple(Partition) -> 354 | build_fetch_topic({Topic, [Partition]}); 355 | build_fetch_topic({Topic, Partitions}) -> 356 | TopicSize = byte_size(Topic), 357 | PartitionCount = length(Partitions), 358 | {Size, BuiltPartitions} = build_fetch_partitions(Partitions), 359 | % 6 is the size of the topicSize's 16 bytes + 32 from the partition count 360 | {Size + TopicSize + 6, 361 | [<>, 364 | BuiltPartitions]}. 
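% Rough reading of the fetch request built above: ReplicaId is always -1 for
% client requests, MaxWait/MinBytes ultimately come from the connector's
% consumer_max_wait and consumer_min_bytes options (via its default fetch
% options), and each {Partition, Offset, MaxBytes} triple costs 16 bytes on
% the wire (4 + 8 + 4), which is the hard-coded size in
% build_fetch_partition/1 below.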
365 | 366 | build_fetch_partitions(Partitions) -> 367 | build_fetch_partitions(Partitions, {0, []}). 368 | 369 | build_fetch_partitions([] = _Partitions, {Size, IOList}) -> 370 | {Size, lists:reverse(IOList)}; 371 | build_fetch_partitions([H | T] = _Partitions, {OldSize, IOList}) -> 372 | {Size, Partition} = build_fetch_partition(H), 373 | build_fetch_partitions(T, {OldSize + Size, [Partition | IOList]}). 374 | 375 | build_fetch_partition({Partition, Offset, MaxBytes}) -> 376 | {16, 377 | <>}. 380 | 381 | build_metadata_request([]) -> 382 | % Builds an empty metadata request that returns all topics and partitions 383 | {4, <<0:?UINT>>}; 384 | build_metadata_request(Topic) when is_binary(Topic) -> 385 | build_metadata_request([Topic]); 386 | build_metadata_request(Topics) -> 387 | TopicCount = length(Topics), 388 | {Size, BuiltTopics} = build_metadata_topics(Topics), 389 | {Size + 4, 390 | [<>, 391 | BuiltTopics]}. 392 | 393 | build_metadata_topics(Topics) -> 394 | build_metadata_topics(Topics, {0, []}). 395 | 396 | build_metadata_topics([] = _Topics, {Size, IOList}) -> 397 | {Size, lists:reverse(IOList)}; 398 | build_metadata_topics([H | T] = _Partitions, {OldSize, IOList}) -> 399 | Size = byte_size(H), 400 | Topic = [<>, H], 401 | build_metadata_topics(T, {OldSize + Size + 2, [Topic | IOList]}). 402 | 403 | %%============================================================================== 404 | %% Message Parsing 405 | %%============================================================================== 406 | % Parse produce response (http://goo.gl/f7zhbg) 407 | parse_produced_topics(Count, Bin) -> 408 | parse_produced_topics(Count, Bin, []). 409 | 410 | parse_produced_topics(Count, <<>>, Acc) when Count =< 0 -> 411 | {ok, lists:reverse(Acc)}; 412 | parse_produced_topics(Count, Bin, Acc) when Count =< 0 -> 413 | ok = ?LOG_WARNING("Finished parsing produce response, ignoring bytes: ~p", 414 | [Bin]), 415 | {ok, lists:reverse(Acc)}; 416 | parse_produced_topics(Count, <>, Acc) -> 420 | {ok, Partitions, Remainder} = parse_produced_partitions(PartitionCount, 421 | PartitionsBin), 422 | parse_produced_topics(Count - 1, Remainder, [{TopicName, Partitions} | Acc]). 423 | 424 | parse_produced_partitions(Count, Bin) -> 425 | parse_produced_partitions(Count, Bin, []). 426 | 427 | parse_produced_partitions(Count, Bin, Acc) when Count =< 0 -> 428 | {ok, lists:reverse(Acc), Bin}; 429 | parse_produced_partitions(Count, <>, Acc) -> 433 | PartitionData = {Partition, ErrorCode, Offset}, 434 | parse_produced_partitions(Count - 1, Remainder, [PartitionData | Acc]). 435 | 436 | 437 | % Parse fetch response (http://goo.gl/eba5z3) 438 | parse_topics(Count, Bin) -> 439 | parse_topics(Count, Bin, []). 440 | 441 | parse_topics(Count, <<>>, Acc) when Count =< 0 -> 442 | {ok, lists:reverse(Acc)}; 443 | parse_topics(Count, Bin, Acc) when Count =< 0 -> 444 | ok = ?LOG_WARNING("Finished parsing topics, ignoring bytes: ~p", [Bin]), 445 | {ok, lists:reverse(Acc)}; 446 | parse_topics(Count, Bin, Acc) -> 447 | case parse_topic(Bin) of 448 | {ok, Topic, Remainder} -> 449 | parse_topics(Count - 1, Remainder, [Topic | Acc]); 450 | {incomplete, Topic, {Remainder, Steps}} -> 451 | Step = {topics, Count}, 452 | {incomplete, lists:reverse(Acc, [Topic]), {Remainder, Steps ++ [Step]}}; 453 | incomplete -> 454 | {incomplete, lists:reverse(Acc), {Bin, [{topics, Count}]}}; 455 | {error, _Reason} = Error -> 456 | Error 457 | end. 
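% For reference, the shape produced by parse_produced_topics/2 above (values
% invented): {ok, [{<<"t">>, [{0, 0, 42}]}]} for one topic with partition 0,
% error code 0 and offset 42, matching the produce_topic() and
% produce_partition() types declared at the top of the module.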
458 | 459 | parse_topic(<>) -> 463 | case parse_partitions(PartitionCount, PartitionsBin) of 464 | {ok, Partitions, Remainder} -> 465 | {ok, {TopicName, Partitions}, Remainder}; 466 | {incomplete, Partitions, {Bin, Steps}} -> 467 | Step = {topic, TopicName}, 468 | {incomplete, {TopicName, Partitions}, {Bin, Steps ++ [Step]}}; 469 | {error, _Reason} = Error -> 470 | Error 471 | end; 472 | parse_topic(_Bin) -> 473 | incomplete. 474 | 475 | parse_partitions(Count, Bin) -> 476 | parse_partitions(Count, Bin, []). 477 | 478 | parse_partitions(Count, Bin, Acc) when Count =< 0 -> 479 | {ok, lists:reverse(Acc), Bin}; 480 | parse_partitions(Count, Bin, Acc) -> 481 | case parse_partition(Bin) of 482 | {ok, Partition, Remainder} -> 483 | parse_partitions(Count - 1, Remainder, [Partition | Acc]); 484 | {incomplete, Partition, {Remainder, Steps}} -> 485 | Step = {partitions, Count}, 486 | NewState = {Remainder, Steps ++ [Step]}, 487 | {incomplete, lists:reverse(Acc, [Partition]), NewState}; 488 | incomplete -> 489 | Step = {partitions, Count}, 490 | {incomplete, lists:reverse(Acc), {Bin, [Step]}}; 491 | {error, _Reason} = Error -> 492 | Error 493 | end. 494 | 495 | parse_partition(<>) -> 500 | Partition = {PartitionId, HighwaterMarkOffset}, 501 | case parse_message_set(MessageSetSize, MessageSetBin) of 502 | {ok, Messages, Remainder} -> 503 | {ok, {Partition, Messages}, Remainder}; 504 | {incomplete, Messages, {Bin, Steps}} -> 505 | Step = {partition, Partition}, 506 | {incomplete, {Partition, Messages}, {Bin, Steps ++ [Step]}} 507 | end; 508 | parse_partition(<<_Partition:?UINT, 509 | ErrorCode:?SHORT, 510 | _/binary>>) -> 511 | kafkerl_error:get_error_tuple(ErrorCode); 512 | parse_partition(<<>>) -> 513 | incomplete. 514 | 515 | parse_message_set(Size, Bin) -> 516 | parse_message_set(Size, Bin, []). 517 | 518 | parse_message_set(Count, Bin, Acc) when Count =< 0 -> 519 | {ok, lists:reverse(Acc), Bin}; 520 | parse_message_set(RemainingSize, Bin, Acc) -> 521 | case parse_message(Bin) of 522 | {ok, {Message, Size}, Remainder} -> 523 | parse_message_set(RemainingSize - Size, Remainder, [Message | Acc]); 524 | incomplete -> 525 | {incomplete, lists:reverse(Acc), {Bin, [{message_set, RemainingSize}]}} 526 | end. 527 | 528 | parse_message(<<_Offset:?ULONG, 529 | MessageSize:?INT, 530 | Message:MessageSize/binary, 531 | Remainder/binary>>) -> 532 | <<_Crc:?UINT, 533 | _MagicByte:?UCHAR, 534 | _Attributes:?UCHAR, 535 | KeyValue/binary>> = Message, 536 | KV = case KeyValue of 537 | <> -> 539 | {Key, Value}; 540 | % 4294967295 is -1 and it signifies an empty Key http://goo.gl/Ssl4wq 541 | <<4294967295:?UINT, 542 | ValueSize:?UINT, Value:ValueSize/binary>> -> 543 | Value 544 | end, 545 | % 12 is the size of the offset plus the size of the MessageSize int 546 | {ok, {KV, MessageSize + 12}, Remainder}; 547 | parse_message(_) -> 548 | incomplete. 549 | 550 | % Parse metadata response (http://goo.gl/3wxlZt) 551 | parse_brokers(Count, Bin) -> 552 | parse_brokers(Count, Bin, []). 553 | 554 | parse_brokers(Count, Bin, Acc) when Count =< 0 -> 555 | {ok, lists:reverse(Acc), Bin}; 556 | parse_brokers(Count, <>, Acc) -> 561 | HostStr = binary_to_list(Host), 562 | parse_brokers(Count - 1, Remainder, [{Id, {HostStr, Port}} | Acc]); 563 | parse_brokers(_Count, _Bin, _Acc) -> 564 | {error, bad_binary}. 565 | 566 | parse_topic_metadata(Count, Bin) -> 567 | parse_topic_metadata(Count, Bin, []). 
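% Parsed metadata ends up as {Brokers, TopicMetadata}, where Brokers looks
% like [{0, {"broker0.local", 9092}}] (broker id plus host/port, the host
% converted to a string by parse_brokers/3 above) -- invented values, shape
% per the metadata() type at the top of the module.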
568 | 569 | parse_topic_metadata(Count, <<>>, Acc) when Count =< 0 -> 570 | {ok, lists:reverse(Acc)}; 571 | parse_topic_metadata(Count, Bin, Acc) when Count =< 0 -> 572 | ok = ?LOG_WARNING("Finished parsing topic metadata, ignoring bytes: ~p", 573 | [Bin]), 574 | {ok, lists:reverse(Acc)}; 575 | parse_topic_metadata(Count, <>, Acc) -> 579 | {ok, PartitionsMetadata, Remainder} = parse_partition_metadata(PartitionCount, 580 | PartitionsBin), 581 | TopicMetadata = {ErrorCode, <<"unknown">>, PartitionsMetadata}, 582 | parse_topic_metadata(Count - 1, Remainder, [TopicMetadata | Acc]); 583 | parse_topic_metadata(Count, <>, Acc) -> 588 | {ok, PartitionsMetadata, Remainder} = parse_partition_metadata(PartitionCount, 589 | PartitionsBin), 590 | TopicMetadata = {ErrorCode, TopicName, PartitionsMetadata}, 591 | parse_topic_metadata(Count - 1, Remainder, [TopicMetadata | Acc]). 592 | 593 | parse_partition_metadata(Count, Bin) -> 594 | parse_partition_metadata(Count, Bin, []). 595 | 596 | parse_partition_metadata(Count, Remainder, Acc) when Count =< 0 -> 597 | {ok, lists:reverse(Acc), Remainder}; 598 | parse_partition_metadata(Count, <>, Acc) -> 603 | {ok, Replicas, Remainder} = parse_replica_metadata(ReplicaCount, ReplicasBin), 604 | <> = Remainder, 605 | {ok, Isr, IsrRemainder} = parse_isr_metadata(IsrCount, IsrBin), 606 | PartitionMetadata = {ErrorCode, Partition, Leader, Replicas, Isr}, 607 | parse_partition_metadata(Count - 1, IsrRemainder, [PartitionMetadata | Acc]). 608 | 609 | parse_replica_metadata(Count, Bin) -> 610 | parse_replica_metadata(Count, Bin, []). 611 | 612 | parse_replica_metadata(Count, Remainder, Acc) when Count =< 0 -> 613 | {ok, lists:reverse(Acc), Remainder}; 614 | parse_replica_metadata(Count, <>, Acc) -> 616 | parse_replica_metadata(Count - 1, Remainder, [Replica | Acc]). 617 | 618 | parse_isr_metadata(Count, Bin) -> 619 | parse_isr_metadata(Count, Bin, []). 620 | 621 | parse_isr_metadata(Count, Remainder, Acc) when Count =< 0 -> 622 | {ok, lists:reverse(Acc), Remainder}; 623 | parse_isr_metadata(Count, <>, Acc) -> 625 | parse_isr_metadata(Count - 1, Remainder, [Isr | Acc]). 626 | 627 | %%============================================================================== 628 | %% Utils (aka: don't repeat code) 629 | %%============================================================================== 630 | parse_steps(Bin, CorrelationId, Steps) -> 631 | parse_steps(Bin, CorrelationId, Steps, void). 632 | 633 | parse_steps(<<>>, CorrelationId, [], Data) -> 634 | {ok, CorrelationId, Data}; 635 | parse_steps(Bin, CorrelationId, [Step | T], Data) -> 636 | case parse_step(Bin, Step, Data) of 637 | {ok, NewData} -> 638 | {ok, CorrelationId, NewData}; 639 | {ok, NewData, NewBin} -> 640 | parse_steps(NewBin, CorrelationId, T, NewData); 641 | {incomplete, NewData, {NewBin, Steps}} -> 642 | NewState = {NewBin, CorrelationId, Steps ++ T}, 643 | DataWithContext = add_context_to_data(NewData, Steps ++ T), 644 | {incomplete, CorrelationId, DataWithContext, NewState}; 645 | {incomplete, Steps} -> 646 | NewState = {Bin, CorrelationId, Steps ++ T}, 647 | DataWithContext = add_context_to_data(Data, Steps ++ T), 648 | {incomplete, CorrelationId, DataWithContext, NewState}; 649 | {add_steps, NewBin, NewData, Steps} -> 650 | parse_steps(NewBin, CorrelationId, Steps ++ T, NewData); 651 | Error = {error, _Reason} -> 652 | Error 653 | end. 
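% How the incremental parsing above is meant to be used (a summary, not
% normative): when a fetch response is split across TCP packets,
% parse_fetch_response/1 returns {incomplete, CorrelationId, PartialMessages,
% State}; passing the next binary to parse_fetch_response/2 together with that
% State replays the saved steps ({topics, _}, {partitions, _},
% {message_set, _}, ...) against the leftover bytes plus the new data, until a
% complete {ok, CorrelationId, Messages} result is produced.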
654 | 655 | parse_step(Bin, {topic, void}, Topics) -> 656 | case parse_topic(Bin) of 657 | {incomplete, Topic, {Remainder, Steps}} -> 658 | {add_steps, Remainder, lists:reverse(Topics, [Topic]), Steps}; 659 | incomplete -> 660 | {incomplete, []}; 661 | {ok, Topic, Remainder} -> 662 | {ok, [Topic | Topics], Remainder} 663 | end; 664 | parse_step(Bin, {topic, TopicName}, 665 | [{Partition, Partitions} | Topics]) when is_integer(Partition) -> 666 | {ok, [{TopicName, [{Partition, Partitions}]} | Topics], Bin}; 667 | parse_step(<<>>, {topic, TopicName}, Data) -> 668 | {ok, [{TopicName, Data}]}; 669 | parse_step(_Bin, {topic, TopicName}, _Data) -> 670 | {incomplete, [{topic, TopicName}]}; 671 | 672 | parse_step(Bin, {topics, Count}, void) -> 673 | parse_topics(Count, Bin); 674 | parse_step(Bin, {topics, 1}, Topics) -> 675 | {ok, Topics, Bin}; 676 | parse_step(Bin, {topics, Count}, Topics) -> 677 | {add_steps, Bin, Topics, [{topic, void}, {topics, Count - 1}]}; 678 | 679 | parse_step(Bin, {partition, Partition}, Messages) -> 680 | {ok, [{Partition, Messages}], Bin}; 681 | 682 | parse_step(Bin, {partitions, Count}, void) -> 683 | parse_partitions(Count, Bin); 684 | parse_step(Bin, {partitions, 1}, Partitions) -> 685 | {ok, Partitions, Bin}; 686 | 687 | parse_step(Bin, {message_set, RemainingSize}, _) -> 688 | case parse_message_set(RemainingSize, Bin) of 689 | {incomplete, Messages, State} -> 690 | {incomplete, Messages, State}; 691 | {ok, Messages, <<>>} -> 692 | {ok, Messages, <<>>}; 693 | {ok, Messages, Remainder} -> 694 | {ok, Messages, Remainder} 695 | end. 696 | 697 | add_context_to_data(Data, []) -> 698 | Data; 699 | add_context_to_data(Data, [{partition, Partition} | T]) -> 700 | add_context_to_data([{Partition, Data}], T); 701 | add_context_to_data(Data, [{topic, Topic} | T]) -> 702 | add_context_to_data([{Topic, Data}], T); 703 | add_context_to_data(Data, [_H | T]) -> 704 | add_context_to_data(Data, T). -------------------------------------------------------------------------------- /src/kafkerl_sup.erl: -------------------------------------------------------------------------------- 1 | -module(kafkerl_sup). 2 | -author('hernanrivasacosta@gmail.com'). 3 | 4 | -behaviour(supervisor). 5 | 6 | -export([start_link/0, init/1]). 7 | 8 | -define(SERVER, ?MODULE). 9 | 10 | %%============================================================================== 11 | %% API 12 | %%============================================================================== 13 | -spec start_link() -> {ok, pid()}. 14 | start_link() -> 15 | supervisor:start_link({local, ?SERVER}, ?MODULE, []). 16 | 17 | %%============================================================================== 18 | %% Utils 19 | %%============================================================================== 20 | -spec init([]) -> {ok, {{one_for_one, 5, 10}, [supervisor:child_spec()]}}. 21 | init([]) -> 22 | ChildSpecs = case application:get_env(kafkerl, disabled, false) of 23 | true -> 24 | lager:notice("Kafkerl is disabled, ignoring"), 25 | []; 26 | false -> 27 | [get_connector_child_spec()] 28 | end, 29 | {ok, {{one_for_one, 5, 10}, ChildSpecs}}. 30 | 31 | get_connector_child_spec() -> 32 | Name = application:get_env(kafkerl, gen_server_name, kafkerl), 33 | {ok, ConnConfig} = application:get_env(kafkerl, conn_config), 34 | Topics = application:get_env(kafkerl, topics, []), 35 | Params = [Name, [{topics, Topics} | ConnConfig]], 36 | MFA = {kafkerl_connector, start_link, Params}, 37 | {Name, MFA, permanent, 2000, worker, [kafkerl_connector]}. 
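% Hypothetical sys.config sketch for the keys read above (the key names come
% from the application:get_env calls in this module and from the connector's
% config schema; the broker address and topic name are invented):
%
%   {kafkerl, [{gen_server_name, kafkerl},
%              {topics, [<<"my_topic">>]},
%              {conn_config, [{brokers, [{"localhost", 9092}]},
%                             {client_id, <<"kafkerl_client">>}]}]}.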
-------------------------------------------------------------------------------- /src/kafkerl_utils.erl: -------------------------------------------------------------------------------- 1 | -module(kafkerl_utils). 2 | -author('hernanrivasacosta@gmail.com'). 3 | 4 | -export([send_event/2, send_error/2]). 5 | -export([get_tcp_options/1]). 6 | -export([merge_messages/1, split_messages/1, valid_message/1]). 7 | -export([buffer_name/2]). 8 | -export([gather_consume_responses/0, gather_consume_responses/1]). 9 | -export([proplists_set/2]). 10 | 11 | %%============================================================================== 12 | %% API 13 | %%============================================================================== 14 | -spec send_error(kafkerl:callback(), any()) -> ok. 15 | send_error(Callback, Reason) -> 16 | send_event(Callback, {error, Reason}). 17 | 18 | -spec send_event(kafkerl:callback(), any()) -> 19 | ok | {error, {bad_callback, any()}}. 20 | send_event({M, F}, Data) -> 21 | spawn(fun() -> M:F(Data) end), 22 | ok; 23 | send_event({M, F, A}, Data) -> 24 | spawn(fun() -> apply(M, F, A ++ [Data]) end), 25 | ok; 26 | send_event(Pid, Data) when is_pid(Pid) -> 27 | Pid ! Data, 28 | ok; 29 | send_event(Function, Data) when is_function(Function, 1) -> 30 | spawn(fun() -> Function(Data) end), 31 | ok; 32 | send_event(BadCallback, _Data) -> 33 | {error, {bad_callback, BadCallback}}. 34 | 35 | default_tcp_options() -> 36 | % This list has to be sorted 37 | [{mode, binary}, {packet, 0}]. 38 | 39 | -spec get_tcp_options([{any(), any()}]) -> list(). 40 | get_tcp_options(Options) -> % TODO: refactor 41 | UnfoldedOptions = proplists:unfold(Options), 42 | lists:ukeymerge(1, lists:sort(UnfoldedOptions), default_tcp_options()). 43 | 44 | % This is rather costly, and for obvious reasons does not maintain the order of 45 | % the partitions or topics, but it does keep the order of the messages within a 46 | % specific topic-partition pair 47 | -spec merge_messages([kafkerl_protocol:basic_message()]) -> 48 | kafkerl_protocol:merged_message(). 49 | merge_messages(Topics) -> 50 | merge_topics(Topics). 51 | 52 | % Not as costly, but still avoid this in a place where performance is critical 53 | -spec split_messages(kafkerl_protocol:merged_message()) -> 54 | [kafkerl_protocol:basic_message()]. 55 | split_messages({Topic, {Partition, Messages}}) -> 56 | {Topic, Partition, Messages}; 57 | split_messages({Topic, Partitions}) -> 58 | [{Topic, Partition, Messages} || {Partition, Messages} <- Partitions]; 59 | split_messages(Topics) -> 60 | lists:flatten([split_messages(Topic) || Topic <- Topics]). 61 | 62 | -spec valid_message(any()) -> boolean(). 63 | valid_message({Topic, Partition, Messages}) -> 64 | is_binary(Topic) andalso is_integer(Partition) andalso Partition >= 0 andalso 65 | (is_binary(Messages) orelse is_list_of_binaries(Messages)); 66 | valid_message({Topic, Partition}) -> 67 | is_binary(Topic) andalso (is_partition(Partition) orelse 68 | is_partition_list(Partition)); 69 | valid_message(L) when is_list(L) -> 70 | lists:all(fun valid_message/1, L); 71 | valid_message(_Any) -> 72 | false. 73 | 74 | -spec buffer_name(kafkerl_protocol:topic(), kafkerl_protocol:partition()) -> 75 | atom(). 76 | buffer_name(Topic, Partition) -> 77 | Bin = <>, 78 | binary_to_atom(Bin, utf8). 79 | 80 | -type proplist_value() :: {atom(), any()}. 81 | -type proplist() :: [proplist_value]. 82 | -spec proplists_set(proplist(), proplist_value() | [proplist_value()]) -> 83 | proplist(). 
84 | proplists_set(Proplist, {K, _V} = NewValue) -> 85 | lists:keyreplace(K, 1, Proplist, NewValue); 86 | proplists_set(Proplist, []) -> 87 | Proplist; 88 | proplists_set(Proplist, [H | T]) -> 89 | proplists_set(proplists_set(Proplist, H), T). 90 | 91 | %%============================================================================== 92 | %% Utils 93 | %%============================================================================== 94 | %% Merge 95 | merge_topics({Topic, Partition, Message}) -> 96 | merge_topics([{Topic, Partition, Message}]); 97 | merge_topics([{Topic, Partition, Message}]) -> 98 | [{Topic, [{Partition, Message}]}]; 99 | merge_topics(Topics) -> 100 | merge_topics(Topics, []). 101 | 102 | merge_topics([], Acc) -> 103 | Acc; 104 | merge_topics([{Topic, Partition, Messages} | T], Acc) -> 105 | merge_topics([{Topic, [{Partition, Messages}]} | T], Acc); 106 | merge_topics([{Topic, Partitions} | T], Acc) -> 107 | case lists:keytake(Topic, 1, Acc) of 108 | false -> 109 | merge_topics(T, [{Topic, merge_partitions(Partitions)} | Acc]); 110 | {value, {Topic, OldPartitions}, NewAcc} -> 111 | NewPartitions = Partitions ++ OldPartitions, 112 | merge_topics(T, [{Topic, merge_partitions(NewPartitions)} | NewAcc]) 113 | end. 114 | 115 | merge_partitions(Partitions) -> 116 | merge_partitions(Partitions, []). 117 | 118 | merge_partitions([], Acc) -> 119 | Acc; 120 | merge_partitions([{Partition, Messages} | T], Acc) -> 121 | case lists:keytake(Partition, 1, Acc) of 122 | false -> 123 | merge_partitions(T, [{Partition, Messages} | Acc]); 124 | {value, {Partition, OldMessages}, NewAcc} -> 125 | NewMessages = merge_messages(OldMessages, Messages), 126 | merge_partitions(T, [{Partition, NewMessages} | NewAcc]) 127 | end. 128 | 129 | merge_messages(A, B) -> 130 | case {is_list(A), is_list(B)} of 131 | {true, true} -> B ++ A; 132 | {false, true} -> B ++ [A]; 133 | {true, false} -> [B | A]; 134 | {false, false} -> [B, A] 135 | end. 136 | 137 | is_list_of_binaries(L) when is_list(L) -> 138 | length(L) > 0 andalso lists:all(fun is_binary/1, L); 139 | is_list_of_binaries(_Any) -> 140 | false. 141 | 142 | is_partition_list(L) when is_list(L) -> 143 | length(L) > 0 andalso lists:all(fun is_partition/1, L); 144 | is_partition_list(_Any) -> 145 | false. 146 | 147 | is_partition({Partition, Messages}) -> 148 | is_integer(Partition) andalso Partition >= 0 andalso 149 | (is_binary(Messages) orelse is_list_of_binaries(Messages)); 150 | is_partition(_Any) -> 151 | false. 152 | 153 | -spec gather_consume_responses() -> [] | {any(), any()}. 154 | gather_consume_responses() -> 155 | gather_consume_responses(2500). 156 | 157 | -spec gather_consume_responses(integer()) -> [] | {any(), any()}. 158 | gather_consume_responses(Timeout) -> 159 | gather_consume_responses(Timeout, []). 160 | gather_consume_responses(Timeout, Acc) -> 161 | receive 162 | {consumed, Messages} -> 163 | gather_consume_responses(Timeout, Acc ++ Messages); 164 | {offset, Offset} -> 165 | {Acc, Offset}; 166 | {error, _Reason} = Error -> 167 | Error 168 | after Timeout -> 169 | [] 170 | end. -------------------------------------------------------------------------------- /test/kafkerl_meta_SUITE.erl: -------------------------------------------------------------------------------- 1 | -module(kafkerl_meta_SUITE). 2 | 3 | -include_lib("mixer/include/mixer.hrl"). 4 | -mixin([{ktn_meta_SUITE 5 | , [ all/0 6 | , xref/1 7 | , dialyzer/1 8 | , elvis/1 9 | ] 10 | }]). 11 | 12 | -export([init_per_suite/1]). 13 | 14 | -type config() :: [{atom(), term()}]. 
15 | 16 | -spec init_per_suite(config()) -> config(). 17 | init_per_suite(Config) -> 18 | [{application, kafkerl} | Config]. 19 | --------------------------------------------------------------------------------