├── .circleci └── config.yml ├── .gcloudignore ├── .gitignore ├── Dockerfile ├── LICENSE ├── README.md ├── cloudbuild.yaml ├── cluster ├── sys.config └── vm.args ├── config ├── prod_sys.config ├── prod_vm.args ├── proper.config ├── shared.config ├── sys.config ├── test.config └── vm.args ├── docs ├── design.md └── implementation.md ├── helm ├── README.md ├── storage-class.yaml └── vonnegut │ ├── .helmignore │ ├── Chart.yaml │ ├── templates │ ├── NOTES.txt │ ├── _helpers.tpl │ ├── config_map.yaml │ ├── service.yaml │ └── stateful-set.yaml │ └── values.yaml ├── include └── vg.hrl ├── rebar.config ├── rebar.lock ├── src ├── vg.erl ├── vg_active_segment.erl ├── vg_chain_state.erl ├── vg_cleaner.erl ├── vg_client.erl ├── vg_client_pool.erl ├── vg_cluster_mgr.erl ├── vg_config.erl ├── vg_conn.erl ├── vg_elli_handler.erl ├── vg_index.erl ├── vg_log_segments.erl ├── vg_peer_service.erl ├── vg_pool.erl ├── vg_pool_sup.erl ├── vg_protocol.erl ├── vg_socket.erl ├── vg_topic_mgr.erl ├── vg_topic_sup.erl ├── vg_topics.erl ├── vg_topics_sup.erl ├── vg_utils.erl ├── vonnegut.app.src ├── vonnegut_app.erl └── vonnegut_sup.erl └── test ├── cleanup_SUITE.erl ├── kafka_client_SUITE.erl ├── log_roll_SUITE.erl ├── prop_vg.erl ├── protocol_SUITE.erl ├── test_utils.hrl ├── topic_SUITE.erl ├── vg_consumer_SUITE.erl ├── vg_statem.erl ├── vg_test_utils.erl └── z_cluster_SUITE.erl /.circleci/config.yml: -------------------------------------------------------------------------------- 1 | defaults: &defaults 2 | shell: /bin/bash 3 | working_directory: /home/circleci/vonnegut 4 | docker: 5 | - image: tsloughter/erlang-alpine:21.0.5 6 | 7 | version: 2 8 | jobs: 9 | build: 10 | <<: *defaults 11 | steps: 12 | - checkout 13 | 14 | - restore_cache: 15 | keys: 16 | - vonnegut-{{ checksum "rebar.lock" }} 17 | - vonnegut-hex-packages 18 | 19 | - run: 20 | command: rebar3 compile 21 | 22 | - store_artifacts: 23 | path: /home/circleci/vonnegut/rebar3.crashdump 24 | destination: rebar3_crashdump.txt 25 | 
when: on_fail 26 | 27 | - save_cache: 28 | key: vonnegut-{{ checksum "rebar.lock" }} 29 | paths: 30 | - /home/circleci/vonnegut/_build/default/lib 31 | - /home/circleci/vonnegut/_build/default/plugins 32 | 33 | - save_cache: 34 | key: vonnegut-hex-packages 35 | paths: 36 | - /root/.cache/rebar3/hex/default/packages 37 | 38 | dialyzer: 39 | <<: *defaults 40 | steps: 41 | - checkout 42 | 43 | - attach_workspace: 44 | at: /home/circleci/vonnegut 45 | 46 | - restore_cache: 47 | keys: 48 | - erlang-plt-21.0.5 49 | 50 | - restore_cache: 51 | keys: 52 | - vonnegut-{{ checksum "rebar.lock" }} 53 | - vonnegut-hex-packages 54 | 55 | - run: 56 | command: rebar3 dialyzer 57 | 58 | - save_cache: 59 | key: erlang-plt-21.0.5 60 | paths: 61 | - /root/.cache/rebar3/rebar3_21.0.5_plt 62 | 63 | xref: 64 | <<: *defaults 65 | steps: 66 | - checkout 67 | 68 | - attach_workspace: 69 | at: /home/circleci/vonnegut 70 | 71 | - restore_cache: 72 | keys: 73 | - vonnegut-{{ checksum "rebar.lock" }} 74 | - vonnegut-hex-packages 75 | 76 | - run: 77 | command: rebar3 xref 78 | 79 | tests: 80 | <<: *defaults 81 | steps: 82 | - checkout 83 | 84 | - attach_workspace: 85 | at: /home/circleci/vonnegut 86 | 87 | - restore_cache: 88 | keys: 89 | - vonnegut-{{ checksum "rebar.lock" }} 90 | - vonnegut-hex-packages 91 | 92 | - run: 93 | command: | 94 | set -eux 95 | epmd -daemon 96 | rebar3 do ct --name=testrunner@127.0.0.1, cover 97 | rebar3 covertool generate 98 | apk add --update python python-dev py-pip 99 | pip install codecov && codecov -f _build/test/covertool/vonnegut.covertool.xml 100 | 101 | - store_test_results: 102 | path: /home/circleci/vonnegut/_build/test/logs/ 103 | 104 | - store_artifacts: 105 | path: /home/circleci/vonnegut/_build/test/logs 106 | destination: common_test 107 | 108 | - store_artifacts: 109 | path: /home/circleci/vonnegut/rebar3.crashdump 110 | destination: rebar3_crashdump.txt 111 | when: on_fail 112 | 113 | workflows: 114 | version: 2 115 | build_and_test: 116 | jobs: 117 
| - build 118 | - dialyzer: 119 | requires: 120 | - build 121 | - xref: 122 | requires: 123 | - build 124 | - tests: 125 | requires: 126 | - build 127 | -------------------------------------------------------------------------------- /.gcloudignore: -------------------------------------------------------------------------------- 1 | # This file specifies files that are *not* uploaded to Google Cloud Platform 2 | # using gcloud. It follows the same syntax as .gitignore, with the addition of 3 | # "#!include" directives (which insert the entries of the given .gitignore-style 4 | # file at that point). 5 | # 6 | # For more information, run: 7 | # $ gcloud topic gcloudignore 8 | # 9 | .gcloudignore 10 | _build 11 | ebin 12 | _checkouts 13 | # If you would like to upload your .git directory, .gitignore file or files 14 | # from your .gitignore file, remove the corresponding line 15 | # below: 16 | #.git 17 | #.gitignore 18 | !include:.gitignore 19 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .rebar3 2 | data 3 | _* 4 | .eunit 5 | *.o 6 | *.beam 7 | *.plt 8 | *.swp 9 | *.swo 10 | .erlang.cookie 11 | ebin 12 | log 13 | erl_crash.dump 14 | .rebar 15 | _rel 16 | _deps 17 | _plugins 18 | _tdeps 19 | logs 20 | _build 21 | rebar3.crashdump 22 | vonnegut.tar.gz 23 | properdata/ 24 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM erlang:21.0.5-alpine as builder 2 | 3 | RUN apk add --no-cache --update tar curl git bash make libc-dev gcc g++ vim 4 | 5 | RUN set -xe \ 6 | && curl -fSL -o rebar3 "https://s3.amazonaws.com/rebar3-nightly/rebar3" \ 7 | && chmod +x ./rebar3 \ 8 | && ./rebar3 local install \ 9 | && rm ./rebar3 10 | 11 | ENV PATH "$PATH:/root/.cache/rebar3/bin" 12 | 13 | WORKDIR /usr/src/app 14 | COPY . 
/usr/src/app 15 | 16 | RUN rebar3 as prod tar 17 | 18 | RUN mkdir -p /opt/rel 19 | RUN tar -zxvf /usr/src/app/_build/prod/rel/*/*.tar.gz -C /opt/rel 20 | 21 | FROM alpine:3.8 22 | 23 | RUN apk add --no-cache openssl-dev ncurses 24 | 25 | WORKDIR /opt/vonnegut 26 | 27 | ENV RELX_REPLACE_OS_VARS true 28 | ENV NODE 127.0.0.1 29 | ENV COOKIE vonnegut 30 | ENV CHAIN_NAME chain1 31 | ENV REPLICAS 1 32 | ENV PEER_IP 127.0.0.1 33 | ENV DISCOVERY_DOMAIN local 34 | 35 | COPY --from=builder /opt/rel /opt/vonnegut 36 | 37 | EXPOSE 5555 5555 38 | 39 | ENTRYPOINT ["/opt/vonnegut/bin/vonnegut"] 40 | 41 | CMD ["foreground"] 42 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 
25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. 
You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 
175 | 176 | END OF TERMS AND CONDITIONS 177 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | vonnegut 2 | ===== 3 | 4 | [![CircleCI](https://circleci.com/gh/SpaceTime-IoT/vonnegut.svg?style=svg)](https://circleci.com/gh/SpaceTime-IoT/vonnegut) 5 | 6 | [![codecov](https://codecov.io/gh/SpaceTime-IoT/vonnegut/branch/master/graph/badge.svg)](https://codecov.io/gh/SpaceTime-IoT/vonnegut) 7 | 8 | Vonnegut is an append-only log that follows the file format and API of Kafka 1.0. The server can be run standalone, with 1 or more chains each with 1 or more replicas, or as part of another Erlang release which can talk to it directly. 9 | 10 | Each chain is responsible for a range of the topic space. A read or write to a topic requires finding what chain the topic belongs to and then making a request to the head, in the case of a write, or the tail, in the case of a read. 11 | 12 | Configuration 13 | ----- 14 | 15 | ### Server 16 | 17 | A node in a chain can discover other nodes within the chain through DNS SRV record queries. The `replicas` configuration tells the vonnegut node how many other nodes it needs to connect to in order to form the required chain length to ack writes. 18 | 19 | ``` 20 | {vonnegut, [{chain, [{name, "chain-1"}, 21 | {discovery, {srv, "chain-1.service.cluster.local"}}, 22 | {replicas, "2"}, 23 | {port, 5555}]} 24 | ]} 25 | ``` 26 | 27 | ### Client 28 | 29 | Clients start a pool of connections to the head and tail of each chain. Chains are found through DNS queries against endpoints: 30 | 31 | ``` 32 | {vonnegut, [{client, [{endpoints, [{"chain-1.service.cluster.local", 5555}]}]}]} 33 | ``` 34 | 35 | Erlang Interface 36 | --- 37 | 38 | A local interface can be used to create, read and write topics. 39 | 40 | ```shell 41 | $ rebar3 shell 42 | 1> vg:create_topic(<<"test_topic">>). 
43 | 2> vg:write(<<"test_topic">>, [<<"some log message">>, <<"more log message">>]). 44 | 3> vg:fetch(<<"test_topic">>). 45 | {ok,#{high_water_mark => 1,partition => 0, 46 | record_batches => 47 | [#{headers => [],key => <<>>,offset => 1,sequence_number => 1, 48 | timestamp => 1517613646458,value => <<"more log message">>}, 49 | #{headers => [],key => <<>>,offset => 0,sequence_number => 0, 50 | timestamp => 1517613646458, 51 | value => <<"some log message">>}]}} 52 | ``` 53 | 54 | By default index and log files will be written to `./data`: 55 | 56 | ```shell 57 | $ ls data/test_topic-0/ 58 | 00000000000000000000.index 00000000000000000000.log 59 | ``` 60 | 61 | Kafkaesque Client 62 | --- 63 | 64 | ```erlang 65 | $ rebar3 shell 66 | 1> vg_client_pool:start(). 67 | ok 68 | 2> vg_client:produce(<<"my-topic-2">>, [<<"message 1">>, <<"message 2">>]). 69 | {ok,1} 70 | 3> vg_client:fetch(<<"my-topic-2">>). 71 | {ok,#{<<"my-topic-2">> => 72 | #{0 => 73 | #{error_code => 0,high_water_mark => 1, 74 | record_batches => 75 | [#{headers => [],key => <<>>,offset => 1, 76 | sequence_number => 1,timestamp => 1517616861441, 77 | value => <<"message 2">>}, 78 | #{headers => [],key => <<>>,offset => 0, 79 | sequence_number => 0,timestamp => 1517616861441, 80 | value => <<"message 1">>}], 81 | record_batches_size => 95}}}} 82 | ``` 83 | 84 | Running Tests 85 | ----- 86 | 87 | The tests require opening thousands of files and so may require increasing the limit per process on your system with: 88 | 89 | ```shell 90 | $ ulimit -n 63536 91 | ``` 92 | 93 | Tests also require a nodename: 94 | 95 | ```shell 96 | $ rebar3 ct 97 | ``` 98 | -------------------------------------------------------------------------------- /cloudbuild.yaml: -------------------------------------------------------------------------------- 1 | steps: 2 | - name: 'gcr.io/cloud-builders/docker' 3 | entrypoint: 'sh' 4 | args: ['-c', 'docker build -t us.gcr.io/$PROJECT_ID/vonnegut:$$(git describe --abbrev=4 HEAD 
--tags) .'] 5 | images: ['us.gcr.io/nucleus-sti/vonnegut'] 6 | -------------------------------------------------------------------------------- /cluster/sys.config: -------------------------------------------------------------------------------- 1 | %% -*- erlang -*- 2 | [{vonnegut, [{chain, [{name, chain1}, 3 | {discovery, {direct, [{'chain1-0', "127.0.0.1", 15555, 5588}, 4 | {'chain1-1', "127.0.0.1", 15556, 5589}, 5 | {'chain1-2', "127.0.0.1", 15557, 5590}]}}, 6 | {replicas, 3}]}, 7 | 8 | {client, [{endpoints, [{"127.0.0.1", 5588}]}]} 9 | %% {log_dirs, ["${LOG_DIR}"]} 10 | ]}, 11 | 12 | {partisan, [{peer_ip, {127,0,0,1}}, 13 | {partisan_peer_service_manager, 14 | partisan_default_peer_service_manager}]}, 15 | 16 | {kernel, [{start_time, true}]}, 17 | 18 | {opencensus, [{reporter, {oc_reporter_noop, #{project => <<"nucleus-sti">>, 19 | service_account => <<"default">>}}}]}, 20 | 21 | {lager, 22 | [{error_logger_hwm, 100}, 23 | {crash_log_count, 5}, 24 | {crash_log_date, "$D0"}, 25 | {crash_log_size, 10485760}, 26 | {crash_log_msg_size, 65536}, 27 | {crash_log, "./log/crash.log"}, 28 | {handlers, 29 | [{lager_console_backend, [{level, info}, 30 | {formatter, lager_default_formatter}, 31 | {formatter_config, 32 | [time, color, " [",severity,"] ", 33 | pid, " ", 34 | "mod=", module, 35 | " fun=", function, " ", message, "\e[0m\r\n"]}]}, 36 | {lager_file_backend, 37 | [{file, "./log/error.log"}, 38 | {level, error}, 39 | {formatter, lager_default_formatter}, 40 | {formatter_config, 41 | [time, color, " [",severity,"] ", 42 | pid, " ", 43 | "mod=", module, 44 | " fun=", function, " ", message, "\e[0m\r\n"]}, 45 | {size, 10485760}, 46 | {date, "$D0"}, 47 | {count, 5}]}, 48 | {lager_file_backend, 49 | [{file, "./log/debug.log"}, 50 | {level, debug}, 51 | {formatter, lager_default_formatter}, 52 | {formatter_config, 53 | [time, color, " [",severity,"] ", 54 | pid, " ", 55 | "mod=", module, 56 | " fun=", function, " ", message, "\e[0m\r\n"]}, 57 | {size, 10485760}, 58 | 
{date, "$D0"}, 59 | {count, 5}]}]}, 60 | {error_logger_redirect, true}]}, 61 | 62 | "../../../../config/shared" 63 | ]. 64 | -------------------------------------------------------------------------------- /cluster/vm.args: -------------------------------------------------------------------------------- 1 | -name ${NODE} 2 | 3 | -setcookie vonnegut 4 | 5 | +A 100 6 | +K true 7 | 8 | -partisan peer_port ${PEER_PORT} 9 | -------------------------------------------------------------------------------- /config/prod_sys.config: -------------------------------------------------------------------------------- 1 | [{vonnegut, [{chain, [{name, "${CHAIN_NAME}"}, 2 | 3 | {discovery, {srv, "${DISCOVERY_DOMAIN}"}}, 4 | 5 | %% with direct we do not need to set the # replicas expected 6 | {replicas, "${REPLICAS}"}, 7 | {port, 5588}]} 8 | ]}, 9 | 10 | {partisan, [{peer_ip, "${PEER_IP}"}, 11 | {peer_port, 10200}, 12 | {partisan_peer_service_manager, 13 | partisan_default_peer_service_manager}]}, 14 | 15 | {kernel, [{start_time, true}]}, 16 | 17 | {opencensus, [{reporter, {oc_reporter_noop, #{project => <<"nucleus-sti">>, 18 | service_account => <<"default">>}}}]}, 19 | 20 | {lager, [{error_logger_redirect, true}, 21 | {handlers, 22 | [{lager_console_backend, 23 | [{level, info}, 24 | {formatter, lager_default_formatter}, 25 | {formatter_config, 26 | [time, " [",severity,"] ", 27 | pid, " ", 28 | {module, ["mod=", module, {function, [" fun=", function], ""}, " "], ""}, message, "\n"]}]}]}]}, 29 | 30 | "config/shared" 31 | ]. 
32 | -------------------------------------------------------------------------------- /config/prod_vm.args: -------------------------------------------------------------------------------- 1 | -name vonnegut@${NODE} 2 | 3 | -setcookie ${COOKIE} 4 | 5 | +A 100 6 | +K true 7 | -------------------------------------------------------------------------------- /config/proper.config: -------------------------------------------------------------------------------- 1 | %% -*- erlang -*- 2 | [{vonnegut, [{chain, [{name, chain1}, 3 | %% {nodename, host, data-port, partisan-port} 4 | {discovery, local}, %% {direct, [{'chain1-0', "127.0.0.1", 10200, 5555}, 5 | %% {'chain1-1', "127.0.0.1", 10201, 5556}, 6 | %% {'chain1-2', "127.0.0.1", 10202, 5557}]}}, 7 | 8 | %% with direct we do not need to set the # replicas expected 9 | %% {replicas, "2"} 10 | 11 | {port, 5588}]}, 12 | {segment_bytes, 1024}, 13 | {index_max_bytes, 128}, 14 | {index_interval_bytes, 256}, 15 | 16 | %% client config for if we want to use only the vonnegut client 17 | {client, [{endpoint, [{"127.0.0.1", 5588}]}]} 18 | ]}, 19 | 20 | {partisan, [{peer_ip, {127,0,0,1}}, 21 | {peer_port, 10200}, 22 | {partisan_peer_service_manager, 23 | partisan_default_peer_service_manager}]}, 24 | 25 | {kernel, [{start_time, true}]}, 26 | 27 | {opencensus, [{reporter, {oc_noop_reporter, #{project => <<"nucleus-sti">>, 28 | service_account => <<"default">>}}}]}, 29 | 30 | {sasl, [{sasl_error_logger, false}]}, 31 | 32 | {lager, [{handlers, 33 | [{lager_console_backend, 34 | [{level, info}, 35 | {formatter, lager_default_formatter}, 36 | {formatter_config, 37 | [time, color, " [",severity,"] ", 38 | pid, " ", 39 | "mod=", module, 40 | " fun=", function, " ", message, "\e[0m\r\n"]}]}]} 41 | ]}, 42 | 43 | "config/shared" 44 | ]. 
45 | -------------------------------------------------------------------------------- /config/shared.config: -------------------------------------------------------------------------------- 1 | [ 2 | {sasl, [{sasl_error_logger, false}]}, 3 | 4 | {prometheus, [{collectors, [default]}, 5 | {default_metrics, 6 | [{gauge, [{name, active_topics}, 7 | {labels, []}, 8 | {help, "number of active topic processes"}, 9 | {registry, default}]}, 10 | {gauge, [{name, log_segments}, 11 | {labels, [topic]}, 12 | {help, "number of log segments for topics"}, 13 | {registry, default}]}, 14 | {gauge, [{name, open_connections}, 15 | {labels, []}, 16 | {help, "number of open connections through the client"}, 17 | {registry, default}]}, 18 | {gauge, [{name, replicas}, 19 | {labels, []}, 20 | {help, "number of replicas in this chain"}, 21 | {registry, default}]}, 22 | {gauge, [{name, chains}, 23 | {labels, []}, 24 | {help, "number of chains in the cluster"}, 25 | {registry, default}]}, 26 | %% this is good to know but doesn't make sense right now 27 | %% {gauge, [{name, pending_write_repairs}, 28 | %% {labels, []}, 29 | %% {help, "number of records that have been repaired on this node"}, 30 | %% {registry, default}]}, 31 | 32 | %% same as active_topics right now 33 | %% {gauge, [{name, topics}, 34 | %% {labels, [chain]}, 35 | %% {help, "number of topics in a chain"}, 36 | %% {registry, default}]}, 37 | 38 | {boolean, [{name, is_active}, 39 | {labels, []}, 40 | {help, "is this brick active in the chain"}, 41 | {registry, default}]}, 42 | {boolean, [{name, is_solo}, 43 | {labels, []}, 44 | {help, "is this chain a single node"}, 45 | {registry, default}]}, 46 | {boolean, [{name, is_head}, 47 | {labels, []}, 48 | {help, "is this brick the head of a chain"}, 49 | {registry, default}]}, 50 | {boolean, [{name, is_middle}, 51 | {labels, []}, 52 | {help, "is this brick in the middle of a chain"}, 53 | {registry, default}]}, 54 | {boolean, [{name, is_tail}, 55 | {labels, []}, 56 | {help, "is this 
brick the tail of a chain"}, 57 | {registry, default}]}, 58 | 59 | {counter, [{name, write_repairs}, 60 | {labels, []}, 61 | {help, "number of write repairs"}, 62 | {registry, default}]}, 63 | {counter, [{name, client_requests}, 64 | {labels, []}, 65 | {help, "requests count"}, 66 | {registry, default}]}]}]} 67 | ]. 68 | -------------------------------------------------------------------------------- /config/sys.config: -------------------------------------------------------------------------------- 1 | %% -*- erlang -*- 2 | [{vonnegut, [{chain, [{name, chain1}, 3 | %% {nodename, host, data-port, partisan-port} 4 | {discovery, local}, %% {direct, [{'chain1-0', "127.0.0.1", 10200, 5555}, 5 | %% {'chain1-1', "127.0.0.1", 10201, 5556}, 6 | %% {'chain1-2', "127.0.0.1", 10202, 5557}]}}, 7 | 8 | %% with direct we do not need to set the # replicas expected 9 | %% {replicas, "2"} 10 | 11 | {port, 5588}]}, 12 | 13 | %% client config for if we want to use only the vonnegut client 14 | {client, [{endpoints, [{"127.0.0.1", 5588}]}]} 15 | ]}, 16 | 17 | {partisan, [{peer_ip, {127,0,0,1}}, 18 | {peer_port, 10200}, 19 | {partisan_peer_service_manager, 20 | partisan_default_peer_service_manager}]}, 21 | 22 | {kernel, [{start_time, true}]}, 23 | 24 | {opencensus, [{reporter, {oc_reporter_noop, #{project => <<"nucleus-sti">>, 25 | service_account => <<"default">>}}}]}, 26 | 27 | {lager, [{error_logger_redirect, true}, 28 | %% {suppress_application_start_stop, true}, 29 | %% {suppress_supervisor_start_stop, true}, 30 | {handlers, 31 | [{lager_console_backend, 32 | [{level, info}, 33 | {formatter, lager_default_formatter}, 34 | {formatter_config, 35 | [time, " [",severity,"] ", 36 | pid, " ", 37 | {module, ["mod=", module, {function, [" fun=", function], ""}, " "], ""}, message, "\n"]}]}]}]}, 38 | 39 | "config/shared" 40 | ]. 
41 | -------------------------------------------------------------------------------- /config/test.config: -------------------------------------------------------------------------------- 1 | %% -*- erlang -*- 2 | [{lager, 3 | [{error_logger_hwm, 100}, 4 | {crash_log_count, 5}, 5 | {crash_log_date, "$D0"}, 6 | {crash_log_size, 10485760}, 7 | {crash_log_msg_size, 65536}, 8 | {crash_log, "./log/crash.log"}, 9 | {handlers, 10 | [{lager_console_backend, 11 | [{level, info}, 12 | {formatter, lager_default_formatter}, 13 | {formatter_config, 14 | [time, " [",severity,"] ", 15 | pid, " ", 16 | {module, ["mod=", module, {function, [" fun=", function], ""}, " "], ""}, message, "\n"]}]}, 17 | {lager_file_backend, 18 | [{file, "./log/error.log"}, 19 | {level, error}, 20 | {formatter, lager_default_formatter}, 21 | {formatter_config, 22 | [time, " [",severity,"] ", 23 | pid, " ", 24 | {module, ["mod=", module, {function, [" fun=", function], ""}, " "], ""}, message, "\n"]}]}, 25 | {lager_file_backend, 26 | [{file, "./log/debug.log"}, 27 | {level, debug}, 28 | {formatter, lager_default_formatter}, 29 | {formatter_config, 30 | [time, " [",severity,"] ", 31 | pid, " ", 32 | {module, ["mod=", module, {function, [" fun=", function], ""}, " "], ""}, message, "\n"]}]}]}, 33 | {error_logger_redirect, true}]}, 34 | 35 | 36 | "config/shared" 37 | ]. 
38 | -------------------------------------------------------------------------------- /config/vm.args: -------------------------------------------------------------------------------- 1 | -name chain1-0@127.0.0.1 2 | 3 | -setcookie vonnegut 4 | 5 | +A 100 6 | +K true 7 | -------------------------------------------------------------------------------- /docs/design.md: -------------------------------------------------------------------------------- 1 | Vonnegut Design Doc (1st Iteration) 2 | ----------------------------------------- 3 | 4 | Vonnegut is an append only replicated log utilizing Kubernetes Stateful Sets for consistency and resource utilization. 5 | 6 | ## Log 7 | 8 | Append only ordered sequence of records made up of multiple log segment files stored on disk. 9 | 10 | ## Log Segment 11 | 12 | A log segment is a file with the name of its first contained log record id. The `active` log segment is the newest and the only one that has writes appended to it. When it or the corresponding index becomes too large a new active log segment is created. The index allows a reader to quickly find the start position of a record by id within a log segment. 13 | 14 | ## Chains 15 | 16 | Chains consist of `N` vonnegut nodes, the first node in the chain is the `head` and the last is the `tail`. If `N=1` then these are the same and no replication occurs. 17 | 18 | All writes are sent to the `head` of a chain, all reads occur on the `tail`. Dirty reads or historical reads (reads on data only in inactive log segments) can occur on any node in the chain. 19 | 20 | ## (not really) Virtual Nodes 21 | 22 | Initial work will be on vonnegut nodes and virtual nodes having a 1 to 1 mapping. Meaning vonnegut nodes only take part in a single chain. Increasing the number of chains requires adding vonnegut nodes and overlapping chains across physical machines or virtual machines requires overlapping the separate vonnegut nodes. 
We'll be utilizing Kubernetes for handling the scheduling and resource utilization optimization for overlapping vonnegut nodes within a cluster of virtual machines. 23 | 24 | ## Cluster Membership 25 | 26 | The vonnegut nodes form a cluster through finding nodes in DNS and [partisan](https://github.com/lasp-lang/partisan) for connecting and failure detection. When a failure is detected by partisan a call back is triggered on each node and the nodes wait to continue replication until the entire chain is healthy again. 27 | 28 | Reads can continue as usual during failure. Unless, of course, the tail is unreachable by the clients, in which case the client requests will simply fail. 29 | 30 | ## Chain Membership 31 | 32 | Chains are manually created and all nodes within a chain are added together to the cluster. No rebalancing is done within the cluster when a new chain is created. Instead, the weighted chain selection will return the new chain for new topics until the chains become balanced. 33 | 34 | The order of a chain is a lexicographical sort of the node names guaranteeing each node sees the same chain structure. Nodes are named `-{0..N-1}` where `N` is the number of nodes in the chain. 35 | 36 | ## Adding Nodes to Existing Chains 37 | 38 | New nodes are added to the end of the chain, Stateful Sets ordered node names ensures this. It is the responsibility of the current tail to promote the new node to the new tail after it has synced all topic log segments from the tail, at which point client requests for reads are redirected to the new tail and the rest of the members of the chain are notified that it is now `active`, making them capable of answering a client request for who is the current `tail`. 39 | 40 | ## Mapping Topics to Chains 41 | 42 | New topics select a chain randomly from the chains with the lowest weight. Weight can take into account numerous metrics of load but for starters will simply be the # topics on the chain. 
43 | 44 | ## Replication 45 | 46 | Chain replication is used for durability. Each write to the `head` is replicated to the next node in the chain, and so on until the `tail` is reached. Writes are acked from the `tail` to the client and on an interval the latest id written to disk is acked to the preceding node in a chain by each node except the `head`. 47 | 48 | ## Handling Failure 49 | 50 | Each chain is a Kubernetes Stateful Set. A Stateful Set provides the ordering of the nodes and replacing a failed node with one of the same name and persistent storage. Thus in the case of partisan detecting a failure the chain will stop attempting to replicate and stop accepting new writes until healthy again. 51 | 52 | Each node acks writes from its predecessor, `N-`, after receiving an ack from their successor, `N+`. Until receiving the `N+` ack all writes are kept in a history. In the event of a failure `N` sends the writes from the history to the new `N+`. 53 | 54 | After an update is written (though possibly not flushed to disk depending on configuration) and sent to the next node in the chain it can continue to receive more updates without having received an `ack` from the next node in the chain. Nodes will periodically send acks with the latest record id they have written. Until an `ack` of an id larger or equal to a record it is kept in the history for possibly resending if a link has failed and it needs to send the writes to the new `N+`. 55 | 56 | **To deal with potentially dropped messages by Erlang's messaging layer we need to rely on the fact record id is always increasing by 1 to force a failure. This failure notifies the predecessor to resend starting at the last in-order record received.** 57 | 58 | The client is responsible for whether it wants to wait for an ack from the tail. It can keep sending writes before an ack arrives if it is ok with potentially losing writes. 
59 | 60 | **What happens if our process restarts and loses this in memory history of records not yet acked? We wouldn't want to have to consider the entire node dead when potentially other topics are fine. Maybe an ets table under the top level supervisor is required for storing this information?** 61 | 62 | **How do we handle a case of all nodes going down and having to resync with each other from what they have on disk?** 63 | 64 | ## Clients 65 | 66 | Clients discover chains through DNS records much the same as chains discover each other. After discovering chains (each individual chain has a unique DNS record named for the chain) the client can query a vonnegut node for information on the location of topics. 67 | 68 | The client caches the information about where the head and tail of chains are and has no need to update this information unless a request fails. In the event a client attempts to read from an old `tail` the node it is requesting from (assuming it is alive and the request doesn't just fail) returns the `tail` of the chain if it knows it, otherwise the client must query the `head` for an updated view of the chain. 69 | 70 | ## DNS 71 | 72 | SRV query `vonnegut` resolves to all chain node records `-{0..N}.vonnegut`. The records `-{0..N}.vonnegut` resolve to individual nodes in the chain named ``. 
73 | 74 | ## References 75 | 76 | * [Chain Replication for Supporting High Throughput and Availability](http://www.cs.cornell.edu/home/rvr/papers/OSDI04.pdf) 77 | * [Chain replication in theory and in practice](http://www.snookles.com/scott/publications/erlang2010-slf.pdf) 78 | * [Kubernetes](http://kubernetes.io/) 79 | * [Kubernetes StatefulSets](http://kubernetes.io/docs/concepts/abstractions/controllers/statefulsets/) 80 | * [Kafka Protocol](https://kafka.apache.org/protocol) 81 | -------------------------------------------------------------------------------- /docs/implementation.md: -------------------------------------------------------------------------------- 1 | ## Iteration 0 Implementation 2 | 3 | ### Cluster Manager 4 | 5 | A single global process in the cluster, `vg_cluster_mgr`, is responsible for topic creation and mapping between topics and chains. This manager is to be an abstraction on top of multiple implementations for management, such as riak ensemble based consensus. 6 | 7 | ### ETS Tables 8 | 9 | * `logs_segments_table`: This table is an in memory and index representation of the topic segments found on disk. This allows for a quick ets query to find the segment a specific offset is to be found. The segments index file is then searched to find the exact file position to read from. 10 | 11 | * `high_watermarks_table`: Fetch responses must include the high watermark (highest message offset) for the topics included in the response. This value is tracked by a global ets table mapping topics to high watermarks. This value is updated after a messageset is written, so it does not get updated per message but per set written. 12 | 13 | * `chains_table` 14 | 15 | * `topic_map`: Requests bound for a specific topic must lookup the head or tail node in the chain that is responsible for that specific topic. 
This ets table is responsible for storing this mapping which is updated by querying the `cluster manager` directly or a `vg_client:metadata` or Kafka client metadata request to any node in the vonnegut cluster of chains. 16 | 17 | ### Client Refresh 18 | -------------------------------------------------------------------------------- /helm/README.md: -------------------------------------------------------------------------------- 1 | Running in Minikube 2 | --- 3 | 4 | Requirements: 5 | 6 | * [minikube](https://github.com/kubernetes/minikube) 7 | * [helm](http://helm.sh/) 8 | 9 | After installing (see mac instructions below) and starting [minikube](https://github.com/kubernetes/minikube) set your docker environment to use the docker daemon in the minikube VM: 10 | 11 | ```shell 12 | $ eval $(minikube docker-env) 13 | ``` 14 | 15 | Now when you run the script `bin/docker_build.sh` it will create an image accessible by kubernetes in minikube: 16 | 17 | ```shell 18 | $ bin/docker_build.sh 19 | + rebar3 as prod tar 20 | .... 21 | + mv _build/prod/rel/vonnegut/vonnegut-0.0.1.tar.gz ./ 22 | ++ sed -n 's/vonnegut-\(.*\).tar.gz/\1/p' 23 | ++ ls vonnegut-0.0.1.tar.gz 24 | + VERSION=0.0.1 25 | + mv vonnegut-0.0.1.tar.gz vonnegut.tar.gz 26 | + docker build --rm=false -t us.gcr.io/nucleus-sti/vonnegut:0.0.1 . 27 | Sending build context to Docker daemon 120.2 MB 28 | Step 1 : FROM ubuntu:16.04 29 | .... 30 | Step 7 : ENTRYPOINT /opt/vonnegut/bin/vonnegut 31 | ---> Running in 6471152fa506 32 | ---> 2dc5ff8e5568 33 | Successfully built 2dc5ff8e5568 34 | + docker push us.gcr.io/nucleus-sti/vonnegut:0.0.1 35 | The push refers to a repository [us.gcr.io/nucleus-sti/vonnegut] 36 | 5f1584b0d108: Pushed 37 | .... 
38 | 32d75bc97c41: Layer already exists 39 | 0.0.1: digest: sha256:c354e26c97d74db6f5a22c8f593f28b05c3233f9ab6b3bdefaae1e83f679625e size: 1987 40 | + rm vonnegut.tar.gz 41 | ``` 42 | 43 | Next, use the helm package provided in the repo to create a vonnegut cluster: 44 | 45 | ```shell 46 | $ cd helm 47 | $ helm init 48 | $ helm install vonnegut 49 | NAME: solemn-otter 50 | LAST DEPLOYED: Fri Jan 6 13:21:46 2017 51 | NAMESPACE: default 52 | STATUS: DEPLOYED 53 | 54 | RESOURCES: 55 | ==> v1/ConfigMap 56 | NAME DATA AGE 57 | solemn-otter-vonnegut-config 2 0s 58 | 59 | ==> v1/Service 60 | NAME CLUSTER-IP EXTERNAL-IP PORT(S) AGE 61 | vonnegut None 5555/TCP 0s 62 | 63 | ==> apps/StatefulSet 64 | NAME DESIRED CURRENT AGE 65 | chain1 2 1 0s 66 | ``` 67 | 68 | After a few seconds both pods should come up and form a cluster: 69 | 70 | ```shell 71 | $ kubectl get statefulset 72 | NAME DESIRED CURRENT AGE 73 | chain1 2 2 25m 74 | $ kubectl get pods 75 | NAME READY STATUS RESTARTS AGE 76 | chain1-0 1/1 Running 0 26m 77 | chain1-1 1/1 Running 0 25m 78 | ``` 79 | 80 | #### Mac installation notes 81 | 82 | Install minikube via the commands in the instructions and helm via `brew install kubernetes-helm`, and go (via `brew install go`) if you don't have it installed. If you have to install go, remember to set up your `GOPATH`. 
83 | 84 | Then the following incantation will build the xhyve driver to work with Docker for Mac: 85 | 86 | ```shell 87 | brew install xhyve 88 | export GO15VENDOREXPERIMENT=1 89 | go get -u -d github.com/zchee/docker-machine-driver-xhyve 90 | cd $GOPATH/src/github.com/zchee/docker-machine-driver-xhyve 91 | make install # this will prompt for your password 92 | sudo chown root:wheel /usr/local/bin/docker-machine-driver-xhyve 93 | sudo chmod u+s /usr/local/bin/docker-machine-driver-xhyve 94 | minikube start --vm-driver=xhyve --container-runtime=docker --show-libmachine-logs --v=10 --alsologtostderr 95 | eval $(minikube docker-env) 96 | bin/docker_build.sh 97 | ``` 98 | 99 | Note that this does not currently work because of a version mismatch between the version of docker bundled with minikube and the one in Docker for Mac, but I'm preserving these instructions in the hopes that it works one day. 100 | -------------------------------------------------------------------------------- /helm/storage-class.yaml: -------------------------------------------------------------------------------- 1 | kind: StorageClass 2 | apiVersion: storage.k8s.io/v1beta1 3 | metadata: 4 | name: slow 5 | provisioner: kubernetes.io/gce-pd 6 | parameters: 7 | type: pd-standard 8 | -------------------------------------------------------------------------------- /helm/vonnegut/.helmignore: -------------------------------------------------------------------------------- 1 | # Patterns to ignore when building packages. 2 | # This supports shell glob matching, relative path matching, and 3 | # negation (prefixed with !). Only one pattern per line. 
4 | .DS_Store 5 | # Common VCS dirs 6 | .git/ 7 | .gitignore 8 | .bzr/ 9 | .bzrignore 10 | .hg/ 11 | .hgignore 12 | .svn/ 13 | # Common backup files 14 | *.swp 15 | *.bak 16 | *.tmp 17 | *~ 18 | # Various IDEs 19 | .project 20 | .idea/ 21 | *.tmproj 22 | -------------------------------------------------------------------------------- /helm/vonnegut/Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | description: A Helm chart for Kubernetes 3 | name: vonnegut 4 | version: 0.1.0 5 | -------------------------------------------------------------------------------- /helm/vonnegut/templates/NOTES.txt: -------------------------------------------------------------------------------- 1 | 1. Get the application URL by running these commands: 2 | export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app={{ template "fullname" . }}" -o jsonpath="{.items[0].metadata.name}") 3 | echo "Visit http://127.0.0.1:8080 to use your application" 4 | kubectl port-forward $POD_NAME 8080:{{ .Values.service.externalPort }} 5 | -------------------------------------------------------------------------------- /helm/vonnegut/templates/_helpers.tpl: -------------------------------------------------------------------------------- 1 | {{/* 2 | Create a default fully qualified app name. 3 | We truncate at 24 chars because some Kubernetes name fields are limited to this 4 | (by the DNS naming spec). 
5 | */}} 6 | {{define "fullname"}} 7 | {{- $name := default "vonnegut" .Values.nameOverride -}} 8 | {{printf "%s-%s" .Release.Name $name | trunc 24 -}} 9 | {{end}} 10 | -------------------------------------------------------------------------------- /helm/vonnegut/templates/config_map.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ConfigMap 3 | metadata: 4 | name: "{{ .Release.Name }}-vonnegut-config" 5 | data: 6 | vonnegut.discovery_domain: "_partisan._tcp.{{ .Values.service.name }}.default.svc.cluster.local" 7 | vonnegut.replicas: "{{ .Values.replicaCount }}" 8 | -------------------------------------------------------------------------------- /helm/vonnegut/templates/service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: "{{ .Values.service.name }}" 5 | labels: 6 | chart: "{{ .Chart.Name }}-{{ .Chart.Version }}" 7 | app: {{ template "fullname" . }} 8 | type: service 9 | spec: 10 | clusterIP: None 11 | ports: 12 | - port: {{ .Values.service.externalPort }} 13 | targetPort: {{ .Values.service.internalPort }} 14 | protocol: TCP 15 | name: data 16 | - port: {{ .Values.service.partisanPort }} 17 | targetPort: {{ .Values.service.partisanPort }} 18 | protocol: TCP 19 | name: partisan 20 | selector: 21 | app: {{ template "fullname" . }} 22 | type: node 23 | -------------------------------------------------------------------------------- /helm/vonnegut/templates/stateful-set.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1beta1 2 | kind: StatefulSet 3 | metadata: 4 | name: {{ .Values.chain.name }} 5 | labels: 6 | chart: "{{ .Chart.Name }}-{{ .Chart.Version }}" 7 | app: {{ template "fullname" . 
}} 8 | type: statefulset 9 | spec: 10 | serviceName: "{{ .Values.service.name }}" 11 | replicas: {{ .Values.replicaCount }} 12 | template: 13 | metadata: 14 | labels: 15 | app: {{ template "fullname" . }} 16 | type: node 17 | spec: 18 | terminationGracePeriodSeconds: 10 19 | containers: 20 | - name: {{ .Chart.Name }} 21 | image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" 22 | imagePullPolicy: {{ .Values.image.pullPolicy }} 23 | 24 | command: ["/opt/vonnegut/bin/vonnegut", "foreground"] 25 | 26 | ports: 27 | - containerPort: {{ .Values.service.internalPort }} 28 | name: data 29 | protocol: TCP 30 | - containerPort: {{ .Values.service.partisanPort }} 31 | name: partisan 32 | protocol: TCP 33 | volumeMounts: 34 | - name: data 35 | mountPath: /opt/vonnegut/data 36 | env: 37 | - name: DISCOVERY_DOMAIN 38 | valueFrom: 39 | configMapKeyRef: 40 | name: "{{ .Release.Name }}-vonnegut-config" 41 | key: vonnegut.discovery_domain 42 | - name: REPLICAS 43 | valueFrom: 44 | configMapKeyRef: 45 | name: "{{ .Release.Name }}-vonnegut-config" 46 | key: vonnegut.replicas 47 | - name: CHAIN_NAME 48 | value: {{ .Values.chain.name }} 49 | 50 | volumeClaimTemplates: 51 | - metadata: 52 | name: data 53 | annotations: 54 | volume.alpha.kubernetes.io/storage-class: hostpath 55 | spec: 56 | accessModes: [ "ReadWriteOnce" ] 57 | resources: 58 | requests: 59 | storage: 256Mi 60 | -------------------------------------------------------------------------------- /helm/vonnegut/values.yaml: -------------------------------------------------------------------------------- 1 | # Default values for vonnegut. 2 | # This is a YAML-formatted file. 3 | # Declare variables to be passed into your templates. 
4 | replicaCount: 2 5 | image: 6 | repository: us.gcr.io/nucleus-sti/vonnegut 7 | tag: 0.0.1 8 | pullPolicy: IfNotPresent 9 | service: 10 | name: vonnegut 11 | externalPort: 5555 12 | internalPort: 5555 13 | partisanPort: 10200 14 | chain: 15 | name: chain1 16 | resources: 17 | limits: 18 | cpu: 100m 19 | memory: 128Mi 20 | requests: 21 | cpu: 100m 22 | memory: 128Mi 23 | -------------------------------------------------------------------------------- /include/vg.hrl: -------------------------------------------------------------------------------- 1 | -define(CLIENT_ID, "vg_client"). 2 | -define(DEFAULT_PORT, 5588). 3 | -define(MAX_REQUEST_ID, 2147483647). 4 | 5 | -define(MAGIC_TWO, 2). 6 | -define(API_VERSION, 2). 7 | 8 | %% a recordbatch starts with FirstOffset:64, Length:32 9 | %% so 12 is an often used constant when reading batches 10 | -define(OFFSET_AND_LENGTH_BYTES, 12). 11 | 12 | -define(INDEX_ENTRY_SIZE, 8). % bytes 13 | -define(INDEX_OFFSET_BITS, 32). 14 | -define(INDEX_POS_BITS, 32). 15 | 16 | -define(PRODUCE_REQUEST, 0). 17 | -define(FETCH_REQUEST, 1). 18 | -define(METADATA_REQUEST, 3). 19 | 20 | -define(COMPRESS_NONE, 0). 21 | -define(COMPRESS_GZIP, 1). 22 | -define(COMPRESS_SNAPPY, 2). 23 | -define(COMPRESS_LZ4, 3). 24 | 25 | -define(COMPRESSION_MASK, 7). 26 | -define(COMPRESSION(Attr), ?COMPRESSION_MASK band Attr). 27 | 28 | %% non-kafka extension 29 | -define(TOPICS_REQUEST, 1000). 30 | -define(FETCH2_REQUEST, 1001). 31 | -define(ENSURE_REQUEST, 1002). 32 | -define(REPLICATE_REQUEST, 1003). 33 | -define(DELETE_TOPIC_REQUEST, 1004). 34 | -define(REPLICATE_DELETE_TOPIC_REQUEST, 1005). 35 | 36 | -define(UNKNOWN_ERROR, -1). 37 | -define(NO_ERROR, 0). 38 | -define(UNKNOWN_TOPIC_OR_PARTITION, 3). 39 | -define(NOT_LEADER_ERROR, 6). % reusing this to mean topic map has chaned 40 | -define(TIMEOUT_ERROR, 7). 41 | 42 | %% non-kafka extensions 43 | -define(FETCH_DISALLOWED_ERROR, 129). 44 | -define(PRODUCE_DISALLOWED_ERROR, 131). 
45 | -define(WRITE_REPAIR, 133). 46 | -define(REPLICATE_DISALLOWED_ERROR, 135). 47 | 48 | -define(SEGMENTS_TABLE, logs_segments_table). 49 | -define(WATERMARK_TABLE, high_watermarks_table). 50 | -define(CHAINS_TABLE, chains_table). 51 | 52 | -define(topic_map, topic_map). 53 | 54 | -record(chain, { 55 | name :: binary() | atom(), 56 | nodes :: [atom()] | undefined, 57 | topics_start :: binary() | start_space | undefined, % undef required because there's no way 58 | topics_end :: binary() | end_space | undefined, % to encode these in metadata :\ 59 | head :: {inet:ip_address() | inet:hostname(), inet:port_number()}, 60 | tail :: {inet:ip_address() | inet:hostname(), inet:port_number()} 61 | }). 62 | -type chain() :: #chain{}. 63 | 64 | -ifdef('OTP_RELEASE'). 65 | -define(WITH_STACKTRACE(T, R, S), T:R:S ->). 66 | -else. 67 | -define(WITH_STACKTRACE(T, R, S), T:R -> S = erlang:get_stacktrace(),). 68 | -endif. 69 | -------------------------------------------------------------------------------- /rebar.config: -------------------------------------------------------------------------------- 1 | %% -*- erlang -*- 2 | {erl_opts, 3 | [debug_info, 4 | warn_untyped_records, 5 | warnings_as_errors, 6 | nowarn_export_all, 7 | {parse_transform, lager_transform}]}. 8 | 9 | {xref_checks,[undefined_function_calls,undefined_functions,locals_not_used, 10 | deprecated_function_calls, 11 | deprecated_functions]}. 12 | %% ignore these warnings because lz4 and snappyer are optional dependencies 13 | {xref_ignores, [{lz4, unpack, 1}, 14 | {snappyer, decompress, 1}]}. 
15 | 16 | {deps, [erlware_commons, 17 | acceptor_pool, 18 | {shackle, {git, "https://github.com/lpgauth/shackle.git", {branch, "master"}}}, 19 | {partisan, {git, "https://github.com/lasp-lang/partisan.git", {branch, "master"}}}, 20 | lager, 21 | recon, 22 | gproc, 23 | backoff, 24 | oc_google_reporter, 25 | {opencensus, {git, "https://github.com/census-instrumentation/opencensus-erlang.git", {branch, "master"}}}, 26 | 27 | %% metrics and health check deps 28 | elli, 29 | prometheus, 30 | elli_prometheus]}. 31 | 32 | {relx, [{release, {vonnegut, "semver"}, 33 | [vonnegut]}, 34 | 35 | {dev_mode, true}, 36 | {include_erts, false}, 37 | 38 | {sys_config, "config/sys.config"}, 39 | {vm_args, "config/vm.args"}, 40 | 41 | {extended_start_script, true}, 42 | 43 | {overlay, [{copy, "config/shared.config", "config/shared.config"}]}]}. 44 | 45 | {project_plugins, [{rebar3_proper, {git, "https://github.com/ferd/rebar3_proper.git", {branch, "master"}}}, 46 | covertool]}. 47 | 48 | {cover_enabled, true}. 49 | {cover_opts, [verbose]}. 50 | {cover_export_enabled, true}. 51 | 52 | {covertool, [{coverdata_files, ["ct.coverdata"]}]}. 53 | 54 | {profiles, 55 | [{test, [ 56 | {deps, [{brod, "3.0.0"}, meck, {proper, "1.2.0"}]} 57 | ]}, 58 | {prod, [{relx, [{sys_config, "config/prod_sys.config"}, 59 | {vm_args, "config/prod_vm.args"}, 60 | {dev_mode, false}, 61 | {include_erts, true}, 62 | {include_src, false}, 63 | {debug_info, strip} 64 | ]}]} 65 | ]}. 66 | 67 | {proper_opts, [{sys_config, "config/proper.config"}]}. 68 | {ct_opts, [{sys_config, "config/test.config"}, 69 | {ct_hooks, [cth_surefire]}]}. 70 | {dist_node, [{name, 'testrunner@127.0.0.1'}]}. 
71 | -------------------------------------------------------------------------------- /rebar.lock: -------------------------------------------------------------------------------- 1 | {"1.1.0", 2 | [{<<"accept">>,{pkg,<<"accept">>,<<"0.3.0">>},1}, 3 | {<<"acceptor_pool">>,{pkg,<<"acceptor_pool">>,<<"1.0.0-rc.0">>},0}, 4 | {<<"augle">>,{pkg,<<"augle">>,<<"0.3.0">>},1}, 5 | {<<"backoff">>, 6 | {git,"https://github.com/evanmcc/backoff.git", 7 | {ref,"13f23b9ebb3604a4322e3a96d0633a75015c792d"}}, 8 | 0}, 9 | {<<"certifi">>,{pkg,<<"certifi">>,<<"2.0.0">>},2}, 10 | {<<"cf">>,{pkg,<<"cf">>,<<"0.3.1">>},1}, 11 | {<<"counters">>,{pkg,<<"counters">>,<<"0.2.0">>},1}, 12 | {<<"ctx">>,{pkg,<<"ctx">>,<<"0.4.1">>},1}, 13 | {<<"elli">>,{pkg,<<"elli">>,<<"3.0.0">>},0}, 14 | {<<"elli_prometheus">>,{pkg,<<"elli_prometheus">>,<<"0.1.1">>},0}, 15 | {<<"erlware_commons">>,{pkg,<<"erlware_commons">>,<<"1.2.0">>},0}, 16 | {<<"foil">>,{pkg,<<"foil">>,<<"0.1.1">>},1}, 17 | {<<"goldrush">>,{pkg,<<"goldrush">>,<<"0.1.9">>},1}, 18 | {<<"gproc">>,{pkg,<<"gproc">>,<<"0.6.1">>},0}, 19 | {<<"granderl">>,{pkg,<<"granderl">>,<<"0.1.5">>},1}, 20 | {<<"hackney">>,{pkg,<<"hackney">>,<<"1.9.0">>},1}, 21 | {<<"idna">>,{pkg,<<"idna">>,<<"5.1.0">>},2}, 22 | {<<"jsx">>,{pkg,<<"jsx">>,<<"2.8.2">>},1}, 23 | {<<"lager">>,{pkg,<<"lager">>,<<"3.6.1">>},0}, 24 | {<<"metal">>,{pkg,<<"metal">>,<<"0.1.1">>},1}, 25 | {<<"metrics">>,{pkg,<<"metrics">>,<<"1.0.1">>},2}, 26 | {<<"mimerl">>,{pkg,<<"mimerl">>,<<"1.0.2">>},2}, 27 | {<<"oc_google_reporter">>, 28 | {git,"https://github.com/tsloughter/oc_google_reporter.git", 29 | {ref,"4ebd9918fecca28cd3629f0524dc811b2b3653aa"}}, 30 | 0}, 31 | {<<"opencensus">>, 32 | {git,"https://github.com/census-instrumentation/opencensus-erlang.git", 33 | {ref,"2096818702a242102dcfb8dd18010a4bb0cc17df"}}, 34 | 0}, 35 | {<<"partisan">>, 36 | {git,"https://github.com/lasp-lang/partisan.git", 37 | {ref,"8e2a6508ed958fd1dd0b4926a35e6e96b441b17d"}}, 38 | 0}, 39 | 
{<<"prometheus">>,{pkg,<<"prometheus">>,<<"4.1.0">>},0}, 40 | {<<"quickrand">>,{pkg,<<"quickrand">>,<<"1.7.3">>},2}, 41 | {<<"rand_compat">>,{pkg,<<"rand_compat">>,<<"0.0.3">>},1}, 42 | {<<"recon">>,{pkg,<<"recon">>,<<"2.3.4">>},0}, 43 | {<<"rfc3339">>,{pkg,<<"rfc3339">>,<<"0.9.0">>},2}, 44 | {<<"shackle">>, 45 | {git,"https://github.com/lpgauth/shackle.git", 46 | {ref,"58f4adb067e677512b3a2af43c445734a4aeec64"}}, 47 | 0}, 48 | {<<"ssl_verify_fun">>,{pkg,<<"ssl_verify_fun">>,<<"1.1.1">>},2}, 49 | {<<"time_compat">>,{pkg,<<"time_compat">>,<<"0.0.1">>},1}, 50 | {<<"types">>,{pkg,<<"types">>,<<"0.1.6">>},1}, 51 | {<<"unicode_util_compat">>,{pkg,<<"unicode_util_compat">>,<<"0.3.1">>},3}, 52 | {<<"uuid">>,{pkg,<<"uuid_erl">>,<<"1.7.3">>},1}, 53 | {<<"wts">>,{pkg,<<"wts">>,<<"0.3.0">>},1}]}. 54 | [ 55 | {pkg_hash,[ 56 | {<<"accept">>, <<"2505B60BCB992CA79BD03AB7B8FEC8A520A47D9730F286DF1A479CC98B03F94B">>}, 57 | {<<"acceptor_pool">>, <<"679D741DF87FC13599B1AEF2DF8F78F1F880449A6BEFAB7C44FB6FAE0E92A2DE">>}, 58 | {<<"augle">>, <<"25633E47BB163ECB74EB34628DB49B622FEC659137C2A3250FA5430965DF272B">>}, 59 | {<<"certifi">>, <<"A0C0E475107135F76B8C1D5BC7EFB33CD3815CB3CF3DEA7AEFDD174DABEAD064">>}, 60 | {<<"cf">>, <<"5CB902239476E141EA70A740340233782D363A31EEA8AD37049561542E6CD641">>}, 61 | {<<"counters">>, <<"EF00F33404FDD9BD233F9B7966233222469E4560DBE1C712EA2E1AB63BB8FEFD">>}, 62 | {<<"ctx">>, <<"E4297DD25CCDE992BC7DE298F514BEACD0A44FAA9126A1F2567306D94C519A13">>}, 63 | {<<"elli">>, <<"D7CC24CFA886AC6A51D369B2C974392BEC9CD1E1CAA3931194D2BF52B763D82F">>}, 64 | {<<"elli_prometheus">>, <<"FF41EA8D88D1EBD1CD7A6D43FCC02B33B47FF20272C097B9D3A3CCCD79980C05">>}, 65 | {<<"erlware_commons">>, <<"2BAB99CF88941145767A502F1209886F1F0D31695EEF21978A30F15E645721E0">>}, 66 | {<<"foil">>, <<"4D07B62C114636BBC3EEBD5CEE04B23A7AAB1262B0F68AA79005A6FBC3790472">>}, 67 | {<<"goldrush">>, <<"F06E5D5F1277DA5C413E84D5A2924174182FB108DABB39D5EC548B27424CD106">>}, 68 | {<<"gproc">>, 
<<"4579663E5677970758A05D8F65D13C3E9814EC707AD51D8DCEF7294EDA1A730C">>}, 69 | {<<"granderl">>, <<"F20077A68BD80B8D8783BD15A052813C6483771DEC1A5B837D307CBE92F14122">>}, 70 | {<<"hackney">>, <<"51C506AFC0A365868469DCFC79A9D0B94D896EC741CFD5BD338F49A5EC515BFE">>}, 71 | {<<"idna">>, <<"D72B4EFFEB324AD5DA3CAB1767CB16B17939004E789D8C0AD5B70F3CEA20C89A">>}, 72 | {<<"jsx">>, <<"7ACC7D785B5ABE8A6E9ADBDE926A24E481F29956DD8B4DF49E3E4E7BCC92A018">>}, 73 | {<<"lager">>, <<"9D29C5FF7F926D25ECD9899990867C9152DCF34EEE65BAC8EC0DFC0D16A26E0C">>}, 74 | {<<"metal">>, <<"5D3D1322DA7BCD34B94FED5486F577973685298883954F7A3E517EF5EF6953F5">>}, 75 | {<<"metrics">>, <<"25F094DEA2CDA98213CECC3AEFF09E940299D950904393B2A29D191C346A8486">>}, 76 | {<<"mimerl">>, <<"993F9B0E084083405ED8252B99460C4F0563E41729AB42D9074FD5E52439BE88">>}, 77 | {<<"prometheus">>, <<"3BB851DF031C204D1C94BF55FFF2ECC9AB834F0236E64C080C9D5945B48D428D">>}, 78 | {<<"quickrand">>, <<"0E4FB48FAC904FE0C6E21D7E8C31A288A0700E1E81A35B38B649FC119079755D">>}, 79 | {<<"rand_compat">>, <<"011646BC1F0B0C432FE101B816F25B9BBB74A085713CEE1DAFD2D62E9415EAD3">>}, 80 | {<<"recon">>, <<"B406C2FCCDEAA0D94E23B5E30AE3D635A2D461E363A5C9C6316897037CF050D2">>}, 81 | {<<"rfc3339">>, <<"2075653DC9407541C84B1E15F8BDA2ABE95FB17C9694025E079583F2D19C1060">>}, 82 | {<<"ssl_verify_fun">>, <<"28A4D65B7F59893BC2C7DE786DEC1E1555BD742D336043FE644AE956C3497FBE">>}, 83 | {<<"time_compat">>, <<"23FE0AD1FDF3B5B88821B2D04B4B5E865BF587AE66056D671FE0F53514ED8139">>}, 84 | {<<"types">>, <<"03BB7140016C896D3441A77CB0B7D6ACAA583D6D6E9C4A3E1FD3C25123710290">>}, 85 | {<<"unicode_util_compat">>, <<"A1F612A7B512638634A603C8F401892AFBF99B8CE93A45041F8AACA99CADB85E">>}, 86 | {<<"uuid">>, <<"C5DF97D1A3D626235C2415E74053C47B2138BB863C5CD802AB5CAECB8ECC019F">>}, 87 | {<<"wts">>, <<"5CDF22C775CB1EBAE24C326A5DB6074D753C42F4BD12A9AA47CC62D3E2C71AD1">>}]} 88 | ]. 
89 | -------------------------------------------------------------------------------- /src/vg.erl: -------------------------------------------------------------------------------- 1 | -module(vg). 2 | 3 | %% client interface 4 | -export([ensure_topic/1, 5 | write_record_batch/3, 6 | write/3, write/4, 7 | fetch/1, fetch/2, fetch/4, 8 | fetch/5]). 9 | 10 | %% ops interface. 11 | -export([ 12 | create_topic/1, 13 | delete_topic/1, 14 | describe_topic/1, 15 | deactivate_topic/1, 16 | regenerate_topic_index/1, 17 | tail_topic/1, tail_topic/2, 18 | running_topics/0 19 | ]). 20 | 21 | -include("vg.hrl"). 22 | 23 | -type topic() :: binary(). 24 | 25 | -type record() :: #{offset => integer(), 26 | timestamp => integer(), 27 | key => binary(), 28 | value := binary(), 29 | headers => [{unicode:characters_binary(), binary()}]}. 30 | 31 | -type record_batch() :: #{crc := integer(), 32 | producer_id => integer(), 33 | producer_epoch => integer(), 34 | sequence_number => integer(), 35 | records := [record()]}. 36 | 37 | -export_types([topic/0, 38 | record/0, 39 | record_batch/0]). 40 | 41 | -spec create_topic(Topic :: topic()) -> ok. 42 | create_topic(Topic) -> 43 | case validate_topic(Topic) of 44 | ok -> 45 | {ok, _Chain} = vg_cluster_mgr:create_topic(Topic), 46 | ok; 47 | {error, Reason} -> 48 | {error, Reason} 49 | end. 50 | 51 | -spec ensure_topic(Topic :: topic()) -> ok. 52 | ensure_topic(Topic) -> 53 | case validate_topic(Topic) of 54 | ok -> 55 | {ok, _Chain} = vg_cluster_mgr:ensure_topic(Topic), 56 | ok; 57 | {error, Reason} -> 58 | {error, Reason} 59 | end. 
60 | 61 | validate_topic(B) when is_binary(B) -> 62 | Disallowed = 63 | [ 64 | <<0>>, 65 | <<"/">>, % path separators 66 | <<"\\">>, 67 | <<"*">>, 68 | <<".">>, <<"..">>, 69 | <<"[">>, <<"]">>, 70 | <<"(">>, <<")">>, 71 | <<"{">>, <<"}">> 72 | ], 73 | case binary:match(B, Disallowed) of 74 | nomatch -> 75 | ok; 76 | _ -> 77 | {error, invalid_characters} 78 | end; 79 | validate_topic(_) -> 80 | {error, non_binary_topic}. 81 | 82 | -spec write_record_batch(Topic, Partition, RecordBatch) -> {ok, integer()} | {error, any()} when 83 | Topic :: topic(), 84 | Partition :: non_neg_integer(), 85 | RecordBatch :: vg:record_batch(). 86 | write_record_batch(Topic, Partition, RecordBatch) -> 87 | vg_active_segment:write(Topic, Partition, RecordBatch). 88 | 89 | -spec write(Topic, Partition, Records) -> ok | {error, any()} when 90 | Topic :: topic(), 91 | Partition :: non_neg_integer(), 92 | Records :: binary() | [binary()]. 93 | write(Topic, Partition, Records) -> 94 | RecordBatch = vg_protocol:encode_record_batch(Records), 95 | vg_active_segment:write(Topic, Partition, RecordBatch). 96 | 97 | write(Topic, Partition, ExpectedId, RecordBatch) -> 98 | vg_active_segment:write(Topic, Partition, ExpectedId, RecordBatch). 99 | 100 | fetch(Topic) -> 101 | fetch(Topic, 0). 102 | 103 | -spec fetch(Topic, Offset) -> {ok, RecordBatches} when 104 | Topic :: topic(), 105 | Offset :: integer(), 106 | RecordBatches :: #{high_water_mark := integer(), 107 | partition := 0, 108 | record_batches := [vg:record_batch()]}. 109 | fetch(Topic, Offset) -> 110 | fetch(Topic, 0, Offset, -1). 
%% Fetch up to Count records from Offset: resolve the byte range via
%% fetch/5, then read and decode that slice of the segment file.
fetch(Topic, Partition, Offset, Count) ->
    {_, _, {File, Position, Bytes}} = fetch(Topic, Partition, Offset, 0, Count),
    {ok, Fd} = file:open(File, [read, binary, raw]),
    try
        {ok, [Data]} = file:pread(Fd, [{Position, Bytes}]),
        {ok, #{high_water_mark => vg_topics:lookup_hwm(Topic, Partition),
               partition => Partition,
               record_batches => vg_protocol:decode_record_batches(Data)}}
    after
        file:close(Fd)
    end.

%% fetch/5 is a special form that only returns sizes and positions for
%% later framing and sending

%% A fetch of offset -1 returns Limit number of the records up to the
%% high watermark
fetch(Topic, Partition, -1, MaxBytes, Limit) ->
    HWM = vg_topics:lookup_hwm(Topic, Partition),
    fetch(Topic, Partition, erlang:max(0, HWM - Limit + 1), MaxBytes, Limit);
fetch(Topic, Partition, Offset, MaxBytes, Limit) ->
    {SegmentId, {Position, _}} = vg_log_segments:find_segment_offset(Topic, Partition, Offset),
    File = vg_utils:log_file(Topic, Partition, SegmentId),
    SendBytes =
        case Limit of
            -1 ->
                %% no limit: everything from Position to end of segment
                filelib:file_size(File) - Position;
            _ ->
                LastOffset = Offset + Limit,
                case vg_log_segments:find_log_segment(Topic, Partition, LastOffset) of
                    %% lastoffset is on the same segment, so limit fetch to lastoffset position
                    SegmentId ->
                        {EndPosition, EndSize} =
                            vg_log_segments:find_record_offset(Topic, Partition, SegmentId, LastOffset),
                        case EndPosition of
                            Position ->
                                %% in the same RecordBatch
                                EndSize;
                            _ ->
                                (EndPosition + EndSize) - Position
                        end;
                    %% some higher segment, so send this whole segment
                    _ ->
                        filelib:file_size(File) - Position
                end
        end,

    lager:info("at=fetch_request topic=~s partition=~p offset=~p segment_id=~p position=~p",
               [Topic, Partition, Offset, SegmentId, Position]),

    %% MaxBytes of 0 means "no cap"
    Bytes =
        case MaxBytes of
            0 -> SendBytes;
            _ -> min(SendBytes, MaxBytes)
        end,
    ErrorCode = 0,
    HWM = vg_topics:lookup_hwm(Topic, Partition),
    Response = vg_protocol:encode_fetch_topic_response(Partition, ErrorCode, HWM, Bytes),
    lager:debug("sending hwm=~p bytes=~p", [HWM, Bytes]),
    %% {total framed size, response header iolist, file slice to send}
    {erlang:iolist_size(Response)+Bytes, Response, {File, Position, Bytes}}.

%% these are here mostly for ergonomics. right now they just forward
%% the work to the cluster manager, but we might need to change that
%% later and this allows us to keep a easy to type interface that
%% doesn't have to change.
delete_topic(Topic) ->
    vg_cluster_mgr:delete_topic(Topic).

describe_topic(Topic) ->
    vg_cluster_mgr:describe_topic(Topic).

deactivate_topic(Topic) ->
    vg_cluster_mgr:deactivate_topic(Topic).

%% there's a debate here to be had about doing this all at once vs. a
%% per segment approach. wrt to format changes (which should be
%% ultra-rare), this is the right thing, but wrt index corruption
%% (which should also be super rare?), we might want the fine control
%% of regenerating a particular segment's index alone.
regenerate_topic_index(Topic) ->
    vg_topic_mgr:regenerate_index(Topic, 0).

%% Tail with default options (10 records / 30 seconds).
tail_topic(Topic) ->
    tail_topic(Topic, #{}).

-spec tail_topic(binary(), Opts) -> ok when
      Opts :: #{records => pos_integer(), % default 10 records
                time => pos_integer()}.   % default 30 seconds
%% Spawn a bounded-heap printer process and register it with the active
%% segment so new writes are echoed to it.
tail_topic(Topic, Opts) ->
    Printer = erlang:spawn_opt(fun() -> tail_printer(Topic, Opts) end,
                               [{max_heap_size, 1024 * 1024}]),
    vg_active_segment:tail(Topic, 0, Printer).

%% this is shaping up to be quite expensive and could block lazy
%% starts of deactivated topics. use in production with caution.
running_topics() ->
    vg_cluster_mgr:running_topics().
%% Loop run by the process spawned in tail_topic/2: prints up to
%% `records` terms received as {'$print', Term}, giving up once
%% `time` milliseconds have elapsed.
tail_printer(Topic, Opts) ->
    Records = maps:get(records, Opts, 10),
    Time = maps:get(time, Opts, timer:seconds(30)),
    EndTime = erlang:monotonic_time(milli_seconds) + Time,
    F = fun Loop(0, _End) ->
                io:format("printed ~p records, terminating~n", [Records]);
            Loop(R, End) ->
                %% remaining budget; also used as the receive timeout
                Left = End - erlang:monotonic_time(milli_seconds),
                case Left > 0 of
                    true ->
                        receive
                            {'$print', Term} ->
                                io:format("~p: ~p~n", [Topic, Term]),
                                Loop(R - 1, End)
                        after Left ->
                                io:format("tail session timed out~n")
                        end;
                    false ->
                        io:format("tail session timed out~n")
                end
        end,
    F(Records, EndTime).

%% -------------------------------------------------------------------------
%% /src/vg_active_segment.erl
%% -------------------------------------------------------------------------

%%
-module(vg_active_segment).

-behaviour(gen_statem).

-export([start_link/3,
         write/3,
         write/4,
         halt/2,
         tail/3,
         where/2,
         stop_indexing/2,
         resume_indexing/2]).

-export([init/1,
         callback_mode/0,
         active/3,
         halted/3,
         handle_event/3,
         terminate/3]).

-include("vg.hrl").

%% static settings read from the application environment (setup_config/0)
-record(config, {log_dir :: file:filename(),
                 segment_bytes :: integer(),
                 index_max_bytes :: integer(),
                 index_interval_bytes :: integer()}).

%% gen_statem data for the active (writable) segment of a topic/partition
-record(data, {topic_dir :: file:filename(),      % directory holding segment + index files
               next_id :: integer(),              % next record offset to assign
               next_brick :: atom(),              % next node in the chain, or solo/tail
               byte_count :: integer(),           % bytes written since last index entry
               pos :: integer(),                  % current write position in the log file
               index_pos :: integer(),            % current write position in the index file
               log_fd :: file:fd(),
               segment_id :: integer(),           % base offset of the current segment
               index_fd :: file:fd() | undefined, % undefined while indexing is stopped
               topic :: binary(),
               partition :: integer(),
               config :: #config{},
               halted = false :: boolean(),
               index = true :: boolean(),         % indexing enabled?
               tailer :: pid() | undefined,       % optional tail-printer process
               terminate_after :: integer(),      % idle ms before the statem stops itself
               timer_ref :: reference()           % idle-termination timer
              }).
%% need this until an Erlang release with `hibernate_after` spec added to gen option type
-dialyzer({nowarn_function, start_link/3}).

%% gproc via-tuple naming the active segment process for Topic/Partition
-define(ACTIVE_SEG(Topic, Partition), {via, gproc, {n, l, {active, Topic, Partition}}}).

%% Start (or return the already-running) active segment statem for
%% Topic/Partition. NextBrick is the next node in the replication chain.
start_link(Topic, Partition, NextBrick) ->
    HibernateAfter = application:get_env(vonnegut, hibernate_after, timer:minutes(1)),
    case gen_statem:start_link(?ACTIVE_SEG(Topic, Partition), ?MODULE, [Topic, Partition, NextBrick],
                               [{hibernate_after, HibernateAfter}]) of % hibernate after the configured idle time (default 1 minute)
        {ok, Pid} ->
            {ok, Pid};
        {error, {already_started, Pid}} ->
            {ok, Pid};
        {error, Reason} ->
            {error, Reason}
    end.

-spec write(Topic, Partition, RecordBatch) -> {ok, Offset} | {error, any()} when
      Topic :: binary(),
      Partition :: integer(),
      RecordBatch :: vg:record_batch() | [vg:record_batch()],
      Offset :: integer().
%% Head write: the segment assigns the offset itself.
write(Topic, Partition, RecordBatch) ->
    write(Topic, Partition, head, RecordBatch).

%% NOTE(review): only a singleton list is unwrapped here; a list with
%% more than one element falls through to the second clause unchanged --
%% confirm callers never pass multi-element batch lists.
write(Topic, Partition, ExpectedId, [RecordBatch]) ->
    write_(Topic, Partition, ExpectedId, RecordBatch);
write(Topic, Partition, ExpectedId, RecordBatch) ->
    write_(Topic, Partition, ExpectedId, RecordBatch).

%% Call the active segment; create the topic and retry on noproc/badarg
%% (process or gproc entry missing), retry on `retry`, and surface
%% call timeouts as {error, timeout}.
write_(Topic, Partition, ExpectedId, RecordBatch) ->
    try
        case gen_statem:call(?ACTIVE_SEG(Topic, Partition), {write, ExpectedId, RecordBatch}) of
            retry ->
                write_(Topic, Partition, ExpectedId, RecordBatch);
            R -> R
        end
    catch _:{noproc, _} ->
            create_retry(Topic, Partition, ExpectedId, RecordBatch);
          error:badarg -> %% is this too broad? how to restrict?
            create_retry(Topic, Partition, ExpectedId, RecordBatch);
          exit:{timeout, _} ->
            {error, timeout}
    end.
%% Topic does not exist yet: ask the cluster manager to create it, then
%% retry the write (crashes if creation fails -- let it crash).
create_retry(Topic, Partition, ExpectedId, RecordBatch)->
    lager:warning("write to nonexistent topic '~s', creating", [Topic]),
    {ok, _} = vg_cluster_mgr:ensure_topic(Topic),
    write_(Topic, Partition, ExpectedId, RecordBatch).

%% Stop accepting writes (used while deleting the topic).
halt(Topic, Partition) ->
    gen_statem:call(?ACTIVE_SEG(Topic, Partition), halt).

%% Register Printer to receive {'$print', ...} messages for new writes.
tail(Topic, Partition, Printer) ->
    gen_statem:call(?ACTIVE_SEG(Topic, Partition), {tail, Printer}).

%% Pid of the active segment process, or undefined if not running.
where(Topic, Partition) ->
    {_, _, Where} = ?ACTIVE_SEG(Topic, Partition),
    gproc:where(Where).

stop_indexing(Topic, Partition) ->
    gen_statem:call(?ACTIVE_SEG(Topic, Partition), stop_indexing).

resume_indexing(Topic, Partition) ->
    gen_statem:call(?ACTIVE_SEG(Topic, Partition), resume_indexing).

%%%%%%%%%%%%

init([Topic, Partition, NextNode]) ->
    lager:info("at=init topic=~p next_server=~p", [Topic, NextNode]),
    Config = setup_config(),
    %% assertion: only partition 0 is currently supported
    Partition = 0,
    LogDir = Config#config.log_dir,
    TerminateAfter = application:get_env(vonnegut, terminate_after, timer:minutes(5)),
    TopicDir = filename:join(LogDir, [binary_to_list(Topic), "-", integer_to_list(Partition)]),
    filelib:ensure_dir(filename:join(TopicDir, "ensure")),

    vg_log_segments:load_all(Topic, Partition),

    {Id, LatestIndex, LatestLog} = vg_log_segments:find_latest_id(TopicDir, Topic, Partition),
    LastLogId = filename:basename(LatestLog, ".log"),
    {ok, LogFD} = vg_utils:open_append(LatestLog),
    {ok, IndexFD} = vg_utils:open_append(LatestIndex),

    %% resume appending where the files left off
    {ok, Position} = file:position(LogFD, eof),
    {ok, IndexPosition} = file:position(IndexFD, eof),

    vg_topics:insert_hwm(Topic, Partition, Id),

    {ok, active, #data{next_id = Id + 1,
                       next_brick = NextNode,
                       topic_dir = TopicDir,
                       byte_count = 0,
                       pos = Position,
                       index_pos = IndexPosition,
                       log_fd = LogFD,
                       segment_id = list_to_integer(LastLogId),
                       index_fd = IndexFD,
                       topic = Topic,
                       partition = Partition,
                       config = Config,
                       terminate_after = TerminateAfter,
                       timer_ref = erlang:start_timer(TerminateAfter, self(), terminate)
                      }}.

callback_mode() ->
    state_functions.

%% keep any new writes from coming in while we delete the topic
halted({call, From}, _, _) ->
    {keep_state_and_data, [{reply, From, halted}]}.

active({call, From}, halt, Data) ->
    {next_state, halted, Data, [{reply, From, halted}]};
active({call, From}, {tail, Printer}, Data) ->
    %% monitor so the tailer is forgotten when it dies (see handle_event DOWN)
    monitor(process, Printer),
    {keep_state, Data#data{tailer = Printer}, [{reply, From, ok}]};
active({call, From}, stop_indexing, Data=#data{index_fd=undefined}) ->
    {keep_state, Data#data{index = false}, [{reply, From, ok}]};
active({call, From}, stop_indexing, Data=#data{index_fd=FD}) ->
    %% no need to sync here, we're about to unlink
    file:close(FD),
    {keep_state, Data#data{index = false, index_fd = undefined}, [{reply, From, ok}]};
active({call, From}, resume_indexing, Data) ->
    %% index_fd stays undefined until the next segment roll reopens it
    {keep_state, Data#data{index = true}, [{reply, From, ok}]};
active({call, From}, {write, ExpectedID0, Record=#{last_offset_delta := LastOffsetDelta,
                                                   record_batch := RecordBatch}}, Data=#data{next_id=ID,
                                                                                            tailer=Tailer,
                                                                                            topic=Topic,
                                                                                            next_brick=NextBrick,
                                                                                            terminate_after=TerminateAfter,
                                                                                            timer_ref=TRef}) ->
    %% every write resets the idle-termination timer
    erlang:cancel_timer(TRef),
    TRef1 = erlang:start_timer(TerminateAfter, self(), terminate),
    Data1 = Data#data{timer_ref=TRef1},

    %% TODO: add pipelining of requests
    try
        ExpectedID =
            case ExpectedID0 of
                head ->
                    %% head of chain: assign the next offset ourselves
                    ID + LastOffsetDelta + 1;
                Supplied when is_integer(Supplied) ->
                    case (ID + LastOffsetDelta + 1) == Supplied of
                        true ->
                            ExpectedID0;
                        %% should we check > vs < here? one is repair
                        %% the other is bad corruption
                        _ ->
                            %% inferred current id of the writing segment
                            WriterID = ExpectedID0 - LastOffsetDelta,
                            %% this should probably be limited, if
                            %% we're going back too far, we need to be
                            %% in some sort of catch-up mode
                            lager:debug("starting write repair, ~p", [WriterID]),
                            WriteRepairSet = write_repair(WriterID, Data1),
                            throw({write_repair, WriteRepairSet, Data1})
                    end
            end,

        %% replicate downstream first (unless solo/tail); retry until the
        %% time budget (5x ack timeout) is exhausted
        Result =
            case NextBrick of
                Role when Role == solo; Role == tail -> proceed;
                _ ->
                    (fun Loop(_, Remaining) when Remaining =< 0 ->
                             {error, timeout};
                         Loop(Start, Remaining) ->
                             case vg_client:replicate(next_brick, Topic, ExpectedID, RecordBatch, Remaining) of
                                 retry ->
                                     Now = erlang:monotonic_time(milli_seconds),
                                     Elapsed = Now - Start,
                                     Loop(Now, Remaining - Elapsed);
                                 Result ->
                                     Result
                             end
                     end)(erlang:monotonic_time(milli_seconds), timeout() * 5)
            end,

        case Result of
            Go when Go =:= proceed orelse
                    element(1, Go) =:= ok ->
                %% replication (if any) acked: persist locally and notify tailer
                Data2 = write_record_batch(Record, Data1),
                case Tailer of
                    undefined ->
                        ok;
                    Pid ->
                        Pid ! {'$print', {Data2#data.next_id - 1, Record}}
                end,
                {keep_state, Data2, [{reply, From, {ok, Data2#data.next_id - 1}}]};
            {write_repair, RepairSet} ->
                %% downstream asked us to replay earlier batches first
                prometheus_counter:inc(write_repairs),
                %% add in the following when pipelining is added, if it makes sense
                %% prometheus_gauge:inc(pending_write_repairs, length(RepairSet)),
                Data2 = write_record_batch(RepairSet, Data1),
                case ExpectedID0 of
                    head ->
                        {keep_state, Data2, [{reply, From, retry}]};
                    _ ->
                        {keep_state, Data2, [{reply, From, {write_repair, RepairSet}}]}
                end;
            {error, Reason} ->
                {keep_state, Data1, [{reply, From, {error, Reason}}]}
        end
    catch throw:{write_repair, RS, D} ->
            {keep_state, D, [{reply, From, {write_repair, RS}}]};
          throw:{E, D} ->
            {keep_state, D, [{reply, From, {error, E}}]}
    end;
active(Type, Event, Data) ->
    handle_event(Type, Event, Data).


handle_event(info, {timeout, _TRef, terminate}, _Data) ->
    %% idle long enough: stop and release the file descriptors
    {stop, normal};
handle_event(info, {'DOWN', _MonitorRef, _Type, _Object, _Info}, Data) ->
    %% tail printer died; drop it
    {keep_state, Data#data{tailer = undefined}}.

terminate(_, _Reason, _Data=#data{log_fd=LogFile,
                                  index_fd=IndexFile}) ->
    file:close(LogFile),
    file:close(IndexFile),
    ok.
%

%% Append one or more record batches to the log.
%%
%% The list clause folds each batch through in order; the map clause
%% appends a single batch: roll the segment if needed, write the framed
%% batch, maybe write an index entry, and bump the high watermark.
write_record_batch(Batches, Data) when is_list(Batches) ->
    lists:foldl(fun(Batch, DataAcc) ->
                        write_record_batch(Batch, DataAcc)
                end, Data, Batches);
write_record_batch(#{last_offset_delta := LastOffsetDelta,
                     size := Size0,
                     record_batch := Bytes}, Data=#data{topic=Topic,
                                                        partition=Partition,
                                                        next_id=Id,
                                                        byte_count=ByteCount}) ->
    %% on-disk size includes the offset+length framing prefix
    Size = Size0 + ?OFFSET_AND_LENGTH_BYTES,
    NextId = Id + LastOffsetDelta + 1,
    Data1 = #data{pos=Position1,
                  log_fd=LogFile} = maybe_roll(Size, Data),

    %% write to log, framed as <first offset><batch length><batch bytes>.
    %% NOTE(review): this binary prefix was garbled in the source dump
    %% ("[<>, Bytes]", which is not valid Erlang); reconstructed as a
    %% 64-bit offset plus 32-bit length to match ?OFFSET_AND_LENGTH_BYTES
    %% -- verify against vg_protocol's log framing before merging.
    ok = file:write(LogFile, [<<Id:64/signed, Size0:32/signed>>, Bytes]),
    Data2 = Data1#data{byte_count=ByteCount+Size},

    %% maybe write index entry
    Data3 = update_index(Data2),

    %% update highwatermark in ets table
    vg_topics:update_hwm(Topic, Partition, NextId-1),

    Data3#data{next_id=NextId,
               pos=Position1+Size}.

%% Create new log segment and index file if current segment is too large
%% or if the index file is over its max and would be written to again.
%% Roll to a fresh segment when either the log segment would exceed
%% segment_bytes, or an index entry is due and the index file would
%% exceed index_max_bytes. Syncs and closes the old files, then opens
%% new ones whose base offset is the next record id.
maybe_roll(Size, Data=#data{next_id=Id,
                            topic_dir=TopicDir,
                            log_fd=LogFile,
                            index_fd=IndexFile,
                            pos=Position,
                            byte_count=ByteCount,
                            index_pos=IndexPosition,
                            index = Indexing,
                            topic=Topic,
                            partition=Partition,
                            config=#config{segment_bytes=SegmentBytes,
                                           index_max_bytes=IndexMaxBytes,
                                           index_interval_bytes=IndexIntervalBytes}})
  when Position+Size > SegmentBytes
       orelse (ByteCount+Size >= IndexIntervalBytes
               andalso IndexPosition+?INDEX_ENTRY_SIZE > IndexMaxBytes) ->
    lager:debug("seg size ~p max size ~p", [Position+Size, SegmentBytes]),
    lager:debug("index interval size ~p max size ~p", [ByteCount+Size, IndexIntervalBytes]),
    lager:debug("index pos ~p max size ~p", [IndexPosition+?INDEX_ENTRY_SIZE, IndexMaxBytes]),
    ok = file:sync(LogFile),
    ok = file:close(LogFile),

    %% index_fd is undefined while indexing is stopped; only close when real
    case Indexing of
        true ->
            ok = file:sync(IndexFile),
            ok = file:close(IndexFile);
        _ ->
            ok
    end,

    {NewIndexFile, NewLogFile} = vg_log_segments:new_index_log_files(TopicDir, Id),
    vg_log_segments:insert(Topic, Partition, Id),

    Data#data{log_fd=NewLogFile,
              index_fd=NewIndexFile,
              %% we assume here that new indexes are good, and
              %% re-enable writing, expecting the old indexes to
              %% catch up eventually. This might be racy
              index = true,
              segment_id = Id,
              byte_count=0,
              pos=0,
              index_pos=0};
maybe_roll(_, Data) ->
    Data.

%% skip writing indexes if they're disabled.
%% Indexing disabled: nothing to write.
update_index(Data=#data{index = false}) ->
    Data;
%% Add to index if the number of bytes written to the log since the last index record was written
update_index(Data=#data{next_id=Id,
                        pos=Position,
                        index_fd=IndexFile,
                        byte_count=ByteCount,
                        index_pos=IndexPosition,
                        segment_id=BaseOffset,
                        config=#config{index_interval_bytes=IndexIntervalBytes}})
  when ByteCount >= IndexIntervalBytes ->
    %% index entries are <segment-relative offset, log file position> pairs
    IndexEntry = <<(Id - BaseOffset):?INDEX_OFFSET_BITS/unsigned, Position:?INDEX_OFFSET_BITS/unsigned>>,
    ok = file:write(IndexFile, IndexEntry),
    Data#data{index_pos=IndexPosition+?INDEX_ENTRY_SIZE,
              byte_count=0};
update_index(Data) ->
    Data.

%% Re-read the raw bytes for offsets Start..next_id-1 so they can be
%% replayed down the chain. Returns [{SegmentId, Bytes}].
write_repair(Start, #data{next_id = ID, topic = Topic, partition = Partition}) ->
    %% two situations: replaying single-segment writes, and writes
    %% that span multiple segments
    {StartSegmentID, {StartPosition, _}} = vg_log_segments:find_segment_offset(Topic, Partition, Start),
    {EndSegmentID, {EndPosition, EndSize}} = vg_log_segments:find_segment_offset(Topic, Partition, ID),
    File = vg_utils:log_file(Topic, Partition, StartSegmentID),
    lager:debug("at=write_repair file=~p start=~p end=~p", [File, StartPosition, EndPosition]),
    case StartSegmentID == EndSegmentID of
        true ->
            {ok, FD} = file:open(File, [read, binary, raw]),
            try
                {ok, Data} = file:pread(FD, StartPosition, (EndPosition + EndSize) - StartPosition),
                [{StartSegmentID, Data}]
            after
                file:close(FD)
            end;
        _ ->
            %% cross-segment repair is not supported yet
            error(not_implemented)
    end.
%% Read the required storage settings from the application environment;
%% crashes (badmatch) if any are missing, which is intended.
setup_config() ->
    {ok, [LogDir]} = application:get_env(vonnegut, log_dirs),
    {ok, SegmentBytes} = application:get_env(vonnegut, segment_bytes),
    {ok, IndexMaxBytes} = application:get_env(vonnegut, index_max_bytes),
    {ok, IndexIntervalBytes} = application:get_env(vonnegut, index_interval_bytes),
    #config{log_dir=LogDir,
            segment_bytes=SegmentBytes,
            index_max_bytes=IndexMaxBytes,
            index_interval_bytes=IndexIntervalBytes}.

%% Ack timeout (ms) used for chain replication calls.
timeout() ->
    application:get_env(vonnegut, ack_timeout, 1000).

%% -------------------------------------------------------------------------
%% /src/vg_chain_state.erl
%% -------------------------------------------------------------------------

%%%-------------------------------------------------------------------
%% @doc Track the current state of the chain this node is a member of.
%%
%% @end
%%%-------------------------------------------------------------------
-module(vg_chain_state).

-behaviour(gen_statem).

-export([start_link/0,
         next/0,
         head/0]).

-export([init/1,
         active/3,
         inactive/3,
         callback_mode/0,
         terminate/3,
         code_change/4]).

-include_lib("kernel/include/inet.hrl").

-type chain_name() :: atom().
-type role() :: head | tail | middle | solo | undefined.
-type chain_node() :: {atom(), inet:ip_address() | inet:hostname(), inet:port_number(), inet:port_number()}.

%% fixed: was -export_types(...), which is not a recognized module
%% attribute and silently exported nothing.
-export_type([role/0,
              chain_node/0]).

%% chain-membership state held by the statem
-record(data, {
          name :: chain_name(),
          role :: role(),
          head :: node(),                          % head node of the chain
          cluster_type :: vg_utils:cluster_type(),
          members :: ordsets:ordset(),             % current chain membership
          all_nodes :: [chain_node()] | undefined,
          next_node :: atom() | tail,              % next link in the chain
          replicas :: integer()                    % requested chain size
         }).

-define(SERVER, ?MODULE).
-define(NODENAME, vonnegut).
start_link() ->
    gen_statem:start_link({local, ?SERVER}, ?MODULE, [], []).

%% Next link in the chain for this node (or `tail' / `undefined').
next() ->
    gen_statem:call(?SERVER, next_node).

%% Head node of the chain (or `undefined' while inactive).
head() ->
    gen_statem:call(?SERVER, head).

init([]) ->
    ChainName = vg_config:chain_name(),
    ClusterType = vg_config:cluster_type(),
    Replicas = vg_config:replicas(),
    %% start inactive and immediately attempt to connect via state_timeout
    {ok, inactive, #data{name=ChainName,
                         replicas=Replicas,
                         cluster_type=ClusterType}, [{state_timeout, 0, connect}]}.

inactive(enter, _, _Data) ->
    prometheus_boolean:set(is_active, false),
    keep_state_and_data;
inactive({call, From}, next_node, _Data) ->
    {keep_state_and_data, [{reply, From, undefined}]};
inactive({call, From}, head, _Data) ->
    {keep_state_and_data, [{reply, From, undefined}]};
%% Periodic connection attempt: discover/join peers and decide our role.
inactive(state_timeout, connect, Data=#data{name=Name,
                                            replicas=Replicas,
                                            cluster_type=ClusterType}) ->
    {Members, AllNodes} = join(ClusterType),
    lager:info("cluster_type=~p members=~p all_nodes=~p", [ClusterType, Members, AllNodes]),
    case {whereis(vg_topics_sup), role(node(), Members, Replicas, ClusterType)} of
        {undefined, _} ->
            %% topics supervisor not up yet; retry in a second
            {keep_state, Data#data{members=Members,
                                   role=undefined}, [{state_timeout, 1000, connect}]};
        {_P, solo} when is_pid(_P) ->
            %% single-node chain: this node is head and tail at once
            lager:info("at=chain_complete role=solo requested_size=1", []),
            lager:info("at=start_cluster_mgr role=solo"),
            vonnegut_sup:start_acceptor_pool(solo),
            ClientPort = vg_config:port(),
            PartisanPort = application:get_env(partisan, peer_port, 10200),
            [N, H] = string:split(atom_to_list(node()), "@"),
            vonnegut_sup:start_cluster_mgr(solo, [{list_to_atom(N), H, PartisanPort, ClientPort}]),
            {next_state, active, Data#data{members=Members,
                                           role=solo,
                                           head=node(),
                                           all_nodes=[],
                                           next_node=tail}};
        {_P, undefined} when is_pid(_P) ->
            %% role not determinable yet (e.g. srv discovery incomplete)
            {keep_state, Data#data{members=Members,
                                   role=undefined}, [{state_timeout, 1000, connect}]};
        {_P, Role} when is_pid(_P) ->
            lager:info("at=chain_join role=~s members=~p", [Role, Members]),
            case length(Members) of
                Size when Size >= Replicas ->
                    case Role of
                        head ->
                            lager:info("at=start_cluster_mgr role=head"),
                            %% if cluster mgr isn't running, start it
                            %% otherwise, add this chain to the cluster mgr
                            %% and all our topics
                            vonnegut_sup:start_cluster_mgr(Name, AllNodes);
                        _ ->
                            ok
                    end,

                    vonnegut_sup:start_acceptor_pool(Role),

                    %% monitor next link in the chain
                    NextNode = next_node(Role, node(), Members),
                    case string:split(atom_to_list(NextNode), "@") of
                        [N, H] ->
                            %% look up the client port of the next node
                            [Port] = [P || {N1, H1, _, P} <- AllNodes,
                                           N1 =:= list_to_atom(N),
                                           H1 =:= H],
                            vg_client_pool:start_pool(next_brick, #{ip => H,
                                                                    port => Port});
                        _ ->
                            ok
                    end,

                    Self = self(),
                    vg_peer_service:on_down(NextNode, fun() -> Self ! {next_node_down, NextNode} end),

                    lager:info("at=chain_complete requested_size=~p", [Replicas]),
                    {next_state, active, Data#data{members=Members,
                                                   head=hd(Members),
                                                   all_nodes=AllNodes,
                                                   role=Role,
                                                   next_node=NextNode}};
                Size ->
                    %% not enough members yet; keep retrying
                    lager:info("at=chain_incomplete requested_size=~p current_size=~p", [Replicas, Size]),
                    {keep_state, Data#data{members=Members,
                                           role=Role}, [{state_timeout, 1000, connect}]}
            end
    end;
inactive(info, {next_node_down, NextNode}, _Data) ->
    %% a down notification while already inactive: just reconnect now
    lager:info("state=inactive next_node_down=~p", [NextNode]),
    {keep_state_and_data, [{state_timeout, 0, connect}]}.
active(enter, _, #data{role=Role, replicas=Replicas}) ->
    set_metrics(Role, Replicas),
    keep_state_and_data;
active({call, From}, next_node, #data{next_node=NextNode}) ->
    {keep_state_and_data, [{reply, From, NextNode}]};
active({call, From}, head, #data{head=Head}) ->
    {keep_state_and_data, [{reply, From, Head}]};
active(info, {next_node_down, NextNode}, Data) ->
    lager:info("state=active next_node_down=~p", [NextNode]),
    %% NOTE(review): the trailing 0 is an event-timeout action, but
    %% inactive/3 only handles a `state_timeout' connect event -- confirm
    %% this reconnect path is actually exercised as intended.
    {next_state, inactive, Data, 0}.

callback_mode() ->
    [state_functions, state_enter].

terminate(_Reason, _State, _Data) ->
    ok.

code_change(_, _OldState, Data, _) ->
    {ok, Data}.

%% Internal functions

%% Decide this node's chain role from the member list, requested replica
%% count and cluster type.
%% assume we expect to find at least 1 node if using srv discovery
role(_Node, _, 1, _) ->
    solo;
role(_Node, [], _, {srv, _}) ->
    undefined;
role(Node, [Node], _, {srv, _}) ->
    undefined;
role(_Node, [], _, local) ->
    solo;
role(Node, [Node], _, local) ->
    solo;
role(Node, [Node | _], _, _) ->
    head;
role(Node, Nodes, _, _) ->
    case lists:reverse(Nodes) of
        [Node | _] ->
            tail;
        _ ->
            middle
    end.

%% Node this role should replicate to next; `tail' means end of chain.
next_node(tail, _, _) ->
    tail;
next_node(head, _, [_, Next | _]) ->
    Next;
next_node(_, Node, []) ->
    Node;
next_node(_, _, [N]) ->
    N;
next_node(_, Node, Nodes) ->
    find_next(Node, Nodes).

-spec find_next(Node :: atom(), Nodes :: ordsets:ordset()) -> atom().
find_next(Node, Nodes) ->
    try
        %% set the accumulator when the node we are looking
        %% for the next of is found and throw to return
        %% the first element encountered after the acc is set
        ordsets:fold(fun(N, none) when Node =:= N ->
                             N;
                        (_, none) ->
                             none;
                        (N, _) ->
                             throw(N)
                     end, none, Nodes)
    catch
        throw:N ->
            N
    end.

%% Join every discovered node via the peer service and return the
%% sorted member list together with the raw discovery results.
join(ClusterType) ->
    AllNodes = lookup(ClusterType),
    ordsets:fold(fun({Name, Host, PartisanPort, _ClientPort}, _) ->
                         Node = list_to_atom(atom_to_list(Name)++"@"++Host),
                         %% resolve Host unless it is already an IP literal
                         IP = case inet:parse_address(Host) of
                                  {error, einval} ->
                                      {ok, #hostent{h_addr_list=[IPAddress | _]}} = inet_res:getbyname(Host, a),
                                      IPAddress;
                                  {ok, IPAddress} ->
                                      IPAddress
                              end,
                         N = #{name => Node,
                               listen_addrs => [#{ip => IP, port => PartisanPort}],
                               parallelism => 1},
                         vg_peer_service:join(N)
                 end, ok, AllNodes),
    {ok, Members} = vg_peer_service:members(),
    {lists:usort(Members), AllNodes}.

%% leave() ->
%%     vg_peer_service:leave([]).

%%

%% Discover chain nodes for each cluster type.
lookup(local) ->
    ordsets:new();
lookup(none) ->
    ordsets:new();
lookup({direct, Nodes}) ->
    ordsets:from_list(Nodes);
lookup({srv, DiscoveryDomain}) ->
    %% SRV answers are {Priority, Weight, Port, Target}
    lists:foldl(fun({_, _, PartisanPort, H}, NodesAcc) ->
                        Node = list_to_atom(atom_to_list(?NODENAME)++"@"++H),
                        %% we could also do this by querying
                        %% the srv records of _data._tcp.vonnegut.default.svc.cluster.local
                        ClientPort = rpc:call(Node, vg_config, port, []),
                        ordsets:add_element({?NODENAME, H, PartisanPort, ClientPort}, NodesAcc)
                end, ordsets:new(), inet_res:lookup(DiscoveryDomain, in, srv)).
%% Mark this node active and publish role/replica metrics; exactly one
%% of the is_* role booleans ends up true.
set_metrics(Role, Replicas) ->
    prometheus_boolean:set(is_active, true),
    prometheus_gauge:set(replicas, Replicas),
    RoleMetric = role_metric(Role),
    [prometheus_boolean:set(B, false) || B <- [is_solo,
                                               is_head,
                                               is_tail,
                                               is_middle], B =/= RoleMetric],
    prometheus_boolean:set(RoleMetric, true).

%% Map a chain role onto its prometheus boolean name.
role_metric(solo) ->
    is_solo;
role_metric(head) ->
    is_head;
role_metric(tail) ->
    is_tail;
role_metric(middle) ->
    is_middle.

%% -------------------------------------------------------------------------
%% /src/vg_cleaner.erl
%% -------------------------------------------------------------------------

-module(vg_cleaner).

-behaviour(gen_server).

-export([start_link/2,
         run_cleaner/2]).

-export([init/1,
         handle_call/3,
         handle_cast/2,
         handle_info/2,
         terminate/2,
         code_change/3]).

%% per-topic/partition retention state
-record(state, {topic_dir :: file:filename_all(),
                topic :: binary(),
                partition :: integer(),
                retention_check_ms :: integer(), % how often to scan for old segments
                retention_seconds :: integer(),  % age at which segments are deleted
                t_ref :: timer:tref()}).

-define(SERVER(Topic, Partition), {via, gproc, {n, l, {vg_cleaner, Topic, Partition}}}).

start_link(Topic, Partition) ->
    gen_server:start_link(?SERVER(Topic, Partition),
                          ?MODULE, [Topic, Partition], []).

%% Run a retention pass immediately (also reschedules the timer).
run_cleaner(Topic, Partition) ->
    gen_server:call(?SERVER(Topic, Partition), run_cleaner).
init([Topic, Partition]) ->
    {ok, RetentionCheckMin} = application:get_env(vonnegut, log_retention_check_interval),
    {ok, RetentionMinutes} = application:get_env(vonnegut, log_retention_minutes),
    RetentionCheckMs = round(timer:minutes(RetentionCheckMin)),
    RetentionSeconds = RetentionMinutes * 60,

    TopicDir = vg_utils:topic_dir(Topic, Partition),
    %% schedule the first retention pass
    {ok, TRef} = timer:send_after(RetentionCheckMs, run_cleaner),
    {ok, #state{topic_dir=TopicDir,
                topic=Topic,
                partition=Partition,
                retention_check_ms=RetentionCheckMs,
                retention_seconds=RetentionSeconds,
                t_ref=TRef}}.

%% Manual pass: cancel the pending timer, clean now, reschedule.
handle_call(run_cleaner, _From, State=#state{topic_dir=TopicDir,
                                             topic=Topic,
                                             partition=Partition,
                                             retention_check_ms=RetentionCheckMs,
                                             retention_seconds=RetentionSeconds,
                                             t_ref=TRef}) ->
    timer:cancel(TRef),
    run_cleaner_(TopicDir, Topic, Partition, RetentionSeconds),
    {ok, TRef1} = timer:send_after(RetentionCheckMs, run_cleaner),
    {reply, ok, State#state{t_ref=TRef1}}.

handle_cast(_, State) ->
    {noreply, State}.

%% Scheduled pass fired by timer:send_after/2.
handle_info(run_cleaner, State=#state{topic_dir=TopicDir,
                                      topic=Topic,
                                      partition=Partition,
                                      retention_check_ms=RetentionCheckMs,
                                      retention_seconds=RetentionSeconds}) ->
    run_cleaner_(TopicDir, Topic, Partition, RetentionSeconds),
    {ok, TRef} = timer:send_after(RetentionCheckMs, run_cleaner),
    {noreply, State#state{t_ref=TRef}}.

terminate(_Reason, _State) ->
    ok.

code_change(_, State, _) ->
    {ok, State}.
%% Internal functions

%% Delete every *.log segment (and its matching .index file) whose last
%% modification time is older than RetentionSeconds.
run_cleaner_(TopicDir, Topic, Partition, RetentionSeconds) ->
    Segments = filelib:wildcard(filename:join(ec_cnv:to_list(TopicDir), "*.log")),
    Now = calendar:datetime_to_gregorian_seconds(calendar:universal_time()),
    lists:foreach(fun(Segment) ->
                          LastModified = filelib:last_modified(Segment),
                          %% mtime is local time; convert to UTC for comparison
                          [LastUniversal | _] = calendar:local_time_to_universal_time_dst(LastModified),
                          Diff = Now - calendar:datetime_to_gregorian_seconds(LastUniversal),
                          if
                              Diff >= RetentionSeconds ->
                                  SegmentId = filename:basename(Segment, ".log"),
                                  lager:info("at=delete topic=~s partition=~p segment=~s", [Topic, Partition, SegmentId]),
                                  RootName = filename:rootname(Segment),
                                  ok = file:delete(RootName++".index"),
                                  ok = file:delete(Segment);
                              true ->
                                  ok
                          end
                  end, Segments).

%% -------------------------------------------------------------------------
%% /src/vg_client.erl
%% -------------------------------------------------------------------------

-module(vg_client).

-behavior(shackle_client).

-export([metadata/0, metadata/1,
         ensure_topic/1,
         delete_topic/1,
         topics/0, topics/2,
         fetch/1, fetch/2, fetch/3,

         %% internal-only stuff
         replicate/5,
         delete_topic/2,
         %% end internal

         produce/2, produce/3,
         init/1,
         setup/2,
         handle_request/2,
         handle_data/2,
         terminate/1]).

-include("vg.hrl").

%% shackle connection state
-record(state, {
          request_counter = 0 :: non_neg_integer(), % monotonically increasing request id
          buffer = <<>> :: binary(),                % unparsed bytes received so far
          expected_size = 0 :: non_neg_integer()    % bytes the current response frame needs
         }).

-define(TIMEOUT, 5000).

-spec metadata() -> {ok, {Chains :: vg_cluster_mgr:chains_map(),
                          Topics :: vg_cluster_mgr:topics_map()}}.
metadata() ->
    %% this is maybe a silly default, considering that it could return
    %% millions of topics
    metadata([]).
%% Request metadata for the given topics ([] means all).
metadata(Topics) ->
    Request = vg_protocol:encode_metadata_request(Topics),
    scall(metadata, ?METADATA_REQUEST, Request, ?TIMEOUT).

-spec ensure_topic(Topic :: vg:topic()) ->
          {ok, {Chains :: vg_cluster_mgr:chains_map(),
                Topics :: vg_cluster_mgr:topics_map()}} |
          {error, Reason :: term()}.
ensure_topic(Topic) ->
    %% always use the metadata topic, creation happens inside via a global process.
    Request = vg_protocol:encode_metadata_request([Topic]),
    scall(metadata, ?ENSURE_REQUEST, Request, ?TIMEOUT).

-spec fetch(Topic)
           -> {ok, #{high_water_mark := integer(),
                     record_batches_size := integer(),
                     error_code := integer(),
                     record_batches := RecordBatches}}
               when Topic :: vg:topic() | [{vg:topic(), [{integer(), integer(), integer()}]}],
                    RecordBatches :: [vg:record_batch()].

%% if we don't want to expose the tuple in the second clauses of
%% fetch/1 and fetch/2, we could do something like fetch_partial,
%% which would return the tuple and options, which then could be fed
%% into an execute_multifetch function which would do the right thing.

%% Fetch a single topic from offset 0, or a list of prepared requests.
fetch(Topic) when is_binary(Topic) ->
    do_fetch([{Topic, 0, #{}}], ?TIMEOUT);
fetch(Requests) when is_list(Requests) ->
    do_fetch(Requests, ?TIMEOUT).

%% Fetch a topic from Position, or a request list with an explicit timeout.
fetch(Topic, Position) when is_binary(Topic) ->
    do_fetch([{Topic, Position, #{}}], ?TIMEOUT);
fetch(Requests, Timeout) when is_list(Requests) ->
    do_fetch(Requests, Timeout).

%% Fetch at most Limit records from Position.
fetch(Topic, Position, Limit) when is_binary(Topic) ->
    do_fetch([{Topic, Position, #{limit => Limit}}], ?TIMEOUT).
%% Group the fetch requests by the (read) pool serving each topic, issue one
%% encoded fetch per pool, and merge the per-pool result maps. Throws are used
%% for the two non-local exits: unknown topic and head/tail-swap restart.
do_fetch(Requests, Timeout) ->
    try
        PoolReqs =
            lists:foldl(
              fun({Topic, _Position, _Opts} = R, Acc) ->
                      case vg_client_pool:get_pool(Topic, read) of
                          {ok, Pool} ->
                              lager:debug("fetch request to pool: ~p ~p", [Topic, Pool]),
                              case Acc of
                                  #{Pool := PoolReqs} ->
                                      Acc#{Pool => [R | PoolReqs]};
                                  _ ->
                                      Acc#{Pool => [R]}
                              end;
                          {error, Reason} ->
                              throw({error, Reason})
                      end
              end, #{}, Requests),
        %% should we do these in parallel?
        Restart = application:get_env(vonnegut, swap_restart, true),
        Resps = maps:map(
                  fun(Pool, TPO0) ->
                          %% translate per-topic options into the wire tuple;
                          %% partition is fixed at 0, limit -1 means "no limit"
                          TPO = [begin
                                     MaxBytes = maps:get(max_bytes, Opts, 0),
                                     Limit = maps:get(limit, Opts, -1),
                                     {Topic, [{0, Position, MaxBytes, Limit}]}
                                 end
                                 || {Topic, Position, Opts} <- TPO0],
                          ReplicaId = -1,
                          MaxWaitTime = 5000,
                          MinBytes = 100,
                          Request = vg_protocol:encode_fetch(ReplicaId, MaxWaitTime, MinBytes, TPO),
                          case scall(Pool, ?FETCH2_REQUEST, Request, Timeout) of
                              %% sometimes because of cloud orchestration, and
                              %% restarts, head and tail nodes will switch or
                              %% move around in time for us to reconnect to them
                              %% in error, so if we get these codes, start over
                              {ok, Map} when is_map(Map) andalso Restart =:= true ->
                                  case maps:fold(
                                         fun(_, _, true) ->
                                                 true;
                                            (_, #{0 := #{error_code := ?FETCH_DISALLOWED_ERROR}}, _) ->
                                                 true;
                                            (T, #{0 := #{error_code := ?UNKNOWN_TOPIC_OR_PARTITION}}, _) ->
                                                 throw({error, {T, not_found}});
                                            (_, _, _) ->
                                                 false
                                         end, false, Map) of
                                      true ->
                                          throw(restart);
                                      _ ->
                                          {ok, Map}
                                  end;
                              {ok, Result} ->
                                  %% if there are any error codes in any
                                  %% of these, transform the whole thing
                                  %% into an error
                                  {ok, Result};
                              {error, Reason} ->
                                  {error, Reason}
                          end
                  end, PoolReqs),

        %% merge all per-pool {ok, Map} results; any pool error wins
        lists:foldl(
          fun(_, {error, Response}) ->
                  {error, Response};
             ({_Pool, {ok, Response}}, {ok, Acc}) ->
                  {ok, maps:merge(Acc, Response)};
             ({_Pool, {error, Response}}, _) ->
                  {error, Response}
          end,
          {ok, #{}}, maps:to_list(Resps))
    catch throw:{error, {Topic, not_found}} ->
            lager:error("tried to fetch from non-existent topic ~p", [Topic]),
            {error, {Topic, not_found}};
          throw:restart ->
            lager:info("disallowed request error, restarting pools"),
            vg_client_pool:restart(),
            do_fetch(Requests, Timeout)
    end.

-spec replicate(Pool, Topic, ExpectedId, RecordBatch, Timeout)
               -> {ok, integer()} | {error, term()} | {write_repair, maps:map()} | retry
                   when Pool :: atom(),
                        Topic :: vg:topic(),
                        ExpectedId :: integer(),
                        RecordBatch :: vg:record_batch() | [vg:record_batch()],
                        Timeout :: integer().
%% Chain-replication write: send RecordBatch expecting the next id to be
%% ExpectedId; the server may answer with a write-repair set or a retry hint.
replicate(Pool, Topic, ExpectedId, RecordBatch, Timeout) ->
    lager:debug("replicate pool=~p topic=~p", [Pool, Topic]),
    Request = vg_protocol:encode_replicate(0, 5000, Topic, 0, ExpectedId, RecordBatch),
    case scall(Pool, ?REPLICATE_REQUEST, Request, Timeout) of
        {ok, {0, #{error_code := 0,
                   offset := Offset}}} ->
            {ok, Offset};
        {ok, {0, #{error_code := ?WRITE_REPAIR, records := RecordBatches}}} ->
            {write_repair, RecordBatches};
        {ok, {0, #{error_code := ?TIMEOUT_ERROR}}} ->
            retry;
        {ok, {0, #{error_code := ErrorCode}}} ->
            {error, ErrorCode};
        {error, Reason} ->
            {error, Reason}
    end.

%% Delete Topic via its write (head) pool; generous 60s timeout because
%% deletion has to propagate down the chain.
delete_topic(Topic) ->
    case vg_client_pool:get_pool(Topic, write) of
        {ok, Pool} ->
            Request = vg_protocol:encode_delete_topic(Topic),
            case scall(Pool, ?DELETE_TOPIC_REQUEST, Request, timer:seconds(60)) of
                {ok, ok} -> ok;
                {error, Reason} -> {error, Reason}
            end;
        {error, Reason} ->
            {error, Reason}
    end.
%% Internal variant of delete used during replication: the caller supplies
%% the pool explicitly instead of resolving it from the topic.
delete_topic(Pool, Topic) ->
    lager:debug("delete_topic pool=~p topic=~p", [Pool, Topic]),
    Request = vg_protocol:encode_delete_topic(Topic),
    case scall(Pool, ?REPLICATE_DELETE_TOPIC_REQUEST, Request, timer:seconds(60)) of
        {ok, ok} -> ok;
        {error, Reason} -> {error, Reason}
    end.

-spec produce(Topic, RecordBatch)
             -> {ok, integer()} | {error, term()}
                 when Topic :: vg:topic(),
                      RecordBatch :: vg:record_batch() | [vg:record_batch()].
%% Produce with the default 5s timeout.
produce(Topic, RecordBatch) ->
    produce(Topic, RecordBatch, ?TIMEOUT).

-spec produce(Topic, RecordBatch, Timeout)
             -> {ok, integer()} | {error, term()}
                 when Topic :: vg:topic(),
                      RecordBatch :: vg:record_batch() | [vg:record_batch()],
                      Timeout :: pos_integer().
%% Encode once here so retries inside produce_/3 don't re-encode the batch.
produce(Topic, RecordBatch, Timeout) ->
    #{record_batch := EncodedRecordBatch} = vg_protocol:encode_record_batch(RecordBatch),
    produce_(Topic, EncodedRecordBatch, Timeout).
%% Send an already-encoded record batch to the write (head) pool for Topic.
%% Returns {ok, Offset} of the written batch or {error, Reason}.
produce_(Topic, EncodedRecordBatch, Timeout) ->
    case vg_client_pool:get_pool(Topic, write) of
        {ok, Pool} ->
            lager:debug("produce request to pool: ~p ~p", [Topic, Pool]),
            TopicRecords = [{Topic, [{0, EncodedRecordBatch}]}],
            Restart = application:get_env(vonnegut, swap_restart, true),
            Request = vg_protocol:encode_produce(0, 5000, TopicRecords),
            case scall(Pool, ?PRODUCE_REQUEST, Request, Timeout) of
                {ok, #{Topic := #{0 := #{error_code := 0,
                                         offset := Offset}}}} ->
                    {ok, Offset};
                {ok, #{Topic := #{0 := #{error_code := ?TIMEOUT_ERROR}}}} ->
                    {error, timeout};
                %% sometimes because of cloud orchestration, and
                %% restarts, head and tail nodes will switch or
                %% move around in time for us to reconnect to them
                %% in error, so if we get these codes, start over
                {ok, #{Topic := #{0 := #{error_code := ?PRODUCE_DISALLOWED_ERROR}}}}
                  when Restart =:= true ->
                    lager:info("disallowed request error, restarting pools"),
                    vg_client_pool:restart(),
                    produce_(Topic, EncodedRecordBatch, Timeout);
                {ok, #{Topic := #{0 := #{error_code := ErrorCode}}}} ->
                    {error, ErrorCode};
                {error, Reason} ->
                    {error, Reason}
            end;
        {error, Reason} ->
            {error, Reason}
    end.

%% List all topics known to the metadata pool.
topics() ->
    topics(metadata, []).

%% List topics (restricted to Topics when non-empty) via the given pool.
%% Each topic name is length-prefixed per the wire protocol.
topics(Pool, Topics) ->
    Request = vg_protocol:encode_array([<<(byte_size(T)):16/signed-integer, T/binary>> || T <- Topics]),
    case scall(Pool, ?TOPICS_REQUEST, Request, ?TIMEOUT) of
        {ok, {_, _}} = OK ->
            OK;
        {error, Reason} ->
            {error, Reason}
    end.

%% shackle_client callback: fresh per-connection state.
-spec init(term()) -> {ok, term()}.
init(_) ->
    {ok, #state{}}.

%% shackle_client callback: nothing to negotiate after connect.
-spec setup(inet:socket(), term()) -> {ok, term()} | {error, term(), term()}.
setup(_Socket, State) ->
    {ok, State}.
%% shackle_client callback: frame an outgoing request with a fresh
%% correlation id and a 32-bit big-endian size prefix.
-spec handle_request({integer(), iodata()}, #state{}) -> {ok, non_neg_integer(), iodata(), term()}.
handle_request({Type, Body}, State=#state{request_counter=RequestCounter}) ->
    Id = request_id(RequestCounter),
    Data = vg_protocol:encode_request(Type, Id, ?CLIENT_ID, Body),
    {ok, Id, [<<(iolist_size(Data)):32/signed-integer>>, Data],
     State#state{request_counter = RequestCounter + 1}}.

%% shackle_client callback: prepend any partial frame buffered from the
%% previous TCP packet, then decode as many complete responses as possible.
%% (FIX: the binary expression here had been destroyed by text extraction;
%% reconstructed as buffer-then-new-data, the only order consistent with
%% decode_data/3 buffering the undecoded tail below.)
-spec handle_data(binary(), term()) -> {ok, [{term(),term()}], term()}.
handle_data(Data, State=#state{buffer=Buffer}) ->
    Data2 = <<Buffer/binary, Data/binary>>,
    decode_data(Data2, [], State).

%% Accumulate {CorrelationId, {ok, Response}} replies until the input is
%% exhausted or an incomplete frame remains (which is saved in the buffer).
decode_data(<<>>, Replies, State) ->
    {ok, Replies, State};
decode_data(Data, Replies, State=#state{expected_size=Exp}) ->
    case Exp of
        %% only attempt a decode when we have no size hint yet (0) or at
        %% least the previously announced number of bytes has arrived
        N when N == 0 orelse byte_size(Data) >= N ->
            case vg_protocol:decode_response(Data) of
                more ->
                    {ok, Replies, State#state{buffer=Data}};
                {more, Size} ->
                    {ok, Replies, State#state{buffer=Data, expected_size=Size}};
                {CorrelationId, Response, Rest} ->
                    decode_data(Rest, [{CorrelationId, {ok, Response}} | Replies],
                                State#state{expected_size = 0,
                                            buffer = <<>>})
            end;
        _ ->
            {ok, Replies, State#state{buffer=Data}}
    end.

-spec terminate(term()) -> ok.
terminate(_State) ->
    ok.

%% private

%% Correlation ids wrap at ?MAX_REQUEST_ID.
request_id(RequestCounter) ->
    RequestCounter rem ?MAX_REQUEST_ID.

%% Call a pool with jittered-backoff retries (see scall/6).
scall(Pool, RequestType, RequestData, RequestTimeout) ->
    B = backoff:init(2, 200),
    B1 = backoff:type(B, jitter),
    %% at these settings, 25 retries is approximately 5s
    scall(Pool, RequestType, RequestData, RequestTimeout, B1, 25).
%% Retry wrapper around shackle:call/3: a hard timeout is returned as-is,
%% any other error backs off (jittered) and retries until Retries hits 0.
scall(_, _, _, _, _, 0) ->
    {error, pool_timeout};
scall(Pool, RequestType, RequestData, RequestTimeout, Backoff, Retries) ->
    case shackle:call(Pool, {RequestType, RequestData}, RequestTimeout) of
        {error, timeout} ->
            {error, timeout};
        {error, _} ->
            {Time, Backoff1} = backoff:fail(Backoff),
            timer:sleep(Time),
            scall(Pool, RequestType, RequestData, RequestTimeout, Backoff1, Retries - 1);
        {ok, Response} ->
            {ok, vg_protocol:decode_response(RequestType, Response)}
    end.

%% ---- file boundary: src/vg_client_pool.erl ----

%% Manages the shackle connection pools (one head/write and one tail/read
%% pool per chain) and the ETS table mapping topics to pools.
-module(vg_client_pool).

-export([start/0, start/1,
         stop/0,
         restart/0,
         get_pool/2,
         start_pool/2,
         make_pool_name/2,
         refresh_topic_map/0]).

-include("vg.hrl").

-define(OPTIONS, [set, public, named_table, {read_concurrency, true}]).

start() ->
    start(#{}).

start(Opts) ->
    %% so restarts won't lose settings
    application:set_env(vonnegut, global_pool_opts, Opts),
    start(Opts, 0).

%% Give up after 10 attempts to bootstrap the pools.
start(_Opts, 10) ->
    {error, could_not_start_pools};
start(Opts, N) ->
    %% maybe start this if it hasn't been
    application:ensure_all_started(shackle),
    case application:get_env(vonnegut, client) of
        {ok, ClientConfig} ->
            case proplists:get_value(endpoints, ClientConfig) of
                undefined ->
                    lager:error("No endpoints configured for client");
                [{Host, Port} | _] when is_integer(Port) ->
                    start_(Opts, N, Host, Port);
                [HostPort | _] when is_list(HostPort) ->
                    case parse_host_port(HostPort) of
                        {ok, Host, Port} -> start_(Opts, N, Host, Port);
                        %% FIX: format arguments must be wrapped in a list;
                        %% the bare term crashed the formatter at runtime
                        {error, _} -> lager:error("Invalid endpoint ~p", [HostPort])
                    end
            end;
        _ ->
            lager:info("No client configuration")
    end.
%% Bootstrap: start the metadata pool, fetch the chain list, then start one
%% write and one read pool per chain. Any failure sleeps briefly and retries
%% the whole start sequence (bounded by start/2's attempt counter).
start_(Opts, N, Host, Port) ->
    start_pool(metadata, Opts#{ip => Host,
                               port => Port}),
    try
        case vg_client:topics() of
            {ok, {_, Chains}} ->
                maybe_init_ets(),
                _ = start_pools(Chains),
                application:set_env(vonnegut, chains, Chains),
                refresh_topic_map(),
                ok
        end
    catch
        ?WITH_STACKTRACE(_, R, S)
            lager:warning("at=start_pools error=~p stacktrace=~p", [R, S]),
            timer:sleep(500),
            start(Opts, N + 1)
    end.

%% Parse "host" or "host:port" into {ok, Host, Port}.
parse_host_port(HostPortString) ->
    case string:split(HostPortString, ":", all) of
        [Host] ->
            {ok, Host, ?DEFAULT_PORT};
        [Host, Port] ->
            case string:to_integer(Port) of
                {IntegerPort, ""} -> {ok, Host, IntegerPort};
                {_, _} -> {error, invalid_host_port_string}
            end;
        [_|_] ->
            {error, invalid_host_port_string}
    end.

%% Start a head (write) and tail (read) shackle pool for every chain.
start_pools(Chains) ->
    [begin
         %% NOTE(review): the binary expression building the chain pool name
         %% was destroyed during text extraction; reconstructed as
         %% "<head-host>:<head-port>". Whatever form is used, it MUST match
         %% lookup_list/1 exactly so get_pool/2 resolves the same names.
         Name = <<HeadHost/binary, ":", (integer_to_binary(HeadPort))/binary>>,
         lager:info("starting chain: ~p ~p", [Name, C]),
         HeadName = make_pool_name(Name, write),
         start_pool(HeadName, #{ip => binary_to_list(HeadHost),
                                port => HeadPort}),
         %% a "solo" tail means the chain is a single node: read from the head
         TailHost =
             case TailHost0 of
                 <<"solo">> -> HeadHost;
                 _ -> TailHost0
             end,
         %% the name of the pool can be misleading as to what host and
         %% port it's on. Do we need to fix this?
         TailName = make_pool_name(Name, read),
         start_pool(TailName, #{ip => binary_to_list(TailHost),
                                port => TailPort})
     end
     || #{name := _Name,
          head := {HeadHost, HeadPort},
          tail := {TailHost0, TailPort}} = C <- Chains].

%% Re-fetch the chain list and rebuild the topic->pool lookup table.
refresh_topic_map() ->
    %% TODO live migrate pools when the chain list changes?
    %% or just restart the whole mess?
    {ok, {_, Chains}} = vg_client:topics(),
    maybe_init_ets(clean),
    ets:insert(?topic_map, {chains, Chains}),
    ets:insert(?topic_map, {lookup, lookup_list(Chains)}).
%% Build the {TopicsStart, TopicsEnd, HeadPool, TailPool} tuples consulted by
%% find_chain/3.
lookup_list(Chains) ->
    [begin
         %% NOTE(review): reconstructed after extraction loss — must build
         %% exactly the same binary as start_pools/1 so the pool atoms match.
         Name = <<HeadHost/binary, ":", (integer_to_binary(HeadPort))/binary>>,
         HeadName = make_pool_name(Name, write),
         TailName = make_pool_name(Name, read),
         {Start, End, HeadName, TailName}
     end
     || #{topics_start := Start,
          topics_end := End,
          head := {HeadHost, HeadPort}} <- Chains].

%% Resolve the pool responsible for Topic; RW selects head (write) or tail
%% (read). Populates the lookup table on first use.
get_pool(Topic, RW) ->
    %% at some point we should handle retries here for when the topic
    %% list is being refreshed.
    case ets:lookup(?topic_map, lookup) of
        [] ->
            refresh_topic_map(),
            get_pool(Topic, RW);
        [{_, Chains}] ->
            case find_chain(Chains, Topic, RW) of
                {ok, Pool} ->
                    lager:debug("found chain for topic=~p on pool=~p", [Topic, Pool]),
                    {ok, Pool};
                {error, Reason} ->
                    {error, Reason}
            end
    end.

%% Walk the chain ranges until Topic falls inside one. start_space/end_space
%% are literal atoms marking an unbounded edge of the topic keyspace.
%% TODO: work out how to replace this with a select, maybe
find_chain([], _Topic, _RW) ->
    {error, malformed_chain};
find_chain([{start_space, end_space, HeadName, TailName} | _Tail], _Topic, RW) ->
    {ok, pick_pool(HeadName, TailName, RW)};
find_chain([{start_space, E, HeadName, TailName} | _Tail], Topic, RW) when Topic =< E ->
    {ok, pick_pool(HeadName, TailName, RW)};
find_chain([{S, end_space, HeadName, TailName} | _Tail], Topic, RW) when Topic >= S ->
    {ok, pick_pool(HeadName, TailName, RW)};
find_chain([{S, E, HeadName, TailName} | _Tail], Topic, RW) when Topic >= S andalso Topic =< E ->
    {ok, pick_pool(HeadName, TailName, RW)};
find_chain([_|Tail], Topic, RW) ->
    find_chain(Tail, Topic, RW).

%% Writes go to the chain head, reads to the chain tail.
pick_pool(Head, _Tail, write) ->
    Head;
pick_pool(_Head, Tail, read) ->
    Tail.

%% NOTE(review): suffixes reconstructed after extraction loss; "_tail" for
%% read and "_head" for write mirror pick_pool/3's head/tail semantics.
make_pool_name(Chain, read) ->
    binary_to_atom(<<Chain/binary, "_tail">>, utf8);
make_pool_name(Chain, write) ->
    binary_to_atom(<<Chain/binary, "_head">>, utf8).

maybe_init_ets() ->
    maybe_init_ets(foo).
%% eventually handle the clean argument
%% Create the topic-map ETS table if it does not already exist.
maybe_init_ets(_) ->
    case ets:info(?topic_map, name) of
        undefined ->
            ets:new(?topic_map, ?OPTIONS);
        _ ->
            ok
    end.

%% Start a named shackle pool; Opts may carry ip/port/reconnect overrides.
%% The pool name is also recorded in app env so stop/0 can find it later.
start_pool(Name, Opts) ->
    ClientPoolSize = application:get_env(vonnegut, client_pool_size, 10),
    SocketOpts = [binary,
                  {buffer, 65535},
                  {nodelay, true},
                  {packet, raw},
                  {send_timeout, 5000},
                  {send_timeout_close, true}],
    Pools = application:get_env(vonnegut, client_pools, []),
    application:set_env(vonnegut, client_pools, [Name | Pools]),
    shackle_pool:start(Name, vg_client,
                       [{ip, maps:get(ip, Opts, "127.0.0.1")},
                        {port, maps:get(port, Opts, 5588)},
                        {reconnect, maps:get(reconnect, Opts, true)},
                        {reconnect_time_max, 120000},
                        {reconnect_time_min, 250},
                        {socket_options, SocketOpts}],
                       [{backlog_size, 1024},
                        {pool_size, ClientPoolSize},
                        {pool_strategy, round_robin}]).

%% Stop every recorded pool, then shackle itself.
%% NOTE(review): the client_pools env entry is not cleared here, so after a
%% restart/0 the list accumulates names of already-stopped pools — confirm
%% whether shackle_pool:stop/1 on a missing pool is harmless.
stop() ->
    [shackle_pool:stop(Pool)
     || Pool <- application:get_env(vonnegut, client_pools, [])],
    application:stop(shackle).

%% Full teardown + bootstrap using the options saved by start/1.
restart() ->
    stop(),
    Opts = application:get_env(vonnegut, global_pool_opts, #{}),
    start(Opts).
%%%%%%%%%%%%%%%%%%

%% ---- file boundary: src/vg_cluster_mgr.erl ----

%%%-------------------------------------------------------------------
%%% @author Tristan Sloughter <>
%%% @copyright (C) 2017, Tristan Sloughter
%%% @doc
%%%
%%% @end
%%% Created :  9 Feb 2017 by Tristan Sloughter <>
%%%-------------------------------------------------------------------
-module(vg_cluster_mgr).

-behaviour(gen_server).

%% API
-export([start_link/3,
         get_map/0,
         ensure_topic/1]).
-export([
         create_topic/1,
         delete_topic/1,
         describe_topic/1,
         deactivate_topic/1,
         running_topics/0
        ]).

%% gen_server callbacks
-export([init/1, handle_call/3, handle_cast/2, handle_info/2,
         terminate/2, code_change/3]).

-include("vg.hrl").

-type chain_id() :: binary().
-type topics_map() :: #{vg:topic() => chain_id()}.
-type chains_map() :: #{chain_id() => chain()}.

%% FIX: the attribute was misspelled as -export_types, which the compiler
%% silently ignores as an unknown attribute; also topic/0 is not defined in
%% this module (vg:topic() is used), so exporting it would not compile.
-export_type([chain_id/0,
              topics_map/0,
              chains_map/0]).

-define(SERVER, ?MODULE).

%% Cluster manager state: topic->chain assignments, the chain map, and a
%% monotonically increasing epoch bumped on every assignment change.
-record(state, {topics = #{} :: maps:map(),
                chains = #{} :: maps:map(),
                epoch :: integer()}).

-spec start_link(vg_chain_state:chain_name(), [vg_chain_state:chain_node()], file:filename_all()) -> {ok, pid()}.
start_link(ChainName, ChainNodes, DataDir) ->
    gen_server:start_link({local, ?SERVER}, ?MODULE, [ChainName, ChainNodes, DataDir], []).

%% add chain functionality needed

%% All API calls below address the manager registered on the chain head node.
-spec get_map() -> {Topics :: topics_map(), Chains :: chains_map(), Epoch :: integer()}.
get_map() ->
    HeadNode = vg_chain_state:head(),
    gen_server:call({?SERVER, HeadNode}, get_map).

-spec create_topic(Topic :: binary()) -> {ok, Chain :: binary()} | {error, exists}.
create_topic(Topic) ->
    HeadNode = vg_chain_state:head(),
    gen_server:call({?SERVER, HeadNode}, {create_topic, Topic}).

-spec ensure_topic(Topic :: binary()) -> {error, chain_not_found} |
                                         {error, topic_exists_other_chain} |
                                         {ok, chain_id()}.
ensure_topic(Topic) ->
    HeadNode = vg_chain_state:head(),
    gen_server:call({?SERVER, HeadNode}, {ensure_topic, Topic}).

%% infinity timeout: deletion touches every node in the chain.
delete_topic(Topic) ->
    HeadNode = vg_chain_state:head(),
    gen_server:call({?SERVER, HeadNode}, {delete_topic, Topic}, infinity).
describe_topic(Topic) ->
    HeadNode = vg_chain_state:head(),
    gen_server:call({?SERVER, HeadNode}, {describe_topic, Topic}).

deactivate_topic(Topic) ->
    HeadNode = vg_chain_state:head(),
    gen_server:call({?SERVER, HeadNode}, {deactivate_topic, Topic}).

running_topics() ->
    HeadNode = vg_chain_state:head(),
    gen_server:call({?SERVER, HeadNode}, running_topics).

%%%%%%%%%%%%%%%%%%%%%%%%

%% Build the single-chain state and schedule discovery of on-disk topics.
init([ChainName, ChainNodes, DataDir]) ->
    Chain = create_chain(ChainName, ChainNodes),
    State = load_state([Chain], DataDir),
    self() ! {ensure_topics, ChainName},
    {ok, State}.

handle_call(get_map, _From, State=#state{topics=Topics,
                                         chains=Chains,
                                         epoch=Epoch}) ->
    {reply, {Topics, Chains, Epoch}, State};
%% Create: fails with {error, exists} if the topic is already assigned.
handle_call({create_topic, Topic}, _From, State=#state{topics=Topics,
                                                       chains=Chains,
                                                       epoch=Epoch}) ->
    case maps:get(Topic, Topics, not_found) of
        not_found ->
            %% assign the topic to a random chain
            Keys = maps:keys(Chains),
            Random = rand:uniform(length(Keys)),
            Chain = lists:nth(Random, Keys),

            %% start topic process on all nodes in the chain
            #chain{nodes=Nodes} = maps:get(Chain, Chains),
            [{ok, _} = vg_topics_sup:start_child(Node, Topic, [0]) || Node <- Nodes],

            Topics1 = maps:put(Topic, Chain, Topics),
            {reply, {ok, Chain}, State#state{topics=Topics1,
                                             epoch=Epoch+1}};
        Chain ->
            lager:info("attempting to create topic that already exists on chain=~p", [Chain]),
            {reply, {error, exists}, State}
    end;
%% Ensure: idempotent create — an existing assignment just (re)starts the
%% topic processes and returns the chain.
handle_call({ensure_topic, Topic}, _From, State=#state{topics=Topics,
                                                       chains=Chains,
                                                       epoch=Epoch}) ->
    case maps:get(Topic, Topics, not_found) of
        not_found ->
            Keys = maps:keys(Chains),
            Random = rand:uniform(length(Keys)),
            Chain = lists:nth(Random, Keys),

            %% start topic process on all nodes in the chain
            start_on_all_nodes(Topic, Chain, Chains),
            Topics1 = maps:put(Topic, Chain, Topics),
            {reply, {ok, Chain}, State#state{topics=Topics1,
                                             epoch=Epoch+1}};
        Chain ->
            start_on_all_nodes(Topic, Chain, Chains),
            {reply, {ok, Chain}, State}
    end;
handle_call({delete_topic, Topic}, _From, State=#state{topics=Topics,
                                                       chains=Chains}) ->
    %% have topic mgr delete the topic segments and directory
    %% deactivate the topic so that it can be recreated if desired
    {Reply, Topics1} =
        case maps:get(Topic, Topics, not_found) of
            not_found -> {{error, not_found}, Topics};
            Chain ->
                Rep =
                    try
                        vg_topic_mgr:delete_topic(Topic, 0) % eventually iterate partitions?
                    catch _:{noproc, _} ->
                            %% topic processes not running: start them so the
                            %% manager exists, then retry the delete
                            start_on_all_nodes(Topic, Chain, Chains),
                            vg_topic_mgr:delete_topic(Topic, 0)
                    end,
                stop_on_all_nodes(Topic, Chain, Chains),
                {Rep, maps:remove(Topic, Topics)}
        end,
    {reply, Reply, State#state{topics=Topics1}};
%% handle_call({describe_topic, Topic}, _From, State=#state{topics=Topics,
%%                                                          chains=Chains}) ->
%%     %% get the hwm
%%     %% get number of segments
%%     %% size on disk
%%     %% check if it's running?
%%     {reply, ok, State};
handle_call({deactivate_topic, Topic}, _From, State=#state{topics=Topics,
                                                           chains=Chains}) ->
    Ret =
        case maps:get(Topic, Topics, not_found) of
            not_found -> {error, not_found};
            Chain -> stop_on_all_nodes(Topic, Chain, Chains)
        end,
    {reply, Ret, State};
handle_call(running_topics, _From, State=#state{chains=_Chains}) ->
    %% TODO: need to do this for all chains?
    Ret = vg_topics_sup:list_topics(node()),
    {reply, Ret, State};
%% NOTE(review): this catch-all returns {noreply, _} without ever replying,
%% so unknown calls leave the caller blocked until its call timeout —
%% confirm this is intentional.
handle_call(_, _, State) ->
    {noreply, State}.

handle_cast(_Msg, State) ->
    {noreply, State}.

%% Register every topic found on disk under this chain (sent from init/1).
handle_info({ensure_topics, ChainName}, State) ->
    State1 = ensure_all_topics(ChainName, State),
    {noreply, State1}.
terminate(_Reason, _State) ->
    ok.

code_change(_OldVsn, State, _Extra) ->
    {ok, State}.

%%%===================================================================
%%% Internal functions
%%%===================================================================

%% Start the topic's partition-0 process on every node of Chain; an
%% already-started child counts as success, anything else exits.
start_on_all_nodes(Topic, Chain, Chains) ->
    #chain{nodes=Nodes} = maps:get(Chain, Chains),
    [case vg_topics_sup:start_child(Node, Topic, [0]) of
         {ok, _} -> ok;
         {error,{already_started, _}} -> ok;
         {error, Reason} -> exit({error, Reason})
     end || Node <- Nodes].

%% Stop the topic on every node of Chain; returns [ok] when everything
%% stopped cleanly, otherwise includes annotated per-node errors.
stop_on_all_nodes(Topic, Chain, Chains) ->
    #chain{nodes=Nodes} = maps:get(Chain, Chains),
    %% usort here to remove useless oks
    lists:usort(
      [case vg_topics_sup:stop_child(Node, Topic, [0]) of
           [ok] -> ok;
           %% annotate and pass on the error for user analysis
           Other -> {Node, Topic, Other}
       end || Node <- Nodes]).

%% TODO: the topic space stuff MUST be fixed before multiple chains are supported
%% No nodes configured: a solo chain on the local node and default port.
create_chain(Name, []) ->
    #chain{name = Name,
           nodes = [node()],
           topics_start = start_space,
           topics_end = end_space,
           head = {"127.0.0.1", 5588},
           tail = {"127.0.0.1", 5588}};
create_chain(Name, Nodes) ->
    #chain{name = Name,
           nodes = [nodename(Node) || Node <- Nodes],
           topics_start = start_space, % only valid for one chain
           topics_end = end_space,     % only valid for one chain
           head = head(Nodes),
           tail = tail(Nodes)}.

%% Build the node atom "name@host" from a chain-node tuple.
nodename({Name, Host, _, _}) ->
    list_to_atom(atom_to_list(Name) ++ "@" ++ Host).

%% Initial state: no topic assignments, chains keyed by name, epoch 0.
load_state(Chains, _DataDir) ->
    ChainsMap = lists:foldl(fun(Chain=#chain{name=Name}, Acc) ->
                                    maps:put(Name, Chain, Acc)
                            end, #{}, Chains),
    #state{topics = #{},
           chains = ChainsMap,
           epoch = 0}.

%% The head endpoint is the first node's {Host, ClientPort}.
head([{_, Host, _, ClientPort} | _]) ->
    {Host, ClientPort}.
%% The tail endpoint is the last node's {Host, ClientPort}.
tail(Nodes) ->
    head(lists:reverse(Nodes)).

%% Assign every topic found on disk to ChainName, bumping the epoch per topic.
ensure_all_topics(ChainName, State) ->
    Topics = vg_utils:topics_on_disk(),
    lists:foldl(fun({Topic, _}, StateAcc=#state{topics=TopicsAcc,
                                                epoch=Epoch}) ->
                        TopicsAcc1 = maps:put(Topic, ChainName, TopicsAcc),
                        StateAcc#state{topics=TopicsAcc1,
                                       epoch=Epoch+1}
                end, State, Topics).

%% ---- file boundary: src/vg_config.erl ----

%% Accessors for the `chain` section of the vonnegut application environment.
-module(vg_config).

-export([chain_name/0,
         port/0,
         cluster_type/0,
         replicas/0]).

-define(DEFAULT_PORT, 5588).

-type cluster_type() :: local | {direct, [any()]} | {srv, string()} | none.

%% FIX: the attribute was misspelled as -export_types, which the compiler
%% silently ignores, leaving cluster_type/0 unexported.
-export_type([cluster_type/0]).

-spec chain_name() -> vg_chain_state:chain_name().
chain_name() ->
    vg_utils:to_atom(from_chain(name, solo)).

-spec port() -> integer().
port() ->
    vg_utils:to_integer(from_chain(port, ?DEFAULT_PORT)).

%% Discovery mode; accepts the string form "local" as well as the atom.
-spec cluster_type() -> cluster_type().
cluster_type() ->
    case from_chain(discovery, local) of
        "local" ->
            local;
        local ->
            local;
        {direct, Nodes} ->
            {direct, Nodes};
        {srv, Domain} ->
            {srv, Domain};
        Other ->
            lager:error("Unknown clustering option: ~p", [Other]),
            none
    end.

%% Expected replica count: the node list length for direct discovery, the
%% configured value for SRV discovery, 1 otherwise.
-spec replicas() -> integer().
replicas() ->
    case cluster_type() of
        {direct, List} ->
            length(List);
        {srv, _} ->
            vg_utils:to_integer(from_chain(replicas, 1));
        _ ->
            1
    end.

%% internal functions

%% Look up Key in the chain proplist, falling back to Default.
from_chain(Key, Default) ->
    case application:get_env(vonnegut, chain, []) of
        [] ->
            Default;
        Chain ->
            proplists:get_value(Key, Chain, Default)
    end.
%% ---- file boundary: src/vg_elli_handler.erl ----

%% Minimal elli HTTP handler exposing a health-check endpoint.
-module(vg_elli_handler).

-export([handle/2,
         handle_event/3]).

-include_lib("elli/include/elli.hrl").
-behaviour(elli_handler).

handle(Req, _Args) ->
    %% Delegate to our handler function
    handle(Req#req.method, elli_request:path(Req), Req).

%% GET /_health -> 200; everything else -> 404.
handle('GET', [<<"_health">>], _Req) ->
    {ok, [], <<"It's all good.">>};

handle(_, _, _Req) ->
    {404, [], <<"Not Found">>}.

handle_event(_Event, _Data, _Args) ->
    ok.

%% ---- file boundary: src/vg_index.erl ----

%% Index files are named [offset].index
%% Entries in the index are <<(Id-Offset):32/unsigned, Position:32/unsigned>>
%% Position is the offset in [offset].log to find the log Id
-module(vg_index).

-include("vg.hrl").

-export([find_in_index/3]).

-spec find_in_index(Fd, BaseOffset, Id) -> integer() | not_found when
      Fd :: file:fd(),
      BaseOffset :: integer(),
      Id :: integer().
%% Scan the index file two entries at a time for the log-file position of Id.
%% An unreadable/empty index falls back to position 0 (scan from log start).
find_in_index(Fd, BaseOffset, Id) ->
    case file:read(Fd, (2 * ?INDEX_ENTRY_SIZE)) of
        {ok, Bytes} ->
            find_in_index_(Fd, Id, BaseOffset, Bytes);
        _ ->
            0
    end.

%% Optimize later.
%% Could keep entire index in memory
%% and could (in memory or not) use a binary search
%%
%% FIX: the binary patterns in these clauses were destroyed during text
%% extraction ("<>"); reconstructed from the intact sibling clauses — every
%% entry is <<Offset:?INDEX_OFFSET_BITS/unsigned, Position:?INDEX_POS_BITS/unsigned>>.
find_in_index_(_, _, _, <<>>) ->
    0;
%% special case for when below the first offset in a single entry index
find_in_index_(_, Id, BaseOffset, <<Offset:?INDEX_OFFSET_BITS/unsigned,
                                    _Position:?INDEX_POS_BITS/unsigned>>)
  when BaseOffset + Offset > Id ->
    0;
find_in_index_(_, _, _, <<_Offset:?INDEX_OFFSET_BITS/unsigned,
                          Position:?INDEX_POS_BITS/unsigned>>) ->
    Position;
%% exact hit on the first of the remaining entries
find_in_index_(_, Id, BaseOffset, <<Offset:?INDEX_OFFSET_BITS/unsigned,
                                    Position:?INDEX_POS_BITS/unsigned,
                                    _/binary>>)
  when Id =:= BaseOffset + Offset ->
    Position;
%% special case for below the first offset in a multi-entry index, but
%% I worry that it might be overly broad.
find_in_index_(_, Id, BaseOffset, <<Offset:?INDEX_OFFSET_BITS/unsigned,
                                    _Position:?INDEX_POS_BITS/unsigned,
                                    _Offset2:?INDEX_OFFSET_BITS/unsigned,
                                    _Position2:?INDEX_POS_BITS/unsigned,
                                    _/binary>>)
  when BaseOffset + Offset > Id ->
    0;
%% Id falls between this entry and the next: this entry's position wins.
find_in_index_(_, Id, BaseOffset, <<_Offset:?INDEX_OFFSET_BITS/unsigned,
                                    Position:?INDEX_POS_BITS/unsigned,
                                    Offset2:?INDEX_OFFSET_BITS/unsigned,
                                    _Pos2:?INDEX_POS_BITS/unsigned, _/binary>>)
  when BaseOffset + Offset2 > Id ->
    Position;
%% keep scanning: drop one entry and top the window back up from the file
find_in_index_(Fd, Id, BaseOffset, <<_Offset:?INDEX_OFFSET_BITS/unsigned,
                                     _Pos:?INDEX_POS_BITS/unsigned, Rest/binary>>) ->
    case file:read(Fd, ?INDEX_ENTRY_SIZE) of
        {ok, Bytes} ->
            find_in_index_(Fd, Id, BaseOffset, <<Rest/binary, Bytes/binary>>);
        _ ->
            find_in_index_(Fd, Id, BaseOffset, Rest)
    end.

%% ---- file boundary: src/vg_log_segments.erl ----

%% Tracks log segment files per topic/partition in an ETS table and maps
%% record ids to segment files and byte offsets.
-module(vg_log_segments).

-export([init_table/0,
         load_existing/2,
         load_all/2,
         delete_segments/2,
         delete_indexes/2,
         regenerate_indexes/2,
         cleanup_segments_table/2,
         insert/3,
         local/2,
         find_log_segment/3,
         find_active_segment/2,
         find_segment_offset/3,
         find_record_offset/4,
         new_index_log_files/2,
         find_latest_id/3,

         %% for testing
         last_in_index/3]).

-include("vg.hrl").
%% ets:select building blocks for the {Topic, Partition, SegmentId} table.
-define(LOG_SEGMENT_MATCH_PATTERN(Topic, Partition), {Topic,Partition,'$1'}).
-define(LOG_SEGMENT_GUARD(RecordId), [{is_integer, '$1'}, {'=<', '$1', RecordId}]).
-define(LOG_SEGMENT_RETURN, ['$1']).

init_table() ->
    ets:new(?SEGMENTS_TABLE, [bag, public, named_table, {read_concurrency, true}]).

%% Register the on-disk segments for a topic/partition; throws
%% {topic_not_found, _, _} if the topic directory has no segments.
load_existing(Topic, Partition) ->
    TopicDir = vg_utils:topic_dir(Topic, Partition),
    case filelib:wildcard(filename:join(TopicDir, "*.log")) of
        [] ->
            throw({topic_not_found, Topic, Partition});
        LogSegments ->
            load_segments(Topic, Partition, LogSegments)
    end.

%% Like load_existing/2 but an empty directory initializes segment 0 and a
%% zero high-water mark instead of throwing.
load_all(Topic, Partition) ->
    TopicDir = vg_utils:topic_dir(Topic, Partition),
    case filelib:wildcard(filename:join(TopicDir, "*.log")) of
        [] ->
            insert(Topic, Partition, 0),
            vg_topics:insert_hwm(Topic, Partition, 0),
            [];
        LogSegments ->
            load_segments(Topic, Partition, LogSegments)
    end.

%% Insert each segment (named <base-offset>.log) and return the offsets.
load_segments(Topic, Partition, LogSegments) ->
    [begin
         SegmentId = list_to_integer(filename:basename(LogSegment, ".log")),
         insert(Topic, Partition, SegmentId),
         SegmentId
     end || LogSegment <- LogSegments].

%% Remove every file in the topic directory and the directory itself.
delete_segments(Topic, Partition) ->
    TopicDir = vg_utils:topic_dir(Topic, Partition),
    AllFiles = filelib:wildcard(filename:join(TopicDir, "*")),
    ok = lists:foreach(fun file:delete/1, AllFiles),
    file:del_dir(TopicDir),
    ok.

delete_indexes(Topic, Partition) ->
    TopicDir = vg_utils:topic_dir(Topic, Partition),
    AllFiles = filelib:wildcard(filename:join(TopicDir, "*.index")),
    ok = lists:foreach(fun file:delete/1, AllFiles).

%% Rebuild every index file from its log segment (see regenerate_index/1).
regenerate_indexes(Topic, Partition) ->
    TopicDir = vg_utils:topic_dir(Topic, Partition),
    AllFiles = filelib:wildcard(filename:join(TopicDir, "*.log")),
    ok = lists:foreach(fun regenerate_index/1, AllFiles).
%% Rebuild one segment's index by scanning its log file and writing an index
%% entry roughly every index_interval_bytes of log data.
regenerate_index(LogFilename) ->
    TopicDir = filename:dirname(LogFilename),
    StrID = filename:basename(LogFilename, ".log"),
    ID = list_to_integer(StrID),
    IndexFilename = vg_utils:index_file(TopicDir, ID),
    {ok, IndexFile} = vg_utils:open_append(IndexFilename),
    {ok, LogFile} = vg_utils:open_read(LogFilename),

    %% ignore index_max_bytes because it makes no sense without the
    %% ability to rewrite the segments
    {ok, IndexInterval} = application:get_env(vonnegut, index_interval_bytes),
    %% seed Bytes with a huge value so the very first record is always indexed
    regen(file:pread(LogFile, 0, ?OFFSET_AND_LENGTH_BYTES), 0, LogFile, ID, IndexFile, 99999999, IndexInterval).

%% Walk <<Offset:64, Size:32>> record headers; Bytes accumulates log bytes
%% seen since the last index entry. (A pread {error, _} intentionally
%% crashes with function_clause.)
regen(eof, _Location, Log, _ID, Index, _Bytes, _IndexInterval) ->
    file:close(Log),
    file:close(Index),
    ok;
%% FIX: the header pattern here was destroyed during text extraction;
%% reconstructed as the 64-bit offset + 32-bit size header implied by
%% ?OFFSET_AND_LENGTH_BYTES and the Offset/Size uses below.
regen({ok, <<Offset:64/signed-integer, Size:32/signed-integer>>}, Location, Log, BaseOffset, Index, Bytes,
      IndexInterval) ->
    TotalSize = Size + ?OFFSET_AND_LENGTH_BYTES,
    NextLocation = Location + TotalSize,
    NewBytes =
        case Bytes + TotalSize >= IndexInterval of
            true ->
                %% FIX: the position field was written with
                %% ?INDEX_OFFSET_BITS; readers (vg_index) decode it with
                %% ?INDEX_POS_BITS, so write it with the same macro.
                IndexEntry = <<(Offset - BaseOffset):?INDEX_OFFSET_BITS/unsigned,
                               Location:?INDEX_POS_BITS/unsigned>>,
                ok = file:write(Index, IndexEntry),
                0;
            _ ->
                Bytes + TotalSize
        end,
    regen(file:pread(Log, NextLocation, ?OFFSET_AND_LENGTH_BYTES), NextLocation, Log, BaseOffset, Index, NewBytes,
          IndexInterval).

%% Drop every row for Topic/Partition from the segments table and decrement
%% the gauge accordingly.
cleanup_segments_table(Topic, Partition) ->
    NumDeleted = ets:select_delete(?SEGMENTS_TABLE,
                                   [{?LOG_SEGMENT_MATCH_PATTERN(Topic, Partition),
                                     [],
                                     ?LOG_SEGMENT_RETURN}]),
    lager:info("deleted ~p segments from the table", [NumDeleted]),
    %% FIX: insert/3 labels this gauge with [Topic] and increments by 1;
    %% decrementing with [NumDeleted] as the label created a bogus label
    %% value and never reduced the topic's own gauge.
    prometheus_gauge:dec(log_segments, [Topic], NumDeleted).

insert(Topic, Partition, SegmentId) ->
    prometheus_gauge:inc(log_segments, [Topic]),
    ets:insert(?SEGMENTS_TABLE, {Topic, Partition, SegmentId}).
121 | 122 | local(Topic, Partition) -> 123 | case ets:select(?SEGMENTS_TABLE, [{?LOG_SEGMENT_MATCH_PATTERN(Topic, Partition), 124 | ?LOG_SEGMENT_GUARD(0), 125 | ?LOG_SEGMENT_RETURN}]) of 126 | [] -> false; 127 | _ -> true 128 | end. 129 | 130 | -spec find_log_segment(Topic, Partition, RecordId) -> integer() when 131 | Topic :: binary(), 132 | Partition :: integer(), 133 | RecordId :: integer(). 134 | find_log_segment(Topic, Partition, RecordId) -> 135 | %% Find all registered log segments for topic-partition < the recordid we are looking for 136 | case find_log_segment_(Topic, Partition, RecordId) of 137 | [] -> 138 | %% load from disk and try again 139 | load_existing(Topic, Partition), 140 | find_log_segment_(Topic, Partition, RecordId); 141 | Match -> 142 | %% Return largest, being the largest log segment 143 | %% offset that is still less than the record offset 144 | Match 145 | end. 146 | 147 | %% internal version that won't try again if no match found 148 | find_log_segment_(Topic, Partition, RecordId) -> 149 | %% Find all registered log segments for topic-partition < the recordid we are looking for 150 | case ets:select(?SEGMENTS_TABLE, [{?LOG_SEGMENT_MATCH_PATTERN(Topic, Partition), 151 | ?LOG_SEGMENT_GUARD(RecordId), 152 | ?LOG_SEGMENT_RETURN}]) of 153 | [] -> 154 | []; 155 | Matches -> 156 | %% Return largest, being the largest log segment 157 | %% offset that is still less than the record offset 158 | lists:max(Matches) 159 | end. 160 | 161 | -spec find_active_segment(Topic, Partition) -> integer() when 162 | Topic :: binary(), 163 | Partition :: integer(). 164 | find_active_segment(Topic, Partition) -> 165 | case ets:select(?SEGMENTS_TABLE, [{?LOG_SEGMENT_MATCH_PATTERN(Topic, Partition), 166 | [], 167 | ?LOG_SEGMENT_RETURN}]) of 168 | [] -> 169 | %% check disk 170 | case load_existing(Topic, Partition) of 171 | [] -> 172 | 0; 173 | Segments -> 174 | lists:max(Segments) 175 | end; 176 | Matches -> 177 | lists:max(Matches) 178 | end. 
179 | 180 | -spec find_segment_offset(Topic, Partition, RecordId) -> {integer(), {integer(), integer()}} when 181 | Topic :: binary(), 182 | Partition :: integer(), 183 | RecordId :: integer(). 184 | find_segment_offset(_Topic, _Partition, 0) -> 185 | {0, {0, 0}}; 186 | find_segment_offset(Topic, Partition, RecordId) when RecordId >= 0 -> 187 | SegmentId = find_log_segment(Topic, Partition, RecordId), 188 | {SegmentId, find_record_offset(Topic, Partition, SegmentId, RecordId)}. 189 | 190 | -spec find_record_offset(Topic, Partition, SegmentId, RecordId) -> {integer(), integer()} when 191 | Topic :: binary(), 192 | Partition :: integer(), 193 | SegmentId :: integer(), 194 | RecordId :: integer(). 195 | find_record_offset(Topic, Partition, SegmentId, RecordId) -> 196 | TopicDir = vg_utils:topic_dir(Topic, Partition), 197 | LogSegmentFilename = vg_utils:log_file(TopicDir, SegmentId), 198 | IndexSegmentFilename = vg_utils:index_file(TopicDir, SegmentId), 199 | 200 | %% Open log and index segment files, advise the OS we'll be reading randomly from them 201 | case vg_utils:open_read(LogSegmentFilename) of 202 | {ok, LogSegmentFD} -> 203 | file:advise(LogSegmentFD, 0, 0, random), 204 | {ok, IndexSegmentFD} = vg_utils:open_read(IndexSegmentFilename), 205 | file:advise(IndexSegmentFD, 0, 0, random), 206 | 207 | try 208 | InitialOffset = vg_index:find_in_index(IndexSegmentFD, SegmentId, RecordId), 209 | lager:info("InitialOffset topic=~p segment_id=~p initial_offset=~p", [Topic, SegmentId, InitialOffset]), 210 | find_in_log(LogSegmentFD, RecordId, InitialOffset) 211 | after 212 | file:close(LogSegmentFD), 213 | file:close(IndexSegmentFD) 214 | end; 215 | {error, enoent} -> 216 | throw({topic_not_found, Topic, Partition}) 217 | end. 218 | 219 | %% Find the position in Log file of the start of a log with id Id 220 | -spec find_in_log(Log, Id, Position) -> {integer(), integer()} when 221 | Log :: file:fd(), 222 | Id :: integer(), 223 | Position :: integer(). 
224 | find_in_log(Log, Id, Position) -> 225 | {ok, _} = file:position(Log, Position), 226 | find_in_log(Log, Id, Position, 0, file:read(Log, ?OFFSET_AND_LENGTH_BYTES)). 227 | 228 | find_in_log(_Log, Id, Position, LastSize, {ok, <>}) when FileId > Id -> 229 | {Position, LastSize}; 230 | find_in_log(_Log, Id, Position, LastSize, {ok, <>}) -> 231 | {Position+LastSize, 0}; 232 | find_in_log(Log, Id, Position, LastSize, {ok, <>}) -> 233 | case file:read(Log, Size + ?OFFSET_AND_LENGTH_BYTES) of 234 | {ok, <<_:Size/binary, Data:?OFFSET_AND_LENGTH_BYTES/binary>>} -> 235 | find_in_log(Log, Id, Position+LastSize, Size+?OFFSET_AND_LENGTH_BYTES, {ok, Data}); 236 | {ok, <>} -> 237 | case D of 238 | <<_LeaderEpoch:32/signed-integer, 239 | ?MAGIC_TWO:8/signed-integer, 240 | _CRC:32/signed-integer, 241 | _Attributes:16/signed-integer, 242 | LastOffsetDelta:32/signed-integer, _/binary>> when LastOffsetDelta + FileId >= Id -> 243 | {Position+LastSize, Size+?OFFSET_AND_LENGTH_BYTES}; 244 | _ -> 245 | {Position+LastSize+Size+?OFFSET_AND_LENGTH_BYTES, 0} 246 | end; 247 | eof -> 248 | {Position+LastSize, Size+?OFFSET_AND_LENGTH_BYTES} 249 | end; 250 | find_in_log(_Log, _Id, Position, LastSize, _) -> 251 | {Position+LastSize, 0}. 252 | 253 | find_latest_id(TopicDir, Topic, Partition) -> 254 | SegmentId = vg_log_segments:find_active_segment(Topic, Partition), 255 | IndexFilename = vg_utils:index_file(TopicDir, SegmentId), 256 | {Offset, Position} = last_in_index(TopicDir, IndexFilename, SegmentId), 257 | LogSegmentFilename = vg_utils:log_file(TopicDir, SegmentId), 258 | {ok, Log} = vg_utils:open_read(LogSegmentFilename), 259 | try 260 | file:position(Log, Position), 261 | NewId = find_last_log(Log, Offset, file:read(Log, ?OFFSET_AND_LENGTH_BYTES)), 262 | {NewId, IndexFilename, LogSegmentFilename} 263 | after 264 | file:close(Log) 265 | end. 
266 | 267 | %% Rolling log and index files, so open new empty ones for appending 268 | new_index_log_files(TopicDir, Id) -> 269 | IndexFilename = vg_utils:index_file(TopicDir, Id), 270 | LogFilename = vg_utils:log_file(TopicDir, Id), 271 | 272 | lager:debug("opening new log files: ~p ~p ~p", [Id, IndexFilename, LogFilename]), 273 | %% Make sure empty? 274 | {ok, IndexFile} = vg_utils:open_append(IndexFilename), 275 | {ok, LogFile} = vg_utils:open_append(LogFilename), 276 | {IndexFile, LogFile}. 277 | 278 | %% consider moving this to vg_index, but then we might need to figure 279 | %% out some other, cleaner way to do the create new case 280 | last_in_index(TopicDir, IndexFilename, SegmentId) -> 281 | case file:open(IndexFilename, [read, binary]) of 282 | {error, enoent} when SegmentId =:= 0 -> 283 | %% Index file doesn't exist, if this is the first segment (0) 284 | %% we can just create the files assuming this is a topic creation. 285 | %% Will fail if an empty topic-partition dir exists on boot since 286 | %% vg_topic_sup will not be started yet. 287 | {NewIndexFile, NewLogFile} = new_index_log_files(TopicDir, SegmentId), 288 | file:close(NewIndexFile), 289 | file:close(NewLogFile), 290 | {-1, 0}; 291 | {ok, Index} -> 292 | try 293 | case file:pread(Index, {eof, -?INDEX_ENTRY_SIZE}, ?INDEX_ENTRY_SIZE) of 294 | {ok, <>} -> 295 | %% index stores offsets as offset from SegmentId 296 | %% so add SegmentId here to get the real id 297 | {Offset+SegmentId, Position}; 298 | _ -> 299 | {-1, 0} 300 | end 301 | after 302 | file:close(Index) 303 | end 304 | end. 
305 | 306 | %% Find the Id for the last log in the log file Log 307 | find_last_log(Log, _, {ok, <>}) -> 308 | case file:read(Log, Size + ?OFFSET_AND_LENGTH_BYTES) of 309 | {ok, <>} -> 310 | LastOffsetDelta = vg_protocol:last_offset_delta(Batch), 311 | find_last_log(Log, NewId+LastOffsetDelta, {ok, Data}); 312 | {ok, <>} -> 313 | LastOffsetDelta = vg_protocol:last_offset_delta(Batch), 314 | NewId + LastOffsetDelta 315 | end; 316 | find_last_log(_Log, Id, _) -> 317 | Id. 318 | 319 | -------------------------------------------------------------------------------- /src/vg_peer_service.erl: -------------------------------------------------------------------------------- 1 | -module(vg_peer_service). 2 | 3 | -export([join/1, 4 | leave/0, 5 | on_down/2, 6 | members/0, 7 | manager/0, 8 | stop/0, 9 | stop/1]). 10 | 11 | join(Node) -> 12 | partisan_peer_service:join(Node). 13 | 14 | leave() -> 15 | partisan_peer_service:leave([]). 16 | 17 | on_down(Name, Fun) -> 18 | partisan_default_peer_service_manager:on_down(Name, Fun). 19 | 20 | members() -> 21 | partisan_peer_service:members(). 22 | 23 | manager() -> 24 | partisan_peer_service:manager(). 25 | 26 | stop() -> 27 | partisan_peer_service:stop("received stop request"). 28 | 29 | stop(Reason) -> 30 | partisan_peer_service:stop(Reason). 31 | -------------------------------------------------------------------------------- /src/vg_pool.erl: -------------------------------------------------------------------------------- 1 | -module(vg_pool). 2 | 3 | -behaviour(acceptor_pool). 4 | 5 | -export([start_link/1, 6 | accept_socket/2]). 7 | 8 | -export([init/1]). 9 | 10 | %% public api 11 | 12 | start_link(Role) -> 13 | acceptor_pool:start_link({local, ?MODULE}, ?MODULE, [Role]). 14 | 15 | accept_socket(Socket, Acceptors) -> 16 | acceptor_pool:accept_socket(?MODULE, Socket, Acceptors). 
17 | 18 | %% acceptor_pool api 19 | 20 | init([Role]) -> 21 | Conn = #{id => vg_conn, 22 | start => {vg_conn, [Role], []}, 23 | grace => 5000}, % Give connections 5000ms to close before shutdown 24 | {ok, {#{}, [Conn]}}. 25 | -------------------------------------------------------------------------------- /src/vg_pool_sup.erl: -------------------------------------------------------------------------------- 1 | -module(vg_pool_sup). 2 | 3 | -behaviour(supervisor). 4 | 5 | %% public api 6 | 7 | -export([start_link/1]). 8 | 9 | %% supervisor api 10 | 11 | -export([init/1]). 12 | 13 | %% public api 14 | 15 | start_link(Role) -> 16 | supervisor:start_link({local, ?MODULE}, ?MODULE, [Role]). 17 | 18 | %% supervisor api 19 | 20 | init([Role]) -> 21 | Flags = #{strategy => rest_for_one}, 22 | Pool = #{id => vg_pool, 23 | start => {vg_pool, start_link, [Role]}}, 24 | Socket = #{id => vg_socket, 25 | start => {vg_socket, start_link, []}}, 26 | {ok, {Flags, [Pool, Socket]}}. 27 | -------------------------------------------------------------------------------- /src/vg_socket.erl: -------------------------------------------------------------------------------- 1 | -module(vg_socket). 2 | 3 | -behaviour(gen_server). 4 | 5 | %% public api 6 | 7 | -export([start_link/0]). 8 | 9 | %% gen_server api 10 | 11 | -export([init/1, 12 | handle_call/3, 13 | handle_cast/2, 14 | handle_info/2, 15 | code_change/3, 16 | terminate/2]). 17 | 18 | %% public api 19 | 20 | start_link() -> 21 | gen_server:start_link({local, ?MODULE}, ?MODULE, [], []). 
22 | 23 | %% gen_server api 24 | 25 | init([]) -> 26 | Port = vg_config:port(), 27 | AcceptorPoolSize = application:get_env(vonnegut, acceptor_pool_size, 10), 28 | %% Trapping exit so can close socket in terminate/2 29 | _ = process_flag(trap_exit, true), 30 | Opts = [{active, once}, {reuseaddr, true}, {buffer, 65535}, 31 | {nodelay, true}, {mode, binary}, {packet, raw}], 32 | case gen_tcp:listen(Port, Opts) of 33 | {ok, Socket} -> 34 | %% acceptor could close the socket if there is a problem 35 | MRef = monitor(port, Socket), 36 | vg_pool:accept_socket(Socket, AcceptorPoolSize), 37 | {ok, {Socket, MRef}}; 38 | {error, Reason} -> 39 | {stop, Reason} 40 | end. 41 | 42 | handle_call(Req, _, State) -> 43 | {stop, {bad_call, Req}, State}. 44 | 45 | handle_cast(Req, State) -> 46 | {stop, {bad_cast, Req}, State}. 47 | 48 | handle_info({'DOWN', MRef, port, Socket, Reason}, {Socket, MRef} = State) -> 49 | {stop, Reason, State}; 50 | handle_info(_, State) -> 51 | {noreply, State}. 52 | 53 | code_change(_, State, _) -> 54 | {ok, State}. 55 | 56 | terminate(_, {Socket, MRef}) -> 57 | % Socket may already be down but need to ensure it is closed to avoid 58 | % eaddrinuse error on restart 59 | case demonitor(MRef, [flush, info]) of 60 | true -> gen_tcp:close(Socket); 61 | false -> ok 62 | end. 63 | -------------------------------------------------------------------------------- /src/vg_topic_mgr.erl: -------------------------------------------------------------------------------- 1 | %% doesn't need to be constantly running along side the active segment. 2 | %% TODO: turn into a one off proc that triggers when needed. 3 | -module(vg_topic_mgr). 4 | 5 | -behaviour(gen_server). 6 | 7 | %% API 8 | -export([ 9 | start_link/3, 10 | delete_topic/2, 11 | regenerate_index/2 12 | ]). 13 | 14 | %% gen_server callbacks 15 | -export([init/1, handle_call/3, handle_cast/2, handle_info/2, 16 | terminate/2, code_change/3]). 17 | 18 | -define(SERVER, ?MODULE). 
19 | 20 | -record(state, 21 | { 22 | topic :: binary(), 23 | partition :: non_neg_integer(), 24 | next :: atom() 25 | }). 26 | 27 | %%%=================================================================== 28 | %%% API 29 | %%%=================================================================== 30 | 31 | %% need this until an Erlang release with `hibernate_after` spec added to gen option type 32 | -dialyzer({nowarn_function, start_link/3}). 33 | 34 | -define(TOPIC_MGR(Topic, Partition), {via, gproc, {n, l, {mgr, Topic, Partition}}}). 35 | 36 | start_link(Topic, Partition, Next) -> 37 | case gen_server:start_link(?TOPIC_MGR(Topic, Partition), ?MODULE, [Topic, Partition, Next], 38 | [{hibernate_after, timer:minutes(5)}]) of % hibernate after 5 minutes with no messages 39 | {ok, Pid} -> 40 | {ok, Pid}; 41 | {error, {already_started, Pid}} -> 42 | {ok, Pid}; 43 | {error, Reason} -> 44 | {error, Reason} 45 | end. 46 | 47 | delete_topic(Topic, Partition) -> 48 | %% may need to start the topic if this fails? 49 | gen_server:call(?TOPIC_MGR(Topic, Partition), delete_topic, timer:seconds(45)). 50 | 51 | regenerate_index(Topic, Partition) -> 52 | %% may need to start the topic if this fails? 53 | gen_server:call(?TOPIC_MGR(Topic, Partition), regenerate_index, timer:minutes(15)). 54 | 55 | %%%=================================================================== 56 | %%% gen_server callbacks 57 | %%%=================================================================== 58 | 59 | init([Topic, Partition, Next]) -> 60 | {ok, #state{topic = Topic, 61 | partition = Partition, 62 | next = Next}}. 
63 | 64 | handle_call(delete_topic, _From, #state{topic = Topic, next = Next, 65 | partition = Partition} = State) -> 66 | %% halt the active segment 67 | lager:info("halting active segment"), 68 | halted = vg_active_segment:halt(Topic, Partition), 69 | %% delete the segments 70 | lager:info("deleting segments"), 71 | ok = vg_log_segments:delete_segments(Topic, Partition), 72 | %% remove HWM 73 | true = vg_topics:delete_hwm(Topic, Partition), 74 | %% clean the segments table 75 | vg_log_segments:cleanup_segments_table(Topic, Partition), 76 | %% delete the next 77 | case Next of 78 | tail -> ok; 79 | _ -> 80 | lager:info("propagating delete"), 81 | ok = vg_client:delete_topic(next_brick, Topic) 82 | end, 83 | {reply, ok, State}; 84 | %% note that this needs to be done per node, we don't automatically 85 | %% propagate it 86 | handle_call(regenerate_index, _From, #state{topic = Topic, 87 | partition = Partition} = State) -> 88 | %% tell active_segment to stop writing indexes 89 | ok = vg_active_segment:stop_indexing(Topic, Partition), 90 | %% delete all index files 91 | ok = vg_log_segments:delete_indexes(Topic, Partition), 92 | %% fold over segments and restore indexes 93 | ok = vg_log_segments:regenerate_indexes(Topic, Partition), 94 | %% tell active_segment to resume writing indexes 95 | ok = vg_active_segment:resume_indexing(Topic, Partition), 96 | {reply, ok, State}; 97 | handle_call(_Request, _From, State) -> 98 | lager:warning("unexpected call ~p from ~p", [_Request, _From]), 99 | {noreply, State}. 100 | 101 | handle_cast(_Msg, State) -> 102 | lager:warning("unexpected cast ~p", [_Msg]), 103 | {noreply, State}. 104 | 105 | handle_info(_Info, State) -> 106 | lager:warning("unexpected message ~p", [_Info]), 107 | {noreply, State}. 108 | 109 | terminate(_Reason, _State) -> 110 | ok. 111 | 112 | code_change(_OldVsn, State, _Extra) -> 113 | {ok, State}. 
114 | 115 | %%%=================================================================== 116 | %%% Internal functions 117 | %%%=================================================================== 118 | -------------------------------------------------------------------------------- /src/vg_topic_sup.erl: -------------------------------------------------------------------------------- 1 | %%%------------------------------------------------------------------- 2 | %% @doc vonnegut top level supervisor. 3 | %% @end 4 | %%%------------------------------------------------------------------- 5 | 6 | -module(vg_topic_sup). 7 | 8 | -behaviour(supervisor). 9 | 10 | %% API 11 | -export([start_link/2]). 12 | 13 | %% Supervisor callbacks 14 | -export([init/1]). 15 | 16 | -define(SERVER, ?MODULE). 17 | 18 | %%==================================================================== 19 | %% API functions 20 | %%==================================================================== 21 | 22 | start_link(Topic, Partitions) -> 23 | supervisor:start_link(?MODULE, [Topic, Partitions]). 24 | 25 | %%==================================================================== 26 | %% Supervisor callbacks 27 | %%==================================================================== 28 | 29 | %% Child :: {Id,StartFunc,Restart,Shutdown,Type,Modules} 30 | init([Topic, Partitions]) -> 31 | ChildSpecs = lists:flatten([child_specs(Topic, Partition) || Partition <- Partitions]), 32 | {ok, {{one_for_one, 0, 1}, ChildSpecs}}. 33 | 34 | %%==================================================================== 35 | %% Internal functions 36 | %%==================================================================== 37 | 38 | child_specs(Topic, Partition) -> 39 | %% wait for the chain to be active? 
40 | Next = vg_chain_state:next(), 41 | [#{id => {active, Topic, Partition}, 42 | start => {vg_active_segment, start_link, [Topic, Partition, Next]}, 43 | restart => transient, 44 | type => worker}, 45 | #{id => {mgr, Topic, Partition}, 46 | start => {vg_topic_mgr, start_link, [Topic, Partition, Next]}, 47 | restart => transient, 48 | type => worker} 49 | | case application:get_env(vonnegut, log_cleaner, true) of 50 | true -> 51 | [#{id => {cleaner, Topic, Partition}, 52 | start => {vg_cleaner, start_link, [Topic, Partition]}, 53 | restart => permanent, 54 | type => worker}]; 55 | false -> 56 | [] 57 | end]. 58 | -------------------------------------------------------------------------------- /src/vg_topics.erl: -------------------------------------------------------------------------------- 1 | -module(vg_topics). 2 | 3 | -export([init_table/0, 4 | 5 | all/0, 6 | get_chain/1, 7 | 8 | insert_hwm/3, 9 | lookup_hwm/2, 10 | update_hwm/3, 11 | delete_hwm/2]). 12 | 13 | -include("vg.hrl"). 14 | 15 | -define(HWM_POS, 2). %% {{Topic, Partition}, HighWaterMark} 16 | 17 | init_table() -> 18 | ets:new(?WATERMARK_TABLE, [set, public, named_table, {write_concurrency, true}]). 19 | 20 | all() -> 21 | %% replace with ets table keys 22 | {Topics, _Chains, _Epoch} = vg_cluster_mgr:get_map(), 23 | maps:keys(Topics). 24 | 25 | get_chain(Topic) -> 26 | %% replace with ets table lookup 27 | {Topics, Chains, _Epoch} = vg_cluster_mgr:get_map(), 28 | case maps:get(Topic, Topics, not_found) of 29 | not_found -> 30 | lager:info("lookup for non-existant topic ~p", [Topic]), 31 | not_found; 32 | Chain -> 33 | maps:get(Chain, Chains) 34 | end. 35 | 36 | insert_hwm(Topic, Partition, HWM) -> 37 | ets:insert(?WATERMARK_TABLE, {{Topic, Partition}, HWM}). 
38 | 39 | lookup_hwm(Topic, Partition) -> 40 | try ets:lookup_element(?WATERMARK_TABLE, {Topic, Partition}, ?HWM_POS) 41 | catch 42 | error:badarg -> 43 | %% maybe just not loaded, try to get from disk first 44 | TopicDir = vg_utils:topic_dir(Topic, Partition), 45 | try vg_log_segments:find_latest_id(TopicDir, Topic, Partition) of 46 | {HWM, _, _} -> 47 | ets:insert(?WATERMARK_TABLE, {{Topic, Partition}, HWM}), 48 | HWM 49 | catch error:{badmatch,{error,enoent}} -> 50 | throw({topic_not_found, Topic, Partition}) 51 | end 52 | end. 53 | 54 | update_hwm(Topic, Partition, HWMUpdate) -> 55 | try 56 | true = ets:update_element(?WATERMARK_TABLE, {Topic, Partition}, {?HWM_POS, HWMUpdate}) 57 | catch 58 | error:badarg -> 59 | throw(hwm_table_not_loaded) 60 | end. 61 | 62 | delete_hwm(Topic, Partition) -> 63 | ets:delete(?WATERMARK_TABLE, {Topic, Partition}). 64 | -------------------------------------------------------------------------------- /src/vg_topics_sup.erl: -------------------------------------------------------------------------------- 1 | %%%------------------------------------------------------------------- 2 | %% @doc vonnegut topics supervisor. 3 | %% @end 4 | %%%------------------------------------------------------------------- 5 | 6 | -module(vg_topics_sup). 7 | 8 | -behaviour(supervisor). 9 | 10 | %% API 11 | -export([start_link/0, 12 | start_child/1, 13 | start_child/2, 14 | start_child/3, 15 | start_child/4, 16 | list_topics/1, 17 | stop_child/3]). 18 | 19 | %% Supervisor callbacks 20 | -export([init/1]). 21 | 22 | -define(SERVER, ?MODULE). 23 | 24 | %%==================================================================== 25 | %% API functions 26 | %%==================================================================== 27 | 28 | start_link() -> 29 | supervisor:start_link({local, ?SERVER}, ?MODULE, []). 30 | 31 | start_child(Topic) -> 32 | start_child(Topic, [0]). 33 | 34 | start_child(Topic, Partitions) -> 35 | start_child(local, Topic, Partitions). 
36 | 37 | start_child(Server0, Topic, Partitions) -> 38 | %% since it's crucial to start remote children, block for a while 39 | start_child(Server0, Topic, Partitions, 300). 40 | 41 | start_child(_, _, _, 0) -> 42 | {error, remote_node_down}; 43 | start_child(Server0, Topic, Partitions, Retries) -> 44 | Server = case Server0 of 45 | local -> ?SERVER; 46 | _ -> {?SERVER, Server0} 47 | end, 48 | lager:info("at=create_topic node=~p topic=~p partitions=~p target=~p", 49 | [node(), Topic, Partitions, Server0]), 50 | prometheus_gauge:inc(active_topics), 51 | try 52 | case supervisor:start_child(Server, [Topic, Partitions]) of 53 | {ok, Pid} -> 54 | {ok, Pid}; 55 | {error, {already_started, Pid}} -> 56 | {ok, Pid}; 57 | {error, {shutdown, {failed_to_start_child, _, Reason}}} -> 58 | {error, Reason} 59 | end 60 | catch _C:_E-> 61 | lager:info("~p : ~p", [_C,_E]), 62 | timer:sleep(100), 63 | start_child(Server0, Topic, Partitions, Retries - 1) 64 | end. 65 | 66 | stop_child(Server, Topic, Partitions) -> 67 | %% get a list of topic_sup supervisors 68 | Topics = supervisor:which_children({?MODULE, Server}), 69 | Topics1 = [{Pid, supervisor:which_children(Pid)} 70 | || {_, Pid, _, _} <- Topics], 71 | Res = 72 | [[case Topic =:= Top andalso lists:member(Part, Partitions) of 73 | true -> 74 | supervisor:terminate_child({?MODULE, Server}, Pid); 75 | _ -> 76 | ok 77 | end 78 | || {{active, Top, Part}, _, _, _} <- Children] 79 | || {Pid, Children} <- Topics1], 80 | lists:usort(lists:flatten(Res)). 81 | 82 | list_topics(Server0) -> 83 | Server = case Server0 of 84 | local -> ?SERVER; 85 | _ -> {?SERVER, Server0} 86 | end, 87 | Topics = supervisor:which_children(Server), 88 | [{Topic, Partition} || {{active, Topic, Partition}, _, _, _} <- 89 | lists:flatten([supervisor:which_children(Pid) 90 | || {_, Pid, _, _} <- Topics])]. 
91 | 92 | %%==================================================================== 93 | %% Supervisor callbacks 94 | %%==================================================================== 95 | 96 | %% Child :: {Id,StartFunc,Restart,Shutdown,Type,Modules} 97 | init([]) -> 98 | SupFlags = #{strategy => simple_one_for_one, 99 | intensity => 0, 100 | period => 1}, 101 | ChildSpecs = [#{id => vg_topic_sup, 102 | start => {vg_topic_sup, start_link, []}, 103 | restart => permanent, 104 | type => supervisor, 105 | shutdown => 5000}], 106 | {ok, {SupFlags, ChildSpecs}}. 107 | 108 | %%==================================================================== 109 | %% Internal functions 110 | %%==================================================================== 111 | 112 | -------------------------------------------------------------------------------- /src/vg_utils.erl: -------------------------------------------------------------------------------- 1 | -module(vg_utils). 2 | 3 | -export([index_file/2, 4 | index_file/3, 5 | log_file/2, 6 | log_file/3, 7 | topic_dir/2, 8 | open_append/1, 9 | open_read/1, 10 | 11 | topics_on_disk/0, 12 | 13 | to_atom/1, 14 | to_integer/1]). 15 | 16 | %% Convenience functions for creating index and log file names 17 | index_file(TopicDir, Id) -> 18 | filename:join(TopicDir, io_lib:format("~20.10.0b.index", [Id])). 19 | 20 | index_file(Topic, Partition, Id) -> 21 | TopicDir = topic_dir(Topic, Partition), 22 | filename:join(TopicDir, io_lib:format("~20.10.0b.index", [Id])). 23 | 24 | log_file(Topic, Partition, Id) -> 25 | TopicDir = topic_dir(Topic, Partition), 26 | filename:join(TopicDir, io_lib:format("~20.10.0b.log", [Id])). 27 | 28 | log_file(TopicDir, Id) -> 29 | filename:join(TopicDir, io_lib:format("~20.10.0b.log", [Id])). 30 | 31 | topic_dir(Topic, Partition) -> 32 | {ok, [LogDir | _]} = application:get_env(vonnegut, log_dirs), 33 | filename:join(LogDir, [binary_to_list(Topic), "-", integer_to_list(Partition)]). 
34 | 35 | topics_on_disk() -> 36 | {ok, [DataDir| _]} = application:get_env(vonnegut, log_dirs), 37 | TopicPartitions = filelib:wildcard(filename:join(DataDir, "*")), 38 | TPDict = lists:foldl(fun(TP, Acc) -> 39 | case string:tokens(filename:basename(TP), "-") of 40 | [_] -> 41 | Acc; 42 | L -> 43 | [P | TopicR] = lists:reverse(L), 44 | T = string:join(lists:reverse(TopicR), "-"), 45 | dict:append_list(list_to_binary(T), [list_to_integer(P)], Acc) 46 | end 47 | end, dict:new(), TopicPartitions), 48 | dict:to_list(TPDict). 49 | 50 | 51 | open_append(Filename) -> 52 | case application:get_env(vonnegut, delayed_write) of 53 | {ok, true} -> 54 | %% Buffer writes up to DelayedWriteSize bytes or DelayMS milliseconds to save on OS calls 55 | {ok, DelayedWriteSize} = application:get_env(vonnegut, delayed_write_byte_size), 56 | {ok, DelayMS} = application:get_env(vonnegut, delayed_write_milliseconds), 57 | file:open(Filename, [append, raw, binary, {delayed_write, DelayedWriteSize, DelayMS}]); 58 | _ -> 59 | file:open(Filename, [append, raw, binary]) 60 | end. 61 | 62 | open_read(Filename) -> 63 | file:open(Filename, [read, raw, binary]). 64 | 65 | to_integer(I) when is_integer(I) -> I; 66 | to_integer(I) when is_list(I) -> list_to_integer(I); 67 | to_integer(I) when is_binary(I) -> binary_to_integer(I); 68 | to_integer(_) -> throw(badarg). 69 | 70 | to_atom(A) when is_list(A) -> list_to_atom(A); 71 | to_atom(A) when is_binary(A) -> binary_to_atom(A, utf8); 72 | to_atom(A) when is_atom(A) -> A; 73 | to_atom(_) -> throw(badarg). 
74 | -------------------------------------------------------------------------------- /src/vonnegut.app.src: -------------------------------------------------------------------------------- 1 | {application, vonnegut, 2 | [{description, "Replicated append-only log."}, 3 | {vsn, git}, 4 | {registered, []}, 5 | {mod, {vonnegut_app, []}}, 6 | {applications, 7 | [kernel, 8 | stdlib, 9 | sasl, 10 | lager, 11 | crypto, 12 | ssl, 13 | gproc, 14 | acceptor_pool, 15 | shackle, 16 | erlware_commons, 17 | backoff, 18 | partisan, 19 | elli, 20 | elli_prometheus, 21 | prometheus, 22 | 23 | hackney, 24 | jsx, 25 | oc_google_reporter, 26 | opencensus 27 | ]}, 28 | {env,[{log_dirs, ["./data"]}, 29 | {acceptor_pool_size, 10}, 30 | {client_pool_size, 10}, 31 | 32 | {send_buffer_bytes, 102400}, 33 | 34 | %% Log and index file related configs 35 | {segment_bytes, 1073741824}, 36 | {index_max_bytes, 10485760}, 37 | {index_interval_bytes, 4096}, 38 | 39 | {write_delayed, false}, 40 | {delayed_write_byte_size, 64000}, %% 64kb 41 | {delayed_write_milliseconds, 2000}, %% 2 seconds 42 | 43 | {log_cleaner, false}, 44 | {log_retention_check_interval, 5}, %% 5 minutes 45 | {log_retention_minutes, 10080}, %% 7 days 46 | 47 | {num_partitions, 1}]}, 48 | {modules, []}, 49 | 50 | {contributors, []}, 51 | {licenses, []}, 52 | {links, []} 53 | ]}. 54 | -------------------------------------------------------------------------------- /src/vonnegut_app.erl: -------------------------------------------------------------------------------- 1 | %%%------------------------------------------------------------------- 2 | %% @doc vonnegut public API 3 | %% @end 4 | %%%------------------------------------------------------------------- 5 | 6 | -module(vonnegut_app). 7 | 8 | -behaviour(application). 9 | 10 | %% Application callbacks 11 | -export([start/2, 12 | stop/1, 13 | swap_lager/1]). 
14 | 15 | %%==================================================================== 16 | %% API 17 | %%==================================================================== 18 | 19 | start(_StartType, _StartArgs) -> 20 | init_tables(), 21 | vonnegut_sup:start_link(). 22 | 23 | 24 | %%-------------------------------------------------------------------- 25 | stop(_State) -> 26 | ok. 27 | 28 | %%==================================================================== 29 | %% Internal functions 30 | %%==================================================================== 31 | 32 | init_tables() -> 33 | vg_log_segments:init_table(), 34 | vg_topics:init_table(). 35 | 36 | %% TODO: ifdef this out in non-test builds 37 | swap_lager(Pid) -> 38 | %% our testing environment has provided us with a remote 39 | %% lager sink to target messages at, but we can't target 40 | %% it directly, so proxy message through to it. 41 | Proxy = spawn(fun Loop() -> 42 | receive 43 | E -> Pid ! E 44 | end, 45 | Loop() 46 | end), 47 | Lager = whereis(lager_event), 48 | true = unregister(lager_event), 49 | case (catch register(lager_event, Proxy)) of 50 | true -> 51 | lager:info("swapped local lager_event server with: ~p", [Pid]); 52 | Other -> 53 | register(lager_event, Lager), 54 | lager:info("noes we failed: ~p", [Other]) 55 | end. 56 | -------------------------------------------------------------------------------- /src/vonnegut_sup.erl: -------------------------------------------------------------------------------- 1 | %%%------------------------------------------------------------------- 2 | %% @doc vonnegut top level supervisor. 3 | %% @end 4 | %%%------------------------------------------------------------------- 5 | 6 | -module(vonnegut_sup). 7 | 8 | -behaviour(supervisor). 9 | 10 | %% API 11 | -export([start_link/0, 12 | start_cluster_mgr/2, 13 | start_acceptor_pool/1]). 14 | 15 | %% Supervisor callbacks 16 | -export([init/1]). 17 | 18 | -define(SERVER, ?MODULE). 
19 | 20 | %%==================================================================== 21 | %% API functions 22 | %%==================================================================== 23 | 24 | start_link() -> 25 | supervisor:start_link({local, ?SERVER}, ?MODULE, []). 26 | 27 | start_cluster_mgr(Name, Nodes) -> 28 | {ok, [LogDir]} = application:get_env(vonnegut, log_dirs), 29 | ChildSpec = #{id => vg_cluster_mgr, 30 | start => {vg_cluster_mgr, start_link, [Name, Nodes, LogDir]}, 31 | restart => permanent, 32 | type => supervisor}, 33 | supervisor:start_child(?SERVER, ChildSpec). 34 | 35 | start_acceptor_pool(Role) -> 36 | ChildSpec = #{id => vg_pool_sup, 37 | start => {vg_pool_sup, start_link, [Role]}, 38 | restart => permanent, 39 | type => supervisor}, 40 | supervisor:start_child(?SERVER, ChildSpec). 41 | 42 | %%==================================================================== 43 | %% Supervisor callbacks 44 | %%==================================================================== 45 | 46 | %% Child :: {Id,StartFunc,Restart,Shutdown,Type,Modules} 47 | init([]) -> 48 | Port = application:get_env(vonnegut, http_port, 8000), 49 | ElliChild = {vonnegut_http, {elli, start_link, [[{callback, elli_middleware}, 50 | {callback_args, [{mods, [{elli_prometheus, []}, 51 | {vg_elli_handler, []}]}]}, 52 | {port, Port}]]}, 53 | permanent, 5000, worker, dynamic}, 54 | 55 | case application:get_env(vonnegut, chain, []) of 56 | [] -> 57 | {ok, {{one_for_one, 10, 30}, []}}; 58 | _ -> 59 | ChainState = {vg_chain_state, {vg_chain_state, start_link, []}, 60 | permanent, 20000, worker, [vg_chain_state]}, 61 | TopicsSup = {vg_topics_sup, {vg_topics_sup, start_link, []}, 62 | permanent, 20000, supervisor, [vg_topics_sup]}, 63 | 64 | {ok, {{one_for_one, 10, 30}, [ElliChild, TopicsSup, ChainState]}} 65 | end. 
%%====================================================================
%% Internal functions
%%====================================================================
-------------------------------------------------------------------------------- /test/cleanup_SUITE.erl: --------------------------------------------------------------------------------
-module(cleanup_SUITE).

-include_lib("eunit/include/eunit.hrl").
-include_lib("common_test/include/ct.hrl").
-compile(export_all).

-include("vg.hrl").

all() ->
    [delete_policy].

%% Boot a fresh vonnegut per test with tiny segment/index limits so
%% writes roll segments quickly, and the log cleaner enabled with a
%% 5-minute retention window.
init_per_testcase(_, Config) ->
    PrivDir = ?config(priv_dir, Config),
    application:load(vonnegut),
    Env = [{log_dirs, [filename:join(PrivDir, "data")]},
           {segment_bytes, 86},
           {index_max_bytes, 18},
           {log_cleaner, true},
           {index_interval_bytes, 24},
           {log_retention_minutes, 5},
           {chain, [{discovery, local}]}],
    [application:set_env(vonnegut, Key, Value) || {Key, Value} <- Env],
    application:ensure_all_started(vonnegut),
    crypto:start(),
    Config.

%% Unload so these env overrides do not leak into other suites.
end_per_testcase(_, Config) ->
    application:stop(vonnegut),
    application:unload(vonnegut),
    Config.
%% Write two segments, age the first past the retention window via a
%% mocked filelib:last_modified/1, run the cleaner, and verify that only
%% the aged segment was deleted.
delete_policy(_Config) ->
    {ok, LogRetentionMinutes} = application:get_env(vonnegut, log_retention_minutes),
    Topic = vg_test_utils:create_random_name(<<"test_topic">>),
    Partition = 0,
    TopicPartitionDir = vg_utils:topic_dir(Topic, Partition),
    vg:create_topic(Topic),
    ?assert(filelib:is_dir(TopicPartitionDir)),

    [vg:write(Topic, Partition, M) || M <- [crypto:strong_rand_bytes(60), crypto:strong_rand_bytes(60)]],

    %% Verify 2 segments have been created
    Segment0 = filename:join([TopicPartitionDir, "00000000000000000000.log"]),
    Segment1 = filename:join([TopicPartitionDir, "00000000000000000001.log"]),
    ?assert(filelib:is_regular(Segment0)),
    ?assert(filelib:is_regular(Segment1)),

    meck:new(filelib, [unstick, passthrough]),
    %% Mock last_modified return for Segment0 to be >= LogRetentionMinutes so it is deleted
    Now = calendar:local_time(),
    meck:expect(filelib, last_modified, fun(Segment) when Segment =:= Segment0 ->
                                                dec_datetime_by_mins(Now, LogRetentionMinutes+1);
                                           (Segment) ->
                                                meck:passthrough([Segment])
                                        end),
    %% Execute the cleaner
    vg_cleaner:run_cleaner(Topic, 0),
    meck:unload(filelib),

    %% Verify Segment0 has been deleted but not Segment1.
    %% ?assertEqual takes (Expected, Actual); the original had the
    %% arguments reversed, which garbles failure reports.
    ?assertEqual(false, filelib:is_regular(Segment0)),
    ?assertEqual(true, filelib:is_regular(Segment1)).

%%

%% Shift DateTime backwards by Minutes, returning a calendar datetime.
dec_datetime_by_mins(DateTime, Minutes) ->
    Seconds = calendar:datetime_to_gregorian_seconds(DateTime),
    Seconds1 = Seconds - (Minutes * 60),
    calendar:gregorian_seconds_to_datetime(Seconds1).
-------------------------------------------------------------------------------- /test/kafka_client_SUITE.erl: --------------------------------------------------------------------------------
-module(kafka_client_SUITE).

-include_lib("eunit/include/eunit.hrl").
-include_lib("common_test/include/ct.hrl").
-compile(export_all).
-include_lib("brod/include/brod.hrl").

all() ->
    [get_metadata]. %% produce, add back when brod supports >=0.11.0 kafka

%% Boot vonnegut with small segment limits and start a brod (Kafka)
%% client pointed at the local listener, so the suite exercises the
%% Kafka wire compatibility of the server.
init_per_suite(Config) ->
    PrivDir = ?config(priv_dir, Config),
    application:load(vonnegut),
    Env = [{log_dirs, [filename:join(PrivDir, "data")]},
           {segment_bytes, 86},
           {index_max_bytes, 18},
           {index_interval_bytes, 24},
           {chain, [{discovery, local}]}],
    [application:set_env(vonnegut, Key, Value) || {Key, Value} <- Env],
    crypto:start(),

    Port = 5588,
    Host = <<"127.0.0.1">>,
    Hosts = [{"127.0.0.1", Port}],

    application:ensure_all_started(vonnegut),
    application:ensure_all_started(brod),

    ok = brod:start_client(Hosts, brod_client_1, []),

    [{host, Host}, {port, Port}, {hosts, Hosts} | Config].

end_per_suite(Config) ->
    application:stop(vonnegut),
    application:unload(vonnegut),
    Config.

%% Metadata should list the broker (twice: once as head, once as tail)
%% and include the freshly created topic.
get_metadata(Config) ->
    Host = ?config(host, Config),
    Port = ?config(port, Config),
    Hosts = ?config(hosts, Config),

    Topic = vg_test_utils:create_random_name(<<"kafka_get_metadata">>),
    ok = vg:create_topic(Topic),

    %% same host will be in broker list twice because we send the same broker as the tail
    {ok,
     [{brokers,
       [[{node_id,0},{host,Host},{port,Port}],
        [{node_id,0},{host,Host},{port,Port}]]},
      {topic_metadata, TMs}]} = brod:get_metadata(Hosts),
    ?assert(lists:any(fun(TM) -> lists:member({topic,Topic}, TM) end, TMs)).
%% Produce via brod and read the record back through brod:fetch/4.
%% (Currently excluded from all/0 pending brod >= 0.11.0 support.)
produce(Config) ->
    Hosts = ?config(hosts, Config),
    Topic = vg_test_utils:create_random_name(<<"kafka_produce">>),
    ok = vg:create_topic(Topic),
    brod:start_producer(brod_client_1, Topic, []),

    Key = <<"I'm a key">>,
    M = <<"hello from brod">>,
    brod:produce_sync(brod_client_1, Topic, 0, Key, M),

    ?assertMatch({ok, [#kafka_message{key=Key,
                                      value=M} | _]},
                 brod:fetch(Hosts, Topic, 0, 0)),

    ok.
-------------------------------------------------------------------------------- /test/log_roll_SUITE.erl: --------------------------------------------------------------------------------
-module(log_roll_SUITE).

-include_lib("eunit/include/eunit.hrl").
-include_lib("common_test/include/ct.hrl").
-compile(export_all).

-include("vg.hrl").

all() ->
    [records_larger_than_max_segment, regenerate_index_test].

%% regenerate_index_test uses a larger segment/index budget than the
%% default case; everything else about the boot is shared.
init_per_testcase(regenerate_index_test, Config) ->
    boot_with_limits(177, 50, Config);
init_per_testcase(_, Config) ->
    boot_with_limits(86, 18, Config).

boot_with_limits(SegmentBytes, IndexMaxBytes, Config) ->
    PrivDir = ?config(priv_dir, Config),
    application:load(vonnegut),
    application:set_env(vonnegut, log_dirs, [filename:join(PrivDir, "data")]),
    application:set_env(vonnegut, segment_bytes, SegmentBytes),
    application:set_env(vonnegut, index_max_bytes, IndexMaxBytes),
    application:set_env(vonnegut, index_interval_bytes, 24),
    application:set_env(vonnegut, chain, [{discovery, local}]),
    application:ensure_all_started(vonnegut),
    crypto:start(),
    Config.
end_per_testcase(_, Config) ->
    application:stop(vonnegut),
    %% if we don't unload the settings will stick around in other suites
    application:unload(vonnegut),
    Config.

%% Write records larger than the configured segment size and assert the
%% exact on-disk layout of the resulting segment and index files.
records_larger_than_max_segment(_Config) ->
    Topic = vg_test_utils:create_random_name(<<"log_roll_test_topic">>),
    Partition = 0,
    TopicPartitionDir = vg_utils:topic_dir(Topic, Partition),
    vg:create_topic(Topic),
    ?assert(filelib:is_dir(TopicPartitionDir)),

    [vg:write(Topic, 0, M)
     || M <- [crypto:strong_rand_bytes(60), crypto:strong_rand_bytes(60),
              crypto:strong_rand_bytes(6), crypto:strong_rand_bytes(6),
              crypto:strong_rand_bytes(60)]],

    %% Total size of a 60 byte record when written to log becomes 86 bytes
    %% Since index interval is 24 and 86 > 24, 1 index entry of 6 bytes should exist for each as well
    ExpectSize =
        fun(File, Size) ->
                ?assertEqual(Size, filelib:file_size(filename:join([TopicPartitionDir, File])))
        end,
    ExpectSize("00000000000000000000.index", 8),
    ExpectSize("00000000000000000000.log", 127),
    ExpectSize("00000000000000000001.index", 8),
    ExpectSize("00000000000000000001.log", 127),

    %% Next 2 records create a log with 2 records of 6 bytes each (with headers they are 32 bytes)
    %% with ids 2 and 3. The third record (id 4) then goes in a new index and log
    ExpectSize("00000000000000000002.index", 8),
    ExpectSize("00000000000000000002.log", 73),
    ExpectSize("00000000000000000004.index", 8),
    ExpectSize("00000000000000000004.log", 127),

    %% regression test. check that a cold node (no data loaded) finds the right hwm for a topic
    application:stop(vonnegut),
    application:ensure_all_started(vonnegut),

    ?assertEqual(4, vg_topics:lookup_hwm(Topic, Partition)).

%% Regenerating a topic's index files must reproduce byte-identical
%% indexes and leave fetches by offset working.
regenerate_index_test(_Config) ->
    Topic = vg_test_utils:create_random_name(<<"index_regen_test_topic">>),
    Partition = 0,
    TopicDir = vg_utils:topic_dir(Topic, Partition),
    vg:create_topic(Topic),

    [vg:write(Topic, 0, iolist_to_binary(lists:duplicate(rand:uniform(5), <<"A">>)))
     || _ <- lists:seq(1, 50)],

    ReadAll =
        fun(Pattern) ->
                [begin
                     {ok, Bin} = file:read_file(File),
                     Bin
                 end || File <- filelib:wildcard(Pattern)]
        end,
    IndexPattern = filename:join(TopicDir, "*.index"),
    SHAs = ReadAll(IndexPattern),

    vg:regenerate_topic_index(Topic),

    SHAs1 = ReadAll(IndexPattern),
    ?assertMatch({ok,#{high_water_mark := 49,
                       partition := 0,
                       record_batches :=
                           [#{offset := 45}]}},
                 vg:fetch(Topic, 0, 45, 1)),

    ?assertEqual(SHAs, SHAs1),
    ok.
-------------------------------------------------------------------------------- /test/prop_vg.erl: --------------------------------------------------------------------------------
-module(prop_vg).

-include_lib("proper/include/proper.hrl").

-define(MODEL, vg_statem).

%% Run the vg_statem model under PropEr, booting/stopping vonnegut
%% around each command sequence.
prop_test() ->
    ?FORALL(Cmds, more_commands(8, commands(?MODEL)),
            begin
                lager:start(),
                lager:set_loglevel(lager_console_backend, error),
                application:ensure_all_started(vonnegut),
                {History, State, Result} = run_commands(?MODEL, Cmds),
                application:stop(vonnegut),
                ?WHENFAIL(io:format("History: ~p\nState: ~p\nResult: ~p\n",
                                    [History,State,Result]),
                          aggregate(command_names(Cmds), Result =:= ok))
            end).
-------------------------------------------------------------------------------- /test/protocol_SUITE.erl: --------------------------------------------------------------------------------
-module(protocol_SUITE).

-compile(export_all).

%% imo eventually this should be a propEr test

-include_lib("common_test/include/ct.hrl").
-include_lib("eunit/include/eunit.hrl").

-include("include/vg.hrl").

suite() ->
    [{timetrap,{seconds,30}}].

init_per_suite(Config) ->
    Config.

end_per_suite(_Config) ->
    ok.

init_per_group(_GroupName, Config) ->
    Config.

end_per_group(_GroupName, _Config) ->
    ok.

init_per_testcase(_TestCase, Config) ->
    Config.

end_per_testcase(_TestCase, _Config) ->
    ok.

groups() ->
    [].

all() ->
    [
     incomplete_fetch_decode,
     incomplete_produce_decode %,
     %% client_incomplete_handling
    ].

%% Build a full fetch response by hand, check it decodes, then verify
%% every strict prefix of it decodes to `more` (incomplete input).
incomplete_fetch_decode(_Config) ->
    %% do we need the correlation id stuff here? or is that decoded directly?
    Topic = <<"foo">>,
    {_, EncodedSet} =
        lists:foldl(
          fun(Rec, {ID, IOL}) ->
                  #{last_offset_delta := L,
                    record_batch := RecordBatch} = vg_protocol:encode_record_batch(Rec),
                  %% NOTE(review): the framing binary here was lost to an
                  %% extraction garble ("<>"); reconstructed as the Kafka
                  %% record-batch header <<BaseOffset:64, BatchLength:32>>
                  %% implied by the offset assertions below -- TODO confirm
                  %% against vg_protocol:decode_fetch_response/1.
                  {ID+L+1, [IOL | [<<ID:64/signed-integer,
                                     (iolist_size(RecordBatch)):32/signed-integer>>,
                                   RecordBatch]]}
          end,
          {55, []}, [<<"bar1">>, <<"bar2">>, <<"bar3">>, <<"bar4">>, <<"bar5">>]),

    FTR = vg_protocol:encode_fetch_topic_response(0, 0, 99, iolist_size(EncodedSet)),

    RespIO = [<<1:32/signed-integer>>, vg_protocol:encode_string(Topic),
              <<1:32/signed-integer>>, FTR, EncodedSet],

    ct:pal("resp ~p", [RespIO]),

    FullResponse = iolist_to_binary(RespIO),

    %% make sure that the full request is valid before we start breaking it up
    ?assertMatch(#{<<"foo">> :=
                       #{0 :=
                             #{error_code := 0,high_water_mark := 99,
                               record_batches :=
                                   [#{offset := 55,
                                      value := <<"bar1">>},
                                    #{offset := 56,
                                      value := <<"bar2">>},
                                    #{offset := 57,
                                      value := <<"bar3">>},
                                    #{offset := 58,
                                      value := <<"bar4">>},
                                    #{offset := 59,
                                      value := <<"bar5">>}],
                               record_batches_size := 355}}},
                 vg_protocol:decode_fetch_response(FullResponse)),

    [begin
         Head = binary:part(FullResponse, 0, N),
         ?assertEqual(more, vg_protocol:decode_fetch_response(Head))
     end
     || N <- lists:seq(1, byte_size(FullResponse) - 1)],

    ok.
%% Encode a produce response, check it decodes exactly, then verify every
%% strict prefix decodes to `more` (incomplete input handling).
incomplete_produce_decode(_Config) ->
    Topic = <<"foo">>,
    Partition = 0,
    Results = [{Topic, [{Partition, 0, 444}]}],
    %% not sure why it won't use the macro here
    %% Results = [{Topic, [{Partition, ?NO_ERROR, 444}]}],
    ProduceResponse0 = vg_protocol:encode_produce_response(Results),
    ProduceResponse = iolist_to_binary(ProduceResponse0),
    ?assertEqual(#{<<"foo">> =>
                       #{0 => #{error_code => 0,offset => 444}}},
                 vg_protocol:decode_response(?PRODUCE_REQUEST, ProduceResponse)),

    [begin
         Head = binary:part(ProduceResponse, 0, N),
         ?assertEqual(more, vg_protocol:decode_response(?PRODUCE_REQUEST, Head))
     end
     || N <- lists:seq(1, byte_size(ProduceResponse) - 1)],

    ok.
-------------------------------------------------------------------------------- /test/test_utils.hrl: --------------------------------------------------------------------------------
%% Poll X (re-evaluated each iteration) until it is true, sleeping 200ms
%% between tries. Gives up with {fail, X} after 100 tries, i.e. roughly
%% 20 seconds -- not 5 as the original comment claimed (100 * 200ms).
-define(UNTIL(X), (fun Until(100) ->
                           erlang:error({fail, X});
                       Until(I) ->
                           case X of true -> ok;
                               false ->
                                   timer:sleep(200),
                                   Until(I+1)
                           end
                   end)(0)).

%% Retry ?assertMatch(Guard, Expr) every 200ms for roughly Seconds
%% seconds (Seconds * 5 tries at 200ms); the final attempt is made
%% outside the try so its assert failure propagates.
-define(until_match(Guard, Expr, Seconds),
        (fun Until(I) when I =:= (Seconds * 5) ->
                 ?assertMatch(Guard, Expr);
             Until(I) ->
                 try
                     ?assertMatch(Guard, Expr)
                 catch error:_ ->
                         timer:sleep(200),
                         Until(I+1)
                 end
         end)(0)).
-------------------------------------------------------------------------------- /test/topic_SUITE.erl: --------------------------------------------------------------------------------
-module(topic_SUITE).

-include_lib("eunit/include/eunit.hrl").
-include_lib("common_test/include/ct.hrl").
-include("test_utils.hrl").
-compile(export_all).
all() ->
    [creation, write_empty, write, index_bug, limit, index_limit,
     many, verify_lazy_load, startup_index_correctness,
     local_client_test, last_in_index, terminate_idle_active_segment,
     delete_topic].

init_per_suite(Config) ->
    Config.

end_per_suite(_Config) ->
    ok.

%% terminate_idle_active_segment additionally needs a short
%% terminate_after so the active segment shuts itself down mid-test;
%% everything else about the boot is shared.
init_per_testcase(terminate_idle_active_segment, Config) ->
    boot(fun() -> application:set_env(vonnegut, terminate_after, timer:seconds(1)) end, Config);
init_per_testcase(_, Config) ->
    boot(fun() -> ok end, Config).

%% Shared per-testcase boot: clear env left over from other suites,
%% point the data dir at priv_dir, start the app and a client pool,
%% and create a fresh default topic for the case.
boot(ExtraEnv, Config) ->
    PrivDir = ?config(priv_dir, Config),
    LogDir = filename:join(PrivDir, "data"),
    %% clear env from other suites
    application:unload(vonnegut),
    application:load(vonnegut),
    application:load(partisan),
    ExtraEnv(),
    application:set_env(partisan, partisan_peer_service_manager, partisan_default_peer_service_manager),
    application:set_env(vonnegut, log_dirs, [LogDir]),
    application:set_env(vonnegut, chain, [{discovery, local}]),
    application:set_env(vonnegut, client, [{endpoints, [{"127.0.0.1", 5588}]}]),
    application:set_env(vonnegut, client_pool_size, 2),
    {ok, _} = application:ensure_all_started(vonnegut),
    ok = vg_client_pool:start(#{reconnect => false}),
    Topic = vg_test_utils:create_random_name(<<"topic_SUITE_default_topic">>),
    {ok, _} = vg_client:ensure_topic(Topic),
    [{topic, Topic}, {log_dir, LogDir} | Config].

end_per_testcase(_, _Config) ->
    vg_client_pool:stop(),
    ok.

creation(_Config) ->
    Topic = vg_test_utils:create_random_name(<<"creation_test_topic">>),
    Partition = 0,
    TopicPartitionDir = vg_utils:topic_dir(Topic, Partition),
    vg:create_topic(Topic),
    ?assert(filelib:is_dir(TopicPartitionDir)).

%% leaving this in as it occasionally hits a quasi race, so if we
%% start hitting intermittent failures here, we might have a regression
write_empty(_Config) ->
    Topic = vg_test_utils:create_random_name(<<"topic_SUITE_default_topic">>),
    {ok, _} = vg_client:ensure_topic(Topic),

    spawn(fun() -> vg_client:produce(Topic, <<"fleerp">>) end),
    {ok, #{Topic := #{0 := #{record_batches := Reply, high_water_mark := HWM}}}} = vg_client:fetch(Topic, 0),
    case Reply of
        [#{value := <<"fleerp">>}] -> % write then read
            ?assertEqual(0, HWM),
            ok;
        [] ->
            %% spawned write could have finished after our sendfile data boundaries
            %% are figured out but before we grabbed the HWM for the response
            ?assert(-1 =:= HWM orelse 0 =:= HWM),
            ok;
        _ ->
            ct:pal("got ~p", [Reply]),
            error(bad_return)
    end.
%% Write several records and verify fetch-by-offset returns the right
%% tail of the log.
write(Config) ->
    Topic = ?config(topic, Config),
    Anarchist = <<"no gods no masters">>,
    [begin
         {ok, R} = vg_client:produce(Topic, Anarchist),
         ct:pal("reply: ~p", [R])
     end
     || _ <- lists:seq(1, rand:uniform(20))],
    Communist = <<"from each according to their abilities, to "
                  "each according to their needs">>,
    {ok, R1} = vg_client:produce(Topic, Communist),
    ct:pal("reply: ~p", [R1]),
    {ok, #{Topic := #{0 := #{record_batches := Reply}}}} = vg_client:fetch(Topic, R1),
    ?assertMatch([#{value := Communist}], Reply),

    {ok, #{Topic := #{0 := #{record_batches := Reply1}}}} = vg_client:fetch(Topic, R1 - 1),
    ?assertMatch([#{value := Anarchist}, #{value := Communist}], Reply1).

%% Regression: fetching from before the first index marker must still
%% return the records.
index_bug(Config) ->
    Topic = ?config(topic, Config),

    %% write enough data to cause index creation but not two entries
    {ok, _} = vg_client:produce(Topic,
                                lists:duplicate(100, <<"123456789abcdef">>)),

    %% fetch from 0 to make sure that they're all there
    {ok, #{Topic := #{0 := #{record_batches := Reply}}}} = vg_client:fetch(Topic, 0),
    ?assertEqual(100, length(Reply)),

    %% now query for something before the first index marker
    {ok, #{Topic := #{0 := #{record_batches := Reply2,
                             high_water_mark := HWM}}}} =
        vg_client:fetch(Topic, 10),

    ?assertEqual(99, HWM),

    %% this is a passing version before the bugfix
    %% ?assertEqual([], Reply2).
    %% ?assertEqual(90, length(Reply2)),
    %% change with 0.11.0 RecorBatch storage
    ?assertEqual(100, length(Reply2)),

    %% write enough more data for another entry to hit the second clause
    {ok, _} = vg_client:produce(Topic,
                                lists:duplicate(100, <<"123456789abcdef">>)),

    {ok, #{Topic := #{0 := #{record_batches := Reply3}}}} = vg_client:fetch(Topic, 0),
    ?assertEqual(200, length(Reply3)),

    {ok, #{Topic := #{0 := #{record_batches := Reply4,
                             high_water_mark := HWM4}}}} = vg_client:fetch(Topic, 10),

    ?assertEqual(199, HWM4),
    ?assertEqual(200, length(Reply4)).

%% After a stop (forced flush), the last entry of the on-disk index must
%% be readable.
last_in_index(Config) ->
    Topic = ?config(topic, Config),

    [{ok, _} = vg_client:produce(Topic,
                                 lists:duplicate(100, <<"123456789abcdef">>))
     || _ <- lists:seq(1, 100)],

    %% try to force flush
    application:stop(vonnegut),

    %% log_dirs is a *list* of directories; the original concatenated the
    %% whole list with "/" producing an accidental deep list -- destructure
    %% the single configured directory instead.
    {ok, [LogDir]} = application:get_env(vonnegut, log_dirs),
    TopicDir = LogDir ++ "/" ++ binary_to_list(Topic) ++ "-0/",
    Filename = vg_utils:index_file(TopicDir, 0),
    ct:pal("topic dir ~p, filename ~p", [TopicDir, Filename]),
    ?assertNotEqual({0, 0}, vg_log_segments:last_in_index(TopicDir, Filename, 1)).

%% max_bytes caps the number of records a fetch returns.
limit(Config) ->
    Topic = ?config(topic, Config),

    {ok, P} = vg_client:produce(Topic,
                                lists:duplicate(100, <<"123456789abcdef">>)),
    ?assertEqual(99, P),
    {ok, #{Topic := #{0 := #{record_batches := Reply}}}} = vg_client:fetch(Topic),
    ?assertEqual(100, length(Reply)),

    {ok, #{Topic := #{0 := #{record_batches := Reply2}}}} =
        vg_client:fetch([{Topic, 0, #{max_bytes => 1000}}]),
    %% ?assertEqual(24, length(Reply2)),
    %% how is it we are geting more with the new format...
    ?assertEqual(37, length(Reply2)),

    {ok, #{Topic := #{0 := #{record_batches := []}}}} =
        vg_client:fetch([{Topic, 0, #{max_bytes => 1}}]),

    ok.
%% Exercise the interaction of limit, max_bytes and the -1 ("from HWM")
%% offset on fetch.
index_limit(Config) ->
    Topic = ?config(topic, Config),

    [{ok, _} = vg_client:produce(Topic,
                                 <<"123456789abcdef">>) || _ <- lists:seq(1, 100)],

    FetchBatches =
        fun(Request) ->
                {ok, #{Topic := #{0 := #{record_batches := Batches}}}} = vg_client:fetch(Request),
                Batches
        end,

    {ok, #{Topic := #{0 := #{record_batches := Reply}}}} = vg_client:fetch(Topic, 0),
    ?assertEqual(100, length(Reply)),

    {ok, #{Topic := #{0 := #{record_batches := Reply2}}}} = vg_client:fetch(Topic, 0, 50),
    ?assertEqual(50, length(Reply2)),
    %% ?assertEqual(50, length(Reply2)),

    %% max_bytes overrides max_index
    Reply3 = FetchBatches([{Topic, 0, #{limit => 50, max_bytes => 1000}}]),
    ?assertEqual(12, length(Reply3)),
    %% ?assertEqual(24, length(Reply3)),

    %% limit returns Offset to Offset+Limit
    Reply4 = FetchBatches([{Topic, 10, #{limit => 20}}]),
    ?assertEqual(20, length(Reply4)),
    ?assertMatch(#{offset := 10}, hd(Reply4)),
    ?assertMatch(#{offset := 29}, lists:last(Reply4)),

    %% -1 Offset returns HWM-Limit to HWM
    Reply5 = FetchBatches([{Topic, -1, #{limit => 20}}]),
    ?assertMatch(#{offset := 80}, hd(Reply5)),
    ?assertMatch(#{offset := 99}, lists:last(Reply5)),
    ?assertEqual(20, length(Reply5)),

    %% -1 Offset with limit larger than HWM starts from 0
    Reply6 = FetchBatches([{Topic, -1, #{limit => 200}}]),
    ?assertEqual(100, length(Reply6)),

    [] = FetchBatches([{Topic, 0, #{max_bytes => 1}}]),

    ok.
%% Smoke/perf check: create and write to 1000 topics and assert the
%% whole run finishes inside a generous time budget.
many(Config) ->
    TopicCount = 1000,
    TimeLimit = 100000,

    Start = erlang:monotonic_time(milli_seconds),
    lists:foreach(
      fun(N0) ->
              N = integer_to_binary(N0),
              Topic = vg_test_utils:create_random_name(<<"many-topic-", N/binary>>),
              %% adding a record to the topic will create it under current settings
              ct:pal("adding to topic: ~p", [Topic]),
              {ok, _} = vg_client:ensure_topic(Topic),
              {ok, _} = vg_client:produce(Topic, [<<"woo">>])
      end, lists:seq(1, TopicCount)),
    Duration = erlang:monotonic_time(milli_seconds) - Start,
    ct:pal("creating ~p topics took ~p ms", [TopicCount, Duration]),
    ?assert(Duration < TimeLimit),
    Config.

wait_for_start(Topic) ->
    wait_for_start(Topic, 5000).

%% Poll a 1-record fetch until the listener answers, giving the node
%% time to finish booting; errors out after N attempts.
wait_for_start(_Topic, 0) ->
    error(waited_too_long);
wait_for_start(Topic, N) ->
    case vg_client:fetch(Topic, 0, 1) of
        {ok, _} = _OK ->
            %%ct:pal("ok ~p", [_OK]),
            timer:sleep(150),
            ok;
        {error, no_socket} ->
            timer:sleep(1),
            wait_for_start(Topic, N - 1)
    end.
%% Restart the node repeatedly between writes and verify offsets keep
%% incrementing correctly across cold starts (index reload correctness).
startup_index_correctness(Config) ->
    %% we actually want the reconnect behavior here
    ok = vg_client_pool:stop(),
    ok = vg_client_pool:start(#{reconnect => true}),

    Topic = ?config(topic, Config),
    ct:pal("STARTING TEST"),

    {ok, _} = vg_client:produce(Topic,
                                lists:duplicate(1, <<"123456789abcdef000000000">>)),
    {ok, _} = vg_client:produce(Topic,
                                lists:duplicate(1, <<"123456789abcdef111111111">>)),

    lists:foreach(
      fun(N) ->
              application:stop(vonnegut),
              %% bleh circle
              timer:sleep(750),
              {ok, _} = application:ensure_all_started(vonnegut),
              wait_for_start(Topic),
              A = integer_to_binary(N),
              B = integer_to_binary(N + 1),
              C = integer_to_binary(N + 2),
              M = N + 1,
              {ok, Q} = vg_client:produce(Topic, [<<"123456789abcdef-", A/binary>>,
                                                  <<"123456789abcdef-", B/binary>>]),
              ?assertEqual(M, Q),
              Y = M + 1,
              {ok, Y} = vg_client:produce(Topic, <<"123456789abcdef-", C/binary>>)
      end, lists:seq(2, 11, 3)),

    %% -1 Offset returns HWM-Limit to HWM
    {ok, #{Topic := #{0 := #{record_batches := ReplyN}}}} = vg_client:fetch([{Topic, 0, #{limit => 2000}}]),
    {ok, #{Topic := #{0 := #{record_batches := Reply0}}}} = vg_client:fetch([{Topic, -1, #{limit => 2}}]),
    ct:pal("reply 0 ~p", [Reply0]),
    ct:pal("whole set ~p", [ReplyN]),

    %% 11 and 12 are in one RecordBatch
    ?assertEqual(3, length(Reply0)),
    ?assertMatch(#{offset := 12}, hd(Reply0)),
    ?assertMatch(#{offset := 13}, lists:last(Reply0)),

    {ok, #{Topic := #{0 := #{record_batches := Reply1}}}} = vg_client:fetch([{Topic, 0, #{limit => 100}}]),
    ?assertEqual(14, length(Reply1)),
    ?assertMatch(#{offset := 13}, lists:last(Reply1)),
    ok.
%% verify the active topic segment process is not started until needed
verify_lazy_load(_Config) ->
    Topic = vg_test_utils:create_random_name(<<"verify_lazy_load">>),
    Partition = 0,
    TopicPartitionDir = vg_utils:topic_dir(Topic, Partition),
    vg:create_topic(Topic),
    ?assert(filelib:is_dir(TopicPartitionDir)),

    {ok, _} = vg_client:produce(Topic,
                                lists:duplicate(100, <<"123456789abcdef">>)),

    %% fetch from 0 to make sure that they're all there
    {ok, #{Topic := #{0 := #{record_batches := Reply}}}} = vg_client:fetch(Topic, 0),
    ?assertEqual(100, length(Reply)),

    application:stop(vonnegut),

    %% delay on getting the elli port back can cause restarting to fail so pause for a bit
    timer:sleep(500),

    {ok, _} = application:ensure_all_started(vonnegut),
    wait_for_start(Topic),

    ActiveName = {n,l,{active, Topic, Partition}},
    ?assertEqual(undefined, gproc:whereis_name(ActiveName)),

    %% reads must not wake the active segment process
    {ok, #{Topic := #{0 := #{record_batches := Reply2}}}} = vg_client:fetch(Topic, 0),
    ?assertEqual(100, length(Reply2)),

    ?assertEqual(undefined, gproc:whereis_name(ActiveName)),

    %% writing starts the process
    {ok, _} = vg_client:produce(Topic,
                                lists:duplicate(100, <<"123456789abcdef">>)),

    ?assertNotEqual(undefined, gproc:whereis_name(ActiveName)).

%% Exercise the server-side (non-socket) client API directly.
local_client_test(Config) ->
    Topic = ?config(topic, Config),
    vg:write(Topic, 0, <<"foo">>),
    {ok, Ret} = vg:fetch(Topic),
    ?assertMatch(#{high_water_mark := 0,
                   partition := 0,
                   record_batches :=
                       [#{offset := 0,
                          value := <<"foo">>}]},
                 Ret),
    ok.
%% The active segment process terminates itself after being idle for
%% the configured terminate_after (1s in this case's setup).
terminate_idle_active_segment(Config) ->
    Topic = ?config(topic, Config),
    %% verify it is running
    {ok, _} = vg:write(Topic, 0, <<"foo">>),
    ?assertNotEqual(undefined, vg_active_segment:where(Topic, 0)),
    %% after a second it should terminate and be undefined
    ?UNTIL(vg_active_segment:where(Topic, 0) =:= undefined),
    ok.

%% Deleting a topic must remove its partition directory from disk.
delete_topic(Config) ->
    Dir = ?config(log_dir, Config),
    Topic = vg_test_utils:create_random_name(<<"topic_SUITE_delete_topic">>),
    {ok, _} = vg_client:ensure_topic(Topic),

    [begin
         {ok, _} = vg_client:produce(Topic, <<"some datas">>)
     end
     || _ <- lists:seq(1, rand:uniform(20))],

    %% NOTE(review): the directory-name binary here was lost to an
    %% extraction garble ("<>"); reconstructed as <<Topic/binary, "-0">>
    %% to match the "<topic>-<partition>" on-disk layout used elsewhere
    %% in this suite -- TODO confirm against vg_utils:topic_dir/2.
    ?assert(filelib:is_dir(filename:join(Dir, <<Topic/binary, "-0">>))),

    vg_client:delete_topic(Topic),

    ?assertNot(filelib:is_dir(filename:join(Dir, <<Topic/binary, "-0">>))).
-------------------------------------------------------------------------------- /test/vg_consumer_SUITE.erl: --------------------------------------------------------------------------------
-module(vg_consumer_SUITE).

-include_lib("eunit/include/eunit.hrl").
-include_lib("common_test/include/ct.hrl").
-compile(export_all).

-include("test_utils.hrl").

all() ->
    [from_zero, multi_topic_fetch, fetch_unknown, fetch_higher_than_hwm, regression_2_23_18].
%% Boot vonnegut once for the whole suite with small segment limits and
%% a local client pointed at the default listener port.
init_per_suite(Config) ->
    PrivDir = ?config(priv_dir, Config),
    application:load(vonnegut),
    Env = [{client_pool_size, 2},
           {log_dirs, [filename:join(PrivDir, "data")]},
           {segment_bytes, 86},
           {index_max_bytes, 18},
           {index_interval_bytes, 24},
           {client, [{endpoints, [{"127.0.0.1", 5588}]}]},
           {chain, [{discovery, local}]}],
    [application:set_env(vonnegut, Key, Value) || {Key, Value} <- Env],
    application:start(shackle),
    application:ensure_all_started(vonnegut),
    crypto:start(),
    Config.

end_per_suite(Config) ->
    application:stop(vonnegut),
    application:unload(vonnegut),
    Config.

init_per_testcase(_, Config) ->
    ok = vg_client_pool:start(#{reconnect => false}),
    Config.

end_per_testcase(_, _Config) ->
    vg_client_pool:stop(),
    ok.

%% Produce two records from offset 0 and fetch each back by offset.
from_zero(_Config) ->
    Topic = vg_test_utils:create_random_name(<<"consumer_SUITE_test_topic">>),
    {ok, _} = vg_client:ensure_topic(Topic),
    Partition = 0,
    TopicPartitionDir = vg_utils:topic_dir(Topic, Partition),
    ?assert(filelib:is_dir(TopicPartitionDir)),

    %% make sure there's enough time for the
    %% listeners to come up
    timer:sleep(250),

    ?assertMatch({ok, 0},
                 vg_client:produce(Topic, [#{key => <<"key">>,
                                             value => <<"record 1 wasn't long enough to make wrapping fail">>}])),
    ?assertMatch({ok, 1},
                 vg_client:produce(Topic, [<<"record 2">>])),
    {ok, #{Topic := #{0 := #{record_batches := Data, high_water_mark := HWM}}}} = vg_client:fetch(Topic, 0),
    ?assertEqual(1, HWM),
    ?assertMatch([#{offset := 0, key := <<"key">>, value := <<"record 1 wasn't long enough to make wrapping fail">>}], Data),
    {ok, #{Topic := #{0 := #{record_batches := Data1, high_water_mark := HWM1}}}} = vg_client:fetch(Topic, 1),
    ?assertEqual(1, HWM1),
    ?assertMatch([#{offset := 1, value := <<"record 2">>}], Data1),

    ok.

%% A single fetch request may address several topics at different
%% offsets; each topic's slice comes back independently.
multi_topic_fetch(_Config) ->
    Topic1 = vg_test_utils:create_random_name(<<"consumer_SUITE_test_topic-1">>),
    Topic2 = vg_test_utils:create_random_name(<<"consumer_SUITE_test_topic-2">>),

    ok = vg:create_topic(Topic1),
    ok = vg:create_topic(Topic2),

    %% make sure there's enough time for the
    %% listeners to come up
    timer:sleep(250),

    ?assertMatch({ok, 0},
                 vg_client:produce(Topic1, [#{timestamp => erlang:system_time(millisecond),
                                              key => <<"key">>, value => <<"topic 1 record 1">>}])),
    ?assertMatch({ok, 1},
                 vg_client:produce(Topic1, [<<"topic 1 record 2">>])),

    ?assertMatch({ok, 0},
                 vg_client:produce(Topic2, [#{timestamp => erlang:system_time(millisecond),
                                              key => <<"key-2">>, value => <<"topic 2 record 1">>}])),
    ?assertMatch({ok, 1},
                 vg_client:produce(Topic2, [<<"topic 2 record 2">>])),

    {ok, #{Topic1 := #{0 := #{record_batches := Data, high_water_mark := HWM}},
           Topic2 := #{0 := #{record_batches := Data2, high_water_mark := HWM2}}}} =
        vg_client:fetch([{Topic1, 0, #{}},
                         {Topic2, 1, #{}}]),

    ?assertEqual(1, HWM),
    ?assertMatch([#{offset := 0, key := <<"key">>, value := <<"topic 1 record 1">>}], Data),

    ?assertEqual(1, HWM2),
    ?assertMatch([#{offset := 1, value := <<"topic 2 record 2">>}], Data2),

    ok.

%% Fetching a topic that was never created yields a tagged error.
fetch_unknown(_Config) ->
    Topic = vg_test_utils:create_random_name(<<"consumer_SUITE_test_topic">>),

    %% make sure there's enough time for the
    %% listeners to come up
    timer:sleep(250),

    ?assertMatch({error, {Topic, not_found}}, vg_client:fetch(Topic, 0)),

    ok.
%% Fetching at an offset past the high watermark — including on a
%% completely empty log — must yield an empty record set and the current
%% high watermark instead of failing or blocking.
fetch_higher_than_hwm(_Config) ->
    Topic = vg_test_utils:create_random_name(<<"consumer_SUITE_fetch_higher_than_hwm">>),
    {ok, _} = vg_client:ensure_topic(Topic),
    Dir = vg_utils:topic_dir(Topic, 0),
    ?assert(filelib:is_dir(Dir)),

    %% an empty log reports HWM -1 and no batches
    {ok, #{Topic := #{0 := #{record_batches := EmptyBatches,
                             high_water_mark := EmptyHwm}}}} = vg_client:fetch(Topic, 1),
    ?assertEqual(-1, EmptyHwm),
    ?assertMatch([], EmptyBatches),

    %% same answer when a fetch limit is supplied; re-matching the
    %% already-bound variables asserts the two responses are identical
    {ok, #{Topic := #{0 := #{record_batches := EmptyBatches,
                             high_water_mark := EmptyHwm}}}} = vg_client:fetch(Topic, 1, 1),
    ?assertEqual(-1, EmptyHwm),
    ?assertMatch([], EmptyBatches),

    %% make sure there's enough time for the
    %% listeners to come up
    timer:sleep(250),

    %% after one write the HWM is 0, but offset 1 is still past it, so
    %% the batch list stays empty
    ?assertMatch({ok, 0},
                 vg_client:produce(Topic, [#{key => <<"some key">>,
                                             value => <<"rsome value">>}])),
    {ok, #{Topic := #{0 := #{record_batches := AfterWriteBatches,
                             high_water_mark := AfterWriteHwm}}}} = vg_client:fetch(Topic, 1),
    ?assertEqual(0, AfterWriteHwm),
    ?assertMatch([], AfterWriteBatches),

    ok.

%% fetch from 5 with limit 1000 was timing out.
%% issue was vonnegut claiming to send more data than it actually would, leaving the client expecting more
%% Regression test: a fetch whose start offset (5) is past the single
%% written record, with a large byte limit (1000), must return promptly
%% instead of timing out.
regression_2_23_18(_Config) ->
    Topic = vg_test_utils:create_random_name(<<"consumer_SUITE_regression_2-23-18">>),
    {ok, _} = vg_client:ensure_topic(Topic),
    Partition = 0,
    TopicPartitionDir = vg_utils:topic_dir(Topic, Partition),
    ?assert(filelib:is_dir(TopicPartitionDir)),

    %% make sure there's enough time for the
    %% listeners to come up
    timer:sleep(250),

    %% NOTE(review): the produce result is ignored here, unlike the other
    %% cases in this suite which assert {ok, Offset} — confirm intent.
    vg_client:produce(Topic, [#{value => <<"some value">>}]),
    {ok, #{Topic := #{0 := #{record_batches := _Data, high_water_mark := HWM1}}}} = vg_client:fetch(Topic, 5, 1000),
    ?assertEqual(0, HWM1),

    ok.
--------------------------------------------------------------------------------
/test/vg_statem.erl:
--------------------------------------------------------------------------------
%% PropEr stateful-property callbacks for exercising the vonnegut log
%% (see prop_vg.erl for the property that drives this module).
-module(vg_statem).

-include_lib("proper/include/proper.hrl").

-export([command/1, initial_state/0, next_state/3,
         precondition/2, postcondition/3]).

%% sigh
-export([restart_server/0]).

%% Model state: topics maps topic name (binary) -> last written offset.
-record(state,
        {
          topics = #{}
%% NOTE(review): the closing "})." of this record appears to have been
%% lost at an extraction/chunk boundary — verify against the repository
%% before treating this text as the real source.
%% Generate the next symbolic command, weighted heavily toward writes
%% and fetches so that segment wraps and close/restart paths are hit
%% often. Picks a random known topic and targets offsets around its HWM.
command(_S = #state{topics = Topics}) ->
    %% replace with proper oneof when maps are supported
    {Topic, Info} = one_of(Topics),
    Index = hwm(Info) + 1,
    frequency(
      [%% to tickle close and restart validation bugs
       {2, {call, ?MODULE, restart_server, []}},
       {2, {call, vg, ensure_topic, [?LET(A, atom(), atom_to_binary(A, utf8))]}},
       %% write more than is common in typical workloads in order to
       %% trigger more wraps
       {40, {call, vg, write,
             [Topic, 0, message(Topic, Index, <<>>)]}},
       %% batched writes of 3..16 consecutive messages
       {10, {call, vg, write,
             [Topic, 0, ?LET(I, integer(2, 15),
                             [message(Topic, Index + N, <<>>)
                              || N <- lists:seq(0, I)])]}},
       {40, {call, vg, fetch, [Topic, 0, -1, integer(1, 5)]}},
       {100, {call, vg, fetch, [Topic, integer(0, Index - 1)]}}
      ]).

%% Initial model value at system start. Should be deterministic.
%% Restarts vonnegut with a per-run log directory and seeds one topic so
%% one_of/1 never has to fall back to its empty-map clause.
initial_state() ->
    application:load(vonnegut),
    %% set this once per run
    application:set_env(vonnegut, log_dirs, [filename:join("properdata", integer_to_list(erlang:system_time()))]),

    _ = application:stop(vonnegut),
    {ok, _} = application:ensure_all_started(vonnegut),
    timer:sleep(500),
    ok = vg:create_topic(<<"seed">>),
    {ok, 0} = vg:write(<<"seed">>, 0, message(<<"seed">>, 0, <<>>)),
    #state{topics = #{<<"seed">> => 0}}.

%% Picks whether a command should be valid under the current state.
%% Bug fix: command/1 emits vg:write and vg:fetch calls, so the original
%% guard on `produce' alone could never block a write against an empty
%% topics map; `write' is now covered as well (`produce' kept for
%% backward compatibility).
precondition(#state{topics = T}, {call, _Mod, Fun, _Args}) when T == #{}
                                                                andalso (Fun == fetch orelse
                                                                         Fun == write orelse
                                                                         Fun == produce) ->
    false;
precondition(#state{}, _C = {call, _Mod, _Fun, _Args}) ->
    %lager:info("calling ~p", [_C]),
    true.

%% Given the state `State' *prior* to the call `{call, Mod, Fun, Args}',
%% determine whether the result `Res' (coming from the actual system)
%% makes sense.
%% NOTE(review): command/1 issues calls through module `vg' (write/fetch),
%% but these clauses match module `vg_client' and function `produce' — as
%% written they never fire and everything falls through to the catch-all
%% `true'. Confirm which module the commands are meant to target.
postcondition(#state{topics = Topics}, {call, vg_client, fetch, [Topic, Index]}, Res) ->
    HWM = maps:get(Topic, Topics, undefined),
    {ok, #{Topic := #{0 := #{record_batches := RecordSet}}}} = Res,
    %% potentially validate what we're seeing here, also this will stop being true once more bytes start being returned
    length(RecordSet) =:= (HWM - Index) + 1;
postcondition(#state{topics = Topics}, {call, vg_client, produce, [Topic, Message]}, Res) ->
    lager:info("check ~p ~p ~p", [Topic, Res, Topics]),
    HWM = maps:get(Topic, Topics),
    {ok, Offset} = Res,
    %% potentially validate what we're seeing here
    %% a list write advances the offset by its length, a single message by 1
    Added =
        case Message of
            L when is_list(L) ->
                length(L);
            _ -> 1
        end,
    Offset =:= HWM + Added;
postcondition(_State, {call, _Mod, _Fun, _Args}, _Res) ->
    true.

%% Assuming the postcondition for a call was true, update the model
%% accordingly for the test to proceed.
%% NOTE(review): same module mismatch as postcondition/3 — these clauses
%% match `vg_client' while command/1 generates `vg' calls, so the model's
%% topics map is presumably never updated after initial_state/0; verify.
next_state(State=#state{topics = Topics}, {ok, Offset}, {call, vg_client, produce, [Topic, _Message]}) ->
    NewTopics = maps:put(Topic, Offset, Topics),
    State#state{topics = NewTopics};
next_state(State=#state{topics = Topics}, {ok, _}, {call, vg_client, ensure_topic, [Topic]}) ->
    NewTopics = maps:put(Topic, 0, Topics),
    State#state{topics = NewTopics};
next_state(State, {error, _}, {call, vg_client, ensure_topic, [_Topic]}) ->
    %% i guess just ignore invalid topic names?
    State;
next_state(State, _Res, {call, _Mod, _Fun, _Args}) ->
    NewState = State,
    NewState.


%%%%%%%%%

%% Full stop/start cycle of the application, used by command/1 to tickle
%% close-and-restart validation bugs.
restart_server() ->
    ok = application:stop(vonnegut),
    ok = application:start(vonnegut),
    timer:sleep(500),
    ok.

%% Build a test message binary for Topic at Index, repeated a random
%% 6..20 times, followed by a native-endian length-prefixed Gen payload.
%% NOTE(review): the two `<>' segments below are NOT valid Erlang — the
%% contents of these `<<...>>' binary expressions were destroyed by the
%% extraction that produced this dump (two interior source lines are
%% missing). Recover the real expressions from the repository; do not
%% treat this text as the actual implementation.
message(Topic, Index, Gen) ->
    Reps = rand:uniform(15) + 5,
    iolist_to_binary(
      [<>,
       lists:duplicate(Reps,
                       <>),
       <<(byte_size(Gen)):32/native, Gen/binary>>]).
%% trivial but opaque for interface reasons
%% Identity accessor: the model stores a bare offset per topic today, but
%% keeping this indirection lets the stored Info grow richer later.
hwm(HWM) ->
    HWM.

%% Pick a uniformly random {Topic, Info} entry from the topics map.
one_of(Empty) when Empty =:= #{} ->
    %% bad feeling here, buttttt
    %% sentinel for the empty map; precondition/2 is expected to veto
    %% commands built from it
    {<<>>, 0};
one_of(Map) ->
    Sz = maps:size(Map),
    %% rand:uniform/1 returns 1..Sz, matching lists:nth/2's 1-based index
    Index = rand:uniform(Sz),
    lists:nth(Index, maps:to_list(Map)).
--------------------------------------------------------------------------------
/test/vg_test_utils.erl:
--------------------------------------------------------------------------------
%% Shared helpers for the test suites.
-module(vg_test_utils).

%% NOTE(review): export_all is discouraged outside quick test scaffolding;
%% consider an explicit -export([create_random_name/1]).
-compile(export_all).

%% Append a random/unique suffix to Name so each test case gets a fresh
%% topic name.
%% NOTE(review): the `<>' below is NOT valid Erlang — the body of this
%% `<<...>>' binary expression was destroyed by the extraction that
%% produced this dump. Recover the real expression from the repository.
create_random_name(Name) ->
    <>.
--------------------------------------------------------------------------------